From db984d8a8b98a985fe9a272794fa20081908ad13 Mon Sep 17 00:00:00 2001 From: July Date: Thu, 28 Mar 2024 14:43:17 -0400 Subject: [PATCH] AO3Bridge: move tags to categories and remove duplicate fic summary (#4031) * AO3Bridge: move tags to categories and remove duplicate fic summary * [AO3Bridge] Fix tag html entity encoding --- bridges/AO3Bridge.php | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 32bbb0a2..85f0f9f8 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -91,12 +91,26 @@ class AO3Bridge extends BridgeAbstract continue; // discard deleted works } $item['title'] = $title->plaintext; - $item['content'] = $element; $item['uri'] = $title->href; $strdate = $element->find('div p.datetime', 0)->plaintext; $item['timestamp'] = strtotime($strdate); + // detach from rest of page because remove() is buggy + $element = str_get_html($element->outertext()); + $tags = $element->find('ul.required-tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + $tags = $element->find('ul.tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + + $item['content'] = implode('', $element->childNodes()); + $chapters = $element->find('dl dd.chapters', 0); // bookmarked series and external works do not have a chapters count $chapters = (isset($chapters) ? $chapters->plaintext : 0); @@ -123,6 +137,10 @@ class AO3Bridge extends BridgeAbstract $response = $httpClient->request($url, $agent); $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); + // remove duplicate fic summary + if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) { + $ficsum->remove(); + } $item['content'] .= $html->find('#workskin', 0); }