diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 613c1c58..2c631871 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -35,39 +35,34 @@ class ArsTechnicaBridge extends FeedExpander protected function parseItem(array $item) { - $item_html = getSimpleHTMLDOMCached($item['uri'] . '&'); + $item_html = getSimpleHTMLDOMCached($item['uri']); $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] = $item_html->find('.article-content', 0); - $item_content = $item_html->find('.article-content.post-page', 0); - if (!$item_content) { - // The dom selector probably broke. Let's just return the item as-is - return $item; + $pages = $item_html->find('nav.page-numbers > .numbers > a', -2); + if (null !== $pages) { + for ($i = 2; $i <= $pages->innertext; $i++) { + $page_url = $item['uri'] . '&page=' . $i; + $page_html = getSimpleHTMLDOMCached($page_url); + $page_html = defaultLinkTo($page_html, self::URI); + $item['content'] .= $page_html->find('.article-content', 0); + } + $item['content'] = str_get_html($item['content']); } - $item['content'] = $item_content; - // remove various ars advertising $item['content']->find('#social-left', 0)->remove(); foreach ($item['content']->find('.ars-component-buy-box') as $ad) { $ad->remove(); } - foreach ($item['content']->find('i-amphtml-sizer') as $ad) { + foreach ($item['content']->find('.ad_wrapper') as $ad) { $ad->remove(); } foreach ($item['content']->find('.sidebar') as $ad) { $ad->remove(); } - foreach ($item['content']->find('a') as $link) { //remove amp redirect links - $url = $link->getAttribute('href'); - if (str_contains($url, 'go.redirectingat.com')) { - $url = extractFromDelimiters($url, 'url=', '&'); - $url = urldecode($url); - $link->setAttribute('href', $url); - } - } - - $item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content'])); + $item['content'] = backgroundToImg($item['content']); $item['uid'] = explode('=', $item['uri'])[1];