mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-04 16:49:35 +00:00
[ArsTechnicaBridge] Properly handle paged content (#3855)
* [ArsTechnicaBridge] Properly handle paged content
* [ArsTechnicaBridge] Remove normal site ad wrapper
This commit is contained in:
parent
f40f997405
commit
ea2b4d7506
@ -35,39 +35,34 @@ class ArsTechnicaBridge extends FeedExpander
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri'] . '&');
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item_html = defaultLinkTo($item_html, self::URI);
|
||||
$item['content'] = $item_html->find('.article-content', 0);
|
||||
|
||||
$item_content = $item_html->find('.article-content.post-page', 0);
|
||||
if (!$item_content) {
|
||||
// The dom selector probably broke. Let's just return the item as-is
|
||||
return $item;
|
||||
$pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
|
||||
if (null !== $pages) {
|
||||
for ($i = 2; $i <= $pages->innertext; $i++) {
|
||||
$page_url = $item['uri'] . '&page=' . $i;
|
||||
$page_html = getSimpleHTMLDOMCached($page_url);
|
||||
$page_html = defaultLinkTo($page_html, self::URI);
|
||||
$item['content'] .= $page_html->find('.article-content', 0);
|
||||
}
|
||||
$item['content'] = str_get_html($item['content']);
|
||||
}
|
||||
|
||||
$item['content'] = $item_content;
|
||||
|
||||
// remove various ars advertising
|
||||
$item['content']->find('#social-left', 0)->remove();
|
||||
foreach ($item['content']->find('.ars-component-buy-box') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
foreach ($item['content']->find('i-amphtml-sizer') as $ad) {
|
||||
foreach ($item['content']->find('.ad_wrapper') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
foreach ($item['content']->find('.sidebar') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
|
||||
foreach ($item['content']->find('a') as $link) { //remove amp redirect links
|
||||
$url = $link->getAttribute('href');
|
||||
if (str_contains($url, 'go.redirectingat.com')) {
|
||||
$url = extractFromDelimiters($url, 'url=', '&');
|
||||
$url = urldecode($url);
|
||||
$link->setAttribute('href', $url);
|
||||
}
|
||||
}
|
||||
|
||||
$item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content']));
|
||||
$item['content'] = backgroundToImg($item['content']);
|
||||
|
||||
$item['uid'] = explode('=', $item['uri'])[1];
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user