mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 17:19:37 +00:00
[EconomistBridge] Fix for new layout (#2489)
This commit is contained in:
parent
1a3419a2d4
commit
ac8e94ec56
@ -95,23 +95,36 @@ class EconomistBridge extends FeedExpander {
|
|||||||
|
|
||||||
protected function parseItem($feedItem){
|
protected function parseItem($feedItem){
|
||||||
$item = parent::parseItem($feedItem);
|
$item = parent::parseItem($feedItem);
|
||||||
|
|
||||||
$article = getSimpleHTMLDOM($item['uri']);
|
$article = getSimpleHTMLDOM($item['uri']);
|
||||||
// the article must be cleaned up before it can be added, hence the separate cleanContent() function
|
// the article must be cleaned up before it can be added, hence the separate cleanContent() function
|
||||||
$item['content'] = $this->cleanContent($article);
|
// We also need to distinguish between old-style and new-style articles
|
||||||
|
if ($article->find('article', 0)->getAttribute('data-test-id') == 'Article') {
|
||||||
|
$contentNode = 'div.layout-article-body';
|
||||||
|
$imgNode = 'div.article__lead-image';
|
||||||
|
$categoryNode = 'span.article__subheadline';
|
||||||
|
} else {
|
||||||
|
$contentNode = 'div.e4sqmp1';
|
||||||
|
$imgNode = 'figure.e3y6nua0';
|
||||||
|
$categoryNode = 'span.ern1uyf0';
|
||||||
|
}
|
||||||
|
|
||||||
|
$item['content'] = $this->cleanContent($article, $contentNode);
|
||||||
// only the article lead image is retained if it's there
|
// only the article lead image is retained if it's there
|
||||||
if (!is_null($article->find('div.article__lead-image', 0))) {
|
if (!is_null($article->find($imgNode, 0))) {
|
||||||
$item['enclosures'][] = $article->find('div.article__lead-image', 0)->find('img', 0)->getAttribute('src');
|
$item['enclosures'][] = $article->find($imgNode, 0)->find('img', 0)->getAttribute('src');
|
||||||
} else {
|
} else {
|
||||||
$item['enclosures'][] = '';
|
$item['enclosures'][] = '';
|
||||||
}
|
}
|
||||||
|
// add the subheadline as a category. This will create a link in new articles
|
||||||
|
// and plain text in old articles
|
||||||
|
$item['categories'][] = $article->find($categoryNode, 0)->innertext;
|
||||||
|
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function cleanContent($article){
|
private function cleanContent($article, $contentNode){
|
||||||
// the actual article is in this div
|
// the actual article is in this div
|
||||||
$content = $article->find('div.layout-article-body', 0)->innertext;
|
$content = $article->find($contentNode, 0)->innertext;
|
||||||
// clean the article content. Remove all div's since the text is in paragraph elements
|
// clean the article content. Remove all div's since the text is in paragraph elements
|
||||||
foreach (array(
|
foreach (array(
|
||||||
'<div '
|
'<div '
|
||||||
|
Loading…
Reference in New Issue
Block a user