[FuturaSciences] Fix content extraction (#3487, #3488) (#3606)

This commit is contained in:
ORelio 2023-08-09 20:10:15 +02:00 committed by GitHub
parent 1fcf67f14a
commit 6cc4cf24dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -90,7 +90,7 @@ class FuturaSciencesBridge extends FeedExpander
$item = parent::parseItem($newsItem); $item = parent::parseItem($newsItem);
$item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']); $item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']);
$article = getSimpleHTMLDOMCached($item['uri']); $article = getSimpleHTMLDOMCached($item['uri']);
//$item['content'] = $this->extractArticleContent($article); $item['content'] = $this->extractArticleContent($article);
$author = $this->extractAuthor($article); $author = $this->extractAuthor($article);
if (!empty($author)) { if (!empty($author)) {
$item['author'] = $author; $item['author'] = $author;
@@ -100,7 +100,7 @@ class FuturaSciencesBridge extends FeedExpander
private function extractArticleContent($article) private function extractArticleContent($article)
{ {
$contents = $article->find('section.article-text', 1); $contents = $article->find('div.article-text', 0);
foreach ($contents->find('img') as $img) { foreach ($contents->find('img') as $img) {
if (!empty($img->getAttribute('data-src'))) { if (!empty($img->getAttribute('data-src'))) {