[ScientificAmerican] Fix bridge (#3575)

This commit is contained in:
Korytov Pavel 2023-07-26 22:47:47 +03:00 committed by GitHub
parent 235c084820
commit bf4ea12719
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -25,10 +25,11 @@ class ScientificAmericanBridge extends FeedExpander
]; ];
const FEED = 'http://rss.sciam.com/ScientificAmerican-Global'; const FEED = 'http://rss.sciam.com/ScientificAmerican-Global';
const ISSUES = 'https://www.scientificamerican.com/store/archive/?magazineFilterID=all'; const ISSUES = 'https://www.scientificamerican.com/archive/issues/2020s/';
public function collectData() public function collectData()
{ {
$this->collectIssues();
$items = [ $items = [
...$this->collectFeed(), ...$this->collectFeed(),
...$this->collectIssues() ...$this->collectIssues()
@ -49,7 +50,7 @@ class ScientificAmericanBridge extends FeedExpander
if ($this->getInput('addContents') == 1) { if ($this->getInput('addContents') == 1) {
usort($this->items, function ($item1, $item2) { usort($this->items, function ($item1, $item2) {
return $item1['timestamp'] < $item2['timestamp']; return $item1['timestamp'] - $item2['timestamp'];
}); });
} }
} }
@ -65,8 +66,8 @@ class ScientificAmericanBridge extends FeedExpander
private function collectIssues() private function collectIssues()
{ {
$html = getSimpleHTMLDOMCached(self::ISSUES); $html = getSimpleHTMLDOMCached(self::ISSUES);
$issues_root = $html->find('div.store-listing-group', 0); $content = $html->getElementById('content')->children(3);
$issues = $issues_root->find('div.store-listing-group__item'); $issues = $content->children();
$issues_count = min( $issues_count = min(
(int)$this->getInput('parseIssues'), (int)$this->getInput('parseIssues'),
count($issues) count($issues)
@ -74,7 +75,7 @@ class ScientificAmericanBridge extends FeedExpander
$items = []; $items = [];
for ($i = 0; $i < $issues_count; $i++) { for ($i = 0; $i < $issues_count; $i++) {
$a = $issues[$i]->find('a.store-listing__cta', 0); $a = $issues[$i]->find('a', 0);
$link = 'https://scientificamerican.com' . $a->getAttribute('href'); $link = 'https://scientificamerican.com' . $a->getAttribute('href');
array_push($items, ...$this->parseIssue($link)); array_push($items, ...$this->parseIssue($link));
} }
@ -86,51 +87,42 @@ class ScientificAmericanBridge extends FeedExpander
$items = []; $items = [];
$html = getSimpleHTMLDOMCached($issue_link); $html = getSimpleHTMLDOMCached($issue_link);
$features = $html->find('section[data-issue-column="Features"]', 0); $features = $html->find('[class^=Detail_issue__article__previews__featured]', 0);
if ($features != null) { if ($features != null) {
$articles = $features->find('article'); $articles = $features->find('div', 0)->children();
foreach ($articles as $article) { foreach ($articles as $article) {
$items[] = $this->parseIssueItem($article); $h4 = $article->find('h4', 0);
$a = $h4->find('a', 0);
$link = 'https://scientificamerican.com' . $a->getAttribute('href');
$title = $a->plaintext;
$items[] = [
'uri' => $link,
'title' => $title,
'uid' => $link,
'content' => ''
];
} }
} }
$departments = $html->find('section[data-issue-column="Departments"]', 0); $departments = $html->find('[class^=Detail_issue__article__previews__departments]', 0);
if ($departments != null) { if ($departments != null) {
$lis = $departments->find('ul', 0)->find('li'); $headers = $departments->find('[class*=Listing_article__listing__title]');
foreach ($lis as $li) { foreach ($headers as $header) {
$items[] = $this->parseIssueItem($li); $a = $header->find('a', 0);
$link = 'https://scientificamerican.com' . $a->getAttribute('href');
$title = $a->plaintext;
$items[] = [
'uri' => $link,
'title' => $title,
'uid' => $link,
'content' => ''
];
} }
} }
return $items; return $items;
} }
/**
 * Build a feed item array from one article node of an issue page.
 *
 * The title is read from the node's `data-article-title` attribute. The link
 * is the `href` of the first `<a>` found inside the node; when no anchor is
 * found, the link is derived from the node's `id` attribute, split at the
 * first hyphen into "<kind>-<rest>" and mapped to
 * `https://scientificamerican.com/<kind>/<rest>`.
 *
 * @param object $article Node exposing `find()` and `getAttribute()`
 *                        (presumably a simple_html_dom node; confirm against callers).
 * @return array Item with keys `uri`, `title`, `uid` (same value as `uri`) and
 *               `content` (plain text of `p.listing-wide__inner__desc`, or ''
 *               when that element is not found).
 */
private function parseIssueItem($article)
{
    $title = $article->getAttribute('data-article-title');

    $a = $article->find('a', 0);
    if ($a != null) {
        $link = $a->href;
    } else {
        // explode() returns a single element when the id has no hyphen; pad to
        // two parts so the destructuring never hits an undefined array key.
        // The string cast keeps explode() from receiving a non-string value.
        [$kind, $v] = array_pad(
            explode('-', (string) $article->getAttribute('id'), 2),
            2,
            ''
        );
        $link = 'https://scientificamerican.com/' . $kind . '/' . $v;
    }

    // The description paragraph is optional; fall back to empty content.
    $desc = $article->find('p.listing-wide__inner__desc', 0);
    $content = ($desc != null) ? $desc->plaintext : '';

    return [
        'uri' => $link,
        'title' => $title,
        'uid' => $link,
        'content' => $content
    ];
}
private function updateItem($item) private function updateItem($item)
{ {
$html = getSimpleHTMLDOMCached($item['uri']); $html = getSimpleHTMLDOMCached($item['uri']);