From 723768c82850e33b668e8d6a8b7c014bce052e57 Mon Sep 17 00:00:00 2001
From: vincentvd1
Date: Thu, 11 May 2023 01:30:25 +0200
Subject: [PATCH] Add bridge for Magellantv articles (#3368)

* [MagellantvBrdige] added first version
* [MagellantvBridge] cleanup, added tags and fixed bugs
* [MagellantvBridge] fix linting issues
* [MagellantvBridge] more linting fixes
* [MagellantvBridge] removed tabs
---
 bridges/MagellantvBridge.php | 88 ++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 bridges/MagellantvBridge.php

diff --git a/bridges/MagellantvBridge.php b/bridges/MagellantvBridge.php
new file mode 100644
index 00000000..b1f0403e
--- /dev/null
+++ b/bridges/MagellantvBridge.php
@@ -0,0 +1,88 @@
+    // NOTE(review): the beginning of this file (opening tag, class declaration and
+    // the head of the PARAMETERS constant) is missing from this copy of the patch.
+    // The surviving fragment of PARAMETERS below is kept exactly as found.
+ [
+                'type' => 'list',
+                'name' => 'Article topic',
+                'values' => [
+                    'All topics' => 'all',
+                    'Ancient history' => 'ancient-history',
+                    'Art & culture' => 'art-culture',
+                    'Biography' => 'biography',
+                    'Current history' => 'current-history',
+                    'Early modern' => 'early-modern',
+                    'Earth' => 'earth',
+                    'Mind & body' => 'mind-body',
+                    'Nature' => 'nature',
+                    'Science & tech' => 'science-tech',
+                    'Short takes' => 'short-takes',
+                    'Space' => 'space',
+                    'Travel & adventure' => 'travel-adventure',
+                    'True crime' => 'true-crime',
+                    'War & military' => 'war-military'
+                ],
+            ]
+        ]
+    ];
+
+    /**
+     * Return the URL of the icon shown for this bridge.
+     *
+     * @return string Absolute URL of the site's 32x32 favicon.
+     */
+    public function getIcon()
+    {
+        return 'https://www.magellantv.com/favicon-32x32.png';
+    }
+
+    /**
+     * Collect the tag labels attached to a single article preview card.
+     *
+     * @param object $article DOM node of one preview card; must expose find().
+     * @return string[] Tag texts in document order, empty when no tag link matches.
+     */
+    private function retrieveTags($article)
+    {
+        $tags = [];
+        foreach ($article->find('div.articleCategory_article-category-tag__uEAXz > a') as $tag) {
+            $tags[] = $tag->plaintext;
+        }
+
+        return $tags;
+    }
+
+    /**
+     * Fetch the article listing for the selected topic and append one feed item
+     * per preview card to $this->items.
+     *
+     * Cards without a title link are skipped; a missing or unparsable date and a
+     * missing author are simply left out of the item instead of aborting the feed.
+     *
+     * NOTE(review): the class names used as selectors look build-generated
+     * (hashed suffixes) and presumably change when the site is redeployed.
+     *
+     * @throws Exception When no preview card is found on the listing page.
+     */
+    public function collectData()
+    {
+        // The "all" pseudo-topic maps to the listing root, any other value to its category page.
+        $topic = $this->getInput('topic');
+        if ($topic === 'all') {
+            $url = 'https://www.magellantv.com/articles';
+        } else {
+            $url = sprintf('https://www.magellantv.com/articles/category/%s', $topic);
+        }
+        $dom = getSimpleHTMLDOM($url);
+
+        // Fail loudly when the page layout no longer matches at all.
+        $article_list = $dom->find('div.articlePreview_preview-card__mLMOm');
+        if (count($article_list) === 0) {
+            throw new Exception(sprintf('Unable to find css selector on `%s`', $url));
+        }
+
+        foreach ($article_list as $article) {
+            $article = defaultLinkTo($article, $this->getURI());
+
+            // find() with an index yields null on a miss; an item without title/link is useless.
+            $title = $article->find('div.articlePreview_article-title___Ci5V > h2 > a', 0);
+            if ($title === null) {
+                continue;
+            }
+
+            $item = [
+                'title' => $title->plaintext,
+                'uri' => $title->href,
+                'categories' => $this->retrieveTags($article),
+            ];
+
+            // Date and author are optional extras: add them only when they can be read.
+            $meta_information = $article->find('div.articlePreview_article-metas__kD1i7', 0);
+            if ($meta_information !== null) {
+                $date = $meta_information->find('div.articlePreview_article-date__8Jyfn', 0);
+                $timestamp = $date === null ? false : strtotime($date->plaintext);
+                if ($timestamp !== false) {
+                    $item['timestamp'] = $timestamp;
+                }
+
+                // The author name is read from the second <span> of the author container.
+                $author = $meta_information->find('div.articlePreview_article-author__Ie0_u > span', 1);
+                if ($author !== null) {
+                    $item['author'] = $author->plaintext;
+                }
+            }
+
+            $this->items[] = $item;
+        }
+    }
+}