From 082542dabcfd0ac1b68923a438c218023353d4f9 Mon Sep 17 00:00:00 2001 From: Arnav Jain Date: Sun, 3 Nov 2024 18:22:44 +0100 Subject: [PATCH] [TestFaktaBridge] new bridge (#4307) * [TestFaktaBridge] new bridge * [TestFaktaBridge] fix linting errors --- bridges/TestFaktaBridge.php | 100 ++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 bridges/TestFaktaBridge.php diff --git a/bridges/TestFaktaBridge.php b/bridges/TestFaktaBridge.php new file mode 100644 index 00000000..b9a65138 --- /dev/null +++ b/bridges/TestFaktaBridge.php @@ -0,0 +1,100 @@ + 'Jan', + 'Feb' => 'Feb', + 'Mar' => 'Mar', + 'Apr' => 'Apr', + 'Maj' => 'May', + 'Jun' => 'Jun', + 'Jul' => 'Jul', + 'Aug' => 'Aug', + 'Sep' => 'Sep', + 'Okt' => 'Oct', + 'Nov' => 'Nov', + 'Dec' => 'Dec' + ]; + + // Replace Swedish month names with English + $dateString = preg_replace_callback( + '/\b(' . implode('|', array_keys($months)) . ')\b/', + function ($matches) use ($months) { + return $months[$matches[0]]; + }, + $dateString + ); + + // Create DateTime object + $dateValue = DateTime::createFromFormat( + 'd M, Y', + trim($dateString), + new DateTimeZone('Europe/Stockholm') + ); + if ($dateValue) { + $dateValue->setTime(0, 0); // Set time to 00:00 + return $dateValue->getTimestamp(); + } + + return $dateValue ? $dateValue->getTimestamp() : false; + } + + public function collectData() + { + $NEWSURL = self::URI . '/sv'; + $html = getSimpleHTMLDOMCached($NEWSURL, 18000) or + returnServerError('Could not request: ' . $NEWSURL); + + foreach ($html->find('.row-container') as $element) { + // Debug::log($element); + + $title = $element->find('h2', 0)->plaintext; + $category = trim($element->find('.red-label', 0)->plaintext); + $url = self::URI . $element->find('a', 0)->getAttribute('href'); + $figure = $element->find('img', 0); + $preamble = trim($element->find('.text', 0)->plaintext); + + $article_html = getSimpleHTMLDOMCached($url, 18000) or + returnServerError('Could not request: ' . $url); + $article_content = $article_html->find('div.content', 0); + $article_text = $article_html->find('article', 0); + + $requestor = $article_html->find('div.uppdrag', 0)->plaintext; + $author = trim($article_html->find('span.name', 0)->plaintext); + $published = $this->parseSwedishDates( + str_replace( + 'Publicerad: ', + '', + trim($article_html->find('span.created', 0)->plaintext) + ) + ); + + $content = $figure . '
'; + $content .= '' . strtoupper($category) . ' ' . $requestor . '

'; + $content .= '' . $preamble . '

'; + $content .= $article_text; + + $this->items[] = [ + 'uri' => $url, + 'title' => $title, + 'author' => $author, + 'timestamp' => $published, + 'content' => trim($content), + ]; + } + } +}