fixes extracting article images, article date/time and article author and item id (#3645)

This commit is contained in:
Niehztog 2023-09-03 00:22:48 +02:00 committed by GitHub
parent 64000a2526
commit 92b2bc5e11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -6,7 +6,7 @@ class NiusBridge extends XPathAbstract
// Bridge metadata: site address, description text and maintainer handle.
const URI = 'https://www.nius.de/news';
const DESCRIPTION = 'Die Stimme der Mehrheit';
const MAINTAINER = 'Niehztog';
// No parameters are declared for this bridge; the declaration is left commented out.
//const PARAMETERS = array();
// NOTE(review): presumably a cache lifetime in seconds (3600 = one hour) — confirm against the base class.
const CACHE_TIMEOUT = 3600;
// Same address as URI; presumably the page the feed items are scraped from — confirm.
const FEED_SOURCE_URL = 'https://www.nius.de/news';
@ -15,18 +15,30 @@ class NiusBridge extends XPathAbstract
// XPath expressions for the individual item fields. All of them start with ".//",
// i.e. they are relative to a context node (presumably the item node — confirm).
//
// NOTE(review): XPATH_EXPRESSION_ITEM_AUTHOR and XPATH_EXPRESSION_ITEM_ENCLOSURES are
// each declared twice in this listing. PHP does not allow a class constant to be
// redeclared, so this looks like old and new lines of a change shown together —
// confirm which declaration of each pair is the live one.
const XPATH_EXPRESSION_ITEM_CONTENT = './/h2[@class="title"]//node()';
// First link inside the context node.
const XPATH_EXPRESSION_ITEM_URI = './/a[1]/@href';
// First and second text node of the "author" span, whitespace-normalised.
const XPATH_EXPR_AUTHOR_PART1 = 'normalize-space(.//span[@class="author"]/text()[1])';
const XPATH_EXPR_AUTHOR_PART2 = 'normalize-space(.//span[@class="author"]/text()[2])';
// Joins both parts with a space and keeps everything after the first space.
const XPATH_EXPRESSION_ITEM_AUTHOR = 'substring-after(concat(' . self::XPATH_EXPR_AUTHOR_PART1 . ', " ", ' . self::XPATH_EXPR_AUTHOR_PART2 . '), " ")';
// Third text node of the "author" span, whitespace-normalised.
const XPATH_EXPRESSION_ITEM_AUTHOR = 'normalize-space(.//span[@class="author"]/text()[3])';
//const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/td[3]';
// First <img> inside the context node.
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img[1]/@src';
// First text node of the "author" span; formatItemTimestamp() parses this text.
const XPATH_EXPRESSION_ITEM_TIMESTAMP = 'normalize-space(.//span[@class="author"]/text()[1])';
// Only <img> elements that carry a "sizes" attribute.
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img[@sizes]/@src';
const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="subtitle"]/text()';
const SETTING_FIX_ENCODING = false;
/**
 * Converts the date/time text extracted for an item into a Unix timestamp.
 *
 * Two input shapes are recognised, both interpreted in the Europe/Berlin
 * time zone:
 *  - a time of day followed by " Uhr", e.g. "14:05 Uhr" (the date part is
 *    taken from the current date),
 *  - a date in day.month.two-digit-year form, e.g. "03.09.23" (resolved to
 *    midnight of that day).
 *
 * @param string $value Text to parse.
 * @return string|null Unix timestamp as produced by format('U'), or null when
 *                     $value matches neither format.
 *                     NOTE(review): null replaces the fatal error the previous
 *                     code raised on unparsable input — confirm the caller
 *                     accepts null as "no timestamp".
 */
protected function formatItemTimestamp($value)
{
    // The leading "!" on the date-only format resets all unparsed fields, so
    // the time of day becomes 00:00:00 instead of "whenever the feed was
    // fetched". The time-only format deliberately has no "!" because its
    // date must default to today.
    $format = false !== strpos($value, ' Uhr') ? 'H:i \U\h\r' : '!d.m.y';

    $dateTime = DateTimeImmutable::createFromFormat(
        $format,
        $value,
        new DateTimeZone('Europe/Berlin')
    );

    // createFromFormat() returns false when the text does not match the format.
    if (false === $dateTime) {
        return null;
    }

    return $dateTime->format('U');
}
/**
 * Unwraps an image address that goes through the site's "/_next/image" endpoint.
 *
 * When $mediaUrl has the form
 * "https://www.nius.de/_next/image?url=<inner>?<rest>", the <inner> part is
 * returned (the capture is greedy, so it runs up to the last "?" in the
 * string). Any other input is returned unchanged.
 *
 * @param string $mediaUrl Image address as extracted from the page.
 * @return string The inner address, or $mediaUrl itself when the pattern does not match.
 */
protected function cleanMediaUrl($mediaUrl)
{
    $pattern = '~https:\/\/www\.nius\.de\/_next\/image\?url=(.*)\?~';

    if (preg_match($pattern, $mediaUrl, $captured)) {
        return $captured[1];
    }

    // No match (or a regex error): hand the input back untouched.
    return $mediaUrl;
}
/**
 * Derives an item id from the last path segment of the item's URI.
 *
 * Trailing slashes are ignored, so ".../some-article/" yields
 * "some-article" rather than an empty id. A URI that contains no "/" at all
 * is returned whole; the previous code silently dropped its first character
 * in that case because strrpos() returned false and "false + 1" became 1.
 *
 * @param FeedItem $item Item whose URI has already been set.
 * @return string Text after the last "/" of the URI (without trailing slashes).
 */
protected function generateItemId(FeedItem $item)
{
    // Read the URI once and drop trailing slashes before looking for the last segment.
    $uri = rtrim((string) $item->getURI(), '/');

    $lastSlash = strrpos($uri, '/');
    if (false === $lastSlash) {
        return $uri;
    }

    return substr($uri, $lastSlash + 1);
}
}