refactor(FeedParser): (#3928)

This commit is contained in:
Dag 2024-01-29 21:51:06 +01:00 committed by GitHub
parent cfe3dcfe6d
commit c4fceab7b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 88 additions and 41 deletions

View File

@ -80,7 +80,7 @@ abstract class BridgeAbstract
} }
/** /**
* The description is currently not used in feed production * The description is only used in bridge card rendering on frontpage
*/ */
public function getDescription() public function getDescription()
{ {

View File

@ -32,6 +32,7 @@ abstract class FeedExpander extends BridgeAbstract
$feedParser = new FeedParser(); $feedParser = new FeedParser();
$this->feed = $feedParser->parseFeed($xmlString); $this->feed = $feedParser->parseFeed($xmlString);
$items = array_slice($this->feed['items'], 0, $maxItems); $items = array_slice($this->feed['items'], 0, $maxItems);
// todo: extract parse logic out from FeedParser
foreach ($items as $item) { foreach ($items as $item) {
// Give bridges a chance to modify the item // Give bridges a chance to modify the item
$item = $this->parseItem($item); $item = $this->parseItem($item);

View File

@ -36,7 +36,7 @@ final class FeedParser
$channel = $xml->channel[0]; $channel = $xml->channel[0];
$feed['title'] = trim((string)$channel->title); $feed['title'] = trim((string)$channel->title);
$feed['uri'] = trim((string)$channel->link); $feed['uri'] = trim((string)$channel->link);
if (!empty($channel->image)) { if (isset($channel->image->url)) {
$feed['icon'] = trim((string)$channel->image->url); $feed['icon'] = trim((string)$channel->image->url);
} }
foreach ($xml->item as $item) { foreach ($xml->item as $item) {
@ -47,7 +47,7 @@ final class FeedParser
$channel = $xml->channel[0]; $channel = $xml->channel[0];
$feed['title'] = trim((string)$channel->title); $feed['title'] = trim((string)$channel->title);
$feed['uri'] = trim((string)$channel->link); $feed['uri'] = trim((string)$channel->link);
if (!empty($channel->image)) { if (isset($channel->image->url)) {
$feed['icon'] = trim((string)$channel->image->url); $feed['icon'] = trim((string)$channel->image->url);
} }
foreach ($channel->item as $item) { foreach ($channel->item as $item) {
@ -70,10 +70,10 @@ final class FeedParser
} }
} }
} }
if (!empty($xml->icon)) { if (isset($xml->icon)) {
$feed['icon'] = (string)$xml->icon; $feed['icon'] = (string) $xml->icon;
} elseif (!empty($xml->logo)) { } elseif (isset($xml->logo)) {
$feed['icon'] = (string)$xml->logo; $feed['icon'] = (string) $xml->logo;
} }
foreach ($xml->entry as $item) { foreach ($xml->entry as $item) {
$feed['items'][] = $this->parseAtomItem($item); $feed['items'][] = $this->parseAtomItem($item);
@ -171,11 +171,7 @@ final class FeedParser
if (in_array($namespaceName, ['', 'content', 'media'])) { if (in_array($namespaceName, ['', 'content', 'media'])) {
continue; continue;
} }
$module = $feedItem->children($namespaceUrl); $item[$namespaceName] = $this->parseModule($feedItem, $namespaceName, $namespaceUrl);
$item[$namespaceName] = [];
foreach ($module as $moduleKey => $moduleValue) {
$item[$namespaceName][$moduleKey] = (string) $moduleValue;
}
} }
if (isset($namespaces['itunes'])) { if (isset($namespaces['itunes'])) {
$enclosure = $feedItem->enclosure; $enclosure = $feedItem->enclosure;
@ -185,43 +181,27 @@ final class FeedParser
'type' => (string) $enclosure['type'], 'type' => (string) $enclosure['type'],
]; ];
} }
if (isset($feedItem->guid)) { if (!$item['uri']) {
// Pluck out a url from guid // Let's use guid as uri if it's a permalink
foreach ($feedItem->guid->attributes() as $attribute => $value) { if (isset($feedItem->guid)) {
if ( foreach ($feedItem->guid->attributes() as $attribute => $value) {
$attribute === 'isPermaLink' if ($attribute === 'isPermaLink' && ($value === 'true' || (filter_var($feedItem->guid, FILTER_VALIDATE_URL)))) {
&& ( $item['uri'] = (string) $feedItem->guid;
$value === 'true' || ( break;
filter_var($feedItem->guid, FILTER_VALIDATE_URL) }
&& (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL))
)
)
) {
$item['uri'] = (string)$feedItem->guid;
break;
} }
} }
} }
if (isset($feedItem->pubDate)) { $item['timestamp'] = $feedItem->pubDate ?? $dc->date ?? '';
$item['timestamp'] = strtotime((string)$feedItem->pubDate); $item['timestamp'] = strtotime((string) $item['timestamp']);
} elseif (isset($dc->date)) {
$item['timestamp'] = strtotime((string)$dc->date);
}
if (isset($feedItem->author)) { $item['author'] = $feedItem->author ?? $feedItem->creator ?? $dc->creator ?? $media->credit ?? '';
$item['author'] = (string)$feedItem->author; $item['author'] = (string) $item['author'];
} elseif (isset($feedItem->creator)) {
$item['author'] = (string)$feedItem->creator;
} elseif (isset($dc->creator)) {
$item['author'] = (string)$dc->creator;
} elseif (isset($media->credit)) {
$item['author'] = (string)$media->credit;
}
if (isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) { if (isset($feedItem->enclosure) && !empty($feedItem->enclosure['url'])) {
$item['enclosures'] = [ $item['enclosures'] = [
(string)$feedItem->enclosure['url'], (string) $feedItem->enclosure['url'],
]; ];
} }
return $item; return $item;
@ -261,4 +241,15 @@ final class FeedParser
} }
return $item; return $item;
} }
/**
 * Collect the direct children of an element that belong to one XML namespace.
 *
 * Each child is cast to a string and stored under its element name. If the
 * same name occurs more than once, the later value overwrites the earlier one.
 *
 * @param \SimpleXMLElement $element Element whose namespaced children are read
 * @param string $namespaceName Namespace prefix; accepted but not read by this method
 * @param string $namespaceUrl Namespace URI passed to SimpleXMLElement::children()
 * @return array Map of child element name => string value
 */
private function parseModule(\SimpleXMLElement $element, string $namespaceName, string $namespaceUrl): array
{
    $values = [];
    // todo: values are flattened to strings; nested structures need custom parsing
    foreach ($element->children($namespaceUrl) as $childName => $childNode) {
        $values[$childName] = (string) $childNode;
    }
    return $values;
}
} }

View File

@ -125,4 +125,59 @@ class FeedParserTest extends TestCase
$this->assertSame('root', $item['author']); $this->assertSame('root', $item['author']);
$this->assertSame('html', $item['content']); $this->assertSame('html', $item['content']);
} }
/**
 * Parses an RSS 2.0 document whose single item carries an itunes-namespaced
 * child (<itunes:duration>) and an <enclosure>, and checks the resulting
 * feed array.
 *
 * The channel has no title, link or image, and the item has no title, link,
 * date or author, so those fields are expected to be empty strings.
 */
public function testAppleItunesModule()
{
// Fixture: several namespaces are declared, but only itunes is used by the item.
$xml = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<rss
version="2.0"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:cc="http://web.resource.org/cc/"
xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"
xmlns:media="http://search.yahoo.com/mrss/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:podcast="https://podcastindex.org/namespace/1.0"
xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
>
<channel>
<item>
<itunes:duration>30:05</itunes:duration>
<enclosure length="48123248" type="audio/mpeg" url="https://example.com/1.mp3" />
</item>
</channel>
</rss>
XML;
// sut = system under test
$sut = new \FeedParser();
$feed = $sut->parseFeed($xml);
$expected = [
'title' => '',
'uri' => '',
'icon' => '',
'items' => [
[
'uri' => '',
'title' => '',
'content' => '',
'timestamp' => '',
'author' => '',
// Namespaced children are expected under a key named after the prefix
'itunes' => [
'duration' => '30:05',
],
// The enclosure attributes are expected as a keyed array ...
'enclosure' => [
'url' => 'https://example.com/1.mp3',
'length' => '48123248',
'type' => 'audio/mpeg',
],
// ... and the enclosure url is expected again as a one-element list
'enclosures' => [
'https://example.com/1.mp3',
],
]
],
];
// NOTE(review): assertEquals is used rather than assertSame, so the exact
// types of the empty fields (e.g. 'timestamp') are presumably not pinned
// by this test — confirm whether a strict comparison is wanted.
$this->assertEquals($expected, $feed);
}
} }