fix(FeedParser): scrape out content from rss content:encoded (#4178)

* fix(FeedParser): parse content module from rss2

* refactor
This commit is contained in:
Dag 2024-07-31 19:04:07 +02:00 committed by GitHub
parent e55e9b8fac
commit b8a9f34527
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 39 additions and 20 deletions

View File

@ -418,7 +418,16 @@ See `formats/PlaintextFormat.php` for an example.
These commands require that you have installed the dev dependencies in `composer.json`.
Run all tests:
./vendor/bin/phpunit
Run a single test class:
./vendor/bin/phpunit --filter UrlTest
Run linter:
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
https://github.com/squizlabs/PHP_CodeSniffer/wiki

View File

@ -112,15 +112,6 @@ class DisplayAction implements ActionInterface
$input = array_diff_key($requestArray, array_fill_keys($remove, '')); $input = array_diff_key($requestArray, array_fill_keys($remove, ''));
$bridge->setInput($input); $bridge->setInput($input);
$bridge->collectData(); $bridge->collectData();
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = FeedItem::fromArray($item);
}
$items = $feedItems;
}
$feed = $bridge->getFeed();
} catch (\Exception $e) { } catch (\Exception $e) {
// Probably an exception inside a bridge // Probably an exception inside a bridge
if ($e instanceof HttpException) { if ($e instanceof HttpException) {
@ -154,6 +145,16 @@ class DisplayAction implements ActionInterface
} }
} }
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = FeedItem::fromArray($item);
}
$items = $feedItems;
}
$feed = $bridge->getFeed();
$formatFactory = new FormatFactory(); $formatFactory = new FormatFactory();
$format = $formatFactory->create($format); $format = $formatFactory->create($format);

View File

@ -186,21 +186,26 @@ class FeedItem
} }
/** /**
* @param string|object $content The item content as text or simple_html_dom object. * @param string|array|\simple_html_dom|\simple_html_dom_node $content The item content
*/ */
public function setContent($content) public function setContent($content)
{ {
$this->content = null; $this->content = null;
if ( if (
$content instanceof simple_html_dom $content instanceof simple_html_dom
|| $content instanceof simple_html_dom_node || $content instanceof simple_html_dom_node
) { ) {
$content = (string) $content; $content = (string) $content;
} elseif (is_array($content)) {
// Assuming this is the rss2.0 content module
$content = $content['encoded'] ?? '';
} }
if (is_string($content)) { if (is_string($content)) {
$this->content = $content; $this->content = $content;
} else { } else {
Debug::log(sprintf('Feed content must be a string but got %s', gettype($content))); Debug::log(sprintf('Unable to convert feed content to string: %s', gettype($content)));
} }
} }

View File

@ -167,8 +167,9 @@ final class FeedParser
if (isset($namespaces['media'])) { if (isset($namespaces['media'])) {
$media = $feedItem->children($namespaces['media']); $media = $feedItem->children($namespaces['media']);
} }
foreach ($namespaces as $namespaceName => $namespaceUrl) { foreach ($namespaces as $namespaceName => $namespaceUrl) {
if (in_array($namespaceName, ['', 'content', 'media'])) { if (in_array($namespaceName, ['', 'media'])) {
continue; continue;
} }
$item[$namespaceName] = $this->parseModule($feedItem, $namespaceName, $namespaceUrl); $item[$namespaceName] = $this->parseModule($feedItem, $namespaceName, $namespaceUrl);

View File

@ -8,6 +8,13 @@ use PHPUnit\Framework\TestCase;
class FeedParserTest extends TestCase class FeedParserTest extends TestCase
{ {
private \FeedParser $sut;
public function setUp(): void
{
$this->sut = new \FeedParser();
}
public function testRss1() public function testRss1()
{ {
$xml = <<<XML $xml = <<<XML
@ -37,8 +44,7 @@ class FeedParserTest extends TestCase
</rdf:RDF> </rdf:RDF>
XML; XML;
$sut = new \FeedParser(); $feed = $this->sut->parseFeed($xml);
$feed = $sut->parseFeed($xml);
$this->assertSame('hello feed', $feed['title']); $this->assertSame('hello feed', $feed['title']);
$this->assertSame('http://meerkat.oreillynet.com', $feed['uri']); $this->assertSame('http://meerkat.oreillynet.com', $feed['uri']);
@ -74,8 +80,7 @@ class FeedParserTest extends TestCase
</rss> </rss>
XML; XML;
$sut = new \FeedParser(); $feed = $this->sut->parseFeed($xml);
$feed = $sut->parseFeed($xml);
$this->assertSame('hello feed', $feed['title']); $this->assertSame('hello feed', $feed['title']);
$this->assertSame('https://example.com/', $feed['uri']); $this->assertSame('https://example.com/', $feed['uri']);
@ -111,8 +116,7 @@ class FeedParserTest extends TestCase
</feed> </feed>
XML; XML;
$sut = new \FeedParser(); $feed = $this->sut->parseFeed($xml);
$feed = $sut->parseFeed($xml);
$this->assertSame('hello feed', $feed['title']); $this->assertSame('hello feed', $feed['title']);
$this->assertSame('https://example.com/1', $feed['uri']); $this->assertSame('https://example.com/1', $feed['uri']);
@ -151,8 +155,7 @@ class FeedParserTest extends TestCase
</rss> </rss>
XML; XML;
$sut = new \FeedParser(); $feed = $this->sut->parseFeed($xml);
$feed = $sut->parseFeed($xml);
$expected = [ $expected = [
'title' => '', 'title' => '',
'uri' => '', 'uri' => '',