From daef240cd2fac0473113da456e11172d7d24c7a4 Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 13 Oct 2023 23:14:08 +0200 Subject: [PATCH] test: add test for FeedParser (#3754) --- bridges/ArsTechnicaBridge.php | 2 +- bridges/UrlebirdBridge.php | 62 ++++++++++------ lib/FeedParser.php | 7 +- tests/FeedParserTest.php | 128 ++++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 25 deletions(-) create mode 100644 tests/FeedParserTest.php diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index d15cfb4f..5b3283b5 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -30,7 +30,7 @@ class ArsTechnicaBridge extends FeedExpander public function collectData() { $url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section'); - $this->collectExpandableDatas($url); + $this->collectExpandableDatas($url, 10); } protected function parseItem(array $item) diff --git a/bridges/UrlebirdBridge.php b/bridges/UrlebirdBridge.php index 429e93f5..38f73249 100644 --- a/bridges/UrlebirdBridge.php +++ b/bridges/UrlebirdBridge.php @@ -6,7 +6,7 @@ class UrlebirdBridge extends BridgeAbstract const NAME = 'urlebird.com'; const URI = 'https://urlebird.com/'; const DESCRIPTION = 'Bridge for urlebird.com'; - const CACHE_TIMEOUT = 10; + const CACHE_TIMEOUT = 60 * 5; const PARAMETERS = [ [ 'query' => [ @@ -21,50 +21,70 @@ class UrlebirdBridge extends BridgeAbstract private $title; - private function fixURI($uri) - { - $path = parse_url($uri, PHP_URL_PATH); - $encoded_path = array_map('urlencode', explode('/', $path)); - return str_replace($path, implode('/', $encoded_path), $uri); - } - public function collectData() { switch ($this->getInput('query')[0]) { - default: - returnServerError('Please, enter valid username or hashtag!'); - break; case '@': $url = 'https://urlebird.com/user/' . substr($this->getInput('query'), 1) . '/'; break; case '#': $url = 'https://urlebird.com/hash/' . substr($this->getInput('query'), 1) . '/'; break; + default: + returnServerError('Please, enter valid username or hashtag!'); + break; } $html = getSimpleHTMLDOM($url); + $limit = 10; + $this->title = $html->find('title', 0)->innertext; $articles = $html->find('div.thumb'); + $articles = array_slice($articles, 0, $limit); foreach ($articles as $article) { $item = []; - $item['uri'] = $this->fixURI($article->find('a', 2)->href); - $article_content = getSimpleHTMLDOM($item['uri']); - $item['author'] = $article->find('img', 0)->alt . ' (' . - $article_content->find('a.user-video', 1)->innertext . ')'; - $item['title'] = $article_content->find('title', 0)->innertext; - $item['enclosures'][] = $article_content->find('video', 0)->poster; - $video = $article_content->find('video', 0); + $itemUrl = $article->find('a', 2)->href; + $item['uri'] = $this->encodePathSegments($itemUrl); + + $dom = getSimpleHTMLDOM($item['uri']); + $videoDiv = $dom->find('div.video', 0); + + // timestamp + $timestampH6 = $videoDiv->find('h6', 0); + $datetimeString = str_replace('Posted ', '', $timestampH6->plaintext); + $item['timestamp'] = $datetimeString; + + $innertext = $dom->find('a.user-video', 1)->innertext; + $alt = $article->find('img', 0)->alt; + $item['author'] = $alt . ' (' . $innertext . ')'; + + $item['title'] = $dom->find('title', 0)->innertext; + $item['enclosures'][] = $dom->find('video', 0)->poster; + + $video = $dom->find('video', 0); $video->autoplay = null; + $item['content'] = $video->outertext . '
' . - $article_content->find('div.music', 0) . '
' . - $article_content->find('div.info2', 0)->innertext . - '

find('video', 0)->src . '">Direct video link

Post link

'; + $this->items[] = $item; } } + private function encodePathSegments($url) + { + $path = parse_url($url, PHP_URL_PATH); + $pathSegments = explode('/', $path); + $encodedPathSegments = array_map('urlencode', $pathSegments); + $encodedPath = implode('/', $encodedPathSegments); + $result = str_replace($path, $encodedPath, $url); + return $result; + } + public function getName() { return $this->title ?: parent::getName(); diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 64a3587d..1393f5f5 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -142,6 +142,7 @@ final class FeedParser } if (isset($feedItem->guid)) { + // Pluck out a url from guid foreach ($feedItem->guid->attributes() as $attribute => $value) { if ( $attribute === 'isPermaLink' @@ -207,9 +208,9 @@ final class FeedParser 'content' => null, 'timestamp' => null, 'author' => null, - 'uid' => null, - 'categories' => [], - 'enclosures' => [], + //'uid' => null, + //'categories' => [], + //'enclosures' => [], ]; if (isset($feedItem->link)) { // todo: trim uri diff --git a/tests/FeedParserTest.php b/tests/FeedParserTest.php new file mode 100644 index 00000000..acd93e52 --- /dev/null +++ b/tests/FeedParserTest.php @@ -0,0 +1,128 @@ + + + + hello feed + http://meerkat.oreillynet.com + Meerkat: An Open Wire Service + + + + + + + + + + XML: A Disruptive Technology + http://c.moreover.com/click/here.pl?r123 + desc + + + XML; + + $sut = new \FeedParser(); + $feed = $sut->parseFeed($xml); + + $this->assertSame('hello feed', $feed['title']); + $this->assertSame('http://meerkat.oreillynet.com', $feed['uri']); + $this->assertSame(null, $feed['icon']); + + $item = $feed['items'][0]; + $this->assertSame('XML: A Disruptive Technology', $item['title']); + $this->assertSame('http://c.moreover.com/click/here.pl?r123', $item['uri']); + $this->assertSame('desc', $item['content']); + } + + public function testRss2() + { + $xml = << + + + hello feed + https://example.com/ + + https://example.com/2.ico + + + + hello world + https://example.com/1 + desc2 + Tue, 26 Apr 2022 00:00:00 +0200 + root + + + + + XML; + + $sut = new \FeedParser(); + $feed = $sut->parseFeed($xml); + + $this->assertSame('hello feed', $feed['title']); + $this->assertSame('https://example.com/', $feed['uri']); + $this->assertSame('https://example.com/2.ico', $feed['icon']); + + $item = $feed['items'][0]; + $this->assertSame('hello world', $item['title']); + $this->assertSame('https://example.com/1', $item['uri']); + $this->assertSame(1650924000, $item['timestamp']); + $this->assertSame('root', $item['author']); + $this->assertSame('desc2', $item['content']); + $this->assertSame(['https://example.com/1.png'], $item['enclosures']); + } + + public function testAtom() + { + $xml = << + + hello feed + + https://example.com/2.ico + + + hello world + + + root + + html + 2015-11-05T14:38:49+01:00 + + + XML; + + $sut = new \FeedParser(); + $feed = $sut->parseFeed($xml); + + $this->assertSame('hello feed', $feed['title']); + $this->assertSame('https://example.com/1', $feed['uri']); + $this->assertSame('https://example.com/2.ico', $feed['icon']); + + $item = $feed['items'][0]; + $this->assertSame('hello world', $item['title']); + $this->assertSame('https://example.com/1', $item['uri']); + $this->assertSame(1446730729, $item['timestamp']); + $this->assertSame('root', $item['author']); + $this->assertSame('html', $item['content']); + } +}