mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 00:59:35 +00:00
test: add test for FeedParser (#3754)
This commit is contained in:
parent
5f37c72be0
commit
daef240cd2
@ -30,7 +30,7 @@ class ArsTechnicaBridge extends FeedExpander
|
|||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
$url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section');
|
$url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section');
|
||||||
$this->collectExpandableDatas($url);
|
$this->collectExpandableDatas($url, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem(array $item)
|
protected function parseItem(array $item)
|
||||||
|
@ -6,7 +6,7 @@ class UrlebirdBridge extends BridgeAbstract
|
|||||||
const NAME = 'urlebird.com';
|
const NAME = 'urlebird.com';
|
||||||
const URI = 'https://urlebird.com/';
|
const URI = 'https://urlebird.com/';
|
||||||
const DESCRIPTION = 'Bridge for urlebird.com';
|
const DESCRIPTION = 'Bridge for urlebird.com';
|
||||||
const CACHE_TIMEOUT = 10;
|
const CACHE_TIMEOUT = 60 * 5;
|
||||||
const PARAMETERS = [
|
const PARAMETERS = [
|
||||||
[
|
[
|
||||||
'query' => [
|
'query' => [
|
||||||
@ -21,50 +21,70 @@ class UrlebirdBridge extends BridgeAbstract
|
|||||||
|
|
||||||
private $title;
|
private $title;
|
||||||
|
|
||||||
private function fixURI($uri)
|
|
||||||
{
|
|
||||||
$path = parse_url($uri, PHP_URL_PATH);
|
|
||||||
$encoded_path = array_map('urlencode', explode('/', $path));
|
|
||||||
return str_replace($path, implode('/', $encoded_path), $uri);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
switch ($this->getInput('query')[0]) {
|
switch ($this->getInput('query')[0]) {
|
||||||
default:
|
|
||||||
returnServerError('Please, enter valid username or hashtag!');
|
|
||||||
break;
|
|
||||||
case '@':
|
case '@':
|
||||||
$url = 'https://urlebird.com/user/' . substr($this->getInput('query'), 1) . '/';
|
$url = 'https://urlebird.com/user/' . substr($this->getInput('query'), 1) . '/';
|
||||||
break;
|
break;
|
||||||
case '#':
|
case '#':
|
||||||
$url = 'https://urlebird.com/hash/' . substr($this->getInput('query'), 1) . '/';
|
$url = 'https://urlebird.com/hash/' . substr($this->getInput('query'), 1) . '/';
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
returnServerError('Please, enter valid username or hashtag!');
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = getSimpleHTMLDOM($url);
|
$html = getSimpleHTMLDOM($url);
|
||||||
|
$limit = 10;
|
||||||
|
|
||||||
$this->title = $html->find('title', 0)->innertext;
|
$this->title = $html->find('title', 0)->innertext;
|
||||||
$articles = $html->find('div.thumb');
|
$articles = $html->find('div.thumb');
|
||||||
|
$articles = array_slice($articles, 0, $limit);
|
||||||
foreach ($articles as $article) {
|
foreach ($articles as $article) {
|
||||||
$item = [];
|
$item = [];
|
||||||
$item['uri'] = $this->fixURI($article->find('a', 2)->href);
|
$itemUrl = $article->find('a', 2)->href;
|
||||||
$article_content = getSimpleHTMLDOM($item['uri']);
|
$item['uri'] = $this->encodePathSegments($itemUrl);
|
||||||
$item['author'] = $article->find('img', 0)->alt . ' (' .
|
|
||||||
$article_content->find('a.user-video', 1)->innertext . ')';
|
$dom = getSimpleHTMLDOM($item['uri']);
|
||||||
$item['title'] = $article_content->find('title', 0)->innertext;
|
$videoDiv = $dom->find('div.video', 0);
|
||||||
$item['enclosures'][] = $article_content->find('video', 0)->poster;
|
|
||||||
$video = $article_content->find('video', 0);
|
// timestamp
|
||||||
|
$timestampH6 = $videoDiv->find('h6', 0);
|
||||||
|
$datetimeString = str_replace('Posted ', '', $timestampH6->plaintext);
|
||||||
|
$item['timestamp'] = $datetimeString;
|
||||||
|
|
||||||
|
$innertext = $dom->find('a.user-video', 1)->innertext;
|
||||||
|
$alt = $article->find('img', 0)->alt;
|
||||||
|
$item['author'] = $alt . ' (' . $innertext . ')';
|
||||||
|
|
||||||
|
$item['title'] = $dom->find('title', 0)->innertext;
|
||||||
|
$item['enclosures'][] = $dom->find('video', 0)->poster;
|
||||||
|
|
||||||
|
$video = $dom->find('video', 0);
|
||||||
$video->autoplay = null;
|
$video->autoplay = null;
|
||||||
|
|
||||||
$item['content'] = $video->outertext . '<br>' .
|
$item['content'] = $video->outertext . '<br>' .
|
||||||
$article_content->find('div.music', 0) . '<br>' .
|
$dom->find('div.music', 0) . '<br>' .
|
||||||
$article_content->find('div.info2', 0)->innertext .
|
$dom->find('div.info2', 0)->innertext .
|
||||||
'<br><br><a href="' . $article_content->find('video', 0)->src .
|
'<br><br><a href="' . $dom->find('video', 0)->src .
|
||||||
'">Direct video link</a><br><br><a href="' . $item['uri'] .
|
'">Direct video link</a><br><br><a href="' . $item['uri'] .
|
||||||
'">Post link</a><br><br>';
|
'">Post link</a><br><br>';
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private function encodePathSegments($url)
|
||||||
|
{
|
||||||
|
$path = parse_url($url, PHP_URL_PATH);
|
||||||
|
$pathSegments = explode('/', $path);
|
||||||
|
$encodedPathSegments = array_map('urlencode', $pathSegments);
|
||||||
|
$encodedPath = implode('/', $encodedPathSegments);
|
||||||
|
$result = str_replace($path, $encodedPath, $url);
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
|
||||||
public function getName()
|
public function getName()
|
||||||
{
|
{
|
||||||
return $this->title ?: parent::getName();
|
return $this->title ?: parent::getName();
|
||||||
|
@ -142,6 +142,7 @@ final class FeedParser
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isset($feedItem->guid)) {
|
if (isset($feedItem->guid)) {
|
||||||
|
// Pluck out a url from guid
|
||||||
foreach ($feedItem->guid->attributes() as $attribute => $value) {
|
foreach ($feedItem->guid->attributes() as $attribute => $value) {
|
||||||
if (
|
if (
|
||||||
$attribute === 'isPermaLink'
|
$attribute === 'isPermaLink'
|
||||||
@ -207,9 +208,9 @@ final class FeedParser
|
|||||||
'content' => null,
|
'content' => null,
|
||||||
'timestamp' => null,
|
'timestamp' => null,
|
||||||
'author' => null,
|
'author' => null,
|
||||||
'uid' => null,
|
//'uid' => null,
|
||||||
'categories' => [],
|
//'categories' => [],
|
||||||
'enclosures' => [],
|
//'enclosures' => [],
|
||||||
];
|
];
|
||||||
if (isset($feedItem->link)) {
|
if (isset($feedItem->link)) {
|
||||||
// todo: trim uri
|
// todo: trim uri
|
||||||
|
128
tests/FeedParserTest.php
Normal file
128
tests/FeedParserTest.php
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace RssBridge\Tests;
|
||||||
|
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
|
class FeedParserTest extends TestCase
|
||||||
|
{
|
||||||
|
public function testRss1()
|
||||||
|
{
|
||||||
|
$xml = <<<XML
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
>
|
||||||
|
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||||
|
<title>hello feed</title>
|
||||||
|
<link>http://meerkat.oreillynet.com</link>
|
||||||
|
<description>Meerkat: An Open Wire Service</description>
|
||||||
|
|
||||||
|
<items>
|
||||||
|
<rdf:Seq>
|
||||||
|
<rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
|
||||||
|
</rdf:Seq>
|
||||||
|
</items>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||||
|
<title>XML: A Disruptive Technology</title>
|
||||||
|
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||||
|
<description>desc</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$sut = new \FeedParser();
|
||||||
|
$feed = $sut->parseFeed($xml);
|
||||||
|
|
||||||
|
$this->assertSame('hello feed', $feed['title']);
|
||||||
|
$this->assertSame('http://meerkat.oreillynet.com', $feed['uri']);
|
||||||
|
$this->assertSame(null, $feed['icon']);
|
||||||
|
|
||||||
|
$item = $feed['items'][0];
|
||||||
|
$this->assertSame('XML: A Disruptive Technology', $item['title']);
|
||||||
|
$this->assertSame('http://c.moreover.com/click/here.pl?r123', $item['uri']);
|
||||||
|
$this->assertSame('desc', $item['content']);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testRss2()
|
||||||
|
{
|
||||||
|
$xml = <<<XML
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>hello feed</title>
|
||||||
|
<link>https://example.com/</link>
|
||||||
|
<image>
|
||||||
|
<url>https://example.com/2.ico</url>
|
||||||
|
</image>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>hello world</title>
|
||||||
|
<link>https://example.com/1</link>
|
||||||
|
<description>desc2</description>
|
||||||
|
<pubDate>Tue, 26 Apr 2022 00:00:00 +0200</pubDate>
|
||||||
|
<author>root</author>
|
||||||
|
<enclosure url="https://example.com/1.png"></enclosure>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$sut = new \FeedParser();
|
||||||
|
$feed = $sut->parseFeed($xml);
|
||||||
|
|
||||||
|
$this->assertSame('hello feed', $feed['title']);
|
||||||
|
$this->assertSame('https://example.com/', $feed['uri']);
|
||||||
|
$this->assertSame('https://example.com/2.ico', $feed['icon']);
|
||||||
|
|
||||||
|
$item = $feed['items'][0];
|
||||||
|
$this->assertSame('hello world', $item['title']);
|
||||||
|
$this->assertSame('https://example.com/1', $item['uri']);
|
||||||
|
$this->assertSame(1650924000, $item['timestamp']);
|
||||||
|
$this->assertSame('root', $item['author']);
|
||||||
|
$this->assertSame('desc2', $item['content']);
|
||||||
|
$this->assertSame(['https://example.com/1.png'], $item['enclosures']);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function testAtom()
|
||||||
|
{
|
||||||
|
$xml = <<<XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
|
||||||
|
<title>hello feed</title>
|
||||||
|
<link href="https://example.com/1"></link>
|
||||||
|
<icon>https://example.com/2.ico</icon>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<title>hello world</title>
|
||||||
|
<link href="https://example.com/1"></link>
|
||||||
|
<author>
|
||||||
|
<name>root</name>
|
||||||
|
</author>
|
||||||
|
<content type="html">html</content>
|
||||||
|
<updated>2015-11-05T14:38:49+01:00</updated>
|
||||||
|
</entry>
|
||||||
|
</feed>
|
||||||
|
XML;
|
||||||
|
|
||||||
|
$sut = new \FeedParser();
|
||||||
|
$feed = $sut->parseFeed($xml);
|
||||||
|
|
||||||
|
$this->assertSame('hello feed', $feed['title']);
|
||||||
|
$this->assertSame('https://example.com/1', $feed['uri']);
|
||||||
|
$this->assertSame('https://example.com/2.ico', $feed['icon']);
|
||||||
|
|
||||||
|
$item = $feed['items'][0];
|
||||||
|
$this->assertSame('hello world', $item['title']);
|
||||||
|
$this->assertSame('https://example.com/1', $item['uri']);
|
||||||
|
$this->assertSame(1446730729, $item['timestamp']);
|
||||||
|
$this->assertSame('root', $item['author']);
|
||||||
|
$this->assertSame('html', $item['content']);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user