Merge remote-tracking branch 'upstream/master'

This commit is contained in:
sysadminstory 2018-02-11 23:56:27 +01:00
commit 5eef26423c
11 changed files with 237 additions and 67 deletions

View File

@ -2,20 +2,6 @@ dist: trusty
sudo: false
language: php
before_install:
# Circumvent a bug in current Travis CI builds using Ubuntu Trusty, where the
# include_path is wrong.
#
# Default is:
# - include_path='.:/home/travis/.phpenv/versions/5.6.31/share/pear'
#
# Should be:
# - include_path='.:/home/travis/.phpenv/versions/5.6.31/lib/php/pear'
#
# This applies to all builds except hhvm and nightly. Once the distro is fixed
# the following line can be removed
- if [[ ${TRAVIS_PHP_VERSION:0:1} == "5" || ${TRAVIS_PHP_VERSION:0:1} == "7" ]]; then echo "include_path='.:/home/travis/.phpenv/versions/$(phpenv version-name)/lib/php/pear'" >> ~/.phpenv/versions/$(phpenv version-name)/etc/php.ini; fi
install:
- pear channel-update pear.php.net
- pear install PHP_CodeSniffer

View File

@ -7,23 +7,23 @@ rss-bridge is a PHP project capable of generating ATOM feeds for websites which
Supported sites/pages (main)
===
* `FlickrExplore` : [Latest interesting images](http://www.flickr.com/explore) from Flickr
* `GoogleSearch` : Most recent results from Google Search
* `GooglePlus` : Most recent posts of user timeline
* `Twitter` : Return keyword/hashtag search or user timeline
* `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances)
* `YouTube` : YouTube user channel, playlist or search
* `Cryptome` : Returns the most recent documents from [Cryptome.org](http://cryptome.org/)
* `DansTonChat`: Most recent quotes from [danstonchat.com](http://danstonchat.com/)
* `DuckDuckGo`: Most recent results from [DuckDuckGo.com](https://duckduckgo.com/)
* `Instagram`: Most recent photos from an Instagram user
* `OpenClassrooms`: Latest tutorials from [fr.openclassrooms.com](http://fr.openclassrooms.com/)
* `Pinterest`: Most recent photos from user or search
* `ScmbBridge`: Newest stories from [secouchermoinsbete.fr](http://secouchermoinsbete.fr/)
* `Wikipedia`: highlighted articles from [Wikipedia](https://wikipedia.org/) in English, German, French or Esperanto
* `Bandcamp` : Returns last release from [bandcamp](https://bandcamp.com/) for a tag
* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords
* `Facebook` : Returns the latest posts on a page or profile on [Facebook](https://facebook.com/)
* `Bandcamp` : Returns last release from [bandcamp](https://bandcamp.com/) for a tag
* `Cryptome` : Returns the most recent documents from [Cryptome.org](http://cryptome.org/)
* `DansTonChat`: Most recent quotes from [danstonchat.com](http://danstonchat.com/)
* `DuckDuckGo`: Most recent results from [DuckDuckGo.com](https://duckduckgo.com/)
* `Facebook` : Returns the latest posts on a page or profile on [Facebook](https://facebook.com/)
* `FlickrExplore` : [Latest interesting images](http://www.flickr.com/explore) from Flickr
* `GooglePlus` : Most recent posts of user timeline
* `GoogleSearch` : Most recent results from Google Search
* `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances)
* `Instagram`: Most recent photos from an Instagram user
* `OpenClassrooms`: Latest tutorials from [fr.openclassrooms.com](http://fr.openclassrooms.com/)
* `Pinterest`: Most recent photos from user or search
* `ScmbBridge`: Newest stories from [secouchermoinsbete.fr](http://secouchermoinsbete.fr/)
* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords
* `Twitter` : Return keyword/hashtag search or user timeline
* `Wikipedia`: highlighted articles from [Wikipedia](https://wikipedia.org/) in English, German, French or Esperanto
* `YouTube` : YouTube user channel, playlist or search
Plus [many other bridges](bridges/) to enable, thanks to the community
@ -31,11 +31,11 @@ Output format
===
Output format can take several forms:
* `Atom` : ATOM Feed, for use in RSS/Feed readers
* `Mrss` : MRSS Feed, for use in RSS/Feed readers
* `Json` : Json, for consumption by other applications.
* `Html` : Simple html page.
* `Plaintext` : raw text (php object, as returned by print_r)
* `Atom` : ATOM Feed, for use in RSS/Feed readers
* `Html` : Simple html page.
* `Json` : Json, for consumption by other applications.
* `Mrss` : MRSS Feed, for use in RSS/Feed readers
* `Plaintext` : raw text (php object, as returned by print_r)
Screenshot
===

View File

@ -0,0 +1,65 @@
<?php
/**
 * Bridge that turns Bloomberg's front-page "top news" list, or the results of a
 * keyword search, into feed items.
 *
 * Each listed story is fetched individually so that its publication date and
 * description can be read from the article page's <meta> tags.
 */
class BloombergBridge extends BridgeAbstract
{
    const NAME = 'Bloomberg';
    const URI = 'https://www.bloomberg.com/';
    const DESCRIPTION = 'Trending stories from Bloomberg';
    const MAINTAINER = 'mdemoss';
    const PARAMETERS = array(
        'Trending Stories' => array(),
        'From Search' => array(
            'q' => array(
                'name' => 'Keyword',
                'required' => true
            )
        )
    );

    /**
     * Feed name, specialised by the queried context.
     *
     * @return string
     */
    public function getName()
    {
        switch($this->queriedContext) {
            case 'Trending Stories':
                return self::NAME . ' Trending Stories';
            case 'From Search':
                if (!is_null($this->getInput('q'))) {
                    return self::NAME . ' Search : ' . $this->getInput('q');
                }
                break;
        }

        return parent::getName();
    }

    /**
     * Collect the stories for the queried context into $this->items.
     *
     * Stories without a headline link, or whose article page cannot be loaded,
     * are skipped. Timestamp and content are only set when the corresponding
     * <meta> tag exists on the article page.
     */
    public function collectData()
    {
        // Default to an empty list so an unexpected context yields an empty feed
        // instead of iterating over an undefined variable.
        $stories = array();

        switch($this->queriedContext) {
            case 'Trending Stories': // Get list of top new <article>s from the front page.
                $html = getSimpleHTMLDOMCached($this->getURI(), 300);
                if (!$html) {
                    // NOTE(review): returnServerError() is assumed to abort the request — confirm.
                    returnServerError('Could not request Bloomberg.');
                }
                $stories = $html->find('ul.top-news-v3__stories article.top-news-v3-story');
                break;
            case 'From Search': // Get list of <article> elements from search.
                $html = getSimpleHTMLDOMCached(
                    $this->getURI() .
                    'search?sort=time:desc&page=1&query=' .
                    urlencode($this->getInput('q')), 300
                );
                if (!$html) {
                    returnServerError('Could not request Bloomberg.');
                }
                $stories = $html->find('div.search-result-items article.search-result-story');
                break;
        }

        foreach ($stories as $element) {
            // Start from a clean item so nothing leaks from the previous story.
            $item = array();

            $headline = $element->find('h1 a', 0);
            if (!$headline) {
                continue;
            }

            $item['uri'] = $this->absoluteUri($headline->href);

            $articleHtml = getSimpleHTMLDOMCached($item['uri']);
            if (!$articleHtml) {
                continue;
            }

            $item['title'] = $headline->plaintext;

            $dateMeta = $articleHtml->find('meta[name=iso-8601-publish-date],meta[name=date]', 0);
            if ($dateMeta) {
                $item['timestamp'] = strtotime($dateMeta->content);
            }

            $descriptionMeta = $articleHtml->find('meta[name=description]', 0);
            if ($descriptionMeta) {
                $item['content'] = $descriptionMeta->content;
            }

            $this->items[] = $item;
        }
    }

    /**
     * Turn a story link into an absolute URL.
     *
     * Links that already carry an http(s) scheme are returned untouched; anything
     * else is appended to the bridge URI with exactly one slash in between.
     *
     * @param string $href Link as found in the listing
     * @return string Absolute URL
     */
    private function absoluteUri($href)
    {
        if (preg_match('#^https?://#i', $href) === 1) {
            return $href;
        }

        return rtrim($this->getURI(), '/') . '/' . ltrim($href, '/');
    }
}

View File

@ -46,7 +46,7 @@ class FacebookBridge extends BridgeAbstract {
if(is_array($matches) && count($matches) > 1) {
$link = $matches[1];
if(strpos($link, '/') === 0)
$link = self::URI . $link . '"';
$link = self::URI . $link;
if(strpos($link, 'facebook.com/l.php?u=') !== false)
$link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
return ' href="' . $link . '"';

View File

@ -42,10 +42,10 @@ class LegifranceJOBridge extends BridgeAbstract {
$html = getSimpleHTMLDOM(self::URI)
or $this->returnServer('Unable to download ' . self::URI);
$this->author = trim($html->find('h2.title', 0)->plaintext);
$this->author = trim($html->find('h2.titleJO', 0)->plaintext);
$uri = $html->find('h2.titleELI', 0)->plaintext;
$this->uri = trim(substr($uri, strpos($uri, 'https')));
$this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/')));
$this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'), -5));
foreach($html->find('h3') as $section) {
$subsections = $section->nextSibling()->find('h4');

23
bridges/PcGamerBridge.php Normal file
View File

@ -0,0 +1,23 @@
<?php
/**
 * Bridge that exposes PC Gamer's "most read" list as feed items.
 *
 * Each listed story is fetched individually so that its publication date,
 * description and author can be read from the article page.
 */
class PcGamerBridge extends BridgeAbstract
{
    const NAME = 'PC Gamer';
    const URI = 'https://www.pcgamer.com/';
    const DESCRIPTION = 'PC Gamer Most Read Stories';
    const MAINTAINER = 'mdemoss';

    /**
     * Collect the most-read stories into $this->items.
     *
     * Entries without a link, or whose article page cannot be loaded, are
     * skipped. Title, timestamp, content and author are only set when the
     * matching element exists.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOMCached($this->getURI(), 300);
        if (!$html) {
            // NOTE(review): returnServerError() is assumed to abort the request — confirm.
            returnServerError('Could not request PC Gamer.');
        }

        $stories = $html->find('div#popularcontent li.most-popular-item');

        foreach ($stories as $element) {
            // Start from a clean item so nothing leaks from the previous story.
            $item = array();

            $link = $element->find('a', 0);
            if (!$link) {
                continue;
            }
            $item['uri'] = $link->href;

            $articleHtml = getSimpleHTMLDOMCached($item['uri']);
            if (!$articleHtml) {
                continue;
            }

            $titleLink = $element->find('h4 a', 0);
            if ($titleLink) {
                $item['title'] = $titleLink->plaintext;
            }

            $dateMeta = $articleHtml->find('meta[name=pub_date]', 0);
            if ($dateMeta) {
                $item['timestamp'] = strtotime($dateMeta->content);
            }

            $descriptionMeta = $articleHtml->find('meta[name=description]', 0);
            if ($descriptionMeta) {
                $item['content'] = $descriptionMeta->content;
            }

            $authorLink = $articleHtml->find('a[itemprop=author]', 0);
            if ($authorLink) {
                $item['author'] = $authorLink->plaintext;
            }

            $this->items[] = $item;
        }
    }
}

38
bridges/TebeoBridge.php Normal file
View File

@ -0,0 +1,38 @@
<?php
/**
 * Bridge listing the replay videos published by Tébéo, optionally restricted
 * to one category.
 */
class TebeoBridge extends FeedExpander {
    const NAME = 'Tébéo Bridge';
    const URI = 'http://www.tebeo.bzh/';
    const CACHE_TIMEOUT = 21600; //6h
    const DESCRIPTION = 'Returns the newest Tébéo videos by category';
    const MAINTAINER = 'Mitsukarenai';

    const PARAMETERS = array( array(
        'cat' => array(
            'name' => 'Catégorie',
            'type' => 'list',
            'values' => array(
                'Toutes les vidéos' => '/',
                'Actualité' => '/14-actualite',
                'Sport' => '/3-sport',
                'Culture-Loisirs' => '/5-culture-loisirs',
                'Société' => '/15-societe',
                'Langue Bretonne' => '/9-langue-bretonne'
            )
        )
    ));

    /**
     * Scrape the replay listing of the selected category into $this->items.
     *
     * Listing entries without a link are skipped. Title and timestamp are only
     * set when the matching element exists.
     */
    public function collectData(){
        // URI ends with a slash and category values start with one; strip both
        // so the request path does not contain "//" segments.
        $url = rtrim(self::URI, '/') . '/le-replay/' . ltrim($this->getInput('cat'), '/');

        $html = getSimpleHTMLDOM($url)
            or returnServerError('Could not request Tébéo.');

        foreach($html->find('div[id=items_replay] div.replay') as $element) {
            $link = $element->find('a', 0);
            if(!$link) {
                continue;
            }

            $item = array();
            $item['uri'] = $link->href;

            $title = $element->find('h3', 0);
            if($title) {
                $item['title'] = $title->plaintext;
            }

            $day = $element->find('p.moment-format-day', 0);
            if($day) {
                $item['timestamp'] = strtotime($day->plaintext);
            }

            // Thumbnail is optional: fall back to an empty src rather than failing.
            $image = $element->find('img', 0);
            $imageSrc = $image ? $image->src : '';
            $item['content'] = '<a href="'.$item['uri'].'"><img alt="" src="'.$imageSrc.'"></a>';

            $this->items[] = $item;
        }
    }
}

View File

@ -77,9 +77,7 @@ class TwitterBridge extends BridgeAbstract {
$param = 'u';
break;
case 'By list':
$specific = $this->getInput('user');
$param = 'list';
break;
return $this->getInput('list') . ' - Twitter list by ' . $this->getInput('user');
default: return parent::getName();
}
return 'Twitter ' . $specific . $this->getInput($param);
@ -165,7 +163,7 @@ class TwitterBridge extends BridgeAbstract {
switch($this->queriedContext) {
case 'By list':
// Check if filter applies to list (using raw content)
if(!is_null($this->getInput('filter'))) {
if($this->getInput('filter')) {
if(stripos($tweet->find('p.js-tweet-text', 0)->plaintext, $this->getInput('filter')) === false) {
continue 2; // switch + for-loop!
}

View File

@ -1,9 +1,11 @@
<?php
class VkBridge extends BridgeAbstract {
class VkBridge extends BridgeAbstract
{
const MAINTAINER = 'ahiles3005';
const NAME = 'VK.com';
const URI = 'http://vk.com/';
const URI = 'https://vk.com/';
const CACHE_TIMEOUT = 300; // 5min
const DESCRIPTION = 'Working with open pages';
const PARAMETERS = array(
@ -15,42 +17,43 @@ class VkBridge extends BridgeAbstract {
)
);
public function getURI(){
if(!is_null($this->getInput('u'))) {
public function getURI()
{
if (!is_null($this->getInput('u'))) {
return static::URI . urlencode($this->getInput('u'));
}
return parent::getURI();
}
public function collectData(){
public function collectData()
{
ini_set('user-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0');
$text_html = getContents($this->getURI())
or returnServerError('No results for group or user name "' . $this->getInput('u') . '".');
$text_html = $this->getContents()
or returnServerError('No results for group or user name "' . $this->getInput('u') . '".');
$text_html = iconv('windows-1251', 'utf-8', $text_html);
$html = str_get_html($text_html);
$pageName = $html->find('.page_name', 0)->plaintext;
foreach($html->find('.post') as $post) {
foreach ($html->find('.post') as $post) {
if(is_object($post->find('a.wall_post_more', 0))) {
if (is_object($post->find('a.wall_post_more', 0))) {
//delete link "show full" in content
$post->find('a.wall_post_more', 0)->outertext = '';
}
$item = array();
$item['content'] = strip_tags(backgroundToImg($post->find('div.wall_text', 0)->innertext), '<br><img>');
if(is_object($post->find('a.page_media_link_title', 0))) {
$link = $post->find('a.page_media_link_title', 0)->getAttribute('href');
if (is_object($post->find('a.page_media_link_title', 0))) {
$link = $post->find('a.page_media_link_title', 0)->getAttribute('href');
//external link in the post
$item['content'] .= "\n\rExternal link: "
. str_replace('/away.php?to=', '', urldecode($link));
. str_replace('/away.php?to=', '', urldecode($link));
}
//get video on post
if(is_object($post->find('span.post_video_title_content', 0))) {
if (is_object($post->find('span.post_video_title_content', 0))) {
$titleVideo = $post->find('span.post_video_title_content', 0)->plaintext;
$linkToVideo = self::URI . $post->find('a.page_post_thumb_video', 0)->getAttribute('href');
$item['content'] .= "\n\r {$titleVideo}: {$linkToVideo}";
@ -58,9 +61,57 @@ class VkBridge extends BridgeAbstract {
// get post link
$item['uri'] = self::URI . $post->find('a.post_link', 0)->getAttribute('href');
$item['date'] = $post->find('span.rel_date', 0)->plaintext;
$item['timestamp'] = $this->getTime($post);
$item['author'] = $pageName;
$this->items[] = $item;
// var_dump($item['date']);
}
}
/**
 * Work out when a wall post was published.
 *
 * If the post's `span.rel_date` element has a truthy `time` attribute, that
 * value is returned as-is. Otherwise the element's text is parsed: when no year
 * can be extracted, the text is completed with today's date (text contains
 * "today"), yesterday's date (text contains "yesterday ") or the current year,
 * and parsed again. The parsed parts are then rebuilt into a
 * "day-month-year hour:minute" string and handed to strtotime().
 *
 * @param object $post Post node exposing find(); presumably a simple_html_dom node — confirm
 * @return mixed The `time` attribute value, or whatever strtotime() returns
 */
private function getTime($post)
{
    $dateNode = $post->find('span.rel_date', 0);

    $attrTime = $dateNode->getAttribute('time');
    if ($attrTime) {
        return $attrTime;
    }

    $text = $dateNode->plaintext;
    $parts = date_parse($text);

    if (!$parts['year']) {
        // No year found: complete the text with the missing date information.
        if (strpos($text, 'today') !== false) {
            $text = date('d-m-Y') . ' ' . $text;
        } elseif (strpos($text, 'yesterday ') !== false) {
            $dayBefore = time() - 60 * 60 * 24;
            $text = date('d-m-Y', $dayBefore) . ' ' . $text;
        } else {
            $text .= ' ' . date('Y');
        }

        $parts = date_parse($text);
    }

    $datePart = implode('-', array($parts['day'], $parts['month'], $parts['year']));
    $timePart = $parts['hour'] . ':' . $parts['minute'];

    return strtotime($datePart . ' ' . $timePart);
}
/**
 * Download the page returned by getURI() with a browser User-Agent, an English
 * Accept-language header and the `remixlang=3` cookie.
 *
 * The User-Agent is passed through the stream context only. The previous code
 * called ini_set() with the key 'user-agent', which is not an ini directive
 * (the real one is 'user_agent'), so the value was never applied.
 *
 * @return mixed Whatever the global getContents() helper returns for the request
 */
public function getContents()
{
    $userAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0';

    $opts = array(
        'http' => array(
            'method' => 'GET',
            'user_agent' => $userAgent,
            // NOTE(review): 'accept_encoding' is not a documented HTTP context
            // option; kept unchanged from the original.
            'accept_encoding' => 'gzip',
            // One header per line, separated by CRLF only. The original literal
            // embedded a raw line break before "Cookie:", malforming that header.
            'header' => "Accept-language: en\r\n"
                . "Cookie: remixlang=3\r\n"
        )
    );

    $context = stream_context_create($opts);

    return getContents($this->getURI(), false, $context);
}
}

View File

@ -91,7 +91,7 @@ class YoutubeBridge extends BridgeAbstract {
if(strpos($vid, 'googleads') === false)
$this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
}
$this->request = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext);
$this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName()
}
private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector){
@ -164,7 +164,7 @@ class YoutubeBridge extends BridgeAbstract {
$html = $this->ytGetSimpleHTMLDOM($url_listing)
or returnServerError("Could not request YouTube. Tried:\n - $url_listing");
$this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a');
$this->request = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
$this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName()
} elseif($this->getInput('s')) { /* search mode */
$this->request = $this->getInput('s');
$page = 1;
@ -182,7 +182,7 @@ class YoutubeBridge extends BridgeAbstract {
or returnServerError("Could not request YouTube. Tried:\n - $url_listing");
$this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3');
$this->request = 'Search: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
$this->feedName = 'Search: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName()
} else { /* no valid mode */
returnClientError("You must either specify either:\n - YouTube
username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
@ -190,6 +190,15 @@ class YoutubeBridge extends BridgeAbstract {
}
public function getName(){
return (!empty($this->request) ? $this->request . ' - ' : '') . 'YouTube Bridge';
}
// Name depends on queriedContext:
switch($this->queriedContext) {
case 'By username':
case 'By channel id':
case 'By playlist Id':
case 'Search result':
return $this->feedName . ' - YouTube'; // We already know it's a bridge, right?
default:
return parent::getName();
}
}
}