From 641d5880c7c1af09701d4b1950047a994f75c8f7 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 27 Feb 2016 12:56:36 +0100 Subject: [PATCH 01/14] [T411] Domain name change T411 changed their domain name to t411.ch --- bridges/T411Bridge.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 54904cce..6d794cb5 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -5,7 +5,7 @@ class T411Bridge extends BridgeAbstract { $this->maintainer = "ORelio"; $this->name = "T411"; - $this->uri = "https://t411.in/"; + $this->uri = $this->getURI(); $this->description = "Returns the 5 newest torrents with specified search terms
Use url part after '?' mark when using their search engine"; $this->update = "2016-02-06"; @@ -34,8 +34,8 @@ class T411Bridge extends BridgeAbstract { $this->returnError('You must specify a search criteria', 400); } - //Retrieve torrent listing as truncated rss, which does not contain torrent description - $url = 'http://www.t411.in/torrents/search/?'.$param['search'].'&order=added&type=desc'; + //Retrieve torrent listing from search results, which does not contain torrent description + $url = $this->getURI().'torrents/search/?'.$param['search'].'&order=added&type=desc'; $html = file_get_html($url) or $this->returnError('Could not request t411: '.$url, 500); $results = $html->find('table.results', 0); if (is_null($results)) @@ -64,7 +64,7 @@ class T411Bridge extends BridgeAbstract { $item_author = $item_html->find('a.profile', 0)->innertext; //Retrieve image for thumbnail or generic logo fallback - $item_image = 'http://www.t411.in/themes/blue/images/logo.png'; + $item_image = $this->getURI().'themes/blue/images/logo.png'; foreach ($item_desc->find('img') as $img) { if (strpos($img->src, 'prez') === false) { $item_image = $img->src; @@ -92,7 +92,7 @@ class T411Bridge extends BridgeAbstract { } public function getURI() { - return 'https://t411.in'; + return 'https://t411.ch/'; } public function getCacheDuration() { From 05c2e2aa20c9ad8613d3f0fc215d08255451b54f Mon Sep 17 00:00:00 2001 From: "teromene@teromene.fr" Date: Wed, 2 Mar 2016 11:49:27 +0000 Subject: [PATCH 02/14] Corrected CourrierInternationalBridge --- bridges/CourrierInternationalBridge.php | 53 ++++++++++++------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index a7ff8b03..cf7126c9 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -13,52 +13,49 @@ class CourrierInternationalBridge extends BridgeAbstract{ public function collectData(array $param){ 
- function fetchArticle($link) { - - $page = file_get_html($link); + $html = ''; - $contenu = $page->find(".article-text")[0]; - - return strip_tags($contenu); - - - - } - - $html = ''; - - $html = file_get_html('http://www.courrierinternational.com/article') or $this->returnError('Error.', 500); + $html = file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500); - $element = $html->find(".type-normal"); + $element = $html->find("article"); - $article_count = 1; + $article_count = 1; - foreach($element as $article) { + foreach($element as $article) { - $item = new \Item(); + $item = new \Item(); - $item->uri = "http://www.courrierinternational.com".$article->find("a")[0]->getAttribute("href"); - $item->content = fetchArticle("http://www.courrierinternational.com".$article->find("a")[0]->getAttribute("href")); - $item->title = strip_tags($article->find("h2")[0]); + $item->uri = $article->parent->getAttribute("href"); - $dateTime = date_parse($article->find("time")[0]); + if(strpos($item->uri, "http") === FALSE) { + $item->uri = "http://courrierinternational.fr/".$item->uri; + } + + $page = file_get_html($item->uri); - $item->timestamp = mktime( + $cleaner = new HTMLSanitizer(); + + $item->content = $cleaner->sanitize($page->find("div.article-text")[0]); + $item->title = strip_tags($article->find(".title")[0]); + + $dateTime = date_parse($page->find("time")[0]); + + $item->timestamp = mktime( $dateTime['hour'], $dateTime['minute'], $dateTime['second'], $dateTime['month'], $dateTime['day'], $dateTime['year'] - ); + ); - $this->items[] = $item; - $article_count ++; - if($article_count > 5) break; + $this->items[] = $item; + $article_count ++; + if($article_count > 5) break; - } + } From 386f0ca379eb5bb43ae33b0d37f2baf4c81c1518 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Wed, 2 Mar 2016 13:14:22 +0100 Subject: [PATCH 03/14] Fix BandCamp image display --- bridges/BandcampBridge.php | 6 +++++- 1 file changed, 5 insertions(+), 1 
deletion(-) diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index 8d1d2aaf..ce002e5c 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -33,10 +33,14 @@ class BandcampBridge extends BridgeAbstract{ } foreach($html->find('li.item') as $release) { + $script = $release->find('div.art', 0)->getAttribute('onclick'); + $uri = ltrim($script, "return 'url("); + $uri = rtrim($uri, "')"); + $item = new \Item(); $item->name = $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; $item->title = $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; - $item->content = '
' . $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; + $item->content = '
' . $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; $item->id = $release->find('a',0)->getAttribute('href'); $item->uri = $release->find('a',0)->getAttribute('href'); $this->items[] = $item; From 3e8a7624e151eedda0007d8336b9da2177544c4f Mon Sep 17 00:00:00 2001 From: "teromene@teromene.fr" Date: Thu, 3 Mar 2016 15:26:07 +0000 Subject: [PATCH 04/14] Added a URL option to activate debug : debug activates all messages, and disables the cache. --- index.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/index.php b/index.php index 783e91a2..5e9dfbb3 100644 --- a/index.php +++ b/index.php @@ -14,7 +14,14 @@ TODO : date_default_timezone_set('UTC'); error_reporting(0); -//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only. + +if(isset($_REQUEST['debug'])) { + + ini_set('display_errors','1'); error_reporting(E_ALL); //Report all errors + + $_REQUEST["disable_cache"] = true; //Disable the cache. + +} require_once __DIR__ . '/lib/RssBridge.php'; From 9df5913d6ba1e7efa817683a1f3771c1637dfa55 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sun, 6 Mar 2016 19:03:24 +0100 Subject: [PATCH 05/14] [T411] Use torrent id as permalink This way links are not broken if the torrent is renamed. 
--- bridges/T411Bridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 6d794cb5..92f3c764 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -52,7 +52,7 @@ class T411Bridge extends BridgeAbstract { usleep(500000); //So we need to wait (500ms) //Retrieve data from RSS entry - $item_uri = 'http://'.ExtractFromDelimiters($element->outertext, 'getURI().'torrents/details/?id='.ExtractFromDelimiters($element->find('a.nfo', 0)->outertext, '?id=', '"'); $item_title = ExtractFromDelimiters($element->outertext, '" title="', '"'); $item_date = strtotime($element->find('dd', 0)->plaintext); From c13dd8c18a97d6e174f093a3217df333742a7112 Mon Sep 17 00:00:00 2001 From: alexis Date: Sat, 12 Mar 2016 16:50:45 +0100 Subject: [PATCH 06/14] Add Vine.co bridge :) --- bridges/VineBridge.php | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 bridges/VineBridge.php diff --git a/bridges/VineBridge.php b/bridges/VineBridge.php new file mode 100644 index 00000000..d9482e34 --- /dev/null +++ b/bridges/VineBridge.php @@ -0,0 +1,59 @@ +maintainer = "ckiw"; + $this->name = "Vine bridge"; + $this->uri = "http://vine.co/"; + $this->description = "Returns the latests vines from vine user page"; + $this->update = "2016-03-12"; + + $this->parameters[] = + '[ + { + "name" : "User id", + "identifier" : "u", + "type" : "text", + "required" : "true" + } + ]'; + } + + public function collectData(array $param){ + $html = ''; + $uri = 'http://vine.co/u/'.$param['u'].'?mode=list'; + + $html = file_get_html($uri) or $this->returnError('No results for this query.', 404); + + foreach($html->find('.post') as $element) { + $a = $element->find('a', 0); + $a->href = str_replace('https://', 'http://', $a->href); + $time = strtotime(ltrim($element->find('p', 0)->plaintext, " Uploaded at ")); + $video = $element->find('video', 0); + $video->controls = "true"; + 
$element->find('h2', 0)->outertext = ''; + + $item = new \Item(); + $item->uri = $a->href; + $item->timestamp = $time; + $item->title = $a->plaintext; + $item->content = $element; + + $this->items[] = $item; + } + + } + + public function getName(){ + return 'Vine'; + } + + public function getURI(){ + return 'http://vine.co'; + } + + public function getCacheDuration(){ + return 10; //seconds + } +} From c2769c89992b2c77810a2066b2c03c900ff2261d Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 16 Mar 2016 19:51:54 +0100 Subject: [PATCH 07/14] [CNET] Fix content extraction --- bridges/CNETBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index ec7decf0..42c78a94 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -9,7 +9,7 @@ class CNETBridge extends BridgeAbstract { $this->name = 'CNET News'; $this->uri = 'http://www.cnet.com/'; $this->description = 'Returns the newest articles.
You may specify a topic found in some section URLs, else all topics are selected.'; - $this->update = '2016-02-06'; + $this->update = '2016-03-16'; $this->parameters[] = '[ @@ -76,7 +76,7 @@ class CNETBridge extends BridgeAbstract { if (is_object($article_thumbnail)) $article_thumbnail = $article_thumbnail->find('img', 0)->src; - $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '
', '