From 641d5880c7c1af09701d4b1950047a994f75c8f7 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 27 Feb 2016 12:56:36 +0100 Subject: [PATCH 001/584] [T411] Domain name change T411 changed their domain name to t411.ch --- bridges/T411Bridge.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 54904cce..6d794cb5 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -5,7 +5,7 @@ class T411Bridge extends BridgeAbstract { $this->maintainer = "ORelio"; $this->name = "T411"; - $this->uri = "https://t411.in/"; + $this->uri = $this->getURI(); $this->description = "Returns the 5 newest torrents with specified search terms
Use url part after '?' mark when using their search engine"; $this->update = "2016-02-06"; @@ -34,8 +34,8 @@ class T411Bridge extends BridgeAbstract { $this->returnError('You must specify a search criteria', 400); } - //Retrieve torrent listing as truncated rss, which does not contain torrent description - $url = 'http://www.t411.in/torrents/search/?'.$param['search'].'&order=added&type=desc'; + //Retrieve torrent listing from search results, which does not contain torrent description + $url = $this->getURI().'torrents/search/?'.$param['search'].'&order=added&type=desc'; $html = file_get_html($url) or $this->returnError('Could not request t411: '.$url, 500); $results = $html->find('table.results', 0); if (is_null($results)) @@ -64,7 +64,7 @@ class T411Bridge extends BridgeAbstract { $item_author = $item_html->find('a.profile', 0)->innertext; //Retrieve image for thumbnail or generic logo fallback - $item_image = 'http://www.t411.in/themes/blue/images/logo.png'; + $item_image = $this->getURI().'themes/blue/images/logo.png'; foreach ($item_desc->find('img') as $img) { if (strpos($img->src, 'prez') === false) { $item_image = $img->src; @@ -92,7 +92,7 @@ class T411Bridge extends BridgeAbstract { } public function getURI() { - return 'https://t411.in'; + return 'https://t411.ch/'; } public function getCacheDuration() { From 4fac0e382c8c4c0cadb5c1c1364faf120d141a52 Mon Sep 17 00:00:00 2001 From: ahiles3005 Date: Tue, 1 Mar 2016 20:39:09 +0400 Subject: [PATCH 002/584] bug fix: After adding a new bridge, it is not loaded because it was removed from the list of action --- index.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/index.php b/index.php index 5578b2de..783e91a2 100644 --- a/index.php +++ b/index.php @@ -54,8 +54,6 @@ if (!file_exists($whitelist_file)) { } else { $whitelist_selection = explode("\n", file_get_contents($whitelist_file)); - //Remove the last empty line. 
- array_pop($whitelist_selection); } Cache::purge(); From ffa1ea8f0f3cd6231f51cfc4edd7865c93b51cca Mon Sep 17 00:00:00 2001 From: ahiles3005 Date: Tue, 1 Mar 2016 21:06:08 +0400 Subject: [PATCH 003/584] add: Bridge for Russian popular social network.Vkontakte or simply VK --- bridges/VkBridge.php | 80 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 bridges/VkBridge.php diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php new file mode 100644 index 00000000..7126027c --- /dev/null +++ b/bridges/VkBridge.php @@ -0,0 +1,80 @@ +maintainer = "ahiles3005"; + $this->name = "VK.com"; + $this->uri = "http://www.vk.com/"; + $this->description = "Working with open pages"; + $this->update = "21/02/2016"; + + $this->parameters["Url on page group or user"] = '[ + { + "name" : "Url", + "identifier" : "u" + } + ]'; + } + + public function collectData(array $param) { + $html = ''; + if (isset($param['u'])) { + $this->request = $param['u']; + + $opts = array(//option for get EN ver. site + 'https' => array( + 'method' => "GET", + 'header' => "Set-Cookie: remixlang=3\r\n" . + "Referer: http://www.vk.com/\r\n" + ) + ); + $context = stream_context_create($opts); + $text_html = file_get_contents(urldecode($this->request), false, $context) or $this->returnError('No results for this query.', 404); + $text_html = iconv('windows-1251', 'utf-8', $text_html); + $html = str_get_html($text_html); + } + + + foreach ($html->find('div.post_table') as $post) { + if (is_object($post->find('a.wall_post_more', 0))) { + $post->find('a.wall_post_more', 0)->outertext = ''; //delete link "show full" in content + } + + + $item = new \Item(); + $item->content = strip_tags($post->find('div.wall_post_text', 0)->innertext); + + if (is_object($post->find('a.page_media_link_title', 0))) { + $link = $post->find('a.page_media_link_title', 0)->getAttribute('href'); + $item->content .= "\n\rExternal link: " . 
str_replace('/away.php?to=', '', urldecode($link)); //external link in the post + } + //get video on post + if (is_object($post->find('span.post_video_title_content', 0))) { + $titleVideo = $post->find('span.post_video_title_content', 0)->plaintext; + $linkToVideo = 'https://vk.com' . $post->find('a.page_post_thumb_video', 0)->getAttribute('href'); + $item->content .= "\n\r {$titleVideo}: {$linkToVideo}"; + } + $item->uri = 'https://vk.com' . $post->find('.reply_link_wrap', 0)->find('a', 0)->getAttribute('href'); // get post link + $item->date = $post->find('span.rel_date', 0)->plaintext; + $this->items[] = $item; + // var_dump($item->date); + } + } + + public function getName() { + return(isset($this->name) ? $this->name . ' - ' : '') . 'VK Bridge'; + } + + public function getURI() { + return 'http://vk.com'; + } + + public function getCacheDuration() { + return 300; // 5 minutes + } + +} From 5f35988a184416c54738a8690937e82c94e493ad Mon Sep 17 00:00:00 2001 From: ahiles3005 Date: Tue, 1 Mar 2016 21:08:33 +0400 Subject: [PATCH 004/584] Revert "add: Bridge for Russian popular social network.Vkontakte or simply VK" This reverts commit ffa1ea8f0f3cd6231f51cfc4edd7865c93b51cca. --- bridges/VkBridge.php | 80 -------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 bridges/VkBridge.php diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php deleted file mode 100644 index 7126027c..00000000 --- a/bridges/VkBridge.php +++ /dev/null @@ -1,80 +0,0 @@ -maintainer = "ahiles3005"; - $this->name = "VK.com"; - $this->uri = "http://www.vk.com/"; - $this->description = "Working with open pages"; - $this->update = "21/02/2016"; - - $this->parameters["Url on page group or user"] = '[ - { - "name" : "Url", - "identifier" : "u" - } - ]'; - } - - public function collectData(array $param) { - $html = ''; - if (isset($param['u'])) { - $this->request = $param['u']; - - $opts = array(//option for get EN ver. 
site - 'https' => array( - 'method' => "GET", - 'header' => "Set-Cookie: remixlang=3\r\n" . - "Referer: http://www.vk.com/\r\n" - ) - ); - $context = stream_context_create($opts); - $text_html = file_get_contents(urldecode($this->request), false, $context) or $this->returnError('No results for this query.', 404); - $text_html = iconv('windows-1251', 'utf-8', $text_html); - $html = str_get_html($text_html); - } - - - foreach ($html->find('div.post_table') as $post) { - if (is_object($post->find('a.wall_post_more', 0))) { - $post->find('a.wall_post_more', 0)->outertext = ''; //delete link "show full" in content - } - - - $item = new \Item(); - $item->content = strip_tags($post->find('div.wall_post_text', 0)->innertext); - - if (is_object($post->find('a.page_media_link_title', 0))) { - $link = $post->find('a.page_media_link_title', 0)->getAttribute('href'); - $item->content .= "\n\rExternal link: " . str_replace('/away.php?to=', '', urldecode($link)); //external link in the post - } - //get video on post - if (is_object($post->find('span.post_video_title_content', 0))) { - $titleVideo = $post->find('span.post_video_title_content', 0)->plaintext; - $linkToVideo = 'https://vk.com' . $post->find('a.page_post_thumb_video', 0)->getAttribute('href'); - $item->content .= "\n\r {$titleVideo}: {$linkToVideo}"; - } - $item->uri = 'https://vk.com' . $post->find('.reply_link_wrap', 0)->find('a', 0)->getAttribute('href'); // get post link - $item->date = $post->find('span.rel_date', 0)->plaintext; - $this->items[] = $item; - // var_dump($item->date); - } - } - - public function getName() { - return(isset($this->name) ? $this->name . ' - ' : '') . 
'VK Bridge'; - } - - public function getURI() { - return 'http://vk.com'; - } - - public function getCacheDuration() { - return 300; // 5 minutes - } - -} From 54e912805e6ba4bf4d2210b6244aabe174537d28 Mon Sep 17 00:00:00 2001 From: ahiles3005 Date: Tue, 1 Mar 2016 21:10:46 +0400 Subject: [PATCH 005/584] add: Bridge for Russian popular social network.Vkontakte or simply VK --- bridges/VkBridge.php | 64 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 bridges/VkBridge.php diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php new file mode 100644 index 00000000..4aaf367e --- /dev/null +++ b/bridges/VkBridge.php @@ -0,0 +1,64 @@ +maintainer = "ahiles3005"; + $this->name = "VK.com"; + $this->uri = "http://www.vk.com/"; + $this->description = "Working with open pages"; + $this->update = "21/02/2016"; + $this->parameters["Url on page group or user"] = '[ + { + "name" : "Url", + "identifier" : "u" + } + ]'; + } + + public function collectData(array $param) { + $html = ''; + if (isset($param['u'])) { + $this->request = $param['u']; + $text_html = file_get_contents(urldecode($this->request)) or $this->returnError('No results for this query.', 404); + $text_html = iconv('windows-1251', 'utf-8', $text_html); + $html = str_get_html($text_html); + } + foreach ($html->find('div.post_table') as $post) { + if (is_object($post->find('a.wall_post_more', 0))) { + $post->find('a.wall_post_more', 0)->outertext = ''; //delete link "show full" in content + } + $item = new \Item(); + $item->content = strip_tags($post->find('div.wall_post_text', 0)->innertext); + if (is_object($post->find('a.page_media_link_title', 0))) { + $link = $post->find('a.page_media_link_title', 0)->getAttribute('href'); + $item->content .= "\n\rExternal link: " . 
str_replace('/away.php?to=', '', urldecode($link)); //external link in the post + } + //get video on post + if (is_object($post->find('span.post_video_title_content', 0))) { + $titleVideo = $post->find('span.post_video_title_content', 0)->plaintext; + $linkToVideo = 'https://vk.com' . $post->find('a.page_post_thumb_video', 0)->getAttribute('href'); + $item->content .= "\n\r {$titleVideo}: {$linkToVideo}"; + } + $item->uri = 'https://vk.com' . $post->find('.reply_link_wrap', 0)->find('a', 0)->getAttribute('href'); // get post link + $item->date = $post->find('span.rel_date', 0)->plaintext; + $this->items[] = $item; + // var_dump($item->date); + } + } + + public function getName() { + return(isset($this->name) ? $this->name . ' - ' : '') . 'VK Bridge'; + } + + public function getURI() { + return 'http://vk.com'; + } + + public function getCacheDuration() { + return 300; // 5 minutes + } + +} From 05c2e2aa20c9ad8613d3f0fc215d08255451b54f Mon Sep 17 00:00:00 2001 From: "teromene@teromene.fr" Date: Wed, 2 Mar 2016 11:49:27 +0000 Subject: [PATCH 006/584] Corrected CourrierInternationalBridge --- bridges/CourrierInternationalBridge.php | 53 ++++++++++++------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index a7ff8b03..cf7126c9 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -13,52 +13,49 @@ class CourrierInternationalBridge extends BridgeAbstract{ public function collectData(array $param){ - function fetchArticle($link) { - - $page = file_get_html($link); + $html = ''; - $contenu = $page->find(".article-text")[0]; - - return strip_tags($contenu); - - - - } - - $html = ''; - - $html = file_get_html('http://www.courrierinternational.com/article') or $this->returnError('Error.', 500); + $html = file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500); - $element = 
$html->find(".type-normal"); + $element = $html->find("article"); - $article_count = 1; + $article_count = 1; - foreach($element as $article) { + foreach($element as $article) { - $item = new \Item(); + $item = new \Item(); - $item->uri = "http://www.courrierinternational.com".$article->find("a")[0]->getAttribute("href"); - $item->content = fetchArticle("http://www.courrierinternational.com".$article->find("a")[0]->getAttribute("href")); - $item->title = strip_tags($article->find("h2")[0]); + $item->uri = $article->parent->getAttribute("href"); - $dateTime = date_parse($article->find("time")[0]); + if(strpos($item->uri, "http") === FALSE) { + $item->uri = "http://courrierinternational.fr/".$item->uri; + } + + $page = file_get_html($item->uri); - $item->timestamp = mktime( + $cleaner = new HTMLSanitizer(); + + $item->content = $cleaner->sanitize($page->find("div.article-text")[0]); + $item->title = strip_tags($article->find(".title")[0]); + + $dateTime = date_parse($page->find("time")[0]); + + $item->timestamp = mktime( $dateTime['hour'], $dateTime['minute'], $dateTime['second'], $dateTime['month'], $dateTime['day'], $dateTime['year'] - ); + ); - $this->items[] = $item; - $article_count ++; - if($article_count > 5) break; + $this->items[] = $item; + $article_count ++; + if($article_count > 5) break; - } + } From 386f0ca379eb5bb43ae33b0d37f2baf4c81c1518 Mon Sep 17 00:00:00 2001 From: Damien Calesse Date: Wed, 2 Mar 2016 13:14:22 +0100 Subject: [PATCH 007/584] Fix BandCamp image display --- bridges/BandcampBridge.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index 8d1d2aaf..ce002e5c 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -33,10 +33,14 @@ class BandcampBridge extends BridgeAbstract{ } foreach($html->find('li.item') as $release) { + $script = $release->find('div.art', 0)->getAttribute('onclick'); + $uri = ltrim($script, "return 'url("); + $uri = 
rtrim($uri, "')"); + $item = new \Item(); $item->name = $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; $item->title = $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; - $item->content = '
' . $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; + $item->content = '
' . $release->find('div.itemsubtext',0)->plaintext . ' - ' . $release->find('div.itemtext',0)->plaintext; $item->id = $release->find('a',0)->getAttribute('href'); $item->uri = $release->find('a',0)->getAttribute('href'); $this->items[] = $item; From 3e8a7624e151eedda0007d8336b9da2177544c4f Mon Sep 17 00:00:00 2001 From: "teromene@teromene.fr" Date: Thu, 3 Mar 2016 15:26:07 +0000 Subject: [PATCH 008/584] Added an URL option to activate debug : debug activates all messages, and disables the cache. --- index.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/index.php b/index.php index 783e91a2..5e9dfbb3 100644 --- a/index.php +++ b/index.php @@ -14,7 +14,14 @@ TODO : date_default_timezone_set('UTC'); error_reporting(0); -//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only. + +if(isset($_REQUEST['debug'])) { + + ini_set('display_errors','1'); error_reporting(E_ALL); //Report all errors + + $_REQUEST["disable_cache"] = true; //Disable the cache. + +} require_once __DIR__ . '/lib/RssBridge.php'; From 9df5913d6ba1e7efa817683a1f3771c1637dfa55 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sun, 6 Mar 2016 19:03:24 +0100 Subject: [PATCH 009/584] [T411] Use torrent id as permalink This way links are not broken if the torrent is renamed. 
--- bridges/T411Bridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 6d794cb5..92f3c764 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -52,7 +52,7 @@ class T411Bridge extends BridgeAbstract { usleep(500000); //So we need to wait (500ms) //Retrieve data from RSS entry - $item_uri = 'http://'.ExtractFromDelimiters($element->outertext, 'getURI().'torrents/details/?id='.ExtractFromDelimiters($element->find('a.nfo', 0)->outertext, '?id=', '"'); $item_title = ExtractFromDelimiters($element->outertext, '" title="', '"'); $item_date = strtotime($element->find('dd', 0)->plaintext); From c13dd8c18a97d6e174f093a3217df333742a7112 Mon Sep 17 00:00:00 2001 From: alexis Date: Sat, 12 Mar 2016 16:50:45 +0100 Subject: [PATCH 010/584] Add Vine.co bridge :) --- bridges/VineBridge.php | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 bridges/VineBridge.php diff --git a/bridges/VineBridge.php b/bridges/VineBridge.php new file mode 100644 index 00000000..d9482e34 --- /dev/null +++ b/bridges/VineBridge.php @@ -0,0 +1,59 @@ +maintainer = "ckiw"; + $this->name = "Vine bridge"; + $this->uri = "http://vine.co/"; + $this->description = "Returns the latests vines from vine user page"; + $this->update = "2016-03-12"; + + $this->parameters[] = + '[ + { + "name" : "User id", + "identifier" : "u", + "type" : "text", + "required" : "true" + } + ]'; + } + + public function collectData(array $param){ + $html = ''; + $uri = 'http://vine.co/u/'.$param['u'].'?mode=list'; + + $html = file_get_html($uri) or $this->returnError('No results for this query.', 404); + + foreach($html->find('.post') as $element) { + $a = $element->find('a', 0); + $a->href = str_replace('https://', 'http://', $a->href); + $time = strtotime(ltrim($element->find('p', 0)->plaintext, " Uploaded at ")); + $video = $element->find('video', 0); + $video->controls = "true"; + 
$element->find('h2', 0)->outertext = ''; + + $item = new \Item(); + $item->uri = $a->href; + $item->timestamp = $time; + $item->title = $a->plaintext; + $item->content = $element; + + $this->items[] = $item; + } + + } + + public function getName(){ + return 'Vine'; + } + + public function getURI(){ + return 'http://vine.co'; + } + + public function getCacheDuration(){ + return 10; //seconds + } +} From c2769c89992b2c77810a2066b2c03c900ff2261d Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 16 Mar 2016 19:51:54 +0100 Subject: [PATCH 011/584] [CNET] Fix content extraction --- bridges/CNETBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index ec7decf0..42c78a94 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -9,7 +9,7 @@ class CNETBridge extends BridgeAbstract { $this->name = 'CNET News'; $this->uri = 'http://www.cnet.com/'; $this->description = 'Returns the newest articles.
You may specify a topic found in some section URLs, else all topics are selected.'; - $this->update = '2016-02-06'; + $this->update = '2016-03-16'; $this->parameters[] = '[ @@ -76,7 +76,7 @@ class CNETBridge extends BridgeAbstract { if (is_object($article_thumbnail)) $article_thumbnail = $article_thumbnail->find('img', 0)->src; - $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '
', '