diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php index 0146a42b..0c90fc50 100644 --- a/bridges/ArstechnicaBridge.php +++ b/bridges/ArstechnicaBridge.php @@ -1,30 +1,11 @@ <?php -class ArstechnicaBridge extends FeedExpander { +require_once('WordPressBridge.php'); + +class ArstechnicaBridge extends WordPressBridge { const MAINTAINER = "prysme"; const NAME = "ArstechnicaBridge"; const URI = "http://arstechnica.com"; const DESCRIPTION = "The PC enthusiast's resource. Power users and the tools they love, without computing religion"; - - protected function parseItem($item){ - $item = parent::parseItem($item); - - $html = $this->getSimpleHTMLDOMCached($item['uri']); - if(!$html){ - $item['content'] .= '<p>Requesting full article failed.</p>'; - }else{ - $item['content'] = $html->find('.article-guts', 0); - } - - return $item; - } - - public function collectData(){ - $this->collectExpandableDatas('http://feeds.arstechnica.com/arstechnica/index/'); - } - - public function getCacheDuration() { - return 7200; // 2h - } - + const PARAMETERS = array(); } diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index ab4d3d84..f56a0834 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -1,22 +1,11 @@ <?php -class FreenewsBridge extends FeedExpander { +require_once('WordPressBridge.php'); + +class FreenewsBridge extends WordPressBridge { const MAINTAINER = "mitsukarenai"; const NAME = "Freenews"; const URI = "http://freenews.fr"; - const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox). Ne rentrez pas d'id si vous voulez accéder aux actualités générales."; - - public function collectData(){ - parent::collectExpandableDatas('http://feeds.feedburner.com/Freenews-Freebox?format=xml'); - } - - protected function parseItem($newsItem) { - $item = parent::parseItem($newsItem); - - $articlePage = $this->getSimpleHTMLDOMCached($item['uri']); - $content = $articlePage->find('.post-container', 0); - $item['content'] = $content->innertext; - - return $item; - } + const DESCRIPTION = "Un site d'actualité pour les freenautes (mais ne parlant pas que de la freebox)"; + const PARAMETERS = array(); } diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index a82d815a..0d6a2d11 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -1,42 +1,13 @@ <?php -class LeJournalDuGeekBridge extends FeedExpander { +require_once('WordPressBridge.php'); + +class LeJournalDuGeekBridge extends WordPressBridge{ const MAINTAINER = "polopollo"; const NAME = "journaldugeek.com (FR)"; const URI = "http://www.journaldugeek.com/"; - const DESCRIPTION = "Returns the 5 newest posts from LeJournalDuGeek (full text)."; - - public function collectData(){ - $this->collectExpandableDatas(self::URI . 'rss', 5); - } - - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); - $item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']); - return $item; - } - - private function LeJournalDuGeekExtractContent($url) { - $articleHTMLContent = $this->getSimpleHTMLDOMCached($url); - $text = $articleHTMLContent->find('div.post-content', 0)->innertext; - - foreach($articleHTMLContent->find('a.more') as $element) { - if ($element->innertext == "Source") { - $text = $text . '<p><a href="' . $element->href . '">Source : ' . $element->href . '</a></p>'; - break; - } - } - - foreach($articleHTMLContent->find('iframe') as $element) { - if (preg_match("/youtube/i", $element->src)) { - $text = $text . '// An IFRAME to Youtube was included in the article: <a href="' . $element->src . '">' . $element->src . '</a><br>'; - } - } - - $text = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $text); - $text = strip_tags($text, '<p><b><a><blockquote><img><em><br/><br><ul><li>'); - return $text; - } + const DESCRIPTION = "Returns the newest posts from LeJournalDuGeek (full text)."; + const PARAMETERS = array(); public function getCacheDuration(){ return 1800; // 30min diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 43607725..ed2fb3d0 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -1,61 +1,11 @@ <?php -class NakedSecurityBridge extends FeedExpander { +require_once('WordPressBridge.php'); + +class NakedSecurityBridge extends WordPressBridge { const MAINTAINER = 'ORelio'; const NAME = 'Naked Security'; const URI = 'https://nakedsecurity.sophos.com/'; const DESCRIPTION = 'Returns the newest articles.'; - - private function StripRecursiveHTMLSection($string, $tag_name, $tag_start) { - $open_tag = '<'.$tag_name; - $close_tag = '</'.$tag_name.'>'; - $close_tag_length = strlen($close_tag); - if (strpos($tag_start, $open_tag) === 0) { - while (strpos($string, $tag_start) !== false) { - $max_recursion = 100; - $section_to_remove = null; - $section_start = strpos($string, $tag_start); - $search_offset = $section_start; - do { - $max_recursion--; - $section_end = strpos($string, $close_tag, $search_offset); - $search_offset = $section_end + $close_tag_length; - $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); - $open_tag_count = substr_count($section_to_remove, $open_tag); - $close_tag_count = substr_count($section_to_remove, $close_tag); - } while ($open_tag_count > $close_tag_count && $max_recursion > 0); - $string = str_replace($section_to_remove, '', $string); - } - } - return $string; - } - - - protected function parseItem($item){ - $item = parent::parseItem($item); - - $article_html = $this->getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] = 'Could not request '.$this->getName().': '.$item['uri']; - return $item; - } - - $article_image = $article_html->find('img.wp-post-image', 0)->src; - $article_content = $article_html->find('div.entry-content', 0)->innertext; - $article_content = $this->StripRecursiveHTMLSection($article_content , 'div', '<div class="entry-prefix"'); - $article_content = $this->StripRecursiveHTMLSection($article_content , 'script', '<script'); - $article_content = $this->StripRecursiveHTMLSection($article_content , 'aside', '<aside'); - $article_content = '<p><img src="'.$article_image.'" /></p><p><b>'.$item['content'].'</b></p>'.$article_content; - - $item['content'] = $article_content; - - return $item; - - } - - public function collectData(){ - - $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); } diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 34296b5c..39d73409 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,29 +1,15 @@ <?php -class NumeramaBridge extends FeedExpander { +require_once('WordPressBridge.php'); + +class NumeramaBridge extends WordPressBridge { const MAINTAINER = 'mitsukarenai'; const NAME = 'Numerama'; const URI = 'http://www.numerama.com/'; - const DESCRIPTION = 'Returns the 5 newest posts from Numerama (full text)'; - - public function collectData(){ - $this->collectExpandableDatas(self::URI . 'feed/', 5); - } - - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); - $item['content'] = $this->ExtractContent($item['uri']); - return $item; - } - - private function ExtractContent($url){ - $article_html = $this->getSimpleHTMLDOMCached('Could not request Numerama: '.$url); - $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block - $contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture - return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post - } - + const DESCRIPTION = 'Returns the newest posts from Numerama (full text)'; + const PARAMETERS = array(); public function getCacheDuration() { + return 1800; // 30min } } diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index f48bffa8..90b96417 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -1,39 +1,13 @@ <?php -class SiliconBridge extends FeedExpander { +require_once('WordPressBridge.php'); + +class SiliconBridge extends WordPressBridge { const MAINTAINER = "ORelio"; const NAME = 'Silicon Bridge'; const URI = 'http://www.silicon.fr/'; const DESCRIPTION = "Returns the newest articles."; - - protected function parseItem($item){ - $item = parent::parseItem($item); - - $article_html = $this->getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] .= '<p>Could not request Silicon: '.$item['uri'].'</p>'; - return $item; - } - - $article_content = '<p><b>'.$article_html->find('div.entry-excerpt', 0)->plaintext.'</b></p>' - .$article_html->find('div.entry-content', 0)->innertext; - - //Remove useless scripts left in the page - while (strpos($article_content, '<script') !== false) { - $script_section = substr($article_content, strpos($article_content, '<script')); - $script_section = substr($script_section, 0, strpos($script_section, '</script>') + 9); - $article_content = str_replace($script_section, '', $article_content); - } - - $item['content'] = $article_content; - - return $item; - } - - public function collectData(){ - $feedUrl = self::URI.'feed'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); public function getCacheDuration() { return 1800; // 30 minutes diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index b805d604..d1e08197 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -1,41 +1,13 @@ <?php -class ZatazBridge extends BridgeAbstract { +require_once('WordPressBridge.php'); + +class ZatazBridge extends WordPressBridge{ const MAINTAINER = "aledeg"; const NAME = 'Zataz Magazine'; const URI = 'http://www.zataz.com'; const DESCRIPTION = "ZATAZ Magazine - S'informer, c'est déjà se sécuriser"; - - public function collectData(){ - $html = $this->getSimpleHTMLDOM(self::URI) or $this->returnServerError('Could not request ' . self::URI); - - $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); - foreach ($recent_posts as $article) { - if (count($this->items) < 5) { - $uri = $article->find('a', 0)->href; - $this->items[] = $this->getDetails($uri); - } - } - } - - private function getDetails($uri) { - $html = $this->getSimpleHTMLDOM($uri) or exit; - - $item = array(); - - $article = $html->find('.gdl-blog-full', 0); - $item['uri'] = $uri; - $item['title'] = $article->find('.blog-title', 0)->find('a', 0)->innertext; - $item['content'] = $article->find('.blog-content', 0)->innertext; - $item['timestamp'] = $this->getTimestampFromDate($article->find('.blog-date', 0)->find('a', 0)->href); - return $item; - } - - private function getTimestampFromDate($uri) { - preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches); - $date = new \DateTime($matches[0]); - return $date->format('U'); - } + const PARAMETERS = array(); public function getCacheDuration() { return 7200; // 2h