From 3f64d2d65a747504992dda951aaa616827c0db04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Thu, 15 Sep 2016 12:40:26 +0200 Subject: [PATCH] [bridges] make them WordPressBridge derivatives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specific content filtering used in these bridges will need to be reintegrated later as part of the bridge or as part of the WordPressBridge if they are considered generic enough filters, such as the already existing WordPressBridge #is', '', $text); - $text = strip_tags($text, '



  • '); - return $text; - } + const DESCRIPTION = "Returns the newest posts from LeJournalDuGeek (full text)."; + const PARAMETERS = array(); public function getCacheDuration(){ return 1800; // 30min diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 43607725..ed2fb3d0 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -1,61 +1,11 @@ '; - $close_tag_length = strlen($close_tag); - if (strpos($tag_start, $open_tag) === 0) { - while (strpos($string, $tag_start) !== false) { - $max_recursion = 100; - $section_to_remove = null; - $section_start = strpos($string, $tag_start); - $search_offset = $section_start; - do { - $max_recursion--; - $section_end = strpos($string, $close_tag, $search_offset); - $search_offset = $section_end + $close_tag_length; - $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); - $open_tag_count = substr_count($section_to_remove, $open_tag); - $close_tag_count = substr_count($section_to_remove, $close_tag); - } while ($open_tag_count > $close_tag_count && $max_recursion > 0); - $string = str_replace($section_to_remove, '', $string); - } - } - return $string; - } - - - protected function parseItem($item){ - $item = parent::parseItem($item); - - $article_html = $this->getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] = 'Could not request '.$this->getName().': '.$item['uri']; - return $item; - } - - $article_image = $article_html->find('img.wp-post-image', 0)->src; - $article_content = $article_html->find('div.entry-content', 0)->innertext; - $article_content = $this->StripRecursiveHTMLSection($article_content , 'div', '
    StripRecursiveHTMLSection($article_content , 'script', 'StripRecursiveHTMLSection($article_content , 'aside', '

    '.$item['content'].'

    '.$article_content; - - $item['content'] = $article_content; - - return $item; - - } - - public function collectData(){ - - $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); } diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 34296b5c..39d73409 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,29 +1,15 @@ collectExpandableDatas(self::URI . 'feed/', 5); - } - - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); - $item['content'] = $this->ExtractContent($item['uri']); - return $item; - } - - private function ExtractContent($url){ - $article_html = $this->getSimpleHTMLDOMCached('Could not request Numerama: '.$url); - $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block - $contents = ''; // add post picture - return $contents . $article_html->find('article[class=post-content]', 0)->innertext; // extract the post - } - + const DESCRIPTION = 'Returns the newest posts from Numerama (full text)'; + const PARAMETERS = array(); public function getCacheDuration() { + return 1800; // 30min } } diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index f48bffa8..90b96417 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -1,39 +1,13 @@ getSimpleHTMLDOMCached($item['uri']); - if(!$article_html){ - $item['content'] .= '

    Could not request Silicon: '.$item['uri'].'

    '; - return $item; - } - - $article_content = '

    '.$article_html->find('div.entry-excerpt', 0)->plaintext.'

    ' - .$article_html->find('div.entry-content', 0)->innertext; - - //Remove useless scripts left in the page - while (strpos($article_content, '') + 9); - $article_content = str_replace($script_section, '', $article_content); - } - - $item['content'] = $article_content; - - return $item; - } - - public function collectData(){ - $feedUrl = self::URI.'feed'; - $this->collectExpandableDatas($feedUrl); - } + const PARAMETERS = array(); public function getCacheDuration() { return 1800; // 30 minutes diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index b805d604..d1e08197 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -1,41 +1,13 @@ getSimpleHTMLDOM(self::URI) or $this->returnServerError('Could not request ' . self::URI); - - $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); - foreach ($recent_posts as $article) { - if (count($this->items) < 5) { - $uri = $article->find('a', 0)->href; - $this->items[] = $this->getDetails($uri); - } - } - } - - private function getDetails($uri) { - $html = $this->getSimpleHTMLDOM($uri) or exit; - - $item = array(); - - $article = $html->find('.gdl-blog-full', 0); - $item['uri'] = $uri; - $item['title'] = $article->find('.blog-title', 0)->find('a', 0)->innertext; - $item['content'] = $article->find('.blog-content', 0)->innertext; - $item['timestamp'] = $this->getTimestampFromDate($article->find('.blog-date', 0)->find('a', 0)->href); - return $item; - } - - private function getTimestampFromDate($uri) { - preg_match('/\d{4}\/\d{2}\/\d{2}/', $uri, $matches); - $date = new \DateTime($matches[0]); - return $date->format('U'); - } + const PARAMETERS = array(); public function getCacheDuration() { return 7200; // 2h