diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php
index b58c6672..d426946b 100644
--- a/bridges/EconomistBridge.php
+++ b/bridges/EconomistBridge.php
@@ -1,70 +1,125 @@
array(
+ 'limit' => array(
+ 'name' => 'Feed Item Limit',
+ 'required' => true,
+ 'type' => 'number',
+ 'defaultValue' => 10,
+ 'title' => 'Maximum number of returned feed items. Maximum 30, default 10'
+ )
+ ),
+ 'Topics' => array(
+ 'topic' => array(
+ 'name' => 'Topics',
+ 'type' => 'list',
+ 'title' => 'Select a Topic',
+ 'defaultValue' => 'latest',
+ 'values' => array(
+ 'Latest' => 'latest',
+ 'The world this week' => 'the-world-this-week',
+ 'Letters' => 'letters',
+ 'Leaders' => 'leaders',
+ 'Briefings' => 'briefing',
+ 'Special reports' => 'special-report',
+ 'Britain' => 'britain',
+ 'Europe' => 'europe',
+ 'United States' => 'united-states',
+ 'The Americas' => 'the-americas',
+ 'Middle East and Africa' => 'middle-east-and-africa',
+ 'Asia' => 'asia',
+ 'China' => 'china',
+ 'International' => 'international',
+ 'Business' => 'business',
+ 'Finance and economics' => 'finance-and-economics',
+ 'Science and technology' => 'science-and-technology',
+ 'Books and arts' => 'books-and-arts',
+ 'Obituaries' => 'obituary',
+ 'Graphic detail' => 'graphic-detail',
+ 'Indicators' => 'economic-and-financial-indicators',
+ )
+ )
+ ),
+ 'Blogs' => array(
+ 'blog' => array(
+ 'name' => 'Blogs',
+ 'type' => 'list',
+ 'title' => 'Select a Blog',
+ 'values' => array(
+ 'Bagehots notebook' => 'bagehots-notebook',
+ 'Bartleby' => 'bartleby',
+ 'Buttonwoods notebook' => 'buttonwoods-notebook',
+ 'Charlemagnes notebook' => 'charlemagnes-notebook',
+ 'Democracy in America' => 'democracy-in-america',
+ 'Erasmus' => 'erasmus',
+ 'Free exchange' => 'free-exchange',
+ 'Game theory' => 'game-theory',
+ 'Gulliver' => 'gulliver',
+ 'Kaffeeklatsch' => 'kaffeeklatsch',
+ 'Prospero' => 'prospero',
+ 'The Economist Explains' => 'the-economist-explains',
+ )
+ )
+ )
+ );
+
+	/**
+	 * Expand the RSS feed of the selected topic or blog ('latest' when no context matches).
+	 *
+	 * The item limit is clamped to 1..30; FeedExpander documents -1 as "no limit", so it must not pass.
+	 */
+	public function collectData(){
+		// the queried context decides which list input carries the feed slug
+		switch ($this->queriedContext) {
+			case 'Topics':
+				$category = $this->getInput('topic');
+				break;
+			case 'Blogs':
+				$category = $this->getInput('blog');
+				break;
+			default:
+				$category = 'latest';
+		}
+		$limit = max(1, min(30, (int)$this->getInput('limit')));
+
+		$this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit);
 }
- public function collectData() {
- $html = getSimpleHTMLDOM(self::URI . '/latest/')
- or returnServerError('Could not fetch latest updates form The Economist.');
+	/** Parse $feedItem via the parent, then add the article's cleaned body and (if any) its lead image. */
+	protected function parseItem($feedItem){
+		$item = parent::parseItem($feedItem);
- foreach($html->find('div.teaser') as $element) {
+		$article = getSimpleHTMLDOM($item['uri'])
+			or returnServerError('Could not request Site: ' . $item['title']);
+		$item['content'] = $this->cleanContent($article);
+		$image = $article->find('div.article__lead-image img', 0); // null when there is no lead image
+		if ($image) $item['enclosures'][] = $image->getAttribute('src');
- $a = $element->find('a.headline-link', 0);
- $href = $a->href;
+		return $item;
+	}
- if (substr($href, 0, 4) != 'http')
- $href = self::URI . $a->href;
-
- $full = getSimpleHTMLDOMCached($href);
- $article = $full->find('article', 0);
- $header = $article->find('span[itemprop="headline"]', 0);
- $headerimg = $article->find('div[itemprop="image"]', 0)->find('img', 0);
- $author = $article->find('p[itemprop="byline"]', 0);
- $time = $article->find('time', 0);
- $content = $article->find('div[itemprop="text"]', 0);
- $section = array( $article->find('strong[itemprop="articleSection"]', 0)->plaintext );
-
- // Author
- if ($author)
- $author = substr($author->innertext, 3, strlen($author));
- else
- $author = 'The Economist';
-
- // Remove newsletter subscription box
- $newsletter = $content->find('div[class="newsletter-form__message"]', 0);
- if ($newsletter)
- $newsletter->outertext = '';
-
- $newsletterForm = $content->find('form', 0);
- if ($newsletterForm)
- $newsletterForm->outertext = '';
-
- // Remove next and previous article URLs at the bottom
- $nextprev = $content->find('div[class="blog-post__next-previous-wrapper"]', 0);
- if ($nextprev)
- $nextprev->outertext = '';
-
- $item = array();
- $item['title'] = $header->innertext;
- $item['uri'] = $href;
- $item['timestamp'] = strtotime($time->datetime);
- $item['author'] = $author;
- $item['categories'] = $section;
-
- $item['content'] = '' . $content->innertext;
-
- $this->items[] = $item;
-
- if (count($this->items) >= 10)
- break;
+ private function cleanContent($article){
+ // the actual article is in this div
+ $content = $article->find('div.layout-article-body', 0)->innertext;
+ // clean the article content. Remove all div's since the text is in paragraph elements
+ foreach (array(
+ '