From 56eb829a66a7e1f9d332bb0fa19be63e0dd90568 Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Sun, 29 Nov 2020 10:31:20 +0000 Subject: [PATCH] [EconomistBridge] Fixes for fetching new page structure (#1836) --- bridges/EconomistBridge.php | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 94121ac3..b58c6672 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -14,17 +14,28 @@ class EconomistBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI . '/latest/') or returnServerError('Could not fetch latest updates form The Economist.'); - foreach($html->find('article') as $element) { + foreach($html->find('div.teaser') as $element) { + + $a = $element->find('a.headline-link', 0); + $href = $a->href; + + if (substr($href, 0, 4) != 'http') + $href = self::URI . $a->href; - $a = $element->find('a', 0); - $href = self::URI . $a->href; $full = getSimpleHTMLDOMCached($href); $article = $full->find('article', 0); + $header = $article->find('span[itemprop="headline"]', 0); + $headerimg = $article->find('div[itemprop="image"]', 0)->find('img', 0); + $author = $article->find('p[itemprop="byline"]', 0); + $time = $article->find('time', 0); + $content = $article->find('div[itemprop="text"]', 0); + $section = array( $article->find('strong[itemprop="articleSection"]', 0)->plaintext ); - $header = $article->find('h1', 0); - $author = $article->find('span[itemprop="author"]', 0); - $time = $article->find('time[itemprop="dateCreated"]', 0); - $content = $article->find('div[itemprop="description"]', 0); + // Author + if ($author) + $author = substr($author->innertext, 3, strlen($author)); + else + $author = 'The Economist'; // Remove newsletter subscription box $newsletter = $content->find('div[class="newsletter-form__message"]', 0); @@ -40,19 +51,15 @@ class EconomistBridge extends BridgeAbstract { if ($nextprev) $nextprev->outertext = ''; - $section = array( $article->find('h3[itemprop="articleSection"]', 0)->plaintext ); - $item = array(); - $item['title'] = $header->find('span', 0)->innertext . ': ' - . $header->find('span', 1)->innertext; - + $item['title'] = $header->innertext; $item['uri'] = $href; $item['timestamp'] = strtotime($time->datetime); - $item['author'] = $author->innertext; + $item['author'] = $author; $item['categories'] = $section; $item['content'] = '' . $content->innertext; + . $headerimg->src . '">' . $content->innertext; $this->items[] = $item;