/**
 * Bridge that turns the article links found on the PC Gamer front page
 * into feed items.
 *
 * Item fields are read from <meta> tags of each linked article page rather
 * than from the article markup itself.
 */
class PcGamerBridge extends BridgeAbstract
{
	const NAME = 'PC Gamer';
	const URI = 'https://www.pcgamer.com/';
	// Kept on one line: a single-quoted literal broken across source lines
	// would embed the line break and indentation in the description text.
	const DESCRIPTION = 'PC Gamer is your source for exclusive reviews, demos, updates and news on all your favorite PC gaming franchises.';
	const MAINTAINER = 'IceWreck, mdemoss';

	/**
	 * Collects one feed item per distinct article link on the front page.
	 *
	 * Each item always carries 'uri'; 'title', 'content', 'author',
	 * 'enclosures', 'categories' and 'timestamp' are only set when the
	 * corresponding meta tag is present on the article page.
	 *
	 * @return void Items are appended to $this->items.
	 */
	public function collectData() {
		$html = getSimpleHTMLDOMCached($this->getURI(), 300);
		// NOTE(review): assumes the helper may hand back a falsy value when
		// the page cannot be parsed - confirm against its implementation.
		if (!$html) {
			return;
		}

		// URIs already handled, so a link that appears more than once on the
		// page is fetched and emitted only once.
		$seen = array();

		foreach ($html->find('a.article-link') as $element) {
			$uri = $element->href;
			if (empty($uri) || isset($seen[$uri])) {
				continue;
			}
			$seen[$uri] = true;

			$articleHtml = getSimpleHTMLDOMCached($uri);
			if (!$articleHtml) {
				continue;
			}

			$item = array();
			$item['uri'] = $uri;

			// Relying on meta tags ought to be more reliable than scraping
			// the article body, but any of them may be absent.
			$title = $this->getMetaContent($articleHtml, 'parsely-title');
			if ($title !== null) {
				$item['title'] = $title;
			}

			$description = $this->getMetaContent($articleHtml, 'description');
			if ($description !== null) {
				$item['content'] = html_entity_decode($description);
			}

			$author = $this->getMetaContent($articleHtml, 'parsely-author');
			if ($author !== null) {
				$item['author'] = $author;
			}

			$image = $this->getMetaContent($articleHtml, 'parsely-image-url');
			if ($image !== null) {
				$item['enclosures'][] = $image;
			}

			$tags = $this->getMetaContent($articleHtml, 'parsely-tags');
			if ($tags !== null) {
				/* Every article carries two extra tags; because one of them
				   resembles the common tag "guide", both are removed.
				   array_diff() preserves keys, so array_values() re-indexes
				   the result into a gap-free list. */
				$item['categories'] = array_values(array_diff(
					array_filter(array_map('trim', explode(',', $tags)), 'strlen'),
					array('van_buying_guide_progressive', 'serversidehawk')
				));
			}

			$pubDate = $this->getMetaContent($articleHtml, 'pub_date');
			if ($pubDate !== null) {
				$timestamp = strtotime($pubDate);
				// strtotime() returns false for a date it cannot parse.
				if ($timestamp !== false) {
					$item['timestamp'] = $timestamp;
				}
			}

			$this->items[] = $item;
		}
	}

	/**
	 * Reads the "content" attribute of the first <meta name="..."> tag.
	 *
	 * @param object $dom  Parsed document exposing find().
	 * @param string $name Value of the meta tag's name attribute.
	 * @return string|null The attribute value, or null when the tag or its
	 *                     content attribute is missing.
	 */
	private function getMetaContent($dom, $name) {
		$meta = $dom->find('meta[name=' . $name . ']', 0);
		// Guard against a missing tag instead of reading a property of a
		// non-object.
		if (!$meta) {
			return null;
		}

		$content = $meta->content;
		return is_string($content) ? $content : null;
	}
}