mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 17:19:37 +00:00
parent
2c94791bcd
commit
19fc2dc100
@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
class GatesNotesBridge extends FeedExpander
|
class GatesNotesBridge extends BridgeAbstract
|
||||||
{
|
{
|
||||||
const MAINTAINER = 'corenting';
|
const MAINTAINER = 'corenting';
|
||||||
const NAME = 'Gates Notes';
|
const NAME = 'Gates Notes';
|
||||||
@ -8,14 +8,51 @@ class GatesNotesBridge extends FeedExpander
|
|||||||
const DESCRIPTION = 'Returns the newest articles.';
|
const DESCRIPTION = 'Returns the newest articles.';
|
||||||
const CACHE_TIMEOUT = 21600; // 6h
|
const CACHE_TIMEOUT = 21600; // 6h
|
||||||
|
|
||||||
protected function parseItem($item)
|
public function collectData()
|
||||||
{
|
{
|
||||||
$item = parent::parseItem($item);
|
$params = [
|
||||||
|
'validYearsString' => 'all',
|
||||||
|
'setNumber' => '0',
|
||||||
|
'sortByVideo' => 'all',
|
||||||
|
'sortByTopic' => 'all'
|
||||||
|
];
|
||||||
|
$api_endpoint = '/api/TGNWebAPI/Get_Filtered_Article_Set?';
|
||||||
|
$apiUrl = self::URI . $api_endpoint . http_build_query($params);
|
||||||
|
|
||||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
$rawContent = getContents($apiUrl);
|
||||||
|
$cleanedContent = str_replace('\r\n', '', substr($rawContent, 1, -1));
|
||||||
|
$cleanedContent = str_replace('\"', '"', $cleanedContent);
|
||||||
|
|
||||||
|
// The content is actually a JSON document wrapped in quotes, with literal \r\n sequences inserted
|
||||||
|
$json = json_decode($cleanedContent);
|
||||||
|
|
||||||
|
foreach ($json as $article) {
|
||||||
|
$item = [];
|
||||||
|
|
||||||
|
$articleUri = self::URI . '/' . $article->{'_system_'}->name;
|
||||||
|
|
||||||
|
$item['uri'] = $articleUri;
|
||||||
|
$item['title'] = $article->headline;
|
||||||
|
$item['content'] = self::getItemContent($articleUri);
|
||||||
|
$item['timestamp'] = strtotime($article->date);
|
||||||
|
|
||||||
|
$this->items[] = $item;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected function getItemContent($articleUri)
|
||||||
|
{
|
||||||
|
// We need to change the headers as the normal desktop website
|
||||||
|
// uses canvas-based image carousels for some pictures
|
||||||
|
$headers = [
|
||||||
|
'User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
||||||
|
];
|
||||||
|
$article_html = getSimpleHTMLDOMCached($articleUri, 86400, $headers);
|
||||||
|
|
||||||
|
$content = '';
|
||||||
if (!$article_html) {
|
if (!$article_html) {
|
||||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
$content .= '<p><em>Could not request ' . $this->getName() . ': ' . $articleUri . '</em></p>';
|
||||||
return $item;
|
return $content;
|
||||||
}
|
}
|
||||||
$article_html = defaultLinkTo($article_html, $this->getURI());
|
$article_html = defaultLinkTo($article_html, $this->getURI());
|
||||||
|
|
||||||
@ -23,6 +60,20 @@ class GatesNotesBridge extends FeedExpander
|
|||||||
$hero_image = '<img src=' . $article_html->find('img.article_top_DMT_Image', 0)->getAttribute('data-src') . '>';
|
$hero_image = '<img src=' . $article_html->find('img.article_top_DMT_Image', 0)->getAttribute('data-src') . '>';
|
||||||
|
|
||||||
$article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0);
|
$article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0);
|
||||||
|
|
||||||
|
// Remove the menu bar on some articles (PDF download etc.)
|
||||||
|
foreach ($article_body->find('.TGN_MenuHolder') as $found) {
|
||||||
|
$found->remove();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For the carousel pictures, we still need to remove the lazy-loading and force the real picture
|
||||||
|
foreach ($article_body->find('canvas') as $found) {
|
||||||
|
$found->remove();
|
||||||
|
}
|
||||||
|
foreach ($article_body->find('.TGN_PE_C_Img') as $found) {
|
||||||
|
$found->setAttribute('src', $found->getAttribute('data-src'));
|
||||||
|
}
|
||||||
|
|
||||||
// Convert iframes of YouTube videos to links
|
// Convert iframes of YouTube videos to links
|
||||||
foreach ($article_body->find('iframe') as $found) {
|
foreach ($article_body->find('iframe') as $found) {
|
||||||
$iframeUrl = $found->getAttribute('src');
|
$iframeUrl = $found->getAttribute('src');
|
||||||
@ -32,6 +83,7 @@ class GatesNotesBridge extends FeedExpander
|
|||||||
$found->outertext = '<p><a href="' . $iframeUrl . '">' . $text . '</a></p>';
|
$found->outertext = '<p><a href="' . $iframeUrl . '">' . $text . '</a></p>';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove <link> CSS resources
|
// Remove <link> CSS resources
|
||||||
foreach ($article_body->find('link') as $found) {
|
foreach ($article_body->find('link') as $found) {
|
||||||
$linkedRessourceUrl = $found->getAttribute('href');
|
$linkedRessourceUrl = $found->getAttribute('href');
|
||||||
@ -42,14 +94,8 @@ class GatesNotesBridge extends FeedExpander
|
|||||||
}
|
}
|
||||||
$article_body = sanitize($article_body->innertext);
|
$article_body = sanitize($article_body->innertext);
|
||||||
|
|
||||||
$item['content'] = $top_description . $hero_image . $article_body;
|
$content = $top_description . $hero_image . $article_body;
|
||||||
|
|
||||||
return $item;
|
return $content;
|
||||||
}
|
|
||||||
|
|
||||||
public function collectData()
|
|
||||||
{
|
|
||||||
$feed = static::URI . '/rss';
|
|
||||||
$this->collectExpandableDatas($feed);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user