mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 00:59:35 +00:00
[ReutersBridge] Updated 'Top News' feed, some fix (#2488)
This commit is contained in:
parent
626cc9119a
commit
e86ce338a2
@ -131,6 +131,7 @@ class ReutersBridge extends BridgeAbstract
|
||||
'sports' => '/lifestyle/sports',
|
||||
'life' => '/lifestyle',
|
||||
'science' => '/lifestyle/science',
|
||||
'home/topnews' => '/home',
|
||||
);
|
||||
|
||||
const OLD_WIRE_SECTION = array(
|
||||
@ -211,11 +212,12 @@ class ReutersBridge extends BridgeAbstract
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $endpoint - Provide section's endpoint to Reuters API.
|
||||
* @param string $endpoint - The endpoint to fetch; may be an article URI or an article ID.
|
||||
* @param string $fetch_type - The kind of fetch to perform: Article or Section.
|
||||
* @param boolean $is_article_uid {true|false} - Whether $endpoint is an article UID rather than a URL.
|
||||
* @return string A completed API URL to fetch data
|
||||
*/
|
||||
private function getAPIURL($endpoint, $fetch_type) {
|
||||
private function getAPIURL($endpoint, $fetch_type, $is_article_uid = false) {
|
||||
$base_url = self::URI . '/pf/api/v3/content/fetch/';
|
||||
$wire_url = 'https://wireapi.reuters.com/v8';
|
||||
switch($fetch_type) {
|
||||
@ -223,10 +225,23 @@ class ReutersBridge extends BridgeAbstract
|
||||
if($this->useWireAPI) {
|
||||
return $wire_url . $endpoint;
|
||||
}
|
||||
$query = array(
|
||||
'website_url' => $endpoint,
|
||||
'website' => 'reuters'
|
||||
|
||||
$base_query = array(
|
||||
'website' => 'reuters',
|
||||
);
|
||||
$query = array();
|
||||
|
||||
if ($is_article_uid) {
|
||||
$query = array(
|
||||
'id' => $endpoint
|
||||
);
|
||||
} else {
|
||||
$query = array(
|
||||
'website_url' => $endpoint,
|
||||
);
|
||||
}
|
||||
|
||||
$query = array_merge($base_query, $query);
|
||||
$json_query = json_encode($query);
|
||||
return $base_url . 'article-by-id-or-url-v1?query=' . $json_query;
|
||||
break;
|
||||
@ -241,11 +256,17 @@ class ReutersBridge extends BridgeAbstract
|
||||
return $wire_url . $feed_uri;
|
||||
}
|
||||
$query = array(
|
||||
'fetch_type' => 'section',
|
||||
'section_id' => $endpoint,
|
||||
'size' => 30,
|
||||
'website' => 'reuters'
|
||||
);
|
||||
|
||||
if ($endpoint != '/home') {
|
||||
$query = array_merge($query, array(
|
||||
'fetch_type' => 'section',
|
||||
));
|
||||
}
|
||||
|
||||
$json_query = json_encode($query);
|
||||
return $base_url . 'articles-by-section-alias-or-id-v1?query=' . $json_query;
|
||||
break;
|
||||
@ -253,11 +274,27 @@ class ReutersBridge extends BridgeAbstract
|
||||
returnServerError('unsupported endpoint');
|
||||
}
|
||||
|
||||
private function getArticle($feed_uri)
|
||||
/**
 * Assemble one feed item from the given fields and append it to $this->items.
 *
 * @param mixed $title     Stored under the item's 'title' key.
 * @param mixed $content   Stored under the item's 'content' key.
 * @param mixed $timestamp Stored under the item's 'timestamp' key.
 * @param mixed $author    Stored under the item's 'author' key.
 * @param mixed $url       Stored under the item's 'uri' key.
 * @param mixed $category  Stored under the item's 'categories' key.
 */
private function addStories($title, $content, $timestamp, $author, $url, $category)
{
    // Key order is kept the same as the field-by-field assignment it replaces.
    $this->items[] = array(
        'categories' => $category,
        'author' => $author,
        'content' => $content,
        'title' => $title,
        'timestamp' => $timestamp,
        'uri' => $url,
    );
}
|
||||
|
||||
private function getArticle($feed_uri, $is_article_uid = false)
|
||||
{
|
||||
// This will make another request to API to get full detail of article and author's name.
|
||||
$url = $this->getAPIURL($feed_uri, 'article');
|
||||
$url = $this->getAPIURL($feed_uri, 'article', $is_article_uid);
|
||||
$rawData = $this->getJson($url);
|
||||
|
||||
if(json_last_error() != JSON_ERROR_NONE) { // Checking whether a valid JSON or not
|
||||
return $this->handleRedirectedArticle($url);
|
||||
}
|
||||
|
||||
$article_content = '';
|
||||
$authorlist = '';
|
||||
$category = array();
|
||||
@ -299,6 +336,40 @@ class ReutersBridge extends BridgeAbstract
|
||||
return $content_detail;
|
||||
}
|
||||
|
||||
/**
 * Build a fallback article description from the <meta> tags of the page at $url.
 *
 * getArticle() falls back to this when the response it fetched did not decode
 * as JSON.
 *
 * @param string $url Address of the page to load and inspect.
 * @return array Keys: 'content' (meta description), 'author', 'category'
 *               (always an empty string), 'images' (an <img> tag or an empty
 *               string), 'published_at' (always an empty string) and 'status'
 *               (always 'redirected').
 */
private function handleRedirectedArticle($url)
{
    $html = getSimpleHTMLDOMCached($url, 86400); // Duration 24h

    $description = '';
    $author = '';
    $images = '';

    foreach ($html->find('meta') as $meta) {
        // When several tags match the same group, the last one seen wins.
        switch ($meta->name) {
            case 'description':
                $description = $meta->content;
                break;
            case 'author':
            case 'twitter:creator':
                $author = $meta->content;
                break;
            case 'twitter:image:src':
            case 'twitter:image':
                // The value comes from a remote page: quote and escape it so that
                // spaces, quotes or '>' cannot break out of the src attribute.
                // double_encode is off so existing entities such as &amp; survive.
                $image_src = htmlspecialchars($meta->content, ENT_QUOTES, 'UTF-8', false);
                $images = '<img src="' . $image_src . '">';
                break;
        }
    }

    return array(
        'content' => $description,
        'author' => $author,
        'category' => '',
        'images' => $images,
        'published_at' => '',
        'status' => 'redirected'
    );
}
|
||||
|
||||
private function handleImage($images) {
|
||||
$img_placeholder = '';
|
||||
|
||||
@ -444,6 +515,27 @@ EOD;
|
||||
return $description;
|
||||
}
|
||||
|
||||
/**
 * Fetch every related story and append each one to the feed items.
 *
 * @param array $stories Entries that each provide a 'url' and a 'caption' key.
 */
private function addRelatedStories($stories)
{
    foreach ($stories as $related) {
        $detail = $this->getArticle($related['url']);
        $headline = $related['caption'];
        $link = self::URI . $related['url'];

        // NOTE(review): links are only rewritten when a 'status' key exists and is
        // not 'redirected'. If regular articles carry no 'status' key, they also
        // skip defaultLinkTo() - confirm against getArticle()'s return value.
        $has_regular_status = isset($detail['status']) && $detail['status'] != 'redirected';
        $body = $has_regular_status
            ? defaultLinkTo($detail['content'], $this->getURI())
            : $detail['content'];

        $this->addStories(
            $headline,
            $body . $detail['images'],
            $detail['published_at'],
            $detail['author'],
            $link,
            $detail['category']
        );
    }
}
|
||||
|
||||
/**
 * Return the feed name held in $this->feedName.
 */
public function getName()
{
    return $this->feedName;
}
|
||||
@ -500,11 +592,14 @@ EOD;
|
||||
$title = $story['title'];
|
||||
$article_uri = $story['canonical_url'];
|
||||
$source_type = $story['source']['name'];
|
||||
if (isset($story['related_stories'])) {
|
||||
$this->addRelatedStories($story['related_stories']);
|
||||
}
|
||||
}
|
||||
|
||||
// Some articles cause unexpected behaviour, such as redirecting to another site instead of the API.
|
||||
// Check the article's source type to avoid this.
|
||||
if($source_type == 'composer') { // Only Reuters PF api have this, Wire don't.
|
||||
if(!$this->useWireAPI && $source_type != 'Package') { // Only Reuters PF api have this, Wire don't.
|
||||
$author = $this->handleAuthorName($story['authors']);
|
||||
$timestamp = $story['published_time'];
|
||||
$image_placeholder = '';
|
||||
@ -512,6 +607,7 @@ EOD;
|
||||
$image_placeholder = $this->handleImage(array($story['thumbnail']));
|
||||
}
|
||||
$content = $story['description'] . $image_placeholder;
|
||||
$category = array($story['primary_section']['name']);
|
||||
} else {
|
||||
$content_detail = $this->getArticle($article_uri);
|
||||
$description = $content_detail['content'];
|
||||
@ -524,13 +620,8 @@ EOD;
|
||||
$timestamp = $content_detail['published_at'];
|
||||
}
|
||||
|
||||
$item['categories'] = $category;
|
||||
$item['author'] = $author;
|
||||
$item['content'] = $content;
|
||||
$item['title'] = $title;
|
||||
$item['timestamp'] = $timestamp;
|
||||
$item['uri'] = $url;
|
||||
$this->items[] = $item;
|
||||
$this->addStories($title, $content, $timestamp, $author, $url, $category);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user