From 8f9314947bbd8ab22cfcdfa7085fb77f97e088ed Mon Sep 17 00:00:00 2001
From: Joseph
Date: Tue, 5 Apr 2022 12:03:25 +0000
Subject: [PATCH] [AssociatedPressNewsBridge] Add bridge (#1475)
---
bridges/AssociatedPressNewsBridge.php | 270 ++++++++++++++++++++++++++
1 file changed, 270 insertions(+)
create mode 100644 bridges/AssociatedPressNewsBridge.php
diff --git a/bridges/AssociatedPressNewsBridge.php b/bridges/AssociatedPressNewsBridge.php
new file mode 100644
index 00000000..edd6444b
--- /dev/null
+++ b/bridges/AssociatedPressNewsBridge.php
@@ -0,0 +1,270 @@
+ array(
+ 'topic' => array(
+ 'name' => 'Topic',
+ 'type' => 'list',
+ 'values' => array(
+ 'AP Top News' => 'apf-topnews',
+ 'Sports' => 'apf-sports',
+ 'Entertainment' => 'apf-entertainment',
+ 'Oddities' => 'apf-oddities',
+ 'Travel' => 'apf-Travel',
+ 'Technology' => 'apf-technology',
+ 'Lifestyle' => 'apf-lifestyle',
+ 'Business' => 'apf-business',
+ 'U.S. News' => 'apf-usnews',
+ 'Health' => 'apf-Health',
+ 'Science' => 'apf-science',
+ 'World News' => 'apf-WorldNews',
+ 'Politics' => 'apf-politics',
+ 'Religion' => 'apf-religion',
+ 'Photo Galleries' => 'PhotoGalleries',
+ 'Fact Checks' => 'APFactCheck',
+ 'Videos' => 'apf-videos',
+ ),
+ 'defaultValue' => 'apf-topnews',
+ ),
+ ),
+ 'Custom Topic' => array(
+ 'topic' => array(
+ 'name' => 'Topic',
+ 'type' => 'text',
+ 'required' => true,
+ 'exampleValue' => 'europe'
+ ),
+ )
+ );
+
+ const CACHE_TIMEOUT = 900; // 15 mins
+
+ // Matches apnews.com topic URLs, optionally prefixed with a "tag/" or "hub/"
+ // path segment; capture group 1 is the topic slug. The prefix is an
+ // alternation group: a character class such as [tag|hub]+ would accept any
+ // run of those letters (e.g. "bat/") rather than the two literal words.
+ private $detectParamRegex = '/^https?:\/\/(?:www\.)?apnews\.com\/(?:(?:tag|hub)\/)?([\w-]+)$/';
+ // Base of the tag feed API URL; the topic is appended as the "tags" query value.
+ private $tagEndpoint = 'https://afs-prod.appspot.com/api/v2/feed/tag?tags=';
+ // Display name of the fetched topic; set by collectCardData(), read by getName().
+ private $feedName = '';
+
+ /**
+  * Derive bridge parameters from an apnews.com URL.
+  *
+  * @param string $url URL to inspect
+  * @return array|null 'topic' and 'context' entries when the URL matches
+  *                    $detectParamRegex, otherwise null
+  */
+ public function detectParameters($url) {
+     $found = array();
+
+     // Guard clause: anything that is not a successful match yields null
+     if (preg_match($this->detectParamRegex, $url, $found) !== 1) {
+         return null;
+     }
+
+     return array(
+         'topic' => $found[1],
+         'context' => 'Custom Topic',
+     );
+ }
+
+ /**
+  * Entry point for collecting items: rejects the two topics that have no
+  * supported feed and otherwise delegates to collectCardData().
+  */
+ public function collectData() {
+     $topic = $this->getInput('topic');
+
+     // Loose comparison on purpose, mirroring switch semantics
+     if ($topic == 'Podcasts') {
+         returnClientError('Podcasts topic feed is not supported');
+     } elseif ($topic == 'PressReleases') {
+         returnClientError('PressReleases topic feed is not supported');
+     } else {
+         $this->collectCardData();
+     }
+ }
+
+ /**
+  * URI of the selected topic page, or the parent's URI when no topic is set.
+  *
+  * @return string
+  */
+ public function getURI() {
+     if (is_null($this->getInput('topic'))) {
+         return parent::getURI();
+     }
+
+     return self::URI . $this->getInput('topic');
+ }
+
+ /**
+  * Feed title: the fetched topic name suffixed with ' - Associated Press',
+  * or the parent's name while no topic name has been stored.
+  *
+  * @return string
+  */
+ public function getName() {
+     return empty($this->feedName)
+         ? parent::getName()
+         : $this->feedName . ' - Associated Press';
+ }
+
+ /**
+  * Build the tag feed API URL for the selected topic.
+  *
+  * The topic is URL-encoded because it may come from the free-text
+  * 'Custom Topic' input and is placed in a query string; slugs made of
+  * letters, digits, '-' and '_' are left unchanged by the encoding.
+  *
+  * @return string API URL, or the parent's URI when no topic is set
+  */
+ private function getTagURI() {
+     $topic = $this->getInput('topic');
+
+     if (is_null($topic)) {
+         return parent::getURI();
+     }
+
+     return $this->tagEndpoint . urlencode($topic);
+ }
+
+ /**
+  * Fetch the tag feed for the selected topic and turn its cards into items.
+  *
+  * Stores the first tag object's name in $this->feedName and appends at most
+  * 15 entries to $this->items. Skipped: 'Hub Peek' and 'Notification' cards,
+  * cards without a content entry, link-only ('web') content, videos without
+  * a media entry and stories without storyHTML.
+  */
+ private function collectCardData() {
+     $json = getContents($this->getTagURI())
+         or returnServerError('Could not request: ' . $this->getTagURI());
+
+     $tagContents = json_decode($json, true);
+
+     if (empty($tagContents['tagObjs'])) {
+         returnClientError('Topic not found: ' . $this->getInput('topic'));
+     }
+
+     $this->feedName = $tagContents['tagObjs'][0]['name'];
+
+     // A response without a usable 'cards' list simply yields no items
+     $cards = array();
+     if (isset($tagContents['cards']) && is_array($tagContents['cards'])) {
+         $cards = $tagContents['cards'];
+     }
+
+     foreach ($cards as $card) {
+         $item = array();
+
+         // skip hub peeks & Notifications
+         if ($card['cardType'] == 'Hub Peek' || $card['cardType'] == 'Notification') {
+             continue;
+         }
+
+         // Skip cards that carry no content entry at all
+         if (empty($card['contents'][0])) {
+             continue;
+         }
+
+         $storyContent = $card['contents'][0];
+
+         switch($storyContent['contentType']) {
+             case 'web': // Skip link only content
+                 continue 2;
+
+             case 'video':
+                 // processVideo() and the enclosure both read the first media entry
+                 if (empty($storyContent['media'][0])) {
+                     continue 2;
+                 }
+
+                 $html = $this->processVideo($storyContent);
+
+                 $item['enclosures'][] = 'https://storage.googleapis.com/afs-prod/media/'
+                     . $storyContent['media'][0]['id'] . '/800.jpeg';
+                 break;
+             default:
+                 if (empty($storyContent['storyHTML'])) { // Skip if no storyHTML
+                     continue 2;
+                 }
+
+                 $html = defaultLinkTo($storyContent['storyHTML'], self::URI);
+                 $html = str_get_html($html);
+
+                 $this->processMediaPlaceholders($html, $storyContent['id']);
+                 $this->processHubLinks($html, $storyContent);
+                 $this->processIframes($html);
+
+                 // isset() is false for both a missing key and an explicit null
+                 if (isset($storyContent['leadPhotoId'])) {
+                     $item['enclosures'][] = 'https://storage.googleapis.com/afs-prod/media/'
+                         . $storyContent['leadPhotoId'] . '/800.jpeg';
+                 }
+         }
+
+         $item['title'] = $storyContent['headline'];
+         $item['uri'] = self::URI . $card['shortId'];
+
+         // Prefer the story's own link when one is provided
+         if (!empty($storyContent['localLinkUrl'])) {
+             $item['uri'] = $storyContent['localLinkUrl'];
+         }
+
+         $item['timestamp'] = $storyContent['published'];
+
+         if (isset($storyContent['bylines'])) {
+             // Remove a leading standalone 'By'. ltrim() with 'By ' as its
+             // character list would also strip name letters ("By Bryan" ->
+             // "ryan"), and the word boundary keeps names like "Byron" intact.
+             $item['author'] = preg_replace('/^By\b\s*/', '', $storyContent['bylines']);
+         }
+
+         $item['content'] = $html;
+
+         if (!empty($storyContent['tagObjs'])) {
+             foreach ($storyContent['tagObjs'] as $tag) {
+                 $item['categories'][] = $tag['name'];
+             }
+         }
+
+         $this->items[] = $item;
+
+         // Cap the feed at 15 items
+         if (count($this->items) >= 15) {
+             break;
+         }
+     }
+ }
+
+ private function processMediaPlaceholders($html, $id) { // Rewrites the outertext of div.media-placeholder nodes in $html using media looked up from the content API record for story $id
+
+ if ($html->find('div.media-placeholder', 0)) { // Only hit the API when at least one placeholder exists
+ // Fetch the story's content record and decode it as an associative array
+ $json = getContents('https://afs-prod.appspot.com/api/v2/content/' . $id);
+ $storyContent = json_decode($json, true);
+
+ foreach ($html->find('div.media-placeholder') as $div) {
+ $key = array_search($div->id, $storyContent['mediumIds']); // NOTE(review): array_search() returns false on a miss, and false used as an array key below indexes entry 0 - confirm this fallback is intended
+
+ if (!isset($storyContent['media'][$key])) {
+ continue;
+ }
+
+ $media = $storyContent['media'][$key];
+
+ if ($media['type'] === 'Photo') { // NOTE(review): the heredoc bodies below appear to have lost their markup in this copy ($mediaUrl is otherwise unused) - confirm against the original file
+ $mediaUrl = $media['gcsBaseUrl'] . $media['imageRenderedSizes'][0] . $media['imageFileExtension'];
+ $mediaCaption = $media['caption'];
+
+ $div->outertext = <<
{$mediaCaption}
+ EOD;
+ }
+
+ if ($media['type'] === 'YouTube') {
+ $div->outertext = <<
+
+ EOD;
+ }
+ }
+ }
+ }
+
+ /*
+ Create full coverage links (HubLinks).
+ For every entry of $storyContent['richEmbeds'] whose type is 'Hub Link',
+ finds the div in $html whose id equals the embed's id and, when present,
+ overwrites its outertext with text built from the embed's calloutText
+ and displayName.
+ NOTE(review): $url is computed but not used in the visible heredoc; the
+ markup appears to have been lost in this copy - confirm against the
+ original file.
+ */
+ private function processHubLinks($html, $storyContent) {
+
+ if (!empty($storyContent['richEmbeds'])) {
+ foreach ($storyContent['richEmbeds'] as $embed) {
+
+ if ($embed['type'] === 'Hub Link') {
+ $url = self::URI . $embed['tag']['id'];
+ $div = $html->find('div[id=' . $embed['id'] . ']', 0);
+
+ if ($div) {
+ $div->outertext = <<{$embed['calloutText']} {$embed['displayName']}
+EOD;
+ }
+ }
+ }
+ }
+ }
+
+ private function processVideo($storyContent) { // Builds and returns the HTML string for a video story from its first media entry
+ $video = $storyContent['media'][0]; // Only the first media entry is used
+
+ if ($video['type'] === 'YouTube') { // NOTE(review): both heredoc bodies below appear to have lost their markup in this copy ($url is otherwise unused) - confirm against the original file
+ $url = 'https://www.youtube.com/embed/' . $video['externalId'];
+ $html = <<
+EOD;
+ } else {
+ $html = <<
+
+
+EOD;
+ }
+
+ return $html;
+ }
+
+ /**
+  * Remove datawrapper.dwcdn.net iframes and related javaScript.
+  *
+  * For each matching iframe its outertext is blanked, and so is the
+  * script element found at the same index as the iframe, when one exists.
+  */
+ private function processIframes($html) {
+     foreach ($html->find('iframe') as $position => $frame) {
+         // Leave every iframe that is not served from datawrapper untouched
+         if (!preg_match('/datawrapper\.dwcdn\.net/', $frame->src)) {
+             continue;
+         }
+
+         $frame->outertext = '';
+
+         $script = $html->find('script', $position);
+         if ($script) {
+             $script->outertext = '';
+         }
+     }
+ }
+}