mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-16 20:00:55 +00:00
[ARDMediathekBridge] Switch to JSON-API (#2380)
* Switch ARDMediathekBridge to JSON-API The html screen scraping approach of ARDMediathekBridge did not work reliably. I could not find one show for which the item list was not empty using the html screen scraping approach. The proposed change uses the JSON-API of the WebApp. Although there is still room for improvement (feed title, better understanding of the API, more accurate mimic of the webapp's behavior, de-pagination …), it does work with this change. Indicate that now full URLs as well as just the ID are accepted.
This commit is contained in:
parent
368a198321
commit
f259fa7f9f
@ -4,14 +4,48 @@ class ARDMediathekBridge extends BridgeAbstract {
|
|||||||
const URI = 'https://www.ardmediathek.de';
|
const URI = 'https://www.ardmediathek.de';
|
||||||
const DESCRIPTION = 'Feed of any series in the ARD-Mediathek, specified by its path';
|
const DESCRIPTION = 'Feed of any series in the ARD-Mediathek, specified by its path';
|
||||||
const MAINTAINER = 'yue-dongchen';
|
const MAINTAINER = 'yue-dongchen';
|
||||||
|
/*
|
||||||
|
* Number of Items to be requested from ARDmediathek API
|
||||||
|
* 12 has been observed in the wild
|
||||||
|
* 29 is the highest successfully tested value
|
||||||
|
* More Items could be fetched via pagination
|
||||||
|
* The JSON-field pagination holds more information on that
|
||||||
|
* @const PAGESIZE number of requested items
|
||||||
|
*/
|
||||||
|
const PAGESIZE = 29;
|
||||||
|
/*
|
||||||
|
* The URL Prefix of the (Webapp-)API
|
||||||
|
* @const APIENDPOINT https-URL of the used endpoint
|
||||||
|
*/
|
||||||
|
const APIENDPOINT = 'https://api.ardmediathek.de/page-gateway/widgets/ard/asset/';
|
||||||
|
/*
|
||||||
|
* The URL prefix of the video link
|
||||||
|
* URLs from the webapp include a slug containing titles of show, episode, and tv station.
|
||||||
|
* It seems to work without that.
|
||||||
|
* @const VIDEOLINKPREFIX https-URL prefix of video links
|
||||||
|
*/
|
||||||
|
const VIDEOLINKPREFIX = 'https://www.ardmediathek.de/video/';
|
||||||
|
/*
|
||||||
|
* The requested width of the preview image
|
||||||
|
* 432 has been observed in the wild
|
||||||
|
* The webapp seems to also compute and add the height value
|
||||||
|
* It seems to work without that.
|
||||||
|
* @const IMAGEWIDTH width in px of the preview image
|
||||||
|
*/
|
||||||
|
const IMAGEWIDTH = 432;
|
||||||
|
/*
|
||||||
|
* Placeholder that will be replaced by IMAGEWIDTH in the preview image URL
|
||||||
|
* @const IMAGEWIDTHPLACEHOLDER
|
||||||
|
*/
|
||||||
|
const IMAGEWIDTHPLACEHOLDER = '{width}';
|
||||||
|
|
||||||
const PARAMETERS = array(
|
const PARAMETERS = array(
|
||||||
array(
|
array(
|
||||||
'path' => array(
|
'path' => array(
|
||||||
'name' => 'Path',
|
'name' => 'Show Link or ID',
|
||||||
'required' => true,
|
'required' => true,
|
||||||
'title' => 'Enter without trailing slash',
|
'title' => 'Link to the show page or just its alphanumeric suffix',
|
||||||
'defaultValue' => '45-min/Y3JpZDovL25kci5kZS8xMzkx'
|
'defaultValue' => 'https://www.ardmediathek.de/sendung/45-min/Y3JpZDovL25kci5kZS8xMzkx/'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
@ -19,17 +53,38 @@ class ARDMediathekBridge extends BridgeAbstract {
|
|||||||
public function collectData() {
|
public function collectData() {
|
||||||
date_default_timezone_set('Europe/Berlin');
|
date_default_timezone_set('Europe/Berlin');
|
||||||
|
|
||||||
$url = 'https://www.ardmediathek.de/sendung/' . $this->getInput('path') . '/';
|
$pathComponents = explode('/', $this->getInput('path'));
|
||||||
$html = getSimpleHTMLDOM($url);
|
if (empty($pathComponents)) {
|
||||||
$html = defaultLinkTo($html, $url);
|
returnClientError('Path may not be empty');
|
||||||
|
}
|
||||||
|
if (count($pathComponents) < 2) {
|
||||||
|
$showID = $pathComponents[0];
|
||||||
|
} else {
|
||||||
|
$lastKey = count($pathComponents) - 1;
|
||||||
|
$showID = $pathComponents[$lastKey];
|
||||||
|
if (strlen($showID) === 0) {
|
||||||
|
$showID = $pathComponents[$lastKey - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
foreach($html->find('a.Root-sc-1ytw7qu-0') as $video) {
|
$url = SELF::APIENDPOINT . $showID . '/?pageSize=' . SELF::PAGESIZE;
|
||||||
|
$rawJSON = getContents($url);
|
||||||
|
$processedJSON = json_decode($rawJSON);
|
||||||
|
|
||||||
|
foreach($processedJSON->teasers as $video) {
|
||||||
$item = array();
|
$item = array();
|
||||||
$item['uri'] = $video->href;
|
// there is also ->links->self->id, ->links->self->urlId, ->links->target->id, ->links->target->urlId
|
||||||
$item['title'] = $video->find('h3', 0)->plaintext;
|
$item['uri'] = SELF::VIDEOLINKPREFIX . $video->id . '/';
|
||||||
$item['content'] = '<img src="' . $video->find('img', 0)->src . '" />';
|
// there is also ->mediumTitle and ->shortTitle
|
||||||
$item['timestamp'] = strtotime(mb_substr($video->find('div.Line-epbftj-1', 0)->plaintext, 0, 10));
|
$item['title'] = $video->longTitle;
|
||||||
|
// in the test, aspect16x9 was the only child of images, not sure whether that is always true
|
||||||
|
$item['enclosures'] = array(
|
||||||
|
str_replace(SELF::IMAGEWIDTHPLACEHOLDER, SELF::IMAGEWIDTH, $video->images->aspect16x9->src)
|
||||||
|
);
|
||||||
|
$item['content'] = '<img src="' . $item['enclosures'][0] . '" /><p>';
|
||||||
|
$item['timestamp'] = $video->broadcastedOn;
|
||||||
|
$item['uid'] = $video->id;
|
||||||
|
$item['author'] = $video->publicationService->name;
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user