0
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-06-30 02:31:07 +00:00
rss-bridge/bridges/TikTokBridge.php
Apollo Nargang 5bd767b862
[TikTokBridge] Use oEmbed for video metadata (#4514)
* [TikTokBridge] Use oEmbed for video metadata

Fetches oEmbed-formatted metadata for videos through the TikTok API to
provide post titles, thumbnails, and authors. This hasn't yet been
tested, so it's possible it doesn't work.

* [TikTokBridge] Add back view count parsing

oops

* [TikTokBridge] Prepend www to the oEmbed API endpoint URL

The non-www URL resulted in a 301 redirect to the www URL, so this just
skips that redirect, improving performance a bit and hopefully helping
with the 400 errors.

* [TikTokBridge] Retry failed oEmbed requests

If an oEmbed request fails, retry a few times, waiting a bit between
retries. This should fix the problem for the most part, since I think
the problem was related to some sort of rate limit (it isn't mentioned
in the docs, but it seems to happen only when sending large quantities
of sequential requests).
2025-05-09 05:10:04 +02:00

126 lines
3.8 KiB
PHP

<?php
/**
 * Bridge that turns the videos listed on a TikTok user's embed page into feed items.
 *
 * The list of videos (and their view counts) is scraped from the embed page;
 * the title, thumbnail and author handle of each video are fetched from
 * TikTok's oEmbed endpoint.
 */
class TikTokBridge extends BridgeAbstract
{
    const NAME = 'TikTok Bridge';
    const URI = 'https://www.tiktok.com';
    const DESCRIPTION = 'Returns posts';
    const MAINTAINER = 'VerifiedJoseph';
    const PARAMETERS = [
        'By user' => [
            'username' => [
                'name' => 'Username',
                'type' => 'text',
                'required' => true,
                'exampleValue' => '@tiktok',
            ]
        ]];

    const TEST_DETECT_PARAMETERS = [
        'https://www.tiktok.com/@tiktok' => [
            'context' => 'By user', 'username' => '@tiktok'
        ]
    ];

    // Maximum number of oEmbed requests attempted for a single video.
    const OEMBED_RETRY_COUNT = 20;

    // Pause between two oEmbed attempts, in seconds (fractions are allowed).
    const OEMBED_RETRY_DELAY = 0.1;

    const CACHE_TIMEOUT = 900; // 15 minutes

    /**
     * Scrapes the user's embed page and appends one feed item per listed video.
     *
     * Videos without a usable link, and videos whose oEmbed metadata cannot be
     * fetched after all retries, are skipped rather than emitted with missing
     * or wrong metadata; a later refresh can then pick them up complete.
     */
    public function collectData()
    {
        $username = $this->processUsername();
        $html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $username);

        $authorProfilePicture = $html->find('img[data-e2e=creator-profile-userInfo-Avatar]', 0)->src ?? '';
        $videos = $html->find('div[data-e2e=common-videoList-VideoContainer]');

        foreach ($videos as $video) {
            $url = $this->extractVideoUrl($video);
            if ($url === null) {
                continue;
            }

            $videoEmbedData = $this->fetchOembedData($url);
            if ($videoEmbedData === null) {
                continue;
            }

            $title = (string) ($videoEmbedData->title ?? '');
            $image = (string) ($videoEmbedData->thumbnail_url ?? '');
            $views = trim((string) ($video->find('div[data-e2e=common-Video-Count]', 0)->plaintext ?? ''));

            // Title, thumbnail and view count come from remote responses, so
            // they are escaped before being interpolated into the HTML content.
            $safeTitle = htmlspecialchars($title, ENT_QUOTES, 'UTF-8');
            $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
            $safeImage = htmlspecialchars($image, ENT_QUOTES, 'UTF-8');
            $safeViews = htmlspecialchars($views, ENT_QUOTES, 'UTF-8');

            $item = [];
            $item['uri'] = $url;
            $item['title'] = $title;
            $item['author'] = isset($videoEmbedData->author_unique_id)
                ? '@' . $videoEmbedData->author_unique_id
                : $username;
            // Drop empty entries so a missing thumbnail or avatar does not
            // produce an enclosure with an empty URL.
            $item['enclosures'] = array_values(array_filter([$image, $authorProfilePicture]));
            $item['content'] = <<<EOD
<p>{$safeTitle}</p>
<a href="{$safeUrl}"><img src="{$safeImage}"/></a>
<p>{$safeViews} views</p><br/>
EOD;

            $this->items[] = $item;
        }
    }

    /**
     * Returns the profile URL for the 'By user' context, the parent's URI otherwise.
     */
    public function getURI()
    {
        switch ($this->queriedContext) {
            case 'By user':
                return self::URI . '/' . $this->processUsername();
            default:
                return parent::getURI();
        }
    }

    /**
     * Returns '<@username> - TikTok' for the 'By user' context, the parent's name otherwise.
     */
    public function getName()
    {
        switch ($this->queriedContext) {
            case 'By user':
                return $this->processUsername() . ' - TikTok';
            default:
                return parent::getName();
        }
    }

    /**
     * Builds the tracking-free URL of a video from its list entry.
     *
     * Only scheme, host and path of the first link are kept, which removes the
     * query string (tracking parameters).
     *
     * @param object $video List entry node for one video.
     * @return string|null The cleaned URL, or null when the entry has no
     *                     absolute link.
     */
    private function extractVideoUrl($video)
    {
        $href = (string) ($video->find('a', 0)->href ?? '');
        $parts = parse_url($href);

        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return null;
        }

        return $parts['scheme'] . '://' . $parts['host'] . '/' . ltrim($parts['path'] ?? '', '/');
    }

    /**
     * Fetches the oEmbed metadata of a video, retrying on failure.
     *
     * The endpoint sometimes fails for a moment, so up to OEMBED_RETRY_COUNT
     * attempts are made with OEMBED_RETRY_DELAY seconds between them.
     *
     * @param string $url Video URL without query string.
     * @return object|null Decoded oEmbed response, or null when every attempt
     *                     failed or returned something that is not a JSON object.
     */
    private function fetchOembedData($url)
    {
        // The video URL is itself a query parameter value and must be encoded.
        $endpoint = 'https://www.tiktok.com/oembed?url=' . urlencode($url);

        for ($attempt = 0; $attempt < self::OEMBED_RETRY_COUNT; $attempt++) {
            if ($attempt > 0) {
                // usleep() is used because sleep() only accepts whole seconds
                // and would truncate a fractional delay to zero.
                usleep((int) round(self::OEMBED_RETRY_DELAY * 1000000));
            }

            try {
                $response = getContents($endpoint);
            } catch (Exception $e) {
                continue;
            }

            $data = json_decode($response);
            if (is_object($data)) {
                return $data;
            }
        }

        return null;
    }

    /**
     * Normalises the 'username' input to a handle with a leading '@'.
     *
     * Accepts a bare name, an '@name' handle or a tiktok.com profile URL; for
     * URLs, anything after the handle (further path segments, query string,
     * fragment) is ignored.
     *
     * @return string The handle, including the leading '@'.
     */
    private function processUsername()
    {
        $username = trim($this->getInput('username'));

        if (preg_match('~^https?://(?:www\.)?tiktok\.com/@([^/?#]+)~', $username, $m)) {
            return '@' . $m[1];
        }

        if (substr($username, 0, 1) !== '@') {
            return '@' . $username;
        }

        return $username;
    }

    /**
     * Derives the bridge parameters from a TikTok profile URL.
     *
     * @param string $url URL to inspect.
     * @return array|null Context and username, or null when the URL does not
     *                    contain a TikTok handle.
     */
    public function detectParameters($url)
    {
        // Handles may contain periods (but the match must end on a word
        // character, so a trailing '.' is not captured).
        if (preg_match('/tiktok\.com\/(@[\w.]*\w)/', $url, $matches) > 0) {
            return [
                'context' => 'By user',
                'username' => $matches[1]
            ];
        }

        return null;
    }
}