mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-04 16:49:35 +00:00
Merge branch 'RSS-Bridge:master' into master
This commit is contained in:
commit
f7ddb79e0b
@ -1,147 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge that turns the publication list on anthropic.com/research into feed
 * items, fetching each publication page for the item content.
 */
class AnthropicBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Anthropic Research Bridge';
    const URI = 'https://www.anthropic.com';

    const CACHE_TIMEOUT = 3600; // 1 hour
    const DESCRIPTION = 'Returns research publications from Anthropic';

    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10
            ],
        ]
    ];

    /**
     * Builds the feed: reads the publication list embedded in the research
     * page and loads the article body for at most `limit` publications.
     *
     * @throws \Exception when the research page cannot be parsed or does not
     *                    contain the embedded page data
     */
    public function collectData()
    {
        // Anthropic sometimes returns 500 for no reason. The contents are still there.
        $html = $this->getHTMLIgnoreError(self::URI . '/research');
        if (!$html) {
            // Guard against a falsy parser result instead of failing later on ->find().
            throw new \Exception('Could not parse the Anthropic research page');
        }

        $pageData = $this->extractPageData($html);
        if ($pageData === null) {
            // Fail loudly rather than silently producing an empty feed.
            throw new \Exception('Could not find the publication data on the Anthropic research page');
        }

        // A negative length would make array_slice() count from the end, so clamp at 0.
        $limit = max(0, (int) $this->getInput('limit'));
        $pages = array_slice($this->parsePageData($pageData), 0, $limit);

        foreach ($pages as $page) {
            $page['content'] = $this->parsePage($page['uri']);
            $this->items[] = $page;
        }
    }

    /**
     * Fetches a URL and parses it as HTML, using the body of an HTTP error
     * response when the request fails with an HttpException.
     *
     * @param string   $url URL to fetch
     * @param int|null $ttl when set (and positive), the raw body is read from
     *                      and written to the bridge cache under 'pages_' . $url
     *                      with this time-to-live
     * @return mixed whatever str_get_html() returns for the fetched body
     */
    private function getHTMLIgnoreError($url, $ttl = null)
    {
        $useCache = $ttl !== null && $ttl > 0;
        $cacheKey = 'pages_' . $url;

        if ($useCache) {
            $cached = $this->cache->get($cacheKey);
            if ($cached) {
                return str_get_html($cached);
            }
        }

        try {
            $content = getContents($url);
        } catch (HttpException $e) {
            // NOTE(review): assumes the exception may carry no response
            // (e.g. transport failure); without one there is no body to fall back to.
            if (!isset($e->response)) {
                throw $e;
            }
            $content = $e->response->getBody();
        }

        if ($useCache) {
            $this->cache->set($cacheKey, $content, $ttl);
        }
        return str_get_html($content);
    }

    /**
     * Searches the page's <script> tags for a `self.__next_f.push(...)` call
     * whose payload carries the page data.
     *
     * The script text is expected to look like `self.__next_f.push([1,"<json string>"])`
     * (presumably Next.js streamed data): the first 22 characters
     * (`self.__next_f.push([1,`) and the trailing `])` are cut off, the
     * remaining JSON string literal is decoded, and everything from its first
     * `[` onwards is decoded again as a JSON array. The 4th element of that
     * array is returned when it is an array containing a 'page' key.
     *
     * @param mixed $html parsed document exposing find()
     * @return array|null the page data, or null when no script matches
     */
    private function extractPageData($html)
    {
        foreach ($html->find('script') as $script) {
            $jsCode = $script->innertext;
            if (!str_starts_with($jsCode, 'self.__next_f.push(')) {
                continue;
            }

            $pushData = json_decode(mb_substr($jsCode, 22, mb_strlen($jsCode) - 2 - 22));
            if (!is_string($pushData)) {
                continue;
            }

            $bracketPos = mb_strpos($pushData, '[');
            if ($bracketPos === false) {
                continue;
            }

            $pushArray = json_decode(mb_substr($pushData, $bracketPos), true);
            if (!is_array($pushArray) || count($pushArray) < 4) {
                continue;
            }

            // The 4th element can be a scalar; array_key_exists() requires an array.
            $pageData = $pushArray[3];
            if (is_array($pageData) && array_key_exists('page', $pageData)) {
                return $pageData;
            }
        }

        return null;
    }

    /**
     * Converts the embedded page data into feed items (without content).
     *
     * Only the first section named 'Research Teams', its first tab labelled
     * 'Overview' and that tab's first section titled 'Publications' are
     * considered; missing levels yield an empty list.
     *
     * @param array $page_data data returned by extractPageData()
     * @return array list of items with title, timestamp, uri, categories and enclosures
     */
    private function parsePageData($page_data)
    {
        $result = [];

        foreach ($page_data['page']['sections'] ?? [] as $section) {
            if (($section['internalName'] ?? null) !== 'Research Teams') {
                continue;
            }

            foreach ($section['tabPages'] ?? [] as $tabPage) {
                if (($tabPage['label'] ?? null) !== 'Overview') {
                    continue;
                }

                foreach ($tabPage['sections'] ?? [] as $tabSection) {
                    if (($tabSection['title'] ?? null) !== 'Publications') {
                        continue;
                    }

                    foreach ($tabSection['posts'] ?? [] as $post) {
                        // Without a slug there is no article URL to link or fetch.
                        if (!isset($post['slug']['current'])) {
                            continue;
                        }

                        $result[] = [
                            'title' => $post['title'],
                            'timestamp' => $post['publishedOn'],
                            'uri' => self::URI . '/research/' . $post['slug']['current'],
                            'categories' => array_map(
                                fn($subject) => $subject['label'],
                                $post['subjects'] ?? [],
                            ),
                            'enclosures' => isset($post['cta']['url']) ? [$post['cta']['url']] : [],
                        ];
                    }
                    break;
                }
                break;
            }
            break;
        }

        return $result;
    }

    /**
     * Fetches a publication page (cached for 7 days) and returns the HTML of
     * its main article with iframes replaced by plain links.
     *
     * @param string $url publication URL
     * @return string article HTML, or an empty string when the page cannot be
     *                parsed or the article element is not found
     */
    private function parsePage($url)
    {
        // Again, 500 for no reason.
        $html = $this->getHTMLIgnoreError($url, 7 * 24 * 60 * 60);
        if (!$html) {
            return '';
        }

        // Main content
        $main = $html->find('div[class*="PostDetail_post-detail"] > article', 0);
        if (!$main) {
            // Layout changed or an error page was served: keep the item, drop the content.
            return '';
        }

        // Mostly YouTube videos
        foreach ($main->find('iframe') as $iframe) {
            $iframe->parent->removeAttribute('style');
            $iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
        }

        $main = convertLazyLoading($main);
        $main = defaultLinkTo($main, self::URI);

        return (string) $main;
    }
}
|
@ -180,7 +180,7 @@ class BlueskyBridge extends BridgeAbstract
|
||||
|
||||
if (Debug::isEnabled()) {
|
||||
$url = explode('/', $post['post']['uri']);
|
||||
error_log('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
|
||||
$this->logger->debug('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
|
||||
}
|
||||
|
||||
$description = '';
|
||||
@ -255,10 +255,16 @@ class BlueskyBridge extends BridgeAbstract
|
||||
$description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
|
||||
} elseif (isset($quotedRecord['blocked']) && $quotedRecord['blocked']) { //blocked by quote author
|
||||
$description .= 'Author of quoted post has blocked OP.';
|
||||
} elseif (($quotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView') {
|
||||
$description .= '</p>';
|
||||
$description .= $this->getGeneratorViewDescription($quotedRecord);
|
||||
$description .= '<p>';
|
||||
} elseif (
|
||||
($quotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView' ||
|
||||
($quotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#listView'
|
||||
) {
|
||||
$description .= $this->getListFeedDescription($quotedRecord);
|
||||
} elseif (
|
||||
($quotedRecord['$type'] ?? '') === 'app.bsky.graph.starterpack' ||
|
||||
($quotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#starterPackViewBasic'
|
||||
) {
|
||||
$description .= $this->getStarterPackDescription($post['post']['embed']['record']);
|
||||
} else {
|
||||
$quotedAuthorDid = $quotedRecord['author']['did'];
|
||||
$quotedDisplayName = $quotedRecord['author']['displayName'] ?? '';
|
||||
@ -403,10 +409,16 @@ class BlueskyBridge extends BridgeAbstract
|
||||
$description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
|
||||
} elseif (isset($replyQuotedRecord['blocked']) && $replyQuotedRecord['blocked']) { //blocked by quote author
|
||||
$description .= 'Author of quoted post has blocked OP.';
|
||||
} elseif (($replyQuotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView') {
|
||||
$description .= '</p>';
|
||||
$description .= $this->getGeneratorViewDescription($replyQuotedRecord);
|
||||
$description .= '<p>';
|
||||
} elseif (
|
||||
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView' ||
|
||||
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#listView'
|
||||
) {
|
||||
$description .= $this->getListFeedDescription($replyQuotedRecord);
|
||||
} elseif (
|
||||
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.starterpack' ||
|
||||
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#starterPackViewBasic'
|
||||
) {
|
||||
$description .= $this->getStarterPackDescription($replyPost['embed']['record']);
|
||||
} else {
|
||||
$quotedAuthorDid = $replyQuotedRecord['author']['did'];
|
||||
$quotedDisplayName = $replyQuotedRecord['author']['displayName'] ?? '';
|
||||
@ -554,11 +566,19 @@ class BlueskyBridge extends BridgeAbstract
|
||||
}
|
||||
$title .= ', replying to ' . $replyAuthor;
|
||||
}
|
||||
if (isset($post['post']['embed']) && isset($post['post']['embed']['record'])) {
|
||||
|
||||
if (
|
||||
isset($post['post']['embed']) &&
|
||||
isset($post['post']['embed']['record']) &&
|
||||
//if not starter pack, feed or list
|
||||
($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.feed.defs#generatorView' &&
|
||||
($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.graph.defs#listView' &&
|
||||
($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.graph.defs#starterPackViewBasic'
|
||||
) {
|
||||
if (isset($post['post']['embed']['record']['blocked'])) {
|
||||
$quotedAuthor = 'blocked user';
|
||||
} elseif (isset($post['post']['embed']['record']['notFound'])) {
|
||||
$quotedAuthor = 'deleted post';
|
||||
$quotedAuthor = 'deleted psost';
|
||||
} elseif (isset($post['post']['embed']['record']['detached'])) {
|
||||
$quotedAuthor = 'detached post';
|
||||
} else {
|
||||
@ -587,34 +607,64 @@ class BlueskyBridge extends BridgeAbstract
|
||||
{
|
||||
$uri = 'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=' . urlencode($did) . '&filter=' . urlencode($filter) . '&limit=30';
|
||||
if (Debug::isEnabled()) {
|
||||
error_log($uri);
|
||||
$this->logger->debug($uri);
|
||||
}
|
||||
$response = json_decode(getContents($uri), true);
|
||||
return $response;
|
||||
}
|
||||
|
||||
/**
 * Renders a bordered card for a quoted feed generator: avatar, display
 * name, creator handle and like count, all wrapped in a link to the feed.
 *
 * NOTE(review): this is the removed side of the diff hunk; it is kept as a
 * separate method because the visible hunks still call it. Confirm whether
 * it can be dropped once those callers are gone.
 *
 * @param array $record record with avatar, displayName, likeCount, uri and
 *                      creator (handle, did) entries
 * @return string HTML fragment
 */
private function getGeneratorViewDescription(array $record): string
{
    $avatar = e($record['avatar']);
    $displayName = e($record['displayName']);
    $displayHandle = e($record['creator']['handle']);
    $likeCount = e($record['likeCount']);
    // Last path segment of the record URI identifies the feed.
    preg_match('/\/([^\/]+)$/', $record['uri'], $matches);
    $uri = e('https://bsky.app/profile/' . $record['creator']['did'] . '/feed/' . $matches[1]);

    return <<<END
    <a href="{$uri}" style="color: inherit;">
    <div style="border: 1px solid #333; padding: 10px;">
    <div style="display: flex; margin-bottom: 10px;">
    <img src="{$avatar}" height="50" width="50" style="margin-right: 10px;">
    <div style="display: flex; flex-direction: column; justify-content: center;">
    <h3>{$displayName}</h3>
    <span>Feed by @{$displayHandle}</span>
    </div>
    </div>
    <span>Liked by {$likeCount} users</span>
    </div>
    </a>
    END;
}

/**
 * Embed for generated feeds and lists.
 *
 * Renders a blockquote with a link to the feed/list, its creator, an
 * optional avatar thumbnail, the description and (when present) the like
 * count. The link path and label depend on the record's 'purpose':
 * moderation list, curated list, or feed otherwise.
 *
 * @param array $record record with uri and creator (did, handle) entries;
 *                      avatar, displayName/name, description, likeCount,
 *                      purpose and creator displayName are optional
 * @return string HTML fragment
 */
private function getListFeedDescription(array $record): string
{
    // The avatar URL ends up inside an HTML attribute, so it is escaped like every other value.
    $feedViewAvatar = isset($record['avatar'])
        ? '<img src="' . e(preg_replace('/\/img\/avatar\//', '/img/avatar_thumbnail/', $record['avatar'])) . '">'
        : '';
    $feedViewName = e($record['displayName'] ?? $record['name'] ?? '');
    $feedViewDescription = e($record['description'] ?? '');
    $authorDisplayName = e($record['creator']['displayName'] ?? '');
    $authorHandle = e($record['creator']['handle']);
    $likeCount = isset($record['likeCount']) ? '<br>Liked by ' . e($record['likeCount']) . ' users' : '';

    // Last path segment of the record URI identifies the feed/list; empty when the URI has none.
    preg_match('/\/([^\/]+)$/', $record['uri'], $matches);
    $recordKey = $matches[1] ?? '';

    $purpose = $record['purpose'] ?? '';
    if ($purpose === 'app.bsky.graph.defs#modlist') {
        $typeURL = '/lists/';
        $typeDesc = 'moderation list';
    } elseif ($purpose === 'app.bsky.graph.defs#curatelist') {
        $typeURL = '/lists/';
        $typeDesc = 'list';
    } else {
        $typeURL = '/feed/';
        $typeDesc = 'feed';
    }
    $uri = e('https://bsky.app/profile/' . $record['creator']['did'] . $typeURL . $recordKey);

    return <<<END
    <blockquote>
    <b><a href="{$uri}">{$feedViewName}</a></b><br/>
    Bluesky {$typeDesc} by <b>{$authorDisplayName}</b> <i>@{$authorHandle}</i>
    <figure>
    {$feedViewAvatar}
    <figcaption>{$feedViewDescription}{$likeCount}</figcaption>
    </figure>
    </blockquote>
    END;
}
|
||||
|
||||
/**
 * Renders a blockquote describing a quoted starter pack: linked name,
 * creator display name and handle, and the pack description.
 *
 * @param array $record embed record; must contain a nested 'record' entry
 *                      (name, list, optional description) and a 'creator'
 *                      entry (did, handle, optional displayName)
 * @return string HTML fragment, or a plain failure message when the nested
 *                record or the list identifier is missing
 */
private function getStarterPackDescription(array $record): string
{
    if (!isset($record['record'])) {
        return 'Failed to get starter pack information.';
    }
    $starterpackRecord = $record['record'];

    // The link is built from the last path segment of the list URI; without it there is nothing to link to.
    if (!preg_match('/\/([^\/]+)$/', $starterpackRecord['list'] ?? '', $matches)) {
        return 'Failed to get starter pack information.';
    }

    // Optional fields default to '' so e() never receives null.
    $starterpackName = e($starterpackRecord['name'] ?? '');
    $starterpackDescription = e($starterpackRecord['description'] ?? '');
    $creatorDisplayName = e($record['creator']['displayName'] ?? '');
    $creatorHandle = e($record['creator']['handle']);
    $uri = e('https://bsky.app/starter-pack/' . $record['creator']['did'] . '/' . $matches[1]);

    return <<<END
    <blockquote>
    <b><a href="{$uri}">{$starterpackName}</a></b><br/>
    Bluesky starter pack by <b>{$creatorDisplayName}</b> <i>@{$creatorHandle}</i><br/>
    {$starterpackDescription}
    </blockquote>
    END;
}
|
||||
}
|
||||
|
@ -26,21 +26,16 @@ class TheFarSideBridge extends BridgeAbstract
|
||||
$image = $card->find('img', 0);
|
||||
$imageUrl = $image->attr['data-src'];
|
||||
|
||||
// Images are downloaded to bypass the hotlink protection.
|
||||
$image = getContents($imageUrl, ['Referer: ' . self::URI]);
|
||||
|
||||
// Encode image as base64
|
||||
$imageBase64 = base64_encode($image);
|
||||
|
||||
$caption = '';
|
||||
|
||||
if ($card->find('figcaption', 0)) {
|
||||
$caption = $card->find('figcaption', 0)->innertext;
|
||||
}
|
||||
|
||||
$item['enclosures'][] = $imageUrl;
|
||||
$item['content'] .= <<<EOD
|
||||
<figure>
|
||||
<img title="{$caption}" src="data:image/jpeg;base64,{$imageBase64}"/>
|
||||
<img title="{$caption}" src="{$imageUrl}"/>
|
||||
<figcaption>{$caption}</figcaption>
|
||||
</figure>
|
||||
<br/>
|
||||
|
Loading…
Reference in New Issue
Block a user