Merge branch 'RSS-Bridge:master' into master

This commit is contained in:
subtle4553 2025-03-25 22:51:29 +01:00 committed by GitHub
commit f7ddb79e0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 82 additions and 184 deletions

View File

@ -1,147 +0,0 @@
<?php
/**
 * Bridge that turns the publication list on Anthropic's research page into feed items.
 *
 * The listing is read from the JSON payload embedded in the page's
 * `self.__next_f.push(...)` script tags; each publication page is then
 * fetched (and cached) to provide the item content.
 */
class AnthropicBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Anthropic Research Bridge';
    const URI = 'https://www.anthropic.com';
    const CACHE_TIMEOUT = 3600; // 1 hour
    const DESCRIPTION = 'Returns research publications from Anthropic';
    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10
            ],
        ]
    ];

    /**
     * Collect up to `limit` publications and append them to $this->items.
     *
     * @throws \Exception when the research page cannot be parsed or carries no page data
     */
    public function collectData()
    {
        // Anthropic sometimes returns 500 for no reason. The contents are still there.
        $html = $this->getHTMLIgnoreError(self::URI . '/research');
        if (!$html) {
            throw new \Exception('Unable to parse the Anthropic research page');
        }

        $page_data = $this->extractPageData($html);
        if ($page_data === null) {
            // Fail loudly instead of indexing into null further down.
            throw new \Exception('Unable to find the page data on the Anthropic research page');
        }

        // A missing or negative limit yields no items, as the original loop bound did.
        $limit = max(0, (int) $this->getInput('limit'));
        foreach (array_slice($this->parsePageData($page_data), 0, $limit) as $page) {
            $page['content'] = $this->parsePage($page['uri']);
            $this->items[] = $page;
        }
    }

    /**
     * Fetch a URL and parse it as HTML, using the response body even when the request
     * raised an HttpException.
     *
     * @param string   $url URL to fetch
     * @param int|null $ttl when not null, the raw body is read from / written to the
     *                      cache under the key 'pages_' . $url with this TTL
     * @return mixed whatever str_get_html() returns for the body; callers in this class
     *               treat a falsy value as "could not be parsed"
     */
    private function getHTMLIgnoreError($url, $ttl = null)
    {
        $cacheKey = 'pages_' . $url;
        // Strict comparison: only a literal null disables caching.
        $useCache = $ttl !== null;

        if ($useCache) {
            $cached = $this->cache->get($cacheKey);
            if ($cached) {
                return str_get_html($cached);
            }
        }

        try {
            $content = getContents($url);
        } catch (HttpException $e) {
            // The error response still carries the page markup (see collectData()).
            $content = $e->response->getBody();
        }

        if ($useCache) {
            $this->cache->set($cacheKey, $content, $ttl);
        }
        return str_get_html($content);
    }

    /**
     * Find the embedded data array that contains a 'page' key.
     *
     * @param mixed $html parsed document exposing find()
     * @return array|null the decoded data, or null when no script carries it
     */
    private function extractPageData($html)
    {
        foreach ($html->find('script') as $script) {
            $js_code = $script->innertext;
            if (!str_starts_with($js_code, 'self.__next_f.push(')) {
                continue;
            }

            // Skip the first 22 characters and drop the last 2, leaving a JSON string
            // literal (presumably `self.__next_f.push([1,` ... `])` — TODO confirm).
            $push_data = json_decode(mb_substr($js_code, 22, mb_strlen($js_code) - 2 - 22));
            if (!is_string($push_data)) {
                continue;
            }

            // The payload of interest starts at the first '[' of the decoded string.
            $square_bracket = mb_strpos($push_data, '[');
            if ($square_bracket === false) {
                continue;
            }

            $push_array = json_decode(mb_substr($push_data, $square_bracket), true);
            if (!is_array($push_array) || count($push_array) < 4) {
                continue;
            }

            // The fourth element may be a scalar; array_key_exists() would throw on it.
            $page_data = $push_array[3] ?? null;
            if (is_array($page_data) && array_key_exists('page', $page_data)) {
                return $page_data;
            }
        }
        return null;
    }

    /**
     * Turn the decoded page data into a list of feed items (without content).
     *
     * Only the posts of the first 'Publications' section inside the first 'Overview'
     * tab of the first 'Research Teams' section are used.
     *
     * @param array $page_data value returned by extractPageData()
     * @return array list of items with title, timestamp, uri, categories and enclosures
     */
    private function parsePageData($page_data)
    {
        $result = [];
        foreach ($page_data['page']['sections'] ?? [] as $section) {
            if (($section['internalName'] ?? null) !== 'Research Teams') {
                continue;
            }
            foreach ($section['tabPages'] ?? [] as $tabPage) {
                if (($tabPage['label'] ?? null) !== 'Overview') {
                    continue;
                }
                foreach ($tabPage['sections'] ?? [] as $section1) {
                    if (($section1['title'] ?? null) !== 'Publications') {
                        continue;
                    }
                    foreach ($section1['posts'] ?? [] as $post) {
                        $result[] = $this->postToItem($post);
                    }
                    break;
                }
                break;
            }
            break;
        }
        return $result;
    }

    /**
     * Map one post entry of the page data to a feed item.
     *
     * @param array $post post entry as found in the 'posts' list
     * @return array feed item without content
     */
    private function postToItem($post)
    {
        // Only attach an enclosure when the call-to-action actually has a URL.
        $enc = isset($post['cta']['url']) ? [$post['cta']['url']] : [];

        return [
            'title' => $post['title'],
            'timestamp' => $post['publishedOn'],
            'uri' => self::URI . '/research/' . $post['slug']['current'],
            'categories' => array_map(
                fn($s) => $s['label'],
                $post['subjects'] ?? [],
            ),
            'enclosures' => $enc,
        ];
    }

    /**
     * Fetch a publication page and return its article markup.
     *
     * @param string $url publication URL
     * @return string article HTML, or an empty string when the page cannot be parsed
     *                or contains no matching article element
     */
    private function parsePage($url)
    {
        // Again, 500 for no reason.
        $html = $this->getHTMLIgnoreError($url, 7 * 24 * 60 * 60);
        if (!$html) {
            return '';
        }

        // Main content
        $main = $html->find('div[class*="PostDetail_post-detail"] > article', 0);
        if (!$main) {
            // Keep the item (title and link) even when the article cannot be located.
            return '';
        }

        // Mostly YouTube videos: replace each iframe with a plain link to its source.
        foreach ($main->find('iframe') as $iframe) {
            $iframe->parent->removeAttribute('style');
            $iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
        }

        $main = convertLazyLoading($main);
        $main = defaultLinkTo($main, self::URI);
        return (string) $main;
    }
}

View File

@ -180,7 +180,7 @@ class BlueskyBridge extends BridgeAbstract
if (Debug::isEnabled()) {
$url = explode('/', $post['post']['uri']);
error_log('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
$this->logger->debug('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
}
$description = '';
@ -255,10 +255,16 @@ class BlueskyBridge extends BridgeAbstract
$description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
} elseif (isset($quotedRecord['blocked']) && $quotedRecord['blocked']) { //blocked by quote author
$description .= 'Author of quoted post has blocked OP.';
} elseif (($quotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView') {
$description .= '</p>';
$description .= $this->getGeneratorViewDescription($quotedRecord);
$description .= '<p>';
} elseif (
($quotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView' ||
($quotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#listView'
) {
$description .= $this->getListFeedDescription($quotedRecord);
} elseif (
($quotedRecord['$type'] ?? '') === 'app.bsky.graph.starterpack' ||
($quotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#starterPackViewBasic'
) {
$description .= $this->getStarterPackDescription($post['post']['embed']['record']);
} else {
$quotedAuthorDid = $quotedRecord['author']['did'];
$quotedDisplayName = $quotedRecord['author']['displayName'] ?? '';
@ -403,10 +409,16 @@ class BlueskyBridge extends BridgeAbstract
$description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
} elseif (isset($replyQuotedRecord['blocked']) && $replyQuotedRecord['blocked']) { //blocked by quote author
$description .= 'Author of quoted post has blocked OP.';
} elseif (($replyQuotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView') {
$description .= '</p>';
$description .= $this->getGeneratorViewDescription($replyQuotedRecord);
$description .= '<p>';
} elseif (
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView' ||
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#listView'
) {
$description .= $this->getListFeedDescription($replyQuotedRecord);
} elseif (
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.starterpack' ||
($replyQuotedRecord['$type'] ?? '') === 'app.bsky.graph.defs#starterPackViewBasic'
) {
$description .= $this->getStarterPackDescription($replyPost['embed']['record']);
} else {
$quotedAuthorDid = $replyQuotedRecord['author']['did'];
$quotedDisplayName = $replyQuotedRecord['author']['displayName'] ?? '';
@ -554,11 +566,19 @@ class BlueskyBridge extends BridgeAbstract
}
$title .= ', replying to ' . $replyAuthor;
}
if (isset($post['post']['embed']) && isset($post['post']['embed']['record'])) {
if (
isset($post['post']['embed']) &&
isset($post['post']['embed']['record']) &&
//if not starter pack, feed or list
($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.feed.defs#generatorView' &&
($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.graph.defs#listView' &&
($post['post']['embed']['record']['$type'] ?? '') !== 'app.bsky.graph.defs#starterPackViewBasic'
) {
if (isset($post['post']['embed']['record']['blocked'])) {
$quotedAuthor = 'blocked user';
} elseif (isset($post['post']['embed']['record']['notFound'])) {
$quotedAuthor = 'deleted post';
$quotedAuthor = 'deleted post';
} elseif (isset($post['post']['embed']['record']['detached'])) {
$quotedAuthor = 'detached post';
} else {
@ -587,34 +607,64 @@ class BlueskyBridge extends BridgeAbstract
{
$uri = 'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=' . urlencode($did) . '&filter=' . urlencode($filter) . '&limit=30';
if (Debug::isEnabled()) {
error_log($uri);
$this->logger->debug($uri);
}
$response = json_decode(getContents($uri), true);
return $response;
}
private function getGeneratorViewDescription(array $record): string
//Embed for generated feeds and lists
private function getListFeedDescription(array $record): string
{
$avatar = e($record['avatar']);
$displayName = e($record['displayName']);
$displayHandle = e($record['creator']['handle']);
$likeCount = e($record['likeCount']);
$feedViewAvatar = isset($record['avatar']) ? '<img src="' . preg_replace('/\/img\/avatar\//', '/img/avatar_thumbnail/', $record['avatar']) . '">' : '';
$feedViewName = e($record['displayName'] ?? $record['name']);
$feedViewDescription = e($record['description'] ?? '');
$authorDisplayName = e($record['creator']['displayName']);
$authorHandle = e($record['creator']['handle']);
$likeCount = isset($record['likeCount']) ? '<br>Liked by ' . e($record['likeCount']) . ' users' : '';
preg_match('/\/([^\/]+)$/', $record['uri'], $matches);
$uri = e('https://bsky.app/profile/' . $record['creator']['did'] . '/feed/' . $matches[1]);
if (($record['purpose'] ?? '') === 'app.bsky.graph.defs#modlist') {
$typeURL = '/lists/';
$typeDesc = 'moderation list';
} elseif (($record['purpose'] ?? '') === 'app.bsky.graph.defs#curatelist') {
$typeURL = '/lists/';
$typeDesc = 'list';
} else {
$typeURL = '/feed/';
$typeDesc = 'feed';
}
$uri = e('https://bsky.app/profile/' . $record['creator']['did'] . $typeURL . $matches[1]);
return <<<END
<a href="{$uri}" style="color: inherit;">
<div style="border: 1px solid #333; padding: 10px;">
<div style="display: flex; margin-bottom: 10px;">
<img src="{$avatar}" height="50" width="50" style="margin-right: 10px;">
<div style="display: flex; flex-direction: column; justify-content: center;">
<h3>{$displayName}</h3>
<span>Feed by @{$displayHandle}</span>
</div>
</div>
<span>Liked by {$likeCount} users</span>
</div>
</a>
<blockquote>
<b><a href="{$uri}">{$feedViewName}</a></b><br/>
Bluesky {$typeDesc} by <b>{$authorDisplayName}</b> <i>@{$authorHandle}</i>
<figure>
{$feedViewAvatar}
<figcaption>{$feedViewDescription}{$likeCount}</figcaption>
</figure>
</blockquote>
END;
}
/**
 * Build the HTML embed shown for a quoted Bluesky starter pack.
 *
 * @param array $record embed record; reads 'record' (name, description, list) and
 *                      'creator' (did, handle, displayName)
 * @return string a blockquote linking to the starter pack, or a plain failure message
 *                when the record data or the list key is unavailable
 */
private function getStarterPackDescription(array $record): string
{
    $failureMessage = 'Failed to get starter pack information.';
    if (!isset($record['record'])) {
        return $failureMessage;
    }
    $starterpackRecord = $record['record'];

    // The link key is the last path segment of the record's 'list' value.
    // NOTE(review): confirm that bsky.app starter-pack URLs use the list's key rather
    // than the key of the starter pack record itself.
    if (preg_match('/\/([^\/]+)$/', $starterpackRecord['list'] ?? '', $matches) !== 1) {
        return $failureMessage;
    }

    // Description and display name are read with a fallback, like the other embeds.
    $starterpackName = e($starterpackRecord['name'] ?? '');
    $starterpackDescription = e($starterpackRecord['description'] ?? '');
    $creatorDisplayName = e($record['creator']['displayName'] ?? '');
    $creatorHandle = e($record['creator']['handle']);
    $uri = e('https://bsky.app/starter-pack/' . $record['creator']['did'] . '/' . $matches[1]);

    return <<<END
<blockquote>
<b><a href="{$uri}">{$starterpackName}</a></b><br/>
Bluesky starter pack by <b>{$creatorDisplayName}</b> <i>@{$creatorHandle}</i><br/>
{$starterpackDescription}
</blockquote>
END;
}
}

View File

@ -26,21 +26,16 @@ class TheFarSideBridge extends BridgeAbstract
$image = $card->find('img', 0);
$imageUrl = $image->attr['data-src'];
// Images are downloaded to bypass the hotlink protection.
$image = getContents($imageUrl, ['Referer: ' . self::URI]);
// Encode image as base64
$imageBase64 = base64_encode($image);
$caption = '';
if ($card->find('figcaption', 0)) {
$caption = $card->find('figcaption', 0)->innertext;
}
$item['enclosures'][] = $imageUrl;
$item['content'] .= <<<EOD
<figure>
<img title="{$caption}" src="data:image/jpeg;base64,{$imageBase64}"/>
<img title="{$caption}" src="{$imageUrl}"/>
<figcaption>{$caption}</figcaption>
</figure>
<br/>