[TldrTechBridge] Fix and improve bridge (#4090)

This commit is contained in:
Korytov Pavel 2024-04-27 11:35:59 +03:00 committed by GitHub
parent d31f20758c
commit f3ca567159
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -22,11 +22,15 @@ class TldrTechBridge extends BridgeAbstract
'type' => 'list', 'type' => 'list',
'values' => [ 'values' => [
'Tech' => 'tech', 'Tech' => 'tech',
'Crypto' => 'crypto', 'Web Dev' => 'webdev',
'AI' => 'ai', 'AI' => 'ai',
'Web Dev' => 'engineering', 'Information Security' => 'infosec',
'Product Management' => 'product',
'DevOps' => 'devops',
'Crypto' => 'crypto',
'Design' => 'design',
'Marketing' => 'marketing',
'Founders' => 'founders', 'Founders' => 'founders',
'Cybersecurity' => 'cybersecurity'
], ],
'defaultValue' => 'tech' 'defaultValue' => 'tech'
] ]
@ -48,12 +52,17 @@ class TldrTechBridge extends BridgeAbstract
// Convert /<topic>/2023-01-01 to unix timestamp // Convert /<topic>/2023-01-01 to unix timestamp
$date_items = explode('/', $child->href); $date_items = explode('/', $child->href);
$date = strtotime(end($date_items)); $date = strtotime(end($date_items));
$this->items[] = [ $item_url = self::URI . ltrim($child->href, '/');
'uri' => self::URI . $child->href, try {
'title' => $child->plaintext, $this->items[] = [
'timestamp' => $date, 'uri' => self::URI . $child->href,
'content' => $this->extractContent(self::URI . $child->href), 'title' => $child->plaintext,
]; 'timestamp' => $date,
'content' => $this->extractContent($item_url),
];
} catch (HttpException $e) {
continue;
}
$added++; $added++;
if ($added >= $limit) { if ($added >= $limit) {
break; break;
@ -66,7 +75,7 @@ class TldrTechBridge extends BridgeAbstract
$html = getSimpleHTMLDOM($url); $html = getSimpleHTMLDOM($url);
$content = $html->find('div.content-center.mt-5', 0); $content = $html->find('div.content-center.mt-5', 0);
if (!$content) { if (!$content) {
return ''; throw new HttpException('Could not find content', 500);
} }
$subscribe_form = $content->find('div.mt-5 > div > form', 0); $subscribe_form = $content->find('div.mt-5 > div > form', 0);
if ($subscribe_form) { if ($subscribe_form) {