From c5f586497f3d23be61a6e8a5fe0f948f98a5b2f6 Mon Sep 17 00:00:00 2001 From: Mynacol Date: Sat, 16 Dec 2023 11:21:19 +0100 Subject: [PATCH 001/319] [GolemBridge] Remove multi-page page headers On multi-page articles like [1], all the pages after the first one have a page header that we add in the article content. When we tack the pages together again, we don't need those extra page headers. [1] https://www.golem.de/news/science-fiction-die-zehn-besten-filme-aus-den-spannenden-70ern-2312-179557.html --- bridges/GolemBridge.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index c1b03433..599d713a 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -116,9 +116,6 @@ class GolemBridge extends FeedExpander // reload html, as remove() is buggy $article = str_get_html($article->outertext); - if ($pageHeader = $article->find('header.paged-cluster-header h1', 0)) { - $item .= $pageHeader; - } $header = $article->find('header', 0); foreach ($header->find('p, figure') as $element) { From 3944ae68cbe8b8dd4fd653a288cffdb42cd3802e Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 19 Dec 2023 07:53:25 +0100 Subject: [PATCH 002/319] fix(reddit): use old.reddit.com instead of www.reddit.com (#3848) --- bridges/RedditBridge.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 2b7fe84f..bb3e7afc 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -1,10 +1,15 @@ Date: Tue, 19 Dec 2023 08:46:37 +0100 Subject: [PATCH 003/319] fix(gatesnotes): the unfucked their json (#3849) --- bridges/GatesNotesBridge.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 24ba9b2e..0d919968 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -23,12 +23,14 @@ class GatesNotesBridge extends BridgeAbstract $cleanedContent 
= str_replace([ '', '', - '\r\n', ], '', $rawContent); - $cleanedContent = str_replace('\"', '"', $cleanedContent); - $cleanedContent = trim($cleanedContent, '"'); + // $cleanedContent = str_replace('\"', '"', $cleanedContent); + // $cleanedContent = trim($cleanedContent, '"'); $json = Json::decode($cleanedContent, false); + if (is_string($json)) { + throw new \Exception('wtf? ' . $json); + } foreach ($json as $article) { $item = []; From 98a94855dc6b909b75629c6630c3795c68e7d560 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 20 Dec 2023 03:16:25 +0100 Subject: [PATCH 004/319] feat: embed response in http exception (#3847) --- bridges/GettrBridge.php | 10 +++++++++- config.default.ini.php | 3 ++- lib/contents.php | 15 ++------------- lib/http.php | 24 +++++++++++++++++++++++- templates/exception.html.php | 23 +++++++++++++++++++++++ 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/bridges/GettrBridge.php b/bridges/GettrBridge.php index 74804043..d3b9b899 100644 --- a/bridges/GettrBridge.php +++ b/bridges/GettrBridge.php @@ -33,7 +33,15 @@ class GettrBridge extends BridgeAbstract $user, min($this->getInput('limit'), 20) ); - $data = json_decode(getContents($api), false); + try { + $json = getContents($api); + } catch (HttpException $e) { + if ($e->getCode() === 400 && str_contains($e->response->getBody(), 'E_USER_NOTFOUND')) { + throw new \Exception('User not found: ' . 
$user); + } + throw $e; + } + $data = json_decode($json, false); foreach ($data->result->aux->post as $post) { $this->items[] = [ diff --git a/config.default.ini.php b/config.default.ini.php index 52786aef..201b1414 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -47,7 +47,8 @@ enable_debug_mode = false enable_maintenance_mode = false [http] -timeout = 60 +; Operation timeout in seconds +timeout = 30 useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" ; Max http response size in MB diff --git a/lib/contents.php b/lib/contents.php index a4def21a..8676a2a8 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -101,19 +101,8 @@ function getContents( $response = $response->withBody($cachedResponse->getBody()); break; default: - $exceptionMessage = sprintf( - '%s resulted in %s %s %s', - $url, - $response->getCode(), - $response->getStatusLine(), - // If debug, include a part of the response body in the exception message - Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', - ); - - if (CloudFlareException::isCloudFlareResponse($response)) { - throw new CloudFlareException($exceptionMessage, $response->getCode()); - } - throw new HttpException(trim($exceptionMessage), $response->getCode()); + $e = HttpException::fromResponse($response, $url); + throw $e; } if ($returnFull === true) { // todo: return the actual response object diff --git a/lib/http.php b/lib/http.php index eb70705f..bfa6b6bf 100644 --- a/lib/http.php +++ b/lib/http.php @@ -2,7 +2,29 @@ class HttpException extends \Exception { - // todo: should include the failing http response (if present) + public ?Response $response; + + public function __construct(string $message = '', int $statusCode = 0, ?Response $response = null) + { + parent::__construct($message, $statusCode); + $this->response = $response ?? 
new Response('', 0); + } + + public static function fromResponse(Response $response, string $url): HttpException + { + $message = sprintf( + '%s resulted in %s %s %s', + $url, + $response->getCode(), + $response->getStatusLine(), + // If debug, include a part of the response body in the exception message + Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', + ); + if (CloudFlareException::isCloudFlareResponse($response)) { + return new CloudFlareException($message, $response->getCode(), $response); + } + return new HttpException(trim($message), $response->getCode(), $response); + } } final class CloudFlareException extends HttpException diff --git a/templates/exception.html.php b/templates/exception.html.php index dac0ad26..e1dd97c1 100644 --- a/templates/exception.html.php +++ b/templates/exception.html.php @@ -16,6 +16,13 @@

+ getCode() === 400): ?> +

400 Bad Request

+

+ This is usually caused by an incorrectly constructed http request. +

+ + getCode() === 404): ?>

404 Page Not Found

@@ -40,6 +47,22 @@

+ getCode() === 0): ?> +

+ See + + https://curl.haxx.se/libcurl/c/libcurl-errors.html + + for description of the curl error code. +

+ +

+ + https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/getCode()) ?> + +

+ + getCode() === 10): ?>

The rss feed is completely empty

From 4e40e032b0fcac52bc74ba5994cefe1d00debf45 Mon Sep 17 00:00:00 2001 From: Mynacol Date: Wed, 20 Dec 2023 22:18:10 +0100 Subject: [PATCH 005/319] Remove matrix reference The main communications platform is still Libera.chat, matrix was only provided by the hosted IRC-Matrix bridge. The bridge was turned off already and won't come back. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 570fb87d..2a762d45 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ Officially hosted instance: https://rss-bridge.org/bridge01/ [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) -[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#rssbridge:libera.chat) [![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) ||| From 4c5cf89725e7ebd975eb6ec5136b5e3927df07fe Mon Sep 17 00:00:00 2001 From: Dag Date: Thu, 21 Dec 2023 09:18:21 +0100 Subject: [PATCH 006/319] fix(rumble): not all videos have a datetime (#3852) --- bridges/RumbleBridge.php | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bridges/RumbleBridge.php b/bridges/RumbleBridge.php index d5b82136..f6bfca7d 100644 --- a/bridges/RumbleBridge.php +++ b/bridges/RumbleBridge.php @@ -40,15 +40,18 @@ class RumbleBridge extends BridgeAbstract $dom = getSimpleHTMLDOM($url); foreach ($dom->find('ol.thumbnail__grid div.thumbnail__grid--item') as $video) { - $datetime = $video->find('time', 0)->getAttribute('datetime'); - - $this->items[] = [ + $item = [ 'title' => $video->find('h3', 
0)->plaintext, 'uri' => self::URI . $video->find('a', 0)->href, - 'timestamp' => (new \DateTimeImmutable($datetime))->getTimestamp(), 'author' => $account . '@rumble.com', 'content' => defaultLinkTo($video, self::URI)->innertext, ]; + $time = $video->find('time', 0); + if ($time) { + $publishedAt = new \DateTimeImmutable($time->getAttribute('datetime')); + $item['timestamp'] = $publishedAt->getTimestamp(); + } + $this->items[] = $item; } } From f40f99740588b09033917fd38132a99875495540 Mon Sep 17 00:00:00 2001 From: Dag Date: Thu, 21 Dec 2023 09:24:22 +0100 Subject: [PATCH 007/319] fix: various small fixes (#3853) --- bridges/ARDAudiothekBridge.php | 20 +++++++++++++------- bridges/CarThrottleBridge.php | 6 ++---- bridges/EZTVBridge.php | 2 +- bridges/TrelloBridge.php | 2 +- bridges/YoutubeBridge.php | 6 +++++- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php index 2c1958f3..619c0911 100644 --- a/bridges/ARDAudiothekBridge.php +++ b/bridges/ARDAudiothekBridge.php @@ -63,11 +63,13 @@ class ARDAudiothekBridge extends BridgeAbstract public function collectData() { - $oldTz = date_default_timezone_get(); + $path = $this->getInput('path'); + $limit = $this->getInput('limit'); + $oldTz = date_default_timezone_get(); date_default_timezone_set('Europe/Berlin'); - $pathComponents = explode('/', $this->getInput('path')); + $pathComponents = explode('/', $path); if (empty($pathComponents)) { returnClientError('Path may not be empty'); } @@ -82,17 +84,21 @@ class ARDAudiothekBridge extends BridgeAbstract } $url = self::APIENDPOINT . 'programsets/' . $showID . '/'; - $rawJSON = getContents($url); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json1 = getContents($url); + $data1 = Json::decode($json1, false); + $processedJSON = $data1->data->programSet; + if (!$processedJSON) { + throw new \Exception('Unable to find show id: ' . 
$showID); + } - $limit = $this->getInput('limit'); $answerLength = 1; $offset = 0; $numberOfElements = 1; while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) { - $rawJSON = getContents($url . '?offset=' . $offset); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json2 = getContents($url . '?offset=' . $offset); + $data2 = Json::decode($json2, false); + $processedJSON = $data2->data->programSet; $answerLength = count($processedJSON->items->nodes); $offset = $offset + $answerLength; diff --git a/bridges/CarThrottleBridge.php b/bridges/CarThrottleBridge.php index 913b686c..70d7b54e 100644 --- a/bridges/CarThrottleBridge.php +++ b/bridges/CarThrottleBridge.php @@ -9,8 +9,7 @@ class CarThrottleBridge extends BridgeAbstract public function collectData() { - $news = getSimpleHTMLDOMCached(self::URI . 'news') - or returnServerError('could not retrieve page'); + $news = getSimpleHTMLDOMCached(self::URI . 'news'); $this->items[] = []; @@ -22,8 +21,7 @@ class CarThrottleBridge extends BridgeAbstract $item['uri'] = self::URI . $titleElement->getAttribute('href'); $item['title'] = $titleElement->innertext; - $articlePage = getSimpleHTMLDOMCached($item['uri']) - or returnServerError('could not retrieve page'); + $articlePage = getSimpleHTMLDOMCached($item['uri']); $authorDiv = $articlePage->find('div.author div'); if ($authorDiv) { diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 73318f0c..25a88124 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -96,7 +96,7 @@ class EZTVBridge extends BridgeAbstract protected function getItemFromTorrent($torrent) { $item = []; - $item['uri'] = $torrent->episode_url; + $item['uri'] = $torrent->episode_url ?? 
$torrent->torrent_url; $item['author'] = $torrent->imdb_id; $item['timestamp'] = $torrent->date_released_unix; $item['title'] = $torrent->title; diff --git a/bridges/TrelloBridge.php b/bridges/TrelloBridge.php index a1b5cfb8..cab2bde2 100644 --- a/bridges/TrelloBridge.php +++ b/bridges/TrelloBridge.php @@ -648,7 +648,7 @@ class TrelloBridge extends BridgeAbstract $action->type ]; if (isset($action->data->card)) { - $item['categories'][] = $action->data->card->name; + $item['categories'][] = $action->data->card->name ?? $action->data->card->id; $item['uri'] = 'https://trello.com/c/' . $action->data->card->shortLink . '#action-' diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 993f8c90..6a29e387 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -164,7 +164,11 @@ class YoutubeBridge extends BridgeAbstract $jsonData = $this->extractJsonFromHtml($html); // TODO: this method returns only first 100 video items // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0] ?? null; + if (!$jsonData) { + // playlist probably doesnt exists + throw new \Exception('Unable to find playlist: ' . 
$url_listing); + } $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; $item_count = count($jsonData); From ea2b4d7506f0feded2899cb0aab351fa7dca3194 Mon Sep 17 00:00:00 2001 From: July Date: Sat, 23 Dec 2023 03:42:37 -0500 Subject: [PATCH 008/319] [ArsTechnicaBridge] Properly handle paged content (#3855) * [ArsTechnicaBridge] Properly handle paged content * [ArsTechnicaBridge] Remove normal site ad wrapper --- bridges/ArsTechnicaBridge.php | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 613c1c58..2c631871 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -35,39 +35,34 @@ class ArsTechnicaBridge extends FeedExpander protected function parseItem(array $item) { - $item_html = getSimpleHTMLDOMCached($item['uri'] . '&'); + $item_html = getSimpleHTMLDOMCached($item['uri']); $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] = $item_html->find('.article-content', 0); - $item_content = $item_html->find('.article-content.post-page', 0); - if (!$item_content) { - // The dom selector probably broke. Let's just return the item as-is - return $item; + $pages = $item_html->find('nav.page-numbers > .numbers > a', -2); + if (null !== $pages) { + for ($i = 2; $i <= $pages->innertext; $i++) { + $page_url = $item['uri'] . '&page=' . 
$i; + $page_html = getSimpleHTMLDOMCached($page_url); + $page_html = defaultLinkTo($page_html, self::URI); + $item['content'] .= $page_html->find('.article-content', 0); + } + $item['content'] = str_get_html($item['content']); } - $item['content'] = $item_content; - // remove various ars advertising $item['content']->find('#social-left', 0)->remove(); foreach ($item['content']->find('.ars-component-buy-box') as $ad) { $ad->remove(); } - foreach ($item['content']->find('i-amphtml-sizer') as $ad) { + foreach ($item['content']->find('.ad_wrapper') as $ad) { $ad->remove(); } foreach ($item['content']->find('.sidebar') as $ad) { $ad->remove(); } - foreach ($item['content']->find('a') as $link) { //remove amp redirect links - $url = $link->getAttribute('href'); - if (str_contains($url, 'go.redirectingat.com')) { - $url = extractFromDelimiters($url, 'url=', '&'); - $url = urldecode($url); - $link->setAttribute('href', $url); - } - } - - $item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content'])); + $item['content'] = backgroundToImg($item['content']); $item['uid'] = explode('=', $item['uri'])[1]; From 98dafb61ae5519b7c6c4be2d7dd4d66b6bd6a4eb Mon Sep 17 00:00:00 2001 From: xduugu Date: Sat, 23 Dec 2023 08:43:01 +0000 Subject: [PATCH 009/319] [ARDAudiothekBridge] add duration to feed items (#3854) --- bridges/ARDAudiothekBridge.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php index 619c0911..02b6b007 100644 --- a/bridges/ARDAudiothekBridge.php +++ b/bridges/ARDAudiothekBridge.php @@ -125,6 +125,10 @@ class ARDAudiothekBridge extends BridgeAbstract $item['categories'] = [$category]; } + $item['itunes'] = [ + 'duration' => $audio->duration, + ]; + $this->items[] = $item; } } From 9f163ab7c651f44c1d6266ca817aca2c0f208f51 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Mon, 25 Dec 2023 14:51:51 +0100 Subject: [PATCH 010/319] 
[FreeTelechargerBridge] Update to the new URL (#3856) * [FreeTelechargerBridge] Update to the new URL Website has changed URL and some design : this bridge is now adapted to thoses changes * [FreeTelechargerBridge] Fix example value Example valuse seems to use an "old" template, switch to a newer example that use the new template * [FreeTelechargerBridge] Fix notice Fix notice --- bridges/FreeTelechargerBridge.php | 61 ++++++++++++++++--------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/bridges/FreeTelechargerBridge.php b/bridges/FreeTelechargerBridge.php index 8362b4ff..f0e5d35a 100644 --- a/bridges/FreeTelechargerBridge.php +++ b/bridges/FreeTelechargerBridge.php @@ -3,7 +3,7 @@ class FreeTelechargerBridge extends BridgeAbstract { const NAME = 'Free-Telecharger'; - const URI = 'https://www.free-telecharger.live/'; + const URI = 'https://www.free-telecharger.art/'; const DESCRIPTION = 'Suivi de série sur Free-Telecharger'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = [ @@ -12,43 +12,46 @@ class FreeTelechargerBridge extends BridgeAbstract 'name' => 'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://www.free-telecharger.live/', + 'title' => 'URL d\'une série sans le https://www.free-telecharger.art/', 'pattern' => 'series.*\.html', - 'exampleValue' => 'series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html' + 'exampleValue' => 'series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html' ], ] ]; const CACHE_TIMEOUT = 3600; + private string $showTitle; + private string $showTechDetails; + public function collectData() { - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')); + $html = getSimpleHTMLDOM(self::URI . 
$this->getInput('url')); - // Find all block content of the page - $blocks = $html->find('div[class=block1]'); + // Find all block content of the page + $blocks = $html->find('div[class=block1]'); - // Global Infos block - $infosBlock = $blocks[0]; - // Links block - $linksBlock = $blocks[2]; + // Global Infos block + $infosBlock = $blocks[0]; + // Links block + $linksBlock = $blocks[2]; - // Extract Global Show infos - $this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext); - $this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext); + // Extract Global Show infos + $this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext); + $this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext); - // Get Episodes names and links - $episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#ff6600]'); - $links = $linksBlock->find('div[id=link]', 0)->find('a'); + // Get Episodes names and links + $episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#e93100]'); + $links = $linksBlock->find('div[id=link]', 0)->find('a'); foreach ($episodes as $index => $episode) { - $item = []; // Create an empty item - $item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-'); - $item['uri'] = $links[$index]->href; - $item['content'] = '' . $item['title'] . ''; - $item['uid'] = hash('md5', $item['uri']); + $item = []; // Create an empty item + $item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-'); + $item['uri'] = $links[$index]->href; + $item['content'] = '' . $item['title'] . 
''; + $item['uid'] = hash('md5', $item['uri']); - $this->items[] = $item; // Add this item to the list + $this->items[] = $item; // Add this item to the list } } @@ -57,7 +60,7 @@ class FreeTelechargerBridge extends BridgeAbstract switch ($this->queriedContext) { case 'Suivi de publication de série': return $this->showTitle . ' ' . $this->showTechDetails . ' - ' . self::NAME; - break; + break; default: return self::NAME; } @@ -68,7 +71,7 @@ class FreeTelechargerBridge extends BridgeAbstract switch ($this->queriedContext) { case 'Suivi de publication de série': return self::URI . $this->getInput('url'); - break; + break; default: return self::URI; } @@ -76,14 +79,14 @@ class FreeTelechargerBridge extends BridgeAbstract public function detectParameters($url) { - // Example: https://www.free-telecharger.live/series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html + // Example: https://www.free-telecharger.art/series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html $params = []; - $regex = '/^https:\/\/www.*\.free-telecharger\.live\/(series.*\.html)/'; + $regex = '/^https:\/\/www.*\.free-telecharger\.art\/(series.*\.html)/'; if (preg_match($regex, $url, $matches) > 0) { - $params['context'] = 'Suivi de publication de série'; - $params['url'] = urldecode($matches[1]); - return $params; + $params['context'] = 'Suivi de publication de série'; + $params['url'] = urldecode($matches[1]); + return $params; } return null; From c9074facfed51371a59dd189648c5a80751feb4e Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Tue, 26 Dec 2023 12:18:42 +0100 Subject: [PATCH 011/319] [GreatFonBridge] Remove bridge (#3857) Website is unreliable, it's not useful to keep this bridge. 
--- bridges/GreatFonBridge.php | 140 ------------------------------------- 1 file changed, 140 deletions(-) delete mode 100644 bridges/GreatFonBridge.php diff --git a/bridges/GreatFonBridge.php b/bridges/GreatFonBridge.php deleted file mode 100644 index 2951634c..00000000 --- a/bridges/GreatFonBridge.php +++ /dev/null @@ -1,140 +0,0 @@ - [ - 'u' => [ - 'name' => 'username', - 'type' => 'text', - 'title' => 'Instagram username you want to follow', - 'exampleValue' => 'aesoprockwins', - 'required' => true, - ], - ] - ]; - const TEST_DETECT_PARAMETERS = [ - 'https://www.instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram'], - 'https://instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram'], - 'https://greatfon.com/v/instagram' => ['context' => 'Username', 'u' => 'instagram'], - 'https://www.greatfon.com/v/instagram' => ['context' => 'Username', 'u' => 'instagram'], - ]; - - public function collectData() - { - $username = $this->getInput('u'); - $html = getSimpleHTMLDOMCached(self::URI . '/v/' . $username); - $html = defaultLinkTo($html, self::URI); - - foreach ($html->find('div[class*=content__item]') as $post) { - // Skip the ads - if (!str_contains($post->class, 'ads')) { - $url = $post->find('a[href^=https://greatfon.com/c/]', 0)->href; - $date = $this->parseDate($post->find('div[class=content__time-text]', 0)->plaintext); - $description = $post->find('img', 0)->alt; - $imageUrl = $post->find('img', 0)->src; - $author = $username; - $uid = $url; - $title = 'Post - ' . $username . ' - ' . $this->descriptionToTitle($description); - - // Checking post type - $isVideo = (bool) $post->find('div[class=content__camera]', 0); - $videoNote = $isVideo ? '

(video)

' : ''; - - $this->items[] = [ - 'uri' => $url, - 'author' => $author, - 'timestamp' => $date, - 'title' => $title, - 'thumbnail' => $imageUrl, - 'enclosures' => [$imageUrl], - 'content' => << - {$description} - -{$videoNote} -

{$description}

-HTML, - 'uid' => $uid - ]; - } - } - } - - private function parseDate($content) - { - // Parse date, and transform the date into a timetamp, even in a case of a relative date - $date = date_create(); - - // Content trimmed to be sure that the "article" is at the beginning of the string and remove "ago" to make it a valid PHP date interval - $dateString = trim(str_replace(' ago', '', $content)); - - // Replace the article "an" or "a" by the number "1" to be a valid PHP date interval - $dateString = preg_replace('/^((an|a) )/m', '1 ', $dateString); - - $relativeDate = date_interval_create_from_date_string($dateString); - if ($relativeDate) { - date_sub($date, $relativeDate); - // As the relative interval has the precision of a day for date older than 24 hours, we can remove the hour of the date, as it is not relevant - date_time_set($date, 0, 0, 0, 0); - } else { - $this->logger->info(sprintf('Unable to parse date string: %s', $dateString)); - } - return date_format($date, 'r'); - } - - public function getURI() - { - if (!is_null($this->getInput('u'))) { - return urljoin(self::URI, '/v/' . $this->getInput('u')); - } - - return parent::getURI(); - } - - public function getIcon() - { - return static::URI . '/images/favicon-hub-3ede543aa6d1225e8dc016ccff6879c8.ico?vsn=d'; - } - - private function descriptionToTitle($description) - { - return strlen($description) > 60 ? mb_substr($description, 0, 57) . '...' : $description; - } - - public function getName() - { - if (!is_null($this->getInput('u'))) { - return 'Username ' . $this->getInput('u') . ' - GreatFon Bridge'; - } - return parent::getName(); - } - - public function detectParameters($url) - { - $regex = '/^http(s|):\/\/((www\.|)(instagram.com)\/([a-zA-Z0-9_\.]{1,30})(\/reels\/|\/tagged\/|\/|)|(www\.|)(greatfon.com)\/v\/([a-zA-Z0-9_\.]{1,30}))/'; - if (preg_match($regex, $url, $matches) > 0) { - $params['context'] = 'Username'; - // Extract detected domain using the regex - $domain = $matches[8] ?? 
$matches[4]; - if ($domain == 'greatfon.com') { - $params['u'] = $matches[9]; - return $params; - } elseif ($domain == 'instagram.com') { - $params['u'] = $matches[5]; - return $params; - } else { - return null; - } - } else { - return null; - } - } -} From 19384463857c35b1d3ef0a7dbbbcc40d2f0cba0c Mon Sep 17 00:00:00 2001 From: Florent V Date: Tue, 26 Dec 2023 12:19:08 +0100 Subject: [PATCH 012/319] [EdfPricesBridge] add new bridge (#3846) * [EdfPricesBridge] add new brige * [EdfPricesBridge] bad refactor * [EdfPricesBridge] support php 7.4 --------- Co-authored-by: Florent VIOLLEAU --- bridges/EdfPricesBridge.php | 106 ++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 bridges/EdfPricesBridge.php diff --git a/bridges/EdfPricesBridge.php b/bridges/EdfPricesBridge.php new file mode 100644 index 00000000..f67ed30b --- /dev/null +++ b/bridges/EdfPricesBridge.php @@ -0,0 +1,106 @@ + [ + 'name' => 'Choisir un contrat', + 'type' => 'list', + // we can add later HCHP, EJP, base + 'values' => ['Tempo' => '/energie/edf/tarifs/tempo'], + ] + ] + ]; + const CACHE_TIMEOUT = 7200; // 2h + + /** + * @param simple_html_dom $html + * @param string $contractUri + * @return void + */ + private function tempo(simple_html_dom $html, string $contractUri): void + { + // current color and next + $daysDom = $html->find('#calendrier', 0)->nextSibling()->find('.card--ejp'); + if ($daysDom && count($daysDom) === 2) { + foreach ($daysDom as $dayDom) { + $day = trim($dayDom->find('.card__title', 0)->innertext) . '/' . (new \DateTime('now'))->format(('Y')); + $dayColor = $dayDom->find('.card-ejp__icon span', 0)->innertext; + + $text = $day . ' - ' . $dayColor; + $item['uri'] = self::URI . 
$contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + + // colors + $ulDom = $html->find('#tarif-de-l-offre-edf-tempo-current-date-html-year', 0)->nextSibling()->nextSibling()->nextSibling(); + $elementsDom = $ulDom->find('li'); + if ($elementsDom && count($elementsDom) === 3) { + foreach ($elementsDom as $elementDom) { + $item = []; + + $matches = []; + preg_match_all('/Jour (.*) : Heures (.*) : (.*) € \/ Heures (.*) : (.*) €/um', $elementDom->innertext, $matches, PREG_SET_ORDER, 0); + + if ($matches && count($matches[0]) === 6) { + for ($i = 0; $i < 2; $i++) { + $text = 'Jour ' . $matches[0][1] . ' - Heures ' . $matches[0][2 + 2 * $i] . ' : ' . $matches[0][3 + 2 * $i] . '€'; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + } + } + + // powers + $ulPowerContract = $ulDom->nextSibling()->nextSibling(); + $elementsPowerContractDom = $ulPowerContract->find('li'); + if ($elementsPowerContractDom && count($elementsPowerContractDom) === 4) { + foreach ($elementsPowerContractDom as $elementPowerContractDom) { + $item = []; + + $matches = []; + preg_match_all('/(.*) kVA : (.*) €/um', $elementPowerContractDom->innertext, $matches, PREG_SET_ORDER, 0); + + if ($matches && count($matches[0]) === 3) { + $text = $matches[0][1] . ' kVA : ' . $matches[0][2] . '€'; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + } + } + + public function collectData() + { + $contract = $this->getKey('contract'); + $contractUri = $this->getInput('contract'); + $html = getSimpleHTMLDOM(self::URI . 
$contractUri); + + if ($contract === 'Tempo') { + $this->tempo($html, $contractUri); + } + } +} From ad2d4c7b1b538868070e0264f3692542883cac50 Mon Sep 17 00:00:00 2001 From: Florent V Date: Tue, 26 Dec 2023 12:20:49 +0100 Subject: [PATCH 013/319] [BridgeAbstract] use getParameters instead of static to allow overriding it from bridges (#3858) --- lib/BridgeAbstract.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index a7b811a8..0f86f454 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -154,8 +154,8 @@ abstract class BridgeAbstract { // Import and assign all inputs to their context foreach ($input as $name => $value) { - foreach (static::PARAMETERS as $context => $set) { - if (array_key_exists($name, static::PARAMETERS[$context])) { + foreach ($this->getParameters() as $context => $set) { + if (array_key_exists($name, $this->getParameters()[$context])) { $this->inputs[$context][$name]['value'] = $value; } } @@ -163,16 +163,16 @@ abstract class BridgeAbstract // Apply default values to missing data $contexts = [$queriedContext]; - if (array_key_exists('global', static::PARAMETERS)) { + if (array_key_exists('global', $this->getParameters())) { $contexts[] = 'global'; } foreach ($contexts as $context) { - if (!isset(static::PARAMETERS[$context])) { + if (!isset($this->getParameters()[$context])) { // unknown context provided by client, throw exception here? or continue? 
} - foreach (static::PARAMETERS[$context] as $name => $properties) { + foreach ($this->getParameters()[$context] as $name => $properties) { if (isset($this->inputs[$context][$name]['value'])) { continue; } @@ -204,8 +204,8 @@ abstract class BridgeAbstract } // Copy global parameter values to the guessed context - if (array_key_exists('global', static::PARAMETERS)) { - foreach (static::PARAMETERS['global'] as $name => $properties) { + if (array_key_exists('global', $this->getParameters())) { + foreach ($this->getParameters()['global'] as $name => $properties) { if (isset($input[$name])) { $value = $input[$name]; } else { @@ -246,8 +246,8 @@ abstract class BridgeAbstract if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { return null; } - if (array_key_exists('global', static::PARAMETERS)) { - if (array_key_exists($input, static::PARAMETERS['global'])) { + if (array_key_exists('global', $this->getParameters())) { + if (array_key_exists($input, $this->getParameters()['global'])) { $context = 'global'; } } @@ -256,7 +256,7 @@ abstract class BridgeAbstract } $needle = $this->inputs[$this->queriedContext][$input]['value']; - foreach (static::PARAMETERS[$context][$input]['values'] as $first_level_key => $first_level_value) { + foreach ($this->getParameters()[$context][$input]['values'] as $first_level_key => $first_level_value) { if (!is_array($first_level_value) && $needle === (string)$first_level_value) { return $first_level_key; } elseif (is_array($first_level_value)) { @@ -273,7 +273,7 @@ abstract class BridgeAbstract { $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; if ( - empty(static::PARAMETERS) + empty($this->getParameters()) && preg_match($regex, $url, $urlMatches) > 0 && preg_match($regex, static::URI, $bridgeUriMatches) > 0 && $urlMatches[3] === $bridgeUriMatches[3] From c8178e1fc409635af1a40167c4f511feb8d3df7f Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Wed, 27 Dec 2023 13:17:49 +0100 
Subject: [PATCH 014/319] [SensCritique] Fix bridge (#3860) --- bridges/SensCritiqueBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index b823b55c..005704e1 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -57,7 +57,7 @@ class SensCritiqueBridge extends BridgeAbstract } $html = getSimpleHTMLDOM($uri); // This selector name looks like it's automatically generated - $list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0); + $list = $html->find('div[data-testid="row"]', 0); $this->extractDataFromList($list); } @@ -69,6 +69,7 @@ class SensCritiqueBridge extends BridgeAbstract if ($list === null) { returnClientError('Cannot extract data from list'); } + foreach ($list->find('div[data-testid="product-list-item"]') as $movie) { $item = []; $item['title'] = $movie->find('h2 a', 0)->plaintext; From 5ab1924c4f96937885e12bcbd16b7bfb83a3c15b Mon Sep 17 00:00:00 2001 From: tillcash Date: Thu, 28 Dec 2023 18:20:34 +0530 Subject: [PATCH 015/319] Add WorldbankBridge and OglafBridge (#3862) * Add WorldbankBridge and OglafBridge * Update OglafBridge.php Remove redundant parent call to parseItem and rename formal argument to improve code clarity. * Update WorldbankBridge.php fix lint --- bridges/OglafBridge.php | 35 +++++++++++++++++++++++++ bridges/WorldbankBridge.php | 52 +++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 bridges/OglafBridge.php create mode 100644 bridges/WorldbankBridge.php diff --git a/bridges/OglafBridge.php b/bridges/OglafBridge.php new file mode 100644 index 00000000..1f4bc1af --- /dev/null +++ b/bridges/OglafBridge.php @@ -0,0 +1,35 @@ + [ + 'name' => 'limit (max 20)', + 'type' => 'number', + 'defaultValue' => 10, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $url = self::URI . 
'feeds/rss/'; + $limit = min(20, $this->getInput('limit')); + $this->collectExpandableDatas($url, $limit); + } + + protected function parseItem($item) + { + $html = getSimpleHTMLDOMCached($item['uri']); + $comicImage = $html->find('img[id="strip"]', 0); + $item['content'] = $comicImage; + + return $item; + } +} diff --git a/bridges/WorldbankBridge.php b/bridges/WorldbankBridge.php new file mode 100644 index 00000000..9b40e86e --- /dev/null +++ b/bridges/WorldbankBridge.php @@ -0,0 +1,52 @@ + [ + 'name' => 'Language', + 'type' => 'list', + 'defaultValue' => 'English', + 'values' => [ + 'English' => 'English', + 'French' => 'French', + ] + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 5, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $apiUrl = 'https://search.worldbank.org/api/v2/news?format=json&rows=' + . min(100, $this->getInput('limit')) + . '&lang_exact=' . $this->getInput('lang'); + + $jsonData = json_decode(getContents($apiUrl)); + + // Remove unnecessary data from the original object + if (isset($jsonData->documents->facets)) { + unset($jsonData->documents->facets); + } + + foreach ($jsonData->documents as $element) { + $this->items[] = [ + 'uid' => $element->id, + 'timestamp' => $element->lnchdt, + 'title' => $element->title->{'cdata!'}, + 'uri' => $element->url, + 'content' => $element->descr->{'cdata!'}, + ]; + } + } +} From f67d2eb88adc597cc57fbfc402c28725b671e5a3 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Thu, 28 Dec 2023 13:53:06 +0100 Subject: [PATCH 016/319] [TikTokBridge] Use embed iframe to bypass scraping protection (#3864) The Tiktok Website was totally changed using some "scraping" protection (passing as parameter value generated somewhere in the bunch of javascript to the "API URL" that was before). The iframe embed does not have such protection. It has less information (no date, ...) but it's better than nothing ! 
--- bridges/TikTokBridge.php | 66 ++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 73a18b04..6590df66 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -8,12 +8,12 @@ class TikTokBridge extends BridgeAbstract const MAINTAINER = 'VerifiedJoseph'; const PARAMETERS = [ 'By user' => [ - 'username' => [ - 'name' => 'Username', - 'type' => 'text', - 'required' => true, - 'exampleValue' => '@tiktok', - ] + 'username' => [ + 'name' => 'Username', + 'type' => 'text', + 'required' => true, + 'exampleValue' => '@tiktok', + ] ]]; const TEST_DETECT_PARAMETERS = [ @@ -24,53 +24,33 @@ class TikTokBridge extends BridgeAbstract const CACHE_TIMEOUT = 900; // 15 minutes - private $feedName = ''; - public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); + $html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $this->processUsername()); - $title = $html->find('h1', 0)->plaintext ?? self::NAME; - $this->feedName = htmlspecialchars_decode($title); + $author = $html->find('span[data-e2e=creator-profile-userInfo-TUXText]', 0)->plaintext ?? self::NAME; - $var = $html->find('script[id=SIGI_STATE]', 0); - if (!$var) { - throw new \Exception('Unable to find tiktok user data for ' . $this->processUsername()); - } - $SIGI_STATE_RAW = $var->innertext; - $SIGI_STATE = Json::decode($SIGI_STATE_RAW, false); + $videos = $html->find('div[data-e2e=common-videoList-VideoContainer]'); - if (!isset($SIGI_STATE->ItemModule)) { - return; - } - - foreach ($SIGI_STATE->ItemModule as $key => $value) { + foreach ($videos as $video) { $item = []; - $link = 'https://www.tiktok.com/@' . $value->author . '/video/' . 
$value->id; - $image = $value->video->dynamicCover; - if (empty($image)) { - $image = $value->video->cover; - } - $views = $value->stats->playCount; - $hastags = []; - foreach ($value->textExtra as $tag) { - $hastags[] = $tag->hashtagName; - } - $hastags_str = ''; - foreach ($hastags as $tag) { - $hastags_str .= '#' . $tag . ' '; - } + // Handle link "untracking" + $linkParts = parse_url($video->find('a', 0)->href); + $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path']; + + $image = $video->find('video', 0)->poster; + $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext; + + $enclosures = [$image]; $item['uri'] = $link; - $item['title'] = $value->desc; - $item['timestamp'] = $value->createTime; - $item['author'] = '@' . $value->author; - $item['enclosures'][] = $image; - $item['categories'] = $hastags; + $item['title'] = 'Video'; + $item['author'] = '@' . $author; + $item['enclosures'] = $enclosures; $item['content'] = << -

{$views} views


Hashtags: {$hastags_str} +

{$views} views


EOD; $this->items[] = $item; @@ -91,7 +71,7 @@ EOD; { switch ($this->queriedContext) { case 'By user': - return $this->feedName . ' (' . $this->processUsername() . ') - TikTok'; + return $this->processUsername() . ' - TikTok'; default: return parent::getName(); } From 2032ed18c49a82fc2e634dfa6f2b91e652228876 Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:51:15 +0100 Subject: [PATCH 017/319] [SensCritique] Update the content to add the image (#3865) --- bridges/SensCritiqueBridge.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index 005704e1..f6a2ea16 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -71,10 +71,17 @@ class SensCritiqueBridge extends BridgeAbstract } foreach ($list->find('div[data-testid="product-list-item"]') as $movie) { + $synopsis = $movie->find('p[data-testid="synopsis"]', 0); + $item = []; $item['title'] = $movie->find('h2 a', 0)->plaintext; - // todo: fix image - $item['content'] = $movie->innertext; + $item['content'] = sprintf( + '

%s

%s

%s', + $movie->find('span[data-testid="poster-img-wrapper"]', 0)->{'data-srcname'}, + $movie->find('p[data-testid="other-infos"]', 0)->innertext, + $movie->find('p[data-testid="creators"]', 0)->innertext, + $synopsis ? sprintf('

%s

', $synopsis->innertext) : '' + ); $item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $this->items[] = $item; From 7dbe10658213e165c07faac01a8c79771b4917c8 Mon Sep 17 00:00:00 2001 From: Dag Date: Thu, 28 Dec 2023 23:26:14 +0100 Subject: [PATCH 018/319] docs(nginx, phpfpm): improve install and config instructions (#3866) --- README.md | 186 ++++++++++++++++++++++++++++++++++--------- caches/FileCache.php | 1 + index.php | 3 +- 3 files changed, 152 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 2a762d45..34efc8de 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,15 @@ ![RSS-Bridge](static/logo_600px.png) -RSS-Bridge is a web application. +RSS-Bridge is a PHP web application. It generates web feeds for websites that don't have one. Officially hosted instance: https://rss-bridge.org/bridge01/ +IRC channel #rssbridge at https://libera.chat/ + + [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) @@ -48,53 +51,146 @@ Check out RSS-Bridge right now on https://rss-bridge.org/bridge01/ Alternatively find another [public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). -## Tutorial - -### Install with composer or git - Requires minimum PHP 7.4. +## Tutorial + +### How to install on traditional shared web hosting + +RSS-Bridge can basically be unzipped in a web folder. Should be working instantly. 
+ +Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip + +### How to install on Debian 12 (nginx + php-fpm) + +These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month). + ```shell -apt install nginx php-fpm php-mbstring php-simplexml php-curl +timedatectl set-timezone Europe/Oslo + +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl + +# Create a new user account +useradd --shell /bin/bash --create-home rss-bridge + +cd /var/www + +# Create folder and change ownership +mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/ + +# Become user +su rss-bridge + +# Fetch latest master +git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/ +cd rss-bridge + +# Copy over the default config +cp -v config.default.ini.php config.ini.php + +# Give full permissions only to owner (rss-bridge) +chmod 700 -R ./ + +# Give read and execute to others (nginx and php-fpm) +chmod o+rx ./ ./static + +# Give read to others (nginx) +chmod o+r -R ./static ``` +Nginx config: + +```nginx +# /etc/nginx/sites-enabled/rss-bridge.conf + +server { + listen 80; + server_name example.com; + access_log /var/log/nginx/rss-bridge.access.log; + error_log /var/log/nginx/rss-bridge.error.log; + + # Intentionally not setting a root folder here + + # autoindex is off by default but feels good to explicitly turn off + autoindex off; + + # Static content only served here + location /static/ { + alias /var/www/rss-bridge/static/; + } + + # Pass off to php-fpm only when location is exactly / + location = / { + root /var/www/rss-bridge/; + include snippets/fastcgi-php.conf; + fastcgi_pass unix:/run/php/rss-bridge.sock; + } + + # Reduce spam + location = /favicon.ico { + access_log off; + log_not_found off; + } + + # Reduce spam + location = /robots.txt { + access_log off; + log_not_found off; + } +} +``` + +PHP FPM pool config: +```ini +; 
/etc/php/8.2/fpm/pool.d/rss-bridge.conf + +[rss-bridge] + +user = rss-bridge +group = rss-bridge + +listen = /run/php/rss-bridge.sock + +listen.owner = www-data +listen.group = www-data + +pm = static +pm.max_children = 10 +pm.max_requests = 500 +``` + +PHP ini config: +```ini +; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini + +max_execution_time = 20 +memory_limit = 64M +``` + +Restart fpm and nginx: + +```shell +# Lint and restart php-fpm +php-fpm8.2 -t +systemctl restart php8.2-fpm + +# Lint and restart nginx +nginx -t +systemctl restart nginx +``` + +### How to install from Composer + +Install the latest release. + ```shell cd /var/www composer create-project -v --no-dev rss-bridge/rss-bridge ``` -```shell -cd /var/www -git clone https://github.com/RSS-Bridge/rss-bridge.git -``` +### How to install with Caddy -Config: - -```shell -# Give the http user write permission to the cache folder -chown www-data:www-data /var/www/rss-bridge/cache - -# Optionally copy over the default config file -cp config.default.ini.php config.ini.php -``` - -Example config for nginx: - -```nginx -# /etc/nginx/sites-enabled/rssbridge -server { - listen 80; - server_name example.com; - root /var/www/rss-bridge; - index index.php; - - location ~ \.php$ { - include snippets/fastcgi-php.conf; - fastcgi_read_timeout 60s; - fastcgi_pass unix:/run/php/php-fpm.sock; - } -} -``` +TODO. 
See https://github.com/RSS-Bridge/rss-bridge/issues/3785 ### Install from Docker Hub: @@ -163,6 +259,22 @@ Learn more in ## How-to +### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" + +```shell +# Give rssbridge ownership +chown rssbridge:rssbridge -R /var/www/rss-bridge/cache + +# Or, give www-data ownership +chown www-data:www-data -R /var/www/rss-bridge/cache + +# Or, give everyone write permission +chmod 777 -R /var/www/rss-bridge/cache + +# Or last ditch effort (CAREFUL) +rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ +``` + ### How to create a new bridge from scratch Create the new bridge in e.g. `bridges/BearBlogBridge.php`: diff --git a/caches/FileCache.php b/caches/FileCache.php index 09d12791..7a0eb81d 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -54,6 +54,7 @@ class FileCache implements CacheInterface ]; $cacheFile = $this->createCacheFile($key); $bytes = file_put_contents($cacheFile, serialize($item), LOCK_EX); + // todo: Consider tightening the permissions of the created file. It usually allow others to read, depending on umask if ($bytes === false) { // Consider just logging the error here throw new \Exception(sprintf('Failed to write to: %s', $cacheFile)); diff --git a/index.php b/index.php index 14713e06..c2c546a1 100644 --- a/index.php +++ b/index.php @@ -8,7 +8,8 @@ require_once __DIR__ . '/lib/bootstrap.php'; $errors = Configuration::checkInstallation(); if ($errors) { - die('
' . implode("\n", $errors) . '
'); + print '
' . implode("\n", $errors) . '
'; + exit(1); } $customConfig = []; From fac1f5cd88f04855a891aeb7341f783e57ce5b3c Mon Sep 17 00:00:00 2001 From: Dag Date: Sat, 30 Dec 2023 01:33:31 +0100 Subject: [PATCH 019/319] refactor(reddit) (#3869) * refactor * yup * fix also reporterre --- bridges/RedditBridge.php | 66 +++++++++++------------------------- bridges/ReporterreBridge.php | 44 +++++++++++++----------- 2 files changed, 44 insertions(+), 66 deletions(-) diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index bb3e7afc..618463a6 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -173,7 +173,7 @@ class RedditBridge extends BridgeAbstract $item['author'] = $data->author; $item['uid'] = $data->id; $item['timestamp'] = $data->created_utc; - $item['uri'] = $this->encodePermalink($data->permalink); + $item['uri'] = $this->urlEncodePathParts($data->permalink); $item['categories'] = []; @@ -193,13 +193,11 @@ class RedditBridge extends BridgeAbstract if ($post->kind == 't1') { // Comment - $item['content'] - = htmlspecialchars_decode($data->body_html); + $item['content'] = htmlspecialchars_decode($data->body_html); } elseif ($data->is_self) { // Text post - $item['content'] - = htmlspecialchars_decode($data->selftext_html); + $item['content'] = htmlspecialchars_decode($data->selftext_html); } elseif (isset($data->post_hint) && $data->post_hint == 'link') { // Link with preview @@ -215,18 +213,11 @@ class RedditBridge extends BridgeAbstract $embed = ''; } - $item['content'] = $this->template( - $data->url, - $data->thumbnail, - $data->domain - ) . $embed; - } elseif (isset($data->post_hint) ? $data->post_hint == 'image' : false) { + $item['content'] = $this->createFigureLink($data->url, $data->thumbnail, $data->domain) . 
$embed; + } elseif (isset($data->post_hint) && $data->post_hint == 'image') { // Single image - $item['content'] = $this->link( - $this->encodePermalink($data->permalink), - '' - ); + $item['content'] = $this->createLink($this->urlEncodePathParts($data->permalink), ''); } elseif ($data->is_gallery ?? false) { // Multiple images @@ -246,32 +237,18 @@ class RedditBridge extends BridgeAbstract end($data->preview->images[0]->resolutions); $index = key($data->preview->images[0]->resolutions); - $item['content'] = $this->template( - $data->url, - $data->preview->images[0]->resolutions[$index]->url, - 'Video' - ); - } elseif (isset($data->media) ? $data->media->type == 'youtube.com' : false) { + $item['content'] = $this->createFigureLink($data->url, $data->preview->images[0]->resolutions[$index]->url, 'Video'); + } elseif (isset($data->media) && $data->media->type == 'youtube.com') { // Youtube link - - $item['content'] = $this->template( - $data->url, - $data->media->oembed->thumbnail_url, - 'YouTube' - ); + $item['content'] = $this->createFigureLink($data->url, $data->media->oembed->thumbnail_url, 'YouTube'); + //$item['content'] = htmlspecialchars_decode($data->media->oembed->html); } elseif (explode('.', $data->domain)[0] == 'self') { // Crossposted text post // TODO (optionally?) Fetch content of the original post. - - $item['content'] = $this->link( - $this->encodePermalink($data->permalink), - 'Crossposted from r/' - . explode('.', $data->domain)[1] - ); + $item['content'] = $this->createLink($this->urlEncodePathParts($data->permalink), 'Crossposted from r/' . 
explode('.', $data->domain)[1]); } else { // Link WITHOUT preview - - $item['content'] = $this->link($data->url, $data->domain); + $item['content'] = $this->createLink($data->url, $data->domain); } $this->items[] = $item; @@ -279,7 +256,7 @@ class RedditBridge extends BridgeAbstract } // Sort the order to put the latest posts first, even for mixed subreddits usort($this->items, function ($a, $b) { - return $a['timestamp'] < $b['timestamp']; + return $b['timestamp'] <=> $a['timestamp']; }); } @@ -299,24 +276,19 @@ class RedditBridge extends BridgeAbstract } } - private function encodePermalink($link) + private function urlEncodePathParts($link) { - return self::URI . implode( - '/', - array_map('urlencode', explode('/', $link)) - ); + return self::URI . implode('/', array_map('urlencode', explode('/', $link))); } - private function template($href, $src, $caption) + private function createFigureLink($href, $src, $caption) { - return '
' - . $caption . '
'; + return sprintf('
%s
', $href, $caption, $src); } - private function link($href, $text) + private function createLink($href, $text) { - return '' . $text . ''; + return sprintf('%s', $href, $text); } public function detectParameters($url) diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 18378d24..78c60d5f 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -1,11 +1,35 @@ find('item') as $element) { + if ($limit < 5) { + $item = []; + $item['title'] = html_entity_decode($element->find('title', 0)->plaintext); + $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); + $item['uri'] = $element->find('guid', 0)->innertext; + //$item['content'] = html_entity_decode($this->extractContent($item['uri'])); + $item['content'] = htmlspecialchars_decode($element->find('description', 0)->plaintext); + $this->items[] = $item; + $limit++; + } + } + } private function extractContent($url) { @@ -22,22 +46,4 @@ class ReporterreBridge extends BridgeAbstract $text = strip_tags($text, '


'); return $text; } - - public function collectData() - { - $html = getSimpleHTMLDOM(self::URI . 'spip.php?page=backend'); - $limit = 0; - - foreach ($html->find('item') as $element) { - if ($limit < 5) { - $item = []; - $item['title'] = html_entity_decode($element->find('title', 0)->plaintext); - $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); - $item['uri'] = $element->find('guid', 0)->innertext; - $item['content'] = html_entity_decode($this->extractContent($item['uri'])); - $this->items[] = $item; - $limit++; - } - } - } } From ef378663aaa98ef54c7145781e8ab1e35fe50e7d Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 2 Jan 2024 16:21:52 +0100 Subject: [PATCH 020/319] test: happy new year (#3873) * test: happy new year * yup --- tests/FeedItemTest.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/FeedItemTest.php b/tests/FeedItemTest.php index 0e7af222..3390e7b3 100644 --- a/tests/FeedItemTest.php +++ b/tests/FeedItemTest.php @@ -41,7 +41,8 @@ class FeedItemTest extends TestCase $this->assertSame(64800, $item->getTimestamp()); $item->setTimestamp('1st jan last year'); - // This will fail at 2024-01-01 hehe - $this->assertSame(1640995200, $item->getTimestamp()); + + // This will fail at 2025-01-01 hehe + $this->assertSame(1672531200, $item->getTimestamp()); } } From e904de2dc987d6578f9fd5f527aa736801c2185c Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Tue, 2 Jan 2024 16:22:39 +0100 Subject: [PATCH 021/319] [YGGTorrent] Update URI (#3871) --- bridges/YGGTorrentBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YGGTorrentBridge.php b/bridges/YGGTorrentBridge.php index f0c31f11..018bcfc4 100644 --- a/bridges/YGGTorrentBridge.php +++ b/bridges/YGGTorrentBridge.php @@ -7,7 +7,7 @@ class YGGTorrentBridge extends BridgeAbstract { const MAINTAINER = 'teromene'; const NAME = 'Yggtorrent Bridge'; - const URI = 'https://www5.yggtorrent.fi'; + 
const URI = 'https://www3.yggtorrent.qa'; const DESCRIPTION = 'Returns torrent search from Yggtorrent'; const PARAMETERS = [ From 0f6fa8034b04e1e007158ef0c5cc784bf8d7ef45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Kol=C3=A1=C5=99?= Date: Tue, 2 Jan 2024 16:23:13 +0100 Subject: [PATCH 022/319] Fixed selector in CeskaTelevizeBridge (#3872) * Fixed selector in CeskaTelevizeBridge * Fixed also description selector --- bridges/CeskaTelevizeBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php index 003cd4c7..be00d664 100644 --- a/bridges/CeskaTelevizeBridge.php +++ b/bridges/CeskaTelevizeBridge.php @@ -57,9 +57,9 @@ class CeskaTelevizeBridge extends BridgeAbstract $this->feedName .= " ({$category})"; } - foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) { + foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) { $itemTitle = $element->find('h3', 0); - $itemContent = $element->find('div[class^=content-]', 0); + $itemContent = $element->find('p[class^=content-]', 0); $itemDate = $element->find('div[class^=playTime-] span', 0); $itemThumbnail = $element->find('img', 0); $itemUri = self::URI . $element->getAttribute('href'); From 12395fcf2d87939a8a95d8bbc95e188e171bfbca Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Fri, 5 Jan 2024 07:22:16 +0100 Subject: [PATCH 023/319] Docker fix default fastcgi.logging (#3875) Mistake from https://github.com/RSS-Bridge/rss-bridge/pull/3500 Wrong file extension: should have been `.ini` and not `.conf` otherwise it has no effect. 
See https://github.com/docker-library/php/pull/1360 and https://github.com/docker-library/php/issues/878#issuecomment-938595965 --- Dockerfile | 2 +- config/php.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index f504b51f..2f1f4f3d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ ENV CURL_IMPERSONATE ff91esr COPY ./config/nginx.conf /etc/nginx/sites-available/default COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf -COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.conf +COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini COPY --chown=www-data:www-data ./ /app/ diff --git a/config/php.ini b/config/php.ini index 115f1c89..383afffb 100644 --- a/config/php.ini +++ b/config/php.ini @@ -1,4 +1,4 @@ ; Inspired by https://github.com/docker-library/php/blob/master/8.2/bookworm/fpm/Dockerfile -; https://github.com/docker-library/php/issues/878#issuecomment-938595965' +; https://github.com/docker-library/php/issues/878#issuecomment-938595965 fastcgi.logging = Off From 55ffac5bae8d84ff1b42339d1114117cf32a6854 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Fri, 5 Jan 2024 07:23:40 +0100 Subject: [PATCH 024/319] [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] (#3876) Fix the Deal source link The HTML does not contain the link to the "Deal source" anymore; now only an attribute contains the information about the Deal Source. The JSON data is now extracted for each Deal, and used to get the Temperature and Deal Source.
--- bridges/DealabsBridge.php | 1 + bridges/HotUKDealsBridge.php | 1 + bridges/MydealsBridge.php | 1 + bridges/PepperBridgeAbstract.php | 29 +++++++++++++++++++++-------- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index a904c3ff..4d39502c 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1910,6 +1910,7 @@ class DealabsBridge extends PepperBridgeAbstract 'context-talk' => 'Surveillance Discussion', 'uri-group' => 'groupe/', 'uri-deal' => 'bons-plans/', + 'uri-merchant' => 'search/bons-plans?merchant-id=', 'request-error' => 'Impossible de joindre Dealabs', 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. Vérifiez l\'URL que vous avez entré', 'no-results' => 'Il n'y a rien à afficher pour le moment :(', diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 69301c42..a7e62250 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3274,6 +3274,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'context-talk' => 'Discussion Monitoring', 'uri-group' => 'tag/', 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/deals?merchant-id=', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. Check the URL you entered', 'no-results' => 'Ooops, looks like we could', diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index 22b46413..d7e074a9 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2021,6 +2021,7 @@ class MydealsBridge extends PepperBridgeAbstract 'context-talk' => 'Überwachung Diskussion', 'uri-group' => 'gruppe/', 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/gutscheine?merchant-id=', 'request-error' => 'Could not request mydeals', 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. 
Überprüfen Sie die eingegebene URL', 'no-results' => 'Ups, wir konnten nichts', diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 6cb0f302..73bd194d 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -104,6 +104,9 @@ class PepperBridgeAbstract extends BridgeAbstract $item['title'] = $this->getTitle($deal); $item['author'] = $deal->find('span.thread-username', 0)->plaintext; + // Get the JSON Data stored as vue + $jsonDealData = $this->getDealJsonData($deal); + $item['content'] = '
find('div[class=js-vue2]', 0)->getAttribute('data-vue2')); - return $data['props']['thread']['temperature'] . '°'; + return $data; } /** * Get the source of a Deal if it exists * @return string String of the deal source */ - private function getSource($deal) + private function getSource($jsonData) { - if (($origin = $deal->find('button[class*=text--color-greyShade]', 0)) != null) { - $path = str_replace(' ', '/', trim(Json::decode($origin->{'data-cloak-link'})['path'])); - $text = $origin->find('span[class*=link]', 0); + if ($jsonData['props']['thread']['merchant'] != null) { + $path = $this->i8n('uri-merchant') . $jsonData['props']['thread']['merchant']['merchantId']; + $text = $jsonData['props']['thread']['merchant']['merchantName']; return ''; } else { return ''; From ea58c8d2bcd17b09e7d9dea64297ea44885a3933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B5=D0=B7=D0=B4=D0=B0=D0=BB=D0=B8=D1=81=D1=8C?= =?UTF-8?q?=D0=BA=D0=BE?= <105280814+uandreew@users.noreply.github.com> Date: Sat, 6 Jan 2024 19:13:50 +0200 Subject: [PATCH 025/319] Update 06_Public_Hosts.md (#3877) --- docs/01_General/06_Public_Hosts.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index c9572824..4aa905da 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -22,6 +22,7 @@ | ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | Hosted with Timeweb (Maintained in Poland) | | ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | 
Hosted with Hostaris, Germany +| ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine ## Inactive instances From 3ce94409ab650e042993480d638482a89901776d Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 20:18:33 +0100 Subject: [PATCH 026/319] feat: support itunes namespace in top channel feed (#3776) Also preserves other properties. --- actions/DisplayAction.php | 11 +- bridges/ItakuBridge.php | 6 +- formats/AtomFormat.php | 79 ++++++------ formats/HtmlFormat.php | 12 +- formats/JsonFormat.php | 18 +-- formats/MrssFormat.php | 118 ++++++++++-------- formats/PlaintextFormat.php | 6 +- lib/BridgeAbstract.php | 55 +++++--- lib/FormatAbstract.php | 75 +++++------ lib/bootstrap.php | 3 - tests/FormatTest.php | 72 +++++++++++ tests/Formats/BaseFormatTest.php | 2 +- .../expectedAtomFormat/feed.common.xml | 6 +- .../samples/expectedAtomFormat/feed.empty.xml | 6 +- .../expectedAtomFormat/feed.emptyItems.xml | 6 +- .../expectedAtomFormat/feed.microblog.xml | 6 +- .../expectedMrssFormat/feed.common.xml | 6 +- .../samples/expectedMrssFormat/feed.empty.xml | 2 +- .../expectedMrssFormat/feed.emptyItems.xml | 2 +- .../expectedMrssFormat/feed.microblog.xml | 6 +- tests/Formats/samples/feed.empty.json | 2 +- tests/Formats/samples/feed.emptyItems.json | 2 +- 22 files changed, 298 insertions(+), 203 deletions(-) create mode 100644 tests/FormatTest.php diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 43563996..080da52e 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -100,7 +100,7 @@ class DisplayAction implements ActionInterface private function createResponse(array $request, BridgeAbstract $bridge, FormatAbstract $format) { $items = []; - $infos = []; + $feed = []; try { $bridge->loadConfiguration(); @@ -116,12 +116,7 @@ class DisplayAction implements ActionInterface } 
$items = $feedItems; } - $infos = [ - 'name' => $bridge->getName(), - 'uri' => $bridge->getURI(), - 'donationUri' => $bridge->getDonationURI(), - 'icon' => $bridge->getIcon() - ]; + $feed = $bridge->getFeed(); } catch (\Exception $e) { if ($e instanceof HttpException) { // Reproduce (and log) these responses regardless of error output and report limit @@ -155,7 +150,7 @@ class DisplayAction implements ActionInterface } $format->setItems($items); - $format->setExtraInfos($infos); + $format->setFeed($feed); $now = time(); $format->setLastModified($now); $headers = [ diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php index 149757f5..0577752c 100644 --- a/bridges/ItakuBridge.php +++ b/bridges/ItakuBridge.php @@ -280,7 +280,7 @@ class ItakuBridge extends BridgeAbstract $opt['range'] = ''; $user_id = $this->getInput('user_id') ?? $this->getOwnerID($this->getInput('user')); - $data = $this->getFeed( + $data = $this->getFeedData( $opt, $user_id ); @@ -289,7 +289,7 @@ class ItakuBridge extends BridgeAbstract if ($this->queriedContext === 'Home feed') { $opt['order'] = $this->getInput('order'); $opt['range'] = $this->getInput('range'); - $data = $this->getFeed($opt); + $data = $this->getFeedData($opt); } foreach ($data['results'] as $record) { @@ -409,7 +409,7 @@ class ItakuBridge extends BridgeAbstract return $this->getData($url, false, true); } - private function getFeed(array $opt, $ownerID = null) + private function getFeedData(array $opt, $ownerID = null) { $url = self::URI . 
"/api/feed/?date_range={$opt['range']}&ordering={$opt['order']}&page=1&page_size=30&format=json"; diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 07ca7272..1fabef2e 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -17,44 +17,61 @@ class AtomFormat extends FormatAbstract public function stringify() { $document = new \DomDocument('1.0', $this->getCharset()); + $document->formatOutput = true; $feedUrl = get_current_url(); - $extraInfos = $this->getExtraInfos(); - if (empty($extraInfos['uri'])) { - $uri = REPOSITORY; - } else { - $uri = $extraInfos['uri']; - } - - $document->formatOutput = true; $feed = $document->createElementNS(self::ATOM_NS, 'feed'); $document->appendChild($feed); $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS); - $title = $document->createElement('title'); - $feed->appendChild($title); - $title->setAttribute('type', 'text'); - $title->appendChild($document->createTextNode($extraInfos['name'])); + $feedArray = $this->getFeed(); + foreach ($feedArray as $feedKey => $feedValue) { + if (in_array($feedKey, ['donationUri'])) { + continue; + } + if ($feedKey === 'name') { + $title = $document->createElement('title'); + $feed->appendChild($title); + $title->setAttribute('type', 'text'); + $title->appendChild($document->createTextNode($feedValue)); + } elseif ($feedKey === 'icon') { + if ($feedValue) { + $icon = $document->createElement('icon'); + $feed->appendChild($icon); + $icon->appendChild($document->createTextNode($feedValue)); + + $logo = $document->createElement('logo'); + $feed->appendChild($logo); + $logo->appendChild($document->createTextNode($feedValue)); + } + } elseif ($feedKey === 'uri') { + if ($feedValue) { + $linkAlternate = $document->createElement('link'); + $feed->appendChild($linkAlternate); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $feedValue); + + $linkSelf = 
$document->createElement('link'); + $feed->appendChild($linkSelf); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $linkSelf->setAttribute('href', $feedUrl); + } + } elseif ($feedKey === 'itunes') { + // todo: skip? + } else { + $element = $document->createElement($feedKey); + $feed->appendChild($element); + $element->appendChild($document->createTextNode($feedValue)); + } + } $id = $document->createElement('id'); $feed->appendChild($id); $id->appendChild($document->createTextNode($feedUrl)); - $uriparts = parse_url($uri); - if (empty($extraInfos['icon'])) { - $iconUrl = $uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico'; - } else { - $iconUrl = $extraInfos['icon']; - } - $icon = $document->createElement('icon'); - $feed->appendChild($icon); - $icon->appendChild($document->createTextNode($iconUrl)); - - $logo = $document->createElement('logo'); - $feed->appendChild($logo); - $logo->appendChild($document->createTextNode($iconUrl)); - $feedTimestamp = gmdate(DATE_ATOM, $this->lastModified); $updated = $document->createElement('updated'); $feed->appendChild($updated); @@ -69,17 +86,7 @@ class AtomFormat extends FormatAbstract $author->appendChild($authorName); $authorName->appendChild($document->createTextNode($feedAuthor)); - $linkAlternate = $document->createElement('link'); - $feed->appendChild($linkAlternate); - $linkAlternate->setAttribute('rel', 'alternate'); - $linkAlternate->setAttribute('type', 'text/html'); - $linkAlternate->setAttribute('href', $uri); - $linkSelf = $document->createElement('link'); - $feed->appendChild($linkSelf); - $linkSelf->setAttribute('rel', 'self'); - $linkSelf->setAttribute('type', 'application/atom+xml'); - $linkSelf->setAttribute('href', $feedUrl); foreach ($this->getItems() as $item) { $itemArray = $item->toArray(); diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 4933af8d..ef66f493 100644 --- a/formats/HtmlFormat.php +++ 
b/formats/HtmlFormat.php @@ -8,7 +8,7 @@ class HtmlFormat extends FormatAbstract { $queryString = $_SERVER['QUERY_STRING']; - $extraInfos = $this->getExtraInfos(); + $feedArray = $this->getFeed(); $formatFactory = new FormatFactory(); $buttons = []; $linkTags = []; @@ -29,9 +29,9 @@ class HtmlFormat extends FormatAbstract ]; } - if (Configuration::getConfig('admin', 'donations') && $extraInfos['donationUri'] !== '') { + if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { $buttons[] = [ - 'href' => e($extraInfos['donationUri']), + 'href' => e($feedArray['donationUri']), 'value' => 'Donate to maintainer', ]; } @@ -39,7 +39,7 @@ class HtmlFormat extends FormatAbstract $items = []; foreach ($this->getItems() as $item) { $items[] = [ - 'url' => $item->getURI() ?: $extraInfos['uri'], + 'url' => $item->getURI() ?: $feedArray['uri'], 'title' => $item->getTitle() ?? '(no title)', 'timestamp' => $item->getTimestamp(), 'author' => $item->getAuthor(), @@ -51,9 +51,9 @@ class HtmlFormat extends FormatAbstract $html = render_template(__DIR__ . '/../templates/html-format.html.php', [ 'charset' => $this->getCharset(), - 'title' => $extraInfos['name'], + 'title' => $feedArray['name'], 'linkTags' => $linkTags, - 'uri' => $extraInfos['uri'], + 'uri' => $feedArray['uri'], 'buttons' => $buttons, 'items' => $items, ]); diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php index dd61da41..016e75e1 100644 --- a/formats/JsonFormat.php +++ b/formats/JsonFormat.php @@ -25,18 +25,18 @@ class JsonFormat extends FormatAbstract public function stringify() { - $host = $_SERVER['HTTP_HOST'] ?? ''; - $extraInfos = $this->getExtraInfos(); + $feedArray = $this->getFeed(); + $data = [ - 'version' => 'https://jsonfeed.org/version/1', - 'title' => empty($extraInfos['name']) ? $host : $extraInfos['name'], - 'home_page_url' => empty($extraInfos['uri']) ? 
REPOSITORY : $extraInfos['uri'], - 'feed_url' => get_current_url(), + 'version' => 'https://jsonfeed.org/version/1', + 'title' => $feedArray['name'], + 'home_page_url' => $feedArray['uri'], + 'feed_url' => get_current_url(), ]; - if (!empty($extraInfos['icon'])) { - $data['icon'] = $extraInfos['icon']; - $data['favicon'] = $extraInfos['icon']; + if ($feedArray['icon']) { + $data['icon'] = $feedArray['icon']; + $data['favicon'] = $feedArray['icon']; } $items = []; diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index 5b96a6a7..e93a8289 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -35,16 +35,8 @@ class MrssFormat extends FormatAbstract public function stringify() { $document = new \DomDocument('1.0', $this->getCharset()); - - $feedUrl = get_current_url(); - $extraInfos = $this->getExtraInfos(); - if (empty($extraInfos['uri'])) { - $uri = REPOSITORY; - } else { - $uri = $extraInfos['uri']; - } - $document->formatOutput = true; + $feed = $document->createElement('rss'); $document->appendChild($feed); $feed->setAttribute('version', '2.0'); @@ -54,51 +46,74 @@ class MrssFormat extends FormatAbstract $channel = $document->createElement('channel'); $feed->appendChild($channel); - $title = $extraInfos['name']; - $channelTitle = $document->createElement('title'); - $channel->appendChild($channelTitle); - $channelTitle->appendChild($document->createTextNode($title)); + $feedArray = $this->getFeed(); + $uri = $feedArray['uri']; + $title = $feedArray['name']; - $link = $document->createElement('link'); - $channel->appendChild($link); - $link->appendChild($document->createTextNode($uri)); + foreach ($feedArray as $feedKey => $feedValue) { + if (in_array($feedKey, ['atom', 'donationUri'])) { + continue; + } + if ($feedKey === 'name') { + $channelTitle = $document->createElement('title'); + $channel->appendChild($channelTitle); + $channelTitle->appendChild($document->createTextNode($title)); - $description = 
$document->createElement('description'); - $channel->appendChild($description); - $description->appendChild($document->createTextNode($extraInfos['name'])); + $description = $document->createElement('description'); + $channel->appendChild($description); + $description->appendChild($document->createTextNode($title)); + } elseif ($feedKey === 'uri') { + $link = $document->createElement('link'); + $channel->appendChild($link); + $link->appendChild($document->createTextNode($uri)); - $allowedIconExtensions = [ - '.gif', - '.jpg', - '.png', - ]; - $icon = $extraInfos['icon']; - if (!empty($icon) && in_array(substr($icon, -4), $allowedIconExtensions)) { - $feedImage = $document->createElement('image'); - $channel->appendChild($feedImage); - $iconUrl = $document->createElement('url'); - $iconUrl->appendChild($document->createTextNode($icon)); - $feedImage->appendChild($iconUrl); - $iconTitle = $document->createElement('title'); - $iconTitle->appendChild($document->createTextNode($title)); - $feedImage->appendChild($iconTitle); - $iconLink = $document->createElement('link'); - $iconLink->appendChild($document->createTextNode($uri)); - $feedImage->appendChild($iconLink); + $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link'); + $channel->appendChild($linkAlternate); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $uri); + + $linkSelf = $document->createElementNS(self::ATOM_NS, 'link'); + $channel->appendChild($linkSelf); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $feedUrl = get_current_url(); + $linkSelf->setAttribute('href', $feedUrl); + } elseif ($feedKey === 'icon') { + $allowedIconExtensions = [ + '.gif', + '.jpg', + '.png', + '.ico', + ]; + $icon = $feedValue; + if ($icon && in_array(substr($icon, -4), $allowedIconExtensions)) { + $feedImage = $document->createElement('image'); + 
$channel->appendChild($feedImage); + $iconUrl = $document->createElement('url'); + $iconUrl->appendChild($document->createTextNode($icon)); + $feedImage->appendChild($iconUrl); + $iconTitle = $document->createElement('title'); + $iconTitle->appendChild($document->createTextNode($title)); + $feedImage->appendChild($iconTitle); + $iconLink = $document->createElement('link'); + $iconLink->appendChild($document->createTextNode($uri)); + $feedImage->appendChild($iconLink); + } + } elseif ($feedKey === 'itunes') { + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS); + foreach ($feedValue as $itunesKey => $itunesValue) { + $itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey); + $channel->appendChild($itunesProperty); + $itunesProperty->appendChild($document->createTextNode($itunesValue)); + } + } else { + $element = $document->createElement($feedKey); + $channel->appendChild($element); + $element->appendChild($document->createTextNode($feedValue)); + } } - $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link'); - $channel->appendChild($linkAlternate); - $linkAlternate->setAttribute('rel', 'alternate'); - $linkAlternate->setAttribute('type', 'text/html'); - $linkAlternate->setAttribute('href', $uri); - - $linkSelf = $document->createElementNS(self::ATOM_NS, 'link'); - $channel->appendChild($linkSelf); - $linkSelf->setAttribute('rel', 'self'); - $linkSelf->setAttribute('type', 'application/atom+xml'); - $linkSelf->setAttribute('href', $feedUrl); - foreach ($this->getItems() as $item) { $itemArray = $item->toArray(); $itemTimestamp = $item->getTimestamp(); @@ -135,6 +150,7 @@ class MrssFormat extends FormatAbstract $entry->appendChild($itunesProperty); $itunesProperty->appendChild($document->createTextNode($itunesValue)); } + if (isset($itemArray['enclosure'])) { $itunesEnclosure = $document->createElement('enclosure'); $entry->appendChild($itunesEnclosure); @@ -142,7 +158,9 @@ class MrssFormat extends 
FormatAbstract $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); } - } if (!empty($itemUri)) { + } + + if (!empty($itemUri)) { $entryLink = $document->createElement('link'); $entry->appendChild($entryLink); $entryLink->appendChild($document->createTextNode($itemUri)); diff --git a/formats/PlaintextFormat.php b/formats/PlaintextFormat.php index 0a9237d0..4e18caa6 100644 --- a/formats/PlaintextFormat.php +++ b/formats/PlaintextFormat.php @@ -6,11 +6,11 @@ class PlaintextFormat extends FormatAbstract public function stringify() { - $data = []; + $feed = $this->getFeed(); foreach ($this->getItems() as $item) { - $data[] = $item->toArray(); + $feed['items'][] = $item->toArray(); } - $text = print_r($data, true); + $text = print_r($feed, true); // Remove invalid non-UTF8 characters ini_set('mbstring.substitute_character', 'none'); $text = mb_convert_encoding($text, $this->getCharset(), 'UTF-8'); diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 0f86f454..8001ba4f 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -40,9 +40,38 @@ abstract class BridgeAbstract abstract public function collectData(); - public function getItems() + public function getFeed(): array { - return $this->items; + return [ + 'name' => $this->getName(), + 'uri' => $this->getURI(), + 'donationUri' => $this->getDonationURI(), + 'icon' => $this->getIcon(), + ]; + } + + public function getName() + { + return static::NAME; + } + + public function getURI() + { + return static::URI ?? 'https://github.com/RSS-Bridge/rss-bridge/'; + } + + public function getDonationURI(): string + { + return static::DONATION_URI; + } + + public function getIcon() + { + if (static::URI) { + // This favicon may or may not exist + return rtrim(static::URI, '/') . 
'/favicon.ico'; + } + return ''; } public function getOption(string $name) @@ -50,6 +79,9 @@ abstract class BridgeAbstract return $this->configuration[$name] ?? null; } + /** + * The description is currently not used in feed production + */ public function getDescription() { return static::DESCRIPTION; @@ -60,29 +92,14 @@ abstract class BridgeAbstract return static::MAINTAINER; } - public function getName() - { - return static::NAME; - } - - public function getIcon() - { - return static::URI . '/favicon.ico'; - } - public function getParameters(): array { return static::PARAMETERS; } - public function getURI() + public function getItems() { - return static::URI; - } - - public function getDonationURI(): string - { - return static::DONATION_URI; + return $this->items; } public function getCacheTimeout() diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index c76d1e42..28eb4bbf 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -9,10 +9,43 @@ abstract class FormatAbstract protected string $charset = 'UTF-8'; protected array $items = []; protected int $lastModified; - protected array $extraInfos = []; + + protected array $feed = []; abstract public function stringify(); + public function setFeed(array $feed) + { + $default = [ + 'name' => '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->feed = array_merge($default, $feed); + } + + public function getFeed(): array + { + return $this->feed; + } + + /** + * @param FeedItem[] $items + */ + public function setItems(array $items): void + { + $this->items = $items; + } + + /** + * @return FeedItem[] The items + */ + public function getItems(): array + { + return $this->items; + } + public function getMimeType(): string { return static::MIME_TYPE; @@ -32,44 +65,4 @@ abstract class FormatAbstract { $this->lastModified = $lastModified; } - - /** - * @param FeedItem[] $items - */ - public function setItems(array $items): void - { - $this->items = $items; - } - - /** - * @return 
FeedItem[] The items - */ - public function getItems(): array - { - return $this->items; - } - - public function setExtraInfos(array $infos = []) - { - $extras = [ - 'name', - 'uri', - 'icon', - 'donationUri', - ]; - foreach ($extras as $extra) { - if (!isset($infos[$extra])) { - $infos[$extra] = ''; - } - } - $this->extraInfos = $infos; - } - - public function getExtraInfos(): array - { - if (!$this->extraInfos) { - $this->setExtraInfos(); - } - return $this->extraInfos; - } } diff --git a/lib/bootstrap.php b/lib/bootstrap.php index a95de9dd..85d823e9 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -9,9 +9,6 @@ const PATH_LIB_CACHES = __DIR__ . '/../caches/'; /** Path to the cache folder */ const PATH_CACHE = __DIR__ . '/../cache/'; -/** URL to the RSS-Bridge repository */ -const REPOSITORY = 'https://github.com/RSS-Bridge/rss-bridge/'; - // Allow larger files for simple_html_dom // todo: extract to config (if possible) const MAX_FILE_SIZE = 10000000; diff --git a/tests/FormatTest.php b/tests/FormatTest.php new file mode 100644 index 00000000..b5df395c --- /dev/null +++ b/tests/FormatTest.php @@ -0,0 +1,72 @@ + '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->assertEquals([], $sut->getFeed()); + + $sut->setFeed([ + 'name' => '0', + 'uri' => '1', + 'icon' => '2', + 'donationUri' => '3', + ]); + $expected = [ + 'name' => '0', + 'uri' => '1', + 'icon' => '2', + 'donationUri' => '3', + ]; + $this->assertEquals($expected, $sut->getFeed()); + + $sut->setFeed([]); + $expected = [ + 'name' => '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->assertEquals($expected, $sut->getFeed()); + + $sut->setFeed(['foo' => 'bar', 'foo2' => 'bar2']); + $expected = [ + 'name' => '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + 'foo' => 'bar', + 'foo2' => 'bar2', + ]; + $this->assertEquals($expected, $sut->getFeed()); + } +} + +class TestFormat extends \FormatAbstract +{ + public function stringify() + { + } +} + +class 
TestBridge extends \BridgeAbstract +{ + public function collectData() + { + $this->items[] = ['title' => 'kek']; + } +} diff --git a/tests/Formats/BaseFormatTest.php b/tests/Formats/BaseFormatTest.php index 71e196f0..8999e772 100644 --- a/tests/Formats/BaseFormatTest.php +++ b/tests/Formats/BaseFormatTest.php @@ -61,7 +61,7 @@ abstract class BaseFormatTest extends TestCase $formatFactory = new FormatFactory(); $format = $formatFactory->create($formatName); $format->setItems($sample->items); - $format->setExtraInfos($sample->meta); + $format->setFeed($sample->meta); $format->setLastModified(strtotime('2000-01-01 12:00:00 UTC')); return $format->stringify(); diff --git a/tests/Formats/samples/expectedAtomFormat/feed.common.xml b/tests/Formats/samples/expectedAtomFormat/feed.common.xml index aa6d0687..455e5440 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.common.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.common.xml @@ -2,15 +2,15 @@ Sample feed with common data - https://example.com/feed?type=common&items=4 + + https://example.com/logo.png https://example.com/logo.png + https://example.com/feed?type=common&items=4 2000-01-01T12:00:00+00:00 RSS-Bridge - - Test Entry diff --git a/tests/Formats/samples/expectedAtomFormat/feed.empty.xml b/tests/Formats/samples/expectedAtomFormat/feed.empty.xml index fc04304d..083f230f 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.empty.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.empty.xml @@ -2,14 +2,12 @@ Sample feed with minimum data + + https://example.com/feed - https://github.com/favicon.ico - https://github.com/favicon.ico 2000-01-01T12:00:00+00:00 RSS-Bridge - - diff --git a/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml b/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml index 18572fac..d7cb461a 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml @@ -2,15 +2,13 @@ 
Sample feed with minimum data + + https://example.com/feed - https://github.com/favicon.ico - https://github.com/favicon.ico 2000-01-01T12:00:00+00:00 RSS-Bridge - - Sample Item #1 diff --git a/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml b/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml index 32bc0273..8eb0133c 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml @@ -2,15 +2,15 @@ Sample microblog feed - https://example.com/feed + + https://example.com/logo.png https://example.com/logo.png + https://example.com/feed 2000-01-01T12:00:00+00:00 RSS-Bridge - - Oh 😲 I found three monkeys 🙈🙉🙊 diff --git a/tests/Formats/samples/expectedMrssFormat/feed.common.xml b/tests/Formats/samples/expectedMrssFormat/feed.common.xml index 38a16f88..92838ae8 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.common.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.common.xml @@ -2,15 +2,15 @@ Sample feed with common data - https://example.com/blog/ Sample feed with common data + https://example.com/blog/ + + https://example.com/logo.png Sample feed with common data https://example.com/blog/ - - Test Entry diff --git a/tests/Formats/samples/expectedMrssFormat/feed.empty.xml b/tests/Formats/samples/expectedMrssFormat/feed.empty.xml index 888c42b6..40eecfc6 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.empty.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.empty.xml @@ -2,8 +2,8 @@ Sample feed with minimum data - https://github.com/RSS-Bridge/rss-bridge/ Sample feed with minimum data + https://github.com/RSS-Bridge/rss-bridge/ diff --git a/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml b/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml index 9e712ddd..8839f5a5 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml @@ -2,8 +2,8 @@ 
Sample feed with minimum data - https://github.com/RSS-Bridge/rss-bridge/ Sample feed with minimum data + https://github.com/RSS-Bridge/rss-bridge/ diff --git a/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml b/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml index 81dac87a..63c04c0f 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml @@ -2,15 +2,15 @@ Sample microblog feed - https://example.com/blog/ Sample microblog feed + https://example.com/blog/ + + https://example.com/logo.png Sample microblog feed https://example.com/blog/ - - 1918f084648b82057c1dd3faa3d091da82a6fac2 diff --git a/tests/Formats/samples/feed.empty.json b/tests/Formats/samples/feed.empty.json index aac09f64..7b1a2eae 100644 --- a/tests/Formats/samples/feed.empty.json +++ b/tests/Formats/samples/feed.empty.json @@ -6,7 +6,7 @@ }, "meta": { "name": "Sample feed with minimum data", - "uri": "", + "uri": "https://github.com/RSS-Bridge/rss-bridge/", "icon": "" }, "items": [] diff --git a/tests/Formats/samples/feed.emptyItems.json b/tests/Formats/samples/feed.emptyItems.json index 0287d428..4d077487 100644 --- a/tests/Formats/samples/feed.emptyItems.json +++ b/tests/Formats/samples/feed.emptyItems.json @@ -6,7 +6,7 @@ }, "meta": { "name": "Sample feed with minimum data", - "uri": "", + "uri": "https://github.com/RSS-Bridge/rss-bridge/", "icon": "" }, "items": [ From 0bf5dbbc0ba46cc27fe40b554b0c3c0ba705ef8b Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 20:33:35 +0100 Subject: [PATCH 027/319] chore: add tools for manually administrating the configured cache (#3867) --- README.md | 36 +++++++++++++++++++++++--- bridges/PixivBridge.php | 29 ++++++++++----------- docs/10_Bridge_Specific/PixivBridge.md | 15 ++++++++--- index.php | 25 +++--------------- lib/CacheFactory.php | 1 + lib/Configuration.php | 2 +- lib/bootstrap.php | 15 +++++++++++ lib/logger.php | 1 + phpcs.xml | 8 +++++- 
templates/exception.html.php | 8 ++++++ 10 files changed, 95 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 34efc8de..46bb5a69 100644 --- a/README.md +++ b/README.md @@ -251,7 +251,7 @@ Browse http://localhost:3000/ [![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html) [![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge) -The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and +The Heroku quick deploy currently does not work. It might work if you fork this repo and modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688 Learn more in @@ -259,11 +259,29 @@ Learn more in ## How-to +### How to remove all cache items + +As current user: + + bin/cache-clear + +As user rss-bridge: + + sudo -u rss-bridge bin/cache-clear + +As root: + + sudo bin/cache-clear + +### How to remove all expired cache items + + bin/cache-clear + ### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" ```shell -# Give rssbridge ownership -chown rssbridge:rssbridge -R /var/www/rss-bridge/cache +# Give rss-bridge ownership +chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache # Or, give www-data ownership chown www-data:www-data -R /var/www/rss-bridge/cache @@ -275,6 +293,16 @@ chmod 777 -R /var/www/rss-bridge/cache rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ ``` +### How to fix "attempt to write a readonly database" + +The sqlite files (db, wal and shm) are not writeable. + + chown -v rss-bridge:rss-bridge cache/* + +### How to fix "Unable to prepare statement: 1, no such table: storage" + + rm cache/* + ### How to create a new bridge from scratch Create the new bridge in e.g. 
`bridges/BearBlogBridge.php`: @@ -389,6 +417,8 @@ These commands require that you have installed the dev dependencies in `composer ./vendor/bin/phpunit ./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./ +https://github.com/squizlabs/PHP_CodeSniffer/wiki + ### How to spawn a minimal development environment php -S 127.0.0.1:9001 diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php index c4f5277f..fc4443ed 100644 --- a/bridges/PixivBridge.php +++ b/bridges/PixivBridge.php @@ -1,9 +1,11 @@ [ 'posts' => [ @@ -251,14 +252,13 @@ class PixivBridge extends BridgeAbstract $img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $result['url']); } } else { - //else cache and use image. - $img_url = $this->cacheImage( - $result['url'], - $result['id'], - array_key_exists('illustType', $result) - ); + $img_url = $result['url']; + // Temporarily disabling caching of the image + //$img_url = $this->cacheImage($result['url'], $result['id'], array_key_exists('illustType', $result)); } - $item['content'] = ""; + + // Currently, this might result in broken image due to their strict referrer check + $item['content'] = sprintf('', $img_url, $img_url); // Additional content items if (array_key_exists('pageCount', $result)) { @@ -318,7 +318,7 @@ class PixivBridge extends BridgeAbstract if ( !(strlen($proxy) > 0 && preg_match('/https?:\/\/.*/', $proxy)) ) { - return returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.'); + returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.'); } } @@ -326,8 +326,7 @@ class PixivBridge extends BridgeAbstract if ($cookie) { $isAuth = $this->loadCacheValue('is_authenticated'); if (!$isAuth) { - $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true) - or returnServerError('Invalid PHPSESSID cookie provided. 
Please check the 🍪 and try again.'); + $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true); if ($res['error'] === false) { $this->saveCacheValue('is_authenticated', true); } @@ -374,11 +373,11 @@ class PixivBridge extends BridgeAbstract if ($cache) { $data = $this->loadCacheValue($url); if (!$data) { - $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url"); + $data = getContents($url, $httpHeaders, $curlOptions, true); $this->saveCacheValue($url, $data); } } else { - $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url"); + $data = getContents($url, $httpHeaders, $curlOptions, true); } $this->checkCookie($data['headers']); diff --git a/docs/10_Bridge_Specific/PixivBridge.md b/docs/10_Bridge_Specific/PixivBridge.md index b782a445..ba8da2d8 100644 --- a/docs/10_Bridge_Specific/PixivBridge.md +++ b/docs/10_Bridge_Specific/PixivBridge.md @@ -2,9 +2,14 @@ PixivBridge =============== # Image proxy -As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, caching or image proxy is required to use this bridge. -To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` to the url of the proxy. The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy): +As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, +caching or image proxy is required to use this bridge. + +To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` +to the url of the proxy. 
+ +The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy): Before: `https://i.pximg.net/img-original/img/0000/00/00/00/00/00/12345678_p0.png` @@ -15,9 +20,11 @@ proxy_url = "https://proxy.example.com" ``` # Authentication -Authentication is required to view and search R-18+ and non-public images. To enable this, set the following in this bridge's configuration in `config.ini.php`. -``` +Authentication is required to view and search R-18+ and non-public images. +To enable this, set the following in this bridge's configuration in `config.ini.php`. + +```ini ; from cookie "PHPSESSID". Recommend to get in incognito browser. cookie = "00000000_hashedsessionidhere" ``` \ No newline at end of file diff --git a/index.php b/index.php index c2c546a1..126200da 100644 --- a/index.php +++ b/index.php @@ -1,33 +1,14 @@ ' . implode("\n", $errors) . ''; - exit(1); -} - -$customConfig = []; -if (file_exists(__DIR__ . '/config.ini.php')) { - $customConfig = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED); -} -Configuration::loadConfiguration($customConfig, getenv()); - // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); date_default_timezone_set(Configuration::getConfig('system', 'timezone')); -$rssBridge = new RssBridge(); - set_exception_handler(function (\Throwable $e) { - http_response_code(500); - print render(__DIR__ . '/templates/exception.html.php', ['e' => $e]); RssBridge::getLogger()->error('Uncaught Exception', ['e' => $e]); - exit(1); + http_response_code(500); + exit(render(__DIR__ . '/templates/exception.html.php', ['e' => $e])); }); set_error_handler(function ($code, $message, $file, $line) { @@ -63,4 +44,6 @@ register_shutdown_function(function () { } }); +$rssBridge = new RssBridge(); + $rssBridge->main($argv ?? 
[]); diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php index df78d9cb..90aa21ba 100644 --- a/lib/CacheFactory.php +++ b/lib/CacheFactory.php @@ -37,6 +37,7 @@ class CacheFactory if ($index === false) { throw new \InvalidArgumentException(sprintf('Invalid cache name: "%s"', $name)); } + $className = $cacheNames[$index] . 'Cache'; if (!preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $className)) { throw new \InvalidArgumentException(sprintf('Invalid cache classname: "%s"', $className)); diff --git a/lib/Configuration.php b/lib/Configuration.php index ac7d29bf..ab1c9cdf 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -59,7 +59,7 @@ final class Configuration } $config = parse_ini_file(__DIR__ . '/../config.default.ini.php', true, INI_SCANNER_TYPED); if (!$config) { - throw new \Exception('Error parsing config'); + throw new \Exception('Error parsing ini config'); } foreach ($config as $header => $section) { foreach ($section as $key => $value) { diff --git a/lib/bootstrap.php b/lib/bootstrap.php index 85d823e9..fe2069d3 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -1,5 +1,9 @@ ' . implode("\n", $errors) . ''); +} + +$customConfig = []; +if (file_exists(__DIR__ . '/../config.ini.php')) { + $customConfig = parse_ini_file(__DIR__ . '/../config.ini.php', true, INI_SCANNER_TYPED); +} +Configuration::loadConfiguration($customConfig, getenv()); diff --git a/lib/logger.php b/lib/logger.php index 7a902b5b..e579915d 100644 --- a/lib/logger.php +++ b/lib/logger.php @@ -149,6 +149,7 @@ final class StreamHandler ); error_log($text); if ($record['level'] < Logger::ERROR && Debug::isEnabled()) { + // The record level is INFO or WARNING here // Not a good idea to print here because http headers might not have been sent print sprintf("
%s
\n", e($text)); } diff --git a/phpcs.xml b/phpcs.xml index 5e50470a..21e1f50a 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -1,6 +1,11 @@ - Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/ + + Originally created with the PHP Coding Standard Generator. + But later manually tweaked. + http://edorian.github.com/php-coding-standard-generator/ + + ./static ./vendor ./templates @@ -11,6 +16,7 @@ + diff --git a/templates/exception.html.php b/templates/exception.html.php index e1dd97c1..62ac90b4 100644 --- a/templates/exception.html.php +++ b/templates/exception.html.php @@ -23,6 +23,14 @@

+ getCode() === 403): ?> +

403 Forbidden

+

+ The HTTP 403 Forbidden response status code indicates that the + server understands the request but refuses to authorize it. +

+ + getCode() === 404): ?>

404 Page Not Found

From 0c08f791efbfc6dd92f89d922984a6a41583de44 Mon Sep 17 00:00:00 2001 From: ORelio Date: Tue, 9 Jan 2024 20:34:56 +0100 Subject: [PATCH 028/319] CssSelectorComplexBridge: Use cookies everywhere (#3827) (#3870) --- bridges/CssSelectorComplexBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index e661fe18..67ad4c92 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -245,7 +245,7 @@ class CssSelectorComplexBridge extends BridgeAbstract protected function getTitle($page, $title_cleanup) { if (is_string($page)) { - $page = getSimpleHTMLDOMCached($page); + $page = getSimpleHTMLDOMCached($page, $this->getHeaders()); } $title = html_entity_decode($page->find('title', 0)->plaintext); if (!empty($title)) { @@ -302,7 +302,7 @@ class CssSelectorComplexBridge extends BridgeAbstract protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0) { if (is_string($page)) { - $page = getSimpleHTMLDOM($page); + $page = getSimpleHTMLDOM($page, $this->getHeaders()); } $entryElements = $page->find($entry_selector); @@ -355,7 +355,7 @@ class CssSelectorComplexBridge extends BridgeAbstract */ protected function fetchArticleElementFromPage($entry_url, $content_selector) { - $entry_html = getSimpleHTMLDOMCached($entry_url); + $entry_html = getSimpleHTMLDOMCached($entry_url, $this->getHeaders()); $article_content = $entry_html->find($content_selector, 0); if (is_null($article_content)) { From 1fecc4cfc13072856d68b7a33233a4e5e54a72db Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 21:28:43 +0100 Subject: [PATCH 029/319] Revert "CssSelectorComplexBridge: Use cookies everywhere (#3827) (#3870)" (#3881) This reverts commit 0c08f791efbfc6dd92f89d922984a6a41583de44. 
--- bridges/CssSelectorComplexBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index 67ad4c92..e661fe18 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -245,7 +245,7 @@ class CssSelectorComplexBridge extends BridgeAbstract protected function getTitle($page, $title_cleanup) { if (is_string($page)) { - $page = getSimpleHTMLDOMCached($page, $this->getHeaders()); + $page = getSimpleHTMLDOMCached($page); } $title = html_entity_decode($page->find('title', 0)->plaintext); if (!empty($title)) { @@ -302,7 +302,7 @@ class CssSelectorComplexBridge extends BridgeAbstract protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0) { if (is_string($page)) { - $page = getSimpleHTMLDOM($page, $this->getHeaders()); + $page = getSimpleHTMLDOM($page); } $entryElements = $page->find($entry_selector); @@ -355,7 +355,7 @@ class CssSelectorComplexBridge extends BridgeAbstract */ protected function fetchArticleElementFromPage($entry_url, $content_selector) { - $entry_html = getSimpleHTMLDOMCached($entry_url, $this->getHeaders()); + $entry_html = getSimpleHTMLDOMCached($entry_url); $article_content = $entry_html->find($content_selector, 0); if (is_null($article_content)) { From 2e5d2a88f39afccefab58b4fb40d22da7794a4b8 Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 21:36:42 +0100 Subject: [PATCH 030/319] fix: only escape iframe,script and link for html output (#3882) --- formats/AtomFormat.php | 2 +- formats/JsonFormat.php | 2 +- formats/MrssFormat.php | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 1fabef2e..5c9f2b6a 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -179,7 +179,7 @@ class AtomFormat extends FormatAbstract $content = $document->createElement('content'); 
$content->setAttribute('type', 'html'); - $content->appendChild($document->createTextNode(break_annoying_html_tags($entryContent))); + $content->appendChild($document->createTextNode($entryContent)); $entry->appendChild($content); foreach ($item->getEnclosures() as $enclosure) { diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php index 016e75e1..586aae0a 100644 --- a/formats/JsonFormat.php +++ b/formats/JsonFormat.php @@ -47,7 +47,7 @@ class JsonFormat extends FormatAbstract $entryTitle = $item->getTitle(); $entryUri = $item->getURI(); $entryTimestamp = $item->getTimestamp(); - $entryContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : ''; + $entryContent = $item->getContent() ?? ''; $entryEnclosures = $item->getEnclosures(); $entryCategories = $item->getCategories(); diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index e93a8289..aaa1d0cd 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -119,7 +119,7 @@ class MrssFormat extends FormatAbstract $itemTimestamp = $item->getTimestamp(); $itemTitle = $item->getTitle(); $itemUri = $item->getURI(); - $itemContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : ''; + $itemContent = $item->getContent() ?? 
''; $itemUid = $item->getUid(); $isPermaLink = 'false'; From 491cb50219d8f799d85bfb4e6027adf501e9afa4 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 10 Jan 2024 00:25:36 +0100 Subject: [PATCH 031/319] docs: typo (#3883) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 46bb5a69..e027d912 100644 --- a/README.md +++ b/README.md @@ -275,7 +275,7 @@ As root: ### How to remove all expired cache items - bin/cache-clear + bin/cache-prune ### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" From 0eb4f6b2678ab17255ee87bde2f919a7e6883799 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 10 Jan 2024 20:39:15 +0100 Subject: [PATCH 032/319] fix(tiktok): remove duplicate leading slash in url path, fix #3884 (#3885) --- bridges/TikTokBridge.php | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 6590df66..22fdfcef 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -35,21 +35,23 @@ class TikTokBridge extends BridgeAbstract foreach ($videos as $video) { $item = []; - // Handle link "untracking" - $linkParts = parse_url($video->find('a', 0)->href); - $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path']; + // Omit query string (remove tracking parameters) + $a = $video->find('a', 0); + $href = $a->href; + $parsedUrl = parse_url($href); + $url = $parsedUrl['scheme'] . '://' . $parsedUrl['host'] . '/' . ltrim($parsedUrl['path'], '/'); $image = $video->find('video', 0)->poster; $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext; $enclosures = [$image]; - $item['uri'] = $link; + $item['uri'] = $url; $item['title'] = 'Video'; $item['author'] = '@' . $author; $item['enclosures'] = $enclosures; $item['content'] = << +

{$views} views


EOD; From c7e8ddf4865516a4bddc884cf80c058cb5aad770 Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 10 Jan 2024 21:47:34 +0100 Subject: [PATCH 033/319] CssSelectorComplexBridge: Use cookies everywhere (RSS-Bridge#3827) (#3886) v2 after feedback from #3870 --- bridges/CssSelectorComplexBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index e661fe18..632e6b6a 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -245,7 +245,7 @@ class CssSelectorComplexBridge extends BridgeAbstract protected function getTitle($page, $title_cleanup) { if (is_string($page)) { - $page = getSimpleHTMLDOMCached($page); + $page = getSimpleHTMLDOMCached($page, 86400, $this->getHeaders()); } $title = html_entity_decode($page->find('title', 0)->plaintext); if (!empty($title)) { @@ -302,7 +302,7 @@ class CssSelectorComplexBridge extends BridgeAbstract protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0) { if (is_string($page)) { - $page = getSimpleHTMLDOM($page); + $page = getSimpleHTMLDOM($page, $this->getHeaders()); } $entryElements = $page->find($entry_selector); @@ -355,7 +355,7 @@ class CssSelectorComplexBridge extends BridgeAbstract */ protected function fetchArticleElementFromPage($entry_url, $content_selector) { - $entry_html = getSimpleHTMLDOMCached($entry_url); + $entry_html = getSimpleHTMLDOMCached($entry_url, 86400, $this->getHeaders()); $article_content = $entry_html->find($content_selector, 0); if (is_null($article_content)) { From 080e29365a24c5ad0898f2f8bf99e7068c41856b Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 10 Jan 2024 21:48:12 +0100 Subject: [PATCH 034/319] feat(http-client): add http retry count to config (#3887) --- config.default.ini.php | 5 +++++ lib/contents.php | 3 ++- lib/http.php | 30 ++++++++++++++++-------------- 3 files changed, 23 insertions(+), 15 
deletions(-) diff --git a/config.default.ini.php b/config.default.ini.php index 201b1414..21727c5e 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -49,6 +49,11 @@ enable_maintenance_mode = false [http] ; Operation timeout in seconds timeout = 30 + +; Operation retry count in case of curl error +retries = 2 + +; User agent useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" ; Max http response size in MB diff --git a/lib/contents.php b/lib/contents.php index 8676a2a8..9998a3f1 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -38,6 +38,7 @@ function getContents( $config = [ 'useragent' => Configuration::getConfig('http', 'useragent'), 'timeout' => Configuration::getConfig('http', 'timeout'), + 'retries' => Configuration::getConfig('http', 'retries'), 'headers' => array_merge($defaultHttpHeaders, $httpHeadersNormalized), 'curl_options' => $curlOptions, ]; @@ -71,7 +72,7 @@ function getContents( // Ignore invalid 'Last-Modified' HTTP header value } } - // todo: to be nice nice citizen we should also check for Etag + // todo: We should also check for Etag } $response = $httpClient->request($url, $config); diff --git a/lib/http.php b/lib/http.php index bfa6b6bf..405b01c6 100644 --- a/lib/http.php +++ b/lib/http.php @@ -63,7 +63,7 @@ final class CurlHttpClient implements HttpClient 'proxy' => null, 'curl_options' => [], 'if_not_modified_since' => null, - 'retries' => 3, + 'retries' => 2, 'max_filesize' => null, 'max_redirections' => 5, ]; @@ -136,26 +136,28 @@ final class CurlHttpClient implements HttpClient return $len; }); - $attempts = 0; + // This retry logic is a bit hard to understand, but it works + $tries = 0; while (true) { - $attempts++; + $tries++; $body = curl_exec($ch); if ($body !== false) { // The network call was successful, so break out of the loop break; } - if ($attempts > $config['retries']) { - // Finally give up - $curl_error = curl_error($ch); - $curl_errno = curl_errno($ch); - 
throw new HttpException(sprintf( - 'cURL error %s: %s (%s) for %s', - $curl_error, - $curl_errno, - 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', - $url - )); + if ($tries <= $config['retries']) { + continue; } + // Max retries reached, give up + $curl_error = curl_error($ch); + $curl_errno = curl_errno($ch); + throw new HttpException(sprintf( + 'cURL error %s: %s (%s) for %s', + $curl_error, + $curl_errno, + 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', + $url + )); } $statusCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE); From d9ac0195506040e68cebfc81c5753e416ab7b22f Mon Sep 17 00:00:00 2001 From: July Date: Wed, 10 Jan 2024 18:42:57 -0500 Subject: [PATCH 035/319] [AnnasArchiveBridge] Add new bridge (#3888) * [AnnasArchiveBridge] Add new bridge * [AnnasArchiveBridge] Add missing exampleValue * [AnnasArchiveBridge] Remove vestigial debug print --- bridges/AnnasArchiveBridge.php | 175 +++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 bridges/AnnasArchiveBridge.php diff --git a/bridges/AnnasArchiveBridge.php b/bridges/AnnasArchiveBridge.php new file mode 100644 index 00000000..e8a1e8c4 --- /dev/null +++ b/bridges/AnnasArchiveBridge.php @@ -0,0 +1,175 @@ + [ + 'name' => 'Query', + 'exampleValue' => 'apothecary diaries', + 'required' => true, + ], + 'ext' => [ + 'name' => 'Extension', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'azw3' => 'azw3', + 'cbr' => 'cbr', + 'cbz' => 'cbz', + 'djvu' => 'djvu', + 'epub' => 'epub', + 'fb2' => 'fb2', + 'fb2.zip' => 'fb2.zip', + 'mobi' => 'mobi', + 'pdf' => 'pdf', + ] + ], + 'lang' => [ + 'name' => 'Language', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Afrikaans [af]' => 'af', + 'Arabic [ar]' => 'ar', + 'Bangla [bn]' => 'bn', + 'Belarusian [be]' => 'be', + 'Bulgarian [bg]' => 'bg', + 'Catalan [ca]' => 'ca', + 'Chinese [zh]' => 'zh', + 'Church Slavic [cu]' => 'cu', + 'Croatian [hr]' => 'hr', + 'Czech [cs]' => 'cs', + 'Danish [da]' => 'da', + 'Dongxiang 
[sce]' => 'sce', + 'Dutch [nl]' => 'nl', + 'English [en]' => 'en', + 'French [fr]' => 'fr', + 'German [de]' => 'de', + 'Greek [el]' => 'el', + 'Hebrew [he]' => 'he', + 'Hindi [hi]' => 'hi', + 'Hungarian [hu]' => 'hu', + 'Indonesian [id]' => 'id', + 'Irish [ga]' => 'ga', + 'Italian [it]' => 'it', + 'Japanese [ja]' => 'ja', + 'Kazakh [kk]' => 'kk', + 'Korean [ko]' => 'ko', + 'Latin [la]' => 'la', + 'Latvian [lv]' => 'lv', + 'Lithuanian [lt]' => 'lt', + 'Luxembourgish [lb]' => 'lb', + 'Ndolo [ndl]' => 'ndl', + 'Norwegian [no]' => 'no', + 'Persian [fa]' => 'fa', + 'Polish [pl]' => 'pl', + 'Portuguese [pt]' => 'pt', + 'Romanian [ro]' => 'ro', + 'Russian [ru]' => 'ru', + 'Serbian [sr]' => 'sr', + 'Spanish [es]' => 'es', + 'Swedish [sv]' => 'sv', + 'Tamil [ta]' => 'ta', + 'Traditional Chinese [zh‑Hant]' => 'zh‑Hant', + 'Turkish [tr]' => 'tr', + 'Ukrainian [uk]' => 'uk', + 'Unknown language' => '_empty', + 'Unknown language [und]' => 'und', + 'Unknown language [urdu]' => 'urdu', + 'Urdu [ur]' => 'ur', + 'Vietnamese [vi]' => 'vi', + 'Welsh [cy]' => 'cy', + ] + ], + 'content' => [ + 'name' => 'Type', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Book (fiction)' => 'book_fiction', + 'Book (non‑fiction)' => 'book_nonfiction', + 'Book (unknown)' => 'book_unknown', + 'Comic book' => 'book_comic', + 'Journal article' => 'journal_article', + 'Magazine' => 'magazine', + 'Standards document' => 'standards_document', + ] + ], + 'src' => [ + 'name' => 'Source', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Internet Archive' => 'ia', + 'Libgen.li' => 'lgli', + 'Libgen.rs' => 'lgrs', + 'Sci‑Hub' => 'scihub', + 'Z‑Library' => 'zlib', + ] + ], + ] + ]; + + public function collectData() + { + $url = $this->getURI(); + $list = getSimpleHTMLDOMCached($url); + $list = defaultLinkTo($list, self::URI); + + // Don't attempt to do anything if not found message is given + if ($list->find('.js-not-found-additional')) { + return; + } + + foreach ($list->find('.w-full > .mb-4 > 
div > a') as $element) { + $item = []; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['author'] = $element->find('div.italic', 0)->plaintext; + $item['uri'] = $element->href; + $item['content'] = $element->plaintext; + $item['uid'] = $item['uri']; + + if ($item_html = getSimpleHTMLDOMCached($item['uri'])) { + $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] .= $item_html->find('main img', 0); + $item['content'] .= $item_html->find('main .mt-4', 0); // Summary + if ($links = $item_html->find('main ul.mb-4', -1)) { + foreach ($links->find('li > a.js-download-link') as $file) { + $item['enclosures'][] = $file->href; + } + // Remove bulk torrents from enclosures list + $item['enclosures'] = array_diff($item['enclosures'], [self::URI . 'datasets']); + } + } + + $this->items[] = $item; + } + } + + public function getName() + { + $name = parent::getName(); + if ($this->getInput('q') != null) { + $name .= ' - ' . $this->getInput('q'); + } + return $name; + } + + public function getURI() + { + $params = array_filter([ // Filter to remove non-provided parameters + 'q' => $this->getInput('q'), + 'ext' => $this->getInput('ext'), + 'lang' => $this->getInput('lang'), + 'src' => $this->getInput('src'), + 'content' => $this->getInput('content'), + ]); + $url = parent::getURI() . 'search?sort=newest&' . 
http_build_query($params); + return $url; + } +} From d5175aebcc6f74430189caab1525e6511722a6ed Mon Sep 17 00:00:00 2001 From: July Date: Thu, 11 Jan 2024 14:09:45 -0500 Subject: [PATCH 036/319] [ScribbleHubBridge] Get author feed title regardless of CloudFlare (#3892) --- bridges/ScribbleHubBridge.php | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php index e7cdf337..0f7c7a6c 100644 --- a/bridges/ScribbleHubBridge.php +++ b/bridges/ScribbleHubBridge.php @@ -12,16 +12,16 @@ class ScribbleHubBridge extends FeedExpander 'uid' => [ 'name' => 'uid', 'required' => true, - // Example: Alyson Greaves's stories - 'exampleValue' => '76208', + // Example: miriamrobern's stories + 'exampleValue' => '149271', ], ], 'Series' => [ 'sid' => [ 'name' => 'sid', 'required' => true, - // Example: latest chapters from The Sisters of Dorley by Alyson Greaves - 'exampleValue' => '421879', + // Example: latest chapters from Uskweirs + 'exampleValue' => '965299', ], ] ]; @@ -52,6 +52,10 @@ class ScribbleHubBridge extends FeedExpander return []; } + if ($this->queriedContext === 'Author') { + $this->author = $item['author']; + } + $item['comments'] = $item['uri'] . '#comments'; try { @@ -90,16 +94,7 @@ class ScribbleHubBridge extends FeedExpander $name = parent::getName() . " $this->queriedContext"; switch ($this->queriedContext) { case 'Author': - try { - $page = getSimpleHTMLDOMCached(self::URI . 'profile/' . 
$this->getInput('uid')); - } catch (HttpException $e) { - // 403 Forbidden, This means we got anti-bot response - if ($e->getCode() === 403) { - return $name; - } - throw $e; - } - $title = html_entity_decode($page->find('.p_m_username.fp_authorname', 0)->plaintext); + $title = $this->author; break; case 'Series': try { From 191e5b0493f3fc1bf2a3fc4169333c03480be23f Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 12 Jan 2024 01:31:01 +0100 Subject: [PATCH 037/319] feat: add etag support to getContents (#3893) --- README.md | 2 +- config.default.ini.php | 2 +- lib/BridgeCard.php | 5 ++--- lib/FeedExpander.php | 2 +- lib/FeedParser.php | 4 ++-- lib/XPathAbstract.php | 5 ++++- lib/contents.php | 49 +++++++++++++++++++++++------------------- lib/http.php | 4 ++++ 8 files changed, 42 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index e027d912..d6d1046c 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ PHP ini config: ```ini ; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini -max_execution_time = 20 +max_execution_time = 15 memory_limit = 64M ``` diff --git a/config.default.ini.php b/config.default.ini.php index 21727c5e..ee1e54c9 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -48,7 +48,7 @@ enable_maintenance_mode = false [http] ; Operation timeout in seconds -timeout = 30 +timeout = 15 ; Operation retry count in case of curl error retries = 2 diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 4781ebc1..a82f8e5a 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -16,7 +16,7 @@ final class BridgeCard $bridge = $bridgeFactory->create($bridgeClassName); - $isHttps = strpos($bridge->getURI(), 'https') === 0; + $isHttps = str_starts_with($bridge->getURI(), 'https'); $uri = $bridge->getURI(); $name = $bridge->getName(); @@ -113,8 +113,7 @@ EOD; } if (!$isHttps) { - $form .= '

Warning : -This bridge is not fetching its content through a secure connection
'; + $form .= '
Warning: This bridge is not fetching its content through a secure connection
'; } return $form; diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index 056578e9..c0d7e878 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -41,7 +41,7 @@ abstract class FeedExpander extends BridgeAbstract } /** - * This method is overidden by bridges + * This method is overridden by bridges * * @return array */ diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 2d982de1..510bcb32 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -7,9 +7,9 @@ declare(strict_types=1); * * Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0. * - * Produce arrays meant to be used inside rss-bridge. + * Produces array meant to be used inside rss-bridge. * - * The item structure is tweaked so that works with FeedItem + * The item structure is tweaked so that it works with FeedItem */ final class FeedParser { diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index e30bb5eb..2206f79a 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -518,7 +518,10 @@ abstract class XPathAbstract extends BridgeAbstract if (strlen($value) === 0) { return ''; } - if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + if ( + strpos($value, 'http://') === 0 + || strpos($value, 'https://') === 0 + ) { return $value; } diff --git a/lib/contents.php b/lib/contents.php index 9998a3f1..43db8c03 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -24,6 +24,32 @@ function getContents( $headerValue = trim(implode(':', array_slice($parts, 1))); $httpHeadersNormalized[$headerName] = $headerValue; } + + $requestBodyHash = null; + if (isset($curlOptions[CURLOPT_POSTFIELDS])) { + $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); + } + $cacheKey = implode('_', ['server', $url, $requestBodyHash]); + + /** @var Response $cachedResponse */ + $cachedResponse = $cache->get($cacheKey); + if ($cachedResponse) { + $lastModified = $cachedResponse->getHeader('last-modified'); + if ($lastModified) { + try { + // Some 
servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime + $lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified); + $config['if_not_modified_since'] = $lastModified->getTimestamp(); + } catch (Exception $e) { + // Failed to parse last-modified + } + } + $etag = $cachedResponse->getHeader('etag'); + if ($etag) { + $httpHeadersNormalized['if-none-match'] = $etag; + } + } + // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 $defaultHttpHeaders = [ 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -35,6 +61,7 @@ function getContents( 'Sec-Fetch-User' => '?1', 'TE' => 'trailers', ]; + $config = [ 'useragent' => Configuration::getConfig('http', 'useragent'), 'timeout' => Configuration::getConfig('http', 'timeout'), @@ -53,28 +80,6 @@ function getContents( $config['proxy'] = Configuration::getConfig('proxy', 'url'); } - $requestBodyHash = null; - if (isset($curlOptions[CURLOPT_POSTFIELDS])) { - $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); - } - $cacheKey = implode('_', ['server', $url, $requestBodyHash]); - - /** @var Response $cachedResponse */ - $cachedResponse = $cache->get($cacheKey); - if ($cachedResponse) { - $cachedLastModified = $cachedResponse->getHeader('last-modified'); - if ($cachedLastModified) { - try { - // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime - $cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . 
$cachedLastModified); - $config['if_not_modified_since'] = $cachedLastModified->getTimestamp(); - } catch (Exception $dateTimeParseFailue) { - // Ignore invalid 'Last-Modified' HTTP header value - } - } - // todo: We should also check for Etag - } - $response = $httpClient->request($url, $config); switch ($response->getCode()) { diff --git a/lib/http.php b/lib/http.php index 405b01c6..90b65a6e 100644 --- a/lib/http.php +++ b/lib/http.php @@ -258,6 +258,10 @@ final class Response } /** + * An HTTP response may have multiple headers with the same name. + * + * By default, this method returns only the last header. + * * @return string[]|string|null */ public function getHeader(string $name, bool $all = false) From 6eaf0eaa565361d0a18f23cdcd8df894116ad73a Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 17 Jan 2024 20:10:32 +0100 Subject: [PATCH 038/319] fix: add cache clearing tools (#3896) Forgot to add these in #3867 --- .gitignore | 1 - bin/cache-clear | 14 ++++++++++++++ bin/cache-prune | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100755 bin/cache-clear create mode 100755 bin/cache-prune diff --git a/.gitignore b/.gitignore index 9725342d..6ed95489 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ data/ *.pydevproject .project .metadata -bin/ tmp/ *.tmp *.bak diff --git a/bin/cache-clear b/bin/cache-clear new file mode 100755 index 00000000..3563abad --- /dev/null +++ b/bin/cache-clear @@ -0,0 +1,14 @@ +#!/usr/bin/env php +clear(); diff --git a/bin/cache-prune b/bin/cache-prune new file mode 100755 index 00000000..7b7a6031 --- /dev/null +++ b/bin/cache-prune @@ -0,0 +1,14 @@ +#!/usr/bin/env php +prune(); From 6408123330a28041344cccf3133981196e62a9a6 Mon Sep 17 00:00:00 2001 From: SebLaus <97241865+SebLaus@users.noreply.github.com> Date: Fri, 19 Jan 2024 03:59:47 +0100 Subject: [PATCH 039/319] [IdealoBridge] added Header with user-agent and fixed typo (#3897) * Added header with useragent * copy paste error from local test 
environment * Fixed missing space in New before * fixed missing space after comma in argument list --- bridges/IdealoBridge.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php index 89c5f87d..cef2b812 100644 --- a/bridges/IdealoBridge.php +++ b/bridges/IdealoBridge.php @@ -42,8 +42,13 @@ class IdealoBridge extends BridgeAbstract public function collectData() { + // Needs header with user-agent to function properly. + $header = [ + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15' + ]; + $link = $this->getInput('Link'); - $html = getSimpleHTMLDOM($link); + $html = getSimpleHTMLDOM($link, $header); // Get Productname $titleobj = $html->find('.oopStage-title', 0); @@ -80,7 +85,7 @@ class IdealoBridge extends BridgeAbstract // Generate Content if ($PriceNew > 1) { $content = "

Price New:
$PriceNew

"; - $content .= "

Price Newbefore:
$OldPriceNew

"; + $content .= "

Price New before:
$OldPriceNew

"; } if ($this->getInput('MaxPriceNew') != '') { From 12a90e20749471c1f2c794792f6b1fabcb74d13e Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 19 Jan 2024 21:30:06 +0100 Subject: [PATCH 040/319] Utils: Add Webp MIME type (#3900) --- lib/utils.php | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/utils.php b/lib/utils.php index e8f00f54..07806e7c 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -171,6 +171,7 @@ function parse_mime_type($url) 'jpg' => 'image/jpeg', 'gif' => 'image/gif', 'png' => 'image/png', + 'webp' => 'image/webp', 'image' => 'image/*', 'mp3' => 'audio/mpeg', ]; From bb36eb9eb831eb6bce8641323b7e5ce90798575b Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 19 Jan 2024 21:30:53 +0100 Subject: [PATCH 041/319] [CssSelectorBridge] Time/Thumbnail improvements (#3879) (#3901) * Implement