From 97f7df0d06b740db246539c854bcc7611f74f3c1 Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 3 Jan 2025 08:17:47 +0100 Subject: [PATCH] feat(feedmerge): remove duplicates based off of title too (#4392) --- bridges/FeedMergeBridge.php | 45 ++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/bridges/FeedMergeBridge.php b/bridges/FeedMergeBridge.php index 37b574b6..4fe42013 100644 --- a/bridges/FeedMergeBridge.php +++ b/bridges/FeedMergeBridge.php @@ -6,8 +6,10 @@ class FeedMergeBridge extends FeedExpander const NAME = 'FeedMerge'; const URI = 'https://github.com/RSS-Bridge/rss-bridge'; const DESCRIPTION = <<<'TEXT' -This bridge merges two or more feeds into a single feed. Max 10 items are fetched from each feed. -TEXT; + This bridge merges two or more feeds into a single feed.
+ Max 10 latest items are fetched from each individual feed.
+ Items with identical url or title are considered duplicates (and are removed).
+ TEXT; const PARAMETERS = [ [ @@ -36,11 +38,11 @@ TEXT; ]; /** - * todo: Consider a strategy which produces a shorter feed url + * TODO: Consider a strategy which produces a shorter feed url */ public function collectData() { - $limit = (int)($this->getInput('limit') ?: 10); + $limit = (int)($this->getInput('limit') ?: 99); $feeds = [ $this->getInput('feed_1'), $this->getInput('feed_2'), @@ -61,7 +63,7 @@ TEXT; if (count($feeds) > 1) { // Allow one or more feeds to fail try { - $this->collectExpandableDatas($feed); + $this->collectExpandableDatas($feed, 10); } catch (HttpException $e) { $this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); // This feed item might be spammy. Considering dropping it. @@ -80,31 +82,48 @@ TEXT; throw $e; } } else { - $this->collectExpandableDatas($feed); + $this->collectExpandableDatas($feed, 10); } } // If $this->items is empty we should consider throw exception here - // Sort by timestamp descending + // Sort by timestamp, uri, title in descending order usort($this->items, function ($a, $b) { $t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title']; $t2 = $b['timestamp'] ?? $b['uri'] ?? $b['title']; return $t2 <=> $t1; }); - // Remove duplicates by using url as unique key + // Remove duplicates by url $items = []; foreach ($this->items as $item) { - $index = $item['uri'] ?? null; - if ($index) { - // Overwrite duplicates - $items[$index] = $item; + $uri = $item['uri'] ?? null; + if ($uri) { + // Insert or override the existing duplicate + $items[$uri] = $item; } else { + // The item doesn't have a uri! $items[] = $item; } } - $this->items = array_slice(array_values($items), 0, $limit); + $this->items = array_values($items); + + // Remove duplicates by title + $items = []; + foreach ($this->items as $item) { + $title = $item['title'] ?? null; + if ($title) { + // Insert or override the existing duplicate + $items[$title] = $item; + } else { + // The item doesn't have a title! + $items[] = $item; + } + } + $this->items = array_values($items); + + $this->items = array_slice($this->items, 0, $limit); } public function getIcon()