mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 00:59:35 +00:00
feat(feedmerge): remove duplicates based off of title too (#4392)
This commit is contained in:
parent
db3899f2e6
commit
97f7df0d06
@ -6,8 +6,10 @@ class FeedMergeBridge extends FeedExpander
|
|||||||
const NAME = 'FeedMerge';
|
const NAME = 'FeedMerge';
|
||||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge';
|
const URI = 'https://github.com/RSS-Bridge/rss-bridge';
|
||||||
const DESCRIPTION = <<<'TEXT'
|
const DESCRIPTION = <<<'TEXT'
|
||||||
This bridge merges two or more feeds into a single feed. Max 10 items are fetched from each feed.
|
This bridge merges two or more feeds into a single feed. <br>
|
||||||
TEXT;
|
Max 10 latest items are fetched from each individual feed. <br>
|
||||||
|
Items with identical url or title are considered duplicates (and are removed). <br>
|
||||||
|
TEXT;
|
||||||
|
|
||||||
const PARAMETERS = [
|
const PARAMETERS = [
|
||||||
[
|
[
|
||||||
@ -36,11 +38,11 @@ TEXT;
|
|||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* todo: Consider a strategy which produces a shorter feed url
|
* TODO: Consider a strategy which produces a shorter feed url
|
||||||
*/
|
*/
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
$limit = (int)($this->getInput('limit') ?: 10);
|
$limit = (int)($this->getInput('limit') ?: 99);
|
||||||
$feeds = [
|
$feeds = [
|
||||||
$this->getInput('feed_1'),
|
$this->getInput('feed_1'),
|
||||||
$this->getInput('feed_2'),
|
$this->getInput('feed_2'),
|
||||||
@ -61,7 +63,7 @@ TEXT;
|
|||||||
if (count($feeds) > 1) {
|
if (count($feeds) > 1) {
|
||||||
// Allow one or more feeds to fail
|
// Allow one or more feeds to fail
|
||||||
try {
|
try {
|
||||||
$this->collectExpandableDatas($feed);
|
$this->collectExpandableDatas($feed, 10);
|
||||||
} catch (HttpException $e) {
|
} catch (HttpException $e) {
|
||||||
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
|
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
|
||||||
// This feed item might be spammy. Considering dropping it.
|
// This feed item might be spammy. Considering dropping it.
|
||||||
@ -80,31 +82,48 @@ TEXT;
|
|||||||
throw $e;
|
throw $e;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$this->collectExpandableDatas($feed);
|
$this->collectExpandableDatas($feed, 10);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If $this->items is empty we should consider throw exception here
|
// If $this->items is empty we should consider throw exception here
|
||||||
|
|
||||||
// Sort by timestamp descending
|
// Sort by timestamp, uri, title in descending order
|
||||||
usort($this->items, function ($a, $b) {
|
usort($this->items, function ($a, $b) {
|
||||||
$t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title'];
|
$t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title'];
|
||||||
$t2 = $b['timestamp'] ?? $b['uri'] ?? $b['title'];
|
$t2 = $b['timestamp'] ?? $b['uri'] ?? $b['title'];
|
||||||
return $t2 <=> $t1;
|
return $t2 <=> $t1;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Remove duplicates by using url as unique key
|
// Remove duplicates by url
|
||||||
$items = [];
|
$items = [];
|
||||||
foreach ($this->items as $item) {
|
foreach ($this->items as $item) {
|
||||||
$index = $item['uri'] ?? null;
|
$uri = $item['uri'] ?? null;
|
||||||
if ($index) {
|
if ($uri) {
|
||||||
// Overwrite duplicates
|
// Insert or override the existing duplicate
|
||||||
$items[$index] = $item;
|
$items[$uri] = $item;
|
||||||
} else {
|
} else {
|
||||||
|
// The item doesn't have a uri!
|
||||||
$items[] = $item;
|
$items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$this->items = array_slice(array_values($items), 0, $limit);
|
$this->items = array_values($items);
|
||||||
|
|
||||||
|
// Remove duplicates by title
|
||||||
|
$items = [];
|
||||||
|
foreach ($this->items as $item) {
|
||||||
|
$title = $item['title'] ?? null;
|
||||||
|
if ($title) {
|
||||||
|
// Insert or override the existing duplicate
|
||||||
|
$items[$title] = $item;
|
||||||
|
} else {
|
||||||
|
// The item doesn't have a title!
|
||||||
|
$items[] = $item;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$this->items = array_values($items);
|
||||||
|
|
||||||
|
$this->items = array_slice($this->items, 0, $limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getIcon()
|
public function getIcon()
|
||||||
|
Loading…
Reference in New Issue
Block a user