mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-09 16:38:50 +00:00
Add CSS Selector Feed Expander (#3732)
* Add CSS Selector Feed Expander: this bridge combines CssSelectorBridge with FeedExpander and allows expanding a feed using CSS selectors. * Fix code linting. --------- Co-authored-by: ORelio <ORelio>
This commit is contained in:
parent
f97a3fa4d9
commit
47f52b5912
@ -60,11 +60,12 @@ class CssSelectorBridge extends BridgeAbstract
|
|||||||
]
|
]
|
||||||
];
|
];
|
||||||
|
|
||||||
private $feedName = '';
|
protected $feedName = '';
|
||||||
|
protected $homepageUrl = '';
|
||||||
|
|
||||||
public function getURI()
|
public function getURI()
|
||||||
{
|
{
|
||||||
$url = $this->getInput('home_page');
|
$url = $this->homepageUrl;
|
||||||
if (empty($url)) {
|
if (empty($url)) {
|
||||||
$url = parent::getURI();
|
$url = parent::getURI();
|
||||||
}
|
}
|
||||||
@ -81,7 +82,7 @@ class CssSelectorBridge extends BridgeAbstract
|
|||||||
|
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
$url = $this->getInput('home_page');
|
$this->homepageUrl = $this->getInput('home_page');
|
||||||
$url_selector = $this->getInput('url_selector');
|
$url_selector = $this->getInput('url_selector');
|
||||||
$url_pattern = $this->getInput('url_pattern');
|
$url_pattern = $this->getInput('url_pattern');
|
||||||
$content_selector = $this->getInput('content_selector');
|
$content_selector = $this->getInput('content_selector');
|
||||||
@ -90,7 +91,7 @@ class CssSelectorBridge extends BridgeAbstract
|
|||||||
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
||||||
$limit = $this->getInput('limit') ?? 10;
|
$limit = $this->getInput('limit') ?? 10;
|
||||||
|
|
||||||
$html = defaultLinkTo(getSimpleHTMLDOM($url), $url);
|
$html = defaultLinkTo(getSimpleHTMLDOM($this->homepageUrl), $this->homepageUrl);
|
||||||
$this->feedName = $this->titleCleanup($this->getPageTitle($html), $title_cleanup);
|
$this->feedName = $this->titleCleanup($this->getPageTitle($html), $title_cleanup);
|
||||||
$items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup);
|
$items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup);
|
||||||
|
|
||||||
|
98
bridges/CssSelectorFeedExpanderBridge.php
Normal file
98
bridges/CssSelectorFeedExpanderBridge.php
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// Only declare the helper when no class of that name exists yet.
if (class_exists('CssSelectorFeedExpanderBridgeInternal') === false) {
    /**
     * Utility class used internally by CssSelectorFeedExpanderBridge.
     *
     * It exists solely to give access to the methods inherited from
     * FeedExpander; it is not meant to be run as a bridge on its own.
     */
    class CssSelectorFeedExpanderBridgeInternal extends FeedExpander
    {
        public function collectData()
        {
            // Intentionally empty: this method is unused.
            // Call collectExpandableDatas($url), inherited from FeedExpander, instead.
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Expands the items of a truncated RSS feed using CSS selectors.
 *
 * The feed given by the "feed" parameter is read through
 * CssSelectorFeedExpanderBridgeInternal (a FeedExpander), then each item's
 * page is passed to the inherited expandEntryWithSelector() helper so that
 * the item content can be replaced with the content found on the page.
 */
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
{
    const MAINTAINER = 'ORelio';
    const NAME = 'CSS Selector Feed Expander';
    const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
    const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
    const PARAMETERS = [
        [
            'feed' => [
                'name' => 'Feed: URL of truncated RSS feed',
                'exampleValue' => 'https://example.com/feed.xml',
                'required' => true
            ],
            'content_selector' => [
                'name' => 'Selector for each article content',
                'title' => <<<EOT
                    This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
                    Everything inside that element becomes feed item content.
                    EOT,
                'exampleValue' => 'article.content',
                'required' => true
            ],
            'content_cleanup' => [
                'name' => '[Optional] Content cleanup: List of items to remove',
                'title' => 'Selector for unnecessary elements to remove inside article contents.',
                'exampleValue' => 'div.ads, div.comments',
            ],
            'dont_expand_metadata' => [
                'name' => '[Optional] Don\'t expand metadata',
                'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
                'type' => 'checkbox',
            ],
            'discard_thumbnail' => [
                'name' => '[Optional] Discard thumbnail set by site author',
                'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
                'type' => 'checkbox',
            ],
            'limit' => self::LIMIT
        ]
    ];

    /**
     * Read the source feed, expand each item from its web page and store
     * the results in $this->items.
     *
     * Side effects: sets $this->homepageUrl (root of the feed URL) and
     * $this->feedName (name reported by the feed expander).
     */
    public function collectData()
    {
        $url = $this->getInput('feed');
        $content_selector = $this->getInput('content_selector');
        $content_cleanup = $this->getInput('content_cleanup');
        $dont_expand_metadata = $this->getInput('dont_expand_metadata');
        $discard_thumbnail = $this->getInput('discard_thumbnail');
        // An unset limit casts to 0, which is treated as "no limit" below
        $limit = (int) $this->getInput('limit');

        $feed_expander = new CssSelectorFeedExpanderBridgeInternal();
        $items = $feed_expander->collectExpandableDatas($url)->getItems();

        $this->homepageUrl = urljoin($url, '/');
        $this->feedName = $feed_expander->getName();

        // Honor the requested limit BEFORE expanding: each expansion works on
        // the item's own page, so trimming afterwards would waste requests.
        // Without a positive limit, all items are kept as before.
        if ($limit > 0) {
            $items = array_slice($items, 0, $limit);
        }

        foreach ($items as $item_from_feed) {
            if (empty($item_from_feed['uri'])) {
                // No page URL to expand from: keep the feed item untouched
                $item_expanded = $item_from_feed;
            } else {
                $item_expanded = $this->expandEntryWithSelector(
                    $item_from_feed['uri'],
                    $content_selector,
                    $content_cleanup
                );

                if ($dont_expand_metadata) {
                    // Take feed item, only replace content from expanded data
                    $content = $item_expanded['content'];
                    $item_expanded = $item_from_feed;
                    $item_expanded['content'] = $content;
                } else {
                    // Take expanded item, but give priority to metadata already in source item
                    foreach ($item_from_feed as $field => $val) {
                        if ($field !== 'content') {
                            $item_expanded[$field] = $val;
                        }
                    }
                }
            }

            if ($discard_thumbnail && isset($item_expanded['enclosures'])) {
                unset($item_expanded['enclosures']);
            }

            $this->items[] = $item_expanded;
        }
    }
}
|
@ -64,7 +64,7 @@ class SitemapBridge extends CssSelectorBridge
|
|||||||
|
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
$url = $this->getInput('home_page');
|
$this->homepageUrl = $this->getInput('home_page');
|
||||||
$url_pattern = $this->getInput('url_pattern');
|
$url_pattern = $this->getInput('url_pattern');
|
||||||
$content_selector = $this->getInput('content_selector');
|
$content_selector = $this->getInput('content_selector');
|
||||||
$content_cleanup = $this->getInput('content_cleanup');
|
$content_cleanup = $this->getInput('content_cleanup');
|
||||||
@ -73,8 +73,8 @@ class SitemapBridge extends CssSelectorBridge
|
|||||||
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
||||||
$limit = $this->getInput('limit');
|
$limit = $this->getInput('limit');
|
||||||
|
|
||||||
$this->feedName = $this->titleCleanup($this->getPageTitle($url), $title_cleanup);
|
$this->feedName = $this->titleCleanup($this->getPageTitle($this->homepageUrl), $title_cleanup);
|
||||||
$sitemap_url = empty($site_map) ? $url : $site_map;
|
$sitemap_url = empty($site_map) ? $this->homepageUrl : $site_map;
|
||||||
$sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map));
|
$sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map));
|
||||||
$links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit);
|
$links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user