mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-09 16:38:50 +00:00
refactor: more feed parsing tweaks (#3748)
This commit is contained in:
parent
2880524dfc
commit
49d9dafaec
@ -40,7 +40,7 @@ class TapasBridge extends FeedExpander
|
|||||||
$this->id = $html->find('meta[property$=":url"]', 0)->content;
|
$this->id = $html->find('meta[property$=":url"]', 0)->content;
|
||||||
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
|
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
|
||||||
}
|
}
|
||||||
$this->collectExpandableDatas($this->getURI());
|
$this->collectExpandableDatas($this->getURI(), 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem(array $item)
|
protected function parseItem(array $item)
|
||||||
@ -55,9 +55,8 @@ class TapasBridge extends FeedExpander
|
|||||||
|
|
||||||
if ($this->getInput('extend_content')) {
|
if ($this->getInput('extend_content')) {
|
||||||
$html = getSimpleHTMLDOM($item['uri']);
|
$html = getSimpleHTMLDOM($item['uri']);
|
||||||
if (!$item['content']) {
|
$item['content'] = $item['content'] ?? '';
|
||||||
$item['content'] = '';
|
|
||||||
}
|
|
||||||
if ($html->find('article.main__body', 0)) {
|
if ($html->find('article.main__body', 0)) {
|
||||||
foreach ($html->find('article', 0)->find('img') as $line) {
|
foreach ($html->find('article', 0)->find('img') as $line) {
|
||||||
$item['content'] .= '<img src="' . $line->{'data-src'} . '">';
|
$item['content'] .= '<img src="' . $line->{'data-src'} . '">';
|
||||||
|
@ -22,6 +22,11 @@ abstract class FeedExpander extends BridgeAbstract
|
|||||||
if ($xmlString === '') {
|
if ($xmlString === '') {
|
||||||
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
|
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
|
||||||
}
|
}
|
||||||
|
// prepare/massage the xml to make it more acceptable
|
||||||
|
$badStrings = [
|
||||||
|
'»',
|
||||||
|
];
|
||||||
|
$xmlString = str_replace($badStrings, '', $xmlString);
|
||||||
$feedParser = new FeedParser();
|
$feedParser = new FeedParser();
|
||||||
$this->feed = $feedParser->parseFeed($xmlString);
|
$this->feed = $feedParser->parseFeed($xmlString);
|
||||||
$items = array_slice($this->feed['items'], 0, $maxItems);
|
$items = array_slice($this->feed['items'], 0, $maxItems);
|
||||||
|
@ -11,7 +11,10 @@ final class FeedParser
|
|||||||
$xmlErrors = libxml_get_errors();
|
$xmlErrors = libxml_get_errors();
|
||||||
libxml_use_internal_errors(false);
|
libxml_use_internal_errors(false);
|
||||||
if ($xml === false) {
|
if ($xml === false) {
|
||||||
throw new \Exception('Unable to parse xml');
|
if ($xmlErrors) {
|
||||||
|
$firstXmlErrorMessage = $xmlErrors[0]->message;
|
||||||
|
}
|
||||||
|
throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? ''));
|
||||||
}
|
}
|
||||||
$feed = [
|
$feed = [
|
||||||
'title' => null,
|
'title' => null,
|
||||||
@ -123,7 +126,6 @@ final class FeedParser
|
|||||||
{
|
{
|
||||||
// Primary data is compatible to 0.91 with some additional data
|
// Primary data is compatible to 0.91 with some additional data
|
||||||
$item = $this->parseRss091Item($feedItem);
|
$item = $this->parseRss091Item($feedItem);
|
||||||
|
|
||||||
$namespaces = $feedItem->getNamespaces(true);
|
$namespaces = $feedItem->getNamespaces(true);
|
||||||
if (isset($namespaces['dc'])) {
|
if (isset($namespaces['dc'])) {
|
||||||
$dc = $feedItem->children($namespaces['dc']);
|
$dc = $feedItem->children($namespaces['dc']);
|
||||||
@ -192,7 +194,14 @@ final class FeedParser
|
|||||||
|
|
||||||
public function parseRss091Item(\SimpleXMLElement $feedItem): array
|
public function parseRss091Item(\SimpleXMLElement $feedItem): array
|
||||||
{
|
{
|
||||||
$item = [];
|
$item = [
|
||||||
|
'uri' => null,
|
||||||
|
'title' => null,
|
||||||
|
'content' => null,
|
||||||
|
'timestamp' => null,
|
||||||
|
'author' => null,
|
||||||
|
'enclosures' => [],
|
||||||
|
];
|
||||||
if (isset($feedItem->link)) {
|
if (isset($feedItem->link)) {
|
||||||
// todo: trim uri
|
// todo: trim uri
|
||||||
$item['uri'] = (string)$feedItem->link;
|
$item['uri'] = (string)$feedItem->link;
|
||||||
|
Loading…
Reference in New Issue
Block a user