mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 00:59:35 +00:00
refactor: more feed parsing tweaks (#3748)
This commit is contained in:
parent
2880524dfc
commit
49d9dafaec
@ -40,7 +40,7 @@ class TapasBridge extends FeedExpander
|
||||
$this->id = $html->find('meta[property$=":url"]', 0)->content;
|
||||
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
|
||||
}
|
||||
$this->collectExpandableDatas($this->getURI());
|
||||
$this->collectExpandableDatas($this->getURI(), 10);
|
||||
}
|
||||
|
||||
protected function parseItem(array $item)
|
||||
@ -55,9 +55,8 @@ class TapasBridge extends FeedExpander
|
||||
|
||||
if ($this->getInput('extend_content')) {
|
||||
$html = getSimpleHTMLDOM($item['uri']);
|
||||
if (!$item['content']) {
|
||||
$item['content'] = '';
|
||||
}
|
||||
$item['content'] = $item['content'] ?? '';
|
||||
|
||||
if ($html->find('article.main__body', 0)) {
|
||||
foreach ($html->find('article', 0)->find('img') as $line) {
|
||||
$item['content'] .= '<img src="' . $line->{'data-src'} . '">';
|
||||
|
@ -22,6 +22,11 @@ abstract class FeedExpander extends BridgeAbstract
|
||||
if ($xmlString === '') {
|
||||
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
|
||||
}
|
||||
// prepare/massage the xml to make it more acceptable
|
||||
$badStrings = [
|
||||
'»',
|
||||
];
|
||||
$xmlString = str_replace($badStrings, '', $xmlString);
|
||||
$feedParser = new FeedParser();
|
||||
$this->feed = $feedParser->parseFeed($xmlString);
|
||||
$items = array_slice($this->feed['items'], 0, $maxItems);
|
||||
|
@ -11,7 +11,10 @@ final class FeedParser
|
||||
$xmlErrors = libxml_get_errors();
|
||||
libxml_use_internal_errors(false);
|
||||
if ($xml === false) {
|
||||
throw new \Exception('Unable to parse xml');
|
||||
if ($xmlErrors) {
|
||||
$firstXmlErrorMessage = $xmlErrors[0]->message;
|
||||
}
|
||||
throw new \Exception(sprintf('Unable to parse xml: %s', $firstXmlErrorMessage ?? ''));
|
||||
}
|
||||
$feed = [
|
||||
'title' => null,
|
||||
@ -123,7 +126,6 @@ final class FeedParser
|
||||
{
|
||||
// Primary data is compatible to 0.91 with some additional data
|
||||
$item = $this->parseRss091Item($feedItem);
|
||||
|
||||
$namespaces = $feedItem->getNamespaces(true);
|
||||
if (isset($namespaces['dc'])) {
|
||||
$dc = $feedItem->children($namespaces['dc']);
|
||||
@ -192,7 +194,14 @@ final class FeedParser
|
||||
|
||||
public function parseRss091Item(\SimpleXMLElement $feedItem): array
|
||||
{
|
||||
$item = [];
|
||||
$item = [
|
||||
'uri' => null,
|
||||
'title' => null,
|
||||
'content' => null,
|
||||
'timestamp' => null,
|
||||
'author' => null,
|
||||
'enclosures' => [],
|
||||
];
|
||||
if (isset($feedItem->link)) {
|
||||
// todo: trim uri
|
||||
$item['uri'] = (string)$feedItem->link;
|
||||
|
Loading…
Reference in New Issue
Block a user