From db42f2786c6f362b7ce9f152578592ee4c92ae9f Mon Sep 17 00:00:00 2001
From: ORelio
Date: Tue, 1 Apr 2025 00:42:08 +0200
Subject: [PATCH] [FeedExpander] Add prepareXml() overridable function (#4485)

* FeedExpander: Remove trailing content in XML

- Move preprocessing code into overridable preprocessXml()
- Auto-remove trailing data after root xml node

* FeedExpander: Add PR reference with use case

* FeedExpander: Code linting

* [FeedExpander] Keep content at end of document for now

Will add back later if more sites have the same issue

* [FeedExpander] prepareXml: Add type hints
---
 lib/FeedExpander.php | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index ef001af1..1a47851b 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -22,14 +22,7 @@ abstract class FeedExpander extends BridgeAbstract
         if ($xmlString === '') {
             throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
         }
-        // prepare/massage the xml to make it more acceptable
-        $problematicStrings = [
-            '&nbsp;',
-            '&raquo;',
-            '&rsquo;',
-        ];
-        $xmlString = str_replace($problematicStrings, '', $xmlString);
-
+        $xmlString = $this->prepareXml($xmlString);
         $feedParser = new FeedParser();
         try {
             $this->feed = $feedParser->parseFeed($xmlString);
@@ -59,6 +52,23 @@ abstract class FeedExpander extends BridgeAbstract
         return $item;
     }
 
+    /**
+     * Prepare XML document to make it more acceptable by the parser
+     * This method can be overridden by bridges to change this behavior
+     *
+     * @return string
+     */
+    protected function prepareXml(string $xmlString): string
+    {
+        // Remove HTML named entities that XML does not predefine (problematic for the parser)
+        $problematicStrings = [
+            '&nbsp;',
+            '&raquo;',
+            '&rsquo;',
+        ];
+        return str_replace($problematicStrings, '', $xmlString);
+    }
+
     public function getURI()
     {
         return $this->feed['uri'] ?? parent::getURI();