From 90d22f0d80fc826db97d47b6e506e12afd39bcf0 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Tue, 7 Jun 2022 23:22:03 +0200 Subject: [PATCH] [{Atom,Mrss}Format]: Generate using DomDocument (#2771) * [AtomFormat]: Generate using DomDocument This will escape the HTML content for us as needed. * [MrssFormat]: Generate using DomDocument This will escape the HTML content for us as needed. --- formats/AtomFormat.php | 188 +++++++++++++++++++++++------------------ formats/MrssFormat.php | 172 ++++++++++++++++++++----------------- 2 files changed, 198 insertions(+), 162 deletions(-) diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 81aaf441..0d555b43 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -9,6 +9,9 @@ class AtomFormat extends FormatAbstract{ const MIME_TYPE = 'application/atom+xml'; + protected const ATOM_NS = 'http://www.w3.org/2005/Atom'; + protected const MRSS_NS = 'http://search.yahoo.com/mrss/'; + const LIMIT_TITLE = 140; public function stringify(){ @@ -17,26 +20,66 @@ class AtomFormat extends FormatAbstract{ $urlPath = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : ''; $urlRequest = (isset($_SERVER['REQUEST_URI'])) ? $_SERVER['REQUEST_URI'] : ''; - $feedUrl = $this->xml_encode($urlPrefix . $urlHost . $urlRequest); + $feedUrl = $urlPrefix . $urlHost . $urlRequest; $extraInfos = $this->getExtraInfos(); - $title = $this->xml_encode($extraInfos['name']); $uri = !empty($extraInfos['uri']) ? $extraInfos['uri'] : REPOSITORY; + $document = new DomDocument('1.0', $this->getCharset()); + $document->formatOutput = true; + $feed = $document->createElementNS(self::ATOM_NS, 'feed'); + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS); + $document->appendChild($feed); + + $title = $document->createElement('title'); + $title->setAttribute('type', 'text'); + $title->appendChild($document->createTextNode($extraInfos['name'])); + $feed->appendChild($title); + + $id = $document->createElement('id'); + $id->appendChild($document->createTextNode($feedUrl)); + $feed->appendChild($id); + + $uriparts = parse_url($uri); + if(!empty($extraInfos['icon'])) { + $iconUrl = $extraInfos['icon']; + } else { + $iconUrl = $uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico'; + } + $icon = $document->createElement('icon'); + $icon->appendChild($document->createTextNode($iconUrl)); + $feed->appendChild($icon); + + $logo = $document->createElement('logo'); + $logo->appendChild($document->createTextNode($iconUrl)); + $feed->appendChild($logo); + + $feedTimestamp = gmdate(DATE_ATOM, $this->lastModified); + $updated = $document->createElement('updated'); + $updated->appendChild($document->createTextNode($feedTimestamp)); + $feed->appendChild($updated); + // since we can't guarantee that all items have an author, // a global feed author is mandatory $feedAuthor = 'RSS-Bridge'; + $author = $document->createElement('author'); + $authorName = $document->createElement('name'); + $authorName->appendChild($document->createTextNode($feedAuthor)); + $author->appendChild($authorName); + $feed->appendChild($author); - $uriparts = parse_url($uri); - if(!empty($extraInfos['icon'])) { - $icon = $extraInfos['icon']; - } else { - $icon = $this->xml_encode($uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico'); - } + $linkAlternate = $document->createElement('link'); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $uri); + $feed->appendChild($linkAlternate); - $uri = $this->xml_encode($uri); + $linkSelf = $document->createElement('link'); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $linkSelf->setAttribute('href', $feedUrl); + $feed->appendChild($linkSelf); - $entries = ''; foreach($this->getItems() as $item) { $entryTimestamp = $item->getTimestamp(); $entryTitle = $item->getTitle(); @@ -48,7 +91,7 @@ class AtomFormat extends FormatAbstract{ $entryID = 'urn:sha1:' . $item->getUid(); if (empty($entryID)) // Fallback to provided URI - $entryID = $this->xml_encode($entryUri); + $entryID = $entryUri; if (empty($entryID)) // Fallback to title and content $entryID = 'urn:sha1:' . hash('sha1', $entryTitle . $entryContent); @@ -67,96 +110,75 @@ class AtomFormat extends FormatAbstract{ if (empty($entryContent)) $entryContent = ' '; - $entryAuthor = ''; - if ($item->getAuthor()) { - $entryAuthor = $this->xml_encode($item->getAuthor()); - } + $entry = $document->createElement('entry'); - $entryTitle = $this->xml_encode($entryTitle); - $entryUri = $this->xml_encode($entryUri); - $entryTimestamp = $this->xml_encode(gmdate(DATE_ATOM, $entryTimestamp)); - $entryContent = $this->xml_encode($this->sanitizeHtml($entryContent)); + $title = $document->createElement('title'); + $title->setAttribute('type', 'html'); + $title->appendChild($document->createTextNode($entryTitle)); + $entry->appendChild($title); - $entryEnclosures = ''; - foreach($item->getEnclosures() as $enclosure) { - $entryEnclosures .= '' - . PHP_EOL; - } + $entryTimestamp = gmdate(DATE_ATOM, $entryTimestamp); + $published = $document->createElement('published'); + $published->appendChild($document->createTextNode($entryTimestamp)); + $entry->appendChild($published); - $entryCategories = ''; - foreach($item->getCategories() as $category) { - $entryCategories .= '' - . PHP_EOL; - } + $updated = $document->createElement('updated'); + $updated->appendChild($document->createTextNode($entryTimestamp)); + $entry->appendChild($updated); - $entryThumbnail = $item->thumbnail; - if (!empty($entryThumbnail)) - $entryThumbnail = ''; + $id = $document->createElement('id'); + $id->appendChild($document->createTextNode($entryID)); + $entry->appendChild($id); - $entryLinkAlternate = ''; if (!empty($entryUri)) { - $entryLinkAlternate = ''; + $entryLinkAlternate = $document->createElement('link'); + $entryLinkAlternate->setAttribute('rel', 'alternate'); + $entryLinkAlternate->setAttribute('type', 'text/html'); + $entryLinkAlternate->setAttribute('href', $entryUri); + $entry->appendChild($entryLinkAlternate); } - if (!empty($entryAuthor)) { - $entryAuthor = '' - . $entryAuthor - . ''; + if (!empty($item->getAuthor())) { + $author = $document->createElement('author'); + $authorName = $document->createElement('name'); + $authorName->appendChild($document->createTextNode($item->getAuthor())); + $author->appendChild($authorName); + $entry->appendChild($author); } - $entries .= <<createElement('content'); + $content->setAttribute('type', 'html'); + $content->appendChild($document->createTextNode($this->sanitizeHtml($entryContent))); + $entry->appendChild($content); - - {$entryTitle} - {$entryTimestamp} - {$entryTimestamp} - {$entryID} - {$entryLinkAlternate} - {$entryAuthor} - {$entryContent} - {$entryEnclosures} - {$entryCategories} - {$entryThumbnail} - + foreach($item->getEnclosures() as $enclosure) { + $entryEnclosure = $document->createElement('link'); + $entryEnclosure->setAttribute('rel', 'enclosure'); + $entryEnclosure->setAttribute('type', getMimeType($enclosure)); + $entryEnclosure->setAttribute('href', $enclosure); + $entry->appendChild($entryEnclosure); + } -EOD; + foreach($item->getCategories() as $category) { + $entryCategory = $document->createElement('category'); + $entryCategory->setAttribute('term', $category); + $entry->appendChild($entryCategory); + } + + if (!empty($item->thumbnail)) { + $thumbnail = $document->createElementNS(self::MRSS_NS, 'media:thumbnail'); + $thumbnail->setAttribute('url', $item->thumbnail); + $entry->appendChild($thumbnail); + } + + $feed->appendChild($entry); } - $feedTimestamp = gmdate(DATE_ATOM, $this->lastModified); - $charset = $this->getCharset(); - - /* Data are prepared, now let's begin the "MAGIE !!!" */ - $toReturn = << - - - {$title} - {$feedUrl} - {$icon} - {$icon} - {$feedTimestamp} - - {$feedAuthor} - - - -{$entries} - -EOD; + $toReturn = $document->saveXML(); // Remove invalid characters ini_set('mbstring.substitute_character', 'none'); $toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8'); return $toReturn; } - - private function xml_encode($text){ - return htmlspecialchars($text, ENT_XML1); - } } diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index 0c51104a..3266c7b1 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -27,6 +27,9 @@ class MrssFormat extends FormatAbstract { const MIME_TYPE = 'application/rss+xml'; + protected const ATOM_NS = 'http://www.w3.org/2005/Atom'; + protected const MRSS_NS = 'http://search.yahoo.com/mrss/'; + const ALLOWED_IMAGE_EXT = array( '.gif', '.jpg', '.png' ); @@ -37,24 +40,67 @@ class MrssFormat extends FormatAbstract { $urlPath = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : ''; $urlRequest = (isset($_SERVER['REQUEST_URI'])) ? $_SERVER['REQUEST_URI'] : ''; - $feedUrl = $this->xml_encode($urlPrefix . $urlHost . $urlRequest); + $feedUrl = $urlPrefix . $urlHost . $urlRequest; $extraInfos = $this->getExtraInfos(); - $title = $this->xml_encode($extraInfos['name']); - $icon = $extraInfos['icon']; + $uri = !empty($extraInfos['uri']) ? $extraInfos['uri'] : REPOSITORY; - if(!empty($extraInfos['uri'])) { - $uri = $this->xml_encode($extraInfos['uri']); - } else { - $uri = REPOSITORY; + $document = new DomDocument('1.0', $this->getCharset()); + $document->formatOutput = true; + $feed = $document->createElement('rss'); + $feed->setAttribute('version', '2.0'); + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:atom', self::ATOM_NS); + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS); + $document->appendChild($feed); + + $channel = $document->createElement('channel'); + $feed->appendChild($channel); + + $title = $extraInfos['name']; + $channelTitle = $document->createElement('title'); + $channelTitle->appendChild($document->createTextNode($title)); + $channel->appendChild($channelTitle); + + $link = $document->createElement('link'); + $link->appendChild($document->createTextNode($uri)); + $channel->appendChild($link); + + $description = $document->createElement('description'); + $description->appendChild($document->createTextNode($extraInfos['name'])); + $channel->appendChild($description); + + $icon = $extraInfos['icon']; + if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) { + $feedImage = $document->createElement('image'); + $channel->appendChild($feedImage); + $iconUrl = $document->createElement('url'); + $iconUrl->appendChild($document->createTextNode($icon)); + $feedImage->appendChild($iconUrl); + $iconTitle = $document->createElement('title'); + $iconTitle->appendChild($document->createTextNode($title)); + $feedImage->appendChild($iconTitle); + $iconLink = $document->createElement('link'); + $iconLink->appendChild($document->createTextNode($uri)); + $feedImage->appendChild($iconLink); } - $items = ''; + $linkAlternate = $document->createElementNS(self::ATOM_NS, 'atom:link'); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $uri); + $channel->appendChild($linkAlternate); + + $linkSelf = $document->createElementNS(self::ATOM_NS, 'atom:link'); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $linkSelf->setAttribute('href', $feedUrl); + $channel->appendChild($linkSelf); + foreach($this->getItems() as $item) { $itemTimestamp = $item->getTimestamp(); - $itemTitle = $this->xml_encode($item->getTitle()); - $itemUri = $this->xml_encode($item->getURI()); - $itemContent = $this->xml_encode($this->sanitizeHtml($item->getContent())); + $itemTitle = $item->getTitle(); + $itemUri = $item->getURI(); + $itemContent = $this->sanitizeHtml($item->getContent()); $entryID = $item->getUid(); $isPermaLink = 'false'; @@ -66,91 +112,59 @@ class MrssFormat extends FormatAbstract { if (empty($entryID)) // Fallback to title and content $entryID = hash('sha1', $itemTitle . $itemContent); - $entryTitle = ''; - if (!empty($itemTitle)) - $entryTitle = '' . $itemTitle . ''; + $entry = $document->createElement('item'); - $entryLink = ''; - if (!empty($itemUri)) - $entryLink = '' . $itemUri . ''; - - $entryPublished = ''; - if (!empty($itemTimestamp)) { - $entryPublished = '' - . $this->xml_encode(gmdate(DATE_RFC2822, $itemTimestamp)) - . ''; + if (!empty($itemTitle)) { + $entryTitle = $document->createElement('title'); + $entryTitle->appendChild($document->createTextNode($itemTitle)); + $entry->appendChild($entryTitle); } - $entryDescription = ''; - if (!empty($itemContent)) - $entryDescription = '' . $itemContent . ''; + if (!empty($itemUri)) { + $entryLink = $document->createElement('link'); + $entryLink->appendChild($document->createTextNode($itemUri)); + $entry->appendChild($entryLink); + } + + $entryGuid = $document->createElement('guid'); + $entryGuid->setAttribute('isPermaLink', $isPermaLink); + $entryGuid->appendChild($document->createTextNode($entryID)); + $entry->appendChild($entryGuid); + + if (!empty($itemTimestamp)) { + $entryPublished = $document->createElement('pubDate'); + $entryPublished->appendChild($document->createTextNode(gmdate(DATE_RFC2822, $itemTimestamp))); + $entry->appendChild($entryPublished); + } + + if (!empty($itemContent)) { + $entryDescription = $document->createElement('description'); + $entryDescription->appendChild($document->createTextNode($itemContent)); + $entry->appendChild($entryDescription); + } - $entryEnclosures = ''; foreach($item->getEnclosures() as $enclosure) { - $entryEnclosures .= '' - . PHP_EOL; + $entryEnclosure = $document->createElementNS(self::MRSS_NS, 'media:content'); + $entryEnclosure->setAttribute('url', $enclosure); + $entryEnclosure->setAttribute('type', getMimeType($enclosure)); + $entry->appendChild($entryEnclosure); } $entryCategories = ''; foreach($item->getCategories() as $category) { - $entryCategories .= '' - . $category . '' - . PHP_EOL; + $entryCategory = $document->createElement('category'); + $entryCategory->appendChild($document->createTextNode($category)); + $entry->appendChild($entryCategory); } - $items .= << - {$entryTitle} - {$entryLink} - {$entryID} - {$entryPublished} - {$entryDescription} - {$entryEnclosures} - {$entryCategories} - - -EOD; + $channel->appendChild($entry); } - $charset = $this->getCharset(); - - $feedImage = ''; - if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) { - $feedImage .= << - {$icon} - {$title} - {$uri} - -EOD; - } - - /* Data are prepared, now let's begin the "MAGIE !!!" */ - $toReturn = << - - - {$title} - {$uri} - {$title} - {$feedImage} - - - {$items} - - -EOD; + $toReturn = $document->saveXML(); // Remove invalid non-UTF8 characters ini_set('mbstring.substitute_character', 'none'); $toReturn = mb_convert_encoding($toReturn, $this->getCharset(), 'UTF-8'); return $toReturn; } - - private function xml_encode($text){ - return htmlspecialchars($text, ENT_XML1); - } }