diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index 9e42d6a6..b823b55c 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -56,7 +56,8 @@ class SensCritiqueBridge extends BridgeAbstract break; } $html = getSimpleHTMLDOM($uri); - $list = $html->find('ul.elpr-list', 0); + // This selector name looks like it's automatically generated + $list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0); $this->extractDataFromList($list); } @@ -68,36 +69,13 @@ class SensCritiqueBridge extends BridgeAbstract if ($list === null) { returnClientError('Cannot extract data from list'); } - - foreach ($list->find('li') as $movie) { + foreach ($list->find('div[data-testid="product-list-item"]') as $movie) { $item = []; - $item['author'] = htmlspecialchars_decode($movie->find('.elco-title a', 0)->plaintext, ENT_QUOTES) - . ' ' - . $movie->find('.elco-date', 0)->plaintext; - - $item['title'] = $movie->find('.elco-title a', 0)->plaintext - . ' ' - . $movie->find('.elco-date', 0)->plaintext; - - $item['content'] = ''; - $originalTitle = $movie->find('.elco-original-title', 0); - $description = $movie->find('.elco-description', 0); - - if ($originalTitle) { - $item['content'] = '' . $originalTitle->plaintext . '

'; - } - - $item['content'] .= $movie->find('.elco-baseline', 0)->plaintext - . '
' - . $movie->find('.elco-baseline', 1)->plaintext - . '

' - . ($description ? $description->plaintext : '') - . '

' - . trim($movie->find('.erra-ratings .erra-global', 0)->plaintext) - . ' / 10'; - - $item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); - $item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); + $item['title'] = $movie->find('h2 a', 0)->plaintext; + // todo: fix image + $item['content'] = $movie->innertext; + $item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); + $item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $this->items[] = $item; } } diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index d86b9cf9..0d47692d 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -158,8 +158,8 @@ class VkBridge extends BridgeAbstract $article_author_selector = 'div.article_snippet__author'; $article_thumb_selector = 'div.article_snippet__image'; } - $article_title = $article->find($article_title_selector, 0)->innertext; - $article_author = $article->find($article_author_selector, 0)->innertext; + $article_title = $article->find($article_title_selector, 0)->innertext ?? ''; + $article_author = $article->find($article_author_selector, 0)->innertext ?? ''; $article_link = $article->getAttribute('href'); $article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style'); preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches); diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 7c8a5232..64a3587d 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -2,6 +2,13 @@ declare(strict_types=1); +/** + * Very basic and naive feed parser that scrapes out rss 0.91, 1.0, 2.0 and atom 1.0. + * + * Emits arrays meant to be used inside rss-bridge. 
+ * + * The feed item structure is identical to that of FeedItem + */ final class FeedParser { public function parseFeed(string $xmlString): array @@ -200,6 +207,8 @@ final class FeedParser 'content' => null, 'timestamp' => null, 'author' => null, + 'uid' => null, + 'categories' => [], 'enclosures' => [], ]; if (isset($feedItem->link)) { diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index 0304f627..b05a5764 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -31,7 +31,10 @@ abstract class FormatAbstract $this->lastModified = $lastModified; } - public function setItems(array $items) + /** + * @param FeedItem[] $items + */ + public function setItems(array $items): void { $this->items = $items; } diff --git a/lib/url.php b/lib/url.php index 2dcbbba5..993fef96 100644 --- a/lib/url.php +++ b/lib/url.php @@ -7,7 +7,9 @@ final class UrlException extends \Exception } /** - * Intentionally restrictive url parser + * Intentionally restrictive url parser. + * + * Only absolute http/https urls. */ final class Url { @@ -29,7 +31,7 @@ final class Url $parts = parse_url($url); if ($parts === false) { - throw new UrlException(sprintf('Invalid url %s', $url)); + throw new UrlException(sprintf('Failed to parse_url(): %s', $url)); } return (new self()) @@ -38,6 +40,7 @@ final class Url ->withPort($parts['port'] ?? 80) ->withPath($parts['path'] ?? '/') ->withQueryString($parts['query'] ?? null); + // todo: add fragment } public static function validate(string $url): bool