fix(senscritique) (#3750)

This commit is contained in:
Dag 2023-10-13 11:24:22 +02:00 committed by GitHub
parent 49d9dafaec
commit 920d00480d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 35 deletions

View File

@ -56,7 +56,8 @@ class SensCritiqueBridge extends BridgeAbstract
break; break;
} }
$html = getSimpleHTMLDOM($uri); $html = getSimpleHTMLDOM($uri);
$list = $html->find('ul.elpr-list', 0); // This selector name looks like it's automatically generated
$list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);
} }
@ -68,36 +69,13 @@ class SensCritiqueBridge extends BridgeAbstract
if ($list === null) { if ($list === null) {
returnClientError('Cannot extract data from list'); returnClientError('Cannot extract data from list');
} }
foreach ($list->find('div[data-testid="product-list-item"]') as $movie) {
foreach ($list->find('li') as $movie) {
$item = []; $item = [];
$item['author'] = htmlspecialchars_decode($movie->find('.elco-title a', 0)->plaintext, ENT_QUOTES) $item['title'] = $movie->find('h2 a', 0)->plaintext;
. ' ' // todo: fix image
. $movie->find('.elco-date', 0)->plaintext; $item['content'] = $movie->innertext;
$item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/');
$item['title'] = $movie->find('.elco-title a', 0)->plaintext $item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/');
. ' '
. $movie->find('.elco-date', 0)->plaintext;
$item['content'] = '';
$originalTitle = $movie->find('.elco-original-title', 0);
$description = $movie->find('.elco-description', 0);
if ($originalTitle) {
$item['content'] = '<em>' . $originalTitle->plaintext . '</em><br><br>';
}
$item['content'] .= $movie->find('.elco-baseline', 0)->plaintext
. '<br>'
. $movie->find('.elco-baseline', 1)->plaintext
. '<br><br>'
. ($description ? $description->plaintext : '')
. '<br><br>'
. trim($movie->find('.erra-ratings .erra-global', 0)->plaintext)
. ' / 10';
$item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/');
$item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/');
$this->items[] = $item; $this->items[] = $item;
} }
} }

View File

@ -158,8 +158,8 @@ class VkBridge extends BridgeAbstract
$article_author_selector = 'div.article_snippet__author'; $article_author_selector = 'div.article_snippet__author';
$article_thumb_selector = 'div.article_snippet__image'; $article_thumb_selector = 'div.article_snippet__image';
} }
$article_title = $article->find($article_title_selector, 0)->innertext; $article_title = $article->find($article_title_selector, 0)->innertext ?? '';
$article_author = $article->find($article_author_selector, 0)->innertext; $article_author = $article->find($article_author_selector, 0)->innertext ?? '';
$article_link = $article->getAttribute('href'); $article_link = $article->getAttribute('href');
$article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style'); $article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style');
preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches); preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);

View File

@ -2,6 +2,13 @@
declare(strict_types=1); declare(strict_types=1);
/**
* Very basic and naive feed parser that scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
*
* Emits arrays meant to be used inside rss-bridge.
*
* The feed item structure is identical to that of FeedItem
*/
final class FeedParser final class FeedParser
{ {
public function parseFeed(string $xmlString): array public function parseFeed(string $xmlString): array
@ -200,6 +207,8 @@ final class FeedParser
'content' => null, 'content' => null,
'timestamp' => null, 'timestamp' => null,
'author' => null, 'author' => null,
'uid' => null,
'categories' => [],
'enclosures' => [], 'enclosures' => [],
]; ];
if (isset($feedItem->link)) { if (isset($feedItem->link)) {

View File

@ -31,7 +31,10 @@ abstract class FormatAbstract
$this->lastModified = $lastModified; $this->lastModified = $lastModified;
} }
public function setItems(array $items) /**
* @param FeedItem[] $items
*/
public function setItems(array $items): void
{ {
$this->items = $items; $this->items = $items;
} }

View File

@ -7,7 +7,9 @@ final class UrlException extends \Exception
} }
/** /**
* Intentionally restrictive url parser * Intentionally restrictive url parser.
*
* Only absolute http/https urls.
*/ */
final class Url final class Url
{ {
@ -29,7 +31,7 @@ final class Url
$parts = parse_url($url); $parts = parse_url($url);
if ($parts === false) { if ($parts === false) {
throw new UrlException(sprintf('Invalid url %s', $url)); throw new UrlException(sprintf('Failed to parse_url(): %s', $url));
} }
return (new self()) return (new self())
@ -38,6 +40,7 @@ final class Url
->withPort($parts['port'] ?? 80) ->withPort($parts['port'] ?? 80)
->withPath($parts['path'] ?? '/') ->withPath($parts['path'] ?? '/')
->withQueryString($parts['query'] ?? null); ->withQueryString($parts['query'] ?? null);
// todo: add fragment
} }
public static function validate(string $url): bool public static function validate(string $url): bool