mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-09 16:38:50 +00:00
fix(senscritique) (#3750)
This commit is contained in:
parent
49d9dafaec
commit
920d00480d
@ -56,7 +56,8 @@ class SensCritiqueBridge extends BridgeAbstract
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
$html = getSimpleHTMLDOM($uri);
|
$html = getSimpleHTMLDOM($uri);
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
// This selector name looks like it's automatically generated
|
||||||
|
$list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
}
|
}
|
||||||
@ -68,36 +69,13 @@ class SensCritiqueBridge extends BridgeAbstract
|
|||||||
if ($list === null) {
|
if ($list === null) {
|
||||||
returnClientError('Cannot extract data from list');
|
returnClientError('Cannot extract data from list');
|
||||||
}
|
}
|
||||||
|
foreach ($list->find('div[data-testid="product-list-item"]') as $movie) {
|
||||||
foreach ($list->find('li') as $movie) {
|
|
||||||
$item = [];
|
$item = [];
|
||||||
$item['author'] = htmlspecialchars_decode($movie->find('.elco-title a', 0)->plaintext, ENT_QUOTES)
|
$item['title'] = $movie->find('h2 a', 0)->plaintext;
|
||||||
. ' '
|
// todo: fix image
|
||||||
. $movie->find('.elco-date', 0)->plaintext;
|
$item['content'] = $movie->innertext;
|
||||||
|
$item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/');
|
||||||
$item['title'] = $movie->find('.elco-title a', 0)->plaintext
|
$item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/');
|
||||||
. ' '
|
|
||||||
. $movie->find('.elco-date', 0)->plaintext;
|
|
||||||
|
|
||||||
$item['content'] = '';
|
|
||||||
$originalTitle = $movie->find('.elco-original-title', 0);
|
|
||||||
$description = $movie->find('.elco-description', 0);
|
|
||||||
|
|
||||||
if ($originalTitle) {
|
|
||||||
$item['content'] = '<em>' . $originalTitle->plaintext . '</em><br><br>';
|
|
||||||
}
|
|
||||||
|
|
||||||
$item['content'] .= $movie->find('.elco-baseline', 0)->plaintext
|
|
||||||
. '<br>'
|
|
||||||
. $movie->find('.elco-baseline', 1)->plaintext
|
|
||||||
. '<br><br>'
|
|
||||||
. ($description ? $description->plaintext : '')
|
|
||||||
. '<br><br>'
|
|
||||||
. trim($movie->find('.erra-ratings .erra-global', 0)->plaintext)
|
|
||||||
. ' / 10';
|
|
||||||
|
|
||||||
$item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/');
|
|
||||||
$item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/');
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -158,8 +158,8 @@ class VkBridge extends BridgeAbstract
|
|||||||
$article_author_selector = 'div.article_snippet__author';
|
$article_author_selector = 'div.article_snippet__author';
|
||||||
$article_thumb_selector = 'div.article_snippet__image';
|
$article_thumb_selector = 'div.article_snippet__image';
|
||||||
}
|
}
|
||||||
$article_title = $article->find($article_title_selector, 0)->innertext;
|
$article_title = $article->find($article_title_selector, 0)->innertext ?? '';
|
||||||
$article_author = $article->find($article_author_selector, 0)->innertext;
|
$article_author = $article->find($article_author_selector, 0)->innertext ?? '';
|
||||||
$article_link = $article->getAttribute('href');
|
$article_link = $article->getAttribute('href');
|
||||||
$article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style');
|
$article_img_element_style = $article->find($article_thumb_selector, 0)->getAttribute('style');
|
||||||
preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);
|
preg_match('/background-image: url\((.*)\)/', $article_img_element_style, $matches);
|
||||||
|
@ -2,6 +2,13 @@
|
|||||||
|
|
||||||
declare(strict_types=1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Very basic and naive feed parser that scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
||||||
|
*
|
||||||
|
* Emit arrays meant to be used inside rss-bridge.
|
||||||
|
*
|
||||||
|
* The feed item structure is identical to that of FeedItem
|
||||||
|
*/
|
||||||
final class FeedParser
|
final class FeedParser
|
||||||
{
|
{
|
||||||
public function parseFeed(string $xmlString): array
|
public function parseFeed(string $xmlString): array
|
||||||
@ -200,6 +207,8 @@ final class FeedParser
|
|||||||
'content' => null,
|
'content' => null,
|
||||||
'timestamp' => null,
|
'timestamp' => null,
|
||||||
'author' => null,
|
'author' => null,
|
||||||
|
'uid' => null,
|
||||||
|
'categories' => [],
|
||||||
'enclosures' => [],
|
'enclosures' => [],
|
||||||
];
|
];
|
||||||
if (isset($feedItem->link)) {
|
if (isset($feedItem->link)) {
|
||||||
|
@ -31,7 +31,10 @@ abstract class FormatAbstract
|
|||||||
$this->lastModified = $lastModified;
|
$this->lastModified = $lastModified;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function setItems(array $items)
|
/**
|
||||||
|
* @param FeedItem[] $items
|
||||||
|
*/
|
||||||
|
public function setItems(array $items): void
|
||||||
{
|
{
|
||||||
$this->items = $items;
|
$this->items = $items;
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,9 @@ final class UrlException extends \Exception
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Intentionally restrictive url parser
|
* Intentionally restrictive url parser.
|
||||||
|
*
|
||||||
|
* Only absolute http/https urls.
|
||||||
*/
|
*/
|
||||||
final class Url
|
final class Url
|
||||||
{
|
{
|
||||||
@ -29,7 +31,7 @@ final class Url
|
|||||||
|
|
||||||
$parts = parse_url($url);
|
$parts = parse_url($url);
|
||||||
if ($parts === false) {
|
if ($parts === false) {
|
||||||
throw new UrlException(sprintf('Invalid url %s', $url));
|
throw new UrlException(sprintf('Failed to parse_url(): %s', $url));
|
||||||
}
|
}
|
||||||
|
|
||||||
return (new self())
|
return (new self())
|
||||||
@ -38,6 +40,7 @@ final class Url
|
|||||||
->withPort($parts['port'] ?? 80)
|
->withPort($parts['port'] ?? 80)
|
||||||
->withPath($parts['path'] ?? '/')
|
->withPath($parts['path'] ?? '/')
|
||||||
->withQueryString($parts['query'] ?? null);
|
->withQueryString($parts['query'] ?? null);
|
||||||
|
// todo: add fragment
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function validate(string $url): bool
|
public static function validate(string $url): bool
|
||||||
|
Loading…
Reference in New Issue
Block a user