fix: various fixes (#3702)

* fix: symfonycasts

* various fixes
This commit is contained in:
Dag 2023-09-24 18:15:14 +02:00 committed by GitHub
parent ce353c1e4f
commit 437afd67e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 32 additions and 75 deletions

View File

@@ -30,6 +30,9 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
// this bridge is broken and unmaintained
return;
$uri = self::URI . '/monair/commune/' . $this->getInput('cities'); $uri = self::URI . '/monair/commune/' . $this->getInput('cities');
$html = getSimpleHTMLDOM($uri); $html = getSimpleHTMLDOM($uri);

View File

@@ -38,50 +38,20 @@ class BrutBridge extends BridgeAbstract
] ]
]; ];
const CACHE_TIMEOUT = 1800; // 30 mins
private $jsonRegex = '/window\.__PRELOADED_STATE__ = ((?:.*)});/';
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOM($this->getURI()); $url = $this->getURI();
$html = getSimpleHTMLDOM($url);
$results = $html->find('div.results', 0); $regex = '/window.__PRELOADED_STATE__ = (.*);/';
preg_match($regex, $html, $parts);
foreach ($results->find('li.col-6.col-sm-4.col-md-3.col-lg-2.px-2.pb-4') as $li) { $data = Json::decode($parts[1], false);
$item = []; foreach ($data->medias->index as $uid => $media) {
$this->items[] = [
$videoPath = self::URI . $li->children(0)->href; 'uid' => $uid,
$videoPageHtml = getSimpleHTMLDOMCached($videoPath, 3600); 'title' => $media->metadata->slug,
'uri' => $media->share_url,
$json = $this->extractJson($videoPageHtml); 'timestamp' => $media->published_at,
$id = array_keys((array) $json->media->index)[0]; ];
$item['uri'] = $videoPath;
$item['title'] = $json->media->index->$id->title;
$item['timestamp'] = $json->media->index->$id->published_at;
$item['enclosures'][] = $json->media->index->$id->media->thumbnail;
$description = $json->media->index->$id->description;
$article = '';
if (is_null($json->media->index->$id->media->seo_article) === false) {
$article = markdownToHtml($json->media->index->$id->media->seo_article);
}
$item['content'] = <<<EOD
<video controls poster="{$json->media->index->$id->media->thumbnail}" preload="none">
<source src="{$json->media->index->$id->media->mp4_url}" type="video/mp4">
</video>
<p>{$description}</p>
{$article}
EOD;
$this->items[] = $item;
if (count($this->items) >= 10) {
break;
}
} }
} }
@@ -90,35 +60,14 @@ EOD;
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category'); return self::URI . '/' . $this->getInput('edition') . '/' . $this->getInput('category');
} }
return parent::getURI(); return parent::getURI();
} }
public function getName() public function getName()
{ {
if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) {
return $this->getKey('category') . ' - ' . return $this->getKey('category') . ' - ' . $this->getKey('edition') . ' - Brut.';
$this->getKey('edition') . ' - Brut.';
} }
return parent::getName(); return parent::getName();
} }
/**
* Extract JSON from page
*/
private function extractJson($html)
{
if (!preg_match($this->jsonRegex, $html, $parts)) {
returnServerError('Failed to extract data from page');
}
$data = json_decode($parts[1]);
if ($data === false) {
returnServerError('Failed to decode extracted data');
}
return $data;
}
} }

View File

@@ -72,7 +72,7 @@ class SitemapBridge extends CssSelectorBridge
$sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map)); $sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map));
$links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit); $links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit);
if (empty($links) && empty(sitemapXmlToList($sitemap_xml))) { if (empty($links) && empty($this->sitemapXmlToList($sitemap_xml))) {
returnClientError('Could not retrieve URLs with Timestamps from Sitemap: ' . $sitemap_url); returnClientError('Could not retrieve URLs with Timestamps from Sitemap: ' . $sitemap_url);
} }

View File

@@ -10,22 +10,27 @@ class SymfonyCastsBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOM('https://symfonycasts.com/updates/find'); $url = 'https://symfonycasts.com/updates/find';
$dives = $html->find('div'); $html = getSimpleHTMLDOM($url);
/** @var simple_html_dom_node[] $dives */
$dives = $html->find('div.user-notification-not-viewed');
/* @var simple_html_dom $div */
foreach ($dives as $div) { foreach ($dives as $div) {
$id = $div->getAttribute('data-mark-update-id-value');
$type = $div->find('h5', 0); $type = $div->find('h5', 0);
$title = $div->find('span', 0); $title = $div->find('a', 0);
$dateString = $div->find('h5.font-gray', 0); $dateString = $div->find('h5.font-gray', 0);
$href = $div->find('a', 0); $href = $div->find('a', 0);
$url = 'https://symfonycasts.com' . $href->getAttribute('href'); $hrefAttribute = $href->getAttribute('href');
$url = 'https://symfonycasts.com' . $hrefAttribute;
$item = []; // Create an empty item $item = [];
$item['uid'] = $id; $item['uid'] = $div->getAttribute('data-mark-update-update-url-value');
$item['title'] = $title->innertext; $item['title'] = $title->innertext;
// this natural language date string does not work
$item['timestamp'] = $dateString->innertext; $item['timestamp'] = $dateString->innertext;
$item['content'] = $type->plaintext . '<a href="' . $url . '">' . $title . '</a>'; $item['content'] = $type->plaintext . '<a href="' . $url . '">' . $title . '</a>';
$item['uri'] = $url; $item['uri'] = $url;
$this->items[] = $item; // Add item to the list $this->items[] = $item; // Add item to the list

View File

@@ -28,8 +28,7 @@
"ext-openssl": "*", "ext-openssl": "*",
"ext-libxml": "*", "ext-libxml": "*",
"ext-simplexml": "*", "ext-simplexml": "*",
"ext-json": "*", "ext-json": "*"
"ext-intl": "*"
}, },
"require-dev": { "require-dev": {
"phpunit/phpunit": "^9", "phpunit/phpunit": "^9",
@@ -39,6 +38,7 @@
"ext-memcached": "Allows to use memcached as cache type", "ext-memcached": "Allows to use memcached as cache type",
"ext-sqlite3": "Allows to use an SQLite database for caching", "ext-sqlite3": "Allows to use an SQLite database for caching",
"ext-zip": "Required for FDroidRepoBridge", "ext-zip": "Required for FDroidRepoBridge",
"ext-intl": "Required for OLXBridge",
"ext-dom": "Allows to use some bridges based on XPath expressions" "ext-dom": "Allows to use some bridges based on XPath expressions"
}, },
"autoload-dev": { "autoload-dev": {