mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 00:59:35 +00:00
[ZeitBridge] Remove annoyances, add content
Remove navigational elements and podcast images. Add many more header images, article content in <ul> (and, for good measure, in <ol>), and quotes with their content rather than only their author. Extreme example: https://www.zeit.de/campus/2024-05/protest-palaestina-universitaet-europa-uebersicht
This commit is contained in:
parent
a7ed3d56f9
commit
4d12aa2a9e
@ -87,7 +87,7 @@ class ZeitBridge extends FeedExpander
|
|||||||
// remove known bad elements
|
// remove known bad elements
|
||||||
foreach (
|
foreach (
|
||||||
$article->find(
|
$article->find(
|
||||||
'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, div[data-paywall], .js-embed-consent'
|
'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, .podcast-player__image, div[data-paywall], .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link'
|
||||||
) as $bad
|
) as $bad
|
||||||
) {
|
) {
|
||||||
$bad->remove();
|
$bad->remove();
|
||||||
@ -114,7 +114,7 @@ class ZeitBridge extends FeedExpander
|
|||||||
}
|
}
|
||||||
|
|
||||||
// header image
|
// header image
|
||||||
$headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('header', 0);
|
$headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('.article-header', 0) ?? $article->find('header', 0);
|
||||||
if ($headerimg) {
|
if ($headerimg) {
|
||||||
$item['content'] .= implode('', $headerimg->find('img[src], figcaption'));
|
$item['content'] .= implode('', $headerimg->find('img[src], figcaption'));
|
||||||
}
|
}
|
||||||
@ -124,7 +124,7 @@ class ZeitBridge extends FeedExpander
|
|||||||
|
|
||||||
if ($pages) {
|
if ($pages) {
|
||||||
foreach ($pages as $page) {
|
foreach ($pages as $page) {
|
||||||
$elements = $page->find('p, h2, figcaption, img[src]');
|
$elements = $page->find('p, ul, ol, h2, figure.article__media img[src], figure.article__media figcaption, figure.quote');
|
||||||
$item['content'] .= implode('', $elements);
|
$item['content'] .= implode('', $elements);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user