0
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-22 19:38:38 +00:00

[GolemBridge] Add multi-page headings

On multi-page articles like [1], some paragraph headers were missing
because they are headers of the article pages.

These headers were previously removed in
c5f586497f for being redundant with the
original header. The article at [1] proves us wrong, but I added logic
to ignore truly duplicate headers.

[1] https://www.golem.de/news/es-muss-nicht-immer-apple-sein-fuenf-ueberzeugende-airpods-pro-alternativen-im-test-2508-195000.html
This commit is contained in:
Mynacol 2025-08-17 11:57:00 +00:00
parent 876d3c8ae7
commit e30698f12f

View File

@ -139,6 +139,15 @@ class GolemBridge extends FeedExpander
// reload html, as remove() is buggy // reload html, as remove() is buggy
$article = str_get_html($article->outertext); $article = str_get_html($article->outertext);
// Add multipage headers, but only if they are different to the article header
$firstHeader = $page->find('.table-jtoc td', 0);
if (isset($firstHeader)) {
$firstHeader = html_entity_decode($firstHeader->title);
}
$multipageHeader = $article->find('header.paged-cluster-header h1', 0);
if (isset($multipageHeader) && $multipageHeader->plaintext !== $firstHeader) {
$item .= $multipageHeader;
}
$header = $article->find('header', 0); $header = $article->find('header', 0);
foreach ($header->find('p, figure') as $element) { foreach ($header->find('p, figure') as $element) {