mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-22 11:28:36 +00:00
[GolemBridge] Add multi-page headings
On multi-page articles like [1], some paragraph headers were missing
because they are the headers of the individual article pages.
These headers were previously removed in
c5f586497f
for being redundant with the
original header. The article at [1] proves us wrong, so I added logic
that ignores only truly duplicate headers.
[1] https://www.golem.de/news/es-muss-nicht-immer-apple-sein-fuenf-ueberzeugende-airpods-pro-alternativen-im-test-2508-195000.html
This commit is contained in:
parent
876d3c8ae7
commit
e30698f12f
@ -139,6 +139,15 @@ class GolemBridge extends FeedExpander
|
||||
// reload html, as remove() is buggy
|
||||
$article = str_get_html($article->outertext);
|
||||
|
||||
// Add multipage headers, but only if they are different to the article header
|
||||
$firstHeader = $page->find('.table-jtoc td', 0);
|
||||
if (isset($firstHeader)) {
|
||||
$firstHeader = html_entity_decode($firstHeader->title);
|
||||
}
|
||||
$multipageHeader = $article->find('header.paged-cluster-header h1', 0);
|
||||
if (isset($multipageHeader) && $multipageHeader->plaintext !== $firstHeader) {
|
||||
$item .= $multipageHeader;
|
||||
}
|
||||
|
||||
$header = $article->find('header', 0);
|
||||
foreach ($header->find('p, figure') as $element) {
|
||||
|
Loading…
Reference in New Issue
Block a user