mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-22 19:38:38 +00:00
[GolemBridge] Add multi-page headings
On multi-page articles like [1], some paragraph headings were missing
because they are the headings of the individual article pages.
These headers were previously removed in
c5f586497f
for being redundant with the
original header. The article at [1] proves that assumption wrong, so the headers are added back, with logic
to ignore truly duplicate headers.
[1] https://www.golem.de/news/es-muss-nicht-immer-apple-sein-fuenf-ueberzeugende-airpods-pro-alternativen-im-test-2508-195000.html
This commit is contained in:
parent
876d3c8ae7
commit
e30698f12f
@ -139,6 +139,15 @@ class GolemBridge extends FeedExpander
|
|||||||
// reload html, as remove() is buggy
|
// reload html, as remove() is buggy
|
||||||
$article = str_get_html($article->outertext);
|
$article = str_get_html($article->outertext);
|
||||||
|
|
||||||
|
// Add multipage headers, but only if they are different to the article header
|
||||||
|
$firstHeader = $page->find('.table-jtoc td', 0);
|
||||||
|
if (isset($firstHeader)) {
|
||||||
|
$firstHeader = html_entity_decode($firstHeader->title);
|
||||||
|
}
|
||||||
|
$multipageHeader = $article->find('header.paged-cluster-header h1', 0);
|
||||||
|
if (isset($multipageHeader) && $multipageHeader->plaintext !== $firstHeader) {
|
||||||
|
$item .= $multipageHeader;
|
||||||
|
}
|
||||||
|
|
||||||
$header = $article->find('header', 0);
|
$header = $article->find('header', 0);
|
||||||
foreach ($header->find('p, figure') as $element) {
|
foreach ($header->find('p, figure') as $element) {
|
||||||
|
Loading…
Reference in New Issue
Block a user