diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index a3ce82ac..48e00310 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -132,13 +132,22 @@ class GolemBridge extends FeedExpander // delete known bad elements foreach ( $article->find('div[id*="adtile"], #job-market, #seminars, iframe, - .gbox_affiliate, div.toc') as $bad + .gbox_affiliate, div.toc') as $bad ) { $bad->remove(); } // reload html, as remove() is buggy $article = str_get_html($article->outertext); + // Add multipage headers, but only if they are different to the article header + $firstHeader = $page->find('.table-jtoc td', 0); + if (isset($firstHeader)) { + $firstHeader = html_entity_decode($firstHeader->title); + } + $multipageHeader = $article->find('header.paged-cluster-header h1', 0); + if (isset($multipageHeader) && $multipageHeader->plaintext !== $firstHeader) { + $item .= $multipageHeader; + } $header = $article->find('header', 0); foreach ($header->find('p, figure') as $element) {