From a0bbbd69789c140debb087b8f5a6a40d373d2dc5 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Sat, 18 Dec 2021 11:19:58 +0100 Subject: [PATCH] [RadioMelodieBridge] Fix to use the new website layout (#2330) --- bridges/RadioMelodieBridge.php | 117 +++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 34 deletions(-) diff --git a/bridges/RadioMelodieBridge.php b/bridges/RadioMelodieBridge.php index 6807f589..703eb6e0 100644 --- a/bridges/RadioMelodieBridge.php +++ b/bridges/RadioMelodieBridge.php @@ -12,12 +12,7 @@ class RadioMelodieBridge extends BridgeAbstract { public function collectData(){ $html = getSimpleHTMLDOM(self::URI . '/actu/') or returnServerError('Could not request Radio Melodie.'); - $list = $html->find('div[class=displayList]', 0)->children(); - - $dateFormat = '%A %e %B %Y à %H:%M'; - // Set locale and Timezone to parse the date - setlocale (LC_TIME, 'fr_FR.utf8'); - date_default_timezone_set('Europe/Paris'); + $list = $html->find('div[class=listArticles]', 0)->children(); foreach($list as $element) { if($element->tag == 'a') { @@ -34,7 +29,7 @@ class RadioMelodieBridge extends BridgeAbstract { $picture = array(); // Get the Main picture URL - $picture[] = self::URI . $article->find('div[id=pictureTitleSupport]', 0)->find('img', 0)->src; + $picture[] = self::URI . $article->find('figure[class=photoviewer]', 0)->find('img', 0)->src; $audioHTML = $article->find('audio'); // Add the audio element to the enclosure @@ -57,26 +52,14 @@ class RadioMelodieBridge extends BridgeAbstract { $article->save(); } - // Remove Radio Melodie Logo - $logoHTML = $article->find('div[id=logoArticleRM]', 0); - $logoHTML->outertext = ''; - $article->save(); - - $author = $article->find('p[class=AuthorName]', 0)->plaintext; + // Extract the author + $author = $article->find('div[class=author]', 0)->children(1)->children(0)->plaintext; // Handle date to timestamp - $dateHTML = $article->find('p[class=date]', 0)->plaintext; - preg_match('/\| ([^-]*)( - .*|)$/', $dateHTML, $matches); + $dateHTML = $article->find('div[class=author]', 0)->children(1)->plaintext; + preg_match('/([a-z]{4,10}[ ]{1,2}[0-9]{1,2} [\p{L}]{4,10} [0-9]{4} à [0-9]{2}:[0-9]{2})/mus', $dateHTML, $matches); $dateText = $matches[1]; - $dateArray = strptime($dateText, $dateFormat); - $timestamp = mktime( - $dateArray['tm_hour'], - $dateArray['tm_min'], - $dateArray['tm_sec'], - $dateArray['tm_mon'] + 1, - $dateArray['tm_mday'], - $dateArray['tm_year'] + 1900 - ); + $timestamp = $this->parseDate($dateText); $item['enclosures'] = array_merge($picture, $audio); $item['author'] = $author; @@ -86,22 +69,15 @@ class RadioMelodieBridge extends BridgeAbstract { $item['timestamp'] = $timestamp; } - // Header Image - $header = ''; - - // Remove the Date and Author part - $textDOM->find('div[class=AuthorDate]', 0)->outertext = ''; - - // Remove Facebook javascript - $textDOM->find('script[src^=https://connect.facebook.net]', 0)->outertext = ''; + // Remove the share article part + $textDOM->find('div[class=share]', 0)->outertext = ''; // Rewrite relative Links $textDOM = defaultLinkTo($textDOM, self::URI . '/'); $article->save(); - //$this->rewriteAudioPlayers($textDOM); $text = $textDOM->innertext; - $item['content'] = '

' . $item['title'] . '

' . $dateHTML . '
' . $header . $text; + $item['content'] = '

' . $item['title'] . '

' . $dateText . '
' . $text; $this->items[] = $item; } } @@ -143,4 +119,77 @@ class RadioMelodieBridge extends BridgeAbstract { } } + + /* + * Function to parse the article date + */ + private function parseDate($date_fr) + { + // French date texts + $search_fr = array( + 'janvier', + 'février', + 'mars', + 'avril', + 'mai', + 'juin', + 'juillet', + 'août', + 'septembre', + 'octobre', + 'novembre', + 'décembre', + 'lundi', + 'mardi', + 'mercredi', + 'jeudi', + 'vendredi', + 'samedi', + 'dimanche' + ); + + // English replacement date text + $replace_en = array( + 'january', + 'february', + 'march', + 'april', + 'may', + 'june', + 'july', + 'august', + 'september', + 'october', + 'november', + 'december', + 'monday', + 'tuesday', + 'wednesday', + 'thursday', + 'friday', + 'saturday', + 'sunday' + ); + + $dateFormat = 'l j F Y \à H:i'; + + // Convert the date from French to English + $date_en = str_replace($search_fr, $replace_en, $date_fr); + + // Parse the date and convert it to an array + $date_array = date_parse_from_format($dateFormat, $date_en); + + // Convert the array to a unix timestamp + $timestamp = mktime( + $date_array['hour'], + $date_array['minute'], + $date_array['second'], + $date_array['month'], + $date_array['day'], + $date_array['year'] + ); + + return $timestamp; + + } }