From 91fe6c1fae1a81afc8cd6d91705f6645023cf884 Mon Sep 17 00:00:00 2001 From: Dan Wainwright Date: Sun, 23 Mar 2025 21:57:17 +0200 Subject: [PATCH] [BazarakiBridge] Add new bridge (#4473) * [BazarakiBridge] Add new bridge * fix --------- Co-authored-by: Dag --- bridges/BazarakiBridge.php | 139 +++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 bridges/BazarakiBridge.php diff --git a/bridges/BazarakiBridge.php b/bridges/BazarakiBridge.php new file mode 100644 index 00000000..44e4eb84 --- /dev/null +++ b/bridges/BazarakiBridge.php @@ -0,0 +1,139 @@ + [ + 'name' => 'URL', + 'type' => 'text', + 'required' => true, + 'title' => 'Enter the URL of the Bazaraki page to fetch adverts from.', + 'exampleValue' => 'https://www.bazaraki.com/real-estate-for-sale/houses/?lat=0&lng=0&radius=100000', + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Enter the number of adverts to fetch. (max 50)', + 'exampleValue' => '10', + 'defaultValue' => 10, + ] + ] + ]; + + public function collectData() + { + $url = $this->getInput('url'); + if (! str_starts_with($url, 'https://www.bazaraki.com/')) { + throw new \Exception('Nope'); + } + + $html = getSimpleHTMLDOM($url); + + $i = 0; + foreach ($html->find('div.advert') as $element) { + $i++; + if ($i > $this->getInput('limit') || $i > 50) { + break; + } + + $item = []; + + $item['uri'] = 'https://www.bazaraki.com' . $element->find('a.advert__content-title', 0)->href; + + # Get the content + $advert = getSimpleHTMLDOM($item['uri']); + + $price = trim($advert->find('div.announcement-price__cost', 0)->plaintext); + $name = trim($element->find('a.advert__content-title', 0)->plaintext); + + $item['title'] = $name . ' - ' . $price; + + $time = trim($advert->find('span.date-meta', 0)->plaintext); + $time = str_replace('Posted: ', '', $time); + + + $item['content'] = $this->processAdvertContent($advert); + $item['timestamp'] = $this->convertRelativeTime($time); + $item['author'] = trim($advert->find('div.author-name', 0)->plaintext); + $item['uid'] = $advert->find('span.number-announcement', 0)->plaintext; + + $this->items[] = $item; + } + } + + /** + * Process the advert content to clean up HTML + * + * @param simple_html_dom $advert The SimpleHTMLDOM object for the advert page + * @return string Processed HTML content + */ + private function processAdvertContent($advert) + { + // Get the content sections + $header = $advert->find('div.announcement-content-header', 0); + $characteristics = $advert->find('div.announcement-characteristics', 0); + $description = $advert->find('div.js-description', 0); + $images = $advert->find('div.announcement__images', 0); + + // Remove all favorites divs + foreach ($advert->find('div.announcement-meta__favorites') as $favorites) { + $favorites->outertext = ''; + } + + // Replace all tags with their text content + foreach ($advert->find('a') as $a) { + $a->outertext = $a->innertext; + } + + // Format the content with section headers and dividers + $formattedContent = ''; + + // Add header section + $formattedContent .= $header->innertext; + $formattedContent .= '
'; + + // Add characteristics section with header + $formattedContent .= '

Details

'; + $formattedContent .= $characteristics->innertext; + $formattedContent .= '
'; + + // Add description section with header + $formattedContent .= '

Description

'; + $formattedContent .= $description->innertext; + $formattedContent .= '
'; + + // Add images section with header + $formattedContent .= '

Images

'; + $formattedContent .= $images->innertext; + + return $formattedContent; + } + + /** + * Convert relative time strings like "Yesterday 12:32" to proper timestamps + * + * @param string $timeString The relative time string from the website + * @return string Timestamp in a format compatible with strtotime() + */ + private function convertRelativeTime($timeString) + { + if (strpos($timeString, 'Yesterday') !== false) { + // Replace "Yesterday" with actual date + $time = str_replace('Yesterday', date('Y-m-d', strtotime('-1 day')), $timeString); + return date('Y-m-d H:i:s', strtotime($time)); + } elseif (strpos($timeString, 'Today') !== false) { + // Replace "Today" with actual date + $time = str_replace('Today', date('Y-m-d'), $timeString); + return date('Y-m-d H:i:s', strtotime($time)); + } else { + // For other formats, return as is and let strtotime handle it + return $timeString; + } + } +}