From f134808a268065e5000ef694149f62bb0f263b16 Mon Sep 17 00:00:00 2001 From: Park0 Date: Sun, 22 Oct 2023 17:36:36 +0200 Subject: [PATCH] Marktplaats categories added (#3761) * Update MarktplaatsBridge.php * Update MarktplaatsBridge.php only main categories As the whole list is too big, only main categories are used for now. * Renamed parameter 2 to sc. Renamed unused method to better reflect its usage * Update MarktplaatsBridge.php Several fixes. Categories completed. Added a default empty one. Check if the input is not empty before using. Added helper methods to generate the category list * Update MarktplaatsBridge.php Set the methods to private for the CI --- bridges/MarktplaatsBridge.php | 143 ++++++++++++++++++++++++++++++++++- 1 file changed, 139 insertions(+), 4 deletions(-) diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php index 70a369d9..6ba993e7 100644 --- a/bridges/MarktplaatsBridge.php +++ b/bridges/MarktplaatsBridge.php @@ -14,6 +14,51 @@ class MarktplaatsBridge extends BridgeAbstract 'required' => true, 'title' => 'The search string for marktplaats', ], + 'c' => [ + 'name' => 'Category', + 'type' => 'list', + 'values' => [ + 'Select a category' => '', + 'Antiek en Kunst' => '1', + 'Audio, Tv en Foto' => '31', + 'Auto's' => '91', + 'Auto-onderdelen' => '2600', + 'Auto diversen' => '48', + 'Boeken' => '201', + 'Caravans en Kamperen' => '289', + 'Cd's en Dvd's' => '1744', + 'Computers en Software' => '322', + 'Contacten en Berichten' => '378', + 'Diensten en Vakmensen' => '1098', + 'Dieren en Toebehoren' => '395', + 'Doe-het-zelf en Verbouw' => '239', + 'Fietsen en Brommers' => '445', + 'Hobby en Vrije tijd' => '1099', + 'Huis en Inrichting' => '504', + 'Huizen en Kamers' => '1032', + 'Kinderen en Baby's' => '565', + 'Kleding | Dames' => '621', + 'Kleding | Heren' => '1776', + 'Motoren' => '678', + 'Muziek en Instrumenten' => '728', + 'Postzegels en Munten' => '1784', + 'Sieraden, Tassen en Uiterlijk' => '1826', + 'Spelcomputers en Games'
=> '356', + 'Sport en Fitness' => '784', + 'Telecommunicatie' => '820', + 'Tickets en Kaartjes' => '1984', + 'Tuin en Terras' => '1847', + 'Vacatures' => '167', + 'Vakantie' => '856', + 'Verzamelen' => '895', + 'Watersport en Boten' => '976', + 'Witgoed en Apparatuur' => '537', + 'Zakelijke goederen' => '1085', + 'Diversen' => '428', + ], + 'required' => false, + 'title' => 'The category to search in', + ], 'z' => [ 'name' => 'zipcode', 'type' => 'text', @@ -57,7 +102,15 @@ class MarktplaatsBridge extends BridgeAbstract 'type' => 'checkbox', 'required' => false, 'title' => 'Include the raw data behind the content', - ] + ], + 'sc' => [ + 'name' => 'Sub category', + 'type' => 'number', + 'required' => false, + 'exampleValue' => '12345', + 'title' => 'Sub category has to be given by id as the list is too big to show here. + Only use subcategories that belong to the main category. Both have to be correct', + ], ] ]; const CACHE_TIMEOUT = 900; @@ -80,6 +133,12 @@ class MarktplaatsBridge extends BridgeAbstract $excludeGlobal = true; } } + if (!empty($this->getInput('c'))) { + $query .= '&l1CategoryId=' . $this->getInput('c'); + } + if (!is_null($this->getInput('sc'))) { + $query .= '&l2CategoryId=' . $this->getInput('sc'); + } $url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query; $jsonString = getSimpleHTMLDOM($url); $jsonObj = json_decode($jsonString); @@ -97,15 +156,15 @@ class MarktplaatsBridge extends BridgeAbstract $item['enclosures'] = $listing->imageUrls; if (is_array($listing->imageUrls)) { foreach ($listing->imageUrls as $imgurl) { - $item['content'] .= "
\n"; + $item['content'] .= "
\n"; } } else { - $item['content'] .= "
\n"; + $item['content'] .= "
\n"; } } if (!is_null($this->getInput('r'))) { if ($this->getInput('r')) { - $item['content'] .= "
\n
\n
\n" . json_encode($listing); + $item['content'] .= "
\n
\n
\n" . json_encode($listing) . "
$url"; } } $item['content'] .= "
\n
\nPrice: " . $listing->priceInfo->priceCents / 100; @@ -130,4 +189,80 @@ class MarktplaatsBridge extends BridgeAbstract } return parent::getName(); } + + /** + * Method can be used to scrape the subcategories from marktplaats + */ + private static function scrapeSubCategories() + { + $main = []; + $main['Select a category'] = ''; + $marktplaatsHTML = file_get_html('https://www.marktplaats.nl'); + foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) { + if (!str_contains($opt->innertext, 'categorie')) { + $main[$opt->innertext] = $opt->value; + $ids[] = $opt->value; + } + } + + $result = []; + foreach ($ids as $id) { + $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id; + $jsonstring = getContents($url); + $jsondata = json_decode((string)$jsonstring); + if (isset($jsondata->searchCategoryOptions)) { + $categories = $jsondata->searchCategoryOptions; + if (isset($jsondata->categoriesById->$id)) { + $maincategory = $jsondata->categoriesById->$id; + $array = []; + foreach ($categories as $categorie) { + $array[$categorie->fullName] = $categorie->id; + } + $result[$maincategory->fullName] = $array; + } + } else { + print($jsonstring); + } + } + $combinedResult = [ + 'main' => $main, + 'sub' => $result + ]; + return $combinedResult; + } + + /** + * Helper method to construct the array that could be used for categories + * + * @param $array + * @param $indent + * @return void + */ + private static function printArrayAsCode($array, $indent = 0) + { + foreach ($array as $key => $value) { + if (is_array($value)) { + echo str_repeat(' ', $indent) . "'$key' => [" . PHP_EOL; + self::printArrayAsCode($value, $indent + 1); + echo str_repeat(' ', $indent) . '],' . PHP_EOL; + } else { + $value = str_replace('\'', '\\\'', $value); + $key = str_replace('\'', '\\\'', $key); + echo str_repeat(' ', $indent) . "'$key' => '$value'," . 
PHP_EOL; + } + } + } + + private static function printScrapeArray() + { + $array = (MarktplaatsBridge::scrapeSubCategories()); + + echo '$myArray = [' . PHP_EOL; + self::printArrayAsCode($array['main'], 1); + echo '];' . PHP_EOL; + + echo '$myArray = [' . PHP_EOL; + self::printArrayAsCode($array['sub'], 1); + echo '];' . PHP_EOL; + } }