From cb3c055df9cb3ad7092f5c943212f7c642ab4a04 Mon Sep 17 00:00:00 2001
From: Joseph
Date: Sun, 15 Aug 2021 18:36:38 +0000
Subject: [PATCH] [InternetArchiveBridge] Add detectParameters (#2142)

---
 bridges/InternetArchiveBridge.php | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/bridges/InternetArchiveBridge.php b/bridges/InternetArchiveBridge.php
index bf4323b2..9a257678 100644
--- a/bridges/InternetArchiveBridge.php
+++ b/bridges/InternetArchiveBridge.php
@@ -29,11 +29,54 @@ class InternetArchiveBridge extends BridgeAbstract {
 
 	const CACHE_TIMEOUT = 900; // 15 mins
 
+	/**
+	 * Sample URLs mapped to the parameters detectParameters() returns for them.
+	 */
+	const TEST_DETECT_PARAMETERS = array(
+		'https://archive.org/details/@verifiedjoseph' => array(
+			'context' => 'Account', 'username' => 'verifiedjoseph', 'content' => 'uploads'
+		),
+		'https://archive.org/details/@verifiedjoseph?tab=collections' => array(
+			'context' => 'Account', 'username' => 'verifiedjoseph', 'content' => 'collections'
+		),
+		'https://archive.org/details/@verifiedjoseph?sort=-publicdate&tab=collections' => array(
+			'context' => 'Account', 'username' => 'verifiedjoseph', 'content' => 'collections'
+		),
+	);
+
 	private $skipClasses = array(
 		'item-ia mobile-header hidden-tiles',
 		'item-ia account-ia'
 	);
 
+	/**
+	 * Matches an archive.org account URL. Group 1 is the username without the
+	 * leading "@"; optional group 2 is the value of the first "tab" query
+	 * parameter, which may be preceded by other query parameters.
+	 */
+	private $detectParamsRegex = '/https?:\/\/archive\.org\/details\/@([\w]+)(?:\?(?:[^&#]*&)*?tab=([a-z-]+))?/';
+
+	/**
+	 * Derive this bridge's parameters from an archive.org account URL.
+	 *
+	 * @param string $url URL to inspect.
+	 * @return array|null The 'context', 'username' and 'content' parameters, or
+	 * null when the URL does not match. 'content' falls back to 'uploads' when
+	 * no "tab" query parameter is found.
+	 */
+	public function detectParameters($url) {
+		if (preg_match($this->detectParamsRegex, $url, $matches) !== 1) {
+			return null;
+		}
+
+		return array(
+			'context' => 'Account',
+			'username' => $matches[1],
+			// preg_match() omits an unmatched trailing group from $matches.
+			'content' => isset($matches[2]) ? $matches[2] : 'uploads',
+		);
+	}
+
 	public function collectData() {
 
 		$html = getSimpleHTMLDOM($this->getURI())