From b6a9baff9494d4f7820b640848afa02492a6f865 Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 10 Oct 2023 21:41:57 +0200 Subject: [PATCH] fix(cvedetails,tldrtech) (#3735) --- bridges/CVEDetailsBridge.php | 131 ++++++++++++++++------------------- bridges/TldrTechBridge.php | 20 ++++-- lib/http.php | 1 + lib/utils.php | 2 +- 4 files changed, 75 insertions(+), 79 deletions(-) diff --git a/bridges/CVEDetailsBridge.php b/bridges/CVEDetailsBridge.php index 5334c170..27b4008c 100644 --- a/bridges/CVEDetailsBridge.php +++ b/bridges/CVEDetailsBridge.php @@ -36,12 +36,65 @@ class CVEDetailsBridge extends BridgeAbstract private $vendor = ''; private $product = ''; - // Return the URL to query. - // Because of the optional product ID, we need to attach it if it is - // set. The search result page has the exact same structure (with and - // without the product ID). - private function buildUrl() + public function collectData() { + if ($this->html == null) { + $this->fetchContent(); + } + + foreach ($this->html->find('#searchresults > .row') as $i => $tr) { + // There are some optional vulnerability types, which will be + // added to the categories as well as the CWE number -- which is + // always given. + $categories = [$this->vendor]; + $enclosures = []; + + $detailLink = $tr->find('h3 > a', 0); + $detailHtml = getSimpleHTMLDOM($detailLink->href); + + // The CVE number itself + $title = $tr->find('h3 > a', 0)->innertext; + $content = $tr->find('.cvesummarylong', 0)->innertext; + $cweList = $detailHtml->find('h2', 2)->next_sibling(); + foreach ($cweList->find('li') as $li) { + $cweWithDescription = $li->find('a', 0)->innertext; + + if (preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe)) { + $categories[] = 'CWE-' . $cwe[1]; + $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html'; + } + } + + if ($this->product != '') { + $categories[] = $this->product; + } + + $this->items[] = [ + 'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href, + 'title' => $title, + 'timestamp' => $tr->find('[data-tsvfield="publishDate"]', 0)->innertext, + 'content' => $content, + 'categories' => $categories, + 'enclosures' => $enclosures, + 'uid' => $title, + ]; + + // We only want to fetch the latest 10 CVEs + if (count($this->items) >= 10) { + break; + } + } + } + + // Make the actual request to cvedetails.com and stores the response + // (HTML) for later use and extract vendor and product from it. + private function fetchContent() + { + // build url + // Return the URL to query. + // Because of the optional product ID, we need to attach it if it is + // set. The search result page has the exact same structure (with and + // without the product ID). $url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id'); if ($this->getInput('product_id') !== '') { $url .= '/product_id-' . $this->getInput('product_id'); @@ -51,22 +104,12 @@ class CVEDetailsBridge extends BridgeAbstract // number, which should be mostly accurate. $url .= '?order=1'; // Order by CVE number DESC - return $url; - } - - // Make the actual request to cvedetails.com and stores the response - // (HTML) for later use and extract vendor and product from it. - private function fetchContent() - { - $html = getSimpleHTMLDOM($this->buildUrl()); + $html = getSimpleHTMLDOM($url); $this->html = defaultLinkTo($html, self::URI); $vendor = $html->find('#contentdiv h1 > a', 0); if ($vendor == null) { - returnServerError('Invalid Vendor ID ' . - $this->getInput('vendor_id') . - ' or Product ID ' . - $this->getInput('product_id')); + returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id')); } $this->vendor = $vendor->innertext; @@ -76,7 +119,6 @@ class CVEDetailsBridge extends BridgeAbstract } } - // Build the name of the feed. public function getName() { if ($this->getInput('vendor_id') == '') { @@ -94,57 +136,4 @@ class CVEDetailsBridge extends BridgeAbstract return $name; } - - // Pull the data from the HTML response and fill the items.. - public function collectData() - { - if ($this->html == null) { - $this->fetchContent(); - } - - foreach ($this->html->find('#searchresults > .row') as $i => $tr) { - // There are some optional vulnerability types, which will be - // added to the categories as well as the CWE number -- which is - // always given. - $categories = [$this->vendor]; - $enclosures = []; - - $detailLink = $tr->find('.cveheader > h3 > a', 0); - $detailHtml = getSimpleHTMLDOM($detailLink->href); - - $div = $detailHtml->find('.cvedetailssummary', 0); - - // The CVE number itself - $title = $div->find('h1 > a', 0)->innertext; - $content = $div->find('.ssc-paragraph', 0)->innertext; - $cweList = $detailHtml->find('h2', 2)->next_sibling(); - foreach ($cweList->find('li') as $li) { - $cweWithDescription = $li->find('a', 0)->innertext; - preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe); - if (count($cwe) > 1) { - $categories[] = 'CWE-' . $cwe[1]; - $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html'; - } - } - - if ($this->product != '') { - $categories[] = $this->product; - } - - $this->items[] = [ - 'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href, - 'title' => $title, - 'timestamp' => $tr->find('td', 5)->innertext, - 'content' => $content, - 'categories' => $categories, - 'enclosures' => $enclosures, - 'uid' => $title, - ]; - - // We only want to fetch the latest 10 CVEs - if (count($this->items) >= 10) { - break; - } - } - } } diff --git a/bridges/TldrTechBridge.php b/bridges/TldrTechBridge.php index b89686bb..984117b2 100644 --- a/bridges/TldrTechBridge.php +++ b/bridges/TldrTechBridge.php @@ -35,7 +35,10 @@ class TldrTechBridge extends BridgeAbstract public function collectData() { - $html = getSimpleHTMLDOM(self::URI . $this->getInput('topic') . '/archives'); + $topic = $this->getInput('topic'); + $limit = $this->getInput('limit'); + $url = self::URI . $topic . '/archives'; + $html = getSimpleHTMLDOM($url); $entries_root = $html->find('div.content-center.mt-5', 0); $added = 0; foreach ($entries_root->children() as $child) { @@ -46,22 +49,25 @@ class TldrTechBridge extends BridgeAbstract $date_items = explode('/', $child->href); $date = strtotime(end($date_items)); $this->items[] = [ - 'uri' => self::URI . $child->href, - 'title' => $child->plaintext, + 'uri' => self::URI . $child->href, + 'title' => $child->plaintext, 'timestamp' => $date, - 'content' => $this->parseEntry(self::URI . $child->href) + 'content' => $this->extractContent(self::URI . $child->href), ]; $added++; - if ($added >= $this->getInput('limit')) { + if ($added >= $limit) { break; } } } - private function parseEntry($uri) + private function extractContent($url) { - $html = getSimpleHTMLDOM($uri); + $html = getSimpleHTMLDOM($url); $content = $html->find('div.content-center.mt-5', 0); + if (!$content) { + return ''; + } $subscribe_form = $content->find('div.mt-5 > div > form', 0); if ($subscribe_form) { $content->removeChild($subscribe_form->parent->parent); diff --git a/lib/http.php b/lib/http.php index 3d65b2d1..c5c57d05 100644 --- a/lib/http.php +++ b/lib/http.php @@ -13,6 +13,7 @@ final class CloudFlareException extends HttpException 'Please Wait...', '<title>Attention Required!', '<title>Security | Glassdoor', + '<title>Access denied', // cf as seen on patreon.com ]; foreach ($cloudflareTitles as $cloudflareTitle) { if (str_contains($response->getBody(), $cloudflareTitle)) { diff --git a/lib/utils.php b/lib/utils.php index 4c58d258..e8f00f54 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -140,7 +140,7 @@ function _sanitize_path_name(string $s, string $pathName): string } /** - * This is buggy because strip tags removes a lot that isn't html + * This is buggy because strip_tags() removes a lot that isn't html */ function is_html(string $text): bool {