fix(cvedetails,tldrtech) (#3735)

This commit is contained in:
Dag 2023-10-10 21:41:57 +02:00 committed by GitHub
parent 143f90da60
commit b6a9baff94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 75 additions and 79 deletions

View File

@ -36,12 +36,65 @@ class CVEDetailsBridge extends BridgeAbstract
private $vendor = ''; private $vendor = '';
private $product = ''; private $product = '';
// Return the URL to query. public function collectData()
// Because of the optional product ID, we need to attach it if it is
// set. The search result page has the exact same structure (with and
// without the product ID).
private function buildUrl()
{ {
if ($this->html == null) {
$this->fetchContent();
}
foreach ($this->html->find('#searchresults > .row') as $i => $tr) {
// There are some optional vulnerability types, which will be
// added to the categories as well as the CWE number -- which is
// always given.
$categories = [$this->vendor];
$enclosures = [];
$detailLink = $tr->find('h3 > a', 0);
$detailHtml = getSimpleHTMLDOM($detailLink->href);
// The CVE number itself
$title = $tr->find('h3 > a', 0)->innertext;
$content = $tr->find('.cvesummarylong', 0)->innertext;
$cweList = $detailHtml->find('h2', 2)->next_sibling();
foreach ($cweList->find('li') as $li) {
$cweWithDescription = $li->find('a', 0)->innertext;
if (preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe)) {
$categories[] = 'CWE-' . $cwe[1];
$enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
}
}
if ($this->product != '') {
$categories[] = $this->product;
}
$this->items[] = [
'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href,
'title' => $title,
'timestamp' => $tr->find('[data-tsvfield="publishDate"]', 0)->innertext,
'content' => $content,
'categories' => $categories,
'enclosures' => $enclosures,
'uid' => $title,
];
// We only want to fetch the latest 10 CVEs
if (count($this->items) >= 10) {
break;
}
}
}
// Makes the actual request to cvedetails.com, stores the response
// (HTML) for later use, and extracts the vendor and product from it.
private function fetchContent()
{
// Build the URL to query.
// Because of the optional product ID, we need to attach it if it is
// set. The search result page has the exact same structure (with and
// without the product ID).
$url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id'); $url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id');
if ($this->getInput('product_id') !== '') { if ($this->getInput('product_id') !== '') {
$url .= '/product_id-' . $this->getInput('product_id'); $url .= '/product_id-' . $this->getInput('product_id');
@ -51,22 +104,12 @@ class CVEDetailsBridge extends BridgeAbstract
// number, which should be mostly accurate. // number, which should be mostly accurate.
$url .= '?order=1'; // Order by CVE number DESC $url .= '?order=1'; // Order by CVE number DESC
return $url; $html = getSimpleHTMLDOM($url);
}
// Make the actual request to cvedetails.com and stores the response
// (HTML) for later use and extract vendor and product from it.
private function fetchContent()
{
$html = getSimpleHTMLDOM($this->buildUrl());
$this->html = defaultLinkTo($html, self::URI); $this->html = defaultLinkTo($html, self::URI);
$vendor = $html->find('#contentdiv h1 > a', 0); $vendor = $html->find('#contentdiv h1 > a', 0);
if ($vendor == null) { if ($vendor == null) {
returnServerError('Invalid Vendor ID ' . returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id'));
$this->getInput('vendor_id') .
' or Product ID ' .
$this->getInput('product_id'));
} }
$this->vendor = $vendor->innertext; $this->vendor = $vendor->innertext;
@ -76,7 +119,6 @@ class CVEDetailsBridge extends BridgeAbstract
} }
} }
// Build the name of the feed.
public function getName() public function getName()
{ {
if ($this->getInput('vendor_id') == '') { if ($this->getInput('vendor_id') == '') {
@ -94,57 +136,4 @@ class CVEDetailsBridge extends BridgeAbstract
return $name; return $name;
} }
// Pull the data from the HTML response and fill the items..
public function collectData()
{
if ($this->html == null) {
$this->fetchContent();
}
foreach ($this->html->find('#searchresults > .row') as $i => $tr) {
// There are some optional vulnerability types, which will be
// added to the categories as well as the CWE number -- which is
// always given.
$categories = [$this->vendor];
$enclosures = [];
$detailLink = $tr->find('.cveheader > h3 > a', 0);
$detailHtml = getSimpleHTMLDOM($detailLink->href);
$div = $detailHtml->find('.cvedetailssummary', 0);
// The CVE number itself
$title = $div->find('h1 > a', 0)->innertext;
$content = $div->find('.ssc-paragraph', 0)->innertext;
$cweList = $detailHtml->find('h2', 2)->next_sibling();
foreach ($cweList->find('li') as $li) {
$cweWithDescription = $li->find('a', 0)->innertext;
preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe);
if (count($cwe) > 1) {
$categories[] = 'CWE-' . $cwe[1];
$enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
}
}
if ($this->product != '') {
$categories[] = $this->product;
}
$this->items[] = [
'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href,
'title' => $title,
'timestamp' => $tr->find('td', 5)->innertext,
'content' => $content,
'categories' => $categories,
'enclosures' => $enclosures,
'uid' => $title,
];
// We only want to fetch the latest 10 CVEs
if (count($this->items) >= 10) {
break;
}
}
}
} }

View File

@ -35,7 +35,10 @@ class TldrTechBridge extends BridgeAbstract
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOM(self::URI . $this->getInput('topic') . '/archives'); $topic = $this->getInput('topic');
$limit = $this->getInput('limit');
$url = self::URI . $topic . '/archives';
$html = getSimpleHTMLDOM($url);
$entries_root = $html->find('div.content-center.mt-5', 0); $entries_root = $html->find('div.content-center.mt-5', 0);
$added = 0; $added = 0;
foreach ($entries_root->children() as $child) { foreach ($entries_root->children() as $child) {
@ -46,22 +49,25 @@ class TldrTechBridge extends BridgeAbstract
$date_items = explode('/', $child->href); $date_items = explode('/', $child->href);
$date = strtotime(end($date_items)); $date = strtotime(end($date_items));
$this->items[] = [ $this->items[] = [
'uri' => self::URI . $child->href, 'uri' => self::URI . $child->href,
'title' => $child->plaintext, 'title' => $child->plaintext,
'timestamp' => $date, 'timestamp' => $date,
'content' => $this->parseEntry(self::URI . $child->href) 'content' => $this->extractContent(self::URI . $child->href),
]; ];
$added++; $added++;
if ($added >= $this->getInput('limit')) { if ($added >= $limit) {
break; break;
} }
} }
} }
private function parseEntry($uri) private function extractContent($url)
{ {
$html = getSimpleHTMLDOM($uri); $html = getSimpleHTMLDOM($url);
$content = $html->find('div.content-center.mt-5', 0); $content = $html->find('div.content-center.mt-5', 0);
if (!$content) {
return '';
}
$subscribe_form = $content->find('div.mt-5 > div > form', 0); $subscribe_form = $content->find('div.mt-5 > div > form', 0);
if ($subscribe_form) { if ($subscribe_form) {
$content->removeChild($subscribe_form->parent->parent); $content->removeChild($subscribe_form->parent->parent);

View File

@ -13,6 +13,7 @@ final class CloudFlareException extends HttpException
'<title>Please Wait...', '<title>Please Wait...',
'<title>Attention Required!', '<title>Attention Required!',
'<title>Security | Glassdoor', '<title>Security | Glassdoor',
'<title>Access denied</title>', // cf as seen on patreon.com
]; ];
foreach ($cloudflareTitles as $cloudflareTitle) { foreach ($cloudflareTitles as $cloudflareTitle) {
if (str_contains($response->getBody(), $cloudflareTitle)) { if (str_contains($response->getBody(), $cloudflareTitle)) {

View File

@ -140,7 +140,7 @@ function _sanitize_path_name(string $s, string $pathName): string
} }
/** /**
* This is buggy because strip tags removes a lot that isn't html * This is buggy because strip_tags() removes a lot that isn't html
*/ */
function is_html(string $text): bool function is_html(string $text): bool
{ {