feat: add etag support to getContents (#3893)

This commit is contained in:
Dag 2024-01-12 01:31:01 +01:00 committed by GitHub
parent d5175aebcc
commit 191e5b0493
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 42 additions and 31 deletions

View File

@ -163,7 +163,7 @@ PHP ini config:
```ini ```ini
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini ; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
max_execution_time = 20 max_execution_time = 15
memory_limit = 64M memory_limit = 64M
``` ```

View File

@ -48,7 +48,7 @@ enable_maintenance_mode = false
[http] [http]
; Operation timeout in seconds ; Operation timeout in seconds
timeout = 30 timeout = 15
; Operation retry count in case of curl error ; Operation retry count in case of curl error
retries = 2 retries = 2

View File

@ -16,7 +16,7 @@ final class BridgeCard
$bridge = $bridgeFactory->create($bridgeClassName); $bridge = $bridgeFactory->create($bridgeClassName);
$isHttps = strpos($bridge->getURI(), 'https') === 0; $isHttps = str_starts_with($bridge->getURI(), 'https');
$uri = $bridge->getURI(); $uri = $bridge->getURI();
$name = $bridge->getName(); $name = $bridge->getName();
@ -113,8 +113,7 @@ EOD;
} }
if (!$isHttps) { if (!$isHttps) {
$form .= '<div class="secure-warning">Warning : $form .= '<div class="secure-warning">Warning: This bridge is not fetching its content through a secure connection</div>';
This bridge is not fetching its content through a secure connection</div>';
} }
return $form; return $form;

View File

@ -41,7 +41,7 @@ abstract class FeedExpander extends BridgeAbstract
} }
/** /**
* This method is overidden by bridges * This method is overridden by bridges
* *
* @return array * @return array
*/ */

View File

@ -7,9 +7,9 @@ declare(strict_types=1);
* *
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0. * Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
* *
* Produce arrays meant to be used inside rss-bridge. * Produces arrays meant to be used inside rss-bridge.
* *
* The item structure is tweaked so that works with FeedItem * The item structure is tweaked so that it works with FeedItem
*/ */
final class FeedParser final class FeedParser
{ {

View File

@ -518,7 +518,10 @@ abstract class XPathAbstract extends BridgeAbstract
if (strlen($value) === 0) { if (strlen($value) === 0) {
return ''; return '';
} }
if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { if (
strpos($value, 'http://') === 0
|| strpos($value, 'https://') === 0
) {
return $value; return $value;
} }

View File

@ -24,6 +24,32 @@ function getContents(
$headerValue = trim(implode(':', array_slice($parts, 1))); $headerValue = trim(implode(':', array_slice($parts, 1)));
$httpHeadersNormalized[$headerName] = $headerValue; $httpHeadersNormalized[$headerName] = $headerValue;
} }
$requestBodyHash = null;
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
}
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
$lastModified = $cachedResponse->getHeader('last-modified');
if ($lastModified) {
try {
// Some servers send a Unix timestamp instead of an RFC 7231 date. Prefix it with @ so that DateTimeImmutable can parse it
$lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified);
$config['if_not_modified_since'] = $lastModified->getTimestamp();
} catch (Exception $e) {
// Failed to parse last-modified
}
}
$etag = $cachedResponse->getHeader('etag');
if ($etag) {
$httpHeadersNormalized['if-none-match'] = $etag;
}
}
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
$defaultHttpHeaders = [ $defaultHttpHeaders = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
@ -35,6 +61,7 @@ function getContents(
'Sec-Fetch-User' => '?1', 'Sec-Fetch-User' => '?1',
'TE' => 'trailers', 'TE' => 'trailers',
]; ];
$config = [ $config = [
'useragent' => Configuration::getConfig('http', 'useragent'), 'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'), 'timeout' => Configuration::getConfig('http', 'timeout'),
@ -53,28 +80,6 @@ function getContents(
$config['proxy'] = Configuration::getConfig('proxy', 'url'); $config['proxy'] = Configuration::getConfig('proxy', 'url');
} }
$requestBodyHash = null;
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
}
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
$cachedLastModified = $cachedResponse->getHeader('last-modified');
if ($cachedLastModified) {
try {
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
$cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified);
$config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
} catch (Exception $dateTimeParseFailue) {
// Ignore invalid 'Last-Modified' HTTP header value
}
}
// todo: We should also check for Etag
}
$response = $httpClient->request($url, $config); $response = $httpClient->request($url, $config);
switch ($response->getCode()) { switch ($response->getCode()) {

View File

@ -258,6 +258,10 @@ final class Response
} }
/** /**
* HTTP response may have multiple headers with the same name.
*
* By default, this method returns only the last header.
*
* @return string[]|string|null * @return string[]|string|null
*/ */
public function getHeader(string $name, bool $all = false) public function getHeader(string $name, bool $all = false)