mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-05 00:59:35 +00:00
feat: add etag support to getContents (#3893)
This commit is contained in:
parent
d5175aebcc
commit
191e5b0493
@ -163,7 +163,7 @@ PHP ini config:
|
|||||||
```ini
|
```ini
|
||||||
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
|
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
|
||||||
|
|
||||||
max_execution_time = 20
|
max_execution_time = 15
|
||||||
memory_limit = 64M
|
memory_limit = 64M
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ enable_maintenance_mode = false
|
|||||||
|
|
||||||
[http]
|
[http]
|
||||||
; Operation timeout in seconds
|
; Operation timeout in seconds
|
||||||
timeout = 30
|
timeout = 15
|
||||||
|
|
||||||
; Operation retry count in case of curl error
|
; Operation retry count in case of curl error
|
||||||
retries = 2
|
retries = 2
|
||||||
|
@ -16,7 +16,7 @@ final class BridgeCard
|
|||||||
|
|
||||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||||
|
|
||||||
$isHttps = strpos($bridge->getURI(), 'https') === 0;
|
$isHttps = str_starts_with($bridge->getURI(), 'https');
|
||||||
|
|
||||||
$uri = $bridge->getURI();
|
$uri = $bridge->getURI();
|
||||||
$name = $bridge->getName();
|
$name = $bridge->getName();
|
||||||
@ -113,8 +113,7 @@ EOD;
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!$isHttps) {
|
if (!$isHttps) {
|
||||||
$form .= '<div class="secure-warning">Warning :
|
$form .= '<div class="secure-warning">Warning: This bridge is not fetching its content through a secure connection</div>';
|
||||||
This bridge is not fetching its content through a secure connection</div>';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $form;
|
return $form;
|
||||||
|
@ -41,7 +41,7 @@ abstract class FeedExpander extends BridgeAbstract
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method is overidden by bridges
|
* This method is overridden by bridges
|
||||||
*
|
*
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
|
@ -7,9 +7,9 @@ declare(strict_types=1);
|
|||||||
*
|
*
|
||||||
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
||||||
*
|
*
|
||||||
* Produce arrays meant to be used inside rss-bridge.
|
* Produces array meant to be used inside rss-bridge.
|
||||||
*
|
*
|
||||||
* The item structure is tweaked so that works with FeedItem
|
* The item structure is tweaked so that it works with FeedItem
|
||||||
*/
|
*/
|
||||||
final class FeedParser
|
final class FeedParser
|
||||||
{
|
{
|
||||||
|
@ -518,7 +518,10 @@ abstract class XPathAbstract extends BridgeAbstract
|
|||||||
if (strlen($value) === 0) {
|
if (strlen($value) === 0) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
|
if (
|
||||||
|
strpos($value, 'http://') === 0
|
||||||
|
|| strpos($value, 'https://') === 0
|
||||||
|
) {
|
||||||
return $value;
|
return $value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,6 +24,32 @@ function getContents(
|
|||||||
$headerValue = trim(implode(':', array_slice($parts, 1)));
|
$headerValue = trim(implode(':', array_slice($parts, 1)));
|
||||||
$httpHeadersNormalized[$headerName] = $headerValue;
|
$httpHeadersNormalized[$headerName] = $headerValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$requestBodyHash = null;
|
||||||
|
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
|
||||||
|
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
|
||||||
|
}
|
||||||
|
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
|
||||||
|
|
||||||
|
/** @var Response $cachedResponse */
|
||||||
|
$cachedResponse = $cache->get($cacheKey);
|
||||||
|
if ($cachedResponse) {
|
||||||
|
$lastModified = $cachedResponse->getHeader('last-modified');
|
||||||
|
if ($lastModified) {
|
||||||
|
try {
|
||||||
|
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
|
||||||
|
$lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified);
|
||||||
|
$config['if_not_modified_since'] = $lastModified->getTimestamp();
|
||||||
|
} catch (Exception $e) {
|
||||||
|
// Failed to parse last-modified
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$etag = $cachedResponse->getHeader('etag');
|
||||||
|
if ($etag) {
|
||||||
|
$httpHeadersNormalized['if-none-match'] = $etag;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
|
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
|
||||||
$defaultHttpHeaders = [
|
$defaultHttpHeaders = [
|
||||||
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||||
@ -35,6 +61,7 @@ function getContents(
|
|||||||
'Sec-Fetch-User' => '?1',
|
'Sec-Fetch-User' => '?1',
|
||||||
'TE' => 'trailers',
|
'TE' => 'trailers',
|
||||||
];
|
];
|
||||||
|
|
||||||
$config = [
|
$config = [
|
||||||
'useragent' => Configuration::getConfig('http', 'useragent'),
|
'useragent' => Configuration::getConfig('http', 'useragent'),
|
||||||
'timeout' => Configuration::getConfig('http', 'timeout'),
|
'timeout' => Configuration::getConfig('http', 'timeout'),
|
||||||
@ -53,28 +80,6 @@ function getContents(
|
|||||||
$config['proxy'] = Configuration::getConfig('proxy', 'url');
|
$config['proxy'] = Configuration::getConfig('proxy', 'url');
|
||||||
}
|
}
|
||||||
|
|
||||||
$requestBodyHash = null;
|
|
||||||
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
|
|
||||||
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
|
|
||||||
}
|
|
||||||
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
|
|
||||||
|
|
||||||
/** @var Response $cachedResponse */
|
|
||||||
$cachedResponse = $cache->get($cacheKey);
|
|
||||||
if ($cachedResponse) {
|
|
||||||
$cachedLastModified = $cachedResponse->getHeader('last-modified');
|
|
||||||
if ($cachedLastModified) {
|
|
||||||
try {
|
|
||||||
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
|
|
||||||
$cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified);
|
|
||||||
$config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
|
|
||||||
} catch (Exception $dateTimeParseFailue) {
|
|
||||||
// Ignore invalid 'Last-Modified' HTTP header value
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// todo: We should also check for Etag
|
|
||||||
}
|
|
||||||
|
|
||||||
$response = $httpClient->request($url, $config);
|
$response = $httpClient->request($url, $config);
|
||||||
|
|
||||||
switch ($response->getCode()) {
|
switch ($response->getCode()) {
|
||||||
|
@ -258,6 +258,10 @@ final class Response
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* HTTP response may have multiple headers with the same name.
|
||||||
|
*
|
||||||
|
* This method by default, returns only the last header.
|
||||||
|
*
|
||||||
* @return string[]|string|null
|
* @return string[]|string|null
|
||||||
*/
|
*/
|
||||||
public function getHeader(string $name, bool $all = false)
|
public function getHeader(string $name, bool $all = false)
|
||||||
|
Loading…
Reference in New Issue
Block a user