mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-12 01:48:48 +00:00
refactor: extract CurlHttpClient (#3532)
* refactor: extract CurlHttpClient * refactor * interface
This commit is contained in:
parent
7b46b97abd
commit
a59793e8d6
@ -92,7 +92,12 @@ class AO3Bridge extends BridgeAbstract
|
|||||||
private function collectWork($id)
|
private function collectWork($id)
|
||||||
{
|
{
|
||||||
$url = self::URI . "/works/$id/navigate";
|
$url = self::URI . "/works/$id/navigate";
|
||||||
$response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
|
$httpClient = RssBridge::getHttpClient();
|
||||||
|
|
||||||
|
$response = $httpClient->request($url, [
|
||||||
|
'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
|
||||||
|
]);
|
||||||
|
|
||||||
$html = \str_get_html($response['body']);
|
$html = \str_get_html($response['body']);
|
||||||
$html = defaultLinkTo($html, self::URI);
|
$html = defaultLinkTo($html, self::URI);
|
||||||
|
|
||||||
|
@ -14,7 +14,8 @@ while ($next) { /* Collect all contributors */
|
|||||||
'Content-Type' => 'application/json',
|
'Content-Type' => 'application/json',
|
||||||
'User-Agent' => 'RSS-Bridge',
|
'User-Agent' => 'RSS-Bridge',
|
||||||
];
|
];
|
||||||
$result = _http_request($url, ['headers' => $headers]);
|
$httpClient = new CurlHttpClient();
|
||||||
|
$result = $httpClient->request($url, ['headers' => $headers]);
|
||||||
|
|
||||||
foreach (json_decode($result['body']) as $contributor) {
|
foreach (json_decode($result['body']) as $contributor) {
|
||||||
$contributors[] = $contributor;
|
$contributors[] = $contributor;
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
final class RssBridge
|
final class RssBridge
|
||||||
{
|
{
|
||||||
|
private static HttpClient $httpClient;
|
||||||
private static CacheInterface $cache;
|
private static CacheInterface $cache;
|
||||||
|
|
||||||
public function main(array $argv = [])
|
public function main(array $argv = [])
|
||||||
@ -71,9 +72,10 @@ final class RssBridge
|
|||||||
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
|
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
|
||||||
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
|
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
|
||||||
|
|
||||||
// Create cache
|
|
||||||
$cacheFactory = new CacheFactory();
|
$cacheFactory = new CacheFactory();
|
||||||
self::setCache($cacheFactory->create());
|
|
||||||
|
self::$httpClient = new CurlHttpClient();
|
||||||
|
self::$cache = $cacheFactory->create();
|
||||||
|
|
||||||
if (Configuration::getConfig('authentication', 'enable')) {
|
if (Configuration::getConfig('authentication', 'enable')) {
|
||||||
$authenticationMiddleware = new AuthenticationMiddleware();
|
$authenticationMiddleware = new AuthenticationMiddleware();
|
||||||
@ -105,13 +107,13 @@ final class RssBridge
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static function getHttpClient(): HttpClient
|
||||||
|
{
|
||||||
|
return self::$httpClient;
|
||||||
|
}
|
||||||
|
|
||||||
public static function getCache(): CacheInterface
|
public static function getCache(): CacheInterface
|
||||||
{
|
{
|
||||||
return self::$cache;
|
return self::$cache;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function setCache(CacheInterface $cache): void
|
|
||||||
{
|
|
||||||
self::$cache = $cache;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
257
lib/contents.php
257
lib/contents.php
@ -99,6 +99,7 @@ function getContents(
|
|||||||
array $curlOptions = [],
|
array $curlOptions = [],
|
||||||
bool $returnFull = false
|
bool $returnFull = false
|
||||||
) {
|
) {
|
||||||
|
$httpClient = RssBridge::getHttpClient();
|
||||||
$cache = RssBridge::getCache();
|
$cache = RssBridge::getCache();
|
||||||
$cache->setScope('server');
|
$cache->setScope('server');
|
||||||
$cache->setKey([$url]);
|
$cache->setKey([$url]);
|
||||||
@ -141,20 +142,14 @@ function getContents(
|
|||||||
$config['if_not_modified_since'] = $cache->getTime();
|
$config['if_not_modified_since'] = $cache->getTime();
|
||||||
}
|
}
|
||||||
|
|
||||||
$result = _http_request($url, $config);
|
$response = $httpClient->request($url, $config);
|
||||||
$response = [
|
|
||||||
'code' => $result['code'],
|
|
||||||
'status_lines' => $result['status_lines'],
|
|
||||||
'header' => $result['headers'],
|
|
||||||
'content' => $result['body'],
|
|
||||||
];
|
|
||||||
|
|
||||||
switch ($result['code']) {
|
switch ($response['code']) {
|
||||||
case 200:
|
case 200:
|
||||||
case 201:
|
case 201:
|
||||||
case 202:
|
case 202:
|
||||||
if (isset($result['headers']['cache-control'])) {
|
if (isset($response['headers']['cache-control'])) {
|
||||||
$cachecontrol = $result['headers']['cache-control'];
|
$cachecontrol = $response['headers']['cache-control'];
|
||||||
$lastValue = array_pop($cachecontrol);
|
$lastValue = array_pop($cachecontrol);
|
||||||
$directives = explode(',', $lastValue);
|
$directives = explode(',', $lastValue);
|
||||||
$directives = array_map('trim', $directives);
|
$directives = array_map('trim', $directives);
|
||||||
@ -163,7 +158,7 @@ function getContents(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$cache->saveData($result['body']);
|
$cache->saveData($response['body']);
|
||||||
break;
|
break;
|
||||||
case 301:
|
case 301:
|
||||||
case 302:
|
case 302:
|
||||||
@ -172,16 +167,16 @@ function getContents(
|
|||||||
break;
|
break;
|
||||||
case 304:
|
case 304:
|
||||||
// Not Modified
|
// Not Modified
|
||||||
$response['content'] = $cache->loadData();
|
$response['body'] = $cache->loadData();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
$exceptionMessage = sprintf(
|
$exceptionMessage = sprintf(
|
||||||
'%s resulted in %s %s %s',
|
'%s resulted in %s %s %s',
|
||||||
$url,
|
$url,
|
||||||
$result['code'],
|
$response['code'],
|
||||||
Response::STATUS_CODES[$result['code']] ?? '',
|
Response::STATUS_CODES[$response['code']] ?? '',
|
||||||
// If debug, include a part of the response body in the exception message
|
// If debug, include a part of the response body in the exception message
|
||||||
Debug::isEnabled() ? mb_substr($result['body'], 0, 500) : '',
|
Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '',
|
||||||
);
|
);
|
||||||
|
|
||||||
// The following code must be extracted if it grows too much
|
// The following code must be extracted if it grows too much
|
||||||
@ -192,137 +187,141 @@ function getContents(
|
|||||||
'<title>Security | Glassdoor',
|
'<title>Security | Glassdoor',
|
||||||
];
|
];
|
||||||
foreach ($cloudflareTitles as $cloudflareTitle) {
|
foreach ($cloudflareTitles as $cloudflareTitle) {
|
||||||
if (str_contains($result['body'], $cloudflareTitle)) {
|
if (str_contains($response['body'], $cloudflareTitle)) {
|
||||||
throw new CloudFlareException($exceptionMessage, $result['code']);
|
throw new CloudFlareException($exceptionMessage, $response['code']);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new HttpException(trim($exceptionMessage), $result['code']);
|
// The HTTP result now lives in $response; the legacy $result variable is no longer defined in this function.
throw new HttpException(trim($exceptionMessage), $response['code']);
|
||||||
}
|
}
|
||||||
if ($returnFull === true) {
|
if ($returnFull === true) {
|
||||||
|
// For legacy reasons, use content instead of body
|
||||||
|
$response['content'] = $response['body'];
|
||||||
|
unset($response['body']);
|
||||||
return $response;
|
return $response;
|
||||||
}
|
}
|
||||||
return $response['content'];
|
return $response['body'];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
interface HttpClient
|
||||||
* Fetch content from url
|
|
||||||
*
|
|
||||||
* @internal Private function used internally
|
|
||||||
* @throws HttpException
|
|
||||||
*/
|
|
||||||
function _http_request(string $url, array $config = []): array
|
|
||||||
{
|
{
|
||||||
$defaults = [
|
public function request(string $url, array $config = []): array;
|
||||||
'useragent' => null,
|
}
|
||||||
'timeout' => 5,
|
|
||||||
'headers' => [],
|
|
||||||
'proxy' => null,
|
|
||||||
'curl_options' => [],
|
|
||||||
'if_not_modified_since' => null,
|
|
||||||
'retries' => 3,
|
|
||||||
'max_filesize' => null,
|
|
||||||
'max_redirections' => 5,
|
|
||||||
];
|
|
||||||
$config = array_merge($defaults, $config);
|
|
||||||
|
|
||||||
$ch = curl_init($url);
|
final class CurlHttpClient implements HttpClient
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
{
|
||||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
public function request(string $url, array $config = []): array
|
||||||
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
|
{
|
||||||
curl_setopt($ch, CURLOPT_HEADER, false);
|
$defaults = [
|
||||||
$httpHeaders = [];
|
'useragent' => null,
|
||||||
foreach ($config['headers'] as $name => $value) {
|
'timeout' => 5,
|
||||||
$httpHeaders[] = sprintf('%s: %s', $name, $value);
|
'headers' => [],
|
||||||
}
|
'proxy' => null,
|
||||||
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
|
'curl_options' => [],
|
||||||
if ($config['useragent']) {
|
'if_not_modified_since' => null,
|
||||||
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
'retries' => 3,
|
||||||
}
|
'max_filesize' => null,
|
||||||
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
'max_redirections' => 5,
|
||||||
curl_setopt($ch, CURLOPT_ENCODING, '');
|
];
|
||||||
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
$config = array_merge($defaults, $config);
|
||||||
|
|
||||||
if ($config['max_filesize']) {
|
$ch = curl_init($url);
|
||||||
// This option inspects the Content-Length header
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
|
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||||
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
|
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
|
||||||
// This progress function will monitor responses who omit the Content-Length header
|
curl_setopt($ch, CURLOPT_HEADER, false);
|
||||||
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
|
$httpHeaders = [];
|
||||||
if ($downloaded > $config['max_filesize']) {
|
foreach ($config['headers'] as $name => $value) {
|
||||||
// Return a non-zero value to abort the transfer
|
$httpHeaders[] = sprintf('%s: %s', $name, $value);
|
||||||
return -1;
|
}
|
||||||
|
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
|
||||||
|
if ($config['useragent']) {
|
||||||
|
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
||||||
|
}
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
||||||
|
curl_setopt($ch, CURLOPT_ENCODING, '');
|
||||||
|
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
||||||
|
|
||||||
|
if ($config['max_filesize']) {
|
||||||
|
// This option inspects the Content-Length header
|
||||||
|
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
|
||||||
|
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
|
||||||
|
// This progress function will monitor responses who omit the Content-Length header
|
||||||
|
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
|
||||||
|
if ($downloaded > $config['max_filesize']) {
|
||||||
|
// Return a non-zero value to abort the transfer
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($config['proxy']) {
|
||||||
|
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
|
||||||
|
}
|
||||||
|
if (curl_setopt_array($ch, $config['curl_options']) === false) {
|
||||||
|
throw new \Exception('Tried to set an illegal curl option');
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($config['if_not_modified_since']) {
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
|
||||||
|
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
|
||||||
|
}
|
||||||
|
|
||||||
|
$responseStatusLines = [];
|
||||||
|
$responseHeaders = [];
|
||||||
|
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
|
||||||
|
$len = strlen($rawHeader);
|
||||||
|
if ($rawHeader === "\r\n") {
|
||||||
|
return $len;
|
||||||
}
|
}
|
||||||
return 0;
|
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
|
||||||
|
$responseStatusLines[] = $rawHeader;
|
||||||
|
return $len;
|
||||||
|
}
|
||||||
|
$header = explode(':', $rawHeader);
|
||||||
|
if (count($header) === 1) {
|
||||||
|
return $len;
|
||||||
|
}
|
||||||
|
$name = mb_strtolower(trim($header[0]));
|
||||||
|
$value = trim(implode(':', array_slice($header, 1)));
|
||||||
|
if (!isset($responseHeaders[$name])) {
|
||||||
|
$responseHeaders[$name] = [];
|
||||||
|
}
|
||||||
|
$responseHeaders[$name][] = $value;
|
||||||
|
return $len;
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
if ($config['proxy']) {
|
$attempts = 0;
|
||||||
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
|
while (true) {
|
||||||
}
|
$attempts++;
|
||||||
if (curl_setopt_array($ch, $config['curl_options']) === false) {
|
$data = curl_exec($ch);
|
||||||
throw new \Exception('Tried to set an illegal curl option');
|
if ($data !== false) {
|
||||||
}
|
// The network call was successful, so break out of the loop
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ($attempts > $config['retries']) {
|
||||||
|
// Finally give up
|
||||||
|
$curl_error = curl_error($ch);
|
||||||
|
$curl_errno = curl_errno($ch);
|
||||||
|
throw new HttpException(sprintf(
|
||||||
|
'cURL error %s: %s (%s) for %s',
|
||||||
|
$curl_error,
|
||||||
|
$curl_errno,
|
||||||
|
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
|
||||||
|
$url
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ($config['if_not_modified_since']) {
|
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||||
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
|
curl_close($ch);
|
||||||
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
|
return [
|
||||||
|
'code' => $statusCode,
|
||||||
|
'status_lines' => $responseStatusLines,
|
||||||
|
'headers' => $responseHeaders,
|
||||||
|
'body' => $data,
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
$responseStatusLines = [];
|
|
||||||
$responseHeaders = [];
|
|
||||||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
|
|
||||||
$len = strlen($rawHeader);
|
|
||||||
if ($rawHeader === "\r\n") {
|
|
||||||
return $len;
|
|
||||||
}
|
|
||||||
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
|
|
||||||
$responseStatusLines[] = $rawHeader;
|
|
||||||
return $len;
|
|
||||||
}
|
|
||||||
$header = explode(':', $rawHeader);
|
|
||||||
if (count($header) === 1) {
|
|
||||||
return $len;
|
|
||||||
}
|
|
||||||
$name = mb_strtolower(trim($header[0]));
|
|
||||||
$value = trim(implode(':', array_slice($header, 1)));
|
|
||||||
if (!isset($responseHeaders[$name])) {
|
|
||||||
$responseHeaders[$name] = [];
|
|
||||||
}
|
|
||||||
$responseHeaders[$name][] = $value;
|
|
||||||
return $len;
|
|
||||||
});
|
|
||||||
|
|
||||||
$attempts = 0;
|
|
||||||
while (true) {
|
|
||||||
$attempts++;
|
|
||||||
$data = curl_exec($ch);
|
|
||||||
if ($data !== false) {
|
|
||||||
// The network call was successful, so break out of the loop
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if ($attempts > $config['retries']) {
|
|
||||||
// Finally give up
|
|
||||||
$curl_error = curl_error($ch);
|
|
||||||
$curl_errno = curl_errno($ch);
|
|
||||||
throw new HttpException(sprintf(
|
|
||||||
'cURL error %s: %s (%s) for %s',
|
|
||||||
$curl_error,
|
|
||||||
$curl_errno,
|
|
||||||
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
|
|
||||||
$url
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
|
||||||
curl_close($ch);
|
|
||||||
return [
|
|
||||||
'code' => $statusCode,
|
|
||||||
'status_lines' => $responseStatusLines,
|
|
||||||
'headers' => $responseHeaders,
|
|
||||||
'body' => $data,
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user