mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-04-04 16:49:35 +00:00
refactor: extract CurlHttpClient (#3532)
* refactor: extract CurlHttpClient * refactor * interface
This commit is contained in:
parent
7b46b97abd
commit
a59793e8d6
@ -92,7 +92,12 @@ class AO3Bridge extends BridgeAbstract
|
||||
private function collectWork($id)
|
||||
{
|
||||
$url = self::URI . "/works/$id/navigate";
|
||||
$response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']);
|
||||
$httpClient = RssBridge::getHttpClient();
|
||||
|
||||
$response = $httpClient->request($url, [
|
||||
'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
|
||||
]);
|
||||
|
||||
$html = \str_get_html($response['body']);
|
||||
$html = defaultLinkTo($html, self::URI);
|
||||
|
||||
|
@ -14,7 +14,8 @@ while ($next) { /* Collect all contributors */
|
||||
'Content-Type' => 'application/json',
|
||||
'User-Agent' => 'RSS-Bridge',
|
||||
];
|
||||
$result = _http_request($url, ['headers' => $headers]);
|
||||
$httpClient = new CurlHttpClient();
|
||||
$result = $httpClient->request($url, ['headers' => $headers]);
|
||||
|
||||
foreach (json_decode($result['body']) as $contributor) {
|
||||
$contributors[] = $contributor;
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
final class RssBridge
|
||||
{
|
||||
private static HttpClient $httpClient;
|
||||
private static CacheInterface $cache;
|
||||
|
||||
public function main(array $argv = [])
|
||||
@ -71,9 +72,10 @@ final class RssBridge
|
||||
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
|
||||
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
|
||||
|
||||
// Create cache
|
||||
$cacheFactory = new CacheFactory();
|
||||
self::setCache($cacheFactory->create());
|
||||
|
||||
self::$httpClient = new CurlHttpClient();
|
||||
self::$cache = $cacheFactory->create();
|
||||
|
||||
if (Configuration::getConfig('authentication', 'enable')) {
|
||||
$authenticationMiddleware = new AuthenticationMiddleware();
|
||||
@ -105,13 +107,13 @@ final class RssBridge
|
||||
}
|
||||
}
|
||||
|
||||
public static function getHttpClient(): HttpClient
|
||||
{
|
||||
return self::$httpClient;
|
||||
}
|
||||
|
||||
public static function getCache(): CacheInterface
|
||||
{
|
||||
return self::$cache;
|
||||
}
|
||||
|
||||
public static function setCache(CacheInterface $cache): void
|
||||
{
|
||||
self::$cache = $cache;
|
||||
}
|
||||
}
|
||||
|
257
lib/contents.php
257
lib/contents.php
@ -99,6 +99,7 @@ function getContents(
|
||||
array $curlOptions = [],
|
||||
bool $returnFull = false
|
||||
) {
|
||||
$httpClient = RssBridge::getHttpClient();
|
||||
$cache = RssBridge::getCache();
|
||||
$cache->setScope('server');
|
||||
$cache->setKey([$url]);
|
||||
@ -141,20 +142,14 @@ function getContents(
|
||||
$config['if_not_modified_since'] = $cache->getTime();
|
||||
}
|
||||
|
||||
$result = _http_request($url, $config);
|
||||
$response = [
|
||||
'code' => $result['code'],
|
||||
'status_lines' => $result['status_lines'],
|
||||
'header' => $result['headers'],
|
||||
'content' => $result['body'],
|
||||
];
|
||||
$response = $httpClient->request($url, $config);
|
||||
|
||||
switch ($result['code']) {
|
||||
switch ($response['code']) {
|
||||
case 200:
|
||||
case 201:
|
||||
case 202:
|
||||
if (isset($result['headers']['cache-control'])) {
|
||||
$cachecontrol = $result['headers']['cache-control'];
|
||||
if (isset($response['headers']['cache-control'])) {
|
||||
$cachecontrol = $response['headers']['cache-control'];
|
||||
$lastValue = array_pop($cachecontrol);
|
||||
$directives = explode(',', $lastValue);
|
||||
$directives = array_map('trim', $directives);
|
||||
@ -163,7 +158,7 @@ function getContents(
|
||||
break;
|
||||
}
|
||||
}
|
||||
$cache->saveData($result['body']);
|
||||
$cache->saveData($response['body']);
|
||||
break;
|
||||
case 301:
|
||||
case 302:
|
||||
@ -172,16 +167,16 @@ function getContents(
|
||||
break;
|
||||
case 304:
|
||||
// Not Modified
|
||||
$response['content'] = $cache->loadData();
|
||||
$response['body'] = $cache->loadData();
|
||||
break;
|
||||
default:
|
||||
$exceptionMessage = sprintf(
|
||||
'%s resulted in %s %s %s',
|
||||
$url,
|
||||
$result['code'],
|
||||
Response::STATUS_CODES[$result['code']] ?? '',
|
||||
$response['code'],
|
||||
Response::STATUS_CODES[$response['code']] ?? '',
|
||||
// If debug, include a part of the response body in the exception message
|
||||
Debug::isEnabled() ? mb_substr($result['body'], 0, 500) : '',
|
||||
Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '',
|
||||
);
|
||||
|
||||
// The following code must be extracted if it grows too much
|
||||
@ -192,137 +187,141 @@ function getContents(
|
||||
'<title>Security | Glassdoor',
|
||||
];
|
||||
foreach ($cloudflareTitles as $cloudflareTitle) {
|
||||
if (str_contains($result['body'], $cloudflareTitle)) {
|
||||
throw new CloudFlareException($exceptionMessage, $result['code']);
|
||||
if (str_contains($response['body'], $cloudflareTitle)) {
|
||||
throw new CloudFlareException($exceptionMessage, $response['code']);
|
||||
}
|
||||
}
|
||||
|
||||
throw new HttpException(trim($exceptionMessage), $result['code']);
|
||||
}
|
||||
if ($returnFull === true) {
|
||||
// For legacy reasons, use content instead of body
|
||||
$response['content'] = $response['body'];
|
||||
unset($response['body']);
|
||||
return $response;
|
||||
}
|
||||
return $response['content'];
|
||||
return $response['body'];
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch content from url
|
||||
*
|
||||
* @internal Private function used internally
|
||||
* @throws HttpException
|
||||
*/
|
||||
function _http_request(string $url, array $config = []): array
|
||||
interface HttpClient
|
||||
{
|
||||
$defaults = [
|
||||
'useragent' => null,
|
||||
'timeout' => 5,
|
||||
'headers' => [],
|
||||
'proxy' => null,
|
||||
'curl_options' => [],
|
||||
'if_not_modified_since' => null,
|
||||
'retries' => 3,
|
||||
'max_filesize' => null,
|
||||
'max_redirections' => 5,
|
||||
];
|
||||
$config = array_merge($defaults, $config);
|
||||
public function request(string $url, array $config = []): array;
|
||||
}
|
||||
|
||||
$ch = curl_init($url);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
|
||||
curl_setopt($ch, CURLOPT_HEADER, false);
|
||||
$httpHeaders = [];
|
||||
foreach ($config['headers'] as $name => $value) {
|
||||
$httpHeaders[] = sprintf('%s: %s', $name, $value);
|
||||
}
|
||||
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
|
||||
if ($config['useragent']) {
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
||||
}
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
||||
curl_setopt($ch, CURLOPT_ENCODING, '');
|
||||
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
||||
final class CurlHttpClient implements HttpClient
|
||||
{
|
||||
public function request(string $url, array $config = []): array
|
||||
{
|
||||
$defaults = [
|
||||
'useragent' => null,
|
||||
'timeout' => 5,
|
||||
'headers' => [],
|
||||
'proxy' => null,
|
||||
'curl_options' => [],
|
||||
'if_not_modified_since' => null,
|
||||
'retries' => 3,
|
||||
'max_filesize' => null,
|
||||
'max_redirections' => 5,
|
||||
];
|
||||
$config = array_merge($defaults, $config);
|
||||
|
||||
if ($config['max_filesize']) {
|
||||
// This option inspects the Content-Length header
|
||||
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
|
||||
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
|
||||
// This progress function will monitor responses who omit the Content-Length header
|
||||
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
|
||||
if ($downloaded > $config['max_filesize']) {
|
||||
// Return a non-zero value to abort the transfer
|
||||
return -1;
|
||||
$ch = curl_init($url);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']);
|
||||
curl_setopt($ch, CURLOPT_HEADER, false);
|
||||
$httpHeaders = [];
|
||||
foreach ($config['headers'] as $name => $value) {
|
||||
$httpHeaders[] = sprintf('%s: %s', $name, $value);
|
||||
}
|
||||
curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders);
|
||||
if ($config['useragent']) {
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']);
|
||||
}
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']);
|
||||
curl_setopt($ch, CURLOPT_ENCODING, '');
|
||||
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
|
||||
|
||||
if ($config['max_filesize']) {
|
||||
// This option inspects the Content-Length header
|
||||
curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']);
|
||||
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
|
||||
// This progress function will monitor responses who omit the Content-Length header
|
||||
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) {
|
||||
if ($downloaded > $config['max_filesize']) {
|
||||
// Return a non-zero value to abort the transfer
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
});
|
||||
}
|
||||
|
||||
if ($config['proxy']) {
|
||||
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
|
||||
}
|
||||
if (curl_setopt_array($ch, $config['curl_options']) === false) {
|
||||
throw new \Exception('Tried to set an illegal curl option');
|
||||
}
|
||||
|
||||
if ($config['if_not_modified_since']) {
|
||||
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
|
||||
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
|
||||
}
|
||||
|
||||
$responseStatusLines = [];
|
||||
$responseHeaders = [];
|
||||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
|
||||
$len = strlen($rawHeader);
|
||||
if ($rawHeader === "\r\n") {
|
||||
return $len;
|
||||
}
|
||||
return 0;
|
||||
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
|
||||
$responseStatusLines[] = $rawHeader;
|
||||
return $len;
|
||||
}
|
||||
$header = explode(':', $rawHeader);
|
||||
if (count($header) === 1) {
|
||||
return $len;
|
||||
}
|
||||
$name = mb_strtolower(trim($header[0]));
|
||||
$value = trim(implode(':', array_slice($header, 1)));
|
||||
if (!isset($responseHeaders[$name])) {
|
||||
$responseHeaders[$name] = [];
|
||||
}
|
||||
$responseHeaders[$name][] = $value;
|
||||
return $len;
|
||||
});
|
||||
}
|
||||
|
||||
if ($config['proxy']) {
|
||||
curl_setopt($ch, CURLOPT_PROXY, $config['proxy']);
|
||||
}
|
||||
if (curl_setopt_array($ch, $config['curl_options']) === false) {
|
||||
throw new \Exception('Tried to set an illegal curl option');
|
||||
}
|
||||
$attempts = 0;
|
||||
while (true) {
|
||||
$attempts++;
|
||||
$data = curl_exec($ch);
|
||||
if ($data !== false) {
|
||||
// The network call was successful, so break out of the loop
|
||||
break;
|
||||
}
|
||||
if ($attempts > $config['retries']) {
|
||||
// Finally give up
|
||||
$curl_error = curl_error($ch);
|
||||
$curl_errno = curl_errno($ch);
|
||||
throw new HttpException(sprintf(
|
||||
'cURL error %s: %s (%s) for %s',
|
||||
$curl_error,
|
||||
$curl_errno,
|
||||
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
|
||||
$url
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if ($config['if_not_modified_since']) {
|
||||
curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']);
|
||||
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
|
||||
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
curl_close($ch);
|
||||
return [
|
||||
'code' => $statusCode,
|
||||
'status_lines' => $responseStatusLines,
|
||||
'headers' => $responseHeaders,
|
||||
'body' => $data,
|
||||
];
|
||||
}
|
||||
|
||||
$responseStatusLines = [];
|
||||
$responseHeaders = [];
|
||||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) {
|
||||
$len = strlen($rawHeader);
|
||||
if ($rawHeader === "\r\n") {
|
||||
return $len;
|
||||
}
|
||||
if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) {
|
||||
$responseStatusLines[] = $rawHeader;
|
||||
return $len;
|
||||
}
|
||||
$header = explode(':', $rawHeader);
|
||||
if (count($header) === 1) {
|
||||
return $len;
|
||||
}
|
||||
$name = mb_strtolower(trim($header[0]));
|
||||
$value = trim(implode(':', array_slice($header, 1)));
|
||||
if (!isset($responseHeaders[$name])) {
|
||||
$responseHeaders[$name] = [];
|
||||
}
|
||||
$responseHeaders[$name][] = $value;
|
||||
return $len;
|
||||
});
|
||||
|
||||
$attempts = 0;
|
||||
while (true) {
|
||||
$attempts++;
|
||||
$data = curl_exec($ch);
|
||||
if ($data !== false) {
|
||||
// The network call was successful, so break out of the loop
|
||||
break;
|
||||
}
|
||||
if ($attempts > $config['retries']) {
|
||||
// Finally give up
|
||||
$curl_error = curl_error($ch);
|
||||
$curl_errno = curl_errno($ch);
|
||||
throw new HttpException(sprintf(
|
||||
'cURL error %s: %s (%s) for %s',
|
||||
$curl_error,
|
||||
$curl_errno,
|
||||
'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
|
||||
$url
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
curl_close($ch);
|
||||
return [
|
||||
'code' => $statusCode,
|
||||
'status_lines' => $responseStatusLines,
|
||||
'headers' => $responseHeaders,
|
||||
'body' => $data,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user