From b3a784244808041a264c3bd548d2bdca9a143275 Mon Sep 17 00:00:00 2001
From: mruac
Date: Wed, 6 Sep 2023 23:46:25 +0930
Subject: [PATCH] [PixivBridge] Add cookie auth and options (#3653)

* added cookie mgmt and support for issue https://github.com/RSS-Bridge/rss-bridge/issues/2759

* added image proxy option

* + mature and ai options, + cookie doc

* mention doc

* check cookie is auth'd
---
Note (review): the line structure of this patch was rebuilt from a
whitespace-collapsed copy. Hunk line counts agree with the @@ headers and the
diffstat, but leading whitespace on context/removed lines is inferred (4-space
indent) and may need `git apply --ignore-whitespace`. The author address on the
"From:" header was missing from the copy, and the <img> markup on the two
`$item['content']` lines was restored from context; confirm both against the
upstream commit before applying.

 bridges/PixivBridge.php                | 201 ++++++++++++++++++++++---
 docs/10_Bridge_Specific/PixivBridge.md |  23 +++
 2 files changed, 205 insertions(+), 19 deletions(-)
 create mode 100644 docs/10_Bridge_Specific/PixivBridge.md

diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php
index cf509855..5549c609 100644
--- a/bridges/PixivBridge.php
+++ b/bridges/PixivBridge.php
@@ -7,6 +7,17 @@ class PixivBridge extends BridgeAbstract
     const NAME = 'Pixiv Bridge';
     const URI = 'https://www.pixiv.net/';
     const DESCRIPTION = 'Returns the tag search from pixiv.net';
+    const MAINTAINER = 'mruac';
+    const CONFIGURATION = [
+        'cookie' => [
+            'required' => false,
+            'defaultValue' => null
+        ],
+        'proxy_url' => [
+            'required' => false,
+            'defaultValue' => null
+        ]
+    ];
 
 
     const PARAMETERS = [
@@ -23,11 +34,21 @@ class PixivBridge extends BridgeAbstract
             'mode' => [
                 'name' => 'Post Type',
                 'type' => 'list',
-                'values' => ['All Works' => 'all',
-                             'Illustrations' => 'illustrations/',
-                             'Manga' => 'manga/',
-                             'Novels' => 'novels/']
+                'values' => [
+                    'All Works' => 'all',
+                    'Illustrations' => 'illustrations/',
+                    'Manga' => 'manga/',
+                    'Novels' => 'novels/'
+                ]
             ],
+            'mature' => [
+                'name' => 'Include R-18 works',
+                'type' => 'checkbox'
+            ],
+            'ai' => [
+                'name' => 'Include AI-Generated works',
+                'type' => 'checkbox'
+            ]
         ],
         'Tag' => [
             'tag' => [
@@ -76,7 +97,7 @@ class PixivBridge extends BridgeAbstract
             default:
                 return parent::getName();
         }
-        return 'Pixiv ' . $this->getKey('mode') . " from ${context} ${query}";
+        return 'Pixiv ' . $this->getKey('mode') . " from {$context} {$query}";
     }
 
     public function getURI()
@@ -106,7 +127,7 @@ class PixivBridge extends BridgeAbstract
                 break;
             case 'User':
                 $uri = static::URI . 'ajax/user/' . $this->getInput('userid')
-                            . '/profile/top';
+                    . '/profile/top';
                 break;
             default:
                 returnClientError('Invalid Context');
@@ -116,18 +137,47 @@ class PixivBridge extends BridgeAbstract
 
     private function getDataFromJSON($json, $json_key)
     {
-        $json = $json['body'][$json_key];
+        $key = $json_key;
+        if (
+            $this->queriedContext === 'Tag' &&
+            $this->getOption('cookie') !== null
+        ) {
+            switch ($json_key) {
+                case 'illust':
+                case 'manga':
+                    $key = 'illustManga';
+                    break;
+            }
+        }
+        $json = $json['body'][$key];
         // Tags context contains subkey
-        if ($this->queriedContext == 'Tag') {
+        if ($this->queriedContext === 'Tag') {
             $json = $json['data'];
+            if ($this->getOption('cookie') !== null) {
+                switch ($json_key) {
+                    case 'illust':
+                        $json = array_reduce($json, function ($acc, $i) {
+                            if ($i['illustType'] === 0) {
+                                $acc[] = $i;
+                            }return $acc;
+                        }, []);
+                        break;
+                    case 'manga':
+                        $json = array_reduce($json, function ($acc, $i) {
+                            if ($i['illustType'] === 1) {
+                                $acc[] = $i;
+                            }return $acc;
+                        }, []);
+                        break;
+                }
+            }
         }
         return $json;
     }
 
     private function collectWorksArray()
     {
-        $content = getContents($this->getSearchURI($this->getInput('mode')));
-        $content = json_decode($content, true);
+        $content = $this->getData($this->getSearchURI($this->getInput('mode')), true, true);
         if ($this->getInput('mode') == 'all') {
             $total = [];
             foreach (self::JSON_KEY_MAP[$this->queriedContext] as $mode => $json_key) {
@@ -144,14 +194,36 @@ class PixivBridge extends BridgeAbstract
 
     public function collectData()
     {
+        $this->checkOptions();
+        $proxy_url = $this->getOption('proxy_url');
+        $proxy_url = $proxy_url ? rtrim($proxy_url, '/') : null;
+
         $content = $this->collectWorksArray();
         $content = array_filter($content, function ($v, $k) {
             return !array_key_exists('isAdContainer', $v);
         }, ARRAY_FILTER_USE_BOTH);
+
         // Sort by updateDate to get newest works
         usort($content, function ($a, $b) {
             return $b['updateDate'] <=> $a['updateDate'];
         });
+
+        //exclude AI generated works if unchecked.
+        if ($this->getInput('ai') !== true) {
+            $content = array_filter($content, function ($v) {
+                $isAI = $v['aiType'] === 2;
+                return !$isAI;
+            });
+        }
+
+        //exclude R-18 works if unchecked.
+        if ($this->getInput('mature') !== true) {
+            $content = array_filter($content, function ($v) {
+                $isMature = $v['xRestrict'] > 0;
+                return !$isMature;
+            });
+        }
+
         $content = array_slice($content, 0, $this->getInput('posts'));
 
         foreach ($content as $result) {
@@ -168,12 +240,25 @@ class PixivBridge extends BridgeAbstract
             $item['author'] = $result['userName'];
             $item['timestamp'] = $result['updateDate'];
             $item['categories'] = $result['tags'];
-            $cached_image = $this->cacheImage(
-                $result['url'],
-                $result['id'],
-                array_key_exists('illustType', $result)
-            );
-            $item['content'] = "<img src='" . $cached_image . "' />";
+
+            if ($proxy_url) {
+                //use proxy image host if set.
+                if ($this->getInput('fullsize')) {
+                    $ajax_uri = static::URI . 'ajax/illust/' . $result['id'];
+                    $imagejson = $this->getData($ajax_uri, true, true);
+                    $img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $imagejson['body']['urls']['original']);
+                } else {
+                    $img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $result['url']);
+                }
+            } else {
+                //else cache and use image.
+                $img_url = $this->cacheImage(
+                    $result['url'],
+                    $result['id'],
+                    array_key_exists('illustType', $result)
+                );
+            }
+            $item['content'] = "<img src='" . $img_url . "' />";
 
             // Additional content items
             if (array_key_exists('pageCount', $result)) {
@@ -188,6 +273,7 @@ class PixivBridge extends BridgeAbstract
 
     /**
      * todo: remove manual file cache
+     * See bridge specific documentation for alternative option.
      */
     private function cacheImage($url, $illustId, $isImage)
     {
@@ -209,19 +295,96 @@ class PixivBridge extends BridgeAbstract
             // Get fullsize URL
             if ($isImage && $this->getInput('fullsize')) {
                 $ajax_uri = static::URI . 'ajax/illust/' . $illustId;
-                $imagejson = json_decode(getContents($ajax_uri), true);
+                $imagejson = $this->getData($ajax_uri, true, true);
                 $url = $imagejson['body']['urls']['original'];
             }
 
             $headers = ['Referer: ' . static::URI];
             try {
-                $illust = getContents($url, $headers);
+                $illust = $this->getData($url, true, false, $headers);
             } catch (Exception $e) {
-                $illust = getContents($thumbnailurl, $headers); // Original thumbnail
+                $illust = $this->getData($thumbnailurl, true, false, $headers); // Original thumbnail
             }
             file_put_contents($path, $illust);
         }
 
         return get_home_page_url() . 'cache/pixiv_img/' . preg_replace('/.*\//', '', $path);
     }
+
+    private function checkOptions()
+    {
+        $proxy = $this->getOption('proxy_url');
+        if ($proxy) {
+            if (
+                !(strlen($proxy) > 0 && preg_match('/https?:\/\/.*/', $proxy))
+            ) {
+                return returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.');
+            }
+        }
+
+        $cookie = $this->getCookie();
+        if ($cookie) {
+            $isAuth = $this->loadCacheValue('is_authenticated');
+            if (!$isAuth) {
+                $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true)
+                    or returnServerError('Invalid PHPSESSID cookie provided. Please check the 🍪 and try again.');
+                if ($res['error'] === false) {
+                    $this->saveCacheValue('is_authenticated', true);
+                }
+            }
+        }
+    }
+
+    private function checkCookie(array $headers)
+    {
+        if (array_key_exists('set-cookie', $headers)) {
+            foreach ($headers['set-cookie'] as $value) {
+                if (str_starts_with($value, 'PHPSESSID=')) {
+                    parse_str(strtr($value, ['&' => '%26', '+' => '%2B', ';' => '&']), $cookie);
+                    if ($cookie['PHPSESSID'] != $this->getCookie()) {
+                        $this->saveCacheValue('cookie', $cookie['PHPSESSID']);
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
+    private function getCookie()
+    {
+        // checks if cookie is set, if not initialise it with the cookie from the config
+        $value = $this->loadCacheValue('cookie', 2678400 /* 30 days + 1 day to let cookie chance to renew */);
+        if (!isset($value)) {
+            $value = $this->getOption('cookie');
+            $this->saveCacheValue('cookie', $this->getOption('cookie'));
+        }
+        return $value;
+    }
+
+    //Cache getContents by default
+    private function getData(string $url, bool $cache = true, bool $getJSON = false, array $httpHeaders = [], array $curlOptions = [])
+    {
+        $cookie_str = $this->getCookie();
+        if ($cookie_str) {
+            $curlOptions[CURLOPT_COOKIE] = 'PHPSESSID=' . $cookie_str;
+        }
+
+        if ($cache) {
+            $data = $this->loadCacheValue($url, 86400); // 24 hours
+            if (!$data) {
+                $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url");
+                $this->saveCacheValue($url, $data);
+            }
+        } else {
+            $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url");
+        }
+
+        $this->checkCookie($data['headers']);
+
+        if ($getJSON) {
+            return json_decode($data['content'], true);
+        } else {
+            return $data['content'];
+        }
+    }
 }
diff --git a/docs/10_Bridge_Specific/PixivBridge.md b/docs/10_Bridge_Specific/PixivBridge.md
new file mode 100644
index 00000000..b782a445
--- /dev/null
+++ b/docs/10_Bridge_Specific/PixivBridge.md
@@ -0,0 +1,23 @@
+PixivBridge
+===============
+
+# Image proxy
+As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, caching or image proxy is required to use this bridge.
+
+To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` to the url of the proxy. The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy):
+
+Before: `https://i.pximg.net/img-original/img/0000/00/00/00/00/00/12345678_p0.png`
+
+After: `https://proxy.example.com/img-original/img/0000/00/00/00/00/00/12345678_p0.png`
+
+```
+proxy_url = "https://proxy.example.com"
+```
+
+# Authentication
+Authentication is required to view and search R-18+ and non-public images. To enable this, set the following in this bridge's configuration in `config.ini.php`.
+
+```
+; from cookie "PHPSESSID". Recommend to get in incognito browser.
+cookie = "00000000_hashedsessionidhere"
+```
\ No newline at end of file