Add curl wrappers to support cookies.

Cookie jar: /tmp/rssbridge-fb-cookies.txt — make sure it is writable by
the web server process.
Unfortunately, after implementing cookies, I found that with
_fb_noscript=1 a solved captcha only works once, i.e. the
next request will require a new captcha.
This commit is contained in:
Pellaeon Lin 2017-01-20 15:10:49 +08:00
parent 6852f3a08c
commit 1041e82d82
3 changed files with 89 additions and 9 deletions

View File

@ -1,4 +1,5 @@
<?php
require __DIR__ . '/../lib/contents_curl.php';
class FacebookBridge extends BridgeAbstract {
const MAINTAINER = 'teromene';
@ -87,6 +88,7 @@ class FacebookBridge extends BridgeAbstract {
$captcha_action = $_SESSION['captcha_action'];
$captcha_fields = $_SESSION['captcha_fields'];
$captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
/*
$http_options = array(
'http' => array(
'method' => 'POST',
@ -98,10 +100,11 @@ class FacebookBridge extends BridgeAbstract {
);
$context = stream_context_create($http_options);
$html = getContents($captcha_action, false, $context);
*/
list($html, $info, $res_header) = curlgetContents($captcha_action, $captcha_fields, true);
if ( $info['http_code'] != 200 )
returnServerError('Error '.$info['http_code'].$captcha_action."\nReq:\n".$res_header."\nResp:\n".$info['request_header']);
if($html === false){
returnServerError('Failed to submit captcha response back to Facebook');
}
unset($_SESSION['captcha_fields']);
$html = str_get_html($html);
}
@ -109,14 +112,16 @@ class FacebookBridge extends BridgeAbstract {
unset($_SESSION['captcha_action']);
}
$res_header = '';
//Retrieve page contents
if(is_null($html)){
if(!strpos($this->getInput('u'), "/")){
$html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1')
or returnServerError('No results for this query.');
if (is_null($html)) {
if (!strpos($this->getInput('u'), "/")) {
list($html, $info, $res_header) = curlgetSimpleHTMLDOM(self::URI.urlencode($this->getInput('u')).'?_fb_noscript=1');
if ( $info['http_code'] != 200 )
returnServerError('Error '.$info['http_code']."\nResp:\n".$res_header."\nReq:\n".$info['request_header']);
} else {
$html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1')
or returnServerError('No results for this query.');
list($html, $info) = curlgetSimpleHTMLDOM(self::URI.'pages/'.$this->getInput('u').'?_fb_noscript=1');
if ( $info['http_code'] != 200 ) returnServerError('No results for this query.');
}
}
@ -172,6 +177,18 @@ EOD;
// Ignore summary posts
if(strpos($post->class, '_3xaf') !== false) continue;
// Determine post attachments
/*
$attachment_wrapper = $post->find('._3x-2')[0];// search for attachment
if ( isset($attachment_wrapper) ) {
$attachment = $attachment_wrapper->find('.mtm')[0]->children(0);
if ( strpos($attachment->class, '_2a2q') !== false ) {
// photos
} elseif ( strpos($attachment->class, '_6m2') !== false ) {
// link
}
}*/
$item = array();
if(count($post->find('abbr')) > 0){

View File

@ -30,6 +30,8 @@ $maxlen = null){
} else {
$content = file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
}
date_default_timezone_set('CST');
file_put_contents(__DIR__.'/../debug/D'.date('H-i-s').'.html', $content);
if($content === false)
debugMessage('Cant\'t download ' . $url);

61
lib/contents_curl.php Normal file
View File

@ -0,0 +1,61 @@
<?php
/**
 * Fetch a URL with cURL, keeping cookies between calls in a cookie jar file.
 *
 * For GET requests the parameters are appended to the URL's query string;
 * for POST requests they are sent as an application/x-www-form-urlencoded body.
 *
 * @param string      $url    Target URL. May already contain a query string.
 * @param array|mixed $params Request parameters. Anything that is not an array
 *                            or object (e.g. false/null) is treated as "none".
 * @param bool        $post   Send a POST request when truthy, otherwise GET.
 *
 * @return array array($body, $info, $header):
 *               - $body   response body as a string, or false if the transfer failed
 *               - $info   result of curl_getinfo(); on failure it additionally
 *                         carries a 'curl_error' entry with the cURL error text
 *               - $header raw response header block ('' if the transfer failed)
 */
function curlgetContents( $url, $params, $post=false){
    $cookie_file = '/tmp/rssbridge-fb-cookies.txt';

    // http_build_query() only accepts arrays/objects; callers may pass false.
    $query = '';
    if (is_array($params) || is_object($params)) {
        $query = http_build_query($params);
    }

    // Only extend the URL for GET requests that really have parameters, and
    // use '&' when the URL already has a query string so that an existing
    // '?key=value' part is not corrupted by a second '?'.
    $target = $url;
    if (!$post && $query !== '') {
        $separator = (strpos($url, '?') === false) ? '?' : '&';
        $target = $url . $separator . $query;
    }

    $headers = array();
    if ($post) {
        $headers[] = 'Content-Type: application/x-www-form-urlencoded';
    }
    $headers[] = 'User-Agent: ' . ini_get('user_agent');

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $target);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Same file for reading and writing so cookies survive across requests.
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    // Include the response headers in the returned string; they are split
    // off below using the header size reported by cURL.
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_VERBOSE, 1);
    // Record the outgoing request headers; exposed as $info['request_header'].
    curl_setopt($ch, CURLINFO_HEADER_OUT, 1);
    if ($post) {
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $response = curl_exec($ch);
    $info = curl_getinfo($ch);

    if ($response === false) {
        // Transport-level failure: report it explicitly instead of slicing
        // a boolean, so callers can test the body against false.
        $info['curl_error'] = curl_error($ch);
        curl_close($ch);
        return array(false, $info, '');
    }
    curl_close($ch);

    $header_size = isset($info['header_size']) ? (int)$info['header_size'] : 0;
    // Cast to string: on PHP < 8 substr() returns false for an empty
    // remainder, which would make an empty body look like a failure.
    $header = (string)substr($response, 0, $header_size);
    $body = (string)substr($response, $header_size);

    // Debug dump of the last response body; skipped when the debug directory
    // is missing or not writable so that it cannot raise warnings.
    $debug_dir = __DIR__.'/../debug';
    if (is_dir($debug_dir) && is_writable($debug_dir)) {
        file_put_contents($debug_dir.'/D'.date('H-i-s').'.html', $body);
    }

    return array($body, $info, $header);
}
/**
 * Fetch a URL through curlgetContents() (GET) and parse the body with
 * str_get_html().
 *
 * The signature mirrors a file_get_contents-style DOM helper. The stream
 * arguments ($use_include_path, $context, $offset, $maxLen) have no cURL
 * equivalent here; they are accepted for compatibility and ignored.
 *
 * @param string $url             URL to fetch.
 * @param bool   $use_include_path Unused; kept for signature compatibility.
 * @param mixed  $context         Unused; kept for signature compatibility.
 * @param int    $offset          Unused; kept for signature compatibility.
 * @param mixed  $maxLen          Unused; kept for signature compatibility.
 * @param bool   $lowercase       Passed through to str_get_html().
 * @param bool   $forceTagsClosed Passed through to str_get_html().
 * @param string $target_charset  Passed through to str_get_html().
 * @param bool   $stripRN         Passed through to str_get_html().
 * @param string $defaultBRText   Passed through to str_get_html().
 * @param string $defaultSpanText Passed through to str_get_html().
 *
 * @return array array($dom, $info, $header): the parsed document (false when
 *               there was no body to parse), the curl_getinfo() array and the
 *               raw response headers.
 */
function curlgetSimpleHTMLDOM($url
, $use_include_path = false
, $context = null
, $offset = 0
, $maxLen = null
, $lowercase = true
, $forceTagsClosed = true
, $target_charset = DEFAULT_TARGET_CHARSET
, $stripRN = true
, $defaultBRText = DEFAULT_BR_TEXT
, $defaultSpanText = DEFAULT_SPAN_TEXT
){
    // curlgetContents() takes ($url, $params, $post). Forwarding the stream
    // arguments positionally would turn $use_include_path into the query
    // parameters and $context into the POST flag, so request a plain GET
    // with no extra parameters instead.
    list($body, $info, $header) = curlgetContents($url, array(), false);

    // Nothing to parse when the download failed or returned an empty body.
    // NOTE(review): str_get_html() presumably also yields false for empty
    // input — confirm against the bundled simple_html_dom version.
    $dom = false;
    if (is_string($body) && $body !== '') {
        $dom = str_get_html($body
            , $lowercase
            , $forceTagsClosed
            , $target_charset
            , $stripRN
            , $defaultBRText
            , $defaultSpanText);
    }

    return array($dom, $info, $header);
}
?>