手机
当前位置:查字典教程网 >编程开发 >C语言 >用C实现PHP扩展 Fetch_Url 类数据抓取的方法
用C实现PHP扩展 Fetch_Url 类数据抓取的方法
摘要:本文介绍如何用 C 语言(基于 libcurl)实现 PHP 扩展类 FetchUrl,用于抓取网页数据。内容包括:类文档说明、使用案例,以及扩展的实现源码(php_fetch_url.h 与 fetch_url.c)。

一、类文档说明

代码如下:

/**
 * API summary of the FetchUrl class exported by the fetch_url extension.
 *
 * This is a documentation stub: every method is implemented in C
 * (fetch_url.c), so only the signatures are listed here.
 */
class FetchUrl{

    function __construct();

    // Returns the response body of the last fetch(); commonly inspected when fetch() returns false.
    function body();

    // Re-initialises the request/response state so that one FetchUrl object can be reused for several requests.
    function clean();

    // Returns the last error message.
    function errmsg();

    // Returns the last error code; a value > 0 means an error occurred.
    function errcode();

    /**
     * Sends the request.
     *
     * @param string   $url      address to request
     * @param callable $callback optional anonymous function (accepted, but not used by the current C implementation)
     */
    function fetch(string $url, callable $callback=null);

    // Returns the HTTP status code of the response.
    function httpCode();

    // Returns the cookies set by the response as an array (name => value).
    function responseCookies();

    // Returns the response header lines as an array.
    function responseHeaders();

    // Enables or disables redirect handling for the request.
    function setAllowRedirect(bool $allow=false);

    // Sets the connect timeout, in seconds.
    function setConnectTimeout(int $seconds=5);

    // Adds one cookie to the outgoing request.
    function setCookie(string $name, string $value);

    // Adds several cookies (name => value) to the outgoing request.
    function setCookies(array $cookies);

    // Adds request headers: either an array (name => value) or a single name/value pair.
    function setHeader($name, string $value=null);

    // Sets the request method (POST/GET).
    function setMethod(string $method="get");

    // Sets the form fields (name => value) sent with a POST request.
    function setPostData(array $data);

    // Uploads an in-memory buffer as the file field $field, reported to the server as $filename.
    function setBinary(string $field, string $filename, string $data);

    // Uploads the file at $path as the file field $field.
    function setFile(string $field, string $path);

    // Sets the read (total transfer) timeout, in seconds.
    function setReadTimeout(int $seconds=60);

    function __destruct();

}

二、使用案例

代码如下:

<?php

/* Example 1: plain GET of http://www.baidu.com */
/*
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->fetch('http://www.baidu.com');
*/

// Cookies sent with the POST examples below.
$cookies = array(
    'wei_xin_wb_session'=>'value',
    'wei_xin_wxblog_authcoder'=>'value');

/* Example 2: POST of form data */
/*
$fetch_url = new FetchUrl();
$fetch_url->setMethod('post');
$data = array(
    'step'=>2,
    'pays[1]'=>0,
    'pays[2]'=>0,
    'pays[3]'=>0
);
$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);
$fetch_url->fetch('http://test.wx.pp.cc/wb_advs/manage?inajax=1');
*/

// Example 3: POST of form data together with a file upload.
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->setMethod('post');

$data = array(
    'nickname'=>'挺好a',
    'wxnickname'=>'good',
    'wxusername'=>'good',
    'intro'=>'good'
);
$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);

$binary = file_get_contents("http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.gif");
if($binary === false){
    // setBinary() only accepts strings: without this check the download failure
    // would go unnoticed and the POST would be sent without the file.
    exit("errmsg:unable to download the file to upload");
}
$fetch_url->setBinary("picfile", "demo.jpg", $binary); // upload an in-memory buffer as a file
// $fetch_url->setFile("picfile", "C:/Users/Administrator/Desktop/123.jpg"); // upload a file from disk

if($fetch_url->errcode() == 0){
    $fetch_url->fetch('http://wx.pp.cc/wb_ajax/addwxuser/0');
    if($fetch_url->httpCode() == 200){
        $html = $fetch_url->body();
        echo $html;
    }
}else{
    echo "errmsg:".$fetch_url->errmsg().", errcode:".$fetch_url->errcode();
}

// Dump the response header lines.
print_r($fetch_url->responseHeaders());

// Reset the previous request settings so that $fetch_url can be reused.
$fetch_url->clean();
$fetch_url->fetch("http://www.baidu.com");
print_r($fetch_url->responseHeaders());

三、扩展实现

1.php_fetch_url.h

代码如下:

/*

+----------------------------------------------------------------------+

| PHP Version 5 |

+----------------------------------------------------------------------+

| Copyright (c) 1997-2012 The PHP Group |

+----------------------------------------------------------------------+

| This source file is subject to version 3.01 of the PHP license, |

| that is bundled with this package in the file LICENSE, and is |

| available through the world-wide-web at the following url: |

| http://www.php.net/license/3_01.txt |

| If you did not receive a copy of the PHP license and are unable to |

| obtain it through the world-wide-web, please send a note to |

| license@php.net so we can mail you a copy immediately. |

+----------------------------------------------------------------------+

| Author: |

+----------------------------------------------------------------------+

*/

/* $Id$ */

#ifndef PHP_FETCH_URL_H
#define PHP_FETCH_URL_H

extern zend_module_entry fetch_url_module_entry;
#define phpext_fetch_url_ptr &fetch_url_module_entry

/* Export decoration for symbols, chosen per compiler/platform. */
#ifdef PHP_WIN32
#define PHP_FETCH_URL_API __declspec(dllexport)
#elif defined(__GNUC__) && __GNUC__ >= 4
#define PHP_FETCH_URL_API __attribute__ ((visibility("default")))
#else
#define PHP_FETCH_URL_API
#endif

/* Flags passed to curl_global_init() in fetch_url.c. */
#ifdef PHP_WIN32
#define FETCH_CURL_MODE CURL_GLOBAL_WIN32
#else
#define FETCH_CURL_MODE CURL_GLOBAL_ALL
#endif

#ifdef ZTS
#include "TSRM.h"
#endif

/* Name of the class exposed to PHP userland and the global holding its class entry. */
#define FETCH_CLASS_NAME "FetchUrl"
#define FETCH_CLASS_CE g_fetch_ce

/* Expands to the two leading arguments (scope, object) of the zend_*_property() helpers. */
#define FETCH_THIS Z_OBJCE_P(getThis()), getThis()

/*
 * Records an error on the current object: stores `msg` in the "errmsg"
 * property and `code` in the "errno" property.
 *
 * `msg` MUST be a string literal, because its length is taken with sizeof().
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. inside an unbraced if/else); every line but the
 * last needs its trailing backslash.
 */
#define FETCH_ERROR(msg, code) \
    do { \
        zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), (msg), sizeof(msg)-1 TSRMLS_CC); \
        zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), (code) TSRMLS_CC); \
    } while (0)

PHP_MINIT_FUNCTION(fetch_url);
PHP_MSHUTDOWN_FUNCTION(fetch_url);
PHP_RINIT_FUNCTION(fetch_url);
PHP_RSHUTDOWN_FUNCTION(fetch_url);
PHP_MINFO_FUNCTION(fetch_url);

/* Accessor for module globals; no globals are declared in the code shown here. */
#ifdef ZTS
#define FETCH_URL_G(v) TSRMG(fetch_url_globals_id, zend_fetch_url_globals *, v)
#else
#define FETCH_URL_G(v) (fetch_url_globals.v)
#endif

#endif/* PHP_FETCH_URL_H */

2.fetch_url.c

代码如下:

/*

+----------------------------------------------------------------------+

| PHP Version 5 |

+----------------------------------------------------------------------+

| Copyright (c) 1997-2012 The PHP Group |

+----------------------------------------------------------------------+

| This source file is subject to version 3.01 of the PHP license, |

| that is bundled with this package in the file LICENSE, and is |

| available through the world-wide-web at the following url: |

| http://www.php.net/license/3_01.txt |

| If you did not receive a copy of the PHP license and are unable to |

| obtain it through the world-wide-web, please send a note to |

| license@php.net so we can mail you a copy immediately. |

+----------------------------------------------------------------------+

| Author: |

+----------------------------------------------------------------------+

*/

/* $Id$ */

#ifdef HAVE_CONFIG_H

#include "config.h"

#endif

#include "php.h"

#include "php_ini.h"

#include "main/SAPI.h"

#include "Zend/zend_interfaces.h"

#include "ext/standard/info.h"

#include "ext/standard/php_var.h"

#include "ext/standard/php_string.h"

#include "ext/standard/php_smart_str.h"

#include "ext/standard/url.h"

#include "ext/pcre/php_pcre.h"

#include "php_fetch_url.h"

#include <curl/curl.h>

zend_class_entry *g_fetch_ce;

/*
 * Argument descriptions for the FetchUrl methods (used by reflection).
 * The last macro argument is the number of required parameters.
 */

/* Methods that take no arguments. */
ZEND_BEGIN_ARG_INFO_EX(void_arginfo, 0, 0, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(fetch_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, url)
    ZEND_ARG_INFO(0, callback)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(responseCookies_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, all)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(responseHeaders_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, parse)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setAllowRedirect_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, allow)
ZEND_END_ARG_INFO()

/* The value is handed to CURLOPT_CONNECTTIMEOUT, which is expressed in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setConnectTimeout_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setCookie_arginfo, 0, 0, 2)
    ZEND_ARG_INFO(0, name)
    ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setCookies_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, cookies)
ZEND_END_ARG_INFO()

/* setHeader() accepts either (array headers) or (string name, string value): only one argument is required. */
ZEND_BEGIN_ARG_INFO_EX(setHeader_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, name)
    ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setMethod_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, method)
ZEND_END_ARG_INFO()

/* NOTE: this table lacks the "_arginfo" suffix; the name is kept because the method table refers to it. */
ZEND_BEGIN_ARG_INFO_EX(setPostData, 0, 0, 1)
    ZEND_ARG_INFO(0, post_data)
    ZEND_ARG_INFO(0, multil)
ZEND_END_ARG_INFO()

/* The value is handed to CURLOPT_TIMEOUT, which is expressed in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setReadTimeout_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

/* Argument order matches the implementation: form field, reported file name, raw data. */
ZEND_BEGIN_ARG_INFO_EX(setBinary_arginfo, 0, 0, 3)
    ZEND_ARG_INFO(0, post_field)
    ZEND_ARG_INFO(0, uploadfile_name)
    ZEND_ARG_INFO(0, binary_data)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setFile_arginfo, 0, 0, 2)
    ZEND_ARG_INFO(0, post_field)
    ZEND_ARG_INFO(0, path)
ZEND_END_ARG_INFO()

/* FetchUrl::__construct(): intentionally empty. */
ZEND_METHOD(fetch_url, __construct){

}

ZEND_METHOD(fetch_url, setBinary){

zval *input_filed_name, *binary_data, *uploadfile_name;

zval *g_binary_data, *item_data;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz", &input_filed_name, &uploadfile_name, &binary_data) == FAILURE){

RETURN_FALSE;

}

if(Z_TYPE_P(input_filed_name) != IS_STRING || Z_TYPE_P(uploadfile_name) != IS_STRING || Z_TYPE_P(binary_data) != IS_STRING){

RETURN_FALSE;

}

g_binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);

if(Z_TYPE_P(g_binary_data) == IS_NULL){

MAKE_STD_ZVAL(g_binary_data);

array_init(g_binary_data);

}

MAKE_STD_ZVAL(item_data);

array_init(item_data);

add_index_stringl(item_data, 0, Z_STRVAL_P(uploadfile_name), Z_STRLEN_P(uploadfile_name), 1);

add_index_stringl(item_data, 1, Z_STRVAL_P(binary_data), Z_STRLEN_P(binary_data), 1);

add_assoc_zval(g_binary_data, Z_STRVAL_P(input_filed_name), item_data);

zend_update_property(FETCH_THIS, ZEND_STRL("binary_data"), g_binary_data TSRMLS_CC);

}

ZEND_METHOD(fetch_url, setFile){

zval *file_path, *input_filed_name;

zval *upload_filepaths;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &input_filed_name, &file_path) == FAILURE){

RETURN_FALSE;

}

if(Z_TYPE_P(file_path) != IS_STRING || Z_TYPE_P(input_filed_name) != IS_STRING){

RETURN_FALSE;

}

upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);

if(Z_TYPE_P(upload_filepaths) == IS_NULL){

MAKE_STD_ZVAL(upload_filepaths);

array_init(upload_filepaths);

}

add_assoc_stringl(upload_filepaths, Z_STRVAL_P(input_filed_name), Z_STRVAL_P(file_path), Z_STRLEN_P(file_path), 1);

zend_update_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), upload_filepaths TSRMLS_CC);

}

/* FetchUrl::body(): returns a copy of the "body" property. */
ZEND_METHOD(fetch_url, body){
    zval *content = zend_read_property(FETCH_THIS, ZEND_STRL("body"), 0 TSRMLS_CC);

    /* Duplicate the bytes so the return value does not alias the property. */
    RETVAL_STRINGL(Z_STRVAL_P(content), Z_STRLEN_P(content), 1);
}

/*
 * FetchUrl::clean()
 *
 * Resets the per-request state (response data, error state, cookies, headers,
 * POST data, queued uploads and the method) so the object can be reused.
 * The timeout and redirect settings are left untouched.
 */
ZEND_METHOD(fetch_url, clean){
    /* Response data. */
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_null(FETCH_THIS, ZEND_STRL("httpCode") TSRMLS_CC);

    /* Error state. */
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);

    /* Request data. */
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);

    /*
     * "send_headers" is either NULL or an array (see setHeader()/fetch()).
     * It must go back to NULL, not to an empty string: a string value would
     * be treated as an array by the next setHeader() call.
     */
    zend_update_property_null(FETCH_THIS, ZEND_STRL("send_headers") TSRMLS_CC);
    zend_update_property_null(FETCH_THIS, ZEND_STRL("binary_data") TSRMLS_CC);
    zend_update_property_null(FETCH_THIS, ZEND_STRL("upload_filepaths") TSRMLS_CC);
}

ZEND_METHOD(fetch_url, errmsg){

zval *errmsg = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);

RETURN_STRINGL(Z_STRVAL_P(errmsg), Z_STRLEN_P(errmsg), 1);

}

/* FetchUrl::errcode(): returns the integer stored in the "errno" property. */
ZEND_METHOD(fetch_url, errcode){
    zval *code;

    code = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
    RETVAL_LONG(Z_LVAL_P(code));
}

/*
 * libcurl write/header callback: appends the received chunk to the smart_str
 * passed as user data and reports the whole chunk as consumed.
 */
static size_t read_data(void *buffer, size_t size, size_t nmemb, void *data){
    size_t chunk_len = size*nmemb;
    smart_str *sink = (smart_str*)data;

    smart_str_appendl(sink, buffer, chunk_len);

    return chunk_len;
}

/* Appends a copy of `line` to a curl_slist; keeps the existing list if the append fails. */
static struct curl_slist *fetch_url_slist_add(struct curl_slist *list, const char *line){
    struct curl_slist *grown = curl_slist_append(list, line);

    return grown ? grown : list;
}

/*
 * Adds the "name=value&name=value" pairs stored by setPostData() to a
 * multipart form. Only the first '=' of a pair separates name from value;
 * a pair without '=' is sent with an empty value.
 */
static void fetch_url_add_post_fields(zval *post_data, struct curl_httppost **post, struct curl_httppost **last){
    zval delim_amp, delim_eq, *pairs, **pair;
    HashPosition pos;

    if(Z_TYPE_P(post_data) != IS_STRING || Z_STRLEN_P(post_data) == 0){
        return;
    }

    /* Stack zvals pointing at literals: nothing to free afterwards. */
    ZVAL_STRINGL(&delim_amp, "&", 1, 0);
    ZVAL_STRINGL(&delim_eq, "=", 1, 0);

    MAKE_STD_ZVAL(pairs);
    array_init(pairs);
    php_explode(&delim_amp, post_data, pairs, LONG_MAX);

    for(zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(pairs), &pos);
        zend_hash_get_current_data_ex(Z_ARRVAL_P(pairs), (void**)&pair, &pos) == SUCCESS;
        zend_hash_move_forward_ex(Z_ARRVAL_P(pairs), &pos)){
        zval *parts, **field_name, **field_value;
        const char *value = "";

        /* The trailing '&' written by setPostData() yields one empty element. */
        if(Z_TYPE_PP(pair) != IS_STRING || Z_STRLEN_PP(pair) == 0){
            continue;
        }

        MAKE_STD_ZVAL(parts);
        array_init(parts);
        php_explode(&delim_eq, *pair, parts, 2);

        if(zend_hash_index_find(Z_ARRVAL_P(parts), 0, (void**)&field_name) == SUCCESS){
            if(zend_hash_index_find(Z_ARRVAL_P(parts), 1, (void**)&field_value) == SUCCESS){
                value = Z_STRVAL_PP(field_value);
            }
            /* Both strings are copied by libcurl, so `parts` can be released right away. */
            curl_formadd(post, last, CURLFORM_COPYNAME, Z_STRVAL_PP(field_name), CURLFORM_COPYCONTENTS, value, CURLFORM_END);
        }

        zval_ptr_dtor(&parts);
    }

    zval_ptr_dtor(&pairs);
}

/* Adds the in-memory uploads queued by setBinary() (field => array(filename, data)) to a multipart form. */
static void fetch_url_add_buffers(zval *binary_data, struct curl_httppost **post, struct curl_httppost **last){
    HashTable *ht = Z_ARRVAL_P(binary_data);
    HashPosition pos;
    zval **item;

    for(zend_hash_internal_pointer_reset_ex(ht, &pos);
        zend_hash_get_current_data_ex(ht, (void**)&item, &pos) == SUCCESS;
        zend_hash_move_forward_ex(ht, &pos)){
        char *field;
        uint field_len;
        ulong idx;
        zval **filename, **contents;

        if(zend_hash_get_current_key_ex(ht, &field, &field_len, &idx, 0, &pos) != HASH_KEY_IS_STRING){
            continue;
        }
        if(Z_TYPE_PP(item) != IS_ARRAY
            || zend_hash_index_find(Z_ARRVAL_PP(item), 0, (void**)&filename) == FAILURE
            || zend_hash_index_find(Z_ARRVAL_PP(item), 1, (void**)&contents) == FAILURE
            || Z_TYPE_PP(filename) != IS_STRING
            || Z_TYPE_PP(contents) != IS_STRING){
            continue;
        }

        /*
         * CURLFORM_BUFFERPTR is not copied by libcurl: the data stays owned by
         * the "binary_data" property, which outlives the transfer.
         * CURLFORM_BUFFERLENGTH is read as a long, hence the cast.
         */
        curl_formadd(post,
            last,
            CURLFORM_COPYNAME,
            field,
            CURLFORM_BUFFER,
            Z_STRVAL_PP(filename),
            CURLFORM_BUFFERPTR,
            Z_STRVAL_PP(contents),
            CURLFORM_BUFFERLENGTH,
            (long)Z_STRLEN_PP(contents),
            CURLFORM_END
        );
    }
}

/* Adds the on-disk uploads queued by setFile() (field => path) to a multipart form. */
static void fetch_url_add_files(zval *upload_filepaths, struct curl_httppost **post, struct curl_httppost **last){
    HashTable *ht = Z_ARRVAL_P(upload_filepaths);
    HashPosition pos;
    zval **file_path;

    for(zend_hash_internal_pointer_reset_ex(ht, &pos);
        zend_hash_get_current_data_ex(ht, (void**)&file_path, &pos) == SUCCESS;
        zend_hash_move_forward_ex(ht, &pos)){
        char *field;
        uint field_len;
        ulong idx;

        if(zend_hash_get_current_key_ex(ht, &field, &field_len, &idx, 0, &pos) != HASH_KEY_IS_STRING){
            continue;
        }
        if(Z_TYPE_PP(file_path) != IS_STRING){
            continue;
        }

        curl_formadd(post, last, CURLFORM_COPYNAME, field, CURLFORM_FILE, Z_STRVAL_PP(file_path), CURLFORM_END);
    }
}

/* Appends one "Name: value" line per entry of the "send_headers" array to `list`. */
static struct curl_slist *fetch_url_add_headers(zval *send_headers, struct curl_slist *list){
    HashTable *ht = Z_ARRVAL_P(send_headers);
    HashPosition pos;
    zval **header_val;

    for(zend_hash_internal_pointer_reset_ex(ht, &pos);
        zend_hash_get_current_data_ex(ht, (void**)&header_val, &pos) == SUCCESS;
        zend_hash_move_forward_ex(ht, &pos)){
        char *header_key;
        uint header_keylen;
        ulong idx;
        smart_str line = {0};

        if(zend_hash_get_current_key_ex(ht, &header_key, &header_keylen, &idx, 0, &pos) != HASH_KEY_IS_STRING){
            continue;
        }
        if(Z_TYPE_PP(header_val) != IS_STRING){
            continue;
        }

        /* The reported key length includes the terminating NUL, which must not end up inside the header line. */
        smart_str_appendl(&line, header_key, header_keylen-1);
        smart_str_appendl(&line, ": ", 2);
        smart_str_appendl(&line, Z_STRVAL_PP(header_val), Z_STRLEN_PP(header_val));
        smart_str_0(&line);

        /* curl_slist_append() copies the string, so the buffer can be released immediately. */
        list = fetch_url_slist_add(list, line.c);
        smart_str_free(&line);
    }

    return list;
}

/*
 * FetchUrl::fetch(string url [, mixed callback])
 *
 * Performs the request described by the object's properties and stores the
 * raw response headers and body in the "headers" and "body" properties.
 * The optional callback argument is accepted but not used.
 *
 * Returns TRUE when the transfer completed. Returns FALSE when:
 *  - an earlier call already recorded an error (errcode() > 0); the error is printed,
 *  - the URL is not a string or libcurl could not be initialised (error 500),
 *  - the transfer failed; errcode() then holds the libcurl error code and
 *    errmsg() its description. Whatever was received is still stored.
 */
ZEND_METHOD(fetch_url, fetch){
    CURLcode return_code;
    CURL *curl_handler;
    struct curl_slist *http_headers = NULL;
    struct curl_httppost *post = NULL;
    struct curl_httppost *last = NULL;
    zval *url, *callback = NULL, *cookies, *connect_timeout, *allow_redirect, *method, *post_data, *read_timeout,
        *send_headers, *err_no, *errmsg, *binary_data, *upload_filepaths;
    smart_str body_str = {0}, header_str = {0};

    if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &url, &callback) == FAILURE){
        RETURN_FALSE;
    }

    /* Refuse to run while an earlier error is pending; clean() resets it. */
    err_no = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
    errmsg = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);
    if(Z_LVAL_P(err_no) > 0){
        /* The error code is a long, so it is printed with %ld. */
        php_printf("errno:%ld, errmsg:%s", Z_LVAL_P(err_no), Z_STRVAL_P(errmsg));
        RETURN_FALSE;
    }

    if(Z_TYPE_P(url) != IS_STRING){
        FETCH_ERROR("fetch url must be string.", 500);
        RETURN_FALSE;
    }

    return_code = curl_global_init(FETCH_CURL_MODE);
    if(return_code != CURLE_OK){
        FETCH_ERROR("curl init failed.", 500);
        RETURN_FALSE;
    }

    curl_handler = curl_easy_init();
    if(NULL == curl_handler){
        curl_global_cleanup();
        FETCH_ERROR("get curl handler failed.", 500);
        RETURN_FALSE;
    }

    cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
    connect_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("connect_timeout"), 0 TSRMLS_CC);
    read_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("read_timeout"), 0 TSRMLS_CC);
    allow_redirect = zend_read_property(FETCH_THIS, ZEND_STRL("allow_redirect"), 0 TSRMLS_CC);
    method = zend_read_property(FETCH_THIS, ZEND_STRL("method"), 0 TSRMLS_CC);
    post_data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);
    send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);
    binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);
    upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);

    curl_easy_setopt(curl_handler, CURLOPT_URL, Z_STRVAL_P(url));
    curl_easy_setopt(curl_handler, CURLOPT_COOKIE, Z_STRVAL_P(cookies));

    /* Body and headers are collected by the same callback into separate buffers. */
    curl_easy_setopt(curl_handler, CURLOPT_WRITEFUNCTION, &read_data);
    curl_easy_setopt(curl_handler, CURLOPT_WRITEDATA, &body_str);
    curl_easy_setopt(curl_handler, CURLOPT_HEADERDATA, &header_str);
    curl_easy_setopt(curl_handler, CURLOPT_HEADERFUNCTION, &read_data);

    curl_easy_setopt(curl_handler, CURLOPT_TIMEOUT, Z_LVAL_P(read_timeout));
    curl_easy_setopt(curl_handler, CURLOPT_CONNECTTIMEOUT, Z_LVAL_P(connect_timeout));

    /*
     * CURLOPT_AUTOREFERER alone never follows a redirect; CURLOPT_FOLLOWLOCATION
     * is what makes the "allow_redirect" setting (and CURLOPT_MAXREDIRS) effective.
     * Numeric options are read as long by libcurl, hence the L suffixes.
     */
    curl_easy_setopt(curl_handler, CURLOPT_FOLLOWLOCATION, Z_LVAL_P(allow_redirect));
    curl_easy_setopt(curl_handler, CURLOPT_AUTOREFERER, Z_LVAL_P(allow_redirect));
    curl_easy_setopt(curl_handler, CURLOPT_MAXREDIRS, 5L);

    if(strcmp(Z_STRVAL_P(method), "get") == 0){
        curl_easy_setopt(curl_handler, CURLOPT_HTTPGET, 1L);
    }else if(Z_TYPE_P(binary_data) == IS_ARRAY || Z_TYPE_P(upload_filepaths) == IS_ARRAY){
        /* Uploads are queued: send everything as multipart/form-data. */
        fetch_url_add_post_fields(post_data, &post, &last);
        if(Z_TYPE_P(binary_data) == IS_ARRAY){
            fetch_url_add_buffers(binary_data, &post, &last);
        }
        if(Z_TYPE_P(upload_filepaths) == IS_ARRAY){
            fetch_url_add_files(upload_filepaths, &post, &last);
        }
        curl_easy_setopt(curl_handler, CURLOPT_HTTPPOST, post);

        /* An empty "Expect:" header prevents the "100 Continue" round trip. */
        http_headers = fetch_url_slist_add(http_headers, "Expect:");
    }else{
        /* Plain POST: the string is not copied by libcurl; the "data" property outlives the transfer. */
        curl_easy_setopt(curl_handler, CURLOPT_POSTFIELDS, Z_STRVAL_P(post_data));
        curl_easy_setopt(curl_handler, CURLOPT_POST, 1L);
    }

    if(Z_TYPE_P(send_headers) == IS_ARRAY && zend_hash_num_elements(Z_ARRVAL_P(send_headers)) > 0){
        http_headers = fetch_url_add_headers(send_headers, http_headers);
    }
    curl_easy_setopt(curl_handler, CURLOPT_HTTPHEADER, http_headers);

    return_code = curl_easy_perform(curl_handler);

    curl_slist_free_all(http_headers);
    curl_formfree(post);
    curl_easy_cleanup(curl_handler);
    curl_global_cleanup();

    /* Store whatever was received, even after a failed transfer. */
    smart_str_0(&body_str);
    smart_str_0(&header_str);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), header_str.c ? header_str.c : "", header_str.len TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), body_str.c ? body_str.c : "", body_str.len TSRMLS_CC);

    /* The properties hold copies; release the receive buffers. */
    smart_str_free(&body_str);
    smart_str_free(&header_str);

    if(return_code != CURLE_OK){
        zend_update_property_string(FETCH_THIS, ZEND_STRL("errmsg"), (char*)curl_easy_strerror(return_code) TSRMLS_CC);
        zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), (long)return_code TSRMLS_CC);
        RETURN_FALSE;
    }

    RETURN_TRUE;
}

ZEND_METHOD(fetch_url, httpCode){

pcre_cache_entry *pce;

zval *headers;

zval *result_match, *match_long, **http_code;

char *regex = estrdup("/^HTTP/1.1s(.*)sOK/");

if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){

RETURN_FALSE;

}

MAKE_STD_ZVAL(result_match);

MAKE_STD_ZVAL(match_long);

headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 0, 0, 0, 0 TSRMLS_CC);

if(Z_LVAL_P(match_long) > 0){

if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&http_code) == FAILURE){

RETURN_FALSE;

}else{

RETURN_STRINGL(Z_STRVAL_PP(http_code), Z_STRLEN_PP(http_code), 0);

}

}else{

RETURN_FALSE;

}

}

ZEND_METHOD(fetch_url, responseCookies){

pcre_cache_entry *pce;

zval *headers, *result_match, *match_long;

char *regex = estrdup("/Set-Cookie:s(.*?);/");

headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

array_init(return_value);

if(Z_STRLEN_P(headers) > 0){

if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){

RETURN_NULL();

}

MAKE_STD_ZVAL(result_match);

MAKE_STD_ZVAL(match_long);

//void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)

php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 1, 0, 0, 0 TSRMLS_CC);

if(Z_LVAL_P(match_long) > 0){

zval **result;

HashTable *result_ht;

char *found = NULL;

long found_offset;

char *cookie_name;

char *cookie_value;

if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&result) != FAILURE){

result_ht = Z_ARRVAL_PP(result);

for(zend_hash_internal_pointer_reset(result_ht);

zend_hash_has_more_elements(result_ht) == SUCCESS;

zend_hash_move_forward(result_ht)){

zval **tmpzval;

if(zend_hash_get_current_data(result_ht, (void**)&tmpzval) == FAILURE){

continue;

}

found = php_memnstr(Z_STRVAL_PP(tmpzval), "=", 1, Z_STRVAL_PP(tmpzval) + Z_STRLEN_PP(tmpzval));

found_offset = found - Z_STRVAL_PP(tmpzval);

cookie_name = estrndup(Z_STRVAL_PP(tmpzval), found_offset);

cookie_value= estrndup(found+1, strlen(found)-1);

add_assoc_stringl(return_value, cookie_name, cookie_value, strlen(cookie_value), 1);

efree(cookie_name);

efree(cookie_value);

}

}

}

}else{

RETURN_NULL();

}

}

ZEND_METHOD(fetch_url, responseHeaders){

zval *headers, *delim;

uint idx;

headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

MAKE_STD_ZVAL(delim);

array_init(return_value);

ZVAL_STRING(delim, "rn", 1);

php_explode(delim, headers, return_value, LONG_MAX);

idx = zend_hash_num_elements(Z_ARRVAL_P(return_value));

zend_hash_index_del(Z_ARRVAL_P(return_value), idx-1);

zend_hash_index_del(Z_ARRVAL_P(return_value), idx-2);

zval_dtor(delim);

}

ZEND_METHOD(fetch_url, setAllowRedirect){

zval *allow;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &allow) == FAILURE){

RETURN_FALSE;

}

convert_to_long(allow);

zend_update_property_long(FETCH_THIS, ZEND_STRL("allow_redirect"), Z_LVAL_P(allow) TSRMLS_CC);

}

ZEND_METHOD(fetch_url, setConnectTimeout){

zval *connect_timeout;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &connect_timeout) == FAILURE){

RETURN_FALSE;

}

convert_to_long(connect_timeout);

zend_update_property_long(FETCH_THIS, ZEND_STRL("connect_timeout"), Z_LVAL_P(connect_timeout) TSRMLS_CC);

}

/*
 * FetchUrl::setCookie(string name, string value)
 *
 * Appends "name=value;" to the "cookies" property, which fetch() sends as
 * the request's cookie string.
 * Returns FALSE when an argument is missing or is not a string.
 */
ZEND_METHOD(fetch_url, setCookie){
    zval *zval_cookies;
    zval *cookie_name, *cookie_value;
    smart_str impl_cookies = {0};

    if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &cookie_name, &cookie_value) == FAILURE){
        RETURN_FALSE;
    }

    if(Z_TYPE_P(cookie_name) != IS_STRING || Z_TYPE_P(cookie_value) != IS_STRING){
        RETURN_FALSE;
    }

    /* Start from the cookies that are already stored. */
    zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
    if(Z_TYPE_P(zval_cookies) == IS_STRING){
        smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));
    }

    smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_name), Z_STRLEN_P(cookie_name));
    smart_str_appendc(&impl_cookies, '=');
    smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_value), Z_STRLEN_P(cookie_value));
    smart_str_appendc(&impl_cookies, ';');
    smart_str_0(&impl_cookies);

    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);

    /* The property holds its own copy; release the working buffer. */
    smart_str_free(&impl_cookies);
}

ZEND_METHOD(fetch_url, setCookies){

zval *zval_cookies;

zval *cookie_array;

smart_str impl_cookies = {0};

HashTable *cookies_ht;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &cookie_array) == FAILURE){

RETURN_FALSE;

}

if(Z_TYPE_P(cookie_array) != IS_ARRAY){

RETURN_FALSE;

}

zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);

cookies_ht = Z_ARRVAL_P(cookie_array);

smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));

for(zend_hash_internal_pointer_reset(cookies_ht);

zend_hash_has_more_elements(cookies_ht) == SUCCESS;

zend_hash_move_forward(cookies_ht))

{

zval **value;

char *key;

uint key_len;

ulong idx;

if(zend_hash_get_current_key_ex(cookies_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){

continue;

}

if(zend_hash_get_current_data(cookies_ht, (void**)&value) == FAILURE){

continue;

}

convert_to_string(*value);

if(Z_TYPE_PP(value) != IS_STRING){

continue;

}

smart_str_appendl(&impl_cookies, key, key_len-1);

smart_str_appendl(&impl_cookies, "=", 1);

smart_str_appendl(&impl_cookies, Z_STRVAL_PP(value), Z_STRLEN_PP(value));

smart_str_appendl(&impl_cookies, ";", 1);

}

php_url_decode(impl_cookies.c, impl_cookies.len);

smart_str_0(&impl_cookies);

zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);

}

ZEND_METHOD(fetch_url, setHeader){

zval *headers, *value=NULL, *send_headers;

HashTable *headers_ht;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &headers) == FAILURE){

RETURN_FALSE;

}

send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);

if(Z_TYPE_P(send_headers) == IS_NULL){

MAKE_STD_ZVAL(send_headers);

array_init(send_headers);

}

headers_ht = Z_ARRVAL_P(send_headers);

if(Z_TYPE_P(headers) == IS_ARRAY){

for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(headers));

zend_hash_has_more_elements(Z_ARRVAL_P(headers)) == SUCCESS;

zend_hash_move_forward(Z_ARRVAL_P(headers))){

char* key;

uint key_len;

ulong idx;

zval **tmpzval;

if(zend_hash_get_current_key_ex(Z_ARRVAL_P(headers), &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){

continue;

}

if(zend_hash_get_current_data(Z_ARRVAL_P(headers), (void**)&tmpzval) == FAILURE){

continue;

}

add_assoc_stringl(send_headers, key, Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), 1);

}

}else if(Z_TYPE_P(headers) == IS_STRING && Z_TYPE_P(value) == IS_STRING){

add_assoc_stringl(send_headers, Z_STRVAL_P(headers), Z_STRVAL_P(value), Z_STRLEN_P(value), 1);

}else{

zend_error(E_WARNING, "param error.");

}

zend_update_property(FETCH_THIS, ZEND_STRL("send_headers"), send_headers);

}

/*
 * FetchUrl::setMethod(string method)
 *
 * Selects the request method; "get" and "post" are accepted in any letter
 * case. Returns TRUE on success. Returns FALSE when the argument is missing
 * or not a string, or when the method is unsupported (error 404 is recorded
 * in that last case).
 */
ZEND_METHOD(fetch_url, setMethod){
    zval *zval_method;
    char *method;
    int is_get, is_post;

    if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &zval_method) == FAILURE){
        RETURN_FALSE;
    }

    if(Z_TYPE_P(zval_method) != IS_STRING){
        RETURN_FALSE;
    }

    /* php_strtolower() works in place: lower-case a private copy, not the caller's string. */
    method = estrndup(Z_STRVAL_P(zval_method), Z_STRLEN_P(zval_method));
    php_strtolower(method, Z_STRLEN_P(zval_method));
    is_get = (strcmp(method, "get") == 0);
    is_post = (strcmp(method, "post") == 0);
    efree(method);

    if(is_get){
        zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);
    }else if(is_post){
        zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("post") TSRMLS_CC);
    }else{
        FETCH_ERROR("Not support method.", 404);
        RETURN_FALSE;
    }

    RETURN_TRUE;
}

ZEND_METHOD(fetch_url, setPostData){

zval *data, *post_data;

HashTable *post_data_ht;

smart_str temp = {0};

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &post_data) == FAILURE){

RETURN_FALSE;

}

data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);

if(Z_TYPE_P(post_data) != IS_ARRAY){

zend_error(E_WARNING, "post data must be array.");

RETURN_FALSE;

}

post_data_ht = Z_ARRVAL_P(post_data);

smart_str_appendl(&temp, Z_STRVAL_P(data), Z_STRLEN_P(data));

for(zend_hash_internal_pointer_reset(post_data_ht);

zend_hash_has_more_elements(post_data_ht) == SUCCESS;

zend_hash_move_forward(post_data_ht)){

zval **current_data;

char *key;

uint key_len;

ulong idx;

if(zend_hash_get_current_key_ex(post_data_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){

continue;

}

if(zend_hash_get_current_data(post_data_ht, (void**)¤t_data) == FAILURE){

continue;

}

convert_to_string(*current_data);

smart_str_appendl(&temp, key, key_len-1);

smart_str_appendc(&temp, '=');

smart_str_appendl(&temp, Z_STRVAL_PP(current_data), Z_STRLEN_PP(current_data));

smart_str_appendc(&temp, '&');

}

smart_str_0(&temp);

zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), temp.c, temp.len TSRMLS_CC);

}

ZEND_METHOD(fetch_url, setReadTimeout){

zval *read_timeout;

if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &read_timeout) == FAILURE){

RETURN_FALSE;

}

if(Z_TYPE_P(read_timeout) != IS_LONG){

zend_error(E_WARNING, "readtimeout must be integer.");

RETURN_FALSE;

}

zend_update_property_long(FETCH_THIS, ZEND_STRL("read_timeout"), Z_LVAL_P(read_timeout) TSRMLS_CC);

}

/* FetchUrl::__destruct(): intentionally empty. */
ZEND_METHOD(fetch_url, __destruct){}

/*
 * Method table of the FetchUrl class: one entry per ZEND_METHOD above, with
 * its argument description and access flags.
 */
static zend_function_entry fetch_url_method[] = {
    ZEND_ME(fetch_url, __construct, void_arginfo, ZEND_ACC_CTOR|ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setBinary, setBinary_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setFile, setFile_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, body, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, clean, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, errmsg, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, errcode, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, fetch, fetch_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, httpCode, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, responseCookies, responseCookies_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, responseHeaders, responseHeaders_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setAllowRedirect, setAllowRedirect_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setConnectTimeout, setConnectTimeout_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setCookie, setCookie_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setCookies, setCookies_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setHeader, setHeader_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setMethod, setMethod_arginfo, ZEND_ACC_PUBLIC)
    /* The arginfo table of setPostData is itself named "setPostData" (no _arginfo suffix). */
    ZEND_ME(fetch_url, setPostData, setPostData, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setReadTimeout, setReadTimeout_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, __destruct, void_arginfo, ZEND_ACC_DTOR|ZEND_ACC_PUBLIC)
    /* Same terminator as fetch_url_functions[]: initialises every field of the entry. */
    PHP_FE_END
};

/* If you declare any globals in php_fetch_url.h uncomment this:

ZEND_DECLARE_MODULE_GLOBALS(fetch_url)

*/

/* True global resources - no need for thread safety here */

static int le_fetch_url;

/* {{{ fetch_url_functions[]

*

* Every user visible function must have an entry in fetch_url_functions[].

*/

/* Procedural function table of the module: it holds only the terminator, i.e. no plain functions are registered here. */
const zend_function_entry fetch_url_functions[] = {

PHP_FE_END/* Must be the last line in fetch_url_functions[] */

};

/* }}} */

/* {{{ fetch_url_module_entry

*/

/*
 * Module descriptor: names the extension "fetch_url" and wires up its
 * function table, its startup/shutdown/request hooks, its info callback and
 * its version string.
 */
zend_module_entry fetch_url_module_entry = {

#if ZEND_MODULE_API_NO >= 20010901

STANDARD_MODULE_HEADER,

#endif

"fetch_url",

fetch_url_functions,

PHP_MINIT(fetch_url),

PHP_MSHUTDOWN(fetch_url),

PHP_RINIT(fetch_url),/* Replace with NULL if there's nothing to do at request start */

PHP_RSHUTDOWN(fetch_url),/* Replace with NULL if there's nothing to do at request end */

PHP_MINFO(fetch_url),

#if ZEND_MODULE_API_NO >= 20010901

"0.1", /* Replace with version number for your extension */

#endif

STANDARD_MODULE_PROPERTIES

};

/* }}} */

#ifdef COMPILE_DL_FETCH_URL

ZEND_GET_MODULE(fetch_url)

#endif

/* {{{ PHP_INI

*/

/* Remove comments and fill if you need to have entries in php.ini

PHP_INI_BEGIN()

STD_PHP_INI_ENTRY("fetch_url.global_value", "42", PHP_INI_ALL, OnUpdateLong, global_value, zend_fetch_url_globals, fetch_url_globals)

STD_PHP_INI_ENTRY("fetch_url.global_string", "foobar", PHP_INI_ALL, OnUpdateString, global_string, zend_fetch_url_globals, fetch_url_globals)

PHP_INI_END()

*/

/* }}} */

/* {{{ php_fetch_url_init_globals

*/

/* Uncomment this function if you have INI entries

static void php_fetch_url_init_globals(zend_fetch_url_globals *fetch_url_globals)

{

fetch_url_globals->global_value = 0;

fetch_url_globals->global_string = NULL;

}

*/

/* }}} */

/* {{{ PHP_MINIT_FUNCTION

*/

PHP_MINIT_FUNCTION(fetch_url)

{

/* If you have INI entries, uncomment these lines

REGISTER_INI_ENTRIES();

*/

zend_class_entry fetch_ce;

INIT_CLASS_ENTRY(fetch_ce, FETCH_CLASS_NAME, fetch_url_method);

g_fetch_ce = zend_register_internal_class(&fetch_ce TSRMLS_CC);

zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("body"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_null(g_fetch_ce, ZEND_STRL("errmsg"), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_long(g_fetch_ce, ZEND_STRL("errno"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_null(g_fetch_ce, ZEND_STRL("httpCode"), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("cookies"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("headers"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_null(g_fetch_ce, ZEND_STRL("send_headers"), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_long(g_fetch_ce, ZEND_STRL("allow_redirect"), 1, ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_long(g_fetch_ce, ZEND_STRL("connect_timeout"), 5, ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("method"), ZEND_STRL("get"), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_long(g_fetch_ce, ZEND_STRL("multilpart"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("data"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_long(g_fetch_ce, ZEND_STRL("read_timeout"), 60, ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_null(g_fetch_ce, ZEND_STRL("binary_data"), ZEND_ACC_PROTECTED TSRMLS_CC);

zend_declare_property_null(g_fetch_ce, ZEND_STRL("upload_filepaths"), ZEND_ACC_PROTECTED TSRMLS_CC);

return SUCCESS;

}

/* }}} */

/* {{{ PHP_MSHUTDOWN_FUNCTION

*/

/* Module shutdown hook: nothing to release, always reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(fetch_url)

{

/* uncomment this line if you have INI entries

UNREGISTER_INI_ENTRIES();

*/

return SUCCESS;

}

/* }}} */

/* Remove if there's nothing to do at request start */

/* {{{ PHP_RINIT_FUNCTION

*/

/* Request startup hook: no per-request setup, always reports SUCCESS. */
PHP_RINIT_FUNCTION(fetch_url)

{

return SUCCESS;

}

/* }}} */

/* Remove if there's nothing to do at request end */

/* {{{ PHP_RSHUTDOWN_FUNCTION

*/

/* Request shutdown hook: no per-request teardown, always reports SUCCESS. */
PHP_RSHUTDOWN_FUNCTION(fetch_url)

{

return SUCCESS;

}

/* }}} */

/* {{{ PHP_MINFO_FUNCTION

*/

PHP_MINFO_FUNCTION(fetch_url)

{

php_info_print_table_start();

php_info_print_table_header(2, "fetch_url support", "enabled");

php_info_print_table_end();

/* Remove comments if you have entries in php.ini

DISPLAY_INI_ENTRIES();

*/

}

/* }}} */

【用C实现PHP扩展 Fetch_Url 类数据抓取的方法】相关文章:

全排列算法的非递归实现与递归实现的方法(C++)

解析如何用指针实现整型数据的加法

基于Linux系统调用--getrlimit()与setrlimit()函数的方法

深入分析为Visual Assist设置快捷键的方法

C++读写.mat文件的方法

用C语言实现单链表的各种操作(二)

用C++实现单向循环链表的解决方法

用C实现添加和读取配置文件函数

C++算法之海量数据处理方法的总结分析

C++实现正态随机分布的方法

精品推荐
分类导航