手机
当前位置:查字典教程网 >编程开发 >C#教程 >c#远程html数据抓取实例分享
c#远程html数据抓取实例分享
摘要:本文分享两个用 C# 抓取远程 HTML 的示例方法:GetHttp(基于 MSXML2.XMLHTTP 组件)和 GetHttp1(基于 HttpWebRequest,支持设置 Referer 与编码)。

复制代码 代码如下:

/// <summary>
/// Fetches a remote resource synchronously through the MSXML2 XMLHTTP COM component.
/// </summary>
/// <param name="url">Target URL. For GET requests a non-empty <paramref name="param"/> is appended as the query string.</param>
/// <param name="methed">HTTP method name; it is lower-cased before being compared with "get" / "post" and passed to XMLHTTP.</param>
/// <param name="param">Already URL-encoded parameters (query string for GET, form body for POST). May be null or empty.</param>
/// <param name="html">On success, the response text. On failure, an error message beginning with "远程连接失败:".</param>
/// <returns><c>true</c> when the request completed (readyState 4); otherwise <c>false</c>.</returns>
public static bool GetHttp(string url, string methed, string param, out string html)
{
    MSXML2.XMLHTTP mx = null;
    try
    {
        // Kept inside the try so that a null method/url is reported through
        // the bool + message contract instead of escaping as an exception.
        // Invariant casing: HTTP method names are not linguistic text.
        methed = methed.ToLowerInvariant();
        bool hasParam = !string.IsNullOrEmpty(param);

        if (hasParam && methed == "get")
        {
            // Use '&' when the URL already carries a query string,
            // otherwise the result would contain two '?' separators.
            url += (url.IndexOf('?') >= 0 ? "&" : "?") + param;
        }

        mx = new MSXML2.XMLHTTPClass();
        // Third argument 'false' = synchronous: send() blocks until done.
        mx.open(methed, url, false, null, null);

        if (hasParam && methed == "post")
        {
            // Content-Length is intentionally NOT set by hand: the character
            // count of the string differs from the encoded byte count for
            // non-ASCII text, and XMLHTTP supplies the header itself.
            mx.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
        }

        mx.send(param);

        // 4 == request finished and response is ready.
        if (mx.readyState != 4)
        {
            html = "远程连接失败:-4";
            return false;
        }

        html = mx.responseText;
        return true;
    }
    catch (Exception ex)
    {
        html = "远程连接失败:" + ex.Message;
        return false;
    }
    finally
    {
        // Release the COM object deterministically instead of waiting for
        // the garbage collector to finalize the runtime callable wrapper.
        if (mx != null)
        {
            System.Runtime.InteropServices.Marshal.ReleaseComObject(mx);
        }
    }
}

/// <summary>
/// Fetches a remote resource with <see cref="HttpWebRequest"/> and returns the response body as text.
/// </summary>
/// <param name="url">Target URL. For GET requests a non-empty <paramref name="param"/> is appended as the query string.</param>
/// <param name="methed">HTTP method name; "get" and "post" are recognised case-insensitively.</param>
/// <param name="param">Already URL-encoded parameters (query string for GET, form body for POST). May be null.</param>
/// <param name="referer">Value for the Referer header; skipped when null or empty.</param>
/// <param name="encode">Encoding name used for the POST body and for decoding the response; defaults to "utf-8" when null or empty.</param>
/// <param name="html">On success, the response body. On failure, an error message that includes the exception text and the URL.</param>
/// <returns><c>true</c> when the response was read; <c>false</c> when any exception occurred.</returns>
public static bool GetHttp1(string url, string methed, string param, string referer, string encode, out string html)
{
    try
    {
        // Case-insensitive so that "GET"/"POST" are not silently treated
        // as requests without parameters.
        bool isGet = string.Equals(methed, "get", StringComparison.OrdinalIgnoreCase);
        bool isPost = string.Equals(methed, "post", StringComparison.OrdinalIgnoreCase);

        // Built inside the try so a null url is reported via the return
        // value like every other failure.
        if (isGet && !string.IsNullOrEmpty(param))
        {
            url += (url.IndexOf('?') >= 0 ? "&" : "?") + param;
        }

        Encoding encoding = Encoding.GetEncoding(string.IsNullOrEmpty(encode) ? "utf-8" : encode);

        HttpWebRequest webreq = (HttpWebRequest)WebRequest.Create(url);
        webreq.Proxy = null;
        webreq.Timeout = 1000 * 6; // milliseconds
        webreq.ContentType = "application/x-www-form-urlencoded";
        // Header VALUE only: the "User-Agent:" name is added by the framework.
        // Conventional layout: browser id (OS; encryption level; language) engine version.
        webreq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0";
        // Note from the original author: when one address is requested very
        // frequently, "the underlying connection was closed" may appear after
        // a while; KeepAlive = false / HTTP 1.0 were candidate workarounds.

        if (!string.IsNullOrEmpty(referer))
        {
            webreq.Referer = referer;
        }

        // Fresh container per call: cookies set during redirects are honoured
        // but nothing is persisted between calls.
        webreq.CookieContainer = new CookieContainer();

        // HTTP method tokens are case-sensitive on the wire; send upper case.
        webreq.Method = methed.ToUpperInvariant();

        if (isPost && param != null)
        {
            byte[] payload = encoding.GetBytes(param);
            webreq.ContentLength = payload.Length;
            // using guarantees the request stream is closed even if Write throws.
            using (Stream body = webreq.GetRequestStream())
            {
                body.Write(payload, 0, payload.Length);
            }
        }

        using (HttpWebResponse webres = (HttpWebResponse)webreq.GetResponse())
        using (Stream dataStream = webres.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, encoding))
        {
            html = reader.ReadToEnd();
            // Kept from the original: may help with hanging/blocked connections.
            webreq.Abort();
        }

        return true;
    }
    catch (Exception ex)
    {
        html = "出现异常(HttpHelper.GetHTML),远程连接失败:" + ex.Message + " url:" + url;
        return false;
    }
}

【c#远程html数据抓取实例分享】相关文章:

c# 获得局域网主机列表实例

C#连接MySql数据库的方法

C#常用正则大全分享

C#中通过API实现的打印类 实例代码

C#将html table 导出成excel实例

c#数据类型基础

C# DataTable 转换为 实体类对象实例

c#动态调用Webservice的两种方法实例

C#连接Oracle数据库的实例方法

C# 泛型类(函数)的实例化小例子

精品推荐
分类导航