- 积分
- 40165
- 好友
- 记录
- 主题
- 帖子
- 听众
- 收听
|
C#HttpHelper类1.1正式版---苏飞版
导读部分
-------------------------------------------------------------------------------------------------------------
C#HttpHelper类苏飞版--系列教程导航
http://www.sufeinet.com/thread-3-1-1.html
本站是C#HttpHelper类唯一官方网站,唯一更新网站,希望大家收藏关注。
介绍
C#HttpHelper实现了C#HttpWebRequest抓取时无视编码、无视证书、无视Cookie,并且实现了代理的功能。使用它您可以进行Get和Post请求,可以很方便地设置Cookie、证书和代理;编码问题您不用管,因为类会自动为您识别网页的编码。
这个类是我以前写百度和网络蜘蛛时使用的,是经过上千万个网站的测试、上万个网站抓取的例子总结出来的,中间的方法也是我实验了很久之后确定的方案,所以大家可以放心使用。
我不敢说100%,但是应该是99%的网站都没有问题,都可以无视编码,证书,和Cookie,如果你确实发现那个网站在使用本类过程中有问题,出现乱码,或者是获取不了,不能带Cookie,不能带证书等问题,我非常乐意您能联系我,提出您的问题,您也可以直接回复本帖子,我会第一时间进行解答。
目录
1.HttpHelper类下载
2.在webBrowser中取Cookie的方法
3.HttpHelper使用方法Get和Post使用方法
4.HttpHelper使用时设置和获取Cookie的方法
5.各种使用方法与例子收集
1.HttpHelper类下载
在线测试工具:http://www.sufeinet.com/thread-1930-1-1.html
下载地址:http://www.sufeinet.com/thread-3-1-1.html
要学习更多关于C# Http的内容,请看我的文章《C# HttpWebRequest 绝技》
代码如下所示
[C#] 纯文本查看 复制代码 /// <summary>
/// 类说明:HttpHelps类,用来实现Http访问,Post或者Get方式的,直接访问,带Cookie的,带证书的等方式,可以设置代理
/// 编码日期:2011-09-20
/// 编 码 人:苏飞
/// 联系方式:361983679
/// 更新网址:http://www.sufeinet.com/thread-3-1-1.html
/// 修改日期:2013-01-07
/// </summary>
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.IO.Compression;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;
namespace DotNet.Utilities
{
/// <summary>
/// Http连接操作帮助类
/// </summary>
public class HttpHelper
{
#region 预定义方法或者变更
// Encoding used to decode the response body; starts as the system default and is
// reassigned by SetEncoding (null there means "detect it from the response")
private Encoding encoding = Encoding.Default;
// The HttpWebRequest used to issue the request; created by SetCer
private HttpWebRequest request = null;
// The HttpWebResponse of the most recent request
private HttpWebResponse response = null;
/// <summary>
/// Sends the prepared request, reads the complete response body and decodes it to text.
/// </summary>
/// <param name="objhttpitem">
/// Request settings. On return it also carries the response cookies, the raw bytes
/// (only when ResultType is Byte) and the decoded text in Html.
/// </param>
/// <returns>
/// The decoded body (lower-cased when IsToLower is set), or "String Error" when the
/// request raised a WebException.
/// </returns>
private string GetHttpRequestData(HttpItem objhttpitem)
{
    try
    {
        using (response = (HttpWebResponse)request.GetResponse())
        {
            if (response.Cookies != null)
            {
                objhttpitem.CookieCollection = response.Cookies;
            }
            if (response.Headers["set-cookie"] != null)
            {
                objhttpitem.Cookie = response.Headers["set-cookie"];
            }

            bool isGzip = response.ContentEncoding != null
                && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);

            // Buffer the whole body by hand (no Stream.CopyTo before .NET 4.0). Disposing the
            // source also disposes the wrapped network stream when it is a GZipStream.
            byte[] rawResponse;
            Stream body = response.GetResponseStream();
            Stream source = isGzip ? new GZipStream(body, CompressionMode.Decompress) : body;
            using (source)
            using (MemoryStream buffered = GetMemoryStream(source))
            {
                rawResponse = buffered.ToArray();
            }

            // Hand back the raw bytes only when the caller asked for them
            if (objhttpitem.ResultType == ResultType.Byte)
            {
                objhttpitem.ResultByte = rawResponse;
            }

            // No explicit encoding was requested: work it out from the page / headers
            if (encoding == null)
            {
                encoding = ResolveEncoding(rawResponse, response.CharacterSet);
            }
            objhttpitem.Html = encoding.GetString(rawResponse);
        }
    }
    catch (WebException ex)
    {
        // Report the failure through the fixed marker string callers already test for
        objhttpitem.Html = "String Error";
        response = (HttpWebResponse)ex.Response;
        if (response != null)
        {
            // The error response holds a connection too; release it
            response.Close();
        }
    }
    if (objhttpitem.IsToLower)
    {
        objhttpitem.Html = objhttpitem.Html.ToLower();
    }
    return objhttpitem.Html;
}

/// <summary>
/// Picks the text encoding for a response: first the charset declared in a meta tag of the
/// body, then the charset of the response headers, finally UTF-8.
/// "iso-8859-1" is deliberately mapped to "gbk" in both places, as the original code did.
/// </summary>
/// <param name="rawResponse">Raw body bytes</param>
/// <param name="responseCharacterSet">HttpWebResponse.CharacterSet; may be null or empty</param>
private static Encoding ResolveEncoding(byte[] rawResponse, string responseCharacterSet)
{
    // Decode with the system default just to be able to search the markup for a charset
    string probe = Encoding.Default.GetString(rawResponse, 0, rawResponse.Length);
    Match meta = Regex.Match(probe, "<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase | RegexOptions.Multiline);
    string charter = (meta.Groups.Count > 2) ? meta.Groups[2].Value : string.Empty;
    charter = charter.Replace("\"", string.Empty).Replace("'", string.Empty).Replace(";", string.Empty).Trim();

    // The capture is greedy and may run on into following attributes
    // (e.g. <meta charset="utf-8" name="x">); keep only the charset token itself.
    int cut = charter.IndexOfAny(new char[] { ' ', '\t', '\r', '\n', '/', '>' });
    if (cut >= 0)
    {
        charter = charter.Substring(0, cut);
    }

    if (charter.Length > 0)
    {
        Encoding fromMeta = TryGetEncoding(charter.ToLowerInvariant().Replace("iso-8859-1", "gbk"));
        if (fromMeta != null)
        {
            return fromMeta;
        }
        // Unknown charset name in the page: fall through to the header value
    }

    string headerCharset = (responseCharacterSet ?? string.Empty).Trim();
    if (headerCharset.Length == 0)
    {
        return Encoding.UTF8;
    }
    if (string.Equals(headerCharset, "iso-8859-1", StringComparison.OrdinalIgnoreCase))
    {
        headerCharset = "gbk";
    }
    return TryGetEncoding(headerCharset) ?? Encoding.UTF8;
}

/// <summary>
/// Looks an encoding up by name; returns null instead of throwing when the name is unknown.
/// </summary>
private static Encoding TryGetEncoding(string name)
{
    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}
/// <summary>
/// Drains a stream into a new MemoryStream (stand-in for Stream.CopyTo on runtimes older than .NET 4.0).
/// </summary>
/// <param name="streamResponse">Stream to read until it reports no more data</param>
/// <returns>A MemoryStream holding every byte that was read</returns>
private static MemoryStream GetMemoryStream(Stream streamResponse)
{
    const int ChunkSize = 256;
    MemoryStream copy = new MemoryStream();
    byte[] chunk = new byte[ChunkSize];
    // Read returns 0 once the source is exhausted
    for (int count = streamResponse.Read(chunk, 0, ChunkSize);
         count > 0;
         count = streamResponse.Read(chunk, 0, ChunkSize))
    {
        copy.Write(chunk, 0, count);
    }
    return copy;
}
/// <summary>
/// Creates the HttpWebRequest for the item and copies every setting of the item onto it.
/// </summary>
/// <param name="objhttpItem">Request settings (URL, method, headers, cookies, proxy, POST data)</param>
private void SetRequest(HttpItem objhttpItem)
{
// Create the request, attaching a client certificate when CerPath is set.
// Must run first: it is the call that assigns the request field used below.
SetCer(objhttpItem);
// Configure the proxy, if one was supplied
SetProxy(objhttpItem);
// HTTP method (GET or POST)
request.Method = objhttpItem.Method;
request.Timeout = objhttpItem.Timeout;
request.ReadWriteTimeout = objhttpItem.ReadWriteTimeout;
// Accept header
request.Accept = objhttpItem.Accept;
// Content-Type header of the request
request.ContentType = objhttpItem.ContentType;
// User-Agent header (browser and operating system identification)
request.UserAgent = objhttpItem.UserAgent;
// Encoding used later to decode the response
SetEncoding(objhttpItem);
// Cookie header and/or cookie container
SetCookie(objhttpItem);
// Referer header
request.Referer = objhttpItem.Referer;
// Whether redirects are followed automatically
request.AllowAutoRedirect = objhttpItem.Allowautoredirect;
// Write the POST body (only for POST requests that carry data)
SetPostData(objhttpItem);
// Connection limit of the request's service point; values <= 0 leave it untouched
if (objhttpItem.Connectionlimit > 0)
{
request.ServicePoint.ConnectionLimit = objhttpItem.Connectionlimit;
}
}
/// <summary>
/// Creates the HttpWebRequest for the item's URL and, when a certificate path is given,
/// adds that client certificate to the request.
/// </summary>
/// <param name="objhttpItem">Request settings; reads URL and CerPath</param>
private void SetCer(HttpItem objhttpItem)
{
    bool useCertificate = !string.IsNullOrEmpty(objhttpItem.CerPath);

    if (useCertificate)
    {
        // Installed before the request is created. The callback is a static
        // ServicePointManager setting and CheckValidationResult accepts every certificate.
        ServicePointManager.ServerCertificateValidationCallback =
            new RemoteCertificateValidationCallback(CheckValidationResult);
    }

    // Build the request for the (scheme-normalised) URL
    request = (HttpWebRequest)WebRequest.Create(GetUrl(objhttpItem.URL));

    if (useCertificate)
    {
        // Load the certificate file and attach it to the request
        X509Certificate clientCertificate = new X509Certificate(objhttpItem.CerPath);
        request.ClientCertificates.Add(clientCertificate);
    }
}
/// <summary>
/// Chooses the encoding used to decode the response: the one named by the item, or
/// null (meaning "detect it from the response") when the name is empty or "null".
/// </summary>
/// <param name="objhttpItem">Request settings; reads Encoding</param>
private void SetEncoding(HttpItem objhttpItem)
{
    string requested = objhttpItem.Encoding;
    bool autoDetect = string.IsNullOrEmpty(requested) || requested.ToLower().Trim() == "null";
    encoding = autoDetect ? null : System.Text.Encoding.GetEncoding(requested);
}
/// <summary>
/// Applies the item's cookies to the request: the raw Cookie header string and/or a
/// cookie container filled from the item's CookieCollection.
/// </summary>
/// <param name="objhttpItem">Request settings; reads Cookie and CookieCollection</param>
private void SetCookie(HttpItem objhttpItem)
{
    // Raw header value, sent as-is
    string headerValue = objhttpItem.Cookie;
    if (!string.IsNullOrEmpty(headerValue))
    {
        request.Headers[HttpRequestHeader.Cookie] = headerValue;
    }

    // Structured cookies go through a fresh container
    CookieCollection cookies = objhttpItem.CookieCollection;
    if (cookies != null)
    {
        CookieContainer container = new CookieContainer();
        request.CookieContainer = container;
        container.Add(cookies);
    }
}
/// <summary>
/// Writes the item's Postdata into the request body. Does nothing unless Postdata is
/// non-empty and the request method contains "post".
/// </summary>
/// <param name="objhttpItem">Request settings; reads Postdata</param>
private void SetPostData(HttpItem objhttpItem)
{
    bool isPost = request.Method.Trim().ToLower().Contains("post");
    if (string.IsNullOrEmpty(objhttpItem.Postdata) || !isPost)
    {
        return;
    }

    // The body is encoded with the system default encoding, as before
    byte[] buffer = Encoding.Default.GetBytes(objhttpItem.Postdata);
    request.ContentLength = buffer.Length;

    // Dispose the request stream once the body is written so it is always closed,
    // even when Write throws; the original never closed it.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }
}
/// <summary>
/// Routes the request through a proxy when the item supplies any of proxy user name,
/// proxy password or proxy address; otherwise leaves the request untouched.
/// </summary>
/// <param name="objhttpItem">Request settings; reads ProxyUserName, ProxyPwd and ProxyIp</param>
private void SetProxy(HttpItem objhttpItem)
{
    bool nothingConfigured = string.IsNullOrEmpty(objhttpItem.ProxyUserName)
        && string.IsNullOrEmpty(objhttpItem.ProxyPwd)
        && string.IsNullOrEmpty(objhttpItem.ProxyIp);
    if (nothingConfigured)
    {
        return;
    }

    // Proxy at the given address, local addresses are not bypassed
    WebProxy proxy = new WebProxy(objhttpItem.ProxyIp, false);
    // Credentials presented to the proxy
    proxy.Credentials = new NetworkCredential(objhttpItem.ProxyUserName, objhttpItem.ProxyPwd);
    request.Proxy = proxy;
    // The request itself is given the default network credentials
    request.Credentials = CredentialCache.DefaultNetworkCredentials;
}
/// <summary>
/// Certificate validation callback that accepts every certificate without inspecting it.
/// </summary>
/// <param name="sender">Object passed by the invoker of the callback (unused)</param>
/// <param name="certificate">Certificate being validated (unused)</param>
/// <param name="chain">X509Chain of the certificate (unused)</param>
/// <param name="errors">SslPolicyErrors reported for the certificate (unused)</param>
/// <returns>Always true</returns>
public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
{
// Always accept.
// NOTE(review): wherever this callback is installed, certificate errors are ignored.
return true;
}
#endregion
#region 普通类型
/// <summary>
/// Normalises a URL so that it starts with a scheme: input that does not begin with
/// "http://" or "https://" (any letter case) gets "http://" prepended.
/// Surrounding whitespace is removed.
/// </summary>
/// <param name="URL">URL with or without a scheme</param>
/// <returns>The URL, guaranteed to start with http:// or https://</returns>
/// <exception cref="ArgumentNullException">URL is null</exception>
public static string GetUrl(string URL)
{
    if (URL == null)
    {
        throw new ArgumentNullException("URL");
    }

    string trimmed = URL.Trim();
    // Test the START of the string: a scheme that merely occurs inside the query string
    // (e.g. "example.com/go?u=http://x") must not suppress the prefix.
    bool hasScheme = trimmed.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || trimmed.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    return hasScheme ? trimmed : "http://" + trimmed;
}
/// <summary>
/// Performs the request described by the item and returns the response body as text.
/// </summary>
/// <param name="objhttpItem">Request settings; also receives the results of the call</param>
/// <returns>The response text produced by GetHttpRequestData</returns>
public string GetHtml(HttpItem objhttpItem)
{
    // Step 1: build and configure the request
    SetRequest(objhttpItem);

    // Step 2: send it and decode the answer
    string html = GetHttpRequestData(objhttpItem);
    return html;
}
#endregion
}
/// <summary>
/// Parameter object for HttpHelper: describes one request and receives its results
/// (Cookie, CookieCollection, Html, ResultByte).
/// </summary>
public class HttpItem
{
    /// <summary>
    /// Creates an item with the default settings.
    /// </summary>
    public HttpItem()
    {
        Method = "GET";
        Timeout = 100000;
        ReadWriteTimeout = 30000;
        Accept = "text/html, application/xhtml+xml, */*";
        ContentType = "text/html";
        UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
        Encoding = string.Empty;
        Cookie = string.Empty;
        Referer = string.Empty;
        CerPath = string.Empty;
        IsToLower = true;
        Allowautoredirect = true;
        Connectionlimit = 1024;
        ProxyUserName = string.Empty;
        ProxyPwd = string.Empty;
        ProxyIp = string.Empty;
        ResultType = ResultType.String;
        Html = string.Empty;
    }

    /// <summary>
    /// Request URL; required. No default (null).
    /// </summary>
    public string URL { get; set; }

    /// <summary>
    /// HTTP method; defaults to "GET".
    /// </summary>
    public string Method { get; set; }

    /// <summary>
    /// Request timeout; defaults to 100000.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Read/write timeout; defaults to 30000.
    /// </summary>
    public int ReadWriteTimeout { get; set; }

    /// <summary>
    /// Accept header; defaults to "text/html, application/xhtml+xml, */*".
    /// </summary>
    public string Accept { get; set; }

    /// <summary>
    /// Content-Type header; defaults to "text/html".
    /// </summary>
    public string ContentType { get; set; }

    /// <summary>
    /// User-Agent header; defaults to "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)".
    /// </summary>
    public string UserAgent { get; set; }

    /// <summary>
    /// Name of the encoding used to decode the response; empty (the default) or "null"
    /// lets HttpHelper detect it.
    /// </summary>
    public string Encoding { get; set; }

    /// <summary>
    /// Data to send as the body of a POST request. No default (null).
    /// </summary>
    public string Postdata { get; set; }

    /// <summary>
    /// Cookie header string for the request; defaults to empty.
    /// </summary>
    public string Cookie { get; set; }

    /// <summary>
    /// Referer header (the previously visited address); defaults to empty.
    /// </summary>
    public string Referer { get; set; }

    /// <summary>
    /// Absolute path of a client certificate file; defaults to empty (no certificate).
    /// </summary>
    public string CerPath { get; set; }

    /// <summary>
    /// Cookies as a collection; defaults to null.
    /// </summary>
    public CookieCollection CookieCollection { get; set; }

    /// <summary>
    /// Whether the returned text is converted to lower case; defaults to true.
    /// </summary>
    public Boolean IsToLower { get; set; }

    /// <summary>
    /// Whether redirects are followed, so that the result is the page redirected to;
    /// defaults to true.
    /// </summary>
    public Boolean Allowautoredirect { get; set; }

    /// <summary>
    /// Maximum number of connections; defaults to 1024.
    /// </summary>
    public int Connectionlimit { get; set; }

    /// <summary>
    /// User name for the proxy server; defaults to empty.
    /// </summary>
    public string ProxyUserName { get; set; }

    /// <summary>
    /// Password for the proxy server; defaults to empty.
    /// </summary>
    public string ProxyPwd { get; set; }

    /// <summary>
    /// Address of the proxy server; defaults to empty.
    /// </summary>
    public string ProxyIp { get; set; }

    /// <summary>
    /// Kind of result wanted (String or Byte); defaults to ResultType.String.
    /// </summary>
    public ResultType ResultType { get; set; }

    /// <summary>
    /// Response text; defaults to empty.
    /// </summary>
    public string Html { get; set; }

    /// <summary>
    /// Response bytes; defaults to null.
    /// </summary>
    public byte[] ResultByte { get; set; }
}
/// <summary>
/// Kind of result a request should produce.
/// </summary>
public enum ResultType
{
    /// <summary>
    /// Return the response as a string only.
    /// </summary>
    String = 0,

    /// <summary>
    /// Return the response as a string and as a byte array.
    /// </summary>
    Byte = 1
}
}
2.在webBrowser中取Cookie的方法
在很多情况下我们会使用间进程的webBrowser去实现一些网页的请求和抓取,这个时候有部分网页是取不到Cookie的,那怎么办呢?下面我提供一个方法,应该99%的情况都能取到。
[C#] 纯文本查看 复制代码 //取当前webBrowser登录后的Cookie值
// P/Invoke declaration of InternetGetCookieEx from wininet.dll; GetCookieString calls it
// to copy the cookie data for a URL into pchCookieData.
// pcchCookieData is passed by reference: the caller supplies the buffer size and reads
// the value back after the call.
[DllImport("wininet.dll", CharSet = CharSet.Auto, SetLastError = true)]
static extern bool InternetGetCookieEx(string pchURL, string pchCookieName, StringBuilder pchCookieData, ref int pcchCookieData, int dwFlags, object lpReserved);
// Returns the cookie string InternetGetCookieEx reports for the URL (only available after
// the login has happened), or null when it cannot be read.
private static string GetCookieString(string url)
{
    // Value passed as dwFlags (INTERNET_COOKIE_HTTPONLY in the WinINet headers)
    const int cookieFlags = 0x00002000;

    // First attempt with a small buffer
    int capacity = 256;
    StringBuilder buffer = new StringBuilder(capacity);
    if (InternetGetCookieEx(url, null, buffer, ref capacity, cookieFlags, null))
    {
        return buffer.ToString();
    }

    // The call failed; capacity now holds the size written back by the API
    if (capacity < 0)
    {
        return null;
    }

    // Second attempt with a buffer of the reported size
    buffer = new StringBuilder(capacity);
    return InternetGetCookieEx(url, null, buffer, ref capacity, cookieFlags, null)
        ? buffer.ToString()
        : null;
}
3.HttpHelper类使用方法
使用方法,本来不应该写的因为这个本来就是一个类,直接调用就行了。
考虑到确实有不少人问怎么用,怎么调用,我想了想还是写几条例子吧
不管在调用任何方法之前都需要New一下
[C#] 纯文本查看 复制代码 HttpHelper objhttp = new HttpHelper();
1.比如我要访问www.sufeinet.com
[C#] 纯文本查看 复制代码 //下面是个例子大家可以看一下
HttpHelper http = new HttpHelper();
HttpItem item = new HttpItem()
{
URL = "http://www.sufeinet.com",//URL; required
Encoding = "gbk",//response encoding (utf-8, gb2312, gbk); optional, detected automatically when omitted
Method = "get",//HTTP method; optional, defaults to GET
Timeout = 100000,//connection timeout; optional, defaults to 100000
ReadWriteTimeout = 30000,//timeout for writing the POST data; optional, defaults to 30000
IsToLower = false,//whether the returned HTML is converted to lower case; optional, defaults to true
Cookie = "",//cookie string; optional
UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",//browser type, version and operating system; optional, has a default
Accept = "text/html, application/xhtml+xml, */*",//optional, has a default
ContentType = "text/html",//content type; optional, has a default
//Referer = "http://www.sufeinet.com",//referring URL; optional
//Allowautoredirect = true,//whether redirects are followed; optional
//CerPath = "d:\\123.cer",//absolute path of the certificate; optional, omit when no certificate is needed
//Connectionlimit = 1024,//maximum number of connections; optional, defaults to 1024
//Postdata = "username=sufei&pwd=sufeinet.com",//POST data; optional, not needed for GET
//ProxyIp = "192.168.1.105",//proxy server address; optional, leave all three proxy settings out when no proxy is used
//ProxyPwd = "123456",//proxy server password; optional
//ProxyUserName = "administrator",//proxy server user name; optional
ResultType = ResultType.Byte,//kind of result wanted: Byte or String
};
//fetch the HTML
string html = http.GetHtml(item);
//cookie returned by the server
string cookie = item.Cookie;
//response as a byte array
byte[] bytelist = item.ResultByte;
这只是个例子,如果大家想看更多更具体的操作,请在论坛查询其它文章,只要是Http访问相关的我使用的全部是这个类。
4.HttpHelper设置和获取Cookie的方法
下面是一些关于取Cookie和设置Cookie的例子
[C#] 纯文本查看 复制代码 HttpHelper objhttp = new HttpHelper();
//first request: its settings
HttpItem objHttpItem = new HttpItem()
{
URL = "http://www.sufeinet.com",
Encoding = "gb2312",
Method = "GET",
};
//fetch the HTML; afterwards objHttpItem.Cookie holds the Set-Cookie header of the response, if there was one
string text = objhttp.GetHtml(objHttpItem);
//second request: a POST that sends the cookie of the first request back
objHttpItem = new HttpItem()
{
URL = "http://www.sufeinet.com",
Postdata = "username=sufei&userpwd=123456",
Encoding = "gb2312",
Method = "POST",
Referer = "",
Cookie = objHttpItem.Cookie,
IsToLower = false//whether the result is converted to lower case
};
//fetch the HTML
string html = objhttp.GetHtml(objHttpItem);
|
|