|
我本来是搞php开发的,但是来了公司后需要做一个百度搜索的结果分析软件,我开始的时候已经用php开发完了,但是后来在使用的过程中才发现php做的网页版查询起来特别不方便,所以才决定自学一下C#,搞搞桌面开发,到今天自学C#差不多快一个月了。下面说下我主要做的东西:
首先是软件的主界面:由于是给公司内部使用的,所以我对软件做了加密处理,还有注册码的添加。
下面就是主要的百度搜索结果分析的功能,我之前自己在网上扒下来不少代码,然后自己拼凑到一起,已经开发完成了一个查词的功能,但是做完才发现这东西必须要用多线程来处理,要不然界面就直接卡死了。但是多线程在网上不像我之前看的基础教程那样有视频,所以自学了快一周了也没有搞明白。
下面是我主要的代码,里面请求HTTP网页信息的代码是我自己从网上扒下来,然后自己改巴改巴的。
/// <summary>
/// Requests a search-result page over HTTP and returns its cleaned-up markup.
/// </summary>
/// <param name="url">Absolute URL of the search page to request.</param>
/// <param name="Chareset">Name of the encoding used to decode the response body (for example "utf-8").</param>
/// <returns>
/// The page markup as produced by <c>readResponseStream</c>. If anything throws while the
/// request is sent or the body is read, the exception text (<c>ex.ToString()</c>) is returned
/// instead, and the status label is updated with a retry hint.
/// </returns>
public string search(string url, string Chareset)
{
    // NOTE(review): this method reads comboBoxYinqing and writes toolStripStatusLabelTishi.
    // Presumably both are WinForms controls owned by the UI thread; if search() is moved to a
    // worker thread those accesses must be marshalled back to the UI thread - confirm.
    string yinqing = (string)comboBoxYinqing.SelectedItem;

    // Raise the process-wide default connection limit.
    System.Net.ServicePointManager.DefaultConnectionLimit = 1000;

    // Desktop browser identities; one is picked at random for every non-mobile request.
    string[] desktopAgents = new string[]
    {
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 SE 2.X MetaSr 1.0",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Win64; x64; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
    };

    string UserAgent;
    if (yinqing == "百度手机搜索引擎移动网络")
    {
        // The mobile search engine is always queried with a fixed iPhone identity.
        UserAgent = "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3 like Mac OS X; de-de) AppleWebKit/533.17.9 (KHTML, like Gecko) Mobile/8F190";
    }
    else
    {
        // Seed explicitly: a parameterless Random is time-seeded on .NET Framework, so calls
        // made within the same clock tick would otherwise all pick the same User-Agent.
        Random rd = new Random(Guid.NewGuid().GetHashCode());
        UserAgent = desktopAgents[rd.Next(desktopAgents.Length)];
    }

    Uri uri = new Uri(url);
    HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(uri);
    myHttpWebRequest.ServicePoint.Expect100Continue = false;
    myHttpWebRequest.ServicePoint.UseNagleAlgorithm = false; // Nagle disabled to cut latency
    myHttpWebRequest.ServicePoint.ConnectionLimit = 65500;   // per-endpoint connection cap
    myHttpWebRequest.AllowWriteStreamBuffering = false;      // do not buffer request data
    myHttpWebRequest.UseDefaultCredentials = true;
    myHttpWebRequest.ContentType = "text/html";
    myHttpWebRequest.UserAgent = UserAgent;
    myHttpWebRequest.Method = "GET";
    myHttpWebRequest.Accept = " text/html, */*; q=0.01";
    // One connection per request: keep-alive off, HTTP/1.0.
    myHttpWebRequest.KeepAlive = false;
    myHttpWebRequest.ProtocolVersion = HttpVersion.Version10;
    myHttpWebRequest.Host = "www.baidu.com";
    myHttpWebRequest.Referer = myHttpWebRequest.Address.AbsoluteUri;
    // Reuse the cookies held by the shared baiduwebrequest object.
    myHttpWebRequest.CookieContainer = baiduwebrequest.CookieContainer;
    // Request timeout: 30 seconds.
    myHttpWebRequest.Timeout = 1 * 30 * 1000;

    HttpWebResponse response = null;
    try
    {
        response = (HttpWebResponse)myHttpWebRequest.GetResponse();
        // Read the HTML from the response stream and format it.
        return readResponseStream(response, Chareset);
    }
    catch (Exception ex)
    {
        toolStripStatusLabelTishi.Text = "提示:网络连接出现问题,请检查网络,正在重试!或者你可以更换ip";
        myHttpWebRequest.Abort();
        return ex.ToString();
    }
    finally
    {
        // Always release the connection, including when reading the body failed part-way.
        // Closing a response that has already been closed is harmless.
        if (response != null)
        {
            response.Close();
        }
    }
}
/// <summary>
/// Reads the complete body of <paramref name="response"/>, decodes it with the named
/// encoding and returns the text after it has been passed through <c>formatHTML</c>.
/// The response is closed before this method returns, whether or not reading succeeded.
/// </summary>
/// <param name="response">The HTTP response whose body is read; it is closed by this method.</param>
/// <param name="Chareset">Name of the text encoding, as accepted by <c>Encoding.GetEncoding</c>.</param>
/// <returns>The formatted page markup.</returns>
public string readResponseStream(HttpWebResponse response, string Chareset)
{
    try
    {
        // The body is read as-is: no decompression wrapper is needed because the text is
        // decoded straight from the response stream.
        using (Stream body = response.GetResponseStream())
        using (StreamReader responseReader = new StreamReader(body, Encoding.GetEncoding(Chareset)))
        {
            return formatHTML(responseReader.ReadToEnd());
        }
    }
    finally
    {
        // Release the connection even when decoding or formatting throws.
        response.Close();
    }
}
/// <summary>
/// Cleans raw page markup: deletes a fixed list of literal fragments, then removes
/// script blocks, style blocks, opening tags that start with "&lt;a" and img tags.
/// </summary>
/// <param name="htmlContent">The raw HTML text to clean.</param>
/// <returns>The markup with the fragments and tags listed above removed.</returns>
public string formatHTML(string htmlContent)
{
    // Literal fragments deleted outright, applied strictly in this order.
    // NOTE(review): "\r\n" can no longer occur once "\r" and "\n" are gone, so that entry is a no-op.
    string[] fragments =
    {
        "»", " ", "©", "\r", "\t", "\n", "&", "\r\n",
        "<em>", "</em>", "<EM>", "</EM>"
    };

    // Tag patterns removed afterwards, also in order. Matching is case-sensitive.
    // NOTE(review): "<a.*?>" matches any opening tag whose name begins with 'a', not only anchors.
    string[] tagPatterns =
    {
        "<script.*?</script>",
        "<style.*?>.*?</style>",
        "<a.*?>",
        "<img.*?>"
    };

    string cleaned = htmlContent;
    foreach (string fragment in fragments)
    {
        cleaned = cleaned.Replace(fragment, "");
    }

    foreach (string pattern in tagPatterns)
    {
        cleaned = Regex.Replace(cleaned, pattern, "");
    }

    return cleaned;
}
|
|