|
程序在运行的时候会有图片查看是损坏,然后运行完毕所有图片都没有了。。。
[code=csharp]
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Net;
using System.Text;
using System.Collections;
using System.Text.RegularExpressions;
using System.Threading;
namespace TacCs2013.Caiji
{
public partial class Caiji : System.Web.UI.Page
{
protected void Page_Load(object sender, EventArgs e)
{
if (!IsPostBack)
{
}
}
protected void btnOK_Click(object sender, EventArgs e)
{
// 取得输入的URL地址
string PageUrl = txtUrl.Text;
WebClient wc = new WebClient();
//把已登录的用户或正被模拟的用户的用户凭据赋给用于对向Internet资源的请求进行身份验证的网络凭据
wc.Credentials = CredentialCache.DefaultCredentials;
///从指定的URI下载数据
Byte[] pageData = wc.DownloadData(PageUrl);
//把下载到的字符型数组数据转换成字符串类型
// lblc.Text = Encoding.UTF8.GetString(pageData);
string sHTML = Encoding.UTF8.GetString(pageData);
//取得网站路径
ArrayList alURL = GetHyperLinks(sHTML);//取得所有网站链接
string sUrl = "";//网站链接路径
int alurlcount = alURL.Count;//共有多少链接路径。创建多少个线程
countUl.Text = "共有" + alurlcount + "条路径。";
Thread[] thurl = new Thread[alurlcount];
string urlName = "";
for (int i = 0; i < alURL.Count; i++)
{
sUrl = alURL.ToString();
urlName += "||" + sUrl;
thurl = new Thread(new ParameterizedThreadStart(delegate { images(htmls(sUrl), sUrl); }));
thurl.Start();
}
lblurlname.Text = urlName;
}
/// <summary>
/// 取得网站源码
/// </summary>
/// <param name="urlName">要获取的网站</param>
/// <returns>返回源码</returns>
public string htmls(string urlName)
{
//取得输入的URL地址
string PageUrl = txtUrl.Text;
WebClient wc = new WebClient();
//把已登录的用户或正被模拟的用户的用户凭据赋给用于对向Internet资源的请求进行身份验证的网络凭据
wc.Credentials = CredentialCache.DefaultCredentials;
///从指定的URI下载数据
Byte[] pageData = wc.DownloadData(PageUrl);
//把下载到的字符型数组数据转换成字符串类型
// lblc.Text = Encoding.UTF8.GetString(pageData);
string sHTML = Encoding.UTF8.GetString(pageData);
return sHTML;
}
/// <summary>
/// 取得网站图片
/// </summary>
/// <param name="shtml">网站源码</param>
/// <param name="PageUrl">当前网站地址</param>
public void images(string sHTML, string PageUrl)
{
//取得网站图片路径
string imageurl = "";
string[] Simages = GetHtmlImageUrlList(sHTML);
int cg = 0;//成功多少张图片
string fromURL = "";
lblc.Text = "共有:" + Simages.Length + "张图片。";
WebClient myWebClient = new WebClient();
string savePath = "";//图片路径
string filetac = @"D:\\caiji\\";//文件路径
for (int i = 0; i < Simages.Length; i++)
{
imageurl = Simages.ToString();
//存入图片
fromURL = "http://www.baidu.com/img/lm.gif";
if (imageurl.Length > 7)
{
if (imageurl.ToString().Substring(0, 7) != "http://")
{
if (imageurl.Substring(0, 2) == "..")
imageurl = imageurl.Substring(2, imageurl.Length - 2);
else if (imageurl.Substring(0, 1) != "/")
imageurl = "/" + imageurl;
fromURL = PageUrl + imageurl;
}
else
fromURL = imageurl;
}
else
{
fromURL = imageurl;
}
try
{
string fileName = imageurl.Substring(imageurl.LastIndexOf("/"));
fileName = new DateTime().Millisecond + fileName.Substring(1, fileName.Length - 1);
savePath = Server.MapPath(fileName);
cg++;
myWebClient.DownloadFile(fromURL, filetac + fileName);
}
catch (Exception ex)
{
continue;
}
finally
{
myWebClient.Dispose();
}
}
lblcg.Text = "成功了:" + cg + "张图片。其中" + (Simages.Length - cg) + "张图片路径不符。跳出了下载!";
lblImageURL.Text = "图片路径为:" + filetac;
}
//遍历网站的链接
public ArrayList GetHyperLinks(string htmlCode)
{
ArrayList al = new ArrayList();
string strRegex = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);
MatchCollection m = r.Matches(htmlCode);
for (int i = 0; i <= m.Count - 1; i++)
{
bool rep = false;
string strNew = m.ToString();
// 过滤重复的URL
foreach (string str in al)
{
if (strNew == str)
{
rep = true;
break;
}
}
if (!rep) al.Add(strNew);
}
al.Sort();
return al;
}
/// <summary>
/// 取得HTML中所有图片的 URL。
/// </summary>
/// <param name="sHtmlText">HTML代码</param>
/// <returns>图片的URL列表</returns>
public static string[] GetHtmlImageUrlList(string sHtmlText)
{
// 定义正则表达式用来匹配 img 标签
Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);
// 搜索匹配的字符串
MatchCollection matches = regImg.Matches(sHtmlText);
int i = 0;
string[] sUrlList = new string[matches.Count];
// 取得匹配项列表
foreach (Match match in matches)
sUrlList[i++] = match.Groups["imgUrl"].Value;
return sUrlList;
}
//取得网站迅雷下载连接
public static string[] GetHtmlXunlei(string sHtmlText)
{
// 定义正则表达式用来匹配 img 标签
Regex regImg = new Regex(@"/^thunder:\/\//", RegexOptions.IgnoreCase);
// 搜索匹配的字符串
MatchCollection matches = regImg.Matches(sHtmlText);
int i = 0;
string[] sUrlList = new string[matches.Count];
// 取得匹配项列表
foreach (Match match in matches)
sUrlList[i++] = match.Groups["imgUrl"].Value;
return sUrlList;
}
}
}
[/code]
会不会是我线程调用多了?
求解决。。。。。
|
|