@站长苏飞 怎么运用正则把不同的保存进去 相同的不要 还有文章的内容怎么显示不出来了 我给注释掉了 [C#] 纯文本查看 复制代码 namespace CollectionNews
{
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Threading;
using HtmlAgilityPack;
using NewLife.Log;
using NewLife.Web;
public class Collection
{
#region Fields
/// <summary>
/// 采集网站(安徽农网)
/// </summary>
public static string outFileName = ""; //生成的文件名
private string websiteUrl = "http://www.ahnw.gov.cn";
private Timer timer = null;
/// <summary>
/// 下载客户端
/// </summary>
private WebClientX webClient = new WebClientX(true, true);
#endregion
public void StartCollection()
{
this.timer = new Timer(this.CollectNews, null, 1000, 8 * 1000);
}
#region Public Method
public void CollectNews(object obj)
{
this.webClient.Credentials = CredentialCache.DefaultCredentials;
byte[] data = this.webClient.DownloadData(this.websiteUrl); // 下载网页
string msg = Encoding.Default.GetString(data); // 网页byte 转换成文字形式
HtmlDocument documnet = new HtmlDocument();
documnet.LoadHtml(msg); // 组装成网页格式
HtmlNode root = documnet.DocumentNode; // 根节点
string path = @"//div[@id=""NewsListPanel""]/ul/li"; // 匹配div
HtmlNodeCollection liList = root.SelectNodes(path); // 获取所有li节点
StringBuilder sb = new StringBuilder();
if (liList != null)
{
foreach (var li in liList)
{
//XTrace.WriteLine("----------------------新闻标题-----------------------------");
XTrace.WriteLine("新闻标题:{0}", li.InnerText.Replace("\r", string.Empty).Replace("\n", string.Empty).Trim());
sb.AppendLine(li.InnerText.Replace("\r", string.Empty).Replace("\n", string.Empty).Trim());
//XTrace.WriteLine("----------------------新闻标题-----------------------------");
//XTrace.WriteLine("----------------------新闻详细-----------------------------");
//XTrace.WriteLine("新闻详细:");
//HtmlNode anode = li.FirstChild;
//string href = this.websiteUrl + anode.Attributes["href"].Value;
//byte[] data2 = this.webClient.DownloadData(href);
//string detailMsg = Encoding.Default.GetString(data2);
//HtmlDocument detailHtml = new HtmlDocument();
//detailHtml.LoadHtml(detailMsg);
//HtmlNode detailRoot = detailHtml.DocumentNode;
//string detailPath = @"//div [@id=""content_show""]";
//HtmlNodeCollection detailli = detailRoot.SelectNodes(detailPath);
//foreach (HtmlNode node in liList)
//{
// XTrace.WriteLine(node.InnerText);
//}
//XTrace.WriteLine("----------------------新闻详细-----------------------------");
}
outFileName = "C:\\class\\" + "1111" + ".txt";
using (StreamWriter sw = new StreamWriter(outFileName,true))//将获取的内容写入文本
{
sw.WriteLine(sb.ToString());
}
}
}
}
#endregion
}
|