|
楼主 |
发表于 2015-7-4 20:16:44
|
显示全部楼层
/// <summary>
/// Usage example: pulls the "q" search keyword out of a Google search URL
/// by parsing its query component with <c>GetQueryString</c>.
/// </summary>
public void Test()
{
    string address = "http://www.google.com.hk/search?hl=zh-CN&source=hp&q=%E5%8D%9A%E6%B1%87%E6%95%B0%E7%A0%81&aq=f&aqi=g2&aql=&oq=&gs_rfai=";

    // Uri.Query is the part of the URL that carries the parameters.
    NameValueCollection parameters = GetQueryString(new Uri(address).Query);

    string searchKey = parameters["q"];
    // Expected result: searchKey == "博汇数码"
}
/// <summary>
/// Parses a query string into a name/value collection, URL-decoding keys and
/// values without a caller-specified encoding.
/// </summary>
/// <param name="queryString">The query string to parse.</param>
/// <returns>The collection produced by the three-argument overload.</returns>
public static NameValueCollection GetQueryString(string queryString)
{
    // A null encoding is forwarded as-is; isEncoded = true requests URL-decoding.
    NameValueCollection parsed = GetQueryString(queryString, encoding: null, isEncoded: true);
    return parsed;
}
/// <summary>
/// Parses a URL query string into a name/value collection whose keys are
/// compared case-insensitively (ordinal).
/// </summary>
/// <param name="queryString">
/// The query string, with or without a single leading '?'. Null or empty
/// input yields an empty collection.
/// </param>
/// <param name="encoding">
/// Encoding passed to <c>MyUrlDeCode</c> when <paramref name="isEncoded"/> is
/// true; null is forwarded unchanged so that method can choose one.
/// </param>
/// <param name="isEncoded">
/// True to URL-decode every key and value; false to store them verbatim.
/// </param>
/// <returns>
/// One entry per non-empty "key=value" segment. A segment without '=' is
/// stored as a key with a null value. When a key repeats, the last value wins.
/// </returns>
public static NameValueCollection GetQueryString(string queryString, Encoding encoding, bool isEncoded)
{
    NameValueCollection result = new NameValueCollection(StringComparer.OrdinalIgnoreCase);
    if (string.IsNullOrEmpty(queryString))
    {
        return result;
    }

    int length = queryString.Length;

    // Only a leading '?' is a delimiter; a '?' later in the string is legal
    // query data and must not be removed.
    int segmentStart = (queryString[0] == '?') ? 1 : 0;

    while (segmentStart < length)
    {
        int segmentEnd = queryString.IndexOf('&', segmentStart);
        if (segmentEnd < 0)
        {
            segmentEnd = length;
        }

        // Empty segments (leading, doubled or trailing '&') carry no data and
        // must not disturb entries that were already parsed.
        if (segmentEnd > segmentStart)
        {
            // The first '=' splits key from value; later ones belong to the value.
            int separator = queryString.IndexOf('=', segmentStart, segmentEnd - segmentStart);

            string key;
            string value = null;
            if (separator >= 0)
            {
                key = queryString.Substring(segmentStart, separator - segmentStart);
                value = queryString.Substring(separator + 1, segmentEnd - separator - 1);
            }
            else
            {
                key = queryString.Substring(segmentStart, segmentEnd - segmentStart);
            }

            if (isEncoded)
            {
                key = MyUrlDeCode(key, encoding);
                if (value != null)
                {
                    // A key without '=' has no value to decode; keep it null.
                    value = MyUrlDeCode(value, encoding);
                }
            }

            result[key] = value;
        }

        segmentStart = segmentEnd + 1;
    }

    return result;
}
/// <summary>
/// URL-decodes a string. When no encoding is supplied, chooses between UTF-8
/// and GB2312 by inspecting the percent-escaped bytes.
/// </summary>
/// <param name="str">The URL-encoded text. Null is returned as null.</param>
/// <param name="encoding">
/// Encoding of the escaped bytes. Null selects UTF-8 when the unescaped bytes
/// form a well-formed UTF-8 sequence, and "gb2312" otherwise.
/// </param>
/// <returns>The decoded text, or null when <paramref name="str"/> is null.</returns>
public static string MyUrlDeCode(string str, Encoding encoding)
{
    if (str == null)
    {
        return null;
    }

    if (encoding == null)
    {
        // Validate the raw bytes instead of comparing a decode/re-encode round
        // trip: the round trip fails for valid UTF-8 input that uses lowercase
        // hex digits, "%20" instead of "+", or any other equivalent escaping.
        byte[] rawBytes = HttpUtility.UrlDecodeToBytes(str);
        bool isUtf8;
        try
        {
            // throwOnInvalidBytes = true makes malformed sequences raise
            // instead of being silently replaced with U+FFFD.
            new UTF8Encoding(false, true).GetString(rawBytes);
            isUtf8 = true;
        }
        catch (ArgumentException)
        {
            // DecoderFallbackException derives from ArgumentException.
            isUtf8 = false;
        }

        encoding = isUtf8 ? Encoding.UTF8 : Encoding.GetEncoding("gb2312");
    }

    return HttpUtility.UrlDecode(str, encoding);
}
复制代码
说明
1.下面两个示例使用了不同的编码:Baidu 使用 gb2312,而 Google 使用 UTF-8,那么如何判断一个查询字符串用的是哪种编码呢?解决办法如 MyUrlDeCode 方法所示:先按 UTF-8 对字符串解码,再把解码后的字符串重新编码,然后与原字符串比较;如果两者相同,说明原字符串使用的就是 UTF-8,否则按 gb2312 处理。比较时还要注意对字符串进行 ToUpper,因为百分号编码中十六进制字母的大小写可能不一致。
2.
//博汇数码.
string baidu = HttpUtility.UrlDecode("wd=%B2%A9%BB%E3%CA%FD%C2%EB");
string google = HttpUtility.UrlDecode("q=%E5%8D%9A%E6%B1%87%E6%95%B0%E7%A0%81");
3.关于 return GetQueryString(queryString, null, true);:encoding 参数传入 null 表示你无法确认编码是 gb2312 还是 UTF8;如果你能够确认编码,可以直接传入对应的编码类型,比如 return GetQueryString(queryString, Encoding.UTF8, true);
4.需要注意queryString = queryString.Replace("?", "");把?给过滤掉。
5.%B2%A9%BB%E3%CA%FD%C2%EB = %B2%A9 %BB%E3 %CA%FD %C2%EB(博汇数码gb2312)
6.%E5%8D%9A%E6%B1%87%E6%95%B0%E7%A0%81 = %E5%8D%9A %E6%B1%87 %E6%95%B0 %E7%A0%81(博汇数码utf-8)
|
|