本帖最后由 xiaomu 于 2014-1-24 17:13 编辑
我稍稍优化了下。
[C#] 纯文本查看 复制代码
/// <summary>
/// Filter rules.
/// </summary>
public enum Filter
{
/// <summary>
/// Filter out symbols.
/// </summary>
Symbol,
/// <summary>
/// Filter out spaces.
/// </summary>
Blank,
/// <summary>
/// Filter out both symbols and spaces.
/// </summary>
SymbolAndBlank,
/// <summary>
/// Do not filter.
/// </summary>
None
}
[C#] 纯文本查看 复制代码
/// <summary>
/// Filters the text.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="filter">The filter rule.</param>
/// <returns>The resulting text.</returns>
private static string FilterText(string text, Filter filter)
{
    // Keeps ASCII letters, digits, characters in U+4E00..U+9FA5 and whitespace; everything else is removed.
    const string SymbolPattern = @"[^a-zA-Z0-9\u4e00-\u9fa5\s]";
    // Matches runs of the plain space character only (tabs and newlines are left alone).
    const string SpacePattern = "([ ]+)";

    // Null and empty input both yield an empty string.
    if (string.IsNullOrEmpty(text)) { return ""; }

    switch (filter)
    {
        case Filter.Symbol:
            text = System.Text.RegularExpressions.Regex.Replace(text, SymbolPattern, "");
            break;
        case Filter.Blank:
            text = System.Text.RegularExpressions.Regex.Replace(text, SpacePattern, "");
            break;
        case Filter.SymbolAndBlank:
            text = System.Text.RegularExpressions.Regex.Replace(text, SymbolPattern, "");
            text = System.Text.RegularExpressions.Regex.Replace(text, SpacePattern, "");
            break;
        // Filter.None (and any other value): no character filtering, only trimming below.
    }

    // Trim AFTER filtering: removing a leading/trailing symbol can expose whitespace
    // that sat next to it (e.g. "! a" -> " a"), which trimming up front would miss.
    return text.Trim();
}
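[C#] 纯文本查看 复制代码
/// <summary>
/// Gets the pinyin of Chinese text.
/// </summary>
/// <param name="text">The text to convert (described by the original author as UTF-8 encoded).</param>
/// <param name="filter">The filter rule applied to the text before conversion.</param>
/// <returns>The pinyin of the Chinese text.</returns>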
public static string GetPinyin(string text, Filter filter)
{
    // Run the requested filter over the input before converting anything.
    string filtered = FilterText(text, filter);
    var buffer = new StringBuilder();

    // Convert one character at a time via the per-character GetPinyin overload
    // (defined elsewhere) and concatenate the non-empty results.
    foreach (char ch in filtered)
    {
        string syllable = GetPinyin(ch);
        if (syllable == "")
        {
            continue;
        }
        buffer.Append(syllable);
    }

    // Strip any leading/trailing whitespace from the combined result.
    return buffer.ToString().Trim();
}
|