"
先定义一个接口,并创建实现该接口的类:
/// <summary>
/// String analysis.
/// </summary>
interface IStringAna
{
    // Code effect reference: https://v.youku.com/v_show/id_XNjQwNjg1MjE0MA==.html

    /// <summary>
    /// Counts how often each Chinese character or English word occurs in a text.
    /// </summary>
    /// <param name="text">The text to analyse.</param>
    /// <returns>
    /// key: a Chinese character or English word contained in the text;
    /// value: the number of times that character or word occurs.
    /// </returns>
    Dictionary<string, int> Ana(string text);
}

/// <summary>
/// Default <see cref="IStringAna"/> implementation that tokenises on UTF-16 code units.
/// </summary>
class StringAna : IStringAna
{
    // Code effect reference: https://v.youku.com/v_show/id_XNjQwMDM5NzIyMA==.html

    /// <summary>
    /// Highest character whose UTF-16 high byte is zero. Characters up to this
    /// value (other than the space) are glued together into one "word".
    /// </summary>
    private const char MaxLatin1Char = '\u00FF';

    /// <summary>
    /// Splits <paramref name="text"/> into tokens and counts each distinct token.
    /// </summary>
    /// <remarks>
    /// Tokenisation rules:
    /// <list type="bullet">
    /// <item>The space character (U+0020) separates tokens and is never counted.</item>
    /// <item>A maximal run of characters in U+0000..U+00FF (space excluded) is one token.
    /// Digits, punctuation, tabs and line breaks in that range therefore stay attached
    /// to the surrounding run.</item>
    /// <item>Every other character is a token of its own. A surrogate pair is kept
    /// together as a single token; an unpaired surrogate is counted as U+FFFD.</item>
    /// </list>
    /// </remarks>
    /// <param name="text">The text to analyse.</param>
    /// <returns>A dictionary mapping each token to its number of occurrences.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public Dictionary<string, int> Ana(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        var dict = new Dictionary<string, int>();
        var i = 0;
        while (i < text.Length)
        {
            var c = text[i];
            if (c == ' ')
            {
                // Separator only: skip it without producing a token.
                i++;
                continue;
            }

            string token;
            if (c <= MaxLatin1Char)
            {
                // Extend the run until a space, a wide character or the end of the text.
                var start = i;
                do
                {
                    i++;
                }
                while (i < text.Length && text[i] != ' ' && text[i] <= MaxLatin1Char);
                token = text.Substring(start, i - start);
            }
            else if (char.IsSurrogatePair(text, i))
            {
                // Keep both halves together so a supplementary-plane character
                // is counted once instead of as two broken halves.
                token = text.Substring(i, 2);
                i += 2;
            }
            else if (char.IsSurrogate(c))
            {
                // An unpaired surrogate is not a valid character; count it as U+FFFD.
                token = "\uFFFD";
                i++;
            }
            else
            {
                token = c.ToString();
                i++;
            }

            Put(dict, token);
        }

        return dict;
    }

    /// <summary>
    /// Increments the counter stored for <paramref name="word"/>, starting at 1 on first sight.
    /// </summary>
    /// <param name="dict">The counter table to update.</param>
    /// <param name="word">The token that was just encountered.</param>
    void Put(Dictionary<string, int> dict, string word)
    {
        int value;
        // TryGetValue leaves value at 0 when the key is missing, so one assignment covers both cases.
        dict.TryGetValue(word, out value);
        dict[word] = value + 1;
    }
}
调用接口(使用示例):
// Demo driver: analyses a sample text, reports the elapsed stopwatch ticks,
// then prints every token together with its count.
var text = GetTestText(); // generate the test text (GetTestText is defined elsewhere)
Console.WriteLine(text);
IStringAna ana = new StringAna();
var sp = Stopwatch.StartNew();
var rlt = ana.Ana(text);
sp.Stop(); // freeze the measurement so it covers only the Ana call
Console.WriteLine($"Elapsed:{sp.ElapsedTicks}");
if (rlt == null)
{
    Console.WriteLine("error");
}
else
{
    foreach (var item in rlt)
    {
        Console.WriteLine($"{item.Key}:{item.Value}");
    }
}
"