• 统计文本中的单词个数,并按个数大小排序 (C#)


    性能几乎全部损耗在排序上:由于 Hashtable 本身不能排序,只能借助其他数据结构(数组)来排序。不过还好,一本 60000 字的书十几秒就能统计完。

    using System;
    using System.Collections;//use hashtable
    using System.Collections.Generic;
    using System.IO;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Windows.Forms;

    namespace WindowsApplication1
    {
        public partial class Form1 : Form
        {
            public Form1()
            {
                InitializeComponent();
            }
            private void cmdStart_Click(object sender, EventArgs e)
            {
                string Pattern = @"\,|\.|\ |\n|\r|\?|\;|\:|\!|\(|\)|\042|\“|\”|\-|[\u4e00-\u9fa5]|[0-9]";   //匹配正值表达式 逗号,点号,空格,换行符,回车符,问号,,分号,,冒号,感叹号,,左括号,又括号,双引号,左双引号,右双引号,中文字符,数字
                string textstring = "";
                int j;
                Hashtable ht = new Hashtable();      //创建一个Hashtable实例
                //Hashtable ht = new Hashtable();      //创建一个Hashtable实例
                Regex regex = new Regex(Pattern);    //创建Regex实类
                textBox2.Text = "";
                //读取文件
                try
                {
                    StreamReader sr = new StreamReader(@textBox1.Text,System.Text.Encoding.GetEncoding("GB2312"));
                    textstring = sr.ReadToEnd();
                    sr.Close();
                }
                catch
                {
                    MessageBox.Show("请把test.txt文件拷贝到C:/");
                }
                //根据匹配正值表达式分割字符串
                string[] words = regex.Split(textstring);
                //单词加入哈希表
                foreach (string word in words)
                {
                    //textBox2.Text = textBox2.Text + "   " + i;
                    if (word != null && word != "")
                    {
                        if (ht.Contains(word))
                        {
                            j = Convert.ToInt32(ht[word]) + 1;
                            //ht.Remove(word);
                            //ht.Add(word, j);
                            ht[word]=j;
                        }
                        else
                        {
                            ht.Add(word, 1);
                        }
                    }
                }
                ////对哈希表排序
                ArrayList akeys = new ArrayList(ht.Keys);
                    //按字母顺序进行排序
                    //akeys.Sort() ;
                    //按字单词次数进行排序
                    string[] keyarray = new string[akeys.Count];
                    int[] valuearray = new int[akeys.Count];
                    int index=0;
                    //将key/value分别赋给数祖
                    foreach (string skey in akeys)
                    {
                        keyarray[index] = Convert.ToString(skey);
                        valuearray[index] = Convert.ToInt32(ht[skey]);
                        index++;
                    }
                    for(int a=0;a<akeys.Count;a++)
                    {
                        for (int b = a+1; b < akeys.Count; b++)
                        {
                            if (valuearray[a]>valuearray[b])
                            {
                                valuearray[a] ^= valuearray[b];
                                valuearray[b] ^= valuearray[a];
                                valuearray[a] ^= valuearray[b];
                                string tempstr = keyarray[a];
                                keyarray[a] = keyarray[b];
                                keyarray[b] = tempstr;
                            }
                        }
                    }
                //显示
                    //按字母顺序进行排序显示
                    //foreach(string skey in akeys)
                    //{
                    //    textBox2.Text = textBox2.Text + skey + "\t" + ht[skey] + "\r\n";
                    //}
                    //按字单词次数进行排序显示
                    for (int a = 0; a < akeys.Count; a++)
                    {
                        textBox2.Text = textBox2.Text + keyarray[a] + "\t" + valuearray[a] + "\r\n";
                    }
            }
        }
    }

  • 相关阅读:
    将ObservableCollection(Of T) 数据 绑定到 Canvas
    StylusNodeJS下构建更富表现力/动态/健壮的CSS
    string引用类型
    c#之预处理器指令
    c#继承与构造函数
    c#构造函数
    c#之扩展方法
    c#之结构
    继承与重写
    dedict: 很有新意的图形化英汉字典
  • 原文地址:https://www.cnblogs.com/zhangdongdong/p/3037635.html
Copyright © 2020-2023  润新知