using System;
using System.Collections;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;

/// <summary>
/// Command-line word-frequency counter. Recursively scans a directory for
/// .txt/.cpp/.cs/.h files, counts case-insensitive word occurrences, and writes
/// the result (most frequent first, ties in alphabetical order) to the console
/// and to a report file inside the scanned directory.
/// </summary>
class getFiles
{
    // NOTE(review): the numeric literal for the minimum word length was lost from the
    // original text; 3 is a reconstruction and must be confirmed against the spec.
    private const int MinWordLength = 3;

    // Name of the report file created in the scanned directory.
    // NOTE(review): because it ends in .txt and lives in the scanned directory, a
    // second run will also count the words of the previous report.
    private const string OutputFileName = "ypfei.txt";

    // A candidate word is a run of ASCII letters and digits.
    private static readonly Regex WordPattern = new Regex("[A-Za-z0-9]+");

    // Trailing digit run that extended mode strips (e.g. "win95" -> "win").
    private static readonly Regex TrailingDigits = new Regex("[0-9]+$");

    // File extensions (case-sensitive, as in the original comparison) that are scanned.
    private static readonly string[] TextExtensions = { ".txt", ".cpp", ".cs", ".h" };

    /// <summary>
    /// Counts the words of one text file into <paramref name="wordList"/>
    /// (key: lower-cased word, value: boxed int occurrence count).
    /// </summary>
    /// <param name="pathName">Path of the file to read.</param>
    /// <param name="wordList">Table that accumulates the counts; updated in place.</param>
    public static void getWordList(string pathName, ref Hashtable wordList)
    {
        countWordsInFile(pathName, wordList, false);
    }

    /// <summary>
    /// Extended-mode variant of <see cref="getWordList"/>: words that do not start
    /// with a digit have their trailing digits removed before counting, so that
    /// e.g. "win95" and "win98" are both counted as "win".
    /// </summary>
    /// <param name="pathName">Path of the file to read.</param>
    /// <param name="wordList">Table that accumulates the counts; updated in place.</param>
    public static void getWordListExt(string pathName, ref Hashtable wordList)
    {
        countWordsInFile(pathName, wordList, true);
    }

    // Shared implementation of getWordList/getWordListExt: reads the file line by
    // line and increments the count of every word of at least MinWordLength characters.
    private static void countWordsInFile(string pathName, Hashtable wordList, bool stripTrailingDigits)
    {
        // The using statement closes the reader even when reading throws.
        using (StreamReader reader = new StreamReader(pathName))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                foreach (Match match in WordPattern.Matches(line))
                {
                    // Invariant lower-casing keeps keys stable regardless of the
                    // machine's culture (e.g. Turkish dotless i).
                    string word = match.Value.ToLowerInvariant();
                    if (word.Length < MinWordLength)
                    {
                        continue;
                    }

                    // The length check intentionally happens before stripping, as in
                    // the original, so a stripped key may be shorter than the minimum.
                    bool startsWithDigit = word[0] >= '0' && word[0] <= '9';
                    if (stripTrailingDigits && !startsWithDigit)
                    {
                        word = TrailingDigits.Replace(word, "");
                    }

                    // Single lookup: a missing key yields null from the Hashtable indexer.
                    object seen = wordList[word];
                    wordList[word] = seen == null ? 1 : (int)seen + 1;
                }
            }
        }
    }

    /// <summary>
    /// Recursively walks <paramref name="pathName"/> and counts the words of every
    /// file whose extension is .txt, .cpp, .cs or .h. Each processed file name is
    /// echoed to the console. A failure on a single file is reported and skipped.
    /// </summary>
    /// <param name="pathName">Directory to scan.</param>
    /// <param name="wordList">Table that accumulates the counts; updated in place.</param>
    /// <param name="extFlag">0 selects normal mode; any other value selects extended mode.</param>
    public static void getFilesDir(string pathName, ref Hashtable wordList, int extFlag)
    {
        foreach (string subFile in Directory.GetFiles(pathName))
        {
            string fileExt = Path.GetExtension(subFile);
            if (Array.IndexOf(TextExtensions, fileExt) < 0)
            {
                continue; // not one of the selected text file types
            }

            Console.WriteLine(subFile);
            try
            {
                if (extFlag == 0)
                {
                    getWordList(subFile, ref wordList);
                }
                else
                {
                    getWordListExt(subFile, ref wordList);
                }
            }
            catch (Exception e)
            {
                // Best effort: report the unreadable file and carry on with the rest.
                Console.WriteLine(e.Message);
            }
        }

        foreach (string subDir in Directory.GetDirectories(pathName))
        {
            getFilesDir(subDir, ref wordList, extFlag);
        }
    }

    /// <summary>
    /// Entry point. Usage: <c>program &lt;directory&gt;</c> for normal mode or
    /// <c>program -e &lt;directory&gt;</c> for extended mode.
    /// </summary>
    /// <param name="args">Command-line arguments as described above.</param>
    public static void Main(string[] args)
    {
        string pathName;
        int extFlag;

        if (args.Length == 1)
        {
            pathName = args[0];
            extFlag = 0;
        }
        else if (args.Length == 2 && args[0] == "-e")
        {
            pathName = args[1];
            extFlag = 1;
        }
        else
        {
            // Stop here: continuing with an empty path would only add a second,
            // misleading "directory does not exist" message.
            Console.WriteLine("参数输入错误!");
            return;
        }

        try
        {
            if (!Directory.Exists(pathName))
            {
                Console.WriteLine("目录不存在!");
                return;
            }

            Hashtable wordList = new Hashtable();
            getFilesDir(pathName, ref wordList, extFlag);

            // Most frequent first; equal counts fall back to alphabetical order.
            // Keys are lower-case ASCII letters/digits, so ordinal order is alphabetical.
            string[] ordered = wordList.Keys.Cast<string>()
                .OrderByDescending(word => (int)wordList[word])
                .ThenBy(word => word, StringComparer.Ordinal)
                .ToArray();

            // The report is opened only after scanning, so this run never reads its own output.
            using (StreamWriter writer = new StreamWriter(Path.Combine(pathName, OutputFileName)))
            {
                foreach (string word in ordered)
                {
                    Console.WriteLine("{0} {1}", word, wordList[word]);
                    writer.WriteLine("<{0}>:{1}", word, wordList[word]);
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
        }
    }
}