部分说明文档

部分说明文档

此博客仅为展示文档使用，为文档的一部分截取，详细可在项目文件夹中查看

C705团队代码说明文档

该文档是C705团队在以前学霸项目Pipeline以及本团队所实现的全部代码的基础上，添加注释说明整理而成。

C705团队主要修改的类为：OtherToHtml, DataMining, Denoising, WordSegment, GoogleTranslator, MainWindow

其他类大部分为以前团队所写代码，其中大部分注释为本团队标注，仅供参考用。

class OtherToHtml

    {

        /// <summary>
        /// Common contract for the document types handled by this converter:
        /// a single parameterless operation that transforms the document.
        /// </summary>
        public interface IcDocument
        {
            /// <summary>
            /// Runs the transformation. What is produced is defined by the implementing class.
            /// </summary>
            void TransformDocument();
        }

        /// <summary>
        /// Base class for convertible documents. Stores the source file path and the
        /// output folder, and derives the path of the output file from them.
        /// </summary>
        public abstract class BaseDocument
        {
            /// <summary>
            /// Folder that receives the generated file.
            /// </summary>
            protected string TargetFolder;

            /// <summary>
            /// Path of the source file.
            /// </summary>
            protected string source;

            /// <summary>
            /// Path of the generated file; assigned by <see cref="GetCurrentTarget"/>.
            /// </summary>
            protected string Target;

            /// <summary>
            /// Creates <see cref="TargetFolder"/> when it is missing and sets
            /// <see cref="Target"/> to the source file's name with ".html" appended,
            /// located inside <see cref="TargetFolder"/>.
            /// </summary>
            protected virtual void GetCurrentTarget()
            {
                // Directory.CreateDirectory does nothing when the folder already exists,
                // so a separate Directory.Exists check is not required.
                Directory.CreateDirectory(TargetFolder);

                string fileName = new FileInfo(source).Name + ".html";

                // Path.Combine supplies the directory separator. Plain concatenation
                // omitted it, which placed the output beside the folder
                // ("<folder><name>.html") instead of inside it.
                Target = Path.Combine(TargetFolder, fileName);
            }

            /// <summary>
            /// Stores both paths and immediately computes <see cref="Target"/>.
            /// </summary>
            /// <param name="TargetFolder">Folder that receives the generated file.</param>
            /// <param name="source">Path of the file to read.</param>
            public BaseDocument(string TargetFolder, string source)
            {
                this.source = source;
                this.TargetFolder = TargetFolder;

                // NOTE(review): GetCurrentTarget is virtual, so an override in a derived
                // class runs here before that class's own constructor body has executed.
                GetCurrentTarget();
            }
        }

        /// <summary>
        /// Factory that picks the <see cref="IcDocument"/> implementation for a source file.
        /// </summary>
        public class FactoryDocument
        {
            /// <summary>
            /// Returns the document handler for <paramref name="source"/>, selected by
            /// file extension. Only ".pdf" is recognised.
            /// </summary>
            /// <param name="TargetFolder">Folder that receives the generated file.</param>
            /// <param name="source">Path of the file to read.</param>
            /// <returns>
            /// A <see cref="PdfDocument"/> when the file exists and has a ".pdf" extension
            /// (any letter case). Null when the file does not exist (a message box is
            /// shown first) or when the extension is not recognised.
            /// </returns>
            public static IcDocument GetDocoment(string TargetFolder, string source)
            {
                FileInfo file = new FileInfo(source);
                if (!file.Exists)
                {
                    MessageBox.Show("文件沒有找到");
                    return null;
                }

                // Ordinal, case-insensitive match: a file extension is an identifier, not
                // linguistic text, so the result must not depend on the current culture
                // the way ToUpper() does.
                string extension = Path.GetExtension(source);
                if (string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase))
                {
                    return new PdfDocument(TargetFolder, source);
                }

                // Unrecognised extension: callers receive null and must check for it.
                return null;
            }

            /// <summary>
            /// Placeholder overload; it has no implementation and always throws.
            /// </summary>
            /// <param name="directoryInfo">Unused.</param>
            /// <param name="curItem">Unused.</param>
            /// <returns>Never returns.</returns>
            /// <exception cref="NotImplementedException">Always thrown.</exception>
            internal static IcDocument GetDocoment(DirectoryInfo directoryInfo, string curItem)
            {
                throw new NotImplementedException();
            }
        }

        public class PdfDocument : BaseDocument, IcDocument

        {

            /// <summary>
            /// Creates a PDF document handler. Both arguments are forwarded unchanged
            /// to the base-class constructor; no additional work is done here.
            /// </summary>
            /// <param name="TargetFolder">Folder that receives the generated file.</param>
            /// <param name="source">Path of the PDF file to read.</param>
            public PdfDocument(string TargetFolder, string source) : base(TargetFolder, source) { }

函数功能：将读到的pdf文件转化为txt文件

输入：要转化的pdf文件（FileInfo对象）

输出：转化好的文件

            /// <summary>
            /// Extracts the text of a PDF file and writes it to the file at <c>Target</c>
            /// as UTF-8, overwriting any existing content.
            /// </summary>
            /// <param name="file">The PDF file to read.</param>
            /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
            public void pdf2txt(FileInfo file)
            {
                if (file == null)
                {
                    throw new ArgumentNullException("file");
                }

                string text;
                PDDocument doc = PDDocument.load(file.FullName);
                try
                {
                    PDFTextStripper pdfStripper = new PDFTextStripper();
                    text = pdfStripper.getText(doc);
                }
                finally
                {
                    // The loaded document was never closed before; close it even when
                    // text extraction throws so its resources are released.
                    doc.close();
                }

                // The using block disposes the writer even if Write throws.
                // Encoding.UTF8 is code page 65001, the value previously passed to GetEncoding.
                using (StreamWriter swPdfChange = new StreamWriter(Target, false, Encoding.UTF8))
                {
                    swPdfChange.Write(text);
                }
            }

                            函数功能：处理txt文件，清理转化失败的标识符，图片等乱码

                            输入：无参数（处理成员变量 Target 指向的文件）

                            输出：处理好的文件

            /// <summary>
            /// Rewrites the file at <c>Target</c> in place, keeping only the lines that
            /// are longer than four characters. Shorter lines are treated as residue of
            /// a failed conversion and are removed.
            /// </summary>
            public void handletxt()
            {
                string outputPath = Target;
                string[] allLines = File.ReadAllLines(outputPath);

                // Lines of four characters or fewer are regarded as conversion failures
                // and dropped; everything longer is kept in its original order.
                string[] keptLines = Array.FindAll(
                    allLines,
                    delegate(string text) { return text.Length > 4; });

                // Write the surviving lines back over the same file.
                File.WriteAllLines(outputPath, keptLines);
            }

                            函数功能：处理pdf文件

                            输入：无参数（处理成员变量 source 指向的pdf文件）

                            输出：已经处理过并且去除部分乱码的文件

            /// <summary>
            /// Converts the source PDF: extracts its text into the target file via
            /// <c>pdf2txt</c>, then filters that file via <c>handletxt</c>. When the
            /// source file does not exist, a message is written to the console and
            /// nothing else is done.
            /// </summary>
            public void TransformDocument()
            {
                FileInfo input = new FileInfo(source);

                // Guard: nothing to convert when the source file is missing.
                if (!input.Exists)
                {
                    Console.WriteLine("The File is NOT Exist.");
                    return;
                }

                pdf2txt(input);
                handletxt();
            }

        }

   }

}
相关阅读:
系统集群安装
 用ASP.net判断上传文件类型的三种方法
 C#中利用JQuery实现视频网站
 云计算和大数据
 c# Dictionary 中Keys.ToArray<>方法的细节测试
 DateTime compare
Dictionary的遍历和修改
 C# 键值对数据排序
 ant使用小结
 给我们的7句话
原文地址：https://www.cnblogs.com/C705/p/4226108.html

部分说明文档

此博客仅为展示文档使用，为文档的一部分截取，详细可在项目文件夹中查看

C705团队代码说明文档

class OtherToHtml