• 部分说明文档


    此博客仅用于展示文档,内容为文档的部分截取,详细内容可在项目文件夹中查看

    C705团队代码说明文档

    该文档由C705团队在以前学霸项目Pipeline以及本团队所实现的所有代码的基础上进行注释说明所得。

    C705团队主要修改的类为:OtherToHtml, DataMining, Denoising, WordSegment, GoogleTranslator, MainWindow

    其他类大部分为以前团队所写代码,其中大部分注释为本团队标注,仅供参考用。

    class OtherToHtml

        {

            /// <summary>
            /// Common interface of the document objects in this file;
            /// <c>FactoryDocument.GetDocoment</c> hands out instances through it.
            /// </summary>
            public interface IcDocument

            {

                /// <summary>
                /// Runs the document's transformation. What is read and written is
                /// decided by the implementing class.
                /// </summary>
                void TransformDocument();

            }

            public abstract class BaseDocument

            {

                /// <summary>

                /// 目標文件夾

                /// </summary>

                protected string TargetFolder;

                /// <summary>

                /// 原文件

                /// </summary>

                protected string source;

                /// <summary>

                /// 目標文件

                /// </summary>

                protected string Target;

                protected virtual void GetCurrentTarget()

                {

                    if (!Directory.Exists(TargetFolder))

                    {

                        Directory.CreateDirectory(TargetFolder);

                    }

                    FileInfo temp = new FileInfo(source);

                    string fileName = temp.Name + ".html";

                    Target = TargetFolder + @"" + fileName;

                }

                public BaseDocument(string TargetFolder, string source)

                {

                    this.source = source;

                    this.TargetFolder = TargetFolder;

                    GetCurrentTarget();

                }

            }

            public class FactoryDocument

            {

                /// <summary>

                /// 得到操作的文檔

                /// </summary>

                /// <param name="TargetFolder">生成的文件夾</param>

                /// <param name="source">要讀取的文件</param>

                /// <returns></returns>

                public static IcDocument GetDocoment(string TargetFolder, string source)

                {

                    FileInfo file = new FileInfo(source);

                    IcDocument document = null;

                    if (file.Exists)

                    {

                        switch (Path.GetExtension(source).ToUpper())

                        {

                            case ".PDF":

                          

                                document = new PdfDocument(TargetFolder, source);

                                break;

                        }

                    }

                    else

                    {

                        MessageBox.Show("文件沒有找到");

                    }

                    return document;

                }

                internal static IcDocument GetDocoment(DirectoryInfo directoryInfo, string curItem)

                {

                    throw new NotImplementedException();

                }

            }

            public class PdfDocument : BaseDocument, IcDocument

            {

                /// <summary>
                /// Creates a PDF document object; both arguments are passed straight to the
                /// <c>BaseDocument</c> constructor, which stores them and computes the target path.
                /// </summary>
                /// <param name="TargetFolder">Folder the output file is placed in.</param>
                /// <param name="source">Path of the PDF file to read.</param>
                public PdfDocument(string TargetFolder, string source)

                    : base(TargetFolder, source)

                {

                }

    函数功能:将读到的pdf文件转化为txt文件

    输入:要转化的pdf文件(FileInfo对象)

    输出:无返回值,转化得到的文本写入目标文件Target

                public void pdf2txt(FileInfo file)

                {

                    PDDocument doc = PDDocument.load(file.FullName);

                    PDFTextStripper pdfStripper = new PDFTextStripper();

                    string text = pdfStripper.getText(doc);

                    StreamWriter swPdfChange = new StreamWriter(Target, false, Encoding.GetEncoding(65001));

                    swPdfChange.Write(text);

                    swPdfChange.Close();

                }

                                函数功能:处理txt文件,删除长度不超过4个字符的行(视为转化失败的标识符,图片等乱码)

                                输入:无参数,处理的是目标文件Target

                                输出:无返回值,处理好的内容写回目标文件

                public void handletxt()

                {

                    String path = Target;

                    String[] lines = File.ReadAllLines(path);

                    List<String> list = new List<String>();

                    foreach (String line in lines)

                    {

                        if (line.Length > 4)//长度小于4的行,视为处理失败的行

                            list.Add(line);

                    }

                    lines = list.ToArray();

                    File.WriteAllLines(path, lines);//将处理结果写回文件

                }

                                函数功能:处理pdf文件

                                输入:无参数,处理的是构造时传入的源文件source

                                输出:无返回值,生成已经处理过并且去除部分乱码的目标文件

                public void TransformDocument()

                {

                    FileInfo pdffile = new FileInfo(source);

                    if (pdffile.Exists)

                    {

                        pdf2txt(pdffile);

                        handletxt();

                    }

                    else

                    {

                        Console.WriteLine("The File is NOT Exist.");

                    }

                }

            }

        }

    }

  • 相关阅读:
    系统集群安装
    用ASP.net判断上传文件类型的三种方法
    C#中利用JQuery实现视频网站
    云计算和大数据
    c# Dictionary 中Keys.ToArray<>方法的细节测试
    DateTime compare
    Dictionary的遍历和修改
    C# 键值对数据排序
    ant使用小结
    给我们的7句话
  • 原文地址:https://www.cnblogs.com/C705/p/4226108.html
Copyright © 2020-2023  润新知