• c#读取doc,pdf,ppt,txt文件


    doc pdf ppt与 txt之间的转换 :

    组件的作用一般是将文件读出成字符格式,并不是单纯的转换文件名后缀,所以需要将读出的东西写入txt文件 。

    添加office引用

    .net中对office中的word及ppt进行编程时,确保安装office时已经安装了word,ppt可编程组件(自定义安装时可查看)或者安装“Microsoft Office 2003 Primary Interop Assemblies”

    安装后,在编程页面添加引用:

    添加引用 → COM → Microsoft PowerPoint 11.0 Object Library / Microsoft Word 11.0 Object Library;

    还需要在代码文件中引入以下命名空间(读取 pdf 还需另外引用 PDFBox 的 .NET 程序集):

    using Microsoft.Office.Interop.Word;

    using Microsoft.Office.Interop.PowerPoint;

    using org.pdfbox.pdmodel;

    using org.pdfbox.util;

    using Microsoft.Office.Interop.Word;

    using Microsoft.Office.Interop.PowerPoint;

    /// <summary>
    /// Extracts the text of a PDF file with PDFBox and writes it to a text file
    /// encoded as GB2312.
    /// </summary>
    /// <param name="file">The PDF file to read.</param>
    /// <param name="txtfile">The text file to create (or overwrite) with the extracted text.</param>
    public void pdf2txt(FileInfo file, FileInfo txtfile)
    {
        string text;

        PDDocument doc = PDDocument.load(file.FullName);
        try
        {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            text = pdfStripper.getText(doc);
        }
        finally
        {
            // Close the document even when extraction fails so the PDF is not left open.
            doc.close();
        }

        // The using block guarantees the writer is flushed and closed on every path.
        using (StreamWriter swPdfChange = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
        {
            swPdfChange.Write(text);
        }
    }

    对于doc文件中的表格,读出的结果是去除掉了网格线,内容按行读取。

    /// <summary>
    /// Opens a Word document read-only through Office Interop, reads its whole
    /// content as plain text and writes it to a text file encoded as GB2312.
    /// </summary>
    /// <param name="file">The Word document to read.</param>
    /// <param name="txtfile">The text file to create (or overwrite) with the document text.</param>
    public void word2text(FileInfo file, FileInfo txtfile)
    {
        object readOnly = true;
        object missing = System.Reflection.Missing.Value;
        object fileName = file.FullName;
        string text;

        Microsoft.Office.Interop.Word.ApplicationClass wordapp = new Microsoft.Office.Interop.Word.ApplicationClass();
        try
        {
            // The third argument is ReadOnly; every other optional argument is left at its default.
            Microsoft.Office.Interop.Word.Document doc = wordapp.Documents.Open(ref fileName,
                ref missing, ref readOnly, ref missing, ref missing, ref missing,
                ref missing, ref missing, ref missing, ref missing, ref missing,
                ref missing, ref missing, ref missing, ref missing, ref missing);
            try
            {
                text = doc.Content.Text;
            }
            finally
            {
                doc.Close(ref missing, ref missing, ref missing);
            }
        }
        finally
        {
            // Always shut Word down; otherwise a failed conversion leaves a WINWORD.EXE process behind.
            wordapp.Quit(ref missing, ref missing, ref missing);
        }

        using (StreamWriter swWordChange = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
        {
            swWordChange.Write(text);
        }
    }

    /// <summary>
    /// Opens a PowerPoint presentation read-only and without a window, concatenates
    /// the text of every shape that has a text frame (slide by slide, with no
    /// separator), and writes the result to a text file encoded as GB2312.
    /// </summary>
    /// <param name="file">The presentation to read.</param>
    /// <param name="txtfile">The text file to create (or overwrite) with the extracted text.</param>
    public void ppt2txt(FileInfo file, FileInfo txtfile)
    {
        StringBuilder pps = new StringBuilder();

        Microsoft.Office.Interop.PowerPoint.Application pa = new Microsoft.Office.Interop.PowerPoint.ApplicationClass();
        try
        {
            // Arguments: ReadOnly = true, Untitled = false, WithWindow = false.
            Microsoft.Office.Interop.PowerPoint.Presentation pp = pa.Presentations.Open(file.FullName,
                Microsoft.Office.Core.MsoTriState.msoTrue,
                Microsoft.Office.Core.MsoTriState.msoFalse,
                Microsoft.Office.Core.MsoTriState.msoFalse);
            try
            {
                foreach (Microsoft.Office.Interop.PowerPoint.Slide slide in pp.Slides)
                {
                    foreach (Microsoft.Office.Interop.PowerPoint.Shape shape in slide.Shapes)
                    {
                        // Skip shapes that have no text frame (pictures, lines, ...) instead of
                        // reading TextFrame on them.
                        if (shape.HasTextFrame == Microsoft.Office.Core.MsoTriState.msoTrue)
                        {
                            pps.Append(shape.TextFrame.TextRange.Text);
                        }
                    }
                }
            }
            finally
            {
                pp.Close();
            }
        }
        finally
        {
            // Always shut PowerPoint down so no POWERPNT.EXE process is left running.
            pa.Quit();
        }

        using (StreamWriter swPPtChange = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
        {
            swPPtChange.Write(pps.ToString());
        }
    }

    读取不同类型的文件

    /// <summary>
    /// Returns a GB2312 <see cref="StreamReader"/> over the text content of a file,
    /// chosen by the file's extension.
    /// </summary>
    /// <remarks>
    /// ".txt" files are opened directly. ".doc", ".pdf" and ".ppt" files are first
    /// converted into a uniquely named temporary text file, which is deleted
    /// automatically when the returned reader is closed.
    /// </remarks>
    /// <param name="file">The file to read.</param>
    /// <returns>
    /// A reader positioned at the start of the text, or <c>null</c> when the
    /// extension is not one of .txt, .doc, .pdf or .ppt. The caller must close it.
    /// </returns>
    public StreamReader text2reader(FileInfo file)
    {
        Encoding gb2312 = Encoding.GetEncoding("gb2312");
        string extension = file.Extension.ToLowerInvariant();

        if (extension == ".txt")
        {
            return new StreamReader(file.FullName, gb2312);
        }

        if (extension != ".doc" && extension != ".pdf" && extension != ".ppt")
        {
            return null;
        }

        // A unique temp file replaces the former hard-coded absolute paths, so the
        // method works on any machine and concurrent calls do not overwrite each other.
        string tempPath = Path.GetTempFileName();
        bool opened = false;
        try
        {
            FileInfo tempFile = new FileInfo(tempPath);
            switch (extension)
            {
                case ".doc":
                    word2text(file, tempFile);
                    break;
                case ".pdf":
                    pdf2txt(file, tempFile);
                    break;
                case ".ppt":
                    ppt2txt(file, tempFile);
                    break;
            }

            // DeleteOnClose removes the temp file as soon as the caller closes the reader.
            FileStream stream = new FileStream(tempPath, FileMode.Open, FileAccess.Read,
                FileShare.Read, 4096, FileOptions.DeleteOnClose);
            StreamReader st = new StreamReader(stream, gb2312);
            opened = true;
            return st;
        }
        finally
        {
            // If conversion or opening failed, do not leave the temp file behind.
            if (!opened)
            {
                File.Delete(tempPath);
            }
        }
    }

  • 相关阅读:
    os.fork()
    解决方案:WindowsError: [Error 2]
    Python遍历文件夹和读写文件的方法
    导航帖
    IDEA后缀补全及快捷键
    Codeforces-Round#614 Div2
    图论算法-欧拉回路 专题训练
    快速求出n!质因数的个数
    Codeforces-Round#589 Div2
    洛谷P3386二分图匹配
  • 原文地址:https://www.cnblogs.com/hcf-0320/p/4218602.html
Copyright © 2020-2023  润新知