• csharp: iTextSharp: extract text and images from a PDF


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Drawing;
    using System.Drawing.Imaging;
    using System.IO;
    using System.Text;
    using iTextSharp.text.pdf;
    using iTextSharp;
    using iTextSharp.text.pdf.parser;
    using Dotnet = System.Drawing.Image;
    
    
    namespace OfficeDoumentWebApp
    {
    
    
        /// <summary>
        /// Web Forms page that reads a PDF with iTextSharp, writes the text of every page to the
        /// response and saves the image XObjects of every page as JPEG files.
        /// Author: Geovin Du (geovindu, Tu Juwen).
        /// </summary>
        public partial class ItexPDFForm : System.Web.UI.Page
        {
            // Directory the extracted images are written to. Replaced in Page_Load by a freshly
            // created, timestamp-named folder on the first (non-postback) request.
            string imgPath="geovindu";

            /// <summary>
            /// On the first (non-postback) load: creates an output folder, then extracts the text
            /// and the images of every page of the source PDF. The collected text is written to the
            /// response; if an exception occurs, its message is written first and whatever text was
            /// collected before the failure is still written afterwards.
            /// </summary>
            /// <param name="sender">Event source (not used).</param>
            /// <param name="e">Event data (not used).</param>
            protected void Page_Load(object sender, EventArgs e)
            {
                StringBuilder sb = new StringBuilder();
                try
                {
                    if (!IsPostBack)
                    {
                        // One output folder per request, named after the current timestamp.
                        string datafile = DateTime.Now.ToString("yyyyMMddHHmmssfff", System.Globalization.CultureInfo.InvariantCulture);
                        string urc = Server.MapPath("OutFile/" + datafile + "/");
                        // CreateDirectory does nothing when the directory already exists.
                        Directory.CreateDirectory(urc);
                        imgPath = urc;

                        string sourcefile = Server.MapPath("SourceFile/珠宝RFID实施方案.pdf");

                        PdfReader pdfReader = new PdfReader(sourcefile);
                        try
                        {
                            for (int pageNumber = 1; pageNumber <= pdfReader.NumberOfPages; pageNumber++)
                            {
                                // Get the text content of the page.
                                sb.Append(PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber));

                                // Save the image XObjects of the page.
                                ExtractPageImages(pdfReader, pageNumber);
                            }
                        }
                        finally
                        {
                            // Release the reader even when extraction fails part-way.
                            pdfReader.Close();
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Encoded because the message is emitted into an HTML response.
                    Response.Write(Server.HtmlEncode(ex.Message));
                }

                // Encoded because text taken from the PDF may contain HTML markup characters.
                Response.Write(Server.HtmlEncode(sb.ToString()));
            }

            /// <summary>
            /// Saves every image XObject listed directly in the resources of one page.
            /// Pages without a resource or XObject dictionary, and XObjects that are not images
            /// (for example Form XObjects), are skipped.
            /// </summary>
            /// <param name="pdfReader">Open reader for the source document.</param>
            /// <param name="pageNumber">1-based page number.</param>
            private void ExtractPageImages(PdfReader pdfReader, int pageNumber)
            {
                PdfDictionary page = pdfReader.GetPageN(pageNumber);
                PdfDictionary resources = PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES)) as PdfDictionary;
                if (resources == null)
                {
                    return;
                }

                PdfDictionary xobjects = PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
                if (xobjects == null)
                {
                    return;
                }

                // Counts the images saved for this page so that each one gets its own file name.
                int imageIndex = 0;
                foreach (PdfName name in xobjects.Keys)
                {
                    PdfObject obj = xobjects.Get(name);
                    if (obj == null || !obj.IsIndirect())
                    {
                        continue;
                    }

                    PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
                    if (tg == null || !PdfName.IMAGE.Equals(tg.GetAsName(PdfName.SUBTYPE)))
                    {
                        // Not an image XObject.
                        continue;
                    }

                    ImageRenderInfo imgRI = ImageRenderInfo.CreateForXObject(new GraphicsState(), (PRIndirectReference)obj, tg);
                    if (imgRI != null)
                    {
                        imageIndex++;
                        RenderImage(imgRI, pageNumber, imageIndex);
                    }
                }
            }

            /// <summary>
            /// Reads the non-public "gs" field of a <see cref="PathPaintingRenderInfo"/> via reflection.
            /// </summary>
            /// <param name="renderInfo">Render info to read the graphics state from.</param>
            /// <returns>The value of the "gs" field cast to <see cref="GraphicsState"/>.</returns>
            /// <exception cref="InvalidOperationException">
            /// The referenced iTextSharp build has no non-public instance field named "gs".
            /// </exception>
            GraphicsState getGraphicsState(PathPaintingRenderInfo renderInfo)
            {
                System.Reflection.FieldInfo gsField = typeof(PathPaintingRenderInfo).GetField("gs", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
                if (gsField == null)
                {
                    throw new InvalidOperationException("PathPaintingRenderInfo has no non-public instance field named 'gs'.");
                }

                return (GraphicsState)gsField.GetValue(renderInfo);
            }

            /// <summary>
            /// Extracts one image and saves it as a JPEG file in <c>imgPath</c>, named
            /// "{number}_{imageIndex}.jpg". Does nothing when the image cannot be obtained.
            /// </summary>
            /// <param name="renderInfo">Render info describing the image to save.</param>
            /// <param name="number">Page number the image belongs to; first part of the file name.</param>
            /// <param name="imageIndex">
            /// Index of the image within its page; second part of the file name, so that several
            /// images on the same page do not overwrite each other.
            /// </param>
            private void RenderImage(ImageRenderInfo renderInfo, int number, int imageIndex = 1)
            {
                PdfImageObject image = renderInfo.GetImage();
                if (image == null)
                {
                    return;
                }

                using (Dotnet dotnetImg = image.GetDrawingImage())
                {
                    if (dotnetImg == null)
                    {
                        return;
                    }

                    string fileName = number + "_" + imageIndex + ".jpg";

                    // Copy into a Bitmap before encoding (as the original code did) and pass the
                    // format explicitly so the file content really is JPEG, matching its extension.
                    using (Bitmap copy = new Bitmap(dotnetImg))
                    {
                        copy.Save(Path.Combine(imgPath, fileName), ImageFormat.Jpeg);
                    }
                }
            }


        }
    }
    

      

    ABCpdf.NET
    https://www.nuget.org/packages/ABCpdf
    http://test.websupergoo.com/helppdfnet/default.htm?page=source%2f3-concepts%2fg-htmlrender.htm

  • 相关阅读:
    发邮件
    加密解密帮助类(对称加密)
    枚举静态类
    【06月05日】A股滚动市净率PB历史新低排名
    【06月04日】A股滚动市盈率PE历史新低排名
    【06月03日】预分红股息率最高排名
    北上资金近1周流入排行榜
    最近一月研报推荐次数最多的最热股票
    【2019年05月28日】指数估值排名
    北上资金近1周流入排行榜
  • 原文地址:https://www.cnblogs.com/geovindu/p/16670303.html
Copyright © 2020-2023  润新知