// Read the text content of a PDF file (iTextSharp).
private void button2_Click(object sender, EventArgs e)
{
    // Verbatim string: in a regular literal "\a" is the BEL escape character,
    // not a backslash followed by 'a', so the path would be wrong.
    label3.Text = OnCreated(@"D:\aa.pdf");
}

/// <summary>
/// Extracts the text of every page of a PDF file using iTextSharp's
/// SimpleTextExtractionStrategy and concatenates it in page order.
/// </summary>
/// <param name="filepath">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of all pages, or null when reading fails. On failure
/// the exception is appended to "mylog.log" under the application base directory.
/// </returns>
private string OnCreated(string filepath)
{
    try
    {
        PdfReader pdfReader = new PdfReader(filepath);
        try
        {
            int numberOfPages = pdfReader.NumberOfPages;
            System.Text.StringBuilder text = new System.Text.StringBuilder();

            // The loop runs from page 1 through numberOfPages inclusive.
            for (int i = 1; i <= numberOfPages; ++i)
            {
                // A fresh strategy per page, as in the original code.
                iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy =
                    new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                text.Append(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy));
            }

            return text.ToString();
        }
        finally
        {
            // Close the reader even when extraction throws.
            pdfReader.Close();
        }
    }
    catch (Exception ex)
    {
        // Path.Combine avoids the invalid "\m" escape of the original "\mylog.log"
        // literal and copes with a base directory that already ends in a separator.
        string logPath = Path.Combine(
            System.AppDomain.CurrentDomain.SetupInformation.ApplicationBase,
            "mylog.log");

        // using flushes and closes the writer even if WriteLine throws.
        using (StreamWriter wlog = File.AppendText(logPath))
        {
            // Include the file path so the "出错文件:" label is followed by an actual value.
            wlog.WriteLine("出错文件:" + filepath + "原因:" + ex.ToString());
        }

        return null;
    }
}

// Read a TXT file.
// Standalone fragment: `path` (the path of the text file) must be supplied by the
// surrounding code; place these statements inside a method before using them.
string text = System.IO.File.ReadAllText(path); // read the whole file content
// Strip unwanted characters from the text.
// NOTE(review): both Replace arguments appear as a single space here, which makes
// the second call a no-op. The original arguments were probably lost when this
// snippet was copied - confirm what should be removed.
text = text.Replace(" ", string.Empty).Replace(" ", string.Empty);
// Examples (Spire.PDF):
// 1. Generate a PDF and add text and an image to it.
// 2. Extract all images from a PDF document.
// 3. Extract all text from a PDF document.

// Generates "sample.pdf" containing one line of text and one image.
private void button2_Click(object sender, EventArgs e)
{
    Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
    try
    {
        PdfPageBase page = doc.Pages.Add();

        // Add text. Per the original author's note, Chinese characters are not
        // rendered correctly with this font; English letters are.
        page.Canvas.DrawString(
            "Hello!Welcome to my house!",
            new Spire.Pdf.Graphics.PdfFont(PdfFontFamily.Helvetica, 20f),
            new PdfSolidBrush(Color.Black),
            10,
            10);

        // Add an image, scaled to 75% and centred horizontally on the page canvas.
        Spire.Pdf.Graphics.PdfImage image = Spire.Pdf.Graphics.PdfImage.FromFile("ff.jpg");
        float width = image.Width * 0.75f;
        float height = image.Height * 0.75f;
        float x = (page.Canvas.ClientSize.Width - width) / 2;
        page.Canvas.DrawImage(image, x, 60, width, height);

        // Optional second image, kept from the original sample:
        //Spire.Pdf.Graphics.PdfImage image2 = Spire.Pdf.Graphics.PdfImage.FromFile("image.jpg");
        //width = image2.Width * 0.75f;
        //height = image2.Height * 0.75f;
        //page.Canvas.DrawImage(image2, x - 100, 220, width, height);

        doc.SaveToFile("sample.pdf");
    }
    finally
    {
        // Close the document whether or not generation succeeded.
        doc.Close();
    }
}

// Extracts every image from "sample.pdf", shows the image count in label3 and
// saves each image to the working directory as Image-<index>.png.
private void button1_Click(object sender, EventArgs e)
{
    IList<Image> images = new List<Image>();

    Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
    try
    {
        doc.LoadFromFile("sample.pdf");
        foreach (PdfPageBase page in doc.Pages)
        {
            // Extract once per page; the original called ExtractImages() twice.
            var extracted = page.ExtractImages();
            if (extracted == null)
            {
                continue;
            }

            foreach (Image image in extracted)
            {
                images.Add(image);
            }
        }
    }
    finally
    {
        // Close the document even when loading or extraction throws.
        doc.Close();
    }

    label3.Text = images.Count.ToString();

    int index = 0;
    foreach (Image image in images)
    {
        // Dispose each image once it has been written to disk.
        using (image)
        {
            String imageFileName = String.Format("Image-{0}.png", index++);
            image.Save(imageFileName, ImageFormat.Png);
        }
    }
}

// Extracts the text of every page of "sample.pdf" and shows it in label1.
private void button3_Click(object sender, EventArgs e)
{
    StringBuilder buffer = new StringBuilder();

    Spire.Pdf.PdfDocument doc = new Spire.Pdf.PdfDocument();
    try
    {
        doc.LoadFromFile("sample.pdf");
        foreach (PdfPageBase page in doc.Pages)
        {
            buffer.Append(page.ExtractText());
        }
    }
    finally
    {
        // Close the document even when loading or extraction throws.
        doc.Close();
    }

    // Show the extracted text in the UI.
    label1.Text = buffer.ToString();

    // To write the extracted text to a TXT file instead:
    //String fileName = "TextInPdf.txt";
    //File.WriteAllText(fileName, buffer.ToString());
}
原文:https://blog.csdn.net/wk125570/article/details/73794257?utm_source=copy