实现思路:
1、使用NuGet获取iTextSharp引用
2、使用iTextSharp读取PDF文档,遍历每一页中的图片,循环每一个图片;
3、使用图片压缩算法,根据自己的需要压缩满足条件的图片(例子中的条件是:宽和高中的较小值大于1080时才压缩);
4、将压缩后的图片写回到PDF中
具体代码如下:
/// <summary>
/// Compresses a PDF file by re-encoding the images embedded in its pages.
/// Only image XObjects referenced directly from a page's resources are processed.
/// </summary>
public class PDFCompression
{
    // Files at or below this size (5 MB) are copied unchanged instead of being recompressed.
    private const long MinFileSizeToCompress = 5 * 1024 * 1024;

    // Scale factor handed to ImageHelper.ResizeImage for every extracted image.
    private const decimal ImageScale = .4M;

    /// <summary>
    /// Writes a copy of <paramref name="sourceFile"/> to <paramref name="TargetFile"/> with its
    /// page images re-encoded. Files of 5 MB or less are copied as-is. If anything goes wrong
    /// while rewriting the document, the target is replaced by a plain copy of the source.
    /// An existing target file is overwritten on every path.
    /// </summary>
    /// <param name="sourceFile">Path of the PDF to read.</param>
    /// <param name="TargetFile">Path of the file to create.</param>
    public static void CompressPDFFile(string sourceFile, string TargetFile)
    {
        if (new FileInfo(sourceFile).Length <= MinFileSizeToCompress)
        {
            // Overwrite, so this path behaves like the two others (FileMode.Create / fallback copy).
            File.Copy(sourceFile, TargetFile, true);
            return;
        }

        try
        {
            PdfReader reader = new PdfReader(sourceFile);
            try
            {
                using (FileStream fs = new FileStream(TargetFile, FileMode.Create, FileAccess.Write, FileShare.None))
                using (PdfStamper stamper = new PdfStamper(reader, fs))
                {
                    int pageCount = reader.NumberOfPages;

                    // PDF page numbers are 1-based.
                    for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                    {
                        PdfDictionary page = reader.GetPageN(pageNumber);
                        List<PdfObject> imageRefs = FindImageInPDFDictionarys(page, DistinguishImageIsNeedCompress);

                        foreach (PdfObject imageRef in imageRefs)
                        {
                            // Build the replacement first: if the image cannot be converted,
                            // the original object is left untouched.
                            iTextSharp.text.Image img = ResizedImage(imageRef, reader);
                            if (img == null)
                            {
                                continue;
                            }

                            iTextSharp.text.Image maskImage = img.ImageMask;

                            // Drop the old image object; the new one is written under the same reference.
                            PdfReader.KillIndirect(imageRef);

                            if (maskImage != null)
                            {
                                stamper.Writer.AddDirectImageSimple(maskImage);
                            }

                            stamper.Writer.AddDirectImageSimple(img, (PRIndirectReference)imageRef);
                        }
                    }
                }
            }
            finally
            {
                // Release the reader before any fallback copy touches the source file.
                reader.Close();
            }
        }
        catch (Exception)
        {
            // Best effort: an uncompressed copy is better than no output at all.
            File.Copy(sourceFile, TargetFile, true);
        }
    }

    /// <summary>
    /// Collects the indirect references of the image XObjects in a page's resource dictionary
    /// that are accepted by <paramref name="distinguishMethod"/>.
    /// Images nested inside form XObjects or groups are not searched.
    /// </summary>
    /// <param name="pg">Page dictionary to inspect.</param>
    /// <param name="distinguishMethod">Predicate deciding whether an image dictionary is selected.</param>
    /// <returns>The selected references; an empty list if the page has none or an error occurs.</returns>
    private static List<PdfObject> FindImageInPDFDictionarys(PdfDictionary pg, DistinguishImage distinguishMethod)
    {
        List<PdfObject> pdfObjects = new List<PdfObject>();
        try
        {
            PdfDictionary res = PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (res == null)
            {
                return pdfObjects;
            }

            PdfDictionary xobj = PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobj == null)
            {
                return pdfObjects;
            }

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (obj == null || !obj.IsIndirect())
                {
                    continue;
                }

                // Resolves to something other than a dictionary when, for example, the object
                // was already removed while processing an earlier page that shares it.
                PdfDictionary tg = PdfReader.GetPdfObject(obj) as PdfDictionary;
                if (tg == null)
                {
                    continue;
                }

                PdfName type = PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE)) as PdfName;
                if (PdfName.IMAGE.Equals(type) && distinguishMethod(tg))
                {
                    pdfObjects.Add(obj);
                }
            }
        }
        catch (Exception)
        {
            // Keep the original contract: on any failure report no images for this page.
            pdfObjects = new List<PdfObject>();
        }

        return pdfObjects;
    }

    /// <summary>
    /// Predicate used to decide whether an image dictionary should be selected.
    /// </summary>
    /// <param name="imgObject">The image XObject dictionary.</param>
    /// <returns><c>true</c> to select the image.</returns>
    delegate bool DistinguishImage(PdfDictionary imgObject);

    /// <summary>
    /// Decides whether an image needs to be compressed. Currently every image is accepted;
    /// a stricter rule could inspect the dictionary's WIDTH, HEIGHT and LENGTH entries.
    /// </summary>
    /// <param name="imgObject">The image XObject dictionary.</param>
    /// <returns>Always <c>true</c>.</returns>
    private static bool DistinguishImageIsNeedCompress(PdfDictionary imgObject)
    {
        return true;
    }

    /// <summary>
    /// Loads the raw stream bytes of the referenced image with System.Drawing, passes it through
    /// <see cref="ImageHelper.ResizeImage(System.Drawing.Image, decimal)"/> and wraps the result
    /// as an iTextSharp image.
    /// </summary>
    /// <param name="obj">Indirect reference to the image stream.</param>
    /// <param name="reader">Reader that owns the referenced object.</param>
    /// <returns>
    /// The re-encoded image, or <c>null</c> when the reference does not resolve to a stream
    /// or its raw bytes are not an image format System.Drawing can decode.
    /// </returns>
    private static iTextSharp.text.Image ResizedImage(PdfObject obj, PdfReader reader)
    {
        PRIndirectReference reference = obj as PRIndirectReference;
        if (reference == null)
        {
            return null;
        }

        PRStream stream = reader.GetPdfObject(reference.Number) as PRStream;
        if (stream == null)
        {
            return null;
        }

        byte[] bytes = PdfReader.GetStreamBytesRaw(stream);
        if (bytes == null)
        {
            return null;
        }

        try
        {
            using (MemoryStream memStream = new MemoryStream(bytes))
            using (System.Drawing.Image original = System.Drawing.Image.FromStream(memStream))
            using (Bitmap resized = ImageHelper.ResizeImage(original, ImageScale))
            {
                // NOTE(review): ResizeImage creates its bitmap with the default Bitmap(width, height)
                // constructor, so the indexed-format branch presumably never matches — confirm
                // whether the source image's pixel format was meant to be tested instead.
                bool indexed = resized.PixelFormat == PixelFormat.Format1bppIndexed
                            || resized.PixelFormat == PixelFormat.Format4bppIndexed
                            || resized.PixelFormat == PixelFormat.Format8bppIndexed;
                ImageFormat format = indexed ? ImageFormat.Tiff : ImageFormat.Jpeg;

                return iTextSharp.text.Image.GetInstance(resized, format);
            }
        }
        catch (ArgumentException)
        {
            // Image.FromStream rejects data that is not a valid image file (the raw stream bytes
            // are passed undecoded). Skip this image instead of failing the whole document.
            return null;
        }
    }

    /// <summary>
    /// Creates an 8-bit indexed grayscale copy of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Bitmap to convert; it is read as 24-bit RGB.</param>
    /// <returns>A new bitmap of the same size with a 256-entry gray palette.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static Bitmap BitmapToGrayscale(Bitmap source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        int width = source.Width;
        int height = source.Height;
        Bitmap target = new Bitmap(width, height, PixelFormat.Format8bppIndexed);

        // Palette of opaque gray shades (alpha 255; alpha 0 would make every entry transparent).
        ColorPalette palette = target.Palette;
        for (int i = 0; i < palette.Entries.Length; i++)
        {
            palette.Entries[i] = Color.FromArgb(255, i, i, i);
        }
        target.Palette = palette;

        System.Drawing.Rectangle bounds = new System.Drawing.Rectangle(0, 0, width, height);

        // Lock both bitmaps for direct memory access; always unlock, even on failure.
        BitmapData targetData = target.LockBits(bounds, ImageLockMode.ReadWrite, PixelFormat.Format8bppIndexed);
        try
        {
            BitmapData sourceData = source.LockBits(bounds, ImageLockMode.ReadOnly, PixelFormat.Format24bppRgb);
            try
            {
                unsafe
                {
                    for (int r = 0; r < height; r++)
                    {
                        byte* pTarget = (byte*)(targetData.Scan0 + r * targetData.Stride);
                        byte* pSource = (byte*)(sourceData.Scan0 + r * sourceData.Stride);
                        for (int c = 0; c < width; c++)
                        {
                            // Format24bppRgb stores each pixel as blue, green, red in memory,
                            // so red is at offset 2 and blue at offset 0.
                            byte colorIndex = (byte)(pSource[2] * 0.3 + pSource[1] * 0.59 + pSource[0] * 0.11);
                            *pTarget = colorIndex;
                            pTarget++;
                            pSource += 3;
                        }
                    }
                }
            }
            finally
            {
                source.UnlockBits(sourceData);
            }
        }
        finally
        {
            target.UnlockBits(targetData);
        }

        return target;
    }

    /// <summary>
    /// Checks whether a path has the ".pdf" extension (case-insensitive).
    /// Only the file name is examined, not the file content.
    /// </summary>
    /// <param name="filePath">Path to test; may be null.</param>
    /// <returns><c>true</c> if the extension is ".pdf"; otherwise <c>false</c>.</returns>
    public static bool IsPDF(string filePath)
    {
        try
        {
            return string.Equals(Path.GetExtension(filePath), ".pdf", StringComparison.OrdinalIgnoreCase);
        }
        catch (ArgumentException)
        {
            // Path.GetExtension rejects paths containing invalid characters.
            return false;
        }
    }
}

/// <summary>
/// Helpers for resizing System.Drawing images.
/// </summary>
public static class ImageHelper
{
    // Images whose shorter side does not exceed this many pixels keep their dimensions.
    private const int MinDimensionToScale = 1080;

    /// <summary>
    /// Resize the image to the specified width and height.
    /// </summary>
    /// <param name="image">The image to resize.</param>
    /// <param name="width">The width to resize to.</param>
    /// <param name="height">The height to resize to.</param>
    /// <returns>The resized image; the caller owns and should dispose it.</returns>
    public static Bitmap ResizeImage(System.Drawing.Image image, int width, int height)
    {
        var destRect = new System.Drawing.Rectangle(0, 0, width, height);
        var destImage = new System.Drawing.Bitmap(width, height);
        try
        {
            destImage.SetResolution(image.HorizontalResolution, image.VerticalResolution);

            using (var graphics = Graphics.FromImage(destImage))
            {
                graphics.CompositingMode = CompositingMode.SourceCopy;
                graphics.CompositingQuality = CompositingQuality.HighQuality;
                graphics.InterpolationMode = InterpolationMode.HighQualityBicubic;
                graphics.SmoothingMode = SmoothingMode.HighQuality;
                graphics.PixelOffsetMode = PixelOffsetMode.HighQuality;

                using (var wrapMode = new ImageAttributes())
                {
                    wrapMode.SetWrapMode(WrapMode.TileFlipXY);
                    graphics.DrawImage(image, destRect, 0, 0, image.Width, image.Height, GraphicsUnit.Pixel, wrapMode);
                }
            }
        }
        catch
        {
            // Do not leak the half-built bitmap when drawing fails.
            destImage.Dispose();
            throw;
        }

        return destImage;
    }

    /// <summary>
    /// Redraws the image, scaling both dimensions by <paramref name="percentage"/> when the
    /// shorter side is larger than 1080 pixels; smaller images are redrawn at their original size.
    /// </summary>
    /// <param name="image">The image to resize.</param>
    /// <param name="percentage">Scale factor applied to width and height (e.g. 0.4 for 40%).</param>
    /// <returns>The redrawn image; the caller owns and should dispose it.</returns>
    public static Bitmap ResizeImage(System.Drawing.Image image, decimal percentage)
    {
        int width = image.Width;
        int height = image.Height;

        if (Math.Min(width, height) > MinDimensionToScale)
        {
            // Clamp to 1 pixel so a very small factor cannot produce an invalid 0-sized bitmap.
            width = Math.Max(1, (int)Math.Round(image.Width * percentage, MidpointRounding.AwayFromZero));
            height = Math.Max(1, (int)Math.Round(image.Height * percentage, MidpointRounding.AwayFromZero));
        }

        return ResizeImage(image, width, height);
    }
}
参考链接:
https://github.com/jpeckham/CompressPDF
https://blog.csdn.net/phker/article/details/92806191
https://kb.itextpdf.com/home/it7kb/ebooks/itext-7-jump-start-tutorial-for-net