using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;

namespace Common.Utils
{
    /// <summary>
    /// Helper that sends a local image file to the HTTP OCR endpoint at
    /// <c>Host + Path</c> and converts the JSON reply into plain text.
    /// </summary>
    public static class OCRUtil
    {
        private const String Host = "https://ocrapi-document.taobao.com";
        private const String Path = "/ocrservice/document";
        private const String Method = "POST";

        // NOTE(review): looks like a masked placeholder; the real APPCODE should
        // presumably come from configuration rather than source - confirm.
        private const String Appcode = "************";

        /// <summary>Largest accepted image file, in bytes (4 MiB).</summary>
        private const long MaxImageBytes = 4L * 1024 * 1024;

        /// <summary>Horizontal distance (in units of <c>Pos.x</c>) that maps to one indent level.</summary>
        private const int IndentStepX = 60;

        /// <summary>Number of space characters emitted per indent level.</summary>
        private const int SpacesPerIndent = 2;

        private static readonly IList<string> ImageExList = new List<string>() { ".jpg", ".png", ".bmp" };

        /// <summary>
        /// Validates that <paramref name="fileName"/> points at an image this helper accepts.
        /// </summary>
        /// <param name="fileName">Path of the image file to check.</param>
        /// <exception cref="ArgumentNullException">The name is null, empty or whitespace.</exception>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="FileFormatException">The extension is not .jpg, .png or .bmp.</exception>
        /// <exception cref="NotSupportedException">The file is larger than 4 MiB.</exception>
        private static void EnsureImageCanUse(string fileName)
        {
            if (string.IsNullOrWhiteSpace(fileName))
            {
                throw new ArgumentNullException(nameof(fileName), "文件名为空");
            }

            if (!File.Exists(fileName))
            {
                throw new FileNotFoundException("文件不存在", fileName);
            }

            // Ordinal, case-insensitive match so the check does not depend on the current culture.
            string extension = System.IO.Path.GetExtension(fileName);
            if (!ImageExList.Contains(extension, StringComparer.OrdinalIgnoreCase))
            {
                throw new FileFormatException("图片格式需为jpg,png,bmp");
            }

            // The limit is 4 * 1024 * 1024 bytes, matching the "4M" stated in the message.
            if (new FileInfo(fileName).Length > MaxImageBytes)
            {
                // NotSupportedException derives from Exception, so existing
                // catch (Exception) handlers keep working and see the same message.
                throw new NotSupportedException("文件不能大于4M");
            }
        }

        /// <summary>
        /// Loads the image, re-encodes it in the format implied by its extension
        /// and returns the encoded bytes as a Base64 string.
        /// </summary>
        /// <param name="fileName">Path of the image file.</param>
        /// <returns>Base64 text of the re-encoded image.</returns>
        private static string ImageToBase64String(string fileName)
        {
            ImageFormat format;
            switch (System.IO.Path.GetExtension(fileName).ToLowerInvariant())
            {
                case ".png":
                    format = ImageFormat.Png;
                    break;
                case ".jpg":
                    format = ImageFormat.Jpeg;
                    break;
                default:
                    format = ImageFormat.Bmp;
                    break;
            }

            // Dispose the bitmap so the GDI+ handle and the lock on the source file are released.
            using (Bitmap bitmap = new Bitmap(fileName))
            using (MemoryStream buffer = new MemoryStream())
            {
                bitmap.Save(buffer, format);
                return Convert.ToBase64String(buffer.ToArray());
            }
        }

        /// <summary>
        /// Sends the image to the OCR endpoint and returns the recognised text.
        /// </summary>
        /// <remarks>
        /// The request relies on the platform's normal TLS certificate validation;
        /// this method does not touch the process-wide
        /// <see cref="ServicePointManager.ServerCertificateValidationCallback"/>.
        /// </remarks>
        /// <param name="fileName">Path of a .jpg, .png or .bmp file no larger than 4 MiB.</param>
        /// <returns>
        /// One line per recognised word entry, indented according to the entry's
        /// horizontal position; an empty string when the reply contains no entries.
        /// </returns>
        /// <exception cref="WebException">
        /// The request failed or the endpoint answered with a status other than 200 OK.
        /// </exception>
        public static string ImageToText(string fileName)
        {
            EnsureImageCanUse(fileName);

            // Base64 output never contains characters that need JSON escaping,
            // so plain concatenation yields a valid JSON document.
            String bodys = "{\"img\":\"" + ImageToBase64String(fileName) + "\",\"url\":\"\",\"prob\":false}";
            String url = Host + Path;

            HttpWebRequest httpRequest = Host.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                ? (HttpWebRequest)WebRequest.CreateDefault(new Uri(url))
                : (HttpWebRequest)WebRequest.Create(url);

            httpRequest.Method = Method;
            httpRequest.Headers.Add("Authorization", "APPCODE " + Appcode);
            // Content-Type as required by the API.
            httpRequest.ContentType = "application/json; charset=UTF-8";

            byte[] data = Encoding.UTF8.GetBytes(bodys);
            using (Stream requestStream = httpRequest.GetRequestStream())
            {
                requestStream.Write(data, 0, data.Length);
            }

            string responseStr;
            using (HttpWebResponse httpResponse = (HttpWebResponse)httpRequest.GetResponse())
            {
                if (httpResponse.StatusCode != HttpStatusCode.OK)
                {
                    throw new WebException("阿里云OCR接口调用识别失败");
                }

                using (Stream responseStream = httpResponse.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    responseStr = reader.ReadToEnd();
                }
            }

            OcrResult ocrResult = JsonConvert.DeserializeObject<OcrResult>(responseStr);
            return OcrResultToString(ocrResult);
        }

        /// <summary>
        /// Flattens an OCR result into text: one line per word entry, prefixed with
        /// two spaces for every full <c>60</c> units of the entry's first <c>pos.x</c>.
        /// </summary>
        /// <param name="ocrResult">Deserialised reply; may be null.</param>
        /// <returns>The formatted text, or an empty string when there are no entries.</returns>
        private static string OcrResultToString(OcrResult ocrResult)
        {
            StringBuilder sb = new StringBuilder(500);

            // An empty or unexpected reply deserialises to null / a null list.
            var wordList = ocrResult?.prism_wordsInfo;
            if (wordList == null)
            {
                return sb.ToString();
            }

            foreach (var item in wordList)
            {
                if (item == null)
                {
                    continue;
                }

                // Entries without position data are emitted unindented.
                bool hasPosition = item.pos != null && item.pos.Count > 0 && item.pos[0] != null;
                int leftX = hasPosition ? item.pos[0].x : 0;

                // Integer division; zero or negative results produce no indentation.
                int indentLevels = leftX / IndentStepX;
                if (indentLevels > 0)
                {
                    sb.Append(' ', indentLevels * SpacesPerIndent);
                }

                sb.AppendLine(item.word);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Certificate validation callback compatible with
        /// <see cref="RemoteCertificateValidationCallback"/>. Accepts the server
        /// certificate only when the platform reported no policy errors.
        /// </summary>
        /// <param name="sender">Unused.</param>
        /// <param name="certificate">Unused.</param>
        /// <param name="chain">Unused.</param>
        /// <param name="errors">Policy errors reported for the certificate.</param>
        /// <returns><c>true</c> when <paramref name="errors"/> is <see cref="SslPolicyErrors.None"/>.</returns>
        public static bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
        {
            // Accepting every certificate would let any man-in-the-middle read the
            // APPCODE header and the uploaded image.
            return errors == SslPolicyErrors.None;
        }
    }

    /// <summary>
    /// Deserialisation target for the OCR endpoint's JSON reply. Property names
    /// are matched to the JSON keys by name.
    /// </summary>
    public class OcrResult
    {
        public string sid { get; set; }

        public string prism_version { get; set; }

        public int prism_wnum { get; set; }

        /// <summary>Word entries; each one becomes a line of output text.</summary>
        public List<Prism_Wordsinfo> prism_wordsInfo { get; set; }

        public int height { get; set; }

        public int width { get; set; }

        public int orgHeight { get; set; }

        public int orgWidth { get; set; }
    }

    /// <summary>A single word entry of an <see cref="OcrResult"/>.</summary>
    public class Prism_Wordsinfo
    {
        /// <summary>Text of the entry.</summary>
        public string word { get; set; }

        /// <summary>
        /// Position entries; the <c>x</c> of the first one is used as the left
        /// edge when computing indentation.
        /// </summary>
        public IList<Pos> pos { get; set; }
    }

    /// <summary>A coordinate pair inside a word entry.</summary>
    public class Pos
    {
        public int x { get; set; }

        public int y { get; set; }
    }
}