今天突然间对识别验证码感兴趣,于是网上搜了一下
最简单的是引用tessnet2.dll,然后通过它来识别,代码如下
/// <summary>
/// Downloads the captcha image, shows it in <c>pictureBox1</c>, runs tessnet2 OCR
/// restricted to digits over it, and pops up one "confidence:text" line per recognized word.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string strUrl = @"http://www.gz.gov.cn/sofpro/gecs/common/image.jsp?dt=Thu%20Nov%2024%202011%2017:20:21%20GMT+0800%20(China%20Standard%20Time)";

    Bitmap image = GetSourceCode(strUrl); // the image to recognize

    // Show the new captcha and release the picture that was displayed before,
    // otherwise every click leaks a GDI+ bitmap.
    Image previous = pictureBox1.Image;
    pictureBox1.Image = image;
    if (previous != null)
    {
        previous.Dispose();
    }

    tessnet2.Tesseract ocr = new tessnet2.Tesseract(); // the OCR engine
    ocr.SetVariable("tessedit_char_whitelist", "0123456789"); // recognize digits only

    // Language data folder located under the application's startup directory.
    // NOTE(review): the original literal had lost its path separator (it read " mpe");
    // the folder name "tmpe" is reconstructed -- confirm it matches the directory that
    // actually holds the language pack.
    // Language packs: http://code.google.com/p/tesseract-ocr/downloads/list
    ocr.Init(Path.Combine(Application.StartupPath, "tmpe"), "eng", false);

    List<tessnet2.Word> result = ocr.DoOCR(image, Rectangle.Empty); // run recognition

    // Build the report with a StringBuilder instead of repeated string concatenation.
    System.Text.StringBuilder report = new System.Text.StringBuilder();
    foreach (tessnet2.Word word in result)
    {
        report.Append(word.Confidence).Append(":").Append(word.Text).Append(Environment.NewLine);
    }

    MessageBox.Show(report.ToString());
}

/// <summary>
/// Fetches the resource at <paramref name="url"/> and decodes it as a bitmap.
/// </summary>
/// <param name="url">Address of the image to download.</param>
/// <returns>The decoded bitmap; the caller owns it and should dispose it when done.</returns>
private Bitmap GetSourceCode(string url)
{
    WebRequest request = WebRequest.Create(url);

    // GDI+ requires the source stream to stay open for the lifetime of the image, so the
    // bytes are copied into an in-memory buffer first. That lets the response and its
    // network stream be closed right away instead of leaking the connection.
    MemoryStream buffer = new MemoryStream();
    using (WebResponse response = request.GetResponse())
    using (Stream st = response.GetResponseStream())
    {
        st.CopyTo(buffer);
    }
    buffer.Position = 0;

    // The buffer is intentionally not disposed here: the returned bitmap keeps reading from it.
    return (Bitmap)Image.FromStream(buffer);
}
由于 tessnet2 是在 .NET Framework 2.0 下生成的,而当前项目运行在 .NET Framework 4.0 上,所以需要在 config 文件(app.config)中做如下配置,才能正常加载它
<configuration>
  <!-- Per the surrounding text, tessnet2 was built against .NET Framework 2.0;
       the legacy activation policy is enabled so it can be used from this v4.0 application. -->
  <startup useLegacyV2RuntimeActivationPolicy="true">
    <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.0,Profile=Client"/>
  </startup>
</configuration>
于是就可以用了
注意:http://www.gz.gov.cn/sofpro/gecs/common/image.jsp?dt=Thu%20Nov%2024%202011%2017:20:21%20GMT+0800%20(China%20Standard%20Time) 是官方网站提供的一个纯数字验证码图片的网址