• OCR技术浅析-自写篇(2)


    本例仅基于本人的粗浅理解,尝试自制一个文字识别程序;其中识别(模板匹配)部分尚未完善。

    <?php
    /**
     * Toy OCR pipeline built on GD.
     *
     * Constructing the object runs every stage in order:
     *   load image -> palette -> grayscale -> Otsu binarisation -> (no-op) filter
     *   -> glyph segmentation by projection -> template similarity, which is echoed.
     *
     * "Ink" throughout means a pixel whose binarised value is 0 (black).
     */
    class readChar{
        /** X-axis projection of the 50x50 template glyph, as a comma-separated list. */
        const TPL_X = '0,0,0,0,0,0,0,0,12,22,30,34,23,16,13,11,10,8,8,8,8,8,8,6,6,6,6,8,8,7,8,9,10,10,12,14,20,34,30,26,16,0,0,0,0,0,0,0,0,0';
        /** Y-axis projection of the 50x50 template glyph, as a comma-separated list. */
        const TPL_Y = '9,14,17,15,11,10,10,8,8,8,9,8,8,7,8,8,7,8,7,8,8,8,8,8,8,8,8,8,8,8,8,7,8,7,8,8,7,8,8,8,8,9,8,9,10,12,15,17,13,9';
        /** Side length of the template grid that projections are resampled to. */
        const TPL_SIZE = 50;

        private $imgSize;          // array(0 => width, 1 => height, 'size' => width*height)
        private $imgGd2;           // GD palette image
        private $Index=array();    // palette index => gray value (0 or 255 after binarisation)
        private $bigColor;         // $bigColor[x][y] = palette index of that pixel

        /**
         * Load the image at $imgPath and run the whole pipeline.
         *
         * @param string $imgPath path or URL readable by file_get_contents()
         * @throws RuntimeException when the file cannot be read or is not a decodable image
         */
        public function __construct($imgPath){
            $raw = file_get_contents($imgPath);
            if ($raw === false || $raw === '') {
                throw new RuntimeException('Unable to read image: '.$imgPath);
            }
            $img = imagecreatefromstring($raw);
            if ($img === false) {
                throw new RuntimeException('Not a decodable image: '.$imgPath);
            }
            if (imageistruecolor($img)) {
                // Reduce to at most 256 palette entries so every pixel is a small index.
                imagetruecolortopalette($img, false, 256);
            }
            $this->imgGd2 = $img;
            // Take the size from the decoded image itself so it always matches the pixel grid.
            $width  = imagesx($img);
            $height = imagesy($img);
            $this->imgSize = array(0 => $width, 1 => $height, 'size' => $width * $height);
            $this->setGray();
        }

        /**
         * Release the GD image.
         *
         * Only legacy GD resources need an explicit imagedestroy(); on PHP 8+ the
         * image is an object that is freed automatically, and nothing is done when
         * loading failed and no image was ever stored.
         */
        public function __destruct(){
            if (is_resource($this->imgGd2)) {
                imagedestroy($this->imgGd2);
            }
        }

        /**
         * Debug helper: write the current gray/binarised palette into the image,
         * send it as a JPEG and terminate the script. Not called by the pipeline.
         */
        private function showImg(){
            foreach ($this->Index as $paletteIndex => $gray) {
                imagecolorset($this->imgGd2, $paletteIndex, $gray, $gray, $gray);
            }
            header('Content-Type: image/jpeg');
            imagejpeg($this->imgGd2);
            exit;
        }

        /**
         * Grayscale stage.
         *
         * Maps every palette entry to the mean of its R, G and B components, records
         * the palette index of every pixel, builds the normalised gray-level
         * histogram and hands it to setTwo().
         *
         * Other possible gray formulas: a single channel, max, min, or the
         * perceptual weighting 0.3R + 0.59G + 0.11B.
         * Known limitation: two dominant colours with the same gray value cannot be
         * told apart after this stage.
         */
        private function setGray(){
            $this->Index = array();
            for ($i = imagecolorstotal($this->imgGd2) - 1; $i >= 0; $i--) {
                $rgb = imagecolorsforindex($this->imgGd2, $i);
                $this->Index[$i] = (int)(($rgb['red'] + $rgb['green'] + $rgb['blue']) / 3);
            }

            $histogram = array_fill(0, 256, 0);   // gray level => pixel count
            $this->bigColor = array();
            for ($x = 0; $x < $this->imgSize[0]; $x++) {
                $this->bigColor[$x] = array();
                for ($y = 0; $y < $this->imgSize[1]; $y++) {
                    $paletteIndex = imagecolorat($this->imgGd2, $x, $y);
                    $this->bigColor[$x][$y] = $paletteIndex;
                    $histogram[$this->Index[$paletteIndex]]++;
                }
            }

            // Convert counts to proportions of the whole image.
            $total = $this->imgSize['size'];
            foreach ($histogram as $gray => $count) {
                $histogram[$gray] = $count / $total;
            }
            $this->setTwo($histogram);
        }

        /**
         * Binarisation stage.
         *
         * Picks a threshold T with Otsu's method and rewrites the gray table so that
         * gray <= T becomes 0 (black) and everything else becomes 255 (white), then
         * continues with avgFilter().
         *
         * Alternatives not implemented here: bimodal histogram, iterative
         * thresholding, P-tile (needs the known foreground proportion).
         *
         * @param array $pro gray level (0..255) => proportion of pixels with that level
         */
        private function setTwo($pro){
            $threshold = self::otsuThreshold($pro);
            // Pixels only store palette indexes, so rewriting the palette's gray table
            // once per entry binarises every pixel.
            foreach ($this->Index as $paletteIndex => $gray) {
                $this->Index[$paletteIndex] = $gray <= $threshold ? 0 : 255;
            }
            $this->avgFilter();
        }

        /**
         * Otsu threshold.
         *
         * For a candidate T the levels <= T form one class (proportion w0, mean u0)
         * and the levels > T the other (w1, u1). With overall mean u = w0*u0 + w1*u1,
         * the between-class variance is
         *     g = w0*(u0-u)^2 + w1*(u1-u)^2 = w0*w1*(u0-u1)^2
         * and the T that maximises g is returned. The first maximum wins; when no
         * candidate yields g > 0 (e.g. a single-colour image) the default 127 is used.
         *
         * @param array $pro gray level => proportion; missing levels count as 0
         * @return int threshold in 0..255
         */
        private static function otsuThreshold($pro){
            $best = 127;
            $bestVariance = 0;
            for ($t = 0; $t < 256; $t++) {
                $w0 = $w1 = $sum0 = $sum1 = 0;
                for ($level = 0; $level < 256; $level++) {
                    $p = isset($pro[$level]) ? $pro[$level] : 0;
                    if ($level <= $t) {
                        $w0   += $p;
                        $sum0 += $level * $p;
                    } else {
                        $w1   += $p;
                        $sum1 += $level * $p;
                    }
                }
                $u0 = $w0 == 0 ? 0 : $sum0 / $w0;
                $u1 = $w1 == 0 ? 0 : $sum1 / $w1;
                $variance = $w0 * $w1 * pow($u0 - $u1, 2);
                if ($variance > $bestVariance) {
                    $bestVariance = $variance;
                    $best = $t;
                }
            }
            return $best;
        }

        /**
         * Noise-filter stage. Intentionally not implemented; it only forwards to
         * getChar().
         *
         * Candidates: mean filter, adaptive Wiener filter, median filter,
         * morphological noise removal, wavelet denoising. Border pixels would need a
         * policy first: leave untouched, pad with 0 or 255, or pad with the
         * neighbouring gray value.
         */
        private function avgFilter(){
            $this->getChar();
        }

        /**
         * Segmentation + recognition stage: split the image into glyphs and print
         * the template similarity of each one, separated by "<br>".
         */
        private function getChar(){
            $glyphs = $this->segment();
            $this->bigColor = null;   // pixel grid is no longer needed
            $isFirst = true;
            foreach ($glyphs as $glyph) {
                if (!$isFirst) {
                    echo "<br>";
                }
                $isFirst = false;
                $this->tranChar($glyph['xcode'], $glyph['ycode']);
            }
        }

        /**
         * Split the binarised image into glyphs using projection profiles.
         *
         * Rows are first grouped into text lines (runs of rows containing ink);
         * inside each line, columns are grouped into glyphs the same way. Bounds are
         * tight and inclusive, and runs touching any image edge are kept.
         *
         * @return array list of array('xcode' => ink per column, 'ycode' => ink per
         *               row of the enclosing text line), both 0-based
         */
        private function segment(){
            $width  = $this->imgSize[0];
            $height = $this->imgSize[1];

            $rowInk = array_fill(0, $height, 0);
            for ($x = 0; $x < $width; $x++) {
                for ($y = 0; $y < $height; $y++) {
                    $rowInk[$y] += $this->ink($x, $y);
                }
            }

            $glyphs = array();
            foreach (self::findRuns($rowInk) as $band) {
                $top    = $band[0];
                $bottom = $band[1];

                $colInk = array_fill(0, $width, 0);
                for ($x = 0; $x < $width; $x++) {
                    for ($y = $top; $y <= $bottom; $y++) {
                        $colInk[$x] += $this->ink($x, $y);
                    }
                }

                foreach (self::findRuns($colInk) as $span) {
                    $left  = $span[0];
                    $right = $span[1];
                    $xcode = array_fill(0, $right - $left + 1, 0);
                    $ycode = array_fill(0, $bottom - $top + 1, 0);
                    for ($x = $left; $x <= $right; $x++) {
                        for ($y = $top; $y <= $bottom; $y++) {
                            $bit = $this->ink($x, $y);
                            $xcode[$x - $left] += $bit;
                            $ycode[$y - $top]  += $bit;
                        }
                    }
                    $glyphs[] = array('xcode' => $xcode, 'ycode' => $ycode);
                }
            }
            return $glyphs;
        }

        /**
         * @return int 1 when pixel ($x, $y) is ink (binarised value 0), otherwise 0
         */
        private function ink($x, $y){
            return $this->Index[$this->bigColor[$x][$y]] > 0 ? 0 : 1;
        }

        /**
         * Find maximal runs of consecutive non-zero entries.
         *
         * @param array $counts values keyed 0..n-1 in ascending order
         * @return array list of array(first, last) inclusive positions; a run that
         *               starts at position 0 or ends at the last position is included
         */
        private static function findRuns($counts){
            $runs  = array();
            $start = null;
            $last  = null;
            foreach ($counts as $pos => $count) {
                if ($count != 0) {
                    if ($start === null) {
                        $start = $pos;
                    }
                    $last = $pos;
                } elseif ($start !== null) {
                    $runs[] = array($start, $last);
                    $start = null;
                }
            }
            if ($start !== null) {
                // The run reaches the final position, so no closing zero was seen.
                $runs[] = array($start, $last);
            }
            return $runs;
        }

        /**
         * Pad $values with zeros on both sides up to $length entries; when the
         * number of missing entries is odd the extra zero goes at the end.
         *
         * @param array $values list of numbers
         * @param int   $length target length (>= count($values))
         * @return array re-indexed list of exactly $length entries
         */
        private static function centerPad($values, $length){
            $front = (int)(($length - count($values)) / 2);
            $values = array_pad($values, -(count($values) + $front), 0);   // zeros in front
            return array_pad($values, $length, 0);                         // zeros behind
        }

        /**
         * Recognition stage for one glyph.
         *
         * Both projections are zero-padded to a square, resampled onto the template
         * grid (source position k lands in bucket min(ceil(k * 50 / n), 49) and each
         * bucket holds the rounded mean of its samples), joined with commas and
         * compared with the template strings via similar_text(). The two scores are
         * echoed as "X:matched/length=ratio<br>Y:matched/length=ratio".
         *
         * NOTE(review): for glyphs smaller than the template only some buckets
         * receive samples, so the joined string has fewer than 50 values.
         *
         * @param array $myX ink count per column of the glyph
         * @param array $myY ink count per row of the glyph
         */
        private function tranChar($myX,$myY){
            if (!$myX || !$myY) {
                return;   // nothing to compare, and avoids dividing by zero below
            }

            $side = max(count($myX), count($myY));
            $myX = self::centerPad($myX, $side);
            $myY = self::centerPad($myY, $side);

            $ratio = self::TPL_SIZE / $side;
            $bucketX = array();
            $bucketY = array();
            foreach ($myX as $k => $unused) {
                $key = (int)min(ceil($k * $ratio), self::TPL_SIZE - 1);
                if (!isset($bucketX[$key])) {
                    $bucketX[$key] = array();
                    $bucketY[$key] = array();
                }
                $bucketX[$key][] = $myX[$k];
                $bucketY[$key][] = $myY[$k];
            }

            $newX = array();
            $newY = array();
            foreach ($bucketX as $key => $samples) {
                $newX[$key] = round(array_sum($samples) / count($samples));
                $newY[$key] = round(array_sum($bucketY[$key]) / count($bucketY[$key]));
            }

            $sx = similar_text(implode(',', $newX), self::TPL_X);
            $sy = similar_text(implode(',', $newY), self::TPL_Y);
            echo 'X:'.$sx.'/'.strlen(self::TPL_X).'='.($sx / strlen(self::TPL_X));
            echo "<br>";
            echo 'Y:'.$sy.'/'.strlen(self::TPL_Y).'='.($sy / strlen(self::TPL_Y));
        }
    }
    // Run the pipeline on the sample image; the constructor does all the work
    // and prints the similarity scores.
    new readChar('imgurl.jpg');

    附上模板图片:

    欢迎各位指点!
  • 相关阅读:
    购买电脑注意事项
    这个题用堆排序还是直接插入法呢?
    2011新历年最后一天了
    VC中对于Dialog,OnCreate()和OnInitDialog()是什么关系
    英语问题,(有些答案不对,不对的请说一声)
    尝鲜之在Github上搭建Octopress博客
    nodejs+express+ejs+mongoose实例
    Hadoop问题小记
    Storm资料汇总
    C# 集合类 Array Arraylist List Hashtable Dictionary Stack Queue 泛型
  • 原文地址:https://www.cnblogs.com/thors/p/9493974.html
Copyright © 2020-2023  润新知