• OpenCV + opencv_contrib + Tesseract: Setting Up a Qt Development Environment


    1. Preparing the packages

    • OpenCV source package: official site / GitHub
    • opencv_contrib source package: GitHub
    • Tesseract source package: GitHub
    • cmake.exe download: official site
    • Qt download: official site

    Note: the OpenCV and opencv_contrib packages must have the same version number (for example, both 3.4.0).

    For building Tesseract from source, see: Compiling Tesseract 4.0 with MinGW-w64 on Windows 10.

    2. Add the following to the PATH environment variable:

    C:\Qt\Qt5.9.0\5.9\mingw53_32\bin
    C:\Qt\Qt5.9.0\Tools\mingw530_32\bin

    On the one hand, this lets you call gcc, g++, qmake and mingw32-make directly from cmd later on;

    on the other hand, it lets CMake locate the Qt-related configuration in the next step.
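
    A quick sanity check (optional): open a new cmd window and confirm the tools are found. The exact version strings will depend on your Qt/MinGW installation.

    gcc --version
    g++ --version
    mingw32-make --version
    qmake -v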

    3. Generate the build files with CMake

    If CMake reports an error at this point (typically a complaint that sh.exe was found in the PATH; the screenshot is omitted here), simply delete the "CMAKE_SH" entry and configure again.

    Then adjust the configuration as follows (an equivalent cmake command line is sketched after the two lists below):

    • CMAKE_BUILD_TYPE: Debug or Release
    • CMAKE_INSTALL_PREFIX: the directory the built libraries will be installed to
    • ENABLE_CXX11: enable C++11 support
    • WITH_QT
    • WITH_OPENGL
    • OPENCV_EXTRA_MODULES_PATH: to build the opencv_contrib modules, set this to the modules directory of the extracted source, e.g. F:\opencv_contrib\modules
    • Tesseract_INCLUDE_DIR: path to the Tesseract header files
    • Tesseract_LIBRARY: path to the Tesseract lib file
    • Lept_LIBRARY: path to the Leptonica lib file (important: it must be set, otherwise Tesseract may not be found)

    It is recommended to uncheck the following:

    • BUILD_DOCS: builds the documentation and requires Doxygen; the official site already provides online and offline docs.
    • BUILD_PERF_TESTS: performance tests.
    • BUILD_TESTS: accuracy/regression tests.
    • BUILD_opencv_ts: the test-support module.
    • INSTALL_TESTS: not needed for development.
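
    For reference, the same configuration can also be produced from a plain cmd window instead of cmake-gui. The sketch below only illustrates the options discussed in this section (including BUILD_opencv_world, which is covered further below); every path in it (build directory, install prefix, opencv/opencv_contrib/Tesseract locations and library file names) is a placeholder to replace with your own.

    :: hypothetical paths -- adjust to your own layout
    cd /d D:\opencv-build
    cmake -G "MinGW Makefiles" ^
          -DCMAKE_BUILD_TYPE=Release ^
          -DCMAKE_INSTALL_PREFIX=D:/opencv ^
          -DENABLE_CXX11=ON -DWITH_QT=ON -DWITH_OPENGL=ON ^
          -DOPENCV_EXTRA_MODULES_PATH=F:/opencv_contrib/modules ^
          -DTesseract_INCLUDE_DIR=D:/tesseract/include ^
          -DTesseract_LIBRARY=D:/tesseract/lib/libtesseract.dll.a ^
          -DLept_LIBRARY=D:/tesseract/lib/liblept.dll.a ^
          -DBUILD_opencv_world=ON -DBUILD_opencv_cvv=OFF ^
          -DBUILD_DOCS=OFF -DBUILD_PERF_TESTS=OFF -DBUILD_TESTS=OFF ^
          -DBUILD_opencv_ts=OFF -DINSTALL_TESTS=OFF ^
          D:/opencv-3.4.0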

    Configuration screenshots (the Tesseract-related and Qt-related settings) are omitted here.

    # Module summary from the CMake configuration output
    OpenCV modules:
        To be built:                 aruco bgsegm bioinspired calib3d ccalib core datasets dnn dpm face features2d flann fuzzy highgui img_hash imgcodecs imgproc line_descriptor ml objdetect optflow phase_unwrapping photo plot reg rgbd saliency shape stereo stitching structured_light superres surface_matching text tracking video videoio videostab world xfeatures2d ximgproc xobjdetect xphoto
        Disabled:                    js python2 python_bindings_generator cvv
        Disabled by dependency:      -
        Unavailable:                 cnn_3dobj cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping cudev dnn_modern freetype hdf java matlab ovis python3 sfm ts viz
        Applications:                apps
        Documentation:               NO
        Non-free algorithms:         NO

    BUILD_opencv_world: bundles all the .lib/.dll files into a single "world" library, which is convenient to link against. If you only need a subset of the modules, leave it unchecked to keep the build smaller.

    Important:

    If BUILD_opencv_world is checked, BUILD_opencv_cvv must be unchecked, otherwise the build fails with an error (the error screenshot is omitted here).

    4. Installation
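
    The installation step itself is the usual MinGW build-and-install sequence, run from the CMake build directory (the screenshot of this step is omitted; -j4 just parallelizes the build and is optional):

    mingw32-make -j4
    mingw32-make install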

     

    5. Problems encountered

    1) 'sprintf_instead_use_StringCbPrintfA_or_StringCchPrintfA' was not declared in this scope.

    Fix: open modules\videoio\src\cap_dshow.cpp in the OpenCV source tree, locate #include "DShow.h", and add one line directly above it.

    For a Release build:

    #define NO_DSHOW_STRSAFE

    For a Debug build:

    #define STRSAFE_NO_DEPRECATE

    (The screenshot showing the placement is omitted.)
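
    A sketch of the intended edit (only the added #define line is new; keep whichever macro matches your build type):

    // modules\videoio\src\cap_dshow.cpp (excerpt)
    #define NO_DSHOW_STRSAFE      // Release build; use STRSAFE_NO_DEPRECATE for a Debug build instead
    #include "DShow.h"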

    2) (The error message appears only as a screenshot in the original and is omitted here.)

    Fix:

    Disable precompiled headers by unchecking "ENABLE_PRECOMPILED_HEADERS".

    Reference: https://wiki.qt.io/How_to_setup_Qt_and_openCV_on_Windows

    6. Directory layout (the screenshot of the installed directory tree is omitted)

    7. Add the DLL directory to the PATH environment variable

    D:\opencv\x86\mingw\bin
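
    For a quick test you can also prepend it just for the current cmd session instead of editing the system settings (assuming the install prefix used above):

    set PATH=D:\opencv\x86\mingw\bin;%PATH%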

    8. Testing

    Open Qt Creator and create a plain C++ console project.

    Test 1:

    Code:

    #include <opencv2/opencv.hpp>
    using namespace cv;
    
    int main()
    {
        Mat im = imread("lena.png");
        namedWindow("Image");
        imshow("Image", im);
        waitKey(0);
        destroyWindow("Image");
        return 0;
    }
    ShowImage.cpp

    The .pro project file:

    TEMPLATE = app
    CONFIG += console c++11
    CONFIG -= app_bundle
    CONFIG -= qt
    
    SOURCES += main.cpp
    
    INCLUDEPATH += D:\opencv\include
    CONFIG(debug, debug|release) {
        LIBS += D:\opencv\x86\mingw\bin\libopencv_world340d.dll
    } else {
        LIBS += -LD:\opencv\x86\mingw\lib -lopencv_world340
    }
    ShowImage.pro
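
    One thing to watch out for: imread("lena.png") is a relative path, so the image has to be in the program's working directory (with Qt Creator this is usually the shadow-build directory, or whatever is set as the working directory in the run settings). A hypothetical run from cmd, assuming lena.png has been copied next to the executable:

    :: hypothetical build directory containing ShowImage.exe and lena.png
    cd /d D:\projects\build-ShowImage-Debug
    ShowImage.exe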

    Result (screenshot omitted):

    Test 2:

    /*
     * textdetection.cpp
     *
     * A demo program of End-to-end Scene Text Detection and Recognition:
     * Shows the use of the Tesseract OCR API with the Extremal Region Filter algorithm described in:
     * Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
     *
     * Created on: Jul 31, 2014
     *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
     */
    
    #include "opencv2/text.hpp"
    #include "opencv2/core/utility.hpp"
    #include "opencv2/highgui.hpp"
    #include "opencv2/imgproc.hpp"
    
    #include <iostream>
    #include <algorithm>   // std::remove, std::sort, std::find, std::min_element
    #include <climits>     // INT_MAX
    
    using namespace std;
    using namespace cv;
    using namespace cv::text;
    
    //Calculate edit distance between two words
    size_t edit_distance(const string& A, const string& B);
    size_t min(size_t x, size_t y, size_t z);
    bool   isRepetitive(const string& s);
    bool   sort_by_lenght(const string &a, const string &b);
    //Draw ER's in an image via floodFill
    void   er_draw(vector<Mat> &channels, vector<vector<ERStat> > &regions, vector<Vec2i> group, Mat& segmentation);
    
    //Perform text detection and recognition and evaluate results using edit distance
    int main1(int argc, char* argv[])
    {
        cout << endl << argv[0] << endl << endl;
        cout << "A demo program of End-to-end Scene Text Detection and Recognition: " << endl;
        cout << "Shows the use of the Tesseract OCR API with the Extremal Region Filter algorithm described in:" << endl;
        cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
    
        Mat image;
    
        if(argc>1)
            image  = imread(argv[1]);
        else
        {
            cout << "    Usage: " << argv[0] << " <input_image> [<gt_word1> ... <gt_wordN>]" << endl;
            return(0);
        }
    
        cout << "IMG_W=" << image.cols << endl;
        cout << "IMG_H=" << image.rows << endl;
    
        /*Text Detection*/
    
        // Extract channels to be processed individually
        vector<Mat> channels;
    
        Mat grey;
        cvtColor(image,grey,COLOR_RGB2GRAY);
    
        // Notice here we are only using grey channel, see textdetection.cpp for example with more channels
        channels.push_back(grey);
        channels.push_back(255-grey);
    
        double t_d = (double)getTickCount();
        // Create ERFilter objects with the 1st and 2nd stage default classifiers
        Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00015f,0.13f,0.2f,true,0.1f);
        Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);
    
        vector<vector<ERStat> > regions(channels.size());
        // Apply the default cascade classifier to each independent channel (could be done in parallel)
        for (int c=0; c<(int)channels.size(); c++)
        {
            er_filter1->run(channels[c], regions[c]);
            er_filter2->run(channels[c], regions[c]);
        }
        cout << "TIME_REGION_DETECTION = " << ((double)getTickCount() - t_d)*1000/getTickFrequency() << endl;
    
        Mat out_img_decomposition= Mat::zeros(image.rows+2, image.cols+2, CV_8UC1);
        vector<Vec2i> tmp_group;
        for (int i=0; i<(int)regions.size(); i++)
        {
            for (int j=0; j<(int)regions[i].size();j++)
            {
                tmp_group.push_back(Vec2i(i,j));
            }
            Mat tmp= Mat::zeros(image.rows+2, image.cols+2, CV_8UC1);
            er_draw(channels, regions, tmp_group, tmp);
            if (i > 0)
                tmp = tmp / 2;
            out_img_decomposition = out_img_decomposition | tmp;
            tmp_group.clear();
        }
    
        double t_g = (double)getTickCount();
        // Detect character groups
        vector< vector<Vec2i> > nm_region_groups;
        vector<Rect> nm_boxes;
        erGrouping(image, channels, regions, nm_region_groups, nm_boxes,ERGROUPING_ORIENTATION_HORIZ);
        cout << "TIME_GROUPING = " << ((double)getTickCount() - t_g)*1000/getTickFrequency() << endl;
    
    
        /*Text Recognition (OCR)*/
    
        double t_r = (double)getTickCount();
        Ptr<OCRTesseract> ocr = OCRTesseract::create();
        cout << "TIME_OCR_INITIALIZATION = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
        string output;
    
        Mat out_img;
        Mat out_img_detection;
        Mat out_img_segmentation = Mat::zeros(image.rows+2, image.cols+2, CV_8UC1);
        image.copyTo(out_img);
        image.copyTo(out_img_detection);
        float scale_img  = 600.f/image.rows;
        float scale_font = (float)(2-scale_img)/1.4f;
        vector<string> words_detection;
    
        t_r = (double)getTickCount();
    
        for (int i=0; i<(int)nm_boxes.size(); i++)
        {
    
            rectangle(out_img_detection, nm_boxes[i].tl(), nm_boxes[i].br(), Scalar(0,255,255), 3);
    
            Mat group_img = Mat::zeros(image.rows+2, image.cols+2, CV_8UC1);
            er_draw(channels, regions, nm_region_groups[i], group_img);
            Mat group_segmentation;
            group_img.copyTo(group_segmentation);
            //image(nm_boxes[i]).copyTo(group_img);
            group_img(nm_boxes[i]).copyTo(group_img);
            copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0));
    
            vector<Rect>   boxes;
            vector<string> words;
            vector<float>  confidences;
            ocr->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
    
            output.erase(remove(output.begin(), output.end(), '\n'), output.end());
            //cout << "OCR output = \"" << output << "\" length = " << output.size() << endl;
            if (output.size() < 3)
                continue;
    
            for (int j=0; j<(int)boxes.size(); j++)
            {
                boxes[j].x += nm_boxes[i].x-15;
                boxes[j].y += nm_boxes[i].y-15;
    
                //cout << "  word = " << words[j] << "\t confidence = " << confidences[j] << endl;
                if ((words[j].size() < 2) || (confidences[j] < 51) ||
                        ((words[j].size()==2) && (words[j][0] == words[j][1])) ||
                        ((words[j].size()< 4) && (confidences[j] < 60)) ||
                        isRepetitive(words[j]))
                    continue;
                words_detection.push_back(words[j]);
                rectangle(out_img, boxes[j].tl(), boxes[j].br(), Scalar(255,0,255),3);
                Size word_size = getTextSize(words[j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL);
                rectangle(out_img, boxes[j].tl()-Point(3,word_size.height+3), boxes[j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
                putText(out_img, words[j], boxes[j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
                out_img_segmentation = out_img_segmentation | group_segmentation;
            }
    
        }
    
        cout << "TIME_OCR = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
    
    
        /* Recognition evaluation with (approximate) Hungarian matching and edit distances */
    
        if(argc>2)
        {
            int num_gt_characters   = 0;
            vector<string> words_gt;
            for (int i=2; i<argc; i++)
            {
                string s = string(argv[i]);
                if (s.size() > 0)
                {
                    words_gt.push_back(string(argv[i]));
                    //cout << " GT word " << words_gt[words_gt.size()-1] << endl;
                    num_gt_characters += (int)(words_gt[words_gt.size()-1].size());
                }
            }
    
            if (words_detection.empty())
            {
                //cout << endl << "number of characters in gt = " << num_gt_characters << endl;
                cout << "TOTAL_EDIT_DISTANCE = " << num_gt_characters << endl;
                cout << "EDIT_DISTANCE_RATIO = 1" << endl;
            }
            else
            {
    
                sort(words_gt.begin(),words_gt.end(),sort_by_lenght);
    
                int max_dist=0;
                vector< vector<int> > assignment_mat;
                for (int i=0; i<(int)words_gt.size(); i++)
                {
                    vector<int> assignment_row(words_detection.size(),0);
                    assignment_mat.push_back(assignment_row);
                    for (int j=0; j<(int)words_detection.size(); j++)
                    {
                        assignment_mat[i][j] = (int)(edit_distance(words_gt[i],words_detection[j]));
                        max_dist = max(max_dist,assignment_mat[i][j]);
                    }
                }
    
                vector<int> words_detection_matched;
    
                int total_edit_distance = 0;
                int tp=0, fp=0, fn=0;
                for (int search_dist=0; search_dist<=max_dist; search_dist++)
                {
                    for (int i=0; i<(int)assignment_mat.size(); i++)
                    {
                        int min_dist_idx =  (int)distance(assignment_mat[i].begin(),
                                            min_element(assignment_mat[i].begin(),assignment_mat[i].end()));
                        if (assignment_mat[i][min_dist_idx] == search_dist)
                        {
                            //cout << " GT word \"" << words_gt[i] << "\" best match \"" << words_detection[min_dist_idx] << "\" with dist " << assignment_mat[i][min_dist_idx] << endl;
                            if(search_dist == 0)
                                tp++;
                            else { fp++; fn++; }
    
                            total_edit_distance += assignment_mat[i][min_dist_idx];
                            words_detection_matched.push_back(min_dist_idx);
                            words_gt.erase(words_gt.begin()+i);
                            assignment_mat.erase(assignment_mat.begin()+i);
                            for (int j=0; j<(int)assignment_mat.size(); j++)
                            {
                                assignment_mat[j][min_dist_idx]=INT_MAX;
                            }
                            i--;
                        }
                    }
                }
    
                for (int j=0; j<(int)words_gt.size(); j++)
                {
                    //cout << " GT word \"" << words_gt[j] << "\" no match found" << endl;
                    fn++;
                    total_edit_distance += (int)words_gt[j].size();
                }
                for (int j=0; j<(int)words_detection.size(); j++)
                {
                    if (find(words_detection_matched.begin(),words_detection_matched.end(),j) == words_detection_matched.end())
                    {
                        //cout << " Detection word \"" << words_detection[j] << "\" no match found" << endl;
                        fp++;
                        total_edit_distance += (int)words_detection[j].size();
                    }
                }
    
    
                //cout << endl << "number of characters in gt = " << num_gt_characters << endl;
                cout << "TOTAL_EDIT_DISTANCE = " << total_edit_distance << endl;
                cout << "EDIT_DISTANCE_RATIO = " << (float)total_edit_distance / num_gt_characters << endl;
                cout << "TP = " << tp << endl;
                cout << "FP = " << fp << endl;
                cout << "FN = " << fn << endl;
            }
        }
    
    
        //resize(out_img_detection,out_img_detection,Size(image.cols*scale_img,image.rows*scale_img),0,0,INTER_LINEAR_EXACT);
        //imshow("detection", out_img_detection);
        //imwrite("detection.jpg", out_img_detection);
        //resize(out_img,out_img,Size(image.cols*scale_img,image.rows*scale_img),0,0,INTER_LINEAR_EXACT);
        namedWindow("recognition",WINDOW_NORMAL);
        imshow("recognition", out_img);
        waitKey(0);
        //imwrite("recognition.jpg", out_img);
        //imwrite("segmentation.jpg", out_img_segmentation);
        //imwrite("decomposition.jpg", out_img_decomposition);
    
        return 0;
    }
    
    size_t min(size_t x, size_t y, size_t z)
    {
        return x < y ? min(x,z) : min(y,z);
    }
    
    size_t edit_distance(const string& A, const string& B)
    {
        size_t NA = A.size();
        size_t NB = B.size();
    
        vector< vector<size_t> > M(NA + 1, vector<size_t>(NB + 1));
    
        for (size_t a = 0; a <= NA; ++a)
            M[a][0] = a;
    
        for (size_t b = 0; b <= NB; ++b)
            M[0][b] = b;
    
        for (size_t a = 1; a <= NA; ++a)
            for (size_t b = 1; b <= NB; ++b)
            {
                size_t x = M[a-1][b] + 1;
                size_t y = M[a][b-1] + 1;
                size_t z = M[a-1][b-1] + (A[a-1] == B[b-1] ? 0 : 1);
                M[a][b] = min(x,y,z);
            }
    
        return M[A.size()][B.size()];
    }
    
    bool isRepetitive(const string& s)
    {
        int count = 0;
        for (int i=0; i<(int)s.size(); i++)
        {
            if ((s[i] == 'i') ||
                    (s[i] == 'l') ||
                    (s[i] == 'I'))
                count++;
        }
        if (count > ((int)s.size()+1)/2)
        {
            return true;
        }
        return false;
    }
    
    
    void er_draw(vector<Mat> &channels, vector<vector<ERStat> > &regions, vector<Vec2i> group, Mat& segmentation)
    {
        for (int r=0; r<(int)group.size(); r++)
        {
            ERStat er = regions[group[r][0]][group[r][1]];
            if (er.parent != NULL) // deprecate the root region
            {
                int newMaskVal = 255;
                int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
                floodFill(channels[group[r][0]],segmentation,Point(er.pixel%channels[group[r][0]].cols,er.pixel/channels[group[r][0]].cols),
                          Scalar(255),0,Scalar(er.level),Scalar(0),flags);
            }
        }
    }
    
    bool   sort_by_lenght(const string &a, const string &b){return (a.size()>b.size());}
    end_to_end_recognition.cpp

    The files used in this test (end_to_end_recognition.cpp, scenetext01.jpg, trained_classifierNM1.xml and trained_classifierNM2.xml) are all located under modules\text\samples in the opencv_contrib source package; a possible .pro file for this test is sketched below.
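
    The original post does not show the .pro file for this test. A minimal sketch is given below, assuming the same install layout as in Test 1 and a build with BUILD_opencv_world enabled, so linking opencv_world is sufficient; the Tesseract and Leptonica DLLs only need to be reachable at run time (e.g. via PATH). Note that the entry point in the listing above is named main1, so either rename it to main or call it from your own main.cpp, then run the program with the image as an argument (e.g. end_to_end_recognition.exe scenetext01.jpg) while the two trained_classifierNM*.xml files sit in the working directory.

    # end_to_end_recognition.pro -- a sketch; adjust the paths to your own install
    TEMPLATE = app
    CONFIG += console c++11
    CONFIG -= app_bundle
    CONFIG -= qt

    SOURCES += end_to_end_recognition.cpp

    INCLUDEPATH += D:\opencv\include
    CONFIG(debug, debug|release) {
        LIBS += D:\opencv\x86\mingw\bin\libopencv_world340d.dll
    } else {
        LIBS += -LD:\opencv\x86\mingw\lib -lopencv_world340
    }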

    Result (screenshot omitted):

  • Original post: https://www.cnblogs.com/hupeng1234/p/8593287.html