• 身份证信息识别


    Python 依赖

    paddleocr==2.5.0.3
    paddlepaddle==2.3.1
    

    Python 示例

    #!/usr/bin/env python
    # coding=utf-8
    import logging
    import re
    
    from paddleocr import PaddleOCR
    
    
    class ID:
        """Pull the printed fields off a Chinese resident ID card using PaddleOCR.

        Both parsers run OCR on an image and then scan the recognised text
        fragments in the order the engine returned them, looking for the card's
        printed labels (for example ``姓名`` or ``签发机关``).  A value may share a
        fragment with its label or arrive in the following fragment, and the
        label itself may be split across fragments; all of these layouts are
        accepted.  A field that is not found is returned as an empty string.

        Only the first fragment of a value is captured, so an address that the
        OCR engine splits over several fragments is returned truncated.
        """

        # One OCR engine shared by every call; it is constructed when the class
        # body executes.
        ocr_cls = PaddleOCR(use_angle_cls=True, use_gpu=False, lang="ch")

        # Date in the form matched on the front side, e.g. 1990年1月1日.
        _BIRTHDAY_RE = re.compile(r"(\d{4}年\d{1,2}月\d{1,2}日)")

        @classmethod
        def _recognized_texts(cls, img):
            """Run OCR on ``img`` and return the stripped text of each fragment.

            Each result entry is read as ``entry[-1][0]``.  Entries that cannot
            be read that way (or whose text is not a string) are logged and
            skipped so one malformed entry does not abort the whole parse.
            A ``None`` result is treated as "no text found".
            """
            texts = []
            for entry in cls.ocr_cls.ocr(img, cls=True) or []:
                try:
                    texts.append(entry[-1][0].strip())
                except (AttributeError, LookupError, TypeError) as e:
                    logging.error(f'error,e={e.args}')
            return texts

        @staticmethod
        def _value_after_label(texts, i, label, head=None):
            """Return the value that follows ``label`` starting at ``texts[i]``.

            Args:
                texts: Stripped OCR fragments in reading order.
                i: Index of the fragment where the label is expected to start.
                label: The printed label, e.g. ``"姓名"``.
                head: Optional replacement for ``texts[i]``; used when the label
                    starts part-way through the fragment.

            Returns:
                The text after the label when it shares a fragment with the
                label, otherwise the next fragment.  ``None`` when the label
                is not present here or no fragment follows it, so callers can
                leave a previously found value untouched.
            """
            joined = texts[i] if head is None else head
            if not joined:
                return None
            nxt = i + 1
            # The label may be split across fragments ("姓" / "名张三"); keep
            # appending fragments while what we have is still a proper prefix.
            while (len(joined) < len(label) and label.startswith(joined)
                   and nxt < len(texts)):
                joined += texts[nxt]
                nxt += 1
            if not joined.startswith(label):
                return None
            value = joined[len(label):].strip()
            if value:
                return value
            # Label stood alone: the value is the next fragment, if there is one.
            return texts[nxt] if nxt < len(texts) else None

        @classmethod
        def _nation_value(cls, texts, i):
            """Return the value after ``民族`` found anywhere in ``texts[i]``.

            Also accepts a fragment that ends in ``民`` with ``族`` opening the
            next fragment.  Returns ``None`` when no such label is found.
            """
            info = texts[i]
            start = info.find("民族")
            if start == -1 and info.endswith("民"):
                start = len(info) - 1
            if start == -1:
                return None
            return cls._value_after_label(texts, i, "民族", head=info[start:])

        @classmethod
        def parse_back(cls, img):
            """Read the issuing authority and validity period from an image.

            Args:
                img: Image passed straight through to ``PaddleOCR.ocr``.

            Returns:
                ``{"issued": str, "validity": str}``; a field is ``""`` when
                its label was not recognised.
            """
            result = {"issued": "", "validity": ""}
            labels = (("issued", "签发机关"), ("validity", "有效期限"))
            texts = cls._recognized_texts(img)
            for i in range(len(texts)):
                for key, label in labels:
                    value = cls._value_after_label(texts, i, label)
                    if value is not None:
                        result[key] = value
                        break
            return result

        @classmethod
        def parse_front(cls, img):
            """Read name, sex, nation, birthday, address and ID number.

            Args:
                img: Image passed straight through to ``PaddleOCR.ocr``.

            Returns:
                A dict with keys ``name``, ``sex``, ``nation``, ``birthday``,
                ``address`` and ``id_no``; a field is ``""`` when it was not
                recognised.  ``sex`` is limited to a single character.
            """
            name = sex = nation = birthday = address = id_no = ""
            texts = cls._recognized_texts(img)
            for i, info in enumerate(texts):
                if info.startswith("姓"):
                    value = cls._value_after_label(texts, i, "姓名")
                    if value is not None:
                        name = value
                    continue

                if info.startswith("性"):
                    value = cls._value_after_label(texts, i, "性别")
                    if value is not None:
                        sex = value[:1]
                    # The nation label may sit in the same fragment as the
                    # sex, e.g. "性别男民族汉".
                    value = cls._nation_value(texts, i)
                    if value is not None:
                        nation = value
                    continue

                if info.startswith("民"):
                    # Nation recognised as its own fragment.  Only fill an
                    # empty field so a later fragment that merely begins with
                    # "民族" cannot replace a nation that was already found.
                    if not nation:
                        value = cls._nation_value(texts, i)
                        if value is not None:
                            nation = value
                    continue

                match = cls._BIRTHDAY_RE.search(info)
                if match:
                    birthday = match.group(1)
                    continue

                if info.startswith("住"):
                    value = cls._value_after_label(texts, i, "住址")
                    if value is None and len(info) > 2:
                        # Fragment starts with "住" but the label did not
                        # match: drop the first two characters and keep the
                        # rest, as the original code did.
                        value = info[2:].strip()
                    if value is not None:
                        address = value
                    continue

                if info.startswith("公"):
                    value = cls._value_after_label(texts, i, "公民身份号码")
                    if value is not None:
                        id_no = value

            return {"name": name, "sex": sex, "nation": nation,
                    "birthday": birthday, "address": address, "id_no": id_no}
    
    
    # Demo run: image paths are relative to the current working directory.
    font_img_path = "./6.jpg"
    back_img_path = "./7.jpg"

    # Parse the first image with parse_front and show the resulting dict.
    id_font = ID.parse_front(img=font_img_path)
    print(id_font)

    # Parse the second image with parse_back and show the resulting dict.
    id_back = ID.parse_back(back_img_path)
    print(id_back)
    

      

  • 相关阅读:
    点评cat系列-服务器开发环境部署
    [FreeRTOS].FreeRTOS CortexM3 M4中断优先级设置总结
    [FreeRTOS]FreeRTOS使用
    [Ethernet].以太网总线详解
    [USB].USB总线详解
    [CAN].CAN总线详解
    [LIN].LIN总线详解
    [SDIO].SDIO总线详解
    [eMMC]eMMC读写性能测试
    [通信]Linux User层和Kernel层常用的通信方式
  • 原文地址:https://www.cnblogs.com/navysummer/p/16584445.html
Copyright © 2020-2023  润新知