• 营业执照信息识别


    python依赖

    paddlepaddle==2.3.1
    paddleocr==2.5.0.3
    

    python示例

    #!/usr/bin/env python
    # coding=utf-8
    import logging
    
    from paddleocr import PaddleOCR
    
    
    class BusinessLicense:
        """Extract key fields from a business-license image using PaddleOCR.

        ``parse`` runs OCR on the image and picks out the company name, the
        unified social credit code, the registered address and the legal
        representative from the recognised text lines.
        ``verification_credit_code`` checks the check character of an 18-character
        credit code.

        Attributes:
            ocr_cls: The ``PaddleOCR`` engine used for recognition.
            img: The image handed to ``ocr_cls.ocr`` (the demo passes a file path).
            flag: Result of the most recent ``verification_credit_code`` call.
        """

        # Numeric value of every character allowed in a credit code.
        _CODE_VALUES = {
            '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
            'A': 10, 'B': 11, 'C': 12, 'D': 13, 'E': 14, 'F': 15, 'G': 16, 'H': 17, 'J': 18, 'K': 19,
            'L': 20, 'M': 21, 'N': 22, 'P': 23, 'Q': 24, 'R': 25, 'T': 26, 'U': 27, 'W': 28, 'X': 29,
            'Y': 30,
        }

        # Positional weights applied to the first 17 characters of the code.
        _WEIGHTS = (1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28)

        def __init__(self, img, **kwargs):
            """Create the OCR engine and remember the image to analyse.

            Args:
                img: Image passed unchanged to ``PaddleOCR.ocr``.
                **kwargs: Optional ``use_angle_cls`` (default ``True``), ``use_gpu``
                    (default ``False``) and ``lang`` (default ``"ch"``), forwarded
                    to ``PaddleOCR``.
            """
            self.ocr_cls = PaddleOCR(
                use_angle_cls=kwargs.get("use_angle_cls", True),
                use_gpu=kwargs.get("use_gpu", False),
                lang=kwargs.get("lang", "ch"),
            )
            self.img = img
            self.flag = False

        @staticmethod
        def _strip_colons(text):
            """Return *text* without full-width/ASCII colons and outer whitespace."""
            return text.replace("\uff1a", "").replace(":", "").strip()

        def verification_credit_code(self, credit_code):
            """Return whether *credit_code* carries a correct check character.

            The first 17 characters are multiplied by their positional weights and
            summed; the expected check value is ``(31 - sum % 31) % 31`` and must
            equal the value of the 18th character.

            Args:
                credit_code: The code to validate. Empty values, values that are
                    not exactly 18 characters long, or values containing a
                    character outside the code alphabet are reported as invalid.

            Returns:
                ``True`` when the check character matches, otherwise ``False``.
                The same value is stored in ``self.flag``.
            """
            # Start from False on every call so an earlier success cannot leak
            # into the verdict for a different code.
            is_valid = False
            try:
                if credit_code and len(credit_code) == len(self._WEIGHTS) + 1:
                    total = sum(
                        self._CODE_VALUES[char] * weight
                        for char, weight in zip(credit_code, self._WEIGHTS)
                    )
                    # A remainder of 0 maps to check value 0 rather than 31.
                    expected = (31 - total % 31) % 31
                    is_valid = expected == self._CODE_VALUES[credit_code[-1]]
            except (KeyError, TypeError) as e:
                logging.error('error,e=%s', e.args)
            self.flag = is_valid
            return is_valid

        def parse(self):
            """Run OCR on ``self.img`` and extract the license fields.

            A label and its value may be recognised as one text line or be split
            over consecutive lines (for example "住" / "所" / value); both layouts
            are handled. Colons separating label and value are removed.

            Returns:
                A dict with the keys ``company_name``, ``credit_code``, ``address``
                and ``legal_person``; a field that was not found is ``""``.
            """
            credit_code = ""
            company_name = ""
            address = ""
            legal_person = ""

            clean = self._strip_colons
            ocr_result = self.ocr_cls.ocr(self.img, cls=True) or []
            # Each OCR entry ends with a (text, confidence) pair; keep the text.
            texts = [entry[-1][0] for entry in ocr_result]
            last_index = len(texts) - 1

            for i, info in enumerate(texts):
                try:
                    if info.startswith("统一社会信用代码"):
                        if info == "统一社会信用代码" and i < last_index:
                            credit_code = clean(texts[i + 1])
                        else:
                            credit_code = clean(info.split("统一社会信用代码", 1)[1])
                    elif info.startswith("名称"):
                        # Must be tested before the bare "名" prefix below,
                        # otherwise this branch can never be reached.
                        if info.endswith("公司"):
                            company_name = clean(info)[2:].strip()
                    elif info.startswith("名"):
                        if info == "名" and i < last_index:
                            following = texts[i + 1]
                            if following.startswith("称"):
                                if following.strip() == "称":
                                    company_name = clean(texts[i + 2])
                                else:
                                    company_name = clean(following)[1:].strip()
                    elif info.startswith("称") and info.endswith("公司"):
                        company_name = clean(info)[1:].strip()
                    elif info.startswith("住"):
                        if info == "住" and i < last_index:
                            following = clean(texts[i + 1])
                            if following == "所":
                                address = clean(texts[i + 2])
                            elif following.startswith("所"):
                                address = following[1:].strip()
                        elif info == "住所" and i < last_index:
                            address = clean(texts[i + 1])
                        elif info.startswith("住所"):
                            # str.split returns a list; take the part after the label.
                            address = clean(info.split("住所", 1)[1])
                    elif info.startswith("法定代表人"):
                        if info == "法定代表人":
                            legal_person = clean(texts[i + 1])
                        else:
                            legal_person = clean(info).split("法定代表人", 1)[1].strip()
                except (IndexError, AttributeError, TypeError) as e:
                    # Best effort: a value expected on a following line may be
                    # missing; log it and keep scanning the remaining lines.
                    logging.error('error,e=%s', e.args)

            return {"company_name": company_name,
                    "credit_code": credit_code,
                    "address": address,
                    "legal_person": legal_person}
    
    
    # Demo: recognise the sample license image, print the extracted fields,
    # then print whether the extracted credit code passes the checksum.
    img_path = "./zhi.jpeg"
    bl = BusinessLicense(img=img_path)

    data = bl.parse()
    print(data)

    is_valid = bl.verification_credit_code(data.get("credit_code"))
    print(is_valid)
    

      

      

      

  • 相关阅读:
    redis-cluster的实例动态调整内存
    nginx upstream的五种分配方式
    cdn、回源等问题
    gpu机器安装nvidia-smi和python的tensorflow-gpu模块
    Hadoop、HBase、Spark单机安装
    数学的概念
    Eclipse 2020版安装&初始化
    191002一些岗位数量统计
    现象:SpringApplication.run后面的语句未执行
    Ubuntu 18.04安装docker
  • 原文地址:https://www.cnblogs.com/navysummer/p/16583898.html
Copyright © 2020-2023  润新知