Python 依赖
paddleocr==2.5.0.3
paddlepaddle==2.3.1
Python 示例
#!/usr/bin/env python
# coding=utf-8
"""Read the printed fields of a Chinese resident ID card with PaddleOCR."""
import logging
import re
from typing import Dict, List, Optional

from paddleocr import PaddleOCR

# Date of birth in the form "1990年1月1日".
_BIRTHDAY_RE = re.compile(r"(\d{4}年\d{1,2}月\d{1,2}日)")


def _extract_texts(ocr_result) -> List[str]:
    """Return the stripped text of every OCR entry, in the order returned.

    The text of an entry is read as ``entry[-1][0]``.  Entries that do not
    have that shape are logged and skipped so that one bad entry does not
    abort the whole parse.
    """
    texts = []
    for entry in ocr_result or []:
        try:
            text = entry[-1][0]
        except (IndexError, KeyError, TypeError) as exc:
            logging.error("skipping malformed OCR entry %r: %s", entry, exc)
            continue
        if not isinstance(text, str):
            logging.error("skipping OCR entry with non-text payload %r", entry)
            continue
        texts.append(text.strip())
    return texts


def _text_at(texts: List[str], index: int) -> Optional[str]:
    """Return ``texts[index]``, or ``None`` when *index* is out of range."""
    if 0 <= index < len(texts):
        return texts[index]
    return None


def _value_after_label(texts: List[str], index: int, label: str) -> Optional[str]:
    """Return the value for a label that ``texts[index]`` starts with.

    When the entry is exactly the label, the value is the next entry
    (``None`` if there is none); otherwise it is the rest of the same entry.
    """
    text = texts[index]
    if text == label:
        return _text_at(texts, index + 1)
    return text[len(label):].strip()


def _value_after_split_label(texts: List[str], index: int, tail: str) -> Optional[str]:
    """Return the value for a label whose last character landed in the next entry.

    ``texts[index]`` holds the first part of the label and the following
    entry must start with *tail*.  The value is what follows *tail* in that
    entry, or the entry after it when that entry is exactly *tail*.
    Returns ``None`` when the expected pieces are not present.
    """
    following = _text_at(texts, index + 1)
    if following is None or not following.startswith(tail):
        return None
    if following == tail:
        return _text_at(texts, index + 2)
    return following[len(tail):].strip()


def _parse_nation(texts: List[str], index: int) -> Optional[str]:
    """Return the nationality announced by ``texts[index]`` (which contains "民")."""
    text = texts[index]
    label_pos = text.find("民族")
    if label_pos != -1:
        if text.endswith("民族"):
            return _text_at(texts, index + 1)
        return text[label_pos + 2:].strip()
    # Label split across entries: "...民" followed by "族" or "族<value>".
    return _value_after_split_label(texts, index, "族")


class ID:
    """Field extractor for the front and back sides of an ID card image."""

    # Shared OCR engine, created once when the class is defined.
    ocr_cls = PaddleOCR(use_angle_cls=True, use_gpu=False, lang="ch")

    @classmethod
    def parse_back(cls, img) -> Dict[str, str]:
        """Extract the issuing authority and validity period.

        Args:
            img: Image passed unchanged to ``PaddleOCR.ocr``.

        Returns:
            A dict with keys ``"issued"`` and ``"validity"``; a field that
            was not found is an empty string.
        """
        issued = ""
        validity = ""
        texts = _extract_texts(cls.ocr_cls.ocr(img, cls=True))
        for i, text in enumerate(texts):
            if text.startswith("签发机关"):
                value = _value_after_label(texts, i, "签发机关")
                if value is not None:
                    issued = value
            elif text.startswith("有效期限"):
                # The label is four characters long; the whole label is removed.
                value = _value_after_label(texts, i, "有效期限")
                if value is not None:
                    validity = value
        return {"issued": issued, "validity": validity}

    @classmethod
    def parse_front(cls, img) -> Dict[str, str]:
        """Extract name, sex, nationality, birthday, address and ID number.

        Args:
            img: Image passed unchanged to ``PaddleOCR.ocr``.

        Returns:
            A dict with keys ``"name"``, ``"sex"``, ``"nation"``,
            ``"birthday"``, ``"address"`` and ``"id_no"``; a field that was
            not found is an empty string.
        """
        name = ""
        sex = ""
        nation = ""
        birthday = ""
        address = ""
        id_no = ""
        texts = _extract_texts(cls.ocr_cls.ocr(img, cls=True))
        for i, text in enumerate(texts):
            # The ID-number label itself contains "民", so it has to be
            # recognised before the nationality test further down.
            if text.startswith("公民身份号码"):
                value = _value_after_label(texts, i, "公民身份号码")
                if value is not None:
                    id_no = value
                continue

            # Labelled address lines are handled before the nationality test
            # as well, because an address may contain "民".
            if text.startswith("住"):
                if text == "住":
                    value = _value_after_split_label(texts, i, "址")
                elif text == "住址":
                    value = _text_at(texts, i + 1)
                else:
                    value = text[2:].strip()
                if value is not None:
                    address = value
                continue

            if text.startswith("姓"):
                if text == "姓":
                    value = _value_after_split_label(texts, i, "名")
                elif text.startswith("姓名"):
                    value = _value_after_label(texts, i, "姓名")
                else:
                    value = None
                if value is not None:
                    name = value
            elif text.startswith("性"):
                if text == "性":
                    value = _value_after_split_label(texts, i, "别")
                elif text.startswith("性别"):
                    value = _value_after_label(texts, i, "性别")
                else:
                    value = None
                if value is not None:
                    # Only the first character is kept as the sex.
                    sex = value[:1]

            # Sex and nationality can share one entry, so this test is
            # deliberately not part of the chain above.
            if "民" in text:
                # Keep the first nationality found; later unlabelled lines
                # containing "民族" must not overwrite it.
                if not nation:
                    value = _parse_nation(texts, i)
                    if value:
                        nation = value
            else:
                match = _BIRTHDAY_RE.search(text)
                if match:
                    birthday = match.group(1)
        return {
            "name": name,
            "sex": sex,
            "nation": nation,
            "birthday": birthday,
            "address": address,
            "id_no": id_no,
        }


def main() -> None:
    """Run the demo: parse the sample front and back images and print the results."""
    front_img_path = "./6.jpg"
    id_front = ID.parse_front(img=front_img_path)
    print(id_front)
    back_img_path = "./7.jpg"
    id_back = ID.parse_back(back_img_path)
    print(id_back)


if __name__ == "__main__":
    main()