• Python 实现公式图像识别转 Latex(Mathpix)


    本文是文本OCR的后续,因为用到了公式识别,所以阅读了 Mathpix API 文档,编写了一份比较适合自己使用的公式识别的 Python 程序,具体代码如下。注意使用之前应当去 Mathpix 官网申请开发者 ID 和 KEY,它们分别对应代码中 APP_ID 和 APP_KEY 后的 XXX。我在代码中加入了使用次数的限制,但是需要手动新建一个 ./count.txt 文件用于初始化使用次数;当然,在个人信息页的 Usage 下也可以看到 API 的调用情况,包括时间和次数。下面是代码实现,可以直接拷贝使用:

    import os
    import sys
    import json
    import time
    import base64
    import signal
    import win32con
    import winsound
    import requests
    from PIL import ImageGrab
    import win32clipboard as wc
    
    def set_clipboard(txt):
        """Replace the clipboard content with the Unicode text *txt*.

        Args:
            txt: Text stored on the clipboard as ``CF_UNICODETEXT``.
        """
        wc.OpenClipboard()
        try:
            wc.EmptyClipboard()
            wc.SetClipboardData(win32con.CF_UNICODETEXT, txt)
        finally:
            # Always release the clipboard once it has been opened, even if
            # emptying or setting the data fails.
            wc.CloseClipboard()
    
    # Process environment; APP_ID / APP_KEY are looked up here.
    env = os.environ

    # HTTP headers sent with every request. Each credential falls back to the
    # placeholder 'XXX' when its environment variable is not set.
    default_headers = {
        'app_id': env.get('APP_ID', 'XXX'),
        'app_key': env.get('APP_KEY', 'XXX'),
        'Content-type': 'application/json',
    }

    # Endpoint that receives the OCR requests.
    service = 'https://api.mathpix.com/v3/latex'

    # Result formats requested from the service.
    format_set = [
        "text",
        "latex_simplified",
        "latex_styled",
        "mathml",
        "asciimath",
        "latex_list",
    ]

    # Formats considered for the clipboard output, in priority order.
    format_set_output = [
        "latex_styled",
        "latex_simplified",
        "text",
    ]

    # In-memory usage counter.
    count = 0
    
    def changeCount(number):
        """Persist the usage counter to ``./count.txt``, replacing its content.

        Args:
            number: Counter value; written to the file as ``str(number)``.
        """
        # The context manager closes the file even if the write fails.
        with open("./count.txt", "w") as filehandle:
            filehandle.write(str(number))
    
    def getCount():
        """Read the persisted usage counter from ``./count.txt``.

        Returns:
            int: The stored count, or 0 when the file does not exist or
            contains only whitespace.

        Raises:
            ValueError: If the file holds text that is not an integer.
        """
        try:
            with open("./count.txt", "r") as filehandle:
                content = filehandle.read().strip()
        except FileNotFoundError:
            return 0
        # A freshly created, still-empty counter file means zero uses so far.
        return int(content) if content else 0
    
    def image_uri(filename):
        """Return the content of *filename* as a base64 ``data:`` URI.

        Args:
            filename: Path of the image file to encode.

        Returns:
            str: ``"data:image/jpg;base64,"`` followed by the base64-encoded
            file content. The media type label is fixed and does not depend
            on the file's actual format.
        """
        # Read inside a context manager so the file handle is closed promptly.
        with open(filename, "rb") as image_file:
            image_data = image_file.read()
        return "data:image/jpg;base64," + base64.b64encode(image_data).decode()
    
    def latex(args, headers=default_headers, timeout=30):
        """POST *args* as JSON to the OCR service and return the decoded reply.

        Args:
            args: JSON-serialisable request payload.
            headers: HTTP headers for the request; defaults to the
                module-level ``default_headers``.
            timeout: Timeout passed to ``requests.post``.

        Returns:
            The response body parsed as JSON.
        """
        payload = json.dumps(args)
        response = requests.post(
            service,
            data=payload,
            headers=headers,
            timeout=timeout,
        )
        return json.loads(response.text)
    
    def sig_handler(signum, frame):
        """Signal handler that ends the program by raising ``SystemExit(0)``.

        Args:
            signum: Signal number (unused).
            frame: Current stack frame (unused).
        """
        raise SystemExit(0)
    
    """ 截图后,调用Mathpix 公式识别"""
    def LatexOcrScreenshots(path="./",ifauto=False):
        """Run formula OCR on the image currently held in the clipboard.

        When the clipboard holds an image, it is saved as a temporary PNG
        under *path*, sent to the OCR service via ``latex``, and the first
        format from ``format_set_output`` present in the reply is placed on
        the clipboard wrapped in ``$$`` lines. If none is present the
        clipboard is set to an empty string. A system sound is played when
        the request has finished.

        Args:
            path: Directory for the temporary PNG; created if missing.
            ifauto: True when called from the polling loop; only changes how
                the "no image" prompt is printed and what is returned then.

        Returns:
            The decoded service reply when an image was processed; ``""``
            when no image is available and *ifauto* is false; ``None`` when
            no image is available and *ifauto* is true, or when the usage
            limit of 1000 has been reached.
        """
        global count
        # NOTE(review): the escape sequences in the status messages were lost
        # in the published listing (the literals were split across lines).
        # "\r" is assumed because the messages are padded / printed with
        # end="" as if overwriting one console line -- confirm.
        if count >= 1000:
            print("\rThe maximum number of uses has been reached!")
            changeCount(count)
            return

        if not os.path.exists(path):
            os.makedirs(path)

        image = ImageGrab.grabclipboard()
        # grabclipboard() can also yield a list of file names; only an actual
        # image object can be saved and submitted.
        if image is None or isinstance(image, list):
            if not ifauto:
                print("Count : ",count," Please get the screenshots by Shift+Win+S!",end="")
                return ""
            print("\rCount : ",count," Please get the screenshots by Shift+Win+S!",end="")
            return None

        count += 1
        changeCount(count)
        print("\rThe image has been obtained. Please wait a moment!               ",end=" ")

        # os.path.join keeps this working when *path* lacks a trailing separator.
        image_path = os.path.join(path, str(time.time_ns()) + ".png")
        try:
            image.save(image_path)
            txt = latex({
                'src': image_uri(image_path),
                "ocr": ["math", "text"],
                "skip_recrop": True,
                "formats": format_set
            })
        finally:
            # Remove the temporary file even when saving or the request fails.
            if os.path.exists(image_path):
                os.remove(image_path)

        for format_text in format_set_output:
            if format_text in txt:
                set_clipboard("$$\n" + txt[format_text] + "\n$$")
                break
        else:
            # None of the preferred formats came back: clear the clipboard
            # text so a stale result is not pasted by mistake.
            set_clipboard("")
        winsound.PlaySound('SystemAsterisk',winsound.SND_ASYNC)
        return txt
    
    def AutoOcrScreenshotsLatex():
        """Poll the clipboard in a loop and OCR every screenshot that appears.

        Loads the persisted usage counter, installs ``sig_handler`` for
        SIGINT and SIGTERM, then calls ``LatexOcrScreenshots`` in auto mode
        every 0.1 seconds. When ``SystemExit`` is raised (which is what
        ``sig_handler`` does), the final count is printed and saved and the
        function returns.
        """
        global count
        count = getCount()
        signal.signal(signal.SIGINT, sig_handler)
        signal.signal(signal.SIGTERM, sig_handler)
        print("Count : ",count," Please get the screenshots by Shift+Win+S !",end="")
        while True:
            try:
                LatexOcrScreenshots(ifauto=True)
                time.sleep(0.1)
            except SystemExit:
                # The prompt above is printed without a line ending, so start
                # a fresh line before the final report.
                print("\nLast Count : ",count)
                changeCount(count)
                return
    
    # Script entry point: start the clipboard polling loop.
    if __name__ == '__main__':
        AutoOcrScreenshotsLatex()
    

    可以看出其与百度API不同的地方是,直接使用网站POST便可以实现OCR内容的获取,具体获取的内容是由format_set决定的,而输出的内容的优先级是由format_set_output决定的。

    任世事无常,勿忘初心
  • 相关阅读:
    Go语言基础之map
    Go语言基础之切片
    Go语言基础之数组
    Go语言fmt.Printf使用指南
    Go语言基础之流程控制
    Go语言基础之运算符
    Go语言基础之变量和常量
    Go语言环境搭建
    随笔
    使用SocketServer 创建TCP服务端
  • 原文地址:https://www.cnblogs.com/FlameBlog/p/14715287.html
Copyright © 2020-2023  润新知