• 用AutoHotkey调用百度ocr接口提取增值税发票相关字段并写到Excel


    功能介绍:

    提取指定文件夹下的所有增值税发票(格式为jpg或png或pdf(暂时只处理第1页)), 把所有信息写到Excel表当前选中的单元格,并重命名原始发票(可指定规则)复制到新文件夹。

    由于要用到百度的接口,所以需要注册百度智能云+实名认证+创建应用+领取资源

    1. https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (产品服务→人工智能→文字识别)
    2. 实名认证
    3. 创建应用→随便输入应用名称→立即创建→查看应用详情→记录 appid apikey secretkey
    4. 领取相应的资源:文字识别→概览→右侧【领取免费资源】→选中【财务票据OCR】→全部→0元领取
    5. 等待资源到账:右键【资源列表】→已领取资源→核实是否拥有资源

    使用步骤:

    1. 打开任意Excel表,选中第一个要填的单元格
    2. 阅读脚本中的说明(搜索 hymodify 可定位需要修改的地方),修改相应内容后运行即可
    ;注册百度智能云+实名认证+创建应用+领取资源(财务)
    ;   https://console.bce.baidu.com/ai/#/ai/ocr/overview/index (产品服务→人工智能→文字识别)
    ;   实名认证
    ;   创建应用→随便输入应用名称→立即创建→查看应用详情→记录 appid apikey secretkey
    ;   领取相应的资源:
    ;       文字识别→概览→右侧【领取免费资源】→选中【财务票据OCR】→全部→0元领取
    ;       等待资源到账:右键【资源列表】→已领取资源
    ;搜索 hymodify 修改相应信息
    ;功能:
    ;   提取 dn0 文件夹电子发票信息(pdf只提取第1页),并写到当前已打开Excel表(从【当前选中单元格】开始写)
    #SingleInstance force

    ; Excel must already be running: results are written into its active sheet.
    if (!ProcessExist("Excel.exe")) {
        msgbox("请打开Excel并选中第一个要写入单元格",,0x40000)
        ExitApp
    }

    ; Source / destination folders. Flip the condition to 1 to use fixed paths
    ; instead of the folder pickers.
    if (0) {
        dn0 := "c:\Users\Administrator\Desktop\11" ;hymodify source folder holding the original invoices
        dn1 := "c:\Users\Administrator\Desktop\22" ;hymodify target folder receiving the renamed copies
    } else {
        dn0 := DirSelect(, 2, "选择【旧】发票文件夹")
        dn1 := DirSelect(, 2, "选择【新】发票文件夹")
    }
    ; A cancelled dialog yields an empty path; continuing would scan or write
    ; relative to the wrong location, so stop here instead.
    if (dn0 = "" || dn1 = "")
        ExitApp
    ; Normalise "C:\" style selections so that later code can append "\name".
    dn0 := RTrim(dn0, "\")
    dn1 := RTrim(dn1, "\")
    if !DirExist(dn1)
        DirCreate(dn1)

    ; Columns filled from the OCR result: [Excel column title, key in words_result].
    ; "AmountInFiguers" is spelled exactly as the key is read from the OCR result.
    arrOcr := [
        ["发票代码","InvoiceCode"],
        ["发票号码","InvoiceNum"],
        ["开票日期","InvoiceDate"],
        ["校验码","CheckCode"],
        ["机器编号","MachineCode"],
        ["金额","AmountInFiguers"],
        ["服务名称1","CommodityName"],
        ["税率1","CommodityTaxRate"],
        ["税额1","CommodityTax"],
        ["大写金额","AmountInWords"],
        ["销售方名称","SellerName"],
        ["销售方纳税人识别号","SellerRegisterNum"],
        ["销售方地址","SellerAddress"],
        ["销售方开户行","SellerBank"],
        ["购买方名称","PurchaserName"],
        ["购买方纳税人识别号","PurchaserRegisterNum"],
        ["购买方地址","PurchaserAddress"],
        ["购买方开户行","PurchaserBank"],
    ]
    ; Extra columns appended after the OCR columns, in this order.
    arrOther := [
        "新文件名", ; new file name (derived from the OCR result)
        "原文件名", ; original file name
        "序号",     ; running number
    ]
    
    ; ---------------------------------------------------------------------------
    ; Main pass: OCR every invoice below dn0, write one Excel row per file
    ; starting at the active cell, and copy each file into dn1.
    ; ---------------------------------------------------------------------------
    csOcr := arrOcr.length()
    cs := csOcr+arrOther.length()
    arrA := ComObjArray(12, 1, cs) ; 1 x cs VARIANT array = one worksheet row
    xl := ox()
    st := xl.ActiveSheet
    ac := xl.ActiveCell
    r := 0          ; number of data rows written so far
    arrError := []  ; names of files that could not be copied
    if (ac.row == 1) { ; active cell is in row 1: format the sheet as text and write the header row
        st.cells.NumberFormat := "@"
        for _, arr in arrOcr
            arrA[0,A_Index-1] := arr[1]
        for _, v in arrOther
            arrA[0,csOcr+A_Index-1] := v
        ac.resize(1,cs).value := arrA
        rng1 := ac.offset(1).resize(1,cs) ; data starts on the row below the header
    } else
        rng1 := ac.resize(1,cs)
    loop files, dn0 . "\*.*", "RF" { ;hymodify "R" also walks sub-folders
        if (A_LoopFileAttrib ~= "[HS]") ; skip hidden / system files
            continue
        if !(A_LoopFileName ~= "i)\.(pdf|jpg|png)$") ;hymodify accepted file extensions
            continue
        tooltip(A_Index . "`n" . A_LoopFileName)
        objOcr := _Web.baiduOcr_vatInvoice(A_LoopFileFullPath)
        ; hyf_objView(objOcr)
        arrA := ComObjArray(12, 1, cs) ; fresh row buffer for every file
        ; OCR columns
        noExt := ""
        if (isobject(objOcr) && objOcr["TotalAmount"]) { ; treated as a successful recognition
            for _, arr in arrOcr {
                res := objOcr[arr[2]]
                if (isobject(res)) {
                    ; list-valued field: only the first entry's "word" is written
                    if (res.length())
                        arrA[0,A_Index-1] := res[1]["word"]
                } else
                    arrA[0,A_Index-1] := res
            }
            noExt := format("{1}-{2}", objOcr["AmountInFiguers"],objOcr["InvoiceNum"]) ;hymodify new-name rule, default "<amount>-<invoice number>"
            arrA[0,csOcr] := noExt
        }
        ; columns that do not depend on the OCR result
        arrA[0,csOcr+1] := A_LoopFileName
        arrA[0,csOcr+2] := r+1
        ; write the whole row in one assignment
        rng1.offset(r).value := arrA
        r++
        ; Copy the file into dn1: renamed when OCR succeeded, otherwise under
        ; its original name prefixed with "__" so failures are easy to spot.
        SplitPath(A_LoopFileFullPath, fn,, ext, origNoExt)
        if (strlen(noExt))
            dest := format("{1}\{2}.{3}", dn1,noExt,ext)
        else
            dest := format("{1}\__{2}.{3}", dn1,origNoExt,ext)
        ; Record the failure instead of aborting the run, whether the copy
        ; reports it through ErrorLevel or by throwing.
        try {
            FileCopy(A_LoopFileFullPath, dest)
            if errorlevel
                arrError.push(A_LoopFileName)
        } catch
            arrError.push(A_LoopFileName)
    }
    ; Bring Excel to the front; Application.Hwnd is the handle of its main window.
    WinActivate("ahk_id " . xl.Hwnd)
    tooltip
    if arrError.length()
        hyf_objView(arrError, "以下文件复制时出错了,请核实")
    else
        msgbox("已完成",,0x40000)
    return
    
    ; Return the Excel.Application COM object of an already running Excel window.
    ;   winTitle - window title/criteria of the Excel main window.
    ; Exits the script when the worksheet control ("EXCEL71") cannot be found and
    ; throws when the window does not hand out its native object model.
    ox(winTitle:="ahk_class XLMAIN") {
        ctlID := ControlGetHwnd("EXCEL71", winTitle)
        if !ctlID
            ExitApp
        ; 4294967280 = 0xFFFFFFF0 (OBJID_NATIVEOM). The third argument builds
        ; IID_IDispatch {00020400-0000-0000-C000-000000000046} inside IID and
        ; evaluates to that buffer's address.
        if dllcall("oleacc\AccessibleObjectFromWindow", "ptr",ctlID, "uint",4294967280, "ptr",-VarSetCapacity(IID,16)+NumPut(0x46000000000000C0,NumPut(0x0000000000020400,IID,"int64"),"int64"), "ptr*",pacc) != 0
            throw Exception("无法获取 Excel 对象(AccessibleObjectFromWindow 失败)")
        win := ComObject(9, pacc, 1)
        xl := ""
        ; Reading .Application can fail (presumably while a cell is being
        ; edited); send Escape to the sheet and retry until it succeeds.
        loop {
            try
                xl := win.application
            catch
                ControlSend("{escape}", "EXCEL71", winTitle)
        }
        until !!xl
        return xl
    }
    return
    
    ; Render an object as text, one "key<TAB>value" entry per item; nested
    ; objects are listed below their key with one extra leading tab per level.
    ;   obj   - object to render (a non-object yields a fixed notice plus the value)
    ;   char  - separator appended after every entry
    ;   level - recursion depth, callers leave it at 0
    ; Only the outermost call (level 0) returns the accumulated text.
    hyf_obj2Str(obj, char:="`n", level:=0) {
        static indent := "", out := ""
        if !level {
            ; fresh top-level call: drop whatever a previous run accumulated
            indent := ""
            out := ""
        } else
            indent .= A_Tab ; one more tab for the nested level
        if !isobject(obj)
            return "非对象,值为`n" . obj
        try { ; FIXME (from the original author): fails for no apparent reason, errors are ignored
            for key, val in obj {
                if !isobject(val) {
                    out .= indent . key . A_Tab . val . char
                    continue
                }
                out .= indent . key . char
                hyf_obj2Str(val, char, level + 1)
                indent := substr(indent, 2) ; back to this level: drop one tab
            }
        }
        if (char != "`n") ; always finish with a line break
            out .= "`n"
        if (level = 0)
            return out
    }
    
    ; Show an object in an always-on-top message box.
    ;   obj  - object to display (rendered by hyf_obj2Str)
    ;   str  - optional text placed on its own line above the object dump
    ;   char - entry separator forwarded to hyf_obj2Str
    ;   n    - extra MsgBox option bits added to 0x40000
    ; Returns whatever msgbox returns.
    hyf_objView(obj, str:="", char:="`n", n:=0) {
        text := hyf_obj2Str(obj,char)
        if strlen(str)
            text := str . "`n" . text
        return msgbox(text,,0x40000+n)
    }
    
    ; Convert one picture to a PDF next to it by running an external Python script.
    ;   fp     - full path of the picture
    ;   script - path of the converter script, called as: python script <in> <out>
    ;            NOTE(review): the default was reconstructed from a path whose
    ;            separators were lost - confirm it before relying on it.
    ; Returns the path of the PDF the script was asked to create.
    _pic1ToPdf(fp, script:="d:\AA\tool\python\pdf\images2pdf.py") {
        ; swap the trailing run of word characters (the extension) for "pdf"
        fpPdf := RegExReplace(fp, "\w+$", "pdf")
        RunWait(format('python "{1}" "{2}" "{3}"', script,fp,fpPdf),, "hide")
        return fpPdf
    }
    
    ; Small HTTP helper plus the Baidu OCR calls used by this script.
    ; All methods are called on the class itself, e.g. _Web.get(url).
    class _Web {

        ; GET url and return the response text.
        ; Adapted from the SysGetIPAddresses help example. Returns nothing when
        ; sending the request or reading the response fails.
        get(url) {
            rst := ComObjCreate("WinHttp.WinHttpRequest.5.1")
            rst.open("GET", url)
            try {
                rst.send()
                return rst.ResponseText
            }
        }

        ; POST form data and return the response text.
        ;   url      - target address
        ;   postData - object of field:value pairs, sent url-encoded and joined
        ;              with "&"; the key "timeout" is not sent, it is the value
        ;              handed to WaitForResponse
        ;   Encoding - when set, the raw response body is decoded with this
        ;              charset through ADODB.Stream instead of ResponseText
        ;   headers  - optional object of extra request headers (blank values skipped)
        ; https://docs.microsoft.com/en-us/windows/win32/winhttp/iwinhttprequest-send
        post(url, postData:="", Encoding:="", headers:="") {
            rst := ComObjCreate("WinHttp.WinHttpRequest.5.1")
            rst.open("POST", url)
            if isobject(headers) {
                for k, v in headers {
                    if v
                        rst.SetRequestHeader(k, v)
                }
            }
            rst.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
            ; hyf_objView(postData)
            if isobject(postData) {
                param := ""
                for k, v in postData {
                    if (k = "timeout") ; request option, not a form field
                        continue
                    ; every field is kept; values must be url-encoded
                    param .= (param = "" ? "" : "&") . format("{1}={2}", k,this.UrlEncode(v))
                }
                rst.send(param)
                rst.WaitForResponse(postData.haskey("timeout") ? postData.timeout : -1)
            } else {
                rst.send()
            }
            if Encoding && rst.ResponseBody {
                oADO := ComObjCreate("adodb.stream")
                oADO.Type := 1 ; binary
                oADO.Mode := 3 ; read/write
                oADO.Open()
                oADO.Write(rst.ResponseBody)
                oADO.Position := 0
                oADO.Type := 2 ; text
                oADO.Charset := Encoding
                res := oADO.ReadText()
                oADO.Close()
                return res
            }
            return rst.ResponseText
        }

        ; Return a Baidu access token for the configured application.
        ; The token is requested once and reused for the rest of the run.
        ; Throws (string) when the response carries no access_token.
        baiduToken() {
            static cached := ""
            if (cached != "")
                return cached
            appid := "修改" ;hymodify
            apikey := "修改" ;hymodify
            secretkey := "修改" ;hymodify
            host := format("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}&", apikey,secretkey)
            res := this.get(host)
            obj := json.load(res)
            if !(isobject(obj) && obj.haskey("access_token"))
                throw "获取 access_token 失败`n" . res
            return cached := obj['access_token']
        }

        ; Run Baidu VAT-invoice OCR and return the "words_result" object.
        ;   fp   - path of a pdf/jpg/png file, or an already base64-encoded
        ;          image; anything longer than 256 characters is taken to be
        ;          base64 data rather than a path
        ;   bArr - unused, kept for compatibility with existing callers
        ; Throws "<error_code>`n<error_msg>" when the service reports an error.
        ; Example: _Web.baiduOcr_vatInvoice("c:\Users\Administrator\Desktop\22\1.pdf")
        baiduOcr_vatInvoice(fp, bArr:=false) {
            isPath := strlen(fp) <= 256
            b64 := isPath ? _toBase64(fp) : fp
            request_url := "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
            access_token := this.baiduToken()
            request_url := format("{1}?access_token={2}", request_url,access_token)
            ; PDF files go into "pdf_file", everything else into "image"
            if (isPath && (fp ~= "i)pdf$"))
                params := {"pdf_file":b64}
            else
                params := {"image":b64}
            response := this.post(request_url, params, "utf-8")
            obj := json.load(response)
            ; hyf_objView(obj)
            if (obj.haskey("error_code"))
                throw obj["error_code"] . "`n" . obj["error_msg"]
            else
                return obj["words_result"]

            ; Read a file and return its content as one base64 line.
            _toBase64(fp) {
                f := FileOpen(fp, "r")
                if !isobject(f)
                    throw "无法打开文件`n" . fp
                size := f.length
                f.RawRead(Bin, size)
                f.Close()
                ; 0x01 = CRYPT_STRING_BASE64; the first call only reports the
                ; required length in characters, the second one encodes
                DllCall("Crypt32.dll\CryptBinaryToString", "Ptr",&Bin, "UInt",size, "UInt",0x01, "Ptr",0, "UIntP",B64Len)
                VarSetCapacity(b64, B64Len << 1, 0)
                DllCall("Crypt32.dll\CryptBinaryToString", "Ptr",&Bin, "UInt",size, "UInt",0x01, "Ptr",&b64, "UIntP",B64Len)
                Bin := ""
                VarSetCapacity(Bin, 0)
                VarSetCapacity(b64, -1) ; let the variable pick up the written length
                ; the encoder wraps its output; drop the line breaks
                return RegExReplace(b64, "[\r\n]+")
            }
        }

        ; Percent-encode str for use in a URL / form body (credited to 万年书妖).
        ; Every byte of str in the given encoding becomes "%XX", so the result
        ; is pure ASCII. An empty string yields an empty string.
        UrlEncode(str, enc:="UTF-8") {
            hex := "000" ; room for the three characters swprintf writes ("%XX")
            fun := "msvcrt\swprintf"
            r := ""
            VarSetCapacity(buff, size:=strput(str, enc))
            strput(str, &buff, enc)
            ; walk the encoded bytes up to the terminating zero
            while ((code:=numget(buff, A_Index - 1, "UChar")) && dllcall(fun, "str",hex, "str","%%%02X", "uchar",code, "cdecl"))
                r .= hex
            return r
        }

    }
    
    ; JSON parser / serializer, called as JSON.Load(text) and JSON.Dump(value).
    ; The backslash escapes used for JSON string handling are written out
    ; explicitly below; in AutoHotkey the backslash is an ordinary character,
    ; so "\n" is the two characters backslash + n.
    class JSON {
        /**
        * Method: Load
        *     Parses a JSON string into an AHK value
        * Syntax:
        *     value := JSON.Load( Text [, reviver ] )
        * Parameter(s):
        *     value      [retval] - parsed value
        *     Text    [In, ByRef] - JSON formatted string
        *     reviver   [In, opt] - function object, similar to JavaScript's
        *                           JSON.parse() 'reviver' parameter
        */
        class Load extends JSON.Functor {
            call(self, ByRef Text, reviver:="") {
                this.rev := isobject(reviver) ? reviver : False
                ; Object keys (and array indices) are temporarily stored in arrays so
                ; that they can be enumerated in the order they appear in the text
                ; instead of alphabetically. Skipped if no reviver is specified.
                this.keys := this.rev ? {} : False

                ; Each "next" string lists the characters allowed at the next position.
                static quot := chr(34), bashq := "\" . quot
                    , json_value := quot . "{[01234567890-tfn"
                    , json_value_or_array_closing := quot . "{[]01234567890-tfn"
                    , object_key_or_object_closing := quot . "}"

                key := ""
                is_key := False
                root := {}
                stack := [root]
                next := json_value
                Pos := 0

                While((ch:=SubStr(Text, ++Pos, 1)) != "") {
                    if InStr(" `t`r`n", ch)
                        Continue
                    if !InStr(next, ch, 1)
                        this.ParseError(next, Text, Pos)

                    holder := stack[1]
                    is_array := holder.IsArray

                    if InStr(",:", ch) {
                        next := (is_key := !is_array && ch == ",") ? quot : json_value

                    } else if InStr("}]", ch) {
                        stack.RemoveAt(1)
                        next := stack[1]==root ? "" : stack[1].IsArray ? ",]" : ",}"

                    } else {
                        if InStr("{[", ch) {
                            ; Check if Array() is overridden and if its return value has
                            ; the 'IsArray' property. If so, Array() is called normally,
                            ; otherwise a custom base object is used for arrays.
                            static json_array := Func("Array").IsBuiltIn || ![].IsArray ? {IsArray: True} : 0

                            if (ch == "{") {
                                is_key := True
                                value := {}
                                next := object_key_or_object_closing
                            } else { ; ch == "["
                                value := json_array ? new json_array : []
                                next := json_value_or_array_closing
                            }

                            stack.insertat(1, value)

                            if (this.keys)
                                this.keys[value] := []

                        } else {
                            if (ch == quot) {
                                ; Find the closing quote: a quote preceded by a backslash
                                ; is escaped. Escaped backslashes are first rewritten to
                                ; \u005c so they cannot be mistaken for an escape prefix.
                                i := Pos
                                While(i:=InStr(Text, quot,, i+1)) {
                                    value := StrReplace(SubStr(Text, Pos+1, i-Pos-1), "\\", "\u005c")
                                    static tail := A_AhkVersion<"2" ? 0 : -1
                                    if (SubStr(value, tail) != "\")
                                        Break
                                }

                                if (!i)
                                    this.ParseError("'", Text, Pos)

                                ; decode the simple escapes
                                value := StrReplace(value, "\/", "/")
                                value := StrReplace(value, bashq, quot)
                                value := StrReplace(value, "\b", "`b")
                                value := StrReplace(value, "\f", "`f")
                                value := StrReplace(value, "\n", "`n")
                                value := StrReplace(value, "\r", "`r")
                                value := StrReplace(value, "\t", "`t")

                                Pos := i ; update Pos

                                ; any backslash left must start a \uXXXX escape
                                i := 0
                                While(i:=InStr(value, "\",, i+1)) {
                                    if !(SubStr(value, i+1, 1) == "u")
                                        this.ParseError("\", Text, Pos - strlen(SubStr(value, i+1)))
                                    uffff := Abs("0x" . SubStr(value, i+2, 4))
                                    if (A_IsUnicode || uffff < 0x100)
                                        value := SubStr(value, 1, i-1) . chr(uffff) . SubStr(value, i+6)
                                }

                                if (is_key) {
                                    key := value, next := ":"
                                    Continue
                                }

                            } else {
                                ; bare token (number / true / false / null): it runs up to
                                ; the next "]", "}", ",", whitespace or the end of the text
                                value := SubStr(Text, Pos, i := RegExMatch(Text, "[\]\},\s]|$",, Pos)-Pos)

                                static Number := "Number", Integer :="Integer"
                                if value Is %Number% {
                                    if value Is %Integer%
                                        value += 0
                                } else if (value == "true" || value == "false") ; JSON literals are lower-case
                                    value := %value% + 0
                                else if (value == "null")
                                    value := ""
                                else
                                    ; we can do more here to pinpoint the actual culprit
                                    ; but that's just too much extra work.
                                    this.ParseError(next, Text, Pos, i)
                                Pos += i-1
                            }

                            next := holder==root ? "" : is_array ? ",]" : ",}"
                        } ; if InStr("{[", ch) { ... } else
                        if (is_array)
                            key := holder.push(value)
                        else
                            holder[key] := value
                        if (this.keys && this.keys.haskey(holder))
                            this.keys[holder].Push(key)
                    }

                } ; While ( ... )
                return this.rev ? this.Walk(root, "") : root[""]
            }

            ; Throw an exception describing what was expected at position Pos.
            ; 'expect' is the set of allowed characters, or one of the markers
            ; "'" (unterminated string) and "\" (invalid escape).
            ParseError(expect, ByRef Text, Pos, len:=1) {
                static quot := chr(34), qurly := quot . "}"

                line := StrSplit(SubStr(Text, 1, Pos), "`n", "`r").Length()
                col := Pos - InStr(Text, "`n",, -(strlen(Text)-Pos+1))
                msg := Format("{1}`n`nLine:`t{2}`nCol:`t{3}`nChar:`t{4}"
                    ,     (expect == "")     ? "Extra data"
                    : (expect == "'")    ? "Unterminated string starting at"
                    : (expect == "\")    ? "Invalid Escape"
                    : (expect == ":")    ? "Expecting ':' Delimiter"
                    : (expect == quot)   ? "Expecting Object key enclosed In double quotes"
                    : (expect == qurly)  ? "Expecting Object key enclosed In double quotes Or Object closing '}'"
                    : (expect == ",}")   ? "Expecting ',' Delimiter Or Object closing '}'"
                    : (expect == ",]")   ? "Expecting ',' Delimiter Or Array closing ']'"
                    : InStr(expect, "]") ? "Expecting JSON value Or Array closing ']'"
                    :                      "Expecting JSON value(string, Number, True, False, null, Object Or Array)"
                    , line, col, Pos)

                static offset := A_AhkVersion<"2" ? -3 : -4
                Throw Exception(msg, offset, SubStr(Text, Pos, len))
            }

            ; Apply the reviver bottom-up: children first, then holder[key] itself.
            Walk(holder, key) {
                value := holder[key]
                if isobject(value) {
                    For i, k In this.keys[value] { ; Check if ObjHasKey(value, k) ??
                        v := this.Walk(value, k)
                        if (v != JSON.Undefined)
                            value[k] := v
                        else
                            value.delete(k)
                    }
                }

                return this.rev.call(holder, key, value)
            }
        }

        /**
        * Method: Dump
        *     Converts an AHK value into a JSON string
        * Syntax:
        *     str := JSON.Dump( value [, replacer, Space ] )
        * Parameter(s):
        *     str        [retval] - JSON representation of an AHK value
        *     value          [In] - any value (object, string, number)
        *     replacer  [In, opt] - function object, similar to JavaScript's
        *                           JSON.stringify() 'replacer' parameter
        *     Space     [In, opt] - similar to JavaScript's JSON.stringify()
        *                           'space' parameter
        */
        class Dump extends JSON.Functor {

            call(self, value, replacer:="", Space:="") {
                this.rep := isobject(replacer) ? replacer : ""
                this.gap := ""
                if (Space) {
                    static Integer := "Integer"
                    ; a digit-only Space is a count of blanks (at most 10), anything
                    ; else is used literally (first 10 characters)
                    if (Space ~= "^\d+$") {
                        Loop(((n:=Abs(Space))>10 ? 10 : n))
                            this.gap .= " "
                    } else {
                        this.gap := SubStr(Space, 1, 10)
                    }
                    this.indent := "`n"
                }
                return this.Str({"": value}, "")
            }

            ; Serialize holder[key]; recurses into arrays and objects.
            Str(holder, key) {
                value := holder[key]
                if (this.rep)
                    value := this.rep.call(holder, key, holder.haskey(key) ? value : JSON.Undefined)

                if isobject(value) {
                    ; Check object type, skip serialization for other object types such as
                    ; ComObject, Func, BoundFunc, FileObject, RegExMatchObject, Property, etc.
                    static Type := A_AhkVersion<"2" ? "" : Func("Type")
                    ;if (Type ? Type.call(value) == "Object" : ObjGetCapacity(value) != "")
                    if (isobject(value)) { ; hy: the type check above was replaced by this one
                        if (this.gap) {
                            stepback := this.indent
                            this.indent .= this.gap
                        }

                        is_array := value.IsArray
                        ; Array() is not overridden, fall back to the old method of
                        ; identifying array-like objects. Due to the use of a for-loop
                        ; sparse arrays such as '[1,,3]' are detected as objects ({}).
                        if (!is_array) {
                            For i In value
                                is_array := i == A_Index
                            Until !is_array
                        }

                        str := ""
                        if (is_array) {
                            Loop(value.Length()) {
                                if (this.gap)
                                    str .= this.indent
                                v := this.Str(value, A_Index)
                                str .= (v != "") ? v . "," : "null,"
                            }
                        } else {
                            colon := this.gap ? ": " : ":"
                            For k In value {
                                v := this.Str(value, k)
                                if (v != "") {
                                    if (this.gap)
                                        str .= this.indent
                                    str .= this.Quote(k) . colon . v . ","
                                }
                            }
                        }
                        if (str != "") {
                            str := RTrim(str, ",")
                            if (this.gap)
                                str .= stepback
                        }
                        if (this.gap)
                            this.indent := stepback
                        return is_array ? "[" . str . "]" : "{" . str . "}"
                    }
                } else ; numbers are emitted as they are, strings are quoted
                    return type(value) != "String" ? value : this.Quote(value)
                ;return ObjGetCapacity([value])=="" ? value : this.Quote(value) ; hy: previous variant
            }

            ; Return str as a JSON string literal: enclosed in double quotes with
            ; backslash, quote and control characters escaped.
            Quote(str) {
                static quot := chr(34), bashq := "\" . quot
                if (str != "") {
                    ; the backslash itself goes first so that the escapes added
                    ; below are not escaped a second time
                    str := StrReplace(str, "\", "\\")
                    ; str := StrReplace(str, "/", "\/") ; optional in ECMAScript
                    str := StrReplace(str, quot, bashq)
                    str := StrReplace(str, "`b", "\b")
                    str := StrReplace(str, "`f", "\f")
                    str := StrReplace(str, "`n", "\n")
                    str := StrReplace(str, "`r", "\r")
                    str := StrReplace(str, "`t", "\t")

                    static rx_escapable := A_AhkVersion<"2" ? "O)[^\x20-\x7e]" : "[^\x20-\x7e]"
                    ; hy: disabled so that Chinese (non-ASCII) text is not turned into \u escapes
                    ;While RegExMatch(str, rx_escapable, m)
                    ;str := StrReplace(str, m.Value, Format("\u{1:04x}", Ord(m.Value)))
                }
                return quot . str . quot
            }
        }

        /**
        * Property: Undefined
        *     Proxy for the 'undefined' type
        * Syntax:
        *     undefined := JSON.Undefined
        * Remarks:
        *     For use with reviver and replacer functions since AutoHotkey does not
        *     have an 'undefined' type. Returning blank ("") or 0 won't work since these
        *     can't be distinguished from actual JSON values. This leaves us with objects.
        *     Replacer() - the caller may return a non-serializable AHK object such as
        *     ComObject, Func, BoundFunc, FileObject, RegExMatchObject, and Property to
        *     mimic the behavior of returning 'undefined' in JavaScript, but for the sake
        *     of code readability and convenience, it's better to do 'return JSON.Undefined'.
        *     Internally, the property returns a ComObject with the variant type of VT_EMPTY.
        */
        Undefined[] {
            get {
                static empty := {}, vt_empty := ComObject(0, &empty, 1)
                return vt_empty
            }
        }

        class Functor {
            __call(method, ByRef arg, args*) {
                ; When casting to call(), use a new instance of the "function object"
                ; so as to avoid directly storing the properties (used across
                ; sub-methods) into the "function object" itself.
                if isobject(method)
                    return (new this).call(method, arg, args*)
                else if (method == "")
                    return (new this).call(arg, args*)
            }
        }
    }
  • 相关阅读:
    7A
    map最最最基本用法
    cccc超级酱油心得
    scu-4445
    初学算法之广搜
    初学算法之最基础的stl队列
    初学算法之筛选素数法
    go 虎牙爬取
    php使用xpath爬取内容
    go xpath
  • 原文地址:https://www.cnblogs.com/hyaray/p/15135219.html
Copyright © 2020-2023  润新知