• LibreOffice 各类文件转换的 FilterName


    # Document families a loaded LibreOffice document can belong to.
    # The export table below is keyed by these exact strings.
    LIBREOFFICE_DOC_FAMILIES = [
        "TextDocument",
        "WebDocument",
        "Spreadsheet",
        "Presentation",
        "Graphics",
    ]

    # Input file extension -> load properties passed when opening the file.
    # NOTE(review): several values read like UI display names rather than
    # internal filter identifiers -- confirm against the installed filter list.
    LIBREOFFICE_IMPORT_TYPES = {
        "docx": {"FilterName": "MS Word 2007 XML"},
        "pdf": {"FilterName": "PDF - Portable Document Format"},
        "jpg": {"FilterName": "JPEG - Joint Photographic Experts Group"},
        "html": {"FilterName": "HTML Document"},
        "odp": {"FilterName": "OpenDocument Presentation (Flat XML)"},
        "pptx": {"FilterName": "Microsoft PowerPoint 2007 XML"},
    }

    # Output extension -> document family -> store properties.
    # A family missing under an extension means no export filter is listed
    # for that combination.
    LIBREOFFICE_EXPORT_TYPES = {
        "pdf": {
            "TextDocument": {"FilterName": "writer_pdf_Export"},
            "WebDocument": {"FilterName": "writer_web_pdf_Export"},
            "Spreadsheet": {"FilterName": "calc_pdf_Export"},
            "Presentation": {"FilterName": "impress_pdf_Export"},
            "Graphics": {"FilterName": "draw_pdf_Export"},
        },
        "jpg": {
            "Presentation": {"FilterName": "impress_jpg_Export"},
            "Graphics": {"FilterName": "draw_jpg_Export"},
        },
        "html": {
            "TextDocument": {"FilterName": "HTML (StarWriter)"},
            "WebDocument": {"FilterName": "HTML"},
            "Spreadsheet": {"FilterName": "HTML (StarCalc)"},
            "Presentation": {"FilterName": "impress_html_Export"},
            "Graphics": {"FilterName": "draw_html_Export"},
        },
        "docx": {
            "TextDocument": {"FilterName": "MS Word 2007 XML"},
        },
        "odp": {
            "Presentation": {"FilterName": "impress8"},
        },
        "pptx": {
            "Presentation": {"FilterName": "Impress MS PowerPoint 2007 XML"},
        },
    }

    转:

    convert_test

    #!/usr/bin/env python3
    """
        VIEW COMPLETE CODE AT
        =====================
        * https://github.com/six519/libreoffice_convert
        THANKS
        ======
        * Thanks to: Mirko Nasato for his PyODConverter http://www.artofsolving.com/opensource/pyodconverter
        TESTED USING
        ============
        * Fedora release 20 (Heisenbug)
        * Python 3.3.2
        INSTALL DEPENDENCIES
        ====================
        * yum install libreoffice-sdk
    """
    
    import uno
    import subprocess
    import time
    import os
    
    from com.sun.star.beans import PropertyValue
    
    # Defaults for the socket on which the headless office instance listens.
    LIBREOFFICE_DEFAULT_PORT = 6519
    LIBREOFFICE_DEFAULT_HOST = "localhost"

    # Document families a loaded LibreOffice document can belong to.
    # The export table below is keyed by these exact strings.
    LIBREOFFICE_DOC_FAMILIES = [
        "TextDocument",
        "WebDocument",
        "Spreadsheet",
        "Presentation",
        "Graphics",
    ]

    # Input file extension -> load properties passed when opening the file.
    # NOTE(review): several values read like UI display names rather than
    # internal filter identifiers -- confirm against the installed filter list.
    LIBREOFFICE_IMPORT_TYPES = {
        "docx": {"FilterName": "MS Word 2007 XML"},
        "pdf": {"FilterName": "PDF - Portable Document Format"},
        "jpg": {"FilterName": "JPEG - Joint Photographic Experts Group"},
        "html": {"FilterName": "HTML Document"},
        "odp": {"FilterName": "OpenDocument Presentation (Flat XML)"},
        "pptx": {"FilterName": "Microsoft PowerPoint 2007 XML"},
    }

    # Output extension -> document family -> store properties.
    # A family missing under an extension means no export filter is listed
    # for that combination.
    LIBREOFFICE_EXPORT_TYPES = {
        "pdf": {
            "TextDocument": {"FilterName": "writer_pdf_Export"},
            "WebDocument": {"FilterName": "writer_web_pdf_Export"},
            "Spreadsheet": {"FilterName": "calc_pdf_Export"},
            "Presentation": {"FilterName": "impress_pdf_Export"},
            "Graphics": {"FilterName": "draw_pdf_Export"},
        },
        "jpg": {
            "Presentation": {"FilterName": "impress_jpg_Export"},
            "Graphics": {"FilterName": "draw_jpg_Export"},
        },
        "html": {
            "TextDocument": {"FilterName": "HTML (StarWriter)"},
            "WebDocument": {"FilterName": "HTML"},
            "Spreadsheet": {"FilterName": "HTML (StarCalc)"},
            "Presentation": {"FilterName": "impress_html_Export"},
            "Graphics": {"FilterName": "draw_html_Export"},
        },
        "docx": {
            "TextDocument": {"FilterName": "MS Word 2007 XML"},
        },
        "odp": {
            "Presentation": {"FilterName": "impress8"},
        },
        "pptx": {
            "Presentation": {"FilterName": "Impress MS PowerPoint 2007 XML"},
        },
    }
    
    class PythonLibreOffice(object):
        """Drive a headless LibreOffice instance over a UNO socket to convert files.

        Creating an instance launches ``soffice`` and then tries to connect to
        it. Launch, connection and conversion failures are not raised; they
        are recorded and can be read back through ``lastError``.
        """

        def __init__(self, host=LIBREOFFICE_DEFAULT_HOST, port=LIBREOFFICE_DEFAULT_PORT):
            """Start soffice on ``host``/``port`` and connect to its desktop.

            If the connection fails, ``desktop`` stays ``None`` and the reason
            is available from ``lastError``.
            """
            self.host = host
            self.port = port
            # Initialised first so runUnoProcess() can already record a failure.
            self.__lastErrorMessage = ""
            self.local_context = uno.getComponentContext()
            self.resolver = self.local_context.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", self.local_context)
            # Built from the constructor arguments, not the module defaults,
            # so a non-default host/port is actually honoured.
            self.connectionString = "socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (self.host, self.port)
            self.context = None
            self.desktop = None
            self.runUnoProcess()

            try:
                self.context = self.resolver.resolve("uno:%s" % self.connectionString)
                self.desktop = self.context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", self.context)
            except Exception as e:
                # Keep an earlier launch failure visible next to the
                # connection error instead of overwriting it.
                launchError = self.__lastErrorMessage
                if launchError:
                    self.__lastErrorMessage = "%s; %s" % (launchError, e)
                else:
                    self.__lastErrorMessage = str(e)

        @property
        def lastError(self):
            """Message of the most recently recorded failure ("" if none)."""
            return self.__lastErrorMessage

        def terminateProcess(self):
            """Ask the connected office desktop to terminate.

            Returns:
                True if no exception occurred (including when there is no
                desktop to terminate), False otherwise; the exception text
                is then stored in ``lastError``.
            """
            try:
                if self.desktop:
                    self.desktop.terminate()
            except Exception as e:
                self.__lastErrorMessage = str(e)
                return False

            return True

        def convertFile(self, outputFormat, inputFilename):
            """Convert ``inputFilename`` to ``outputFormat`` next to the input file.

            The output path is the input path with its extension replaced by
            ``outputFormat``. The input format is taken from the file
            extension (matched case-insensitively).

            Args:
                outputFormat: key of LIBREOFFICE_EXPORT_TYPES, e.g. "pdf".
                inputFilename: path of the file to convert.

            Returns:
                True on success; the office process is left running so more
                files can be converted. False on any failure; the reason is
                stored in ``lastError`` and the office process is asked to
                terminate.
            """
            if self._convert(outputFormat, inputFilename):
                return True

            # Preserve the conversion error even if terminating fails too.
            failure = self.__lastErrorMessage
            self.terminateProcess()
            if failure:
                self.__lastErrorMessage = failure

            return False

        def _convert(self, outputFormat, inputFilename):
            """Load, export and close the document; return True on success."""
            if not self.desktop:
                # A connection error recorded earlier is more informative
                # than a generic message, so only fill in a blank.
                if not self.__lastErrorMessage:
                    self.__lastErrorMessage = "Not connected to LibreOffice"
                return False

            baseName, extension = os.path.splitext(inputFilename)
            outputFilename = "%s.%s" % (baseName, outputFormat)
            inputFormat = extension.replace(".", "").lower()

            if inputFormat not in LIBREOFFICE_IMPORT_TYPES:
                self.__lastErrorMessage = "Unsupported input format: %r" % inputFormat
                return False

            inputUrl = uno.systemPathToFileUrl(os.path.abspath(inputFilename))
            outputUrl = uno.systemPathToFileUrl(os.path.abspath(outputFilename))

            inputProperties = {"Hidden": True}
            inputProperties.update(LIBREOFFICE_IMPORT_TYPES[inputFormat])

            doc = None
            try:
                exportFilters = LIBREOFFICE_EXPORT_TYPES.get(outputFormat)
                if exportFilters is None:
                    self.__lastErrorMessage = "Unsupported output format: %r" % (outputFormat,)
                    return False

                doc = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self.propertyTuple(inputProperties))
                if doc is None:
                    self.__lastErrorMessage = "Could not load %s" % inputFilename
                    return False

                try:
                    doc.refresh()
                except Exception:
                    # Best effort only: presumably not every document type
                    # offers refresh(), and a failure here is not fatal.
                    pass

                docFamily = self.getDocumentFamily(doc)
                if docFamily is None:
                    self.__lastErrorMessage = "Could not determine the document family of %s" % inputFilename
                    return False
                if docFamily not in exportFilters:
                    self.__lastErrorMessage = "Cannot export %s as %r" % (docFamily, outputFormat)
                    return False

                doc.storeToURL(outputUrl, self.propertyTuple(exportFilters[docFamily]))
                return True
            except Exception as e:
                self.__lastErrorMessage = str(e)
                return False
            finally:
                # Close the document on every path so a failed conversion
                # does not leave it open in the office process.
                if doc is not None:
                    try:
                        doc.close(True)
                    except Exception:
                        # Cleanup only; the conversion result is already decided.
                        pass

        def propertyTuple(self, propDict):
            """Turn a ``{name: value}`` dict into a tuple of UNO PropertyValue structs."""
            properties = []
            for name, value in propDict.items():
                prop = PropertyValue()
                prop.Name = name
                prop.Value = value
                properties.append(prop)

            return tuple(properties)

        def getDocumentFamily(self, doc):
            """Return the LIBREOFFICE_DOC_FAMILIES entry matching ``doc``, or None.

            The more specific service of each pair is probed first:
            WebDocument before GenericTextDocument, because web documents are
            expected to report the generic text service as well.
            """
            serviceFamilies = (
                ("com.sun.star.text.WebDocument", LIBREOFFICE_DOC_FAMILIES[1]),
                ("com.sun.star.text.GenericTextDocument", LIBREOFFICE_DOC_FAMILIES[0]),
                ("com.sun.star.sheet.SpreadsheetDocument", LIBREOFFICE_DOC_FAMILIES[2]),
                ("com.sun.star.presentation.PresentationDocument", LIBREOFFICE_DOC_FAMILIES[3]),
                ("com.sun.star.drawing.DrawingDocument", LIBREOFFICE_DOC_FAMILIES[4]),
            )
            try:
                for service, family in serviceFamilies:
                    if doc.supportsService(service):
                        return family
            except Exception:
                # An object that cannot be queried has no known family.
                return None

            return None

        def runUnoProcess(self):
            """Launch a headless soffice that accepts UNO connections.

            The command is passed as an argument list (no shell), so the host
            and port values are never interpreted by a shell. A launch failure
            is recorded in ``lastError`` instead of being raised.
            """
            command = [
                "soffice",
                "--headless",
                "--norestore",
                "--accept=%s" % self.connectionString,
            ]
            try:
                subprocess.Popen(command, stdin=None, stdout=None, stderr=None)
            except OSError as e:
                self.__lastErrorMessage = "Could not start soffice: %s" % e
                return
            # Fixed grace period for soffice to open its socket; this is a
            # plain delay, not a readiness check.
            time.sleep(3)
    
    if __name__ == "__main__":

        test_libreoffice = PythonLibreOffice()

        # Convert an MS Word document (docx) to PDF.
        if test_libreoffice.convertFile("pdf", "document.docx"):
            # A successful conversion leaves the office process running,
            # so shut it down explicitly before exiting.
            test_libreoffice.terminateProcess()
        else:
            print("Conversion failed: %s" % test_libreoffice.lastError)
  • 相关阅读:
    IDEA中好用的插件
    Win10安装Docker
    win10系统关闭此电脑的文件夹
    postgreSQL主键自增长
    【设计模式】工厂模式
    vue2.0实现倒计时的插件(时间戳 刷新 跳转 都不影响),转载
    springCloud中的组件学习
    使用springBoot+jsp+redis解决单一用户问题
    使用HtmlUnit动态获取网页数据
    java爬取网站中所有网页的源代码和链接
  • 原文地址:https://www.cnblogs.com/zl1991/p/10762871.html
Copyright © 2020-2023  润新知