• 使用 Python 删除 Word 文档中的指定段落,并顺便实现文档中的图片导出


    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    
    
    from pathlib import Path
    from docx import Document
    import os
    
    # 从word中导出图片
    def extract_img_word(filename='',doc_path=''):
        """Extract the embedded images of a .docx file into a directory.

        A .docx document is an ordinary zip archive, so it can be opened
        with ``zipfile`` directly (renaming the file to ``.zip`` would work
        just as well).

        Args:
            filename: Path of the .docx file to read.
            doc_path: Directory the image members are extracted into. The
                archive-internal layout is kept, so the files end up under
                ``<doc_path>/word/media/``.
        """
        from zipfile import ZipFile

        # Only archive members whose name starts with this prefix are exported.
        media_prefix = "word/media/image"

        with ZipFile(filename) as archive:
            image_members = [
                member for member in archive.namelist()
                if member.startswith(media_prefix)
            ]
            for member in image_members:
                archive.extract(member, doc_path)
                
    
    '''
    pip install python-docx
    https://python-docx.readthedocs.io/en/latest/
    '''
    #创建文档
    def createWord():
        """Build a small sample document and save it to H:/temp/test.docx.

        The document contains a title, a few headings and paragraphs, one of
        which carries the '需要删除' marker text that ``delWordContent``
        searches for.
        """
        doc = Document()
        doc.add_heading('Document Title', 0)
        doc.add_paragraph('A plain paragraph having some')

        # Two headings with the same text but different levels.
        for title, depth in (('Heading, level 1', 1), ('Heading, level 1', 2)):
            doc.add_heading(title, level=depth)

        # The first of these paragraphs is the deletion marker.
        for body_text in (
            '以下段落需要删除',
            'A plain paragraph',
            'A plain paragraph 新段落',
        ):
            doc.add_paragraph(body_text)

        doc.add_heading('Heading, level 2', level=2)
        doc.save('H:/temp/test.docx')
        
    # Runs at module level: writes the sample document H:/temp/test.docx,
    # which the delWordContent() call further down reads as its input.
    createWord()
    
    #删除指定段落
    def delete_paragraph(paragraph):
        """Remove a paragraph from the document it belongs to.

        The paragraph's underlying XML element is detached from its parent
        node, then the wrapper's own references to that element are cleared
        so the wrapper no longer points at the removed node.

        Args:
            paragraph: Paragraph object exposing ``_element`` / ``_p``.
        """
        element = paragraph._element
        parent = element.getparent()
        parent.remove(element)
        paragraph._p = None
        paragraph._element = None
    
    def delWordContent(docx_file='',dest_file=''):
        """Delete the marker paragraph and every paragraph after it.

        The document is scanned for the first paragraph whose text contains
        '需要删除'. That paragraph and all paragraphs following it are
        removed, and the result is saved to ``dest_file``. If no paragraph
        contains the marker, nothing is deleted and nothing is saved.

        Args:
            docx_file: Path of the document to read.
            dest_file: Path the trimmed document is written to.
        """
        document = Document(docx_file)
        all_paragraphs = document.paragraphs

        # Index of the first paragraph containing the marker, or None.
        first_hit = next(
            (idx for idx, para in enumerate(all_paragraphs)
             if '需要删除' in para.text),
            None,
        )
        if first_hit is None:
            return

        for para in all_paragraphs[first_hit:]:
            delete_paragraph(para)

        # Save as a new file.
        document.save(dest_file)
    
    # Demo run at module level: trims the sample document and writes the
    # result to a separate file (written only if the marker text is found).
    delWordContent(docx_file='H:/temp/test.docx',dest_file='H:/temp/test-new.docx')
    
    def testDel():
        """Trim every .docx in H:/ into H:/words/ and remove processed originals.

        For each ``*.docx`` directly under ``H:/`` the file is passed to
        ``delWordContent`` with a destination of the same name inside the
        ``words`` sub-directory. The original is removed only when a trimmed
        copy was actually written during this call.
        """
        dest_dir = 'words'
        # Snapshot the matches up front because files are removed in the loop.
        for filename in list(Path('H:/').glob('*.docx')):
            print(str(filename))
            dest_path = filename.parent / dest_dir / filename.name
            # The save would fail if the destination directory were missing.
            dest_path.parent.mkdir(parents=True, exist_ok=True)

            # Remember the state of any stale output from an earlier run so a
            # fresh write can be told apart from a leftover file.
            stamp_before = (
                dest_path.stat().st_mtime_ns if dest_path.exists() else None
            )
            delWordContent(docx_file=str(filename), dest_file=str(dest_path))

            # delWordContent saves only when it found the marker text; without
            # this check an unmarked document would be deleted with no copy.
            was_written = (
                dest_path.exists()
                and dest_path.stat().st_mtime_ns != stamp_before
            )
            if was_written:
                os.remove(str(filename))
  • 相关阅读:
    C#变量初始化
    Microsoft中间语言的主要特征
    去除json数据的某些键值对
    ASP.NET MVC 之控制器与视图之间的数据传递
    ASP.NET MVC 路由进阶(之二)--自定义路由约束
    ASP.NET WEB API 初探
    Linux学习三部曲(之三)
    Linux学习三部曲(之二)
    Linux学习三部曲(之一)
    C# 3.0 特性之扩展方法
  • 原文地址:https://www.cnblogs.com/liangblog/p/16203382.html
Copyright © 2020-2023  润新知