• 读取 Word 文档并提取和写入数据(基于 Python 3.6)


    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    # @File : delete_file
    # @Author : moucong
    # @Date : 2018/4/1 16:33
    # @Software: PyCharm

    #读取docx中的文本代码示例
    import docx
    import re

    def extract_digits(text):
        """Return the digits found in each space-separated token of ``text``.

        Every non-empty token contributes one entry: its digit characters,
        in order, followed by a single space. A token that contains no
        digits therefore contributes just ``' '``.
        """
        return [
            ''.join(re.findall(r"\d", token)) + ' '
            for token in text.split(' ')
            if token  # consecutive spaces produce empty tokens; skip them
        ]


    def main():
        """Print the source document, then save its digits to a new document."""
        # Raw strings keep the Windows backslashes literal; without the
        # prefix, the "\n" in "\number.docx" is read as a newline character.
        file = docx.Document(r"E:\python_word\word.docx")
        print("段落数:"+str(len(file.paragraphs)))  # number of paragraphs
        file_word = docx.Document()

        # Print the content of every paragraph.
        for para in file.paragraphs:
            print(para.text)

        # Collect the digits of every token across all paragraphs.
        para_data = []
        for para in file.paragraphs:
            para_data.extend(extract_digits(para.text))

        # NOTE(review): the collected numbers are written as one paragraph
        # after the loop; confirm this matches the intended layout.
        # add_paragraph expects text, so the pieces are joined first.
        file_word.add_paragraph(''.join(para_data))
        file_word.save(r"E:\python_word\number.docx")


    if __name__ == "__main__":
        main()

  • 相关阅读:
    RPC的入门
    Https的实现原理
    Celery
    Flask信号
    Redis安装
    python之递归
    python之三元表达式和生成式
    python第十八天作业
    python之生成器
    python之迭代器
  • 原文地址:https://www.cnblogs.com/setname/p/8837024.html
Copyright © 2020-2023  润新知