使用python获取pptx文件的文本内容范例

get_text_from_pptx_pptm.py

#!/bin/python
# -*- coding: utf-8 -*-

from pptx import Presentation
import sys
import base64

# Python 2 only: make UTF-8 the interpreter-wide default encoding so implicit
# unicode <-> byte-string conversions of non-ASCII slide text do not raise
# UnicodeError. reload(sys) is needed because the interpreter removes
# sys.setdefaultencoding during start-up. Python 3 has neither a builtin
# reload() nor sys.setdefaultencoding(), so the block is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')

# Path of the presentation to read, taken from the first command-line argument.
if len(sys.argv) < 2:
    sys.exit("usage: get_text_from_pptx_pptm.py <file.pptx|file.pptm>")
fileName = sys.argv[1]

def tripSpace( str ):
    """Return the given text with all whitespace characters removed.

    Removes the ideographic (full-width) space, the ASCII space, tab,
    carriage return, line feed and vertical tab. All other characters are
    left untouched.

    NOTE: the parameter name shadows the builtin ``str``; it is kept
    unchanged so existing callers are not affected.
    """
    # Escape sequences are written out explicitly: raw control characters
    # inside the literals would break the source, and a bare "v" would strip
    # the letter "v" instead of the vertical tab.
    for unwanted in ("　", " ", "\t", "\r", "\n", "\v"):
        str = str.replace(unwanted, "")
    return str

def _notes_text(slide):
    """Return the speaker-notes text of *slide*, or "" when it has none."""
    # Check has_notes_slide first: merely reading slide.notes_slide creates
    # an empty notes slide when the slide does not have one yet.
    if not slide.has_notes_slide:
        return ""
    frame = slide.notes_slide.notes_text_frame
    # notes_text_frame is None when the notes slide has no notes placeholder.
    return frame.text if frame is not None else ""


prs = Presentation(fileName)

# File summary (notes of the first slide); kept unstripped because it is
# emitted base64-encoded.
file_summary = ""
# File annotations (notes of the second and later slides)
note_parts = []
# File content (full text of every shape that carries a text frame)
content_parts = []
for i, sld in enumerate(prs.slides, start=1):
    content_parts.extend(shp.text for shp in sld.shapes if shp.has_text_frame)
    if i == 1:
        file_summary = _notes_text(sld)
    else:
        note_parts.append(tripSpace(_notes_text(sld)))

file_note = "".join(note_parts)
file_content = "".join(content_parts)

# Output is three lines: base64 of the summary, then the whitespace-stripped
# annotations, then the whitespace-stripped content.
# b64encode works on bytes, so encode the text explicitly; decoding the result
# as ASCII prints the bare base64 text rather than a b'...' representation.
print(base64.b64encode(file_summary.encode("utf-8")).decode("ascii"))
print(tripSpace(file_note))
print(tripSpace(file_content))

相关阅读:
MySQL主从复制集群添加slave节点
GTID主从与传统主从复制
20201207总结
202011051 每周例行报告
202011121 每周例行报告
202010081 每周例行报告
202011261 每周例行报告
202010153 每周例行报告
202010291 每周例行报告
202011201 每周例行报告

原文地址：https://www.cnblogs.com/gaoBlog/p/14042502.html