需求:获取 Word 文档里的表单(表格)内容,并按照规则拼接成字符串
转换成类似下面的样子
代码如下:
import re

from docx import Document

# Text in the fourth column that marks a field as mandatory.
_REQUIRED_MARK = "必输"

# Full-width parentheses are normalised to ASCII ones before the length lookup,
# because the type cell may be typed with either kind.
_PAREN_TABLE = str.maketrans("()", "()")

# Captures whatever follows the first "(" up to the next parenthesis (or the
# end of the text), e.g. "20" in "varchar(20)".
_LENGTH_RE = re.compile(r"\(([^()]*)")

# parse_docx() reads cell columns 0..4 of every data row.
_COLUMNS_USED = 5


def _extract_length(type_text):
    """Return the text inside the first parentheses of *type_text*, or ""."""
    match = _LENGTH_RE.search(type_text.translate(_PAREN_TABLE))
    return match.group(1).strip() if match else ""


def _format_row(label, key, type_text, required_text, remark):
    """Build one ``'key' => 'rules', //comment`` line from a row's cell texts.

    The arguments are the raw texts of columns 0..4 of the row, in order.
    """
    rules = []
    if required_text.strip() == _REQUIRED_MARK:
        rules.append("required")
    length = _extract_length(type_text)
    if length:
        # NOTE(review): the original emitted "required|<n>" for mandatory rows
        # and "max:<n>" otherwise; the "max:" prefix is now applied in both
        # cases, on the assumption that the missing prefix was an oversight.
        rules.append("max:" + length)
    # Trailing comment: required-column text, first-column text, and the
    # fifth-column text with its spaces removed.
    comment = required_text + " " + label + " " + remark.replace(" ", "")
    return "'%s' => '%s', //%s" % (key.strip(), "|".join(rules), comment)


def parse_docx(f):
    """Print one rule line for every data row of every table in a .docx file.

    For each table, the first row is skipped (treated as a header) and every
    following row is printed as::

        '<col 1>' => '<rules>', //<col 3> <col 0> <col 4 without spaces>

    ``<rules>`` contains ``required`` when column 3 equals "必输" and
    ``max:<n>`` when column 2 carries a parenthesised value ``<n>``; the two
    are joined with ``|``. A row without a parenthesised value simply gets no
    ``max`` rule instead of raising ``IndexError``.

    Tables with fewer than five columns are skipped, since they cannot hold
    the columns read here.

    Args:
        f: Path or file-like object accepted by ``docx.Document``.
    """
    document = Document(f)
    for table in document.tables:
        if len(table.columns) < _COLUMNS_USED:
            continue
        # Row 0 is the header; cells are read from the table being iterated
        # (the original always read from the first table of the document).
        for row_idx in range(1, len(table.rows)):
            cells = [
                table.cell(row_idx, col).text for col in range(_COLUMNS_USED)
            ]
            print(_format_row(*cells))


if __name__ == "__main__":
    parse_docx('./test.docx')