import pymysql
import xlrd
import re
import linecache
import docx
import sys
import docx
from docx import Document #导入库
import prettytable as pt
import xlwt
#连接数据库
try:
db = pymysql.connect(host="localhost",user="root",
passwd="123456",
db="zch",
charset='utf8')
except:
print("could not connect to mysql server")
#创建excel存储
def open_excel():
try:
book = xlrd.open_workbook("test.xlsx") #文件名,把文件与py文件放在同一目录下
except:
print("open excel file failed!")
try:
sheet = book.sheet_by_name("test") #execl里面的worksheet1
return sheet
except:
print("locate worksheet in excel failed!")
#向数据库插入数据
def insert_deta(x,y):
sheet = open_excel()
cursor = db.cursor()
for b in range(1, sheet.nrows): #第一行是标题名,对应表中的字段名所以应该从第二行开始,计算机以0开始计数,所以值是1
row_data = sheet.row_values(b)
value = (row_data[0],row_data[1],row_data[2],row_data[3],row_data[4],x,y)
sql = "INSERT INTO enclosure(sheet_nrow,sheet_content1,sheet_content2,sheet_content3,sheet_content4,file_docx_id,file_txt_id)VALUES(%s,%s,%s,%s,%s,%s,%s)"
cursor.execute(sql,value) #执行sql语句
db.commit()
cursor.close() #关闭连接
cursor = db.cursor()
sql1=cursor.execute("select file_docx_id from file_docx")
info=cursor.fetchmany(sql1)#获取查询结果
#遍历id获得id对应的文件地址,取出并在本机搜索读取
for i in info:
i = int(i[0])
print(i)
sql = cursor.execute('select file_txt_id,file_docx_name from file_docx where file_docx_id=%d'%i)
info = cursor.fetchmany(sql)
info1 = int(info[0][0])
category = cursor.execute('select file_category_id from file_txt where file_txt_id=%d'%info1)
file_category=cursor.fetchmany(category)
address = cursor.execute('select file_docx_address from file_category where file_category_id=%d'%file_category[0][0])
file_docx_address=cursor.fetchmany(address)
if file_docx_address[0][0] is None :
print("当前文件已经全部遍历完")
break
else:
path=file_docx_address[0][0] +"\"+ info[0][1]
document = Document(path)
ables = document.tables #获取文件中的表格集
tb = pt.PrettyTable()
book = xlwt.Workbook(encoding = 'utf-8')
test1 = book.add_sheet(u'test',cell_overwrite_ok = True)
for j in range(0,2):
# table = tables[j]#获取文件中的第i-1个表格
try:
print('找到表格')
table = tables[j]
result = []
for q in range(0,len(table.rows)):#从表格第一行开始循环读取表格数据
for a in range(0,len(table.columns)):#从第一列开始循环读取表格数据
result = table.cell(q,a).text
test1.write(q,a,result)
book.save('test.xlsx')
print('运行成功')
except :
print('没有表格了')
break
insert_deta(i,info1)
db.close()#关闭数据
print ("ok ")