import openpyxl


class ExceltoExcel():
    """Summarise a contest award workbook per university into a new workbook.

    The active sheet of the input workbook is read with a fixed layout:
    column D holds a team's award level and columns F, H and J hold the
    universities of its members. Column F is treated as the team leader's
    university and is the only one credited with the team itself.
    """

    # Award levels, in the order they appear as columns of the result sheet.
    REWARDS = ('一等奖', '二等奖', '三等奖', '成功参与奖')
    # Columns holding the members' universities; the first is the team leader.
    MEMBER_COLUMNS = ('F', 'H', 'J')

    def __init__(self, file):
        """Load the workbook at ``file`` and create an empty result workbook."""
        self.universityData = {}
        self.wb = openpyxl.load_workbook(file)
        self.wb_new = openpyxl.Workbook()  # workbook that receives the summary
        self.sheet = self.wb.active
        self.new_sheet = self.wb_new.active  # sheet the summary is written to
        self.maxrow = self.sheet.max_row
        self.maxcol = self.sheet.max_column

    def todict(self):
        """Count award winners and teams per university.

        Returns a dict sorted by university name of the form::

            {'同济大学': {'一等奖': {'team': 1, 'num': 3},
                          '二等奖': {'team': 2, 'num': 6},
                          '三等奖': {'team': 0, 'num': 0},
                          '成功参与奖': {'team': 0, 'num': 0}},
             ...}

        ``num`` is the number of awarded members from that university and
        ``team`` the number of awarded teams whose leader (column F) is from
        that university. Every university always carries all four award
        keys, in ``REWARDS`` order, because ``write_list_to_excel`` relies on
        that fixed layout.

        Empty member cells are ignored, and rows whose award cell is empty or
        not one of ``REWARDS`` contribute no counts. The result is rebuilt
        from scratch on every call and is also stored in
        ``self.universityData``.
        """
        data = {}
        # Row 1 is skipped (range starts at 2): it is assumed to be a header.
        for row in range(2, self.maxrow + 1):
            rewardcell = self.sheet['D' + str(row)].value
            for k in self.MEMBER_COLUMNS:
                university = self.sheet[k + str(row)].value
                if university is None:
                    # Blank cell: a trailing empty row or a team with fewer
                    # members. A None key would also break the sort below.
                    continue
                per_reward = data.setdefault(university, {})
                for reward in self.REWARDS:
                    per_reward.setdefault(reward, {'team': 0, 'num': 0})
                if rewardcell not in per_reward:
                    # Unknown or missing award level: nothing to count.
                    continue
                per_reward[rewardcell]['num'] += 1
                if k == 'F':
                    # Only the leader's university is credited with the team.
                    per_reward[rewardcell]['team'] += 1
        # Sort by university name; dict() keeps that order.
        self.universityData = dict(sorted(data.items(), key=lambda item: item[0]))
        return self.universityData

    def find(self, target, dictData, notFound='没找到'):
        """Return the value of the first ``target`` key met in a nested dict.

        The search uses a stack, so nested dicts added later are explored
        first. ``notFound`` is returned when the key does not occur.
        """
        queue = [dictData]
        while queue:
            data = queue.pop()
            for key, value in data.items():
                if key == target:
                    return value
                if isinstance(value, dict):
                    queue.append(value)
        return notFound

    def findAll(self, target, dictData, notFound=None):
        """Collect the values of every ``target`` key in a nested dict.

        Because the traversal pops from the end of a stack, values come back
        in reverse order of the nested dicts; callers reverse the result to
        get insertion order. When nothing matches, ``notFound`` is returned,
        or a fresh empty list if it was not given.
        """
        queue = [dictData]
        result = []
        while queue:
            data = queue.pop()
            for key, value in data.items():
                if key == target:
                    result.append(value)
                elif isinstance(value, dict):
                    queue.append(value)
        if result:
            return result
        # A new list per call: a shared default list would be mutated by
        # callers that reverse the result in place.
        return [] if notFound is None else notFound

    def write_list_to_excel(self, dictData, num_list, team_list,
                            output_path='result.xlsx'):
        """Write the per-university summary table and save the result workbook.

        ``num_list`` and ``team_list`` are flat lists holding, for each
        university of ``dictData`` in order, one count per award level in
        ``REWARDS`` order. Each row of the sheet gets the university name,
        the member count per award, the member total and the team total.

        The workbook is saved to ``output_path`` and the overall number of
        teams is printed. Returns ``(university_name, list_slice,
        sublist_sum)``: the names, the per-university member counts and their
        sums. Raises ``IndexError`` if the lists hold fewer values than the
        universities require.
        """
        chunk = len(self.REWARDS)
        header = ['学校名称'] + list(self.REWARDS) + ['总人数', '队伍数量']
        for col, title in enumerate(header, start=1):
            self.new_sheet.cell(row=1, column=col, value=title)

        university_name = list(dictData)
        starts = range(0, len(num_list), chunk)
        list_slice = [num_list[i:i + chunk] for i in starts]  # members per award
        team_slice = [team_list[i:i + chunk] for i in starts]  # teams per award
        sublist_sum = [sum(part) for part in list_slice]
        team_sum = [sum(part) for part in team_slice]

        for offset, name in enumerate(university_name):
            row = offset + 2  # row 1 is the header
            self.new_sheet.cell(row=row, column=1, value=name)
            for col in range(chunk):
                self.new_sheet.cell(row=row, column=col + 2,
                                    value=list_slice[offset][col])
            self.new_sheet.cell(row=row, column=chunk + 2, value=sublist_sum[offset])
            self.new_sheet.cell(row=row, column=chunk + 3, value=team_sum[offset])

        self.wb_new.save(output_path)
        print('共有{}支队伍参赛'.format(sum(team_sum)))
        return university_name, list_slice, sublist_sum


if __name__ == '__main__':
    excel1 = ExceltoExcel('example_A.xlsx')
    dictData = excel1.todict()
    find_num = excel1.findAll('num', dictData)
    find_team = excel1.findAll('team', dictData)
    # findAll yields values in reverse traversal order; restore dict order.
    find_num.reverse()
    find_team.reverse()
    university, list_slice, sublist_sum = excel1.write_list_to_excel(
        dictData, find_num, find_team)
保存新的表格(部分):
统计了一个表格中每个大学每个奖项的获奖人数与队伍数量,队伍只统计队长所在的学校
以下代码读取某一文件夹下的所有Excel表格(比如读取A~F题获奖名单总共6个Excel表格),统计每道题也就是每个表格中每个大学的获奖情况并将其写入同一个Excel结果文件中:
import openpyxl
import os


class ExceltoExcel():
    """Summarise one contest award workbook per university into a given sheet.

    The active sheet of the input workbook is read with a fixed layout:
    column D holds a team's award level and columns F, H and J hold the
    universities of its members. Column F is treated as the team leader's
    university and is the only one credited with the team itself.
    """

    # Award levels, in the order they appear as columns of the result sheet.
    REWARDS = ('一等奖', '二等奖', '三等奖', '成功参与奖')
    # Columns holding the members' universities; the first is the team leader.
    MEMBER_COLUMNS = ('F', 'H', 'J')

    def __init__(self, file, new_sheet):
        """Load the workbook at ``file``; results are written to ``new_sheet``."""
        self.universityData = {}
        self.wb = openpyxl.load_workbook(file)
        self.sheet = self.wb.active
        self.new_sheet = new_sheet  # sheet of the shared result workbook
        self.maxrow = self.sheet.max_row
        self.maxcol = self.sheet.max_column

    def todict(self):
        """Count award winners and teams per university.

        Returns a dict sorted by university name of the form::

            {'同济大学': {'一等奖': {'team': 1, 'num': 3},
                          '二等奖': {'team': 2, 'num': 6},
                          '三等奖': {'team': 0, 'num': 0},
                          '成功参与奖': {'team': 0, 'num': 0}},
             ...}

        ``num`` is the number of awarded members from that university and
        ``team`` the number of awarded teams whose leader (column F) is from
        that university. Every university always carries all four award
        keys, in ``REWARDS`` order, because ``write_list_to_excel`` relies on
        that fixed layout.

        Empty member cells are ignored, and rows whose award cell is empty or
        not one of ``REWARDS`` contribute no counts. The result is rebuilt
        from scratch on every call and is also stored in
        ``self.universityData``.
        """
        data = {}
        # Row 1 is skipped (range starts at 2): it is assumed to be a header.
        for row in range(2, self.maxrow + 1):
            rewardcell = self.sheet['D' + str(row)].value
            for k in self.MEMBER_COLUMNS:
                university = self.sheet[k + str(row)].value
                if university is None:
                    # Blank cell: a trailing empty row or a team with fewer
                    # members. A None key would also break the sort below.
                    continue
                per_reward = data.setdefault(university, {})
                for reward in self.REWARDS:
                    per_reward.setdefault(reward, {'team': 0, 'num': 0})
                if rewardcell not in per_reward:
                    # Unknown or missing award level: nothing to count.
                    continue
                per_reward[rewardcell]['num'] += 1
                if k == 'F':
                    # Only the leader's university is credited with the team.
                    per_reward[rewardcell]['team'] += 1
        # Sort by university name; dict() keeps that order.
        self.universityData = dict(sorted(data.items(), key=lambda item: item[0]))
        return self.universityData

    def find(self, target, dictData, notFound='没找到'):
        """Return the value of the first ``target`` key met in a nested dict.

        The search uses a stack, so nested dicts added later are explored
        first. ``notFound`` is returned when the key does not occur.
        """
        queue = [dictData]
        while queue:
            data = queue.pop()
            for key, value in data.items():
                if key == target:
                    return value
                if isinstance(value, dict):
                    queue.append(value)
        return notFound

    def findAll(self, target, dictData, notFound=None):
        """Collect the values of every ``target`` key in a nested dict.

        Because the traversal pops from the end of a stack, values come back
        in reverse order of the nested dicts; callers reverse the result to
        get insertion order. When nothing matches, ``notFound`` is returned,
        or a fresh empty list if it was not given.
        """
        queue = [dictData]
        result = []
        while queue:
            data = queue.pop()
            for key, value in data.items():
                if key == target:
                    result.append(value)
                elif isinstance(value, dict):
                    queue.append(value)
        if result:
            return result
        # A new list per call: a shared default list would be mutated by
        # callers that reverse the result in place.
        return [] if notFound is None else notFound

    def write_list_to_excel(self, dictData, num_list, team_list):
        """Write the per-university summary table into ``self.new_sheet``.

        ``num_list`` and ``team_list`` are flat lists holding, for each
        university of ``dictData`` in order, one count per award level in
        ``REWARDS`` order. Each row of the sheet gets the university name,
        the member count per award, the member total and the team total.
        Saving the workbook is left to the caller.

        Returns ``(university_name, list_slice, sublist_sum, team_sum)``: the
        names, the per-university member counts, their sums, and the
        per-university team totals. Raises ``IndexError`` if the lists hold
        fewer values than the universities require.
        """
        chunk = len(self.REWARDS)
        header = ['学校名称'] + list(self.REWARDS) + ['总人数', '队伍数量']
        for col, title in enumerate(header, start=1):
            self.new_sheet.cell(row=1, column=col, value=title)

        university_name = list(dictData)
        starts = range(0, len(num_list), chunk)
        list_slice = [num_list[i:i + chunk] for i in starts]  # members per award
        team_slice = [team_list[i:i + chunk] for i in starts]  # teams per award
        sublist_sum = [sum(part) for part in list_slice]
        team_sum = [sum(part) for part in team_slice]

        for offset, name in enumerate(university_name):
            row = offset + 2  # row 1 is the header
            self.new_sheet.cell(row=row, column=1, value=name)
            for col in range(chunk):
                self.new_sheet.cell(row=row, column=col + 2,
                                    value=list_slice[offset][col])
            self.new_sheet.cell(row=row, column=chunk + 2, value=sublist_sum[offset])
            self.new_sheet.cell(row=row, column=chunk + 3, value=team_sum[offset])
        return university_name, list_slice, sublist_sum, team_sum


if __name__ == '__main__':
    file_path = './file'  # directory holding one workbook per contest problem
    # Keep real workbooks only; names starting with '~$' are the temporary
    # lock files Excel creates next to an open workbook.
    names = [name for name in os.listdir(file_path)
             if name.lower().endswith(('.xlsx', '.xlsm'))
             and not name.startswith('~$')]
    # File names look like '2019A.xlsx': the fifth character is the problem
    # letter, which gives the problem order.
    names.sort(key=lambda x: x[4:5])
    files = [os.path.join(file_path, name) for name in names]

    wb_new = openpyxl.Workbook()  # single workbook collecting every summary
    with open('result.txt', 'w', encoding='utf-8') as f:
        for k, file in enumerate(files):
            new_sheet = wb_new.create_sheet('list', index=k)  # one sheet per problem
            excel = ExceltoExcel(file, new_sheet)
            dictData = excel.todict()  # sorted per-university counts
            find_num = excel.findAll('num', dictData)  # member counts per award
            find_team = excel.findAll('team', dictData)  # team counts (leader's university)
            # findAll yields values in reverse traversal order; restore dict order.
            find_num.reverse()
            find_team.reverse()
            _, _, _, team_sum = excel.write_list_to_excel(dictData, find_num, find_team)
            # basename works with either path separator, unlike split('/').
            title = os.path.basename(file)[4:5]
            message = '{}题共有{}支队伍获奖'.format(title, sum(team_sum))
            print(message)
            f.write(message + '\n')  # one record per line
    wb_new.save('result.xlsx')
A题共有781支队伍获奖
B题共有888支队伍获奖
C题共有1057支队伍获奖
D题共有4259支队伍获奖
E题共有4193支队伍获奖
F题共有781支队伍获奖
我这里的F题获奖名单和A题是相同的,所以A和F的统计结果相同。