• 【Python】openpyxl统计2019年数学建模获奖情况【1】


      1 import openpyxl
      2 
      3 class ExceltoExcel():
      4 
      5     def __init__(self, file):
      6         self.universityData = {}
      7         self.wb = openpyxl.load_workbook(file)
      8         self.wb_new =openpyxl.Workbook()  # 新建一个表格来存储生成的数据
      9         self.sheet = self.wb.active
     10         self.new_sheet = self.wb_new.active  # 新表单
     11         self.maxrow = self.sheet.max_row
     12         self.maxcol = self.sheet.max_column
     13 
     14 
     15     def todict(self):
     16         '''
     17         统计每个大学全部的获奖数量,分别列出是几等奖和对应的数量
     18         {'同济大学':{'一等奖':{'team':1,'num'=3},'二等奖':{'team':2,'num'=6},'三等奖':{'team':1,'num'=3},'成功参与奖':{'team':1,'num'=3}},
     19         '清华大学':{'一等奖':{'team':1,'num'=3},'二等奖':{'team':1,'num'=3},'三等奖':{'team':1,'num'=3},'成功参与奖':{'team':1,'num'=3}},...}
     20         统计所有人数
     21         Fill in universityData with each rewardship‘s popularity
     22         '''
     23         # tolal_team = self.maxrow
     24         for row in range(2, self.maxrow + 1):
     25             # 取每个单元格的数据
     26             sheet_col = ['F', 'H', 'J']
     27             rewardcell = self.sheet['D' + str(row)].value  # D列奖项
     28             rewards = ['一等奖', '二等奖', '三等奖', '成功参与奖']
     29             for k in sheet_col:
     30                 university = self.sheet[k + str(row)].value  # 每列大学的名称
     31                 # team = self.sheet['C' + str(row)].value  # C列队伍
     32                 # 确定键值
     33                 self.universityData.setdefault(university, {})
     34                 for reward in rewards:
     35                     if reward == rewardcell:
     36                         self.universityData[university].setdefault(rewardcell, {'team': 0, 'num': 0})
     37                         self.universityData[university][rewardcell]['num'] += 1  # 统计各个奖项的所有人数
     38                     else:
     39                         self.universityData[university].setdefault(reward, {'team': 0, 'num': 0})
     40 
     41                 if k == 'F':
     42                     self.universityData[university][rewardcell]['team'] += 1  # 只统计队长所在大学
     43         self.universityData = sorted(self.universityData.items(), key=lambda item: item[0])  # 按照键值排序返回元祖
     44         self.universityData = dict(self.universityData)  # 将元祖转换成字典
     45         # print('universityData', self.universityData)
     46         # print(type(self.universityData))
     47         return self.universityData
     48 
     49     # 查找单个键
     50     def find(self, target, dictData, notFound='没找到'):
     51         # 倒序查找第一个出现的需要查找的键的值
     52         queue = [dictData]  # 将字典存入列表
     53         while len(queue) > 0:
     54             data = queue.pop()  # data是在queue中取出的最后一个元素,也就是原始字典;此时的queue为空列表[]
     55             print('data', data)
     56             for key, value in data.items():
     57                 if key == target:
     58                     return value
     59                 elif type(value) == dict:
     60                     queue.append(value)
     61         return notFound
     62 
     63     # 有多个同名键在字典里时,可以用这个方法
     64     def findAll(self, target, dictData, notFound=[]):
     65         # 倒序查找所有出现的需要查找的键的值
     66         queue = [dictData]
     67         result = []
     68         while len(queue) > 0:
     69             data = queue.pop()
     70             for key, value in data.items():
     71                 if key == target:
     72                     result.append(value)
     73                 elif type(value) == dict:
     74                     queue.append(value)
     75         if not result: result = notFound
     76         return result
     77 
     78     def write_list_to_excel(self, dictData, num_list, team_list):
     79         list_slice = []  # 人数切片
     80         team_slice = []  # 队伍切片
     81         sublist_sum = []  # 每个人数切片的和
     82         team_sum = []  # 每个队伍切片的和
     83         k = 0
     84         row_1 = ['学校名称','一等奖','二等奖','三等奖','成功参与奖','总人数','队伍数量']
     85         for i in range(len(row_1)):
     86             self.new_sheet.cell(row=1,column=i+1,value=row_1[i])
     87         university_name = []
     88         for key in dictData.keys():
     89             university_name.append(key)
     90         for index, name in enumerate(university_name):
     91             self.new_sheet['A'+str(index+2)] = name
     92         while k < len(num_list):
     93             sub_list = num_list[k:k+4]  # 人数子集
     94             team_sub = team_list[k:k+4]  # 队伍子集
     95             list_slice.append(sub_list)  # 切片后存入列表
     96             team_slice.append(team_sub)
     97             sumlist = sum(sub_list)  # 计算每个子集的和
     98             teamsum = sum(team_sub)
     99             sublist_sum.append(sumlist)  # 将每个子集的和加入列表
    100             team_sum.append(teamsum)
    101             k += 4
    102             if k > len(num_list):
    103                 break
    104         for row in range(2, len(university_name)+2):
    105             for col in range(2, 6):
    106                 self.new_sheet.cell(column=col, row=row, value=list_slice[row-2][col-2])
    107             self.new_sheet.cell(column=6, row=row, value=sublist_sum[row-2])
    108             self.new_sheet.cell(column=7, row=row, value=team_sum[row - 2])
    109         self.wb_new.save('result.xlsx')
    110         print('共有{}支队伍参赛'.format(sum(team_sum)))
    111         return university_name, list_slice, sublist_sum
    112 
    113 
    114 if __name__ == '__main__':
    115     excel1 = ExceltoExcel('example_A.xlsx')
    116     dictData = excel1.todict()
    117     # find_one = excel1.find('team', dictData)
    118     find_num = excel1.findAll('num', dictData)
    119     find_team = excel1.findAll('team', dictData)
    120     find_num.reverse()
    121     find_team.reverse()
    122     # print(find_num)
    123     # print(find_team)
    124     university, list_slice, sublist_sum = excel1.write_list_to_excel(dictData,find_num, find_team)
    125     # print('university name', university)
    126     # print('list_slice', list_slice)
    127     # print('sublist_sum', sublist_sum)

    保存新的表格(部分):

    统计了一个表格中每个大学每个奖项的获奖人数与队伍数量,队伍只统计队长所在的学校

    以下代码读取某一文件夹下的所有Excel表格(比如读取A~F题获奖名单总共6个Excel表格),统计每道题也就是每个表格中每个大学的获奖情况并将其写入同一个Excel结果文件中:

      1 import openpyxl
      2 import os
      3 
      4 class ExceltoExcel():
      5 
      6     def __init__(self, file, new_sheet):
      7         self.universityData = {}
      8         self.wb = openpyxl.load_workbook(file)
      9         # self.wb_new =openpyxl.Workbook()  # 新建一个表格来存储生成的数据
     10         self.sheet = self.wb.active
     11         self.new_sheet = new_sheet  # 新表单
     12         self.maxrow = self.sheet.max_row
     13         self.maxcol = self.sheet.max_column
     14 
     15 
     16     def todict(self):
     17         '''
     18         统计每个大学全部的获奖数量,分别列出是几等奖和对应的数量
     19         {'同济大学':{'一等奖':{'team':1,'num'=3},'二等奖':{'team':2,'num'=6},'三等奖':{'team':1,'num'=3},'成功参与奖':{'team':1,'num'=3}},
     20         '清华大学':{'一等奖':{'team':1,'num'=3},'二等奖':{'team':1,'num'=3},'三等奖':{'team':1,'num'=3},'成功参与奖':{'team':1,'num'=3}},...}
     21         统计所有人数
     22         Fill in universityData with each rewardship‘s popularity
     23         '''
     24         # tolal_team = self.maxrow
     25         for row in range(2, self.maxrow + 1):
     26             # 取每个单元格的数据
     27             sheet_col = ['F', 'H', 'J']
     28             rewardcell = self.sheet['D' + str(row)].value  # D列奖项
     29             rewards = ['一等奖', '二等奖', '三等奖', '成功参与奖']
     30             for k in sheet_col:
     31                 university = self.sheet[k + str(row)].value  # 每列大学的名称
     32                 # team = self.sheet['C' + str(row)].value  # C列队伍
     33                 # 确定键值
     34                 self.universityData.setdefault(university, {})
     35                 for reward in rewards:
     36                     if reward == rewardcell:
     37                         self.universityData[university].setdefault(rewardcell, {'team': 0, 'num': 0})
     38                         self.universityData[university][rewardcell]['num'] += 1  # 统计各个奖项的所有人数
     39                     else:
     40                         self.universityData[university].setdefault(reward, {'team': 0, 'num': 0})
     41 
     42                 if k == 'F':
     43                     self.universityData[university][rewardcell]['team'] += 1  # 只统计队长所在大学
     44         self.universityData = sorted(self.universityData.items(), key=lambda item: item[0])  # 按照键值排序返回元祖
     45         self.universityData = dict(self.universityData)  # 将元祖转换成字典
     46         # print('universityData', self.universityData)
     47         # print(type(self.universityData))
     48         return self.universityData
     49 
     50     # 查找单个键
     51     def find(self, target, dictData, notFound='没找到'):
     52         # 倒序查找第一个出现的需要查找的键的值
     53         queue = [dictData]  # 将字典存入列表
     54         while len(queue) > 0:
     55             data = queue.pop()  # data是在queue中取出的最后一个元素,也就是原始字典;此时的queue为空列表[]
     56             print('data', data)
     57             for key, value in data.items():
     58                 if key == target:
     59                     return value
     60                 elif type(value) == dict:
     61                     queue.append(value)
     62         return notFound
     63 
     64     # 有多个同名键在字典里时,可以用这个方法
     65     def findAll(self, target, dictData, notFound=[]):
     66         # 倒序查找所有出现的需要查找的键的值
     67         queue = [dictData]
     68         result = []
     69         while len(queue) > 0:
     70             data = queue.pop()
     71             for key, value in data.items():
     72                 if key == target:
     73                     result.append(value)
     74                 elif type(value) == dict:
     75                     queue.append(value)
     76         if not result: result = notFound
     77         return result
     78 
     79     def write_list_to_excel(self, dictData, num_list, team_list):
     80         list_slice = []  # 人数切片
     81         team_slice = []  # 队伍切片
     82         sublist_sum = []  # 每个人数切片的和
     83         team_sum = []  # 每个队伍切片的和
     84         k = 0
     85         row_1 = ['学校名称','一等奖','二等奖','三等奖','成功参与奖','总人数','队伍数量']
     86         for i in range(len(row_1)):
     87             self.new_sheet.cell(row=1,column=i+1,value=row_1[i])
     88         university_name = []
     89         for key in dictData.keys():
     90             university_name.append(key)
     91         for index, name in enumerate(university_name):
     92             self.new_sheet['A'+str(index+2)] = name
     93         while k < len(num_list):
     94             sub_list = num_list[k:k+4]  # 人数子集
     95             team_sub = team_list[k:k+4]  # 队伍子集
     96             list_slice.append(sub_list)  # 切片后存入列表
     97             team_slice.append(team_sub)
     98             sumlist = sum(sub_list)  # 计算每个子集的和
     99             teamsum = sum(team_sub)
    100             sublist_sum.append(sumlist)  # 将每个子集的和加入列表
    101             team_sum.append(teamsum)
    102             k += 4
    103             if k > len(num_list):
    104                 break
    105         for row in range(2, len(university_name)+2):
    106             for col in range(2, 6):
    107                 self.new_sheet.cell(column=col, row=row, value=list_slice[row-2][col-2])
    108             self.new_sheet.cell(column=6, row=row, value=sublist_sum[row-2])
    109             self.new_sheet.cell(column=7, row=row, value=team_sum[row - 2])
    110         return university_name, list_slice, sublist_sum, team_sum
    111 
    112 
    113 if __name__ == '__main__':
    114     file_path = './file'  # excel文件路径
    115     files = []  # 存储excel文件名
    116     list = os.listdir(file_path)  # 列出excel文件路径下所有的文件
    117     list.sort(key=lambda x: x[4:5])  # 按照题目顺序排序
    118     # print('list', list)
    119     for i in range(len(list)):
    120         item = os.path.join(file_path, list[i])
    121         files.append(item)
    122     # print('files', files)
    123     wb_new = openpyxl.Workbook()  # 新建一个表格来存储生成的数据
    124     f = open('result.txt', 'w')
    125     for k,file in enumerate(files):
    126         new_sheet = wb_new.create_sheet('list', index=k)  # 插入新表单
    127         excel = ExceltoExcel(file, new_sheet)
    128         dictData = excel.todict()  # 得到当前表格排序后的字典
    129         # find_one = excel1.find('team', dictData)
    130         find_num = excel.findAll('num', dictData)  # 查找当前表格每个大学每个奖项的获奖人数
    131         find_team = excel.findAll('team', dictData)  # # 查找当前表格每个大学每个奖项的获奖队伍数,只统计队长所在的学校
    132         find_num.reverse()  # 正序排列
    133         find_team.reverse()  # 正序排列
    134         # print(find_num)
    135         # print(find_team)
    136         _, _, _, team_sum = excel.write_list_to_excel(dictData,find_num, find_team)
    137         wb_new.save('result.xlsx')
    138         # print('university name', university)
    139         # print('list_slice', list_slice)
    140         # print('sublist_sum', sublist_sum)
    141         title = file.split('/')[2][4:5]  # 字符串分割提取题目,原标题为'./file/2019A.xlsx'
    142         print('{}题共有{}支队伍获奖'.format(title, sum(team_sum)))
    143         f.write('{}题共有{}支队伍获奖
    '.format(title, sum(team_sum)))
    1 A题共有781支队伍获奖
    2 B题共有888支队伍获奖
    3 C题共有1057支队伍获奖
    4 D题共有4259支队伍获奖
    5 E题共有4193支队伍获奖
    6 F题共有781支队伍获奖

    我这里的F题获奖名单和A题是相同的,所以A和F的统计结果相同。

  • 相关阅读:
    JAVA程序员常用英语
    计算机常用英语汇总
    从一个前端角度来说页面的优化:雅虎35条(转)
    前端学习(五)
    PS常用快捷键(收藏)
    web前端开发规范文档
    前端学习(四)
    Web前端开发面试题
    linux --批量修改文件内容
    git --添加多个文件
  • 原文地址:https://www.cnblogs.com/DJames23/p/12726286.html
Copyright © 2020-2023  润新知