一、pandas
pandas模块是数据分析的大杀器,它使得对于文件相关的操作变得简单。
看一下它的简单使用
import pandas as pd # 读取 df = pd.read_csv('all_forum_info.csv') print(df.info()) # 写入 df.to_csv('data.csv')
具体用法参照我的博客pandas系列 https://www.cnblogs.com/zhangyafei/p/10685438.html
二、csv
1.读取
import csv # 读取 with open('all_forum_info.csv', encoding='utf-8') as myFile: lines = csv.reader(myFile) for line in lines: print(line) ['hgroup', 'forum_topic', 'forum_url', 'count_topic', 'count_post'] ['Welcome to Breastcancer.org', 'Info & Resources for New Patients & Members - Please No Posts!', 'https://community.breastcancer.org/forum/131', '11', '11'] ['Welcome to Breastcancer.org', 'Acknowledging and honoring our Community', 'https://community.breastcancer.org/forum/135', '111', '5,372'] ['Not Diagnosed but Concerned', 'Not Diagnosed But Worried', 'https://community.breastcancer.org/forum/83', '16,638', '133,217'] ['Not Diagnosed but Concerned', 'Waiting for Test Results', 'https://community.breastcancer.org/forum/62', '6,712', '62,439'] ['Not Diagnosed but Concerned', 'Benign Breast Conditions', 'https://community.breastcancer.org/forum/148', '795', '4,094'] ['Not Diagnosed but Concerned', 'High Risk for Breast Cancer', 'https://community.breastcancer.org/forum/47', '2,146', '23,435'] ['Tests, Treatments & Side Effects', 'Just Diagnosed', 'https://community.breastcancer.org/forum/5', '6,894', '103,851'] ['Tests, Treatments & Side Effects', 'Diagnosed and Waiting for Test Results', 'https://community.breastcancer.org/forum/147', '592', '6,442']
2. 写入
""" 写入 'w'表示写模式。 首先open()函数打开当前路径下的名字为't.csv'的文件,如果不存在这个文件,则创建它,返回myFile文件对象。 csv.writer(myFile)返回writer对象myWriter。 writerow()方法是一行一行写入,writerows方法是一次写入多行。 注意:如果文件't.csv'事先存在,调用writer函数会先清空原文件中的文本,再执行writerow/writerows方法。 """ with open('data.csv', 'w', encoding='utf-8') as myFile: myWriter = csv.writer(myFile) myWriter.writerow(['title', '数学']) myWriter.writerow([8, 'h']) myList = [[1, 2, 3], [4, 5, 6]] myWriter.writerows(myList)
data.csv
title,数学 8,h 1,2,3 4,5,6
修改分割符
csv.register_dialect('mydialect', delimiter='|', quoting=csv.QUOTE_ALL) with open('test.csv', 'r') as myFile: lines = csv.reader(myFile, 'mydialect') # print(lines.line_num) for line in lines: print(line) ['1', '2', '3'] ['2', '3', '4']
三、xlrd, xlwt
1. 读取
import xlrd myWorkbook = xlrd.open_workbook('data.xlsx') # 读取excel的三种方式 # 方式一 mySheets = myWorkbook.sheets() # 获取工作表list。 mySheet = mySheets[0] # 通过索引顺序获取。 # 方式二 # mySheet1 = myWorkbook.sheet_by_index(0) # 通过索引顺序获取。 # 方式三 # mySheet2 = myWorkbook.sheet_by_name('table') # 通过名称获取。 # 获取行数和列数 nrows = mySheet.nrows ncols = mySheet.ncols print(nrows, ncols) # 获取一行和一列 myRowValues = mySheet.row_values(0) # i是行数,从0开始计数,返回list对象。 myColValues = mySheet.col_values(0) # i是列数,从0开始计数,返回list对象。 print(myRowValues, myColValues) # 读取单元格数据 # myCell = mySheet.cell(0, 0) # 获取单元格,i是行数,j是列数,行数和列数都是从0开始计数。 # myCellValue = myCell.value # 通过单元格获取单元格数据。 myCellValue = mySheet.cell_value(0, 0) # 直接获取单元格数据,i是行数,j是列数,行数和列数都是从0开始计数。 print(myCellValue)
2. 写入
# 写入 import xlwt # 创建Excel工作薄 myWorkbook = xlwt.Workbook() # 添加Excel工作表 mySheet = myWorkbook.add_sheet('A Test Sheet') # 写入数据 myStyle = xlwt.easyxf('font: name Times New Roman, color-index red, bold on', num_format_str='#,##0.00') #数据格式 mySheet.write(1, 1, 1234.56, myStyle) mySheet.write(2, 0, 1) #写入A3,数值等于1 mySheet.write(2, 1, 1) #写入B3,数值等于1 mySheet.write(2, 2, xlwt.Formula("A3+B3")) #写入C3,数值等于2(A3+B3) # 保存 myWorkbook.save('excelFile.xlsx')
xlutils结合xlrd可以达到修改excel文件目的
import xlrd from xlutils.copy import copy workbook = xlrd.open_workbook('excelFile.xlsx') workbooknew = copy(workbook) ws = workbooknew.get_sheet(0) ws.write(3, 0, 'changed!') workbooknew.save('excelFilecopy.xls')
openpyxl可以对excel文件进行读写操作
from openpyxl import Workbook from openpyxl import load_workbook from openpyxl.writer.excel import ExcelWriter workbook_ = load_workbook("data.xlsx") sheetnames = workbook_.get_sheet_names() # 获得表单名字 print(sheetnames) sheet = workbook_.get_sheet_by_name(sheetnames[0]) print(sheet.cell(row=3, column=3).value) sheet['A1'] = '47' workbook_.save("data_new.xlsx") wb = Workbook() ws = wb.active ws['A1'] = 4 wb.save("data_new2.xlsx")
xlsxwriter可以写excel文件并加上图表
import xlsxwriter def get_chart(series): chart = workbook.add_chart({'type': 'line'}) for ses in series: name = ses["name"] values = ses["values"] chart.add_series({ 'name': name, 'categories': 'A2:A10', 'values': values }) chart.set_size({'width': 700, 'height': 350}) return chart if __name__ == '__main__': workbook = xlsxwriter.Workbook('H5应用中心关键数据及趋势.xlsx') worksheet = workbook.add_worksheet("每日PV,UV") headings = ['日期', '平均值'] worksheet.write_row('A1', headings) index = 0 for row in range(1, 10): for com in [0, 1]: worksheet.write(row, com, index) index += 1 series = [{"name": "平均值", "values": "B2:B10"}] chart = get_chart(series) chart.set_title({'name': '每日页面分享数据'}) worksheet.insert_chart('H7', chart) workbook.close()