1.安装
pip install openpyxl
2.使用方法
2.1 给单元格和行赋值
from openpyxl import Workbook wb = Workbook() #获取第一个活动的sheet,默认创建excel会有一个sheet ws = wb.active #单元格直接赋值 ws['A1'] = 42 #按行赋值 ws.append([1,2,3]) import datetime ws['A2'] = datetime.datetime.now() wb.save("1.xlsx")
2.2 创建sheet
from openpyxl import Workbook wb = Workbook() #在末尾插入 ws1 = wb.create_sheet("Mysheet") ws1.title = "New Title" #在开始插入 ws2 = wb.create_sheet("Mysheet", 0) ws2.title="你好" #在倒数第二个位置插入 ws3= wb.create_sheet("Mysheet", -1) ws3.title="third" ws1.sheet_properties.tabColor = "1072BA" for sheet in wb: print(sheet.title) wb.save("create_sheet.xlsx")
2.3 操作单元格
from openpyxl import Workbook wb=Workbook() ws1=wb.create_sheet("Mysheet") ws1["A1"]=124.45 ws1["B2"]="您好" #为第四行第二列也就是B4赋值10 d=ws1.cell(row=4, column=2, value=10) wb.save("cell.xlsx")
2.4 批量操作单元格
from openpyxl import Workbook wb = Workbook() ws = wb.active for row in ws.iter_rows(min_row=1, max_col=3, max_row=2): for cell in row: cell.value="test" print(cell) for col in ws.iter_rows(min_row=3, max_col=3, max_row=4): for cell in col: cell.value="hello" print(cell) ws['c9']='hello world' tuple(ws.rows) wb.save("batchcell.xlsx")
执行完之后,会在A1,B1,C1,A2,B2,C2赋值test,在A3,B3,C3,A4,B4,C4赋值hello
2.5 操作已经存在的Excel
# -*- coding: utf-8 -*- from openpyxl import Workbook from openpyxl import load_workbook wb = load_workbook('sample.xlsx') wb.guess_types = True #猜测格式类型 ws=wb.active ws["D1"]="12%" # Save the file wb.save("sample.xlsx")
2.6 使用公式
from openpyxl import Workbook from openpyxl import load_workbook wb = load_workbook("sample.xlsx") ws1 = wb.active ws1["A1"] = 1 ws1["A2"] = 2 ws1["A3"] = 3 ws1["A4"] = "=SUM(1,1)" ws1["A5"] = "=SUM(A1:A3)" wb.save("sample.xlsx")
2.7 合并单元格
from openpyxl import Workbook from openpyxl import load_workbook wb = load_workbook("sample.xlsx") ws1 = wb.active ws1.merge_cells("A2:D2") #ws1.unmerge_cells("A2:D2") ws1.merge_cells(start_row=2,start_column=1,end_row=2,end_column=4) #ws1.unmerge_cells(start_row=2,start_column=1,end_row=2,end_column=4) wb.save("sample.xlsx")
2.8 插入图片
from openpyxl import load_workbook from openpyxl.drawing.image import Image wb = load_workbook("sample.xlsx") ws1 = wb.active img = Image("jielun.png") ws1.add_image(img, 'A1') wb.save("sample.xlsx")
2.9 隐藏单元格
from openpyxl import load_workbook from openpyxl.drawing.image import Image wb = load_workbook("sample.xlsx") ws1 = wb.active ws1.column_dimensions.group('A', 'D', hidden=True) wb.save("sample.xlsx")
2.10 画柱状图
from openpyxl import load_workbook from openpyxl import Workbook from openpyxl.chart import BarChart,Reference,Series wb = Workbook() ws = wb.active for i in range(10): ws.append([i]) values = Reference(ws, min_col=1, min_row=1, max_col=1, max_row=10) chart = BarChart() chart.add_data(values) ws.add_chart(chart, "E15") wb.save("sample.xlsx")
2.11 画饼状图
# -*- coding: utf-8 -*- from openpyxl import load_workbook from openpyxl import Workbook from openpyxl.chart import (PieChart , ProjectedPieChart, Reference) from openpyxl.chart.series import DataPoint data = [ ['Pie', 'Sold'], ['Apple', 50], ['Cherry', 30], ['Pumpkin', 10], ['Chocolate', 40], ] wb = Workbook() ws = wb.active for row in data: ws.append(row) pie = PieChart() labels = Reference(ws, min_col=1, min_row=2, max_row=5) data = Reference(ws, min_col=2, min_row=1, max_row=5) pie.add_data(data, titles_from_data=True) pie.set_categories(labels) pie.title = "Pies sold by category" # Cut the first slice out of the pie slice = DataPoint(idx=0, explosion=20) pie.series[0].data_points = [slice] ws.add_chart(pie, "D1") ws = wb.create_sheet(title="Projection") data = [ ['Page', 'Views'], ['Search', 95], ['Products', 4], ['Offers', 0.5], ['Sales', 0.5], ] for row in data: ws.append(row) projected_pie = ProjectedPieChart() projected_pie.type = "pie" projected_pie.splitType = "val" # split by value labels = Reference(ws, min_col=1, min_row=2, max_row=5) data = Reference(ws, min_col=2, min_row=1, max_row=5) projected_pie.add_data(data, titles_from_data=True) projected_pie.set_categories(labels) ws.add_chart(projected_pie, "A10") from copy import deepcopy projected_bar = deepcopy(projected_pie) projected_bar.type = "bar" projected_bar.splitType = 'pos' # split by position ws.add_chart(projected_bar, "A27") # Save the file wb.save("sample.xlsx")
2.12 表格
from openpyxl.worksheet.table import Table, TableStyleInfo wb = Workbook() ws = wb.active data = [ ['Apples', 10000, 5000, 8000, 6000], ['Pears', 2000, 3000, 4000, 5000], ['Bananas', 6000, 6000, 6500, 6000], ['Oranges', 500, 300, 200, 700], ] # add column headings. NB. these must be strings ws.append(["Fruit", "2011", "2012", "2013", "2014"]) for row in data: ws.append(row) tab = Table(displayName="Table1", ref="A1:E5") # Add a default style with striped rows and banded columns style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=True, showLastColumn=True, showRowStripes=True, showColumnStripes=True) #第一列是否和样式第一行颜色一行,第二列是否··· #是否隔行换色,是否隔列换色 tab.tableStyleInfo = style ws.add_table(tab) # Save the file wb.save("sample.xlsx")
2.13 字体
# -*- coding: utf-8 -*- from openpyxl import Workbook from openpyxl.styles import Font wb = Workbook() ws = wb.active ws["A1"] = "测试" ws["A2"] = "字体" col = ws.column_dimensions['A'] col.font = Font(bold=True) #将A列设定为粗体 row = ws.row_dimensions[1] row.font = Font(underline="single") #将第一行设定为下划线格式 # Save the file wb.save("sample.xlsx")
3.使用爬虫爬取贝壳网二手房的信息并存入到excel
import sys from os import path import time import urllib3 import requests import numpy as np from bs4 import BeautifulSoup from urllib import parse from openpyxl import Workbook from openpyxl import load_workbook headers=[{'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}, {'User-Agent':'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11'}, {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'}] def get_house(): page_num = 0; total_num = 0; wb = Workbook() ws = wb.create_sheet("housesheet") ws.append(["title","tall","year","house","area","direction", "tag","totalPrice", "unitPrice"]) while(1): page_num +=1 url = "https://bj.ke.com/ershoufang/pg"+str(page_num)+"/" print(url) http = urllib3.PoolManager() time.sleep(np.random.rand()*5) try: r = http.request("GET", url, headers=headers[page_num%len(headers)]) plain_text = r.data.decode() print(plain_text) except Exception as e: print(e) continue soup = BeautifulSoup(plain_text, features="lxml") ligroup = soup.find_all("li", class_="clear") for item in ligroup: try: title = item.find("div", class_="title").a.get("title") content = item.find("div", class_="houseInfo").get_text() contents = content.strip().split("|") tall = contents[0].strip().replace(" ","") year = contents[1].strip() house = contents[2].strip().replace(" ", "") area = contents[3].strip().replace(" ", "") direction = contents[4].strip() tag = item.find("div", class_="tag").find("span").get_text() totalPrice = item.find("div", class_="totalPrice").get_text() unitPrice = item.find("div", class_="unitPrice").get_text().strip().replace(" ", "") ws.append([title, tall,year,house,area,direction,tag,totalPrice,unitPrice]) except Exception as e: print(e) continue if page_num > 20: break wb.save("hosuebeike.xlsx") if __name__=='__main__': get_house()
跑完之后,在当前目录下会有一个excel文件,打开看一下,部分截图如下: