Matplotlib 是 Python 的数据可视化库
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
1、字符串转化为日期
# Load the unemployment-rate CSV, then replace the DATE column (read in as
# strings) with parsed pandas datetime values.
unrate = pd.read_csv("unrate.csv")
unrate = unrate.assign(DATE=pd.to_datetime(unrate["DATE"]))
2、折线图
# Line chart of the first twelve rows of `unrate`.
data1 = unrate.iloc[:12]

# First argument is the x-axis data, second is the y-axis data.
plt.plot(data1["DATE"], data1["VALUE"])

# Axis descriptions and chart title.
plt.title("my first plt")
plt.xlabel("Date Month")
plt.ylabel("Rate Value")

# Rotate the x-axis tick labels by 45 degrees.
plt.xticks(rotation=45)

plt.show()
3、多图拼接
# One figure holding two subplots stacked vertically (2 rows x 1 column).
fig, (ax1, ax2) = plt.subplots(2, 1)

# Top subplot: five random integers drawn from [1, 5) against 0..4.
ax1.plot(np.random.randint(1, 5, 5), np.arange(5))

# Bottom subplot: 0, 3, ..., 27 against 0..9.
ax2.plot(np.arange(0, 30, 3), np.arange(10))

plt.show()
4、一图多线
# Two consecutive 12-row windows of `unrate` drawn as two lines on one chart.
fig = plt.figure(figsize=(6, 3))  # figure size

# Copy each slice before adding a column: assigning into a slice of `unrate`
# is chained assignment, which pandas warns about and which may or may not
# write through to the parent frame.
data1 = unrate[0: 12].copy()
data1["MONTH"] = data1["DATE"].dt.month
plt.plot(data1["MONTH"], data1["VALUE"], c="red")

data2 = unrate[12: 24].copy()
data2["MONTH"] = data2["DATE"].dt.month
plt.plot(data2["MONTH"], data2["VALUE"], c="blue")

plt.xticks(rotation=45)  # rotate the x-axis tick labels by 45 degrees
plt.xlabel("Date Month")
plt.ylabel("Rate Value")
plt.title("my first plt")
plt.show()
5、一图多线 - 自动跑代码(带图例)
# Five consecutive 12-row windows of `unrate`, one coloured line per window,
# with a legend.
fig = plt.figure(figsize=(10, 6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i, color in enumerate(colors):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index: end_index]
    # The legend label is hard-coded to start at 1948.
    # NOTE(review): presumably the data begins in January 1948 with one row
    # per month -- confirm against unrate.csv.
    label = str(1948 + i)
    # Derive the month from DATE here instead of reading a 'MONTH' column:
    # nothing in these notes ever adds 'MONTH' to `unrate` itself, so
    # subset['MONTH'] would raise KeyError.
    plt.plot(subset['DATE'].dt.month, subset['VALUE'], c=color, label=label)

# Legend position; loc='best' lets Matplotlib choose a location instead.
plt.legend(loc='upper left')
plt.show()
6、条形图
# Vertical bar chart of one film's rating from each source (row label 0 of
# `fand_new`).
fand_col = ["Fandango_Stars", "Fandango_Ratingvalue", "Metacritic_norm", "RT_user_norm_round", "IMDB_norm_round"]

# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
# NOTE(review): assumes `fand_new` has a row labelled 0 (e.g. the default
# integer index) -- confirm.
bar_heights = fand_new.loc[0, fand_col].values  # bar heights
bar_positions = np.arange(5) + 0.75             # left edge of each bar
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# 0.5 is the bar width. With left edges at 0.75, 1.75, ... each bar is
# centred on its tick (1, 2, ...) only when the positions are treated as
# edges, so request that explicitly (the default alignment is 'center').
ax.bar(bar_positions, bar_heights, 0.5, align='edge')
ax.set_xticks(tick_positions)
ax.set_xticklabels(fand_col, rotation=90)
ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
7、条形图 - 横向
# Horizontal bar chart of one film's rating from each source (row label 0 of
# `fand_new`).
fand_col = ["Fandango_Stars", "Fandango_Ratingvalue", "Metacritic_norm", "RT_user_norm_round", "IMDB_norm_round"]

# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
# NOTE(review): assumes `fand_new` has a row labelled 0 (e.g. the default
# integer index) -- confirm.
bar_heights = fand_new.loc[0, fand_col].values  # bar lengths along the x axis
bar_positions = np.arange(5) + 0.75             # lower edge of each bar
tick_positions = range(1, 6)

fig, ax = plt.subplots()
# barh draws the bars horizontally; 0.5 is the bar thickness. The positions
# are edges, so say so explicitly to keep each bar centred on its tick.
ax.barh(bar_positions, bar_heights, 0.5, align='edge')
ax.set_yticks(tick_positions)
ax.set_yticklabels(fand_col, rotation=0)
# In the horizontal layout the rating values run along x and the sources
# along y, so the axis labels are the reverse of the vertical chart.
ax.set_xlabel('Average Rating')
ax.set_ylabel('Rating Source')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()
8、散点图
# Scatter plot comparing two rating columns of `fand_new`.
fig, ax = plt.subplots()
ax.scatter(fand_new['Fandango_Stars'], fand_new['Metacritic_norm'])
ax.set_xlabel('Fandango')
# The y data is the Metacritic_norm column, so label it as Metacritic
# (the label previously read 'Rotten Tomatoes').
ax.set_ylabel('Metacritic')
plt.show()
9、直方图
# Frequency tables: how many rows carry each distinct rating value, ordered
# by the rating value.
fandango_distribution = fand_new['Fandango_Stars'].value_counts().sort_index()
imdb_distribution = fand_new['IMDB_norm_round'].value_counts().sort_index()

# Histogram of the Fandango star ratings.
# `bins` is the number of groups the values are split into (Matplotlib's
# default is 10); `range` limits the x-axis span that gets binned, so check
# the range of the data before choosing it.
fig, ax = plt.subplots()
ax.hist(fand_new['Fandango_Stars'], bins=5, range=(4, 5))
plt.show()
10、多图
# 2x2 grid of histograms, one per rating column, sharing the same binning
# (20 bins over 0..5) and the same y-axis limit so the panels are comparable.
fig = plt.figure(figsize=(12, 12))

# (column, title) for each panel in subplot order. Each title names the
# source of the column actually plotted; previously the IMDB and Rotten
# Tomatoes titles were attached to each other's columns.
panels = [
    ('Fandango_Stars', 'Distribution of Fandango Ratings'),
    ('IMDB_norm_round', 'Distribution of IMDB Ratings'),
    ('Metacritic_norm', 'Distribution of Metacritic Ratings'),
    ('RT_user_norm_round', 'Distribution of Rotten Tomatoes Ratings'),
]

axes = []
for position, (column, title) in enumerate(panels, start=1):
    panel_ax = fig.add_subplot(2, 2, position)
    panel_ax.hist(fand_new[column], bins=20, range=(0, 5))
    panel_ax.set_title(title)
    panel_ax.set_ylim(0, 50)
    axes.append(panel_ax)

# Keep the individual axes available under their original names.
ax1, ax2, ax3, ax4 = axes

plt.show()
11、箱线图(四分位图)
# Box plot of a single rating column.
fig, ax = plt.subplots()
ax.boxplot(fand_new['Metacritic_norm'])
# The data is the Metacritic_norm column, so label the box as Metacritic
# (the label previously read 'Rotten Tomatoes').
ax.set_xticklabels(['Metacritic'])
ax.set_ylim(0, 5)
plt.show()
12、多图 - 通过数组
# Several box plots side by side: passing a 2-D array to boxplot draws one
# box per column.
num_cols = ['Fandango_Stars', 'IMDB_norm_round', 'Metacritic_norm', 'RT_user_norm_round']
ratings_matrix = fand_new[num_cols].values

fig, ax = plt.subplots()
ax.boxplot(ratings_matrix)
ax.set_ylim(0, 5)
# Label each box with its column name, rotated so the names do not overlap.
ax.set_xticklabels(num_cols, rotation=90)
plt.show()
13、数据可视化 - 简洁一些
# Biology series from `women_degrees` against its complement (100 - value),
# drawn with reduced chart clutter.
fig, ax = plt.subplots()
ax.plot(women_degrees['Year'], women_degrees['Biology'], c='blue', label='Women')
ax.plot(women_degrees['Year'], 100-women_degrees['Biology'], c='green', label='Men')

# Hide the tick marks on all four sides. These parameters are booleans; the
# string "off" is a non-empty (truthy) string and does not reliably hide the
# ticks on current Matplotlib releases.
ax.tick_params(bottom=False, top=False, left=False, right=False)

# Hide the border lines (spines) around the plotting area.
for spine in ax.spines.values():
    spine.set_visible(False)

ax.legend(loc='upper right')
plt.show()
14、数据可视化 - 多图 - 通过程序
# 2x2 grid of line charts generated in a loop, one subplot per column listed
# in `major_cats`.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

years = women_degrees['Year']
for position, category in enumerate(major_cats, start=1):
    ax = fig.add_subplot(2, 2, position)
    women_share = women_degrees[category]
    ax.plot(years, women_share, c='blue', label='Women')
    # The second line is the complement of the first (100 - value).
    ax.plot(years, 100-women_share, c='green', label='Men')

# Called once after the loop, pyplot.legend() applies to the current axes,
# i.e. the last subplot created.
plt.legend(loc='upper right')
plt.show()
15、数据可视化 - 多图 - 通过程序跑 - 多图 简洁
# 2x2 grid of line charts generated in a loop, with chart clutter removed
# and identical axis limits on every subplot.
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

for sp, category in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp+1)
    ax.plot(women_degrees['Year'], women_degrees[category], c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100-women_degrees[category], c='green', label='Men')

    # Hide the border lines (spines) around the plotting area.
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Same limits on every subplot so the panels can be compared directly.
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)

    # Hide the tick marks. These parameters are booleans; the string "off"
    # is truthy and does not reliably hide ticks on current Matplotlib.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called once after the loop, pyplot.legend() applies to the current axes,
# i.e. the last subplot created.
plt.legend(loc='upper right')
plt.show()
16、如何使图表更好看?
# Same 2x2 grid, drawn with custom line colours.
# Matplotlib accepts RGB colours as tuples of floats in the 0..1 range, so
# each 0..255 channel value is divided by 255.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))

# `major_cats` is the list of column names defined in the earlier snippet.
for sp in range(0, 4):
    ax = fig.add_subplot(2, 2, sp+1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men')

    # Hide the border lines (spines) around the plotting area.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])

    # Hide the tick marks. These parameters are booleans; the string "off"
    # is truthy and does not reliably hide ticks on current Matplotlib.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called once after the loop, pyplot.legend() applies to the current axes,
# i.e. the last subplot created.
plt.legend(loc='upper right')
plt.show()
17、加粗线
# Four charts in a single row (1x4), drawn with thicker lines.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(18, 3))

# `major_cats` is the list of column names defined in the earlier snippet.
for sp in range(0, 4):
    ax = fig.add_subplot(1, 4, sp+1)
    # linewidth sets the thickness of the plotted line.
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=3)

    # Hide the border lines (spines) around the plotting area.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])

    # Hide the tick marks. These parameters are booleans; the string "off"
    # is truthy and does not reliably hide ticks on current Matplotlib.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Called once after the loop, pyplot.legend() applies to the current axes,
# i.e. the last subplot created.
plt.legend(loc='upper right')
plt.show()
18、加注释
# Four charts in a single row (1x4) where the lines are labelled with text
# placed on the first and last subplot instead of a legend.
# `major_cats`, `cb_dark_blue` and `cb_orange` come from the earlier snippets.
fig = plt.figure(figsize=(18, 3))

for sp in range(0, 4):
    ax = fig.add_subplot(1, 4, sp+1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=3)

    # Hide the border lines (spines) around the plotting area.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(major_cats[sp])

    # Hide the tick marks. These parameters are booleans; the string "off"
    # is truthy and does not reliably hide ticks on current Matplotlib.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    # ax.text(x, y, s) writes the string s at data coordinates (x, y).
    # Only the leftmost and rightmost subplots are annotated.
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == 3:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')

plt.show()