"""Common Matplotlib usage examples."""
import pandas as pd
import numpy as np
# Read the unemployment-rate CSV and replace its DATE column with parsed
# datetimes, then print the first twelve rows as a quick sanity check.
unrate = pd.read_csv('UNRATE.csv').assign(
    DATE=lambda frame: pd.to_datetime(frame['DATE'])
)
print(unrate.iloc[:12])
import matplotlib.pyplot as plt
# Line chart of the first twelve samples.
# The axis labels and title below need data on the current axes; without the
# plot call they would decorate an empty figure when plt.show() runs.
# NOTE(review): the title assumes rows 0-11 are the 1948 months, and the
# 'VALUE' column name is taken from the earlier commented-out example -
# confirm both against UNRATE.csv.
first_twelve = unrate[0:12]
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])

# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)

# Axis names and figure title.
plt.xlabel('Month')
plt.ylabel('Unemployment Rate')
plt.title('Monthly Unemployment Trends 1948')
# 子图 Data visualization
# fig = plt.figure()
# # ax1 = fig.add_subplot(4, 3, 1)
# # ax2 = fig.add_subplot(4, 3, 2)
# # ax3 = fig.add_subplot(4, 3, 3)
# ax4 = fig.add_subplot(4, 3, 6)
# 子图的大小
# fig = plt.figure(figsize=(5, 5))
#
# ax1 = fig.add_subplot(2, 1, 1)
# ax2 = fig.add_subplot(2, 1, 2)
#
# ax1.plot(np.arange(5), np.random.randint(1, 5, 5))
# ax2.plot(np.arange(5), np.arange(5) * 3)
#
# 添加颜色
# unrate['Month'] = unrate['DATE'].dt.month
# fig = plt.figure(figsize=(6, 3))
#
# plt.plot(unrate[0:12]['Month'], unrate[0:12]['VALUE'], c= 'red')
# plt.plot(unrate[12:24]['Month'], unrate[12:24]['VALUE'], c= 'blue')
# Add a legend
# fig = plt.figure(figsize=(10, 6))
# colors = ['red', 'blue', 'green', 'orange', 'black']
# for i in range(5):
#     start_index = i * 12
#     end_index = (i+1) * 12
#     subset = unrate[start_index: end_index]
#     label = str(1948 + i)
#     plt.plot(subset['Month'], subset['VALUE'], c=colors[i], label=label)
#
# plt.legend(loc='best')
# Review scores: load the CSV and list the columns it provides.
reviews = pd.read_csv('fandango_scores.csv')
print(reviews.columns)

# Select the columns used by the plots below and preview the first five rows.
cols = [
    'FILM',
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
norm_reviews = reviews[cols]
print(norm_reviews.iloc[:5])

# Frequency of each distinct value, ordered by the value itself rather than
# by how often it occurs.
fandango_distribution = (
    norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
)
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()
print(fandango_distribution)
print(imdb_distribution)
# Histogram of the Fandango rating values on its own figure.
# Something must be drawn on these axes; otherwise the figure created here
# shows up empty when plt.show() runs.
fig, ax = plt.subplots()
ax.hist(norm_reviews['Fandango_Ratingvalue'], bins=20)
# Other variants to try:
# ax.hist(norm_reviews['Fandango_Ratingvalue'])
# ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5), bins=20)
# fig = plt.figure(figsize=(5, 20))
# ax1 = fig.add_subplot(4, 1, 1)
# ax2 = fig.add_subplot(4, 1, 2)
# ax3 = fig.add_subplot(4, 1, 3)
# ax4 = fig.add_subplot(4, 1, 4)
#
# ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
# ax1.set_title('Distribution of Fandango Ratings')
# ax1.set_ylim(0, 50)
#
# ax2.hist(norm_reviews['RT_user_norm'], bins=20, range=(0, 5))
# ax2.set_title('Distribution of RT_user_norm')
# ax2.set_ylim(0, 50)
#
# ax3.hist(norm_reviews['Metacritic_user_nom'], bins=20, range=(0, 5))
# ax3.set_title('Distribution of Metacritic_user_nom')
# ax3.set_ylim(0, 50)
#
# ax4.hist(norm_reviews['IMDB_norm'], bins=20, range=(0, 5))
# ax4.set_title('Distribution of IMDB_norm')
# ax4.set_ylim(0, 50)
# Box plots
# Single-column example, kept for reference:
# fig, ax = plt.subplots()
# ax.boxplot(norm_reviews['RT_user_norm'])
# ax.set_xticklabels(['Rotten Tomatoes'])
# ax.set_ylim(0, 5)

# Box plot of the selected score columns on one shared axes; the tick labels
# are rotated 90 degrees so the long column names do not overlap.
num_cols = [
    'RT_user_norm',
    'Metacritic_user_nom',
    'IMDB_norm',
    'Fandango_Ratingvalue',
]
fig, ax = plt.subplots(nrows=1, ncols=1)
box_data = norm_reviews[num_cols].values
ax.boxplot(box_data)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(bottom=0, top=5)

# Display every figure created above.
plt.show()