• Python数据分析与机器学习-Matplot_4


    import pandas as pd
    import matplotlib.pyplot as plt
    # Load the Fandango score comparison data set from the working directory.
    reviews = pd.read_csv('fandango_scores.csv')

    # Keep the film title plus the four rating columns used in the plots below.
    # NOTE: 'Metacritic_user_nom' is spelled exactly as the CSV header has it.
    cols = [
        'FILM',
        'RT_user_norm',
        'Metacritic_user_nom',
        'IMDB_norm',
        'Fandango_Ratingvalue',
    ]
    norm_reviews = reviews[cols]

    # Preview the first five rows.
    print(norm_reviews.head(5))
    
                                 FILM  RT_user_norm  Metacritic_user_nom  
    0  Avengers: Age of Ultron (2015)           4.3                 3.55   
    1               Cinderella (2015)           4.0                 3.75   
    2                  Ant-Man (2015)           4.5                 4.05   
    3          Do You Believe? (2015)           4.2                 2.35   
    4   Hot Tub Time Machine 2 (2015)           1.4                 1.70   
    
       IMDB_norm  Fandango_Ratingvalue  
    0       3.90                   4.5  
    1       3.55                   4.5  
    2       3.90                   4.5  
    3       2.70                   4.5  
    4       2.55                   3.0  
    
    # Frequency of each distinct Fandango rating: printed first in the order
    # value_counts() returns it, then again ordered by the rating value itself.
    fandango_counts = norm_reviews['Fandango_Ratingvalue'].value_counts()
    print(fandango_counts)
    fandango_distribution = fandango_counts.sort_index()
    print('-----------')
    print(fandango_distribution)

    # Same two-step report for the IMDB ratings.
    imdb_counts = norm_reviews['IMDB_norm'].value_counts()
    print(imdb_counts)
    imdb_distribution = imdb_counts.sort_index()
    print("-----------")
    print(imdb_distribution)
    
    4.1    16
    4.2    12
    3.9    12
    4.3    11
    3.7     9
    3.5     9
    4.5     9
    3.4     9
    3.6     8
    4.4     7
    4.0     7
    3.2     5
    2.9     5
    3.8     5
    3.3     4
    4.6     4
    3.0     4
    4.8     3
    3.1     3
    2.8     2
    2.7     2
    Name: Fandango_Ratingvalue, dtype: int64
    -----------
    2.7     2
    2.8     2
    2.9     5
    3.0     4
    3.1     3
    3.2     5
    3.3     4
    3.4     9
    3.5     9
    3.6     8
    3.7     9
    3.8     5
    3.9    12
    4.0     7
    4.1    16
    4.2    12
    4.3    11
    4.4     7
    4.5     9
    4.6     4
    4.8     3
    Name: Fandango_Ratingvalue, dtype: int64
    3.60    10
    3.30     9
    3.15     9
    3.90     9
    3.70     8
    3.45     7
    3.55     7
    3.35     7
    3.75     6
    3.20     6
    2.75     5
    3.65     5
    3.50     4
    2.70     4
    3.05     4
    4.10     4
    3.25     4
    3.85     4
    3.80     3
    2.95     3
    2.60     2
    4.20     2
    2.45     2
    2.30     2
    3.95     2
    2.80     2
    3.00     2
    4.00     1
    3.10     1
    2.00     1
    2.50     1
    2.85     1
    4.05     1
    4.15     1
    2.20     1
    4.30     1
    2.55     1
    2.15     1
    3.40     1
    2.90     1
    2.10     1
    Name: IMDB_norm, dtype: int64
    -----------
    2.00     1
    2.10     1
    2.15     1
    2.20     1
    2.30     2
    2.45     2
    2.50     1
    2.55     1
    2.60     2
    2.70     4
    2.75     5
    2.80     2
    2.85     1
    2.90     1
    2.95     3
    3.00     2
    3.05     4
    3.10     1
    3.15     9
    3.20     6
    3.25     4
    3.30     9
    3.35     7
    3.40     1
    3.45     7
    3.50     4
    3.55     7
    3.60    10
    3.65     5
    3.70     8
    3.75     6
    3.80     3
    3.85     4
    3.90     9
    3.95     2
    4.00     1
    4.05     1
    4.10     4
    4.15     1
    4.20     2
    4.30     1
    Name: IMDB_norm, dtype: int64
    
    # Histogram of the Fandango ratings restricted to the interval [4, 5],
    # split into 20 equal-width bins.
    # Variations worth trying: omit `range` to bin over the full span of the
    # data, or omit `bins` as well to fall back to matplotlib's default count.
    fig, ax = plt.subplots()
    fandango_ratings = norm_reviews['Fandango_Ratingvalue']
    ax.hist(fandango_ratings, bins=20, range=(4, 5))
    plt.show()
    

    # One tall figure with four vertically stacked panels, one histogram per
    # rating source. All panels share the same bins (20 over [0, 5]) and the
    # same y-limits (0 to 50) so the distributions can be compared directly.
    fig = plt.figure(figsize=(5, 20))
    panels = [fig.add_subplot(4, 1, row) for row in range(1, 5)]
    ax1, ax2, ax3, ax4 = panels

    # (column in norm_reviews, panel title), listed top to bottom.
    histogram_specs = [
        ('Fandango_Ratingvalue', 'Distribution of Fandango Ratings'),
        ('RT_user_norm', 'Distribution of Rotten Tomatoes Ratings'),
        ('Metacritic_user_nom', 'Distribution of Metacritic Ratings'),
        ('IMDB_norm', 'Distribution of IMDB Ratings'),
    ]
    for panel, (column, title) in zip(panels, histogram_specs):
        panel.hist(norm_reviews[column], bins=20, range=(0, 5))
        panel.set_title(title)
        panel.set_ylim(0, 50)

    plt.show()
    

    # Box plot of a single column: the Rotten Tomatoes user ratings.
    fig, ax = plt.subplots()
    rt_ratings = norm_reviews['RT_user_norm']
    ax.boxplot(rt_ratings)
    # Replace the default tick label of the single box with a readable name.
    ax.set_xticklabels(['Rotten Tomatoes'])
    plt.show()
    

    # Side-by-side box plots of all four rating columns on one axes.
    num_cols = [
        'RT_user_norm',
        'Metacritic_user_nom',
        'IMDB_norm',
        'Fandango_Ratingvalue',
    ]
    # Passing the underlying 2-D array makes boxplot draw one box per column.
    ratings_matrix = norm_reviews[num_cols].values
    fig, ax = plt.subplots()
    ax.boxplot(ratings_matrix)
    ax.set_ylim(0, 5)
    # Label each box with its column name, tilted by 45 degrees.
    ax.set_xticklabels(num_cols, rotation=45)
    plt.show()
    

  • 相关阅读:
    SQL键值约束、索引使用
    C#字符串的四舍五入
    VB中字符串操作函数
    C#文本选中及ContextMenuStrip菜单使用
    C#关于new的用法
    C#有关日期的使用方法
    break,continue的区别
    在Lua中使用数字的时候有个坑
    关于自动寻径和图、邻接表的学习和启发
    关于在Cocos2dx引擎中手动绑定C++到Lua
  • 原文地址:https://www.cnblogs.com/SweetZxl/p/11126877.html
Copyright © 2020-2023  润新知