• Data Visualisation Cheat Sheet


    Univariate plotting with pandas

    # Univariate plots drawn straight from pandas Series via the .plot accessor.
    import pandas as pd
    reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)
    reviews.head(3)

    # Bar charts
    # Ten most frequent provinces, as raw counts.
    reviews['province'].value_counts().head(10).plot.bar()
    # Same ten provinces, each count divided by the total number of rows.
    (reviews['province'].value_counts().head(10) / len(reviews)).plot.bar()
    # Number of rows per score, ordered by score rather than by frequency.
    reviews['points'].value_counts().sort_index().plot.bar()

    # Line chart
    reviews['points'].value_counts().sort_index().plot.line()

    # Area chart
    reviews['points'].value_counts().sort_index().plot.area()

    # Histograms
    # Prices below 200 only.
    reviews[reviews['price'] < 200]['price'].plot.hist()
    # Every price, unfiltered.
    reviews['price'].plot.hist()
    # Rows priced above 1500.
    reviews[reviews['price'] > 1500]

    # Pie chart
    reviews['province'].value_counts().head(10).plot.pie()
    View Code

    Bivariate plotting with pandas

    # Bivariate plots drawn from pandas DataFrames via the .plot accessor.
    import pandas as pd
    reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)
    reviews.head()

    # Scatter plot: a random sample of 100 rows priced below 100.
    reviews[reviews['price'] < 100].sample(100).plot.scatter(x='price', y='points')

    # Hexplot (data correlation): bins all rows priced below 100 into a hex grid.
    reviews[reviews['price'] < 100].plot.hexbin(x='price', y='points', gridsize=15)

    # NOTE(review): `wine_counts` is not defined anywhere in this snippet; it
    # must be loaded or built before the lines below will run. Presumably it is
    # a DataFrame with one numeric column per series to stack -- TODO confirm.

    # Stacked plots (data stacking)
    wine_counts.plot.bar(stacked=True)
    wine_counts.plot.area()

    # Bivariate line chart: one line per column on shared axes.
    wine_counts.plot.line()
    View Code

    Plotting with seaborn

    # The same kinds of plots, drawn with seaborn instead of pandas.
    import pandas as pd
    reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)
    import seaborn as sns

    # Countplot
    # Pass the column as `x=` explicitly so it is not taken as the `data` argument.
    sns.countplot(x=reviews['points'])

    # KDE plot: smooths out the noise in the price distribution.
    sns.kdeplot(reviews.query('price < 200').price)
    # Line chart of the same data, for comparison.
    reviews[reviews['price'] < 200]['price'].value_counts().sort_index().plot.line()
    # Two-dimensional KDE over a 5000-row sample of (price, points).
    # NOTE(review): how a two-column DataFrame is interpreted here may depend on
    # the installed seaborn version -- confirm the output is bivariate.
    sns.kdeplot(reviews[reviews['price'] < 200].loc[:, ['price', 'points']].dropna().sample(5000))

    # Distplot: histogram only (kde=False), ten bins.
    sns.distplot(reviews['points'], bins=10, kde=False)

    # Jointplot: default kind first, then hexagonal bins.
    sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100])
    sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100], kind='hex', gridsize=20)


    # Boxplot and violin plot: the box spans the 25%-75% range, with a middle line.
    # Keep only the five most frequent varieties.
    df = reviews[reviews.variety.isin(reviews.variety.value_counts().head(5).index)]

    sns.boxplot(
        x='variety',
        y='points',
        data=df
    )
    View Code

    Faceting with seaborn

    # Faceting: one small plot per category, built with seaborn's FacetGrid.
    import pandas as pd
    # Use the fully qualified option key; the short form 'max_columns' can match
    # more than one option and then fails.
    pd.set_option('display.max_columns', None)
    df = pd.read_csv("../input/fifa-18-demo-player-dataset/CompleteDataset.csv", index_col=0)

    import re
    import numpy as np
    import seaborn as sns

    # Turn the textual 'Value' column into a float expressed in millions.
    # Presumably the strings look like '<symbol><number><unit>' -- TODO confirm.
    footballers = df.copy()
    # Last character of the value string; compared against '0' and 'M' below.
    footballers['Unit'] = df['Value'].str[-1]
    # Rows whose last character is '0' become 0; all others keep the text
    # between the first and last characters.
    footballers['Value (M)'] = np.where(footballers['Unit'] == '0', 0,
                                        footballers['Value'].str[1:-1].replace(r'[a-zA-Z]',''))
    footballers['Value (M)'] = footballers['Value (M)'].astype(float)
    # Rows with unit 'M' are kept as they are; every other row is divided by 1000.
    footballers['Value (M)'] = np.where(footballers['Unit'] == 'M',
                                        footballers['Value (M)'],
                                        footballers['Value (M)']/1000)
    # Overwrite 'Value' with the numeric version and keep only the first
    # whitespace-separated entry of 'Preferred Positions' as 'Position'.
    footballers = footballers.assign(Value=footballers['Value (M)'],
                                     Position=footballers['Preferred Positions'].str.split().str[0])


    # The FacetGrid
    # One column of plots per position, restricted to ST and GK.
    df = footballers[footballers['Position'].isin(['ST', 'GK'])]
    g = sns.FacetGrid(df, col="Position")
    g.map(sns.kdeplot, "Overall")


    # Every position, wrapped so that each row holds six facets.
    df = footballers
    g = sns.FacetGrid(df, col="Position", col_wrap=6)
    g.map(sns.kdeplot, "Overall")


    # Position by club grid, with explicit row and column order.
    df = footballers[footballers['Position'].isin(['ST', 'GK'])]
    df = df[df['Club'].isin(['Real Madrid CF', 'FC Barcelona', 'Atlético Madrid'])]
    g = sns.FacetGrid(df, row="Position", col="Club",
                      row_order=['GK', 'ST'],
                      col_order=['Atlético Madrid', 'FC Barcelona', 'Real Madrid CF'])
    g.map(sns.violinplot, "Overall")  # violin plot in each facet


    # Pairplot: a first step in data analysis.
    sns.pairplot(footballers[['Overall', 'Potential', 'Value']])
    View Code

    Multivariate plotting

    # Multivariate plots: adding a third variable through hue, grouping or colour.
    import pandas as pd
    # Use the fully qualified option key; the short form 'max_columns' can match
    # more than one option and then fails.
    pd.set_option('display.max_columns', None)
    df = pd.read_csv("../input/fifa-18-demo-player-dataset/CompleteDataset.csv", index_col=0)

    import re
    import numpy as np

    # Turn the textual 'Value' column into a float expressed in millions.
    # Presumably the strings look like '<symbol><number><unit>' -- TODO confirm.
    footballers = df.copy()
    # Last character of the value string; compared against '0' and 'M' below.
    footballers['Unit'] = df['Value'].str[-1]
    # Rows whose last character is '0' become 0; all others keep the text
    # between the first and last characters.
    footballers['Value (M)'] = np.where(footballers['Unit'] == '0', 0,
                                        footballers['Value'].str[1:-1].replace(r'[a-zA-Z]',''))
    footballers['Value (M)'] = footballers['Value (M)'].astype(float)
    # Rows with unit 'M' are kept as they are; every other row is divided by 1000.
    footballers['Value (M)'] = np.where(footballers['Unit'] == 'M',
                                        footballers['Value (M)'],
                                        footballers['Value (M)']/1000)
    # Overwrite 'Value' with the numeric version and keep only the first
    # whitespace-separated entry of 'Preferred Positions' as 'Position'.
    footballers = footballers.assign(Value=footballers['Value (M)'],
                                     Position=footballers['Preferred Positions'].str.split().str[0])


    # Multivariate scatter plots
    import seaborn as sns
    # Colour encodes the position; the regression fit is switched off.
    sns.lmplot(x='Value', y='Overall', hue='Position',
               data=footballers.loc[footballers['Position'].isin(['ST', 'RW', 'LW'])],
               fit_reg=False)


    # Same plot, with a distinct marker per position as well.
    sns.lmplot(x='Value', y='Overall', markers=['o', 'x', '*'], hue='Position',
               data=footballers.loc[footballers['Position'].isin(['ST', 'RW', 'LW'])],
               fit_reg=False
              )

    # Grouped box plot: shows the advantage of grouping.
    f = (footballers
             .loc[footballers['Position'].isin(['ST', 'GK'])]
             .loc[:, ['Value', 'Overall', 'Aggression', 'Position']]
        )
    # Keep only rows with 80 <= Overall < 85.
    f = f[f["Overall"] >= 80]
    f = f[f["Overall"] < 85]
    f['Aggression'] = f['Aggression'].astype(float)
    sns.boxplot(x="Overall", y="Aggression", hue='Position', data=f)


    # Heatmap of pairwise correlations.
    # Cells that are not plain decimal strings become NaN, and rows containing
    # any NaN are dropped before computing the correlation.
    f = (
        footballers.loc[:, ['Acceleration', 'Aggression', 'Agility', 'Balance', 'Ball control']]
            .applymap(lambda v: int(v) if str.isdecimal(v) else np.nan)
            .dropna()
    ).corr()
    sns.heatmap(f, annot=True)


    # Parallel Coordinates
    from pandas.plotting import parallel_coordinates

    # Columns 12..16 by position, for ST and GK rows only, cleaned the same way.
    f = (
        footballers.iloc[:, 12:17]
            .loc[footballers['Position'].isin(['ST', 'GK'])]
            .applymap(lambda v: int(v) if str.isdecimal(v) else np.nan)
            .dropna()
    )
    # Re-attach the class column; the assignment aligns on the index.
    f['Position'] = footballers['Position']
    f = f.sample(200)
    parallel_coordinates(f, 'Position')
    View Code

    plotly

    # Interactive plots with plotly in offline notebook mode.
    import pandas as pd
    reviews = pd.read_csv("../input/wine-reviews/winemag-data-130k-v2.csv", index_col=0)
    reviews.head()

    from plotly.offline import init_notebook_mode, iplot
    init_notebook_mode(connected=True)  # set up offline plotting inside the notebook

    import plotly.graph_objs as go
    # Scatter of the first 1000 rows.
    iplot([go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')])

    # Contour heatmap of the first 500 rows with the scatter drawn on top.
    # `contours` is given as a plain dict instead of the deprecated generic
    # go.Contours graph object.
    iplot([go.Histogram2dContour(x=reviews.head(500)['points'],
                                 y=reviews.head(500)['price'],
                                 contours=dict(coloring='heatmap')),
           go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')])


    # Surface plot
    # Group by points, then price; count rows through the helper column 'n';
    # reset_index turns the group keys back into ordinary columns.
    df = reviews.assign(n=0).groupby(['points', 'price'])['n'].count().reset_index()
    df = df[df["price"] < 100]
    # Pivot into a price x points grid, fill the missing cells with 0 and
    # convert the values to a nested list.
    v = df.pivot(index='price', columns='points', values='n').fillna(0).values.tolist()
    iplot([go.Surface(z=v)])


    # Choropleth (geographic) map
    # Rename "US" so it matches the 'country names' location mode, then count
    # rows per country.
    df = reviews['country'].replace("US", "United States").value_counts()

    iplot([go.Choropleth(
        locationmode='country names',
        locations=df.index.values,
        text=df.index,
        z=df.values
    )])
    View Code
  • 相关阅读:
    WEB应用图片的格式,以及各自的特点和优化(一) by FungLeo
    Android---App Widget(五)
    【转】VS2010/MFC编程入门之八(对话框:创建对话框类和添加控件变量)
    【转】MFC中用CFile读取和写入文件2
    【转】VC中获取文件的相对路径和绝对路径
    【转】VC中对文件的读写
    【转】VS2010/MFC编程入门之二十五(常用控件:组合框控件Combo Box)
    【转】CString类型互转 int
    【转】Visual Studio快捷键汇总
    【转】Ubuntu下配置支持Windows访问的samba共享
  • 原文地址:https://www.cnblogs.com/hotsnow/p/9476524.html
Copyright © 2020-2023  润新知