• Seaborn 绘图代码


    seaborn单变量、多变量及回归分析绘图
    https://blog.csdn.net/llh_1178/article/details/78147822

    Python数据科学分析速查表
    https://github.com/iamseancheney/python-data-science-cheatsheet

    subplots 的用法示例

    使用 regplot() 或 lmplot() 都可以绘制回归关系,推荐 regplot()

    # Draw a 3x2 grid of regression plots: one feature per panel, each
    # plotted against SalePrice with a fitted regression line.
    fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(nrows=3, ncols=2, figsize=(14, 10))

    # Pair every feature with the axes it is drawn on, so a single loop
    # replaces six copy-pasted concat/regplot stanzas.
    regression_panels = [
        ('OverallQual', ax1),
        ('TotalBsmtSF', ax2),
        ('GrLivArea', ax3),
        ('GarageArea', ax4),
        ('FullBath', ax5),
        ('YearBuilt', ax6),
    ]
    for feature, panel_ax in regression_panels:
        # Two-column frame (SalePrice + the feature) handed to seaborn.
        feature_vs_price = pd.concat([train['SalePrice'], train[feature]], axis=1)
        sns.regplot(x=feature, y='SalePrice', data=feature_vs_price,
                    scatter=True, fit_reg=True, ax=panel_ax)

    # YearRemodAdd is drawn through pandas' own plotting API; no `ax` is
    # passed, so it does not land on any of the six grid panels.
    YearRemodAdd_scatter_plot = pd.concat([train['SalePrice'], train['YearRemodAdd']], axis=1)
    YearRemodAdd_scatter_plot.plot.scatter('YearRemodAdd', 'SalePrice')
    

    频率分布直方图

    # Plot the distribution of the per-column kurtosis of `train`.
    # numeric_only=True restricts the computation to numeric columns.
    # `train` also holds text columns (e.g. 'Neighborhood'), and pandas >= 2.0
    # raises on them instead of silently skipping them as older releases did.
    # The result is computed once and shared by both plot calls.
    kurtosis = train.kurt(numeric_only=True)

    plt.figure(figsize=(12, 8))
    # NOTE(review): distplot is deprecated in recent seaborn releases;
    # consider histplot/displot when the minimum seaborn version allows it.
    sns.distplot(kurtosis, color='r', axlabel='Kurtosis', norm_hist=False, kde=True, rug=False)
    # Overlay a plain matplotlib histogram of the same values on the same axes.
    plt.hist(kurtosis, orientation='vertical', histtype='bar', label='Kurtosis', color='blue')
    plt.show()
    

    绘制热力图

    # First figure: heatmap of the whole `correlation` matrix, colour scale
    # capped at 0.8.
    f, ax = plt.subplots(figsize=(14, 12))
    plt.title('Correlation of Numeric Features with Sale Price', y=1, size=16)
    sns.heatmap(correlation, vmax=0.8, square=True)

    # Second figure: only the k rows of `correlation` with the largest
    # 'SalePrice' value (presumably this includes SalePrice itself).
    k = 11
    cols = correlation.nlargest(k, 'SalePrice')['SalePrice'].index
    print(cols)

    # Recompute the correlation coefficients for just those columns;
    # the transpose makes each column one variable for np.corrcoef.
    cm = np.corrcoef(train[cols].values.T)

    f, ax = plt.subplots(figsize=(14, 12))
    heatmap_options = dict(
        vmax=.8,
        linewidths=0.01,
        linecolor="white",
        square=True,
        annot=True,
        annot_kws={'size': 12},
        cmap='viridis',
        xticklabels=cols.values,
        yticklabels=cols.values,
    )
    sns.heatmap(cm, **heatmap_options)
    

    很常用的一个 pairplot 图

    # Pairwise scatter plots of SalePrice and seven features, with KDE
    # curves on the diagonal.
    sns.set()
    columns = ['SalePrice', 'OverallQual', 'TotalBsmtSF', 'GrLivArea',
               'GarageArea', 'FullBath', 'YearBuilt', 'YearRemodAdd']
    # `height` is the current name of the per-facet size keyword; the old
    # `size` spelling was renamed in seaborn 0.9 and is rejected by
    # current releases.
    sns.pairplot(train[columns], height=2, kind='scatter', diag_kind='kde')
    plt.show()
    

    使用 Pandas 绘制柱状图

    # Pivot table: median SalePrice for every OverallQual level.
    saleprice_overall_quality = train.pivot_table(
        values='SalePrice',
        index='OverallQual',
        aggfunc=np.median,
    )

    # Draw the pivot table as a blue bar chart and label both axes.
    saleprice_overall_quality.plot.bar(color='blue')
    plt.ylabel('Median Sale Price')
    plt.xlabel('Overall Quality')
    plt.show()
    

    pivot_table 得到数据透视表。

    boxplot 绘制箱线图

    # Box plots of SalePrice grouped by a categorical column, one figure
    # per column. Each entry is (column, figure size, x-tick rotation);
    # a rotation of None leaves the tick labels untouched.
    boxplot_specs = [
        ('OverallQual', (12, 8), None),
        ('Neighborhood', (16, 10), 45),
    ]
    for var, figsize, tick_rotation in boxplot_specs:
        data = pd.concat([train['SalePrice'], train[var]], axis=1)
        f, ax = plt.subplots(figsize=figsize)
        # Despite its name, `fig` holds the object returned by sns.boxplot.
        fig = sns.boxplot(x=var, y="SalePrice", data=data)
        # Same fixed y-range on every figure.
        fig.axis(ymin=0, ymax=800000)
        if tick_rotation is not None:
            xt = plt.xticks(rotation=tick_rotation)
    

    连续绘制箱线图,下面这段代码有点厉害:

    # Convert every categorical feature to pandas' category dtype; where a
    # column has missing values, register an explicit 'MISSING' category
    # and use it to fill them.
    for c in categorical_features:
        column = train[c].astype('category')
        if column.isnull().any():
            column = column.cat.add_categories(['MISSING']).fillna('MISSING')
        train[c] = column
    
    def boxplot(x, y, **kwargs):
        """Draw a seaborn box plot of *y* grouped by *x* on the current axes.

        The x tick labels are rotated by 90 degrees afterwards. Extra
        keyword arguments are accepted but ignored.
        """
        sns.boxplot(x=x, y=y)
        plt.xticks(rotation=90)
    # Reshape to long format: one row per (SalePrice, feature name, feature
    # value), so each categorical feature can get its own facet.
    f = pd.melt(train, id_vars=['SalePrice'], value_vars=categorical_features)
    # One facet per feature, two per row, with independent axes.
    # `height` is the current name of the facet-size keyword; the old `size`
    # spelling was renamed in seaborn 0.9 and is rejected by current releases.
    g = sns.FacetGrid(f, col="variable", col_wrap=2, sharex=False, sharey=False, height=5)
    # Draw the box plot helper on every facet: value on x, SalePrice on y.
    g = g.map(boxplot, "value", "SalePrice")
    

    计数图 countplot

    # Count plot: number of rows of `data` per Neighborhood value.
    plt.figure(figsize=(12, 6))
    sns.countplot(
        x='Neighborhood',
        data=data,
    )
    # Tilt the x tick labels by 45 degrees.
    xt = plt.xticks(rotation=45)
    

    小提琴图

    # Violin plot of SalePrice for each Functional value.
    # x and y are passed by keyword: recent seaborn releases no longer accept
    # them positionally, and the keyword form works on older releases too.
    sns.violinplot(x='Functional', y='SalePrice', data=train)
    

    pointplot

    # Point plot of SalePrice per Neighborhood, one series per LotShape.
    plt.figure(figsize=(8, 10))
    g1 = sns.pointplot(
        x='Neighborhood',
        y='SalePrice',
        hue='LotShape',
        data=train,
    )

    # Title and axis labels.
    g1.set_title("Lotshape Based on Neighborhood", fontsize=15)
    g1.set_xlabel("Neighborhood")
    g1.set_ylabel("Sale Price", fontsize=12)
    # Re-apply the existing x tick labels, rotated by 90 degrees.
    g1.set_xticklabels(g1.get_xticklabels(), rotation=90)
    plt.show()
    
  • 相关阅读:
    设计模式java----单例模式
    创建三个线程按顺序输出1-60,每个线程输出5个数
    java笔记----线程状态转换函数
    java笔记----常见的异常
    java一个数分解的质因数java
    MapReduce ----数据去重
    MapReduce ----倒排索引
    报错org.apache.hadoop.mapreduce.lib.input.FileSplit cannot be cast to org.apache.hadoop.mapred.FileSplit
    NumPy的使用(一)
    python----csv的使用
  • 原文地址:https://www.cnblogs.com/liweiwei1419/p/9712589.html
Copyright © 2020-2023  润新知