• 001_python实现数据分析


    一、

    # coding:utf8
    # !/usr/bin/python
    # import numpy as np
    import pandas as pd
    import np
    
    def example2():
        '''
        Print the summary statistics of a small numeric ``Series``.

        Builds ``pd.Series([1, 2, 3])`` and prints what ``describe()`` returns
        for it.  ``print`` is called with a single parenthesised argument so
        the line behaves the same on Python 2 and Python 3.

        Output recorded by the original author::

            count    3.0
            mean     2.0
            std      1.0
            min      1.0
            25%      1.5
            50%      2.0
            75%      2.5
            max      3.0
            dtype: float64

        :return: None
        '''
        s = pd.Series([1, 2, 3])
        print(s.describe())
    def example3():
        '''
        Print the summary of a categorical (string-valued) ``Series``.

        Builds ``pd.Series(['a', 'a', 'b', 'c'])`` and prints what
        ``describe()`` returns for it.  ``print`` is called with a single
        parenthesised argument so the line behaves the same on Python 2 and
        Python 3.

        Output recorded by the original author::

            count     4
            unique    3
            top       a
            freq      2
            dtype: object

        :return: None
        '''
        s = pd.Series(['a', 'a', 'b', 'c'])
        print(s.describe())
    def example4():
        '''
        Print the summary of a timestamp ``Series``.

        Builds a ``Series`` from three ``np.datetime64`` values (one in 2000
        and two identical ones in 2010) and prints what ``describe()`` returns
        for it.  ``print`` is called with a single parenthesised argument so
        the line behaves the same on Python 2 and Python 3.

        Output recorded by the original author::

            count                       3
            unique                      2
            top       2010-01-01 00:00:00
            freq                        2
            first     2000-01-01 00:00:00
            last      2010-01-01 00:00:00
            dtype: object

        NOTE(review): the layout above presumably comes from an older pandas;
        newer releases may summarise datetimes numerically (mean/min/max)
        instead -- confirm against the installed version.

        :return: None
        '''
        s = pd.Series([
            np.datetime64("2000-01-01"),
            np.datetime64("2010-01-01"),
            np.datetime64("2010-01-01"),
        ])
        print(s.describe())
    def example5():
        '''
        Print the default summary of a mixed-type ``DataFrame``.

        The frame has one categorical, one numeric and one object column.
        With no arguments ``describe()`` only reports the numeric fields.
        ``print`` is called with a single parenthesised argument so the line
        behaves the same on Python 2 and Python 3.

        Variations listed by the original author (not executed here)::

            df.describe(include='all')         # all columns, any data type
            df.numeric.describe()              # one column, attribute access
            df.describe(include=[np.number])   # only numeric columns
            df.describe(include=[object])      # only string columns
            df.describe(include=['category'])  # only categorical columns
            df.describe(exclude=[np.number])   # drop numeric columns
            df.describe(exclude=[object])      # drop object columns

        The original notes spelled the object selector ``np.object``; that
        alias of the builtin ``object`` has been removed from NumPy, so the
        builtin is shown instead.

        :return: None
        '''
        df = pd.DataFrame({
            'categorical': pd.Categorical(['d', 'e', 'f']),
            'numeric': [1, 2, 3],
            'object': ['a', 'b', 'c'],
        })
        print(df.describe())
    def example1():
        '''
        Print several aggregate statistics of a ``describe()`` table.

        A two-column ``DataFrame`` is built from a nested dict, summarised
        with ``describe()``, and then ``count``, ``median``, ``mode``, ``std``,
        ``var`` and ``skew`` are printed for that summary table.  Note that
        every statistic is computed over the rows of the ``describe()`` result
        (count, mean, std, min, quartiles, max), not over the raw data.

        Each message is a single string literal using ``\\n`` escapes; the
        literals were previously broken across physical lines, which is a
        syntax error.  ``print`` is called with a single parenthesised
        argument so the lines behave the same on Python 2 and Python 3.

        ``df2.describe()`` as recorded by the original author::

                   000  001
            count  3.0  3.0
            mean   2.0  5.0
            std    1.0  1.0
            min    1.0  4.0
            25%    1.5  4.5
            50%    2.0  5.0
            75%    2.5  5.5
            max    3.0  6.0

        :return: None
        '''
        dic1 = {'000': {'a': 1, 'b': 2, 'c': 3}, '001': {'d': 4, 'e': 5, 'f': 6}}
        df2 = pd.DataFrame(dic1)
        # describe() is deterministic, so compute the summary once and reuse it.
        summary = df2.describe()
        print("返回非NAN数据项数量=>count()\n{count}\n".format(
            count=summary.count()))
        print("返回中位数,等价第50位百分位数的值=>median()\n{median}\n".format(
            median=summary.median()))
        print("返回数据的众值=>mode()\n{mode}\n".format(
            mode=summary.mode()))
        print("返回数据的标准差(描述离散度)=>std()\n{std}\n".format(
            std=summary.std()))
        print("返回方差=>var()\n{var}\n".format(
            var=summary.var()))
        print("偏态系数(skewness,表示数据分布的对称程度)=>skew()\n{skew}\n".format(
            skew=summary.skew()))
    
    def main():
        '''
        Script entry point: run the ``example1`` demonstration.
        :return:
        '''
        example1()
    # Run the demonstration only when this file is executed as a script.
    if __name__ == '__main__':
        main()
    

    输出=>

    返回非NAN数据项数量=>count()
    000    8
    001    8
    dtype: int64
    返回中位数,等价第50位百分位数的值=>median()
    000    2.00
    001    4.75
    dtype: float64
    返回数据的众值=>mode()
       000  001
    0  1.0  5.0
    1  2.0  NaN
    2  3.0  NaN
    返回数据的标准差(描述离散度)=>std()
    000    0.801784
    001    1.603567
    dtype: float64
    返回方差=>var()
    000    0.642857
    001    2.571429
    dtype: float64
    偏态系数(skewness,表示数据分布的对称程度)=>skew()
    000    0.000000
    001   -1.299187
    dtype: float64
    

      

  • 相关阅读:
    《安富莱嵌入式周报》第262期:2022.04.18--2022.04.24
    第4期ThreadX视频教程:单片机动态APP加载玩法,像手机电脑一样加载卸载多个应用软件(20220501)
    H7-TOOL发布固件V2.16, 脱机烧录增加汇顶,普冉,ESMT(台湾晶豪) SPIFLASH等支持(20220508)
    MDK5.37发布,围绕Cortex-M85更新了一大堆东西,MDK AC5正式退出历史舞台(20220504)
    【BSP视频教程】STM32H7视频教程第14期:超干货,MPU和Cache实战,一张图了解所有经典配置案例,争取人人都可以玩溜(20220508)
    地表最强Cortex-M85发布
    【Git】安装及配置
    限制input 只能输入正整数小数点一位 两位(指令方法)
    往对象中新增一个参数(之前没有定义)
    js 删除对象里的某个属性
  • 原文地址:https://www.cnblogs.com/arun-python/p/10423077.html
Copyright © 2020-2023  润新知