• pandas 2 选择数据


    from __future__ import print_function
    import pandas as pd
    import numpy as np
    
    # Fix the RNG seed so the random values (and the outputs shown below) are reproducible.
    np.random.seed(1)
    # Six consecutive dates starting at 2013-01-01, used as the row index.
    dates = pd.date_range('20130101', periods=6)
    # 6x4 frame of standard-normal samples with columns 'A'..'D'.
    df = pd.DataFrame(
        data=np.random.randn(6, 4),
        index=dates,
        columns=list('ABCD'),
    )
    

    print(df['A'], df.A)                       # select column 'A'; both spellings return the same Series
    print(df[0:3], df['20130102':'20130104'])  # df[0:3] takes the first three rows by position; the label slice takes the rows from '20130102' through '20130104' (end label included), so the two results differ
    
    

    select by label: loc 属性值

    # select by label: loc
    print(df.loc['20130102'])             # the row whose index label is '20130102'
    print(df.loc[:,['A','B']])            # all rows, columns 'A' and 'B'
    print(df.loc['20130102', ['A','B']])  # row '20130102', columns 'A' and 'B'
    

    select by position: iloc 属性编号

    # select by position: iloc
    print(df.iloc[3])              # the 4th row (position 3)
    print(df.iloc[3, 1])           # the value at the 4th row, 2nd column
    print(df.iloc[3:5,0:2])        # rows at positions 3 and 4, columns at positions 0 and 1
    print(df.iloc[[1,2,4],[0,2]])  # the 2nd, 3rd and 5th rows; the 1st and 3rd columns
    

    mixed selection: ix 标签值+标签编号(注意:.ix 已在 pandas 1.0 中移除,新版本请用 loc / iloc 组合代替)

    # mixed selection: .ix was removed in pandas 1.0, so turn the row positions
    # into labels (df.index[:3]) and select rows and columns together with loc
    print(df.loc[df.index[:3], ['A', 'C']])   # first 3 rows, columns 'A' and 'C'
    

    Boolean indexing 布尔

    # Boolean indexing
    print(df[df.A > 0])            # keep only the rows where column 'A' is greater than 0
    

    下面是所有的输出结果:

    print(df)
    
    >                    A         B         C         D
    > 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969
    > 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
    > 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
    > 2013-01-04 -0.322417 -0.384054  1.133769 -1.099891
    > 2013-01-05 -0.172428 -0.877858  0.042214  0.582815
    > 2013-01-06 -1.100619  1.144724  0.901591  0.502494
    
    print(df['A'])
    print('\n')
    print(df.A)
    
    > 2013-01-01    1.624345
    > 2013-01-02    0.865408
    > 2013-01-03    0.319039
    > 2013-01-04   -0.322417
    > 2013-01-05   -0.172428
    > 2013-01-06   -1.100619
    > Freq: D, Name: A, dtype: float64
    
    
    > 2013-01-01    1.624345
    > 2013-01-02    0.865408
    > 2013-01-03    0.319039
    > 2013-01-04   -0.322417
    > 2013-01-05   -0.172428
    > 2013-01-06   -1.100619
    > Freq: D, Name: A, dtype: float64
    
    print(df[0:3])
    print('\n')
    print(df['20130102':'20130104'])
    
    >                    A         B         C         D
    > 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969
    > 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
    > 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
     
     
    >                    A         B         C         D
    > 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
    > 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
    > 2013-01-04 -0.322417 -0.384054  1.133769 -1.099891
    
    # select by label: loc
    print(df.loc['20130102'])
    
    > A    0.865408
    > B   -2.301539
    > C    1.744812
    > D   -0.761207
    > Name: 2013-01-02 00:00:00, dtype: float64
    
    print(df.loc[:, ['A', 'B']])
    
    >                    A         B
    > 2013-01-01  1.624345 -0.611756
    > 2013-01-02  0.865408 -2.301539
    > 2013-01-03  0.319039 -0.249370
    > 2013-01-04 -0.322417 -0.384054
    > 2013-01-05 -0.172428 -0.877858
    > 2013-01-06 -1.100619  1.144724
    
    print(df.loc['20130102', ['A', 'B']])
    
    > A    0.865408
    > B   -2.301539
    > Name: 2013-01-02 00:00:00, dtype: float64
    
    # select by position: iloc
    print(df.iloc[3])
    
    > A   -0.322417
    > B   -0.384054
    > C    1.133769
    > D   -1.099891
    > Name: 2013-01-04 00:00:00, dtype: float64
    
    print(df.iloc[3, 1])
    
    > -0.38405435466841564
    
    print(df.iloc[3:5, 0:2])
    
    >                    A         B
    > 2013-01-04 -0.322417 -0.384054
    > 2013-01-05 -0.172428 -0.877858
    
    print(df.iloc[[1, 2, 4], [0, 2]])
    
    >                    A         C
    > 2013-01-02  0.865408  1.744812
    > 2013-01-03  0.319039  1.462108
    > 2013-01-05 -0.172428  0.042214
    
    # mixed selection: ix (removed in pandas 1.0; equivalent loc call shown)
    print(df.loc[df.index[:3], ['A', 'C']])
    
    >                    A         C
    > 2013-01-01  1.624345 -0.528172
    > 2013-01-02  0.865408  1.744812
    > 2013-01-03  0.319039  1.462108
    
    # Boolean indexing
    print(df[df.A > 0])
    
    >                    A         B         C         D
    > 2013-01-01  1.624345 -0.611756 -0.528172 -1.072969
    > 2013-01-02  0.865408 -2.301539  1.744812 -0.761207
    > 2013-01-03  0.319039 -0.249370  1.462108 -2.060141
    

    END

  • 相关阅读:
    CF1029C Maximal Intersection 暴力枚举
    2018中国大学生程序设计竞赛
    php远程抓取(下载)文件到本项目指定目录中
    FCKEditor添加字体
    UCenter 与 DIscuz 通信失败的解决方法
    运用php函数mkdir创建多级目录
    PHP 数组函数-数组排序
    php登陆ssh执行命令
    php定时执行任务的几个方法
    PHP Yii框架中使用smarty模板
  • 原文地址:https://www.cnblogs.com/yangzhaonan/p/10435678.html
Copyright © 2020-2023  润新知