• 3-13 索引进阶


     

    1.指定倒序的索引

    In [1]:
    import numpy as np
    import pandas as pd
    # Values 0..4 labelled with the descending index 4..0.
    s = pd.Series(
        data=np.arange(5),
        index=np.arange(4, -1, -1),  # same labels as np.arange(5)[::-1]
        dtype='int64',
    )
    s
    
    Out[1]:
    4    0
    3    1
    2    2
    1    3
    0    4
    dtype: int64
     

    2.判断Series的值是否在给定列表中(isin)

    In [2]:
    # isin tests the Series *values* (not the index labels) against the list and returns a boolean mask.
    s.isin([1,3,4])
    
    Out[2]:
    4    False
    3     True
    2    False
    1     True
    0     True
    dtype: bool
    In [3]:
    # Use the boolean mask to keep only the entries whose value is 1, 3 or 4.
    s[s.isin([1,3,4])]
    
    Out[3]:
    3    1
    1    3
    0    4
    dtype: int64
     

    3.多重索引数据

    In [4]:
    # Two-level MultiIndex built from the Cartesian product of [0, 1] and ['a', 'b', 'c'].
    s2 = pd.Series(
        np.arange(6),
        index=pd.MultiIndex.from_product([[0, 1], list('abc')]),
    )
    s2
    
    Out[4]:
    0  a    0
       b    1
       c    2
    1  a    3
       b    4
       c    5
    dtype: int32
    In [5]:
    # isin on the MultiIndex is given full (level0, level1) tuples; the resulting boolean mask selects the matching rows.
    s2.iloc[s2.index.isin([(1,'a'),(0,'b')])]
    
    Out[5]:
    0  b    1
    1  a    3
    dtype: int32
    In [6]:
    s
    
    Out[6]:
    4    0
    3    1
    2    2
    1    3
    0    4
    dtype: int64
    In [7]:
    # Boolean mask on the values: keep only the entries greater than 2.
    s[s>2]
    
    Out[7]:
    1    3
    0    4
    dtype: int64
     

    4.索引数据

    In [8]:
    # Eight consecutive daily timestamps starting 2018-11-24, used as the row index.
    dates = pd.date_range(start='20181124', periods=8)
    # 8x4 frame of random normal draws; unseeded, so the values differ on every run.
    df = pd.DataFrame(
        data=np.random.randn(8, 4),
        index=dates,
        columns=list('ABCD'),
    )
    df
    
    Out[8]:
     
     A B C D
    2018-11-24 -1.302542 -1.850999 1.198842 0.134479
    2018-11-25 -1.049921 0.277906 -1.470455 0.561132
    2018-11-26 -0.572511 0.559077 -0.798659 1.680000
    2018-11-27 0.163208 -0.775700 0.293595 -1.309184
    2018-11-28 -0.341200 0.889370 0.278003 1.074634
    2018-11-29 -1.399622 -0.577925 0.170549 0.872441
    2018-11-30 1.146593 0.264301 -1.741237 -0.010080
    2018-12-01 1.151403 -1.483099 0.425871 1.522704
    In [9]:
    # Select the columns whose label satisfies the predicate (here: only 'A').
    # DataFrame.select is deprecated and has been removed from current pandas;
    # mapping the predicate over the labels and passing the boolean result to
    # .loc is the replacement named by the deprecation warning.
    df.loc[:, df.columns.map(lambda x: x == 'A')]
    
    Out[9]:
     
     A
    2018-11-24 -1.302542
    2018-11-25 -1.049921
    2018-11-26 -0.572511
    2018-11-27 0.163208
    2018-11-28 -0.341200
    2018-11-29 -1.399622
    2018-11-30 1.146593
    2018-12-01 1.151403
    In [10]:
    # Same column data as the previous cell, but df['A'] returns a Series rather than a one-column DataFrame.
    df['A']
    
    Out[10]:
    2018-11-24   -1.302542
    2018-11-25   -1.049921
    2018-11-26   -0.572511
    2018-11-27    0.163208
    2018-11-28   -0.341200
    2018-11-29   -1.399622
    2018-11-30    1.146593
    2018-12-01    1.151403
    Freq: D, Name: A, dtype: float64
     

    5.where判断条件,并索引位置,替换数据

    In [11]:
    # where keeps the entries that satisfy the condition (df<0) and replaces the rest with NaN.
    df.where(df<0)
    
    Out[11]:
     
     A B C D
    2018-11-24 -1.302542 -1.850999 NaN NaN
    2018-11-25 -1.049921 NaN -1.470455 NaN
    2018-11-26 -0.572511 NaN -0.798659 NaN
    2018-11-27 NaN -0.775700 NaN -1.309184
    2018-11-28 -0.341200 NaN NaN NaN
    2018-11-29 -1.399622 -0.577925 NaN NaN
    2018-11-30 NaN NaN -1.741237 -0.010080
    2018-12-01 NaN -1.483099 NaN NaN
    In [12]:
    # The second argument supplies the replacement for entries that fail the condition: here -df, so positive values are flipped to negative.
    df.where(df<0,-df)
    
    Out[12]:
     
     A B C D
    2018-11-24 -1.302542 -1.850999 -1.198842 -0.134479
    2018-11-25 -1.049921 -0.277906 -1.470455 -0.561132
    2018-11-26 -0.572511 -0.559077 -0.798659 -1.680000
    2018-11-27 -0.163208 -0.775700 -0.293595 -1.309184
    2018-11-28 -0.341200 -0.889370 -0.278003 -1.074634
    2018-11-29 -1.399622 -0.577925 -0.170549 -0.872441
    2018-11-30 -1.146593 -0.264301 -1.741237 -0.010080
    2018-12-01 -1.151403 -1.483099 -0.425871 -1.522704
     

    6.query按条件查找索引

    In [13]:
    # 10x3 frame of random draws with columns a, b, c (rebinds df; unseeded, so values differ per run).
    df = pd.DataFrame(data=np.random.rand(10, 3), columns=['a', 'b', 'c'])
    df
    
    Out[13]:
     
     a b c
    0 0.413798 0.636850 0.364235
    1 0.338115 0.290723 0.413065
    2 0.120505 0.151662 0.458175
    3 0.032937 0.046397 0.231927
    4 0.959934 0.210111 0.028029
    5 0.319149 0.251683 0.409901
    6 0.642514 0.613375 0.926091
    7 0.777493 0.467535 0.576821
    8 0.139102 0.978898 0.540589
    9 0.543792 0.596241 0.330553
    In [14]:
    # query evaluates the expression string against the column names: keep rows where a < b.
    df.query('(a<b)')
    
    Out[14]:
     
     a b c
    0 0.413798 0.636850 0.364235
    2 0.120505 0.151662 0.458175
    3 0.032937 0.046397 0.231927
    8 0.139102 0.978898 0.540589
    9 0.543792 0.596241 0.330553
    In [15]:
    # Conditions are combined with &: keep rows where a < b and b < c.
    df.query('(a<b)&(b<c)')
    
    Out[15]:
     
     a b c
    2 0.120505 0.151662 0.458175
    3 0.032937 0.046397 0.231927
  • 相关阅读:
    join函数——Gevent源码分析
    代理上网(ssh 动态端口转发)
    内核热patch
    技术债
    mysql 隔离级别与间隙锁等
    python type
    django : related_name and related_query_name
    ssh 卡主
    logistics regression
    __new__ 和 __init__
  • 原文地址:https://www.cnblogs.com/AI-robort/p/11678977.html
Copyright © 2020-2023  润新知