• pandas 2


    ==============

    sdf={'rkey':[1,2,3,2],'name':['rkey1','rkey2','rkey3','rkey4']}
    sdf2={'lkey':[1,2,3],'name':['lsdf1','lsdf2','lsdf3']}
    sdf3={'lkey':[11,2,3],'name':['lsdf3','lsdf3','lsdf3']}
    cc=DataFrame(sdf)
    cc2=DataFrame(sdf2)
    cc3=DataFrame(sdf3)
    print cc.merge(cc2,left_on='rkey',right_on='lkey')
    name_x rkey lkey name_y

    0 rkey1 1 1 lsdf1
    1 rkey2 2 2 lsdf2
    2 rkey4 2 2 lsdf2
    3 rkey3 3 3 lsdf3

    print cc2.merge(cc3,on='lkey')

    lkey name_x name_y
    0 2 lsdf2 lsdf3
    1 3 lsdf3 lsdf3

    # 指定附加在重名列上的字符串
    print cc2.merge(cc3,on='lkey',suffixes=('_left','_right'))

    lkey name_left name_right
    0 2 lsdf2 lsdf3
    1 3 lsdf3 lsdf3

    ======通过索引和列进行合并=====================

    sdf2={'tst':[1,2,3],'name':['lsdf2','lsdf22','lsdf32']}
    sdf3={'lkey':[11,2,3],'name':['lsdf3','lsdf23','lsdf33']}
    cc2=DataFrame(sdf2,index=[1,2,3])
    cc3=DataFrame(sdf3)
    print cc2.merge(cc3,left_index=True,right_on='lkey')

     =======================

    sdf=Series([11,22,33])
    sdf2=Series([44,55,66])
    print pd.concat([sdf,sdf2])

    print pd.concat([sdf,sdf2],axis=1)

    =============

    sdf2={'tst2':[1,2,3],'name':['lsdf2','lsdf22','lsdf32']}
    sdf3={'tst3':[11,2,3],'name':['lsdf3','lsdf23','lsdf33']}
    cc2=DataFrame(sdf2,index=['b','c','d'])
    cc3=DataFrame(sdf3,index=['a','b','c'])
    print pd.concat([cc2,cc3])

    print pd.concat([cc2,cc3],axis=1)

    ===========

    print pd.concat([cc2,cc3],axis=1,join='inner')

    print pd.concat([cc2,cc3],join='inner')

     ==============

    sdf2={'tst':[1,2,3],'name':['lsdf2','lsdf22','lsdf32']}
    sdf3={'tst':[11,2,3],'name':['lsdf3','lsdf23','lsdf33']}
    cc2=DataFrame(sdf2,index=['b','c','d'])
    cc3=DataFrame(sdf3,index=['a','b','c'])
    print pd.concat([cc2,cc3],ignore_index=True)

     =========用参数对象中的数据为调用者对象的缺失数据打补丁==========

    sdf2={'tst':[11,np.nan,33],'name':[np.nan,'lsdf22','lsdf22']}
    sdf3={'tst':[1,2,3],'name':['lsdf3','lsdf23','lsdf33']}
    cc2=DataFrame(sdf2,index=['b','c','d'])
    cc3=DataFrame(sdf3,index=['a','b','c'])
    print cc2.combine_first(cc3)

     ======================

    sdf3={'tst':[1,2,3],'name':['lsdf3','lsdf23','lsdf33']}
    cc3=DataFrame(sdf3,index=['a','b','c'])
    # 用replace()把指定的值替换为新值
    print cc3.replace(3,100) #替换一个值

    print cc3.replace([1,3],100) #替换多个值

    print cc3.replace({1:100,3:300})  #不同值进行不同替换

     ================

    df=pd.DataFrame({'name':['aa','bb','cc'],'age':[11,22,33]})
    ss=df['age']
    print ss
    
    0    11
    1    22
    2    33
    Name: age, dtype: int64

    索引ss的某一个值:ss[0]

    索引ss的某几个值:ss[[0,1]]

    切片:ss[1:]

    ==========

    s6=pd.Series(np.array([10,15,20,30,55,80]),index=['a','b','c','d','e','f'])
    s7=pd.Series(np.array([12,11,13,15,14,16]),index=['a','c','g','b','d','f'])
    #s6中不存在g索引,s7中不存在e索引,所以数据运算会产生两个缺失值NaN。
    print(s6+s7)  
    a    22.0
    b    30.0
    c    31.0
    d    44.0
    e     NaN
    f    96.0
    g     NaN
    dtype: float64
    #可以注意到这里的算术运算自动实现了两个序列的自动对齐
    #对于数据框的对齐,不仅是行索引的自动对齐,同时也会对列索引进行自动对齐,数据框相当于二维数组的推广
    print(s6/s7)
    a    0.833333
    b    1.000000
    c    1.818182
    d    2.142857
    e         NaN
    f    5.000000
    g         NaN
    dtype: float64

    获取DataFrame的多行(iloc按整数位置选取):test_data.iloc[[0,2,4,5,7]]

    按某一列的值进行过滤:test_data[test_data['age']==51]

    对多列进行过滤:test_data[(test_data['age']==51) & (test_data['job']>=5)]   (注意:每个条件都要用圆括号括起来,条件之间用 & 连接)

    过滤完后,只留下某几列:test_data[(test_data['age']==51) & (test_data['job']>=5)][['education','housing','loan','contact','poutcome']]

    查询指定的行(loc按索引标签选取):test_data.loc[[0,2,4,5,7]]

    查询指定的列:test_data[['age','job','marital']]

    查询指定的行和列:test_data.loc[[0,2,4,5,7],['age','job','marital']]

    
    
    
    
    
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    javascript基础案例解析
    Javascript正则
    Javascript数组
    JS函数
    数据类型转换
    flex弹性布局
    css基础5
    css基础4
    场景化支付对现有技术、业务、产品和风险产生深刻的影响
    场景化支付的关键技术
  • 原文地址:https://www.cnblogs.com/testzcy/p/10564950.html
Copyright © 2020-2023  润新知