• pandas 练习


    import numpy as np
    from pandas import Series, DataFrame
    
    # A Series holds one-dimensional data given as a list or a dict.
    # It exposes two attributes: values and index.
    s1 = Series(data=[4, 7, -5, 3])
    print(s1.values)  # the stored data
    print(s1.index)   # the index labels (auto-generated here)
    s1.index = list('abcd')  # relabel the four entries
    print(s1)

    # Built from a dict: the keys become the index labels.
    s2 = Series(dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000))
    print(s2)

    # A DataFrame holds two-dimensional data given as a matrix or as a dict
    # whose items are lists.
    # It exposes three attributes: values, index, columns.
    data = dict(
        state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
        year=[2000, 2001, 2002, 2001, 2002],
        pop=[1.5, 1.7, 3.6, 2.4, 2.9],
    )
    f1 = DataFrame(data)
    print(f1.values)   # the stored data
    print(f1.index)    # row labels
    print(f1.columns)  # column labels

    # Same data with an explicit column order.
    f2 = DataFrame(data, columns=['year', 'state', 'pop'])

    # Explicit row labels plus a 'debt' column that has no entry in `data`.
    f3 = DataFrame(
        data,
        columns=['year', 'state', 'pop', 'debt'],
        index=['one', 'two', 'three', 'four', 'five'],
    )
    
    
    
    # Key features
    ## 1. Reindexing
    # (1) Series
    s1 = Series([4.5, 7.2, -5.3, 3.6], index=list('dbac'))
    new_labels = list('abcde')
    s2 = s1.reindex(new_labels)                # 'e' has no source value -> NaN
    s3 = s1.reindex(new_labels, fill_value=0)  # 'e' gets 0 instead of NaN

    s4 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
    s5 = s4.reindex(range(6), method='ffill')  # forward-fill positions 1, 3, 5

    # (2) DataFrame
    f1 = DataFrame(
        np.arange(9).reshape((3, 3)),
        index=['a', 'c', 'd'],
        columns=['Ohio', 'Texas', 'California'],
    )
    f2 = f1.reindex(index=['a', 'b', 'c', 'd'])               # reindex the rows
    f3 = f1.reindex(columns=['Texas', 'Utah', 'California'])  # reindex the columns
    
    
    
    ## 2. Indexing, selection and filtering
    # Accessors: .at, .iat, .loc, .iloc
    # (.ix was removed in pandas 1.0; use .loc for labels and .iloc for positions.)
    # The bare expressions below only display a result in an interactive session.

    # 1) ndarray-style indexing
    # (1) Series
    s1 = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
    s1['b']              # single label
    s1.iloc[1]           # single position (integer keys on a labelled Series are deprecated)
    s1.iloc[2:4]         # positional slice, end excluded
    s1[['b', 'a', 'd']]  # list of labels
    s1.iloc[[1, 3]]      # list of positions
    s1[s1 < 2]           # boolean mask
    s1['b':'c']          # label slice, end included
    s1['b':'c'] = 5      # assign through a label slice


    # (2) DataFrame
    df = DataFrame(np.arange(16).reshape((4, 4)), index=['Ohio', 'Colorado', 'Utah', 'New York'],
                                                columns=['one', 'two', 'three', 'four'])
    df['two']               # one column
    df[['three', 'one']]    # several columns
    df[:2]                  # first two rows
    df[df['three'] > 5]     # rows selected by a boolean mask
    df > 5                  # element-wise boolean frame
    df[df['three'] < 5] = 0 # zero out every row whose 'three' is below 5


    # 2) Label-based indexing
    # (1) Series

    # (2) DataFrame
    df.loc['Colorado', 'three']
    df.loc['Colorado', ['three', 'four']]
    df.loc[['Colorado', 'Utah'], ['three', 'four']]
    # Row labels with column positions: translate the positions to labels first.
    df.loc[['Colorado', 'Utah'], df.columns[[2, 0, 3]]]
    df.loc['Colorado']
    df.iloc[2]              # third row by position
    df.loc[:'Utah', 'three']
    df.loc[df.three > 5, df.columns[:3]]  # boolean row mask, first three columns
  • 相关阅读:
    简单爬取腾讯新闻内容方法封装
    Python正则表达式函数
    Escape(反思与总结)
    springboot中使用mybatis
    解决 java.lang.UnsatisfiedLinkError:no ** in java.library.path in java.library.path 的异常
    解决 fatal error: jni_md.h: No such file or directory #include “jni_md.h”
    Git troubleshooting
    Java8 新特性2——强大的Stream API
    Java8 新特性1—— Lambda表达式、内置函数式接口、方法引用与构造器引用
    搭建Linux(Ubuntu)系统下的Differential Datalog运行环境
  • 原文地址:https://www.cnblogs.com/hhh5460/p/5371309.html
Copyright © 2020-2023  润新知