• Python:Pandas学习


      1 import pandas as pd
      2 import numpy as np
      3 s = pd.Series([1, 3, 6, np.nan, 44, 1])
      4 
      5 df= pd.DataFrame(np.random.random((4,5)))
      6 
      7 # data frame 常用属性
      8 df.dtypes
      9 df.index
     10 df.columns
     11 df.values
     12 
     13 # data frame 常用方法
     14 df.describe()
     15 df.T
     16 df.sort_index(axis = 1, ascending = False)
     17 df.sort_values(by = 4)
     18 
     19 # 选择数据
     20 dates = pd.date_range('20160101', periods = 6)
     21 df = pd.DataFrame(np.arange(24).reshape((6,4)), index = dates,
     22                   columns = ['A', 'B', 'C', 'D'])
     23 
     24 '''row or column''' # 行不可隔着选择
     25 print(df[0:3])
     26 print(df[['A', 'D']])
     27 
     28 '''select by label:loc''' # 行不可隔着选择
     29 print(df.loc['20160101', :])
     30 print(df.loc[:,['A', 'B']])
     31 
     32 '''select by position:iloc'''
     33 print(df.iloc[[0, 2], [0, 3]])
     34 
     35 '''mixed selection:ix'''
     36 print(df.ix[[0, 2], ['A', 'D']])
     37 
     38 '''Boolean indexing'''
     39 print(df[df.B > 5])
     40 
     41 # 设置数据
     42 df.iloc[2, 2] = 111
     43 df.loc['20160101', 'D'] = 222
     44 df.B[df.A > 5] = 0
     45 print(df)
     46 
     47 df['F'] = np.nan
     48 df['E'] = range(6)
     49 print(df)
     50 
     51 # 处理缺失数据
     52 df.iloc[0, 1] = np.nan
     53 df.iloc[1, 2] = np.nan
     54 print(df)
     55 print(df.dropna(axis = 0, how = 'all')) # how = {'any', 'all'}
     56 print(df.fillna(value = 0))
     57 print(np.any(df.isnull()))
     58 
     59 # data frame 合并
     60 '''concatenating'''
     61 df1 = pd.DataFrame(np.ones((3,4))*0, columns = ['a', 'b', 'c', 'd'])
     62 df2 = pd.DataFrame(np.ones((3,4))*1, columns = ['a', 'b', 'c', 'd'])
     63 df3 = pd.DataFrame(np.ones((3,4))*2, columns = ['a', 'b', 'c', 'd'])
     64 
     65 res = pd.concat([df1, df2, df3], axis = 0, ignore_index = True)
     66 res1 = pd.concat([df1, df2, df3], axis = 1)
     67 
     68 '''join参数'''
     69 df1 = pd.DataFrame(np.ones((3,4))*0, columns = ['a', 'b', 'c', 'd'], index = [1, 2, 3])
     70 df2 = pd.DataFrame(np.ones((3,4))*1, columns = ['b', 'c', 'd', 'e'], index = [2, 3, 4])
     71 
     72 res = pd.concat([df1, df2], join = 'outer', ignore_index = True)
     73 res = pd.concat([df1, df2], join = 'inner', ignore_index = True)
     74 print(res)
     75 
     76 '''join_axes'''
     77 res = pd.concat([df1, df2], axis = 1, join = 'inner')
     78 res = pd.concat([df1, df2], axis = 1, join_axes = [df1.index])
     79 
     80 # append
     81 df1 = pd.DataFrame(np.ones((3,4))*0, columns = ['a', 'b', 'c', 'd'], index = [1, 2, 3])
     82 df2 = pd.DataFrame(np.ones((3,4))*1, columns = ['b', 'c', 'd', 'e'], index = [2, 3, 4])
     83 df3 = pd.DataFrame(np.ones((3,4))*1, columns = ['b', 'c', 'd', 'e'], index = [2, 3, 4])
     84 
     85 res = df1.append([df2, df3], ignore_index = True)
     86 res1 = pd.concat([df1, df2, df3])
     87 print(res)
     88 print(res1)
     89 
     90 # data frame merge
     91 '''merge one key'''
     92 left = pd.DataFrame({'key':['K1','K2','K3'],
     93                      'A':[1,2,3],
     94                      'B':[4,5,6]})
     95 
     96 right = pd.DataFrame({'key':['K0','K1','K3'],
     97                      'A':[11,43,53],
     98                      'D':[12,-1,0]})
     99 res = pd.merge(left, right, on = 'key', how = 'outer')
    100 print(res)
    101 
    102 '''merge two or more keys'''
    103 left = pd.DataFrame({'key0':['K1','K2','K3'],
    104                      'key1':['X0','X2','X3'],
    105                      'A':[1,2,3],
    106                      'B':[4,5,6]})
    107 
    108 right = pd.DataFrame({'key0':['K0','K1','K3'],
    109                       'key1':['X1','X0','K3'],
    110                      'A':[11,43,53],
    111                      'D':[12,-1,0]})
    112 res = pd.merge(left, right, on = ['key0', 'key1'], how = 'outer')
    113 print(res)
    114 
    115 '''merge index'''
    116 left = pd.DataFrame({'A':[1,2,3],
    117                      'B':[4,5,6]},
    118                     index = ['K0', 'K1', 'K2'])
    119 
    120 right = pd.DataFrame({'A':[11,43,53],
    121                      'D':[12,-1,0]},
    122                     index = ['K1', 'K2', 'K3'])
    123 res = pd.merge(left, right, left_index = True,
    124                right_index = True)
    125 print(res)
    126 
    127 '''handle overlapping columns'''
    128 left = pd.DataFrame({'key':['K1','K2','K3'],
    129                      'A':[1,2,3],
    130                      'B':[4,5,6]})
    131 
    132 right = pd.DataFrame({'key':['K0','K1','K3'],
    133                      'A':[11,43,53],
    134                      'B':[12,-1,0]})
    135 res = pd.merge(left, right, on = 'key',
    136                suffixes = ['_left', '_right'] , how = 'outer')
    137 print(res)
    138 
    139 # 作图
    140 import pandas as pd
    141 import numpy as np
    142 import matplotlib.pyplot as plt
    143 
    144 '''plot data'''
    145 '''Series'''
    146 data = pd.Series(np.random.randn(1000), index = np.arange(1000))
    147 data = data.cumsum()
    148 data.plot()
    149 print(data)
    150 
    151 '''Data Frame'''
    152 data = pd.DataFrame(np.random.randn(1000, 4), 
    153                     index = np.arange(1000),
    154                     columns = list("ABCD"))
    155 print(data.head())
    156 data = data.cumsum()
    157 data.plot()
    158 ax = data.plot.scatter(x = 'A', y = 'C',
    159                        color = 'Red',
    160                        label = 'Class 2')
    161 data.plot.scatter(x = 'A', y = 'B',
    162                   color = 'DarkGreen',
    163                   label = 'Class 2',
    164                   ax = ax)
  • 相关阅读:
    面试中要注意的 3 个 JavaScript 问题
    angularJS遇到的坑
    为什么你的前端工作经验不值钱?
    ceph pg常见的几种状态
    k8s网络之flannel
    k8s流量访问之service
    lvs+keepalived高可用
    iptables【学习笔记】
    Linux和OpenStack中的网络知识【学习笔记】
    服务器BIOS设置
  • 原文地址:https://www.cnblogs.com/sumai/p/6361222.html
Copyright © 2020-2023  润新知