代码
# Walk-through of basic pandas objects: builds a Series, a DatetimeIndex and
# several DataFrames, printing each result after a numbered '-N-' marker.
import pandas as pd
import numpy as np

# A Series built from a plain list that includes one missing value (np.nan).
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print('-1-')
print(s)

# Six consecutive dates starting at 2016-01-01.
dates = pd.date_range('20160101', periods=6)
print('-2-')
print(dates)

# `index` supplies the row labels; when omitted the rows are labelled with
# integers. The values are random, so this table differs on every run.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
print('-3-')
print(df)

# No index/columns given: both axes fall back to integer labels.
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
print('-4-')
print(df1)

# A DataFrame built from a dict: each key becomes a column, and scalar values
# are repeated for every row.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
print('-5-')
print(df2)

print('-6-')
print(df2.dtypes)

print('-7-')
print(df2.index)

print('-8-')
print(df2.columns)

print('-9-')
print(df2.values)

print('-10-')
# describe() summarises the numeric columns by default; the exact set of
# columns included can vary between pandas versions.
print(df2.describe())

print('-11-')
print(df2.T)

print('-12-')
# Sort by the column labels (A..F), in descending order.
print(df2.sort_index(axis=1, ascending=False))

print('-13-')
# Sort by the row labels (0..3), in descending order.
print(df2.sort_index(axis=0, ascending=False))

print('-14-')
# Sort the rows by the values in column 'E'.
print(df2.sort_values(by='E'))
输出
-1-
0     1.0
1     3.0
2     6.0
3     NaN
4    44.0
5     1.0
dtype: float64
-2-
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06'],
              dtype='datetime64[ns]', freq='D')
-3-
                   a         b         c         d
2016-01-01 -0.636080 -0.411646  1.167693 -0.085643
2016-01-02 -0.931738 -0.656105  0.833493  0.866367
2016-01-03 -0.495047 -0.131291 -0.757423 -0.783154
2016-01-04 -0.207423  0.261732  0.300315 -0.674217
2016-01-05  0.241664  0.560630 -0.057852 -0.411710
2016-01-06 -0.964392  0.990477  0.926594  0.388210
-4-
   0  1   2   3
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11
-5-
     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
-6-
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
-7-
Int64Index([0, 1, 2, 3], dtype='int64')
-8-
Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
-9-
[[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]
-10-
         A    C    D
count  4.0  4.0  4.0
mean   1.0  1.0  3.0
std    0.0  0.0  0.0
min    1.0  1.0  3.0
25%    1.0  1.0  3.0
50%    1.0  1.0  3.0
75%    1.0  1.0  3.0
max    1.0  1.0  3.0
-11-
                     0  ...                    3
A                    1  ...                    1
B  2013-01-02 00:00:00  ...  2013-01-02 00:00:00
C                    1  ...                    1
D                    3  ...                    3
E                 test  ...                train
F                  foo  ...                  foo

[6 rows x 4 columns]
-12-
     F      E  D    C          B    A
0  foo   test  3  1.0 2013-01-02  1.0
1  foo  train  3  1.0 2013-01-02  1.0
2  foo   test  3  1.0 2013-01-02  1.0
3  foo  train  3  1.0 2013-01-02  1.0
-13-
     A          B    C  D      E    F
3  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
0  1.0 2013-01-02  1.0  3   test  foo
-14-
     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
2  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
3  1.0 2013-01-02  1.0  3  train  foo