import numpy as np
import pandas as pd
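'''
pandas Series walkthrough:
1. Declaring a Series and specifying its index labels
2. Inspecting the index (index) and the elements (values)
3. Selecting elements
4. Assigning values to elements
5. Building a new Series from a NumPy array
6. Filtering elements
7. Arithmetic operators and math functions on a Series
8. Element composition of a Series (duplicates, membership)
9. NaN
10. Using a Series as a dictionary
11. Arithmetic between two Series
'''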
# --- 1. Declare a Series with explicit index labels ---------------------
# When no index is given, labels default to 0, 1, 2, ...
series_define = pd.Series([2, 3, 3, 4, 6, 8],
                          index=['a', 'b', 'c', 'd', 'e', 'f'])
print(series_define)
# Expected output:
# a    2
# b    3
# c    3
# d    4
# e    6
# f    8
# dtype: int64

# --- 2. Inspect the index and the elements (two array-like objects) -----
series_index = series_define.index
series_value = series_define.values
print(series_index)
print(series_value)
# Expected output:
# Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')
# [2 3 3 4 6 8]

# --- 3. Select elements: by position (.iloc) or by label ----------------
# Positional access goes through .iloc: plain [] with an integer on a
# string-labelled index is deprecated in recent pandas versions.
print(series_define.iloc[-1])
print(series_define.iloc[4:-1])
print(series_define['f'])
# To fetch several values by label, pass the labels as a list.
print(series_define[['e', 'f']])

# --- 4. Assign values: select the element, then assign ------------------
series_define.iloc[0] = 66   # by position
series_define['b'] = 77      # by label
print(series_define)
# Expected output:
# a    66
# b    77
# c     3
# d     4
# e     6
# f     8
# dtype: int64

# --- 5. Build a Series from an existing NumPy array ---------------------
arr = np.array([1, 2, 3, 4])
s = pd.Series(arr)
print(s)
# Expected output (the original author recorded dtype int32; the integer
# width follows NumPy's default integer type and may be int64 instead):
# 0    1
# 1    2
# 2    3
# 3    4
# dtype: int32

# --- 6. Filter elements: keep those greater than 3 ----------------------
print(s[s > 3])

# --- 7. NumPy-style operators (+ - * /) and math functions such as ------
# ---    np.log() apply element-wise to a Series ------------------------
# Division
s1 = series_define / 2
print(s1)
# Expected output:
# a    33.0
# b    38.5
# c     1.5
# d     2.0
# e     3.0
# f     4.0
# dtype: float64

# Natural logarithm
s2 = np.log(series_define)
print(s2)
# Expected output:
# a    4.189655
# b    4.343805
# c    1.098612
# d    1.386294
# e    1.791759
# f    2.079442
# dtype: float64
# --- 8. Duplicates, occurrence counts and membership tests --------------
s_a = pd.Series([1, 1, 1, 1, 2, 2, 2, 3])

# .unique() removes duplicates and returns the distinct values as an array.
a = s_a.unique()
print(a)
# Expected output:
# [1 2 3]

# .value_counts() returns a Series whose index holds the distinct values
# and whose values are the number of times each one occurs.
b = s_a.value_counts()
print(b)
# Label lookup: how many times the value 1 occurs (4 for this data).
print(b.loc[1])

# .isin() tests membership element by element and returns a boolean Series.
c = s_a.isin([2, 3])
# The boolean mask can be used directly to keep only the matching elements.
c1 = s_a[c]
print(c)
print(c1)
# Expected output:
# 0    False
# 1    False
# 2    False
# 3    False
# 4     True
# 5     True
# 6     True
# 7     True
# dtype: bool
# 4    2
# 5    2
# 6    2
# 7    3
# dtype: int64
# --- 9. NaN: marks a missing or problematic value -----------------------
# Build a Series containing NaN. Use np.nan: the np.NaN alias was removed
# in NumPy 2.0.
s4 = pd.Series([5, -3, np.nan, 14])
print(s4)
# Expected output (the NaN forces the dtype to float64):
# 0     5.0
# 1    -3.0
# 2     NaN
# 3    14.0
# dtype: float64

# isnull(): True where the element is NaN.
s41 = s4.isnull()
print(s41)
# notnull(): True where the element is not NaN.
s42 = s4.notnull()
print(s42)
# Expected output:
# 0    False
# 1    False
# 2     True
# 3    False
# dtype: bool
# 0     True
# 1     True
# 2    False
# 3     True
# dtype: bool
# --- 10. Using a Series as a dictionary ---------------------------------
# Build a Series from a dict: the keys become the index labels and the
# dict values become the elements.
mydict = {
    'red': 2000,
    'blue': 1000,
    'yellow': 500,
    'orange': 1000,
}
myseries = pd.Series(mydict)
print(myseries)
# Expected output:
# red       2000
# blue      1000
# yellow     500
# orange    1000
# dtype: int64

# The index can also be given separately. Here 'blue' is left out,
# 'black' and 'green' are added, and the order is changed.
colors = ['red', 'yellow', 'orange', 'black', 'green']
myseries = pd.Series(mydict, index=colors)
print(myseries)
# Expected output:
# red       2000.0
# yellow     500.0
# orange    1000.0
# black        NaN
# green        NaN
# dtype: float64
# The requested labels are matched against the dict keys: labels with no
# matching key are filled with NaN, and keys not requested ('blue') are
# dropped.

# --- 11. Arithmetic between two Series ----------------------------------
# Values are added where the labels match; a label that is missing from
# either operand (or holds NaN) yields NaN in the result.
mydict2 = {
    'red': 400,
    'yellow': 1000,
    'black': 700,
}
myseries2 = pd.Series(mydict2)
mydict_add = myseries + myseries2
print(myseries)
print(myseries2)
print(mydict_add)
# Expected output:
# red       2000.0
# yellow     500.0
# orange    1000.0
# black        NaN
# green        NaN
# dtype: float64
# red        400
# yellow    1000
# black      700
# dtype: int64
# black        NaN
# green        NaN
# orange       NaN
# red       2400.0
# yellow    1500.0
# dtype: float64