数据清洗与准备
1、抽样:
import numpy as np
import pandas as pd

# Draw 10 values from a 5-element Series. Sampling more items than the
# Series holds is only possible because replace=True.
choices = pd.Series([5, 7, -1, 6, 4])
draws = choices.sample(n=10, replace=True)
draws
OUT:
0    5
1    7
3    6
2   -1
4    4
4    4
4    4
2   -1
3    6
2   -1
dtype: int64
2、分割:
# Split the string on the literal '|' separator.
x = 'a|b|c'
x.split('|')
['a', 'b', 'c']
3、取唯一值:
# pd.unique returns the distinct values in order of first appearance
# (it does not sort). The list is wrapped in an object-dtype Series because
# passing a bare Python list to pd.unique is deprecated in recent pandas.
l1 = ['a', 'a', 'c', 'b', 'b', 'c', 'c']
pd.unique(pd.Series(l1, dtype=object))
array(['a', 'c', 'b'], dtype=object)
4、索引取值:
# 4x4 frame holding 0..15 with labelled rows and columns.
data = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
data
# get_indexer maps column labels to integer positions, in the order requested.
data.columns.get_indexer(['c', 'a', 'b'])
| | a | b | c | d |
---|---|---|---|---|
one | 0 | 1 | 2 | 3 |
two | 4 | 5 | 6 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |
array([2, 0, 1])
# iloc is positional, so the column labels are first converted to positions
# with get_indexer; the selected cells of the row at position 1 are set to 88.
data.iloc[1, data.columns.get_indexer(['c', 'a', 'b'])] = 88
data
| | a | b | c | d |
---|---|---|---|---|
one | 0 | 1 | 2 | 3 |
two | 88 | 88 | 88 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |
# Take the first two rows by position; the columns are given as labels and
# converted to positions with get_indexer so they can be used with iloc.
value = data.iloc[:2, data.columns.get_indexer(['c', 'a', 'b'])]
value
| | c | a | b |
---|---|---|---|
one | 2 | 0 | 1 |
two | 88 | 88 | 88 |
# Select rows and columns directly by label with loc.
value2 = data.loc[['one', 'two'], ['c', 'a', 'b']]
value2
| | c | a | b |
---|---|---|---|
one | 2 | 0 | 1 |
two | 88 | 88 | 88 |
5、筛选行与列
# 4x4 frame holding 0..15 with labelled rows and columns.
data = pd.DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['one', 'two', 'three', 'four'],
    columns=['a', 'b', 'c', 'd'],
)
data
| | a | b | c | d |
---|---|---|---|---|
one | 0 | 1 | 2 | 3 |
two | 4 | 5 | 6 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |
# Element-wise "greater than 5" test; the result is a boolean frame.
data.gt(5)
| | a | b | c | d |
---|---|---|---|---|
one | False | False | False | False |
two | False | False | True | True |
three | True | True | True | True |
four | True | True | True | True |
# Keep the values where the mask is True; every other cell becomes NaN.
data.where(data > 5)
| | a | b | c | d |
---|---|---|---|---|
one | NaN | NaN | NaN | NaN |
two | NaN | NaN | 6.0 | 7.0 |
three | 8.0 | 9.0 | 10.0 | 11.0 |
four | 12.0 | 13.0 | 14.0 | 15.0 |
# Reduce along axis 1 to keep the rows that contain at least one value
# greater than 5. axis is passed by keyword because pandas 2.0 made the
# arguments of DataFrame.any keyword-only.
data[(data > 5).any(axis=1)]
| | a | b | c | d |
---|---|---|---|---|
two | 4 | 5 | 6 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |
# Reduce along axis 0: for each column, is at least one value greater than 5?
# axis is passed by keyword because pandas 2.0 made the arguments of
# DataFrame.any keyword-only.
(data > 5).any(axis=0)
a    True
b    True
c    True
d    True
dtype: bool
# Keep the columns that contain at least one value greater than 5.
# axis is passed by keyword because pandas 2.0 made the arguments of
# DataFrame.any keyword-only.
data.loc[:, (data > 5).any(axis=0)]
| | a | b | c | d |
---|---|---|---|---|
one | 0 | 1 | 2 | 3 |
two | 4 | 5 | 6 | 7 |
three | 8 | 9 | 10 | 11 |
four | 12 | 13 | 14 | 15 |