In [1]:
import warnings
import math
import pandas as pd
import numpy as np
import matplotlib
# Notebook-wide configuration: warning filter, pandas display limits, matplotlib setup.
warnings.filterwarnings('ignore')  # hides every warning for the rest of the session

# Raise pandas' display limits so rendered frames are truncated less aggressively.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
pd.set_option('display.max_colwidth', 500)

# Render figures inline. run_line_magic is the supported replacement for the
# deprecated InteractiveShell.magic() call.
get_ipython().run_line_magic('matplotlib', 'inline')
matplotlib.style.use('ggplot')
from matplotlib import pyplot as plt
# Import the submodule explicitly instead of relying on pyplot loading it as a side effect.
import matplotlib.font_manager

# Prefer SimHei for sans-serif text (presumably so Chinese labels render; confirm
# the font is installed) and draw minus signs with an ASCII hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Font handle loaded from a path relative to the working directory; not used by
# any cell visible in this section.
myfont = matplotlib.font_manager.FontProperties(fname=u'simsun.ttc', size=14)
In [11]:
# Build a 100-row demo frame of random integer columns.
# A dedicated, seeded RandomState makes the frame identical on every run
# (Restart & Run All) without touching NumPy's global random state.
SEED = 42
rng = np.random.RandomState(SEED)
data = pd.DataFrame({
    # randint's upper bound is exclusive, so e.g. age lies in [15, 99].
    'age': rng.randint(15, 100, 100),
    'height': rng.randint(140, 180, 100),
    'weight': rng.randint(40, 80, 100),
    'gender': rng.randint(0, 2, 100),  # integer codes 0 / 1
    'salary': rng.randint(3000, 30000, 100),
})
data.head()
Out[11]:
In [12]:
# Series.map: translate values through a lookup table.
# NOTE(review): 'women' is probably meant to be 'woman' -- confirm before changing the label.
gender_labels = {0: 'man', 1: 'women'}
# Values that are not keys of the table (for example labels already translated
# when this cell is executed a second time) keep their current value instead of
# being turned into NaN, so the cell is safe to re-run.
data['gender'] = data['gender'].map(gender_labels).fillna(data['gender'])
data.head()
Out[12]:
In [14]:
# Series.map also accepts a callable; here the bound str.format of a template
# string is called once per value.
sentence_template = 'i am a {} '.format
data['gender'].map(sentence_template).head()
Out[14]:
In [19]:
# Series.apply: run a function on every value of the Series.
# NOTE(review): the result overwrites the same column, so each re-run of this
# cell multiplies the salaries by 1.3 again.
data['salary'] = data['salary'].apply(lambda pay: pay * 1.3)
data.head()
Out[19]:
In [33]:
# Series.apply: run a function on every value; extra positional arguments can
# be forwarded to the function.
def fun1(x, bias):
    """Return ``x`` scaled by ``bias``.

    Args:
        x: The value to scale.
        bias: The multiplier applied to ``x``.

    Returns:
        The product ``x * bias``.
    """
    scaled = x * bias
    return scaled
# Forward 1.3 to fun1 as its `bias` argument through `args`.
# NOTE(review): overwrites the salary column, so re-running compounds the factor.
data['salary'] = data['salary'].apply(fun1, args=(1.3,))
data.head()
Out[33]:
In [36]:
# Series.apply: run a function on every value; extra keyword arguments can be
# forwarded to the function.
def fun2(x, **kwargs):
    """Add 100 times every keyword value to ``x``.

    Args:
        x: The starting value.
        **kwargs: Named amounts; only the values are used, the names are ignored.

    Returns:
        ``x`` increased by ``value * 100`` for each keyword value, added in the
        order the keywords were passed.
    """
    for amount in kwargs.values():
        x += amount * 100
    return x
# Forward the three monthly amounts to fun2 as keyword arguments.
# NOTE(review): overwrites the salary column, so re-running adds the amounts again.
data['salary'] = data['salary'].apply(fun2, june=30, july=20, august=25)
data.head()
Out[36]:
In [28]:
# DataFrame.apply: call a function on the selected columns; np.sqrt returns a
# frame of the same shape holding the square roots.
sqrt_columns = ['age', 'height', 'weight']
data[sqrt_columns].apply(np.sqrt).head()
Out[28]:
In [37]:
# DataFrame.apply with a reducing function: np.sum is applied to the selected
# columns and the totals are shown.
sum_columns = ['age', 'height', 'weight']
data[sum_columns].apply(np.sum).head()
Out[37]:
In [29]:
# DataFrame.apply over rows: use a function for calculations that need several
# fields of the same row.
def BMI(series):
    """Compute weight divided by the square of (height / 100).

    Args:
        series: A mapping-like row that provides 'height' and 'weight' entries.

    Returns:
        ``weight / (height / 100.0) ** 2``.
    """
    scaled_height = series['height'] / 100.0
    return series['weight'] / scaled_height ** 2
# axis=1 hands each row to BMI as a Series.
bmi_per_row = data.apply(BMI, axis=1)
bmi_per_row.head()
Out[29]:
In [43]:
# Preview the three columns used by the element-wise example in the next cell.
data.loc[:, ['age', 'height', 'weight']].head()
Out[43]:
In [44]:
# DataFrame.applymap: call a function on every individual element.
# NOTE(review): applymap is deprecated in recent pandas releases in favour of
# DataFrame.map -- confirm the pandas version this notebook targets.
elementwise_columns = ['age', 'height', 'weight']
data[elementwise_columns].applymap(lambda value: value - 2).head()
Out[44]: