• map-apply-applymap


    In [1]:
    import warnings
    import math
    import pandas as pd
    import numpy as np
    import matplotlib
    
    warnings.filterwarnings('ignore')
    pd.options.display.max_rows = 100
    pd.options.display.max_columns = 100
    pd.set_option('max_colwidth', 500)
    
    get_ipython().magic(u'matplotlib inline')
    matplotlib.style.use('ggplot')
    
    from matplotlib import pyplot as plt
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    
    myfont = matplotlib.font_manager.FontProperties(fname=u'simsun.ttc', size=14)
    
    In [11]:
    data = pd.DataFrame({
        'age' : np.random.randint(15, 100, 100),
        'height':np.random.randint(140, 180, 100),
        'weight':np.random.randint(40, 80, 100),
        'gender':np.random.randint(0,2, 100),
        'salary':np.random.randint(3000, 30000, 100)
    })
    data.head()
    
    Out[11]:
     
     ageheightweightgendersalary
    0 68 140 58 0 15988
    1 22 160 59 0 5807
    2 90 160 60 0 21741
    3 71 178 45 0 14133
    4 61 165 59 1 14346
    In [12]:
    # series map:值转换
    data['gender'] = data.gender.map({0:'man', 1:'women'})
    data.head()
    
    Out[12]:
     
     ageheightweightgendersalary
    0 68 140 58 man 15988
    1 22 160 59 man 5807
    2 90 160 60 man 21741
    3 71 178 45 man 14133
    4 61 165 59 women 14346
    In [14]:
    # series map: 也可以使用字符串format
    data.gender.map('i am a {} '.format).head()
    
    Out[14]:
    0      i am a man 
    1      i am a man 
    2      i am a man 
    3      i am a man 
    4    i am a women 
    Name: gender, dtype: object
    In [19]:
    # series apply: 对序列的每个值做相应操作
    data.salary = data.salary.apply(lambda x: x* 1.3)
    data.head()
    
    Out[19]:
     
     ageheightweightgendersalary
    0 68 140 58 man 35125.636
    1 22 160 59 man 12757.979
    2 90 160 60 man 47764.977
    3 71 178 45 man 31050.201
    4 61 165 59 women 31518.162
    In [33]:
    # series apply: 对序列的每个值做相应操作,可传递复杂参数和函数
    def fun1(x, bias):
        return x * bias
    data.salary = data.salary.apply(fun1, args=(1.3,))
    data.head()
    
    Out[33]:
     
     ageheightweightgendersalary
    0 68 140 58 man 45663.3268
    1 22 160 59 man 16585.3727
    2 90 160 60 man 62094.4701
    3 71 178 45 man 40365.2613
    4 61 165 59 women 40973.6106
    In [36]:
    # series apply: 对序列的每个值做相应操作,可传递复杂参数和函数
    def fun2(x, **kwargs):
        for month in kwargs:
            x += kwargs[month] * 100
        return x 
    data.salary = data.salary.apply(fun2, june=30, july=20, august=25)
    data.head()
    
    Out[36]:
     
     ageheightweightgendersalary
    0 68 140 58 man 53238.3268
    1 22 160 59 man 24160.3727
    2 90 160 60 man 69669.4701
    3 71 178 45 man 47940.2613
    4 61 165 59 women 48548.6106
    In [28]:
    # dataframe apply:根据函数对行或者列应用对应操作
    data[['age','height', 'weight']].apply(np.sqrt).head()
    
    Out[28]:
     
     ageheightweight
    0 8.246211 11.832160 7.615773
    1 4.690416 12.649111 7.681146
    2 9.486833 12.649111 7.745967
    3 8.426150 13.341664 6.708204
    4 7.810250 12.845233 7.681146
    In [37]:
    # dataframe apply:根据函数对行或者列应用对应操作
    data[['age','height', 'weight']].apply(np.sum).head()
    
    Out[37]:
    age        5818
    height    16225
    weight     6081
    dtype: int64
    In [29]:
    # dataframe apply:根据函数对行应用对应操作,做比较复杂计算
    def BMI(series):
        height = series['height']/100.0
        weight = series['weight']
        return weight / height ** 2
    
    data.apply(BMI, axis = 1).head()
    
    Out[29]:
    0    29.591837
    1    23.046875
    2    23.437500
    3    14.202752
    4    21.671258
    dtype: float64
    In [43]:
    data[['age','height', 'weight']].head()
    
    Out[43]:
     
     ageheightweight
    0 68 140 58
    1 22 160 59
    2 90 160 60
    3 71 178 45
    4 61 165 59
    In [44]:
    # dataframe applymap:对每个元素操作
    data[['age','height', 'weight']].applymap(lambda x:x-2).head()
    
    Out[44]:
     
     ageheightweight
    0 66 138 56
    1 20 158 57
    2 88 158 58
    3 69 176 43
    4 59 163 57
  • 相关阅读:
    鹅厂女专家:用“爱折腾”实现跨界之美
    基于腾讯云的视频聊天研究
    iOS微信内存监控
    2017年数据库技术盘点
    如何做好游戏内实时语音体验
    腾讯云微计算实践:从Serverless说起,谈谈边缘计算的未来
    使用腾讯云“自定义监控”监控GPU使用率
    如何在Python中从零开始实现随机森林
    DataGridView 设置某个列为只能为数字
    Ieditor
  • 原文地址:https://www.cnblogs.com/qwj-sysu/p/12303097.html
Copyright © 2020-2023  润新知