• pandas-15 df['one_col'].apply()方法的用法


    pandas-15 df['one_col'].apply()方法的用法

    apply有点像map的用法,可以传入一个函数,该函数会作用到这一列的每个元素上。
    如:df['A'].apply(str.upper)

    import numpy as np
    import pandas as pd
    from pandas import Series, DataFrame
    
    # Load the demo file and keep only the leading rows returned by head().
    df = pd.read_csv('apply_demo.csv').head()
    # .size counts elements: the number of rows for a Series, rows * columns
    # for a DataFrame.
    print(df.size)
    print(df)
    # Sample output of the raw data (kept as a no-op string literal):
    ''' 原始数据
             time                                data
    0  1473411962   Symbol: APPL Seqno: 0 Price: 1623
    1  1473411962   Symbol: APPL Seqno: 0 Price: 1623
    2  1473411963   Symbol: APPL Seqno: 0 Price: 1623
    3  1473411963   Symbol: APPL Seqno: 0 Price: 1623
    4  1473411963   Symbol: APPL Seqno: 1 Price: 1649
    '''
    # Add a column 'A' holding five lowercase 'a' values.
    df['A'] = Series(['a'] * 5)
    print(df)
    '''
             time                                data  A
    0  1473411962   Symbol: APPL Seqno: 0 Price: 1623  a
    1  1473411962   Symbol: APPL Seqno: 0 Price: 1623  a
    2  1473411963   Symbol: APPL Seqno: 0 Price: 1623  a
    3  1473411963   Symbol: APPL Seqno: 0 Price: 1623  a
    4  1473411963   Symbol: APPL Seqno: 1 Price: 1649  a
    '''

    # apply() calls the given function on every element of the column; here
    # each value is upper-cased and the column is replaced with the result.
    upper_a = df['A'].apply(str.upper)
    df['A'] = upper_a
    print(df)
    '''
             time                                data  A
    0  1473411962   Symbol: APPL Seqno: 0 Price: 1623  A
    1  1473411962   Symbol: APPL Seqno: 0 Price: 1623  A
    2  1473411963   Symbol: APPL Seqno: 0 Price: 1623  A
    3  1473411963   Symbol: APPL Seqno: 0 Price: 1623  A
    4  1473411963   Symbol: APPL Seqno: 1 Price: 1649  A
    '''

    # Tokenise the first 'data' entry to see which positions hold the values.
    first_entry = df['data'][0]
    tokens = first_entry.strip().split(' ')
    print(tokens)  # ['Symbol:', 'APPL', 'Seqno:', '0', 'Price:', '1623']
    
    def foo(line):
        """Extract the Symbol, Seqno and Price values from one 'data' string.

        The input is expected to look like
        ``' Symbol: APPL Seqno: 0 Price: 1623'``: alternating labels and
        values separated by whitespace.

        Args:
            line: One raw string taken from the ``data`` column.

        Returns:
            A Series holding the three values (still as strings) at positions
            0, 1 and 2.

        Raises:
            IndexError: If the line has fewer than six whitespace-separated
                tokens.
        """
        # split() with no argument ignores leading/trailing whitespace and
        # collapses runs of spaces or tabs. split(' ') would emit empty tokens
        # on a doubled space and shift every positional index used below.
        items = line.split()
        return Series([items[1], items[3], items[5]])
    
    # foo returns a Series for each row, so apply() yields a DataFrame with
    # one positional column (0, 1, 2) per extracted value.
    df_tmp = df['data'].apply(foo)
    print(df_tmp)
    '''
          0  1     2
    0  APPL  0  1623
    1  APPL  0  1623
    2  APPL  0  1623
    3  APPL  0  1623
    4  APPL  1  1649
    '''
    # Replace the positional column labels with meaningful names.
    df_tmp = df_tmp.rename(columns={0: 'Symbol', 1: 'Seqno', 2: 'Price'})
    print(df_tmp)
    '''
      Symbol Seqno Price
    0   APPL     0  1623
    1   APPL     0  1623
    2   APPL     0  1623
    3   APPL     0  1623
    4   APPL     1  1649
    '''

    # Merge the parsed columns into the original frame and drop the columns
    # that are no longer needed. The frame is built once and reused for both
    # the printout and the CSV export instead of repeating the whole pipeline.
    result = df.combine_first(df_tmp).drop(['data', 'A'], axis=1)
    print(result)
    '''
       Price  Seqno Symbol        time
    0  1623.0    0.0   APPL  1473411962
    1  1623.0    0.0   APPL  1473411962
    2  1623.0    0.0   APPL  1473411963
    3  1623.0    0.0   APPL  1473411963
    4  1649.0    1.0   APPL  1473411963
    '''
    result.to_csv('./demo_duplicate.csv', index=False)
    
  • 相关阅读:
    Java学习十八
    Java学习十七
    Java学习十六
    毕设进度01
    Java学习十五
    Java学习十四
    Java学习十三
    爬虫基础三
    随笔
    火车车厢重排问题--队列模拟
  • 原文地址:https://www.cnblogs.com/wenqiangit/p/11252800.html
Copyright © 2020-2023  润新知