• pandas使用


    import pandas as pd
    import numpy as np
    
    # 读取数据,并以表格的形式显示
    df1 = pd.DataFrame(pd.read_excel('a.xlsx'))
    
    # 判断是否为空,每个表格都会去判断,并返回True/False
    a1 = df1.isnull().values
    """
    [[False False False False False False False False False]
     [False False  True False False False False False False]
     [False False False False False False False False False]
     [False False False False False False False False False]
     [False False False False False  True False False False]
     [False False False False False False False False False]
     [False False False False False False False False False]]
    
    """
    
    # 判断是否为非空,每个表格都会去判断,并返回True/False,会以规则的表格形式返回
    a2 = df1.notnull()
    """
          1     2      3     4     5      6  open_acc  weight  data
    0  True  True   True  True  True   True      True    True  True
    1  True  True  False  True  True   True      True    True  True
    2  True  True   True  True  True   True      True    True  True
    3  True  True   True  True  True   True      True    True  True
    4  True  True   True  True  True  False      True    True  True
    5  True  True   True  True  True   True      True    True  True
    6  True  True   True  True  True   True      True    True  True
    """
    
    # 判断列是否重复,比对每一行,并返回True/False
    b = df1.duplicated()
    """
    0    False
    1    False
    2    False
    3    False
    4    False
    5     True
    6    False
    dtype: bool
    """
    
    # 返回去掉重复行后的表
    c = df1.drop_duplicates()
    """
         1    2    3    4    5    6  open_acc   weight        data
    0   12   12   23   34   56   67         2     12kg  2018/02/26
    1  asd  asd  NaN  aas  qwe  qwe        10   23000g   2018/02/6
    2  123   qw    q    q  qwe  qwe         7  45000mg   2018/5/26
    3    1    2    3    4    5    6        15     123t      2017/9
    4    1    2    3    4    5  NaN        20     456t      18/2/2
    6   12  123    4    3   er   32        12      34g   2015/3/01
    """
    
    # 空值以。。。。方式填充
    d1 = df1.fillna('\')
    """
         1    2   3    4    5    6  open_acc   weight        data
    0   12   12  23   34   56   67         2     12kg  2018/02/26
    1  asd  asd     aas  qwe  qwe        10   23000g   2018/02/6
    2  123   qw   q    q  qwe  qwe         7  45000mg   2018/5/26
    3    1    2   3    4    5    6        15     123t      2017/9
    4    1    2   3    4    5            20     456t      18/2/2
    5    1    2   3    4    5    6        15     123t      2017/9
    6   12  123   4    3   er   32        12      34g   2015/3/01
    """
    
    # 包含空值的行删除
    d2 = df1.dropna()
    """
         1    2   3   4    5    6  open_acc   weight        data
    0   12   12  23  34   56   67         2     12kg  2018/02/26
    2  123   qw   q   q  qwe  qwe         7  45000mg   2018/5/26
    3    1    2   3   4    5    6        15     123t      2017/9
    5    1    2   3   4    5    6        15     123t      2017/9
    6   12  123   4   3   er   32        12      34g   2015/3/01
    """
    
    # 给表更换新的列名,注意字段列表的长度一定要和列的长度相同
    column_names = ['a', 'b', 'c', 'd', 'e', 'f','g','h','i']
    df = pd.read_excel('a.xlsx', names=column_names)
    """
         a    b    c    d    e    f   g        h           i
    0   12   12   23   34   56   67   2     12kg  2018/02/26
    1  asd  asd  NaN  aas  qwe  qwe  10   23000g   2018/02/6
    2  123   qw    q    q  qwe  qwe   7  45000mg   2018/5/26
    3    1    2    3    4    5    6  15     123t      2017/9
    4    1    2    3    4    5  NaN  20     456t      18/2/2
    5    1    2    3    4    5    6  15     123t      2017/9
    6   12  123    4    3   er   32  12      34g   2015/3/01
    """
    
    # 打印第一列
    # print(df1[1])
    """
    0     12
    1    asd
    2    123
    3      1
    4      1
    5      1
    6     12
    Name: 1, dtype: object
    """
    
    # 将列中的12,1替换成'qwe'
    a = df1[1].replace([12, 1], 'qwe')
    """
    0    qwe
    1    qwe
    2    123
    3    qwe
    4    qwe
    5    qwe
    6    qwe
    Name: 1, dtype: object
    """
    
    # 打印数据的前n行,默认为5
    # print(df1.head())
    
    # 给数据分组
    """
    0-5     A  不包含0
    6-10    B
    11-15   C
    16-20   D
    """
    bins = [0, 5, 10, 15, 20]
    group_names = ['A', 'B', 'C', 'D']
    df1['categories'] = pd.cut(df1['open_acc'], bins, labels=group_names)
    """
         1    2    3    4    ...     open_acc   weight        data categories
    0   12   12   23   34    ...            2     12kg  2018/02/26          A
    1    1  asd  NaN  aas    ...           10   23000g   2018/02/6          B
    2  123   qw    q    q    ...            7  45000mg   2018/5/26          B
    3    1    2    3    4    ...           15     123t      2017/9          C
    4    1    2    3    4    ...           20     456t      18/2/2          D
    5    1    2    3    4    ...           15     123t      2017/9          C
    6   12  123    4    3    ...           12      34g   2015/3/01          C
    """
    
    # 获取到需要换算单位的行
    
    rows_with_lbs = df1['weight'].str.contains('mg').fillna(False)
    """
         1   2  3  4    5    6  open_acc   weight       data categories
    2  123  qw  q  q  qwe  qwe         7  45000mg  2018/5/26          B
    """
    
    # 换算单位
    for i, lbs_row in df1[rows_with_lbs].iterrows():
        # print(i) # 获取到值的行
        # print(lbs_row['weight'][-2:])
        weight = int(float(lbs_row['weight'][:-2]) / 1000)
        df1.at[i, 'weight'] = '{}g'.format(weight)
    # print(df1)
    """
         1    2    3    4    ...     open_acc  weight        data categories
    0   12   12   23   34    ...            2    12kg  2018/02/26          A
    1    1  asd  NaN  aas    ...           10  23000g   2018/02/6          B
    2  123   qw    q    q    ...            7     45g   2018/5/26          B
    3    1    2    3    4    ...           15    123t      2017/9          C
    4    1    2    3    4    ...           20    456t      18/2/2          D
    5    1    2    3    4    ...           15    123t      2017/9          C
    6   12  123    4    3    ...           12     34g   2015/3/01          C
    """
    
    # rows_with_lbs = df1['weight'].str.contains('t').fillna(False)
    # for i,lbs_row in df1[rows_with_lbs].iterrows():
    #     # print(i) # 获取到值的行
    #     # print(lbs_row['weight'][:-2])
    #     weight = int(float(lbs_row['weight'][:-2]) * 10000000)
    #     df1.at[i, 'weight'] = '{}g'.format(weight)
    
    # print(df1)
    
    
    # 换算单位封装
    def conversion(DataFrame, column, unit, num, new_unit='', null=False):
        """
    
        :param DataFrame: pandas对象
        :param column: 字段
        :param unit: 当前单位
        :param new_unit: 新单位
        :param num: 当前单位换算成新单位所需要乘的数
        :param null: 是否为空
        :return:pandas对象
    
             1    2    3    4    ...     open_acc     weight        data categories
        0   12   12   23   34    ...            2  1.20e+04g  2018/02/26          A
        1  asd  asd  NaN  aas    ...           10        23g   2018/02/6          B
        2  123   qw    q    q    ...            7        45g   2018/5/26          B
        3    1    2    3    4    ...           15  1.23e+08g      2017/9          C
        4    1    2    3    4    ...           20  4.56e+08g      18/2/2          D
        """
    
    
        rows_with_lbs = DataFrame[column].str.contains(unit).fillna(null)
        column_len = len(unit)
    
        for i, lbs_row in DataFrame[rows_with_lbs].iterrows():
    
            # print(lbs_row['weight'][-column_len:])
            weight = int(float(lbs_row[column][:-column_len]) * num)
    
            # 默认4位,超出长度变为科学计数法
            if len(str(weight)) > 4:
    
                DataFrame.at[i, column] = ('%0.2e' + new_unit) % weight
                """
                     1    2    3    4    ...     open_acc     weight        data categories
                0   12   12   23   34    ...            2  1.20e+04g  2018/02/26          A
                1  asd  asd  NaN  aas    ...           10  2.30e+01g   2018/02/6          B
                2  123   qw    q    q    ...            7  4.50e+01g   2018/5/26          B
                3    1    2    3    4    ...           15  1.23e+08g      2017/9          C
                4    1    2    3    4    ...           20  4.56e+08g      18/2/2          D
                """
            else:
                DataFrame.at[i, column] = str(weight) + new_unit
                """
                     1    2    3    4    ...     open_acc      weight        data categories
                0   12   12   23   34    ...            2      12000g  2018/02/26          A
                1  asd  asd  NaN  aas    ...           10         23g   2018/02/6          B
                2  123   qw    q    q    ...            7         45g   2018/5/26          B
                3    1    2    3    4    ...           15  123000000g      2017/9          C
                4    1    2    3    4    ...           20  456000000g      18/2/2          D
                """
    
        return DataFrame
    
    
    df1 = conversion(df1, 'weight', 'kg', 1000, 'g')  # kg - g
    
    df2 = conversion(df1, 'weight', 'mg', 0.001, 'g')  # mg -g
    
    df3 = conversion(df2, 'weight', 't', 1000000, 'g')  # t - g
    
    print(df3)
    
     
  • 相关阅读:
    如何开发一款浏览器[转]
    粒子滤波实现物体跟踪的算法原理:
    Canny边缘检测(转)
    [转载] 图像处理常用边缘检测算子总结
    [转载]Sobel边缘检测
    Android读取手机联系人信息
    【Android】动态注册广播接收器
    Android对话框的生成
    C语言基础丨(六)程序结构——顺序结构【1】
    C语言丨指针基础知识详解
  • 原文地址:https://www.cnblogs.com/ZSMblog/p/9612465.html
Copyright © 2020-2023  润新知