• Python数据分析与机器学习-Pandas_2


    import pandas
    # Load the nutrition table from the CSV file in the working directory.
    food_info = pandas.read_csv("food_info.csv")
    # Collect the column labels into a plain Python list and show them.
    col_names = list(food_info.columns)
    print(col_names)
    # Preview the first three rows of the table.
    print(food_info.head(n=3))
    
    ['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)', 'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)', 'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)', 'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)', 'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)', 'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)', 'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg', 'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)', 'Cholestrl_(mg)']
       NDB_No                 Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  
    0    1001          BUTTER WITH SALT      15.87         717         0.85   
    1    1002  BUTTER WHIPPED WITH SALT      15.87         717         0.85   
    2    1003      BUTTER OIL ANHYDROUS       0.24         876         0.28   
    
       Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  
    0          81.11     2.11            0.06           0.0           0.06  ...   
    1          81.11     2.11            0.06           0.0           0.06  ...   
    2          99.48     0.00            0.00           0.0           0.00  ...   
    
       Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  
    0    2499.0      684.0        2.32        1.5      60.0          7.0   
    1    2499.0      684.0        2.32        1.5      60.0          7.0   
    2    3069.0      840.0        2.80        1.8      73.0          8.6   
    
       FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
    0      51.368       21.021        3.043           215.0  
    1      50.489       23.426        3.012           219.0  
    2      61.924       28.732        3.694           256.0  
    
    [3 rows x 36 columns]
    
    # print(food_info["Iron_(mg)"])
    # `col` holds the column label inside a list; it is only referenced by the
    # disabled print below.
    col = ["Iron_(mg)"]
    # print(food_info[col])
    # Dividing a column by a scalar divides every value in that column.
    iron_mg = food_info["Iron_(mg)"]
    div_1000 = iron_mg / 1000
    print(div_1000)
    
    0       0.00002
    1       0.00016
    2       0.00000
    3       0.00031
    4       0.00043
    5       0.00050
    6       0.00033
    7       0.00064
    8       0.00016
    9       0.00021
    10      0.00076
    11      0.00007
    12      0.00016
    13      0.00015
    14      0.00013
    15      0.00014
    16      0.00038
    17      0.00044
    18      0.00065
    19      0.00023
    20      0.00052
    21      0.00024
    22      0.00017
    23      0.00013
    24      0.00072
    25      0.00044
    26      0.00020
    27      0.00022
    28      0.00023
    29      0.00041
             ...   
    8588    0.00900
    8589    0.00030
    8590    0.00010
    8591    0.00163
    8592    0.03482
    8593    0.00228
    8594    0.00017
    8595    0.00017
    8596    0.00486
    8597    0.00025
    8598    0.00023
    8599    0.00013
    8600    0.00011
    8601    0.00068
    8602    0.00783
    8603    0.00311
    8604    0.00030
    8605    0.00018
    8606    0.00080
    8607    0.00004
    8608    0.00387
    8609    0.00005
    8610    0.00038
    8611    0.00520
    8612    0.00150
    8613    0.00140
    8614    0.00058
    8615    0.00360
    8616    0.00350
    8617    0.00140
    Name: Iron_(mg), Length: 8618, dtype: float64
    
    # Arithmetic between two columns is element-wise: the operator is applied to
    # the first value of both columns, then the second value of both, and so on.
    water_energy = food_info["Water_(g)"] * food_info["Energ_Kcal"]
    # Convert iron from milligrams to grams and store it as a new column;
    # printing the shape before and after shows the column count grow by one.
    iron_grams = food_info["Iron_(mg)"] / 1000
    print(food_info.shape)
    food_info["Iron_(g)"] = iron_grams
    print(food_info.shape)
    
    (8618, 36)
    (8618, 37)
    
    # Scale every calorie value by the largest value in the "Energ_Kcal" column
    # and store the result as a new "Normalized_calories" column.
    energy_kcal = food_info["Energ_Kcal"]
    max_calories = energy_kcal.max()
    normalized_calories = energy_kcal / max_calories
    food_info["Normalized_calories"] = normalized_calories
    print(food_info.shape)
    
    (8618, 38)
    
    # sort_values() orders rows by the given column, ascending by default, and
    # normally returns a new DataFrame; inplace=True reorders food_info itself
    # rather than returning a new DataFrame.
    # print(food_info["Sodium_(mg)"])
    food_info.sort_values(by="Sodium_(mg)", inplace=True)
    print(food_info["Sodium_(mg)"])
    # Sort on the same column again, this time with the largest values first.
    food_info.sort_values(by="Sodium_(mg)", ascending=False, inplace=True)
    print(food_info["Sodium_(mg)"])
    
    760     0.0
    610     0.0
    611     0.0
    8387    0.0
    8607    0.0
    629     0.0
    630     0.0
    631     0.0
    6470    0.0
    654     0.0
    8599    0.0
    633     0.0
    634     0.0
    635     0.0
    637     0.0
    638     0.0
    639     0.0
    646     0.0
    653     0.0
    632     0.0
    606     0.0
    6463    0.0
    655     0.0
    673     0.0
    658     0.0
    3636    0.0
    659     0.0
    660     0.0
    661     0.0
    3663    0.0
           ... 
    8153    NaN
    8155    NaN
    8156    NaN
    8157    NaN
    8158    NaN
    8159    NaN
    8160    NaN
    8161    NaN
    8163    NaN
    8164    NaN
    8165    NaN
    8167    NaN
    8169    NaN
    8170    NaN
    8172    NaN
    8173    NaN
    8174    NaN
    8175    NaN
    8176    NaN
    8177    NaN
    8178    NaN
    8179    NaN
    8180    NaN
    8181    NaN
    8183    NaN
    8184    NaN
    8185    NaN
    8195    NaN
    8251    NaN
    8267    NaN
    Name: Sodium_(mg), Length: 8618, dtype: float64
    276     38758.0
    5814    27360.0
    6192    26050.0
    1242    26000.0
    1245    24000.0
    1243    24000.0
    1244    23875.0
    292     17000.0
    1254    11588.0
    5811    10600.0
    8575     9690.0
    291      8068.0
    1249     8031.0
    5812     7893.0
    1292     7851.0
    293      7203.0
    4472     7027.0
    4836     6820.0
    1261     6580.0
    3747     6008.0
    1266     5730.0
    4835     5586.0
    4834     5493.0
    1263     5356.0
    1553     5203.0
    1552     5053.0
    1251     4957.0
    1257     4843.0
    294      4616.0
    8613     4450.0
             ...   
    8153        NaN
    8155        NaN
    8156        NaN
    8157        NaN
    8158        NaN
    8159        NaN
    8160        NaN
    8161        NaN
    8163        NaN
    8164        NaN
    8165        NaN
    8167        NaN
    8169        NaN
    8170        NaN
    8172        NaN
    8173        NaN
    8174        NaN
    8175        NaN
    8176        NaN
    8177        NaN
    8178        NaN
    8179        NaN
    8180        NaN
    8181        NaN
    8183        NaN
    8184        NaN
    8185        NaN
    8195        NaN
    8251        NaN
    8267        NaN
    Name: Sodium_(mg), Length: 8618, dtype: float64
  • 相关阅读:
    数据分析 第五篇:离群点检测
    linux lsof命令详解
    ES基本查询语句教程
    Swagger详解(SpringBoot+Swagger集成)
    Elasticsearch5.0 安装问题集锦
    kafka auto.offset.reset latest earliest 详解
    干货 | Elasticsearch多表关联设计指南
    kafka auto.offset.reset latest earliest 详解
    linux sed命令
    Idea中如何使用debug操作
  • 原文地址:https://www.cnblogs.com/SweetZxl/p/11124195.html
Copyright © 2020-2023  润新知