对于给定的数据集,进行适当的数据清洗
import pandas as pd

# Demo script: build a small table of exam scores, then clean it step by step
# (drop a row, drop duplicate rows, inspect missing values, rename columns,
# add a total column), printing the frame after every step.

# Raw scores. The first Math entry is missing (None) and the last two
# records are identical.
data = {
    'Chinese': [66, 95, 93, 90, 80, 80],
    'English': [65, 85, 92, 88, 90, 90],
    'Math': [None, 98, 96, 77, 90, 90],
}
# `columns` selects the column order: English, Math, Chinese.
df = pd.DataFrame(
    data,
    index=['zhangfei', 'guanyu', 'zhaoyun', 'huangzhong', 'dianwei', 'dianwei'],
    columns=['English', 'Math', 'Chinese'],
)
print('构建的数据: ', df)

# ---- Data cleaning ----

# Drop the row that is not needed.
df = df.drop(index=['guanyu'])
print('删除后的新数据: ', df)

# Remove duplicate rows.
# NOTE: drop_duplicates() compares column values only, not index labels, so
# two different students with identical scores would also be collapsed.
df = df.drop_duplicates()
print('去重后的新数据: ', df)

# Change the data format. astype() returns a new Series and leaves `df`
# untouched, so the result has to be bound to a name to have any effect.
# It is kept separate from `df` on purpose: the Math column must stay numeric
# for the total computed below.
math_as_text = df['Math'].astype('str')

# Check which columns contain missing values.
print('检查哪列存在空值: ', df.isnull().any())

# Rename the columns.
df = df.rename(
    columns={'English': 'yingyu', 'Math': 'shuxue', 'Chinese': 'yuwen'}
)
print('重命名后的数据: ', df)

# Add a total-score column. A row with a missing subject score gets NaN as
# its total because NaN propagates through `+`.
df['sum1'] = df['yingyu'] + df['shuxue'] + df['yuwen']
print('增加一列总成绩: ', df)
结果:
构建的数据:              English  Math  Chinese
zhangfei         65   NaN       66
guanyu           85  98.0       95
zhaoyun          92  96.0       93
huangzhong       88  77.0       90
dianwei          90  90.0       80
dianwei          90  90.0       80
删除后的新数据:              English  Math  Chinese
zhangfei         65   NaN       66
zhaoyun          92  96.0       93
huangzhong       88  77.0       90
dianwei          90  90.0       80
dianwei          90  90.0       80
去重后的新数据:              English  Math  Chinese
zhangfei         65   NaN       66
zhaoyun          92  96.0       93
huangzhong       88  77.0       90
dianwei          90  90.0       80
检查哪列存在空值:  English    False
Math        True
Chinese    False
dtype: bool
重命名后的数据:              yingyu  shuxue  yuwen
zhangfei        65     NaN     66
zhaoyun         92    96.0     93
huangzhong      88    77.0     90
dianwei         90    90.0     80
增加一列总成绩:              yingyu  shuxue  yuwen   sum1
zhangfei        65     NaN     66    NaN
zhaoyun         92    96.0     93  281.0
huangzhong      88    77.0     90  255.0
dianwei         90    90.0     80  260.0