1-20
import pandas as pd
import numpy as np
1.将下面的字典创建为DataFrame
# Exercise data: two equal-length columns; np.nan marks the one missing
# entry in each column.
data = {
    "grammer": ["Python", "C", "Java", "GO", np.nan, "SQL", "PHP", "Python"],
    "score": [1, 2, np.nan, 4, 5, 6, 7, 10],
}
# Each dict key becomes one column of the resulting frame.
df = pd.DataFrame(data=data)
df
2.提取含有字符串"Python"的行
# Method 1: exact match - keep the rows whose 'grammer' value equals "Python".
df[df['grammer'] == 'Python']
# Method 2: substring match. Passing na=False makes missing values count as
# "no match", so the mask is boolean from the start and no separate in-place
# fillna() pass over the mask is needed.
results = df['grammer'].str.contains("Python", na=False)
df[results]
3.输出df的所有列名
4.修改第二列列名为'popularity'
# Rename the 'score' column to 'popularity', mutating df in place; column
# labels that are not keys of the mapping keep their current names.
df.rename({'score': 'popularity'}, axis='columns', inplace=True)
df
5.统计grammer列中每种编程语言出现的次数
# Tally how many times each distinct value occurs in the 'grammer' column.
df.loc[:, 'grammer'].value_counts()
6.将空值用上下值的平均值填充
# Fill the missing popularity values by interpolation. interpolate() already
# returns the original values with only the NaN gaps filled, so wrapping it
# in fillna() was redundant. With the default linear method, a single gap
# becomes the mean of the values directly above and below it.
df['popularity'] = df['popularity'].interpolate()
df
7.提取popularity列中值大于3的行
8.按照grammer列去除重复值
# Show df with rows that repeat an earlier 'grammer' value removed.
# The result is only displayed; df itself is not modified.
df.drop_duplicates(subset='grammer')
9.计算popularity列平均值
10.将grammer列转换为list
11.将DataFrame保存为Excel
12.查看数据行列数
13.提取popularity列值大于3小于7的行
# Rows whose popularity lies strictly between 3 and 7 (both bounds excluded).
df.loc[(3 < df['popularity']) & (df['popularity'] < 7)]
14.交换两列位置
# Method 1: detach the 'popularity' column and re-insert it at position 0,
# which moves it in front of the remaining column. pop() returns the column
# and removes it from df in a single step.
temp = df.pop('popularity')
df.insert(0, 'popularity', temp)
df
# Method 2 (alternative, intentionally not executed - running it after
# Method 1 would swap the columns back): reselect the columns in swapped
# positional order.
#   cols = df.columns[[1, 0]]
#   df = df[cols]
#   df
15.提取popularity列最大值所在行
# Every row whose popularity equals the column maximum (ties are all kept).
df.loc[df['popularity'].max() == df['popularity']]
16.查看最后5行数据
17.删除最后一行数据
# Drop the last row by its actual index label rather than assuming the
# labels run 0..len(df)-1; this stays correct after rows have been filtered,
# dropped or re-sorted. drop() is label-based, so df.index[-1] is the right
# key to pass.
df.drop(index=df.index[-1], inplace=True)
df
18.添加一行数据['Perl',6.6]
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# new record is wrapped in a one-row frame and concatenated instead.
# Columns are aligned by name, and ignore_index=True renumbers the rows
# 0..n-1 just as append(..., ignore_index=True) did.
row = {'grammer': 'Perl', 'popularity': 6.6}
df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
df
19.对数据按照"popularity"列值的大小进行排序
# Reorder df in place so that the rows run from lowest to highest popularity.
df.sort_values(by="popularity", ascending=True, inplace=True)
df
20.统计grammer列每个字符串的长度
# Replace missing language names with the placeholder 'R' first, so that
# len() below is only ever applied to strings and never to a NaN float.
df['grammer'] = df['grammer'].fillna(value='R')
# Store the character count of each language name in a new column.
df['len_str'] = df['grammer'].map(len)
df