# concat
import numpy as np import pandas as pd from pandas import Series,DataFrame df1 = DataFrame(data=np.random.randint(0,100,size=(3,3)),index=['a','b','c'],columns=['A','B','C']) df2 = DataFrame(data=np.random.randint(0,100,size=(3,3)),index=['a','e','c'],columns=['A','E','C']) pd.concat((df1,df1),axis=0,join='inner') #列 outer # concat 匹配级联 不匹配级联 pd.concat((df1,df2),axis=0,join='inner') #outer 用的多
df1.append(df2) #在后面追加 默认在列上
级联<表表横纵的拼接>/合并merge的区别<数据的合并>
# merge
一对一 合并 import numpy as np from pandas import DataFrame,Series import pandas as pd df1 = DataFrame({'employee':['Bob','Jake','Lisa'], 'group':['Accounting','Engineering','Engineering'], }) df2 = DataFrame({'employee':['Lisa','Bob','Jake'], 'hire_date':[2004,2008,2012], }) pd.merge(df2,df1) #默认inner outer显示全数据 #left, right, how='inner', on条件, left_on=None, right_on=None, left_index=False, right_index=False
多对一合并 df3 = DataFrame({ 'employee':['Lisa','Jake'], 'group':['Accounting','Engineering'], 'hire_date':[2004,2016]}) df4 = DataFrame({'group':['Accounting','Engineering','Engineering'], 'supervisor':['Carly','Guido','Steve'] }) pd.merge(df3,df4,'outer')
多对多合并 df1 = DataFrame({'employee':['Bob','Jake','Lisa'], 'group':['Accounting','Engineering','Engineering']}) df5 = DataFrame({'group':['Engineering','Engineering','HR'], 'supervisor':['Carly','Guido','Steve'] }) pd.merge(df1,df5,'right') #on条件 suffixes=('_x', '_y')指定冲突列名
#inner outer左右表数据的完整性
#left right 区别
加载excl数据: pd.read_excel('excl_path',sheetname=1)
key的规范化 当列冲突时,即有多个列名称相同时,需要使用on=来指定哪一个列作为key,配合suffixes指定冲突列名 df1 = DataFrame({'employee':['Jack',"Summer","Steve"], 'group':['Accounting','Finance','Marketing']}) df2 = DataFrame({'employee':['Jack','Bob',"Jake"], 'hire_date':[2003,2009,2012], 'group':['Accounting','sell','ceo']}) pd.merge(df1,df2,'outer','group',suffixes=('_1', '_2')) 当两张表没有可进行连接的列时,可使用left_on和right_on手动指定merge中左右两边的哪一列列作为连接的列 pd.merge(df1,df5,'outer',left_on='employee',right_on='name',) #有NaN pd.merge(df1,df5,'inner',left_on='employee',right_on='name',) #
内合并inner交集 只保留两者都有的key(默认模式)
外合并outer并集 how='outer' 补NaN