join按行索引进行拼接,merge按列索引进行拼接
In [1]:
import pandas as pd
import numpy as np
In [2]:
# Two small demo frames built from the integers 0..11:
#   d1 is 3x4 with row labels A-C and columns H, I, J, K
#   d2 is 4x3 with row labels A-D and columns K, L, M
# They share the column name "K" and the row labels A-C.
d1 = pd.DataFrame(np.arange(12).reshape(3, 4), index=list("ABC"), columns=list("HIJK"))
d2 = pd.DataFrame(np.arange(12).reshape(4, 3), index=list("ABCD"), columns=list("KLM"))
# The separator has to use "\n" escapes: a plain double-quoted string
# cannot span several physical lines.
print(d1, "\n*******************\n", d2)
In [3]:
# join aligns the two frames on their row index. When both frames contain
# the same column name, at least one side must be given a suffix so the
# overlapping columns can be told apart.
left_joined = d1.join(d2, lsuffix="_left", rsuffix="_right")
print(left_joined)
In [4]:
# Same join with the roles swapped: d2 is now the calling (left) frame and
# d1 is joined onto it by row index.
swapped_join = d2.join(d1, lsuffix="_left", rsuffix="_right")
print(swapped_join)
In [5]:
# A 3x3 frame of float zeros with columns H, O, P and the default integer
# row index; the last row's H is then set to 1, so column H holds 0, 0, 1.
d3 = pd.DataFrame(np.zeros(shape=(3, 3)), columns=list("HOP"))
d3.loc[2, "H"] = 1
# The separator has to use "\n" escapes: a plain double-quoted string
# cannot span several physical lines.
print(d1, "\n*******************\n", d3)
In [6]:
# Merge on the values of column "H" using the default how="inner": only
# rows whose H value occurs in both frames are kept.
inner_merged = d1.merge(d3, on="H")
print(inner_merged)
# Alternative: d1.merge(d3) with `on` omitted merges on every column name
# the two frames have in common.
In [7]:
# Outer merge on "H": keeps the H values of both frames and fills the
# columns that have no match with NaN.
outer_merged = d1.merge(d3, on="H", how="outer")
print(outer_merged)
In [8]:
# Left merge on "H": every row of d1 (the calling frame) is kept; rows
# without a matching H in d3 get NaN in d3's columns.
left_merged = d1.merge(d3, on="H", how="left")
print(left_merged)
In [9]:
# Right merge on "H": every row of d3 (the frame passed in) is kept; rows
# without a matching H in d1 get NaN in d1's columns.
right_merged = d1.merge(d3, on="H", how="right")
print(right_merged)
In [10]:
# Show both inputs first. The separator has to use "\n" escapes: a plain
# double-quoted string cannot span several physical lines.
print(d2, "\n********************\n", d3)
print("*" * 20)
# Merging when the frames share no column name: name the key column on
# each side explicitly (d2's "K" is matched against d3's "H").
print(d2.merge(d3, left_on="K", right_on="H"))
In [11]:
# When several column names are shared: give d3 an "I" column so that d1
# and d3 now have both "H" and "I" in common.
d3["I"] = [1., 0., 0.]
# The separators have to use "\n" escapes: a plain double-quoted string
# cannot span several physical lines.
print(d1, "\n" + "*" * 20 + "\n", d3, "\n" + "*" * 20)
# With `on` omitted, merge uses every common column ("H" and "I") as keys.
print(d1.merge(d3), "\n" + "*" * 20)
# With on="H" only, the other shared column "I" appears once per frame,
# distinguished by merge's default suffixes ("_x" and "_y").
print(d1.merge(d3, on="H"))