用MapReduce(MR)实现多表连接的原理和单表连接是一样的,甚至比单表连接还要简单。
在map阶段只需要区分每条记录来自左表还是右表(可以根据输入文件的名称来区分;下面的示例代码则是根据行的格式来区分的),给记录加上左表或右表的标记,并使用关联的字段作为map输出的key(即key2)。
在reduce阶段,把values中的值按标记分别存储到一个左表list和一个右表list中,然后对左表list和右表list做笛卡尔积即可。
1 import java.io.*; 2 import java.util.*; 3 4 import org.apache.hadoop.io.*; 5 import org.apache.hadoop.util.*; 6 import org.apache.hadoop.fs.Path; 7 import org.apache.hadoop.mapreduce.*; 8 import org.apache.hadoop.mapreduce.lib.input.*; 9 import org.apache.hadoop.mapreduce.lib.output.*; 10 import org.apache.hadoop.conf.*; 11 import org.apache.hadoop.util.Tool; 12 public class MTjoin extends Configured implements Tool { 13 /* 14 * 多表链接,与单表链接思路类似。将关联列作为map的key值,用数字区分左表和右表。在Reduce阶段对两个表进行笛卡尔积 15 * */ 16 public static class Map extends Mapper<LongWritable,Text,Text,Text>{ 17 public void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{ 18 String line=value.toString(); 19 int linelen=line.length(); 20 //去除文件首行 21 if(line.indexOf("factoryname")==-1&&line.indexOf("addressID")==-1) 22 { 23 //处理factory数据 24 if(line.charAt(linelen-2)==' ') 25 { 26 String facstr="1"+line.substring(0, linelen-2); 27 String addrestr=String.valueOf(line.charAt(linelen-1)); 28 context.write(new Text(addrestr), new Text(facstr)); 29 }else{ 30 String addreidstr=String.valueOf(line.charAt(0)); 31 String addrenastr="2"+line.substring(1); 32 context.write(new Text(addreidstr), new Text(addrenastr)); 33 } 34 35 } 36 } 37 38 } 39 40 public static class Reduce extends Reducer<Text,Text,Text,Text>{ 41 public void reduce(Text key,Iterable<Text> values,Context context)throws IOException, InterruptedException{ 42 ArrayList<String> facarr=new ArrayList<String>(); 43 ArrayList<String> addarr=new ArrayList<String>(); 44 for(Text var:values){ 45 if(var.toString().charAt(0)=='1') 46 { 47 facarr.add(var.toString().substring(1)); 48 }else if(var.toString().charAt(0)=='2') 49 { 50 addarr.add(var.toString().substring(1)); 51 } 52 53 } 54 if(facarr.size()!=0&&addarr.size()!=0) 55 { 56 for(int i=0;i<facarr.size();i++) 57 { 58 context.write(new Text(facarr.get(i)), new Text(addarr.get(0))); 59 } 60 61 } 62 } 63 } 64 @Override 65 public int run(String[] args) throws Exception { 66 
// TODO Auto-generated method stub 67 Configuration conf=new Configuration(); 68 Job job=new Job(conf,"MTjoin"); 69 job.setJarByClass(MTjoin.class); 70 71 job.setOutputKeyClass(Text.class); 72 job.setOutputValueClass(Text.class); 73 74 job.setMapperClass(Map.class); 75 job.setReducerClass(Reduce.class); 76 77 job.setInputFormatClass(TextInputFormat.class); 78 job.setOutputFormatClass(TextOutputFormat.class); 79 80 FileInputFormat.setInputPaths(job, new Path(args[0])); 81 FileOutputFormat.setOutputPath(job, new Path(args[1])); 82 83 boolean success=job.waitForCompletion(true); 84 return success?0:1; 85 } 86 public static void main(String[] args)throws Exception{ 87 int ret=ToolRunner.run(new MTjoin(), args); 88 System.exit(ret); 89 } 90 91 }