MR(MapReduce)實現多表連線的原理和單表連線是一樣的,甚至比單表連線還要簡單。
在map階段只需要區分每筆記錄屬於左表還是右表(可以根據檔案的名稱來區分;下面的範例程式則是根據每一行的格式來判斷),並使用關聯的欄位作為map輸出的key(即key2)。
在reduce中,將values中的值分別儲存到一個左表list和一個右表list中,再對左表list和右表list做笛卡爾積即可。
1 import java.io.*; 2 import java.util.*; 3 4 import org.apache.hadoop.io.*; 5 import org.apache.hadoop.util.*; 6 import org.apache.hadoop.fs.Path; 7 import org.apache.hadoop.mapreduce.*; 8 import org.apache.hadoop.mapreduce.lib.input.*; 9 import org.apache.hadoop.mapreduce.lib.output.*; 10 import org.apache.hadoop.conf.*; 11 import org.apache.hadoop.util.Tool; 12 public class MTjoin extends Configured implements Tool { 13 /* 14 * 多表連結,與單錶連結思路類似。將關聯列作為map的key值,用數字區分左表和右表。在Reduce階段對兩個表進行笛卡爾積 15 * */ 16 public static class Map extends Mapper<LongWritable,Text,Text,Text>{ 17 public void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{ 18 String line=value.toString(); 19 int linelen=line.length(); 20 //去除檔案首行 21 if(line.indexOf("factoryname")==-1&&line.indexOf("addressID")==-1) 22 { 23 //處理factory資料 24 if(line.charAt(linelen-2)==' ') 25 { 26 String facstr="1"+line.substring(0, linelen-2); 27 String addrestr=String.valueOf(line.charAt(linelen-1)); 28 context.write(new Text(addrestr), new Text(facstr)); 29 }else{ 30 String addreidstr=String.valueOf(line.charAt(0)); 31 String addrenastr="2"+line.substring(1); 32 context.write(new Text(addreidstr), new Text(addrenastr)); 33 } 34 35 } 36 } 37 38 } 39 40 public static class Reduce extends Reducer<Text,Text,Text,Text>{ 41 public void reduce(Text key,Iterable<Text> values,Context context)throws IOException, InterruptedException{ 42 ArrayList<String> facarr=new ArrayList<String>(); 43 ArrayList<String> addarr=new ArrayList<String>(); 44 for(Text var:values){ 45 if(var.toString().charAt(0)=='1') 46 { 47 facarr.add(var.toString().substring(1)); 48 }else if(var.toString().charAt(0)=='2') 49 { 50 addarr.add(var.toString().substring(1)); 51 } 52 53 } 54 if(facarr.size()!=0&&addarr.size()!=0) 55 { 56 for(int i=0;i<facarr.size();i++) 57 { 58 context.write(new Text(facarr.get(i)), new Text(addarr.get(0))); 59 } 60 61 } 62 } 63 } 64 @Override 65 public int run(String[] args) throws Exception { 66 
// TODO Auto-generated method stub 67 Configuration conf=new Configuration(); 68 Job job=new Job(conf,"MTjoin"); 69 job.setJarByClass(MTjoin.class); 70 71 job.setOutputKeyClass(Text.class); 72 job.setOutputValueClass(Text.class); 73 74 job.setMapperClass(Map.class); 75 job.setReducerClass(Reduce.class); 76 77 job.setInputFormatClass(TextInputFormat.class); 78 job.setOutputFormatClass(TextOutputFormat.class); 79 80 FileInputFormat.setInputPaths(job, new Path(args[0])); 81 FileOutputFormat.setOutputPath(job, new Path(args[1])); 82 83 boolean success=job.waitForCompletion(true); 84 return success?0:1; 85 } 86 public static void main(String[] args)throws Exception{ 87 int ret=ToolRunner.run(new MTjoin(), args); 88 System.exit(ret); 89 } 90 91 }