多個mapreduce連線例項

停不下的腳步發表於2014-12-25

將reduce端連線的Map/Reduce結果作為wordCount 的map輸入源:

package com.mr.multiMapReduce;

import java.io.IOException;

import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.mr.reduceSideJoin.CombineValues;
import com.mr.reduceSideJoin.ReduceSideJoin_LeftOuterJoin;

/*
 * Chains two MapReduce jobs with JobControl: job1 performs a reduce-side
 * left outer join, and job2 runs WordCount over job1's output.
 *
 * NOTE(review): the original author reports this cannot start job2 when run
 * from inside Eclipse, but works when submitted to a real Hadoop environment.
 */
public class MultiMapReduce {
	/**
	 * Entry point.
	 *
	 * Expected arguments:
	 *   args[0] - input directory for the join (job1)
	 *   args[1] - intermediate directory (job1 output, job2 input); deleted afterwards
	 *   args[2] - final output directory (job2)
	 *
	 * @throws IOException if job setup or filesystem access fails
	 */
	public static void main(String[] args) throws IOException {

		JobConf conf = new JobConf(MultiMapReduce.class);

		// --- Job 1: reduce-side left outer join ---
		Job job1 = new Job(conf, "join1");
		job1.setJarByClass(MultiMapReduce.class);

		job1.setMapperClass(ReduceSideJoin_LeftOuterJoin.LeftOutJoinMapper.class);
		job1.setReducerClass(ReduceSideJoin_LeftOuterJoin.LeftOutJoinReducer.class);

		job1.setMapOutputKeyClass(Text.class); // map-phase output key
		job1.setMapOutputValueClass(CombineValues.class); // map-phase output value

		job1.setOutputKeyClass(Text.class); // reduce-phase output key
		job1.setOutputValueClass(Text.class); // reduce-phase output value

		// job1 input/output paths
		FileInputFormat.addInputPath(job1, new Path(args[0]));
		FileOutputFormat.setOutputPath(job1, new Path(args[1]));

		// --- Job 2: WordCount over job1's result ---
		Job job2 = new Job(conf, "Join2");
		job2.setJarByClass(MultiMapReduce.class);

		job2.setMapperClass(WordCount.TokenizerMapper.class);
		job2.setReducerClass(WordCount.IntSumReducer.class);

		job2.setMapOutputKeyClass(Text.class); // map-phase output key
		job2.setMapOutputValueClass(IntWritable.class); // map-phase output value

		job2.setOutputKeyClass(Text.class); // reduce-phase output key
		job2.setOutputValueClass(IntWritable.class); // reduce-phase output value

		// job2 reads job1's output and writes the final result
		FileInputFormat.addInputPath(job2, new Path(args[1]));
		FileOutputFormat.setOutputPath(job2, new Path(args[2]));

		// Wrap both jobs and declare the dependency: job2 runs after job1.
		ControlledJob jobx = new ControlledJob(conf);
		jobx.setJob(job1);
		ControlledJob joby = new ControlledJob(conf);
		joby.setJob(job2);
		joby.addDependingJob(jobx);

		// Master controller managing the two dependent jobs.
		JobControl jobCtrl = new JobControl("myctrl");
		jobCtrl.addJob(jobx);
		jobCtrl.addJob(joby);

		// JobControl implements Runnable and must be driven by a thread.
		Thread t = new Thread(jobCtrl);
		t.start();

		// Poll for completion. The original code spun in a tight while(true)
		// loop, burning a full CPU core; sleeping between polls fixes that.
		while (!jobCtrl.allFinished()) {
			try {
				Thread.sleep(500);
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt(); // preserve interrupt status
				break;
			}
		}

		if (jobCtrl.allFinished()) {
			// allFinished() is also true when jobs FAIL, so report both lists.
			System.out.println(jobCtrl.getSuccessfulJobList());
			if (!jobCtrl.getFailedJobList().isEmpty()) {
				System.out.println(jobCtrl.getFailedJobList());
			}
			// Remove job1's intermediate output once everything has run.
			// The original used deleteOnExit(), whose boolean only means the
			// path was REGISTERED for deletion at FileSystem close, so the
			// success message was printed before anything was deleted.
			// delete(path, true) removes the directory recursively right now
			// and returns the actual outcome.
			FileSystem fs = FileSystem.get(conf);
			Path path = new Path(new Path(args[1]).toUri());
			boolean bool = fs.delete(path, true);
			if (bool) {
				System.out.println("檔案刪除成功");
			}
			fs.close();
		}
		jobCtrl.stop();
	}
}


相關文章