Chaining MapReduce Jobs in Hadoop

The example below chains two MapReduce jobs. The commented-out section in `main` runs them one after the other with `waitForCompletion`; the active code wraps each `Job` in a `ControlledJob`, declares the dependency between them, and lets a `JobControl` thread drive the execution.
```java
import java.io.IOException;
import java.util.Iterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PickMain {

    private static final Log LOG = LogFactory.getLog(PickMain.class);

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        /*
         * First approach: run job1, and only submit job2 once it has completed successfully.
         *
         * Configuration conf = new Configuration();
         * Job job1 = Job.getInstance(conf);
         * job1.setJarByClass(PickMain.class);
         * job1.setMapperClass(FindMapper.class);
         * job1.setReducerClass(FindReducer.class);
         * job1.setOutputKeyClass(Text.class);
         * job1.setOutputValueClass(Text.class);
         * FileInputFormat.addInputPath(job1, new Path(args[0]));
         * FileOutputFormat.setOutputPath(job1, new Path(args[1]));
         * boolean flag1 = job1.waitForCompletion(true);
         * // This approach also chains the jobs.
         * if (flag1) {
         *     Job job2 = Job.getInstance(conf);
         *     job2.setJarByClass(PickMain.class);
         *     job2.setMapperClass(SecondFindMapper.class);
         *     job2.setReducerClass(SecondFindReducer.class);
         *     job2.setOutputKeyClass(Text.class);
         *     job2.setOutputValueClass(Text.class);
         *     FileInputFormat.addInputPath(job2, new Path(args[1]));
         *     FileOutputFormat.setOutputPath(job2, new Path(args[2]));
         *     boolean flag2 = job2.waitForCompletion(true);
         *     System.out.println(flag2 ? 0 : 1);
         *     if (flag2) {
         *         LOG.info("The job is done!");
         *         System.exit(0);
         *     } else {
         *         LOG.info("The second job failed!");
         *         System.exit(1);
         *     }
         * } else {
         *     LOG.info("The first job failed, aborting!");
         *     System.exit(1);
         * }
         */

        // Second approach: submit multiple jobs via ControlledJob and JobControl.
        Configuration conf = new Configuration();
        Job job1 = Job.getInstance(conf);
        job1.setJarByClass(PickMain.class);
        job1.setMapperClass(FindMapper.class);
        job1.setReducerClass(FindReducer.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, new Path(args[1]));

        Configuration conf2 = new Configuration();
        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(PickMain.class);
        job2.setMapperClass(SecondFindMapper.class);
        job2.setReducerClass(SecondFindReducer.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job2, new Path(args[1]));
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));

        // Wrap each Job in a ControlledJob.
        ControlledJob cjob1 = new ControlledJob(conf);
        ControlledJob cjob2 = new ControlledJob(conf2);
        cjob1.setJob(job1);
        cjob2.setJob(job2);

        // Declare the dependency: job2 runs only after job1 has completed.
        cjob2.addDependingJob(cjob1);

        // JobControl acts as a job controller; it implements Runnable and must be started in a thread.
        JobControl jc = new JobControl("my_jobcontrol");
        jc.addJob(cjob1);
        jc.addJob(cjob2);

        Thread th = new Thread(jc);
        th.start();

        // Wait until all jobs have finished before exiting.
        while (!jc.allFinished()) {
            Thread.sleep(5000);
        }
        System.exit(0);
    }
}

class FindMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text m1 = new Text();
    Text m2 = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Input line format: person:friend1,friend2,...
        String line = value.toString();
        String[] tmp1 = line.split(":");
        String outval = tmp1[0];
        String[] outkeys = tmp1[1].split(",");
        for (int i = 0; i < outkeys.length; i++) {
            // Invert the relation: emit (friend, person).
            m1.set(outkeys[i]);
            m2.set(outval);
            context.write(m1, m2);
        }
    }
}

class FindReducer extends Reducer<Text, Text, Text, NullWritable> {

    StringBuilder sb = new StringBuilder();
    NullWritable nul = NullWritable.get();
    Text outval = new Text();
    String spector = ":";

    @Override
    protected void reduce(Text txt, Iterable<Text> txtiter, Context context)
            throws IOException, InterruptedException {
        // Concatenate into friend:person1:person2:... and emit it as the key.
        sb.delete(0, sb.length());
        sb.append(txt.toString());
        Iterator<Text> it = txtiter.iterator();
        while (it.hasNext()) {
            sb.append(spector + it.next().toString());
        }
        outval.set(sb.toString());
        context.write(outval, nul);
    }
}

class SecondFindMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text keyout = new Text();
    Text valueout = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Input line format (output of the first job): friend:person1:person2:...
        String[] fs = value.toString().split(":");
        valueout.set(fs[0]);
        if (fs.length > 1) {
            // Emit every pair of persons (in a canonical order) with the shared friend as the value.
            for (int i = 1; i < fs.length - 1; i++) {
                for (int j = i + 1; j < fs.length; j++) {
                    if (fs[i].charAt(0) > fs[j].charAt(0)) {
                        keyout.set(fs[j] + "-" + fs[i]);
                    } else {
                        keyout.set(fs[i] + "-" + fs[j]);
                    }
                    context.write(keyout, valueout);
                }
            }
        }
    }
}

class SecondFindReducer extends Reducer<Text, Text, Text, Text> {

    StringBuilder sb = new StringBuilder();
    Text outvalue = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> iter, Context context)
            throws IOException, InterruptedException {
        // Join all shared friends of this pair with commas.
        sb.delete(0, sb.length());
        Iterator<Text> it = iter.iterator();
        if (it.hasNext()) {
            sb.append(it.next().toString());
        }
        while (it.hasNext()) {
            sb.append("," + it.next().toString());
        }
        outvalue.set(sb.toString());
        context.write(key, outvalue);
    }
}
```
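One thing the driver above does not do is inspect the result of the chained jobs: it exits as soon as `jc.allFinished()` returns true, and the `JobControl` thread is simply killed by `System.exit(0)`. Below is a minimal sketch (not part of the original post) of how the tail of `main` could be adjusted to stop the controller thread and report failed jobs, using only methods that `JobControl` and `ControlledJob` expose (`allFinished`, `stop`, `getFailedJobList`, `getJobName`) and the same `jc` and `LOG` variables as above:

```java
// Poll until JobControl reports that every ControlledJob has finished.
while (!jc.allFinished()) {
    Thread.sleep(5000);
}

// Stop the JobControl thread instead of relying on System.exit() to kill it.
jc.stop();

// Report any failed jobs and reflect them in the exit code.
int failed = jc.getFailedJobList().size();
for (ControlledJob cj : jc.getFailedJobList()) {
    LOG.info("Job failed: " + cj.getJobName());
}
System.exit(failed == 0 ? 0 : 1);
```

`JobControl` also provides `getSuccessfulJobList()` for the symmetric check if you want to log which jobs completed successfully.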
From the ITPUB blog, link: http://blog.itpub.net/31506529/viewspace-2213390/. Please credit the source when reposting.