一個MapReduce 程式示例 細節決定成敗(一)

self_control發表於2016-05-26
最近在看MapReduce,想起一直都是用Copy 然後修改的方法來寫。突然想試試自己動手寫一個極其簡單的mr程式。
細節決定成敗啊,不試不知道,一試才能發現平時注意不到的細節。
下面是我用了很短的時間寫好的一個程式,注意,這份是有問題的!

點選(此處)摺疊或開啟

  1. package wordcount;

  2. import java.io.IOException;

  3. import org.apache.commons.lang.StringUtils;
  4. import org.apache.hadoop.conf.Configuration;
  5. import org.apache.hadoop.conf.Configured;
  6. import org.apache.hadoop.fs.Path;
  7. import org.apache.hadoop.io.LongWritable;
  8. import org.apache.hadoop.io.Text;
  9. import org.apache.hadoop.mapreduce.Job;
  10. import org.apache.hadoop.mapreduce.Mapper;
  11. import org.apache.hadoop.mapreduce.Reducer;
  12. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  13. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  14. import org.apache.hadoop.util.Tool;
  15. import org.apache.hadoop.util.ToolRunner;
  16. import org.apache.log4j.Logger;

  17. public class MyWordCountJob extends Configured implements Tool {
              // Driver for a pass-through MapReduce job: the mapper and reducer
              // below write out every (key, value) pair they receive unchanged.
  18.         Logger log = Logger.getLogger(MyWordCountJob.class);

              // NOTE(review): this is a non-static inner class, so it has no
              // no-arg constructor. The task log that follows this listing fails
              // with NoSuchMethodException for MyWordCountJob$MyWordCountMapper.<init>().
              // This is the defect the article demonstrates; the article's fix is
              // to declare the class `static`.
  19.         public class MyWordCountMapper extends
  20.                         Mapper<LongWritable, Text, LongWritable, Text> {
                      // Logger is named after the outer job class, not the mapper.
  21.                 Logger log = Logger.getLogger(MyWordCountJob.class);

                      // Output holders reused across map() calls.
  22.                 LongWritable mapKey = new LongWritable();
  23.                 Text mapValue = new Text();
                      // Copies the input key and value into the holders, logs them,
                      // and writes them out as-is. Per the article, with
                      // TextInputFormat the key is the offset of the line's first
                      // character within the file.
  24.                 @Override
  25.                 protected void map(LongWritable key, Text value, Context context)
  26.                                 throws IOException, InterruptedException {
  27.                         mapKey.set(key.get());
  28.                         mapValue.set(value.toString());
  29.                         log.info("Mapper: mapKey--" + mapKey.get() + "mapValue --"+ mapValue.toString());
  30.                         context.write(mapKey, mapValue);
  31.                 }

  32.         }

              // NOTE(review): also a non-static inner class; the article states that
              // Mapper and Reducer inner classes must both be declared `static`.
  33.         public class MyWordCountReducer extends Reducer<LongWritable, Text, LongWritable, Text> {

                      // Writes each value of the group back out under the same key.
  34.                 @Override
  35.                 protected void reduce(LongWritable key, Iterable<Text> values,Context context)
  36.                                 throws IOException, InterruptedException {
  37.                         for(Text value :values)
  38.                                 context.write(key, value);
  39.                 }
  40.         }

              // Configures and runs the job. `args` is not used: the input and
              // output paths are hard-coded below. Returns 0 if
              // waitForCompletion(true) returns true, otherwise 1.
  41.         @Override
  42.         public int run(String[] args) throws Exception {
  43.                 log.info("begin to run");
  44.                 Job job = Job.getInstance(getConf(), "MyWordCountJob");
  45.                 job.setJarByClass(MyWordCountJob.class);

  46.                 Path inPath = new Path("demos/pigdemo.txt");
  47.                 Path outPath = new Path("demos/pigdemoOut.txt");

                      // Recursively delete the output path before setting it on the job.
  48.                 outPath.getFileSystem(getConf()).delete(outPath,true);
  49.                 TextInputFormat.setInputPaths(job, inPath);
  50.                 TextOutputFormat.setOutputPath(job, outPath);


  51.                 job.setMapperClass(MyWordCountJob.MyWordCountMapper.class);
  52.                 job.setReducerClass(MyWordCountJob.MyWordCountReducer.class);
  53.                 job.setInputFormatClass(TextInputFormat.class);
  54.                 job.setOutputFormatClass(TextOutputFormat.class);

                      // Map output and final output use the same key/value types.
  55.                 job.setMapOutputKeyClass(LongWritable.class);
  56.                 job.setMapOutputValueClass(Text.class);
  57.                 job.setOutputKeyClass(LongWritable.class);
  58.                 job.setOutputValueClass(Text.class);
  59.                 return job.waitForCompletion(true)?0:1;
  60.         }
              // Entry point: runs the job through ToolRunner and exits with its result.
              // NOTE(review): `result` starts at 0 and the catch block only prints
              // the stack trace, so an exception thrown by ToolRunner.run still
              // leads to exit status 0.
  61.         public static void main(String [] args){
  62.                 int result = 0;
  63.                 try {
  64.                         result = ToolRunner.run(new Configuration(), new MyWordCountJob(), args);
  65.                 } catch (Exception e) {
  66.                         e.printStackTrace();
  67.                 }
  68.                 System.exit(result);
  69.         }

  70. }
寫完後編譯、打包,然後執行。

點選(此處)摺疊或開啟

  1. 16/05/10 22:43:46 INFO mapreduce.Job: Running job: job_1462517728035_0033
  2. 16/05/10 22:43:54 INFO mapreduce.Job: Job job_1462517728035_0033 running in uber mode : false
  3. 16/05/10 22:43:54 INFO mapreduce.Job: map 0% reduce 0%
  4. 16/05/10 22:43:58 INFO mapreduce.Job: Task Id : attempt_1462517728035_0033_m_000000_0, Status : FAILED
  5. Error: java.lang.RuntimeException: java.lang.NoSuchMethodException: wordcount.MyWordCountJob$MyWordCountMapper.<init>()
  6.         at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:131)
  7.         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:721)
  8.         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339)
  9.         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
  10.         at java.security.AccessController.doPrivileged(Native Method)
  11.         at javax.security.auth.Subject.doAs(Subject.java:396)
  12.         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
  13.         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
  14. Caused by: java.lang.NoSuchMethodException: wordcount.MyWordCountJob$MyWordCountMapper.<init>()
  15.         at java.lang.Class.getConstructor0(Class.java:2706)
  16.         at java.lang.Class.getDeclaredConstructor(Class.java:1985)
  17.         at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:125)
  18.         ... 7 more

  19. 16/05/10 22:44:02 INFO mapreduce.Job: Task Id : attempt_1462517728035_0033_m_000000_1, Status : FAILED
  20. Error: java.lang.RuntimeException: java.lang.NoSuchMethodException: wordcount.MyWordCountJob$MyWordCountMapper.<init>()
  21.         at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:131)
  22.         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:721)
  23.         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339)
  24.         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
  25.         at java.security.AccessController.doPrivileged(Native Method)
  26.         at javax.security.auth.Subject.doAs(Subject.java:396)
  27.         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
  28.         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
  29. Caused by: java.lang.NoSuchMethodException: wordcount.MyWordCountJob$MyWordCountMapper.<init>()
  30.         at java.lang.Class.getConstructor0(Class.java:2706)
  31.         at java.lang.Class.getDeclaredConstructor(Class.java:1985)
  32.         at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:125)
  33.         ... 7 more

  34. 16/05/10 22:44:07 INFO mapreduce.Job: Task Id : attempt_1462517728035_0033_m_000000_2, Status : FAILED
  35. Error: java.lang.RuntimeException: java.lang.NoSuchMethodException: wordcount.MyWordCountJob$MyWordCountMapper.<init>()
  36.         at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:131)
  37.         at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:721)
  38.         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339)
  39.         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
  40.         at java.security.AccessController.doPrivileged(Native Method)
  41.         at javax.security.auth.Subject.doAs(Subject.java:396)
  42.         at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
  43.         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
  44. Caused by: java.lang.NoSuchMethodException: wordcount.MyWordCountJob$MyWordCountMapper.<init>()
  45.         at java.lang.Class.getConstructor0(Class.java:2706)
  46.         at java.lang.Class.getDeclaredConstructor(Class.java:1985)
  47.         at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:125)
  48.         ... 7 more

  49. 16/05/10 22:44:14 INFO mapreduce.Job: map 100% reduce 100%
  50. 16/05/10 22:44:14 INFO mapreduce.Job: Job job_1462517728035_0033 failed with state FAILED due to: Task failed task_1462517728035_0033_m_000000
  51. Job failed as tasks failed. failedMaps:1 failedReduces:0

  52. 16/05/10 22:44:15 INFO mapreduce.Job: Counters: 6
  53.         Job Counters
  54.                 Failed map tasks=4
  55.                 Launched map tasks=4
  56.                 Other local map tasks=3
  57.                 Data-local map tasks=1
  58.                 Total time spent by all maps in occupied slots (ms)=99584
  59.                 Total time spent by all reduces in occupied slots (ms)=0

上面的問題百思不得其解,完全不知道什麼地方錯了。
然後跟之前copy的程式碼進行比對。終於找出了問題所在!
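注意:Mapper 與 Reducer 類寫成內部類時,一定要加 static!原因是 Hadoop 透過反射呼叫無參建構函式來建立 Mapper/Reducer 物件,而非靜態內部類的建構函式隱含需要一個外部類物件作為引數,因此找不到 <init>(),丟擲 NoSuchMethodException。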
留個小任務,檢視一下生成的結果檔案可以發現什麼?
使用TextInputFormat時,進入map 函式中的LongWritable型別的key 代表什麼?
經實驗確認這個key 其實是本行的首字元在整個檔案中的偏移量。

下一篇中介紹了如何檢視執行日誌,通過不斷改進一個mapreduce 任務學習hadoop 
一個MapReduce 程式示例 細節決定成敗(二) :觀察日誌及 Counter 

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/30066956/viewspace-2107549/,如需轉載,請註明出處,否則將追究法律責任。

相關文章