import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

import java.io.IOException;
import java.util.*;

public class WordCount {

    /*
     * Counts the occurrences of each word in the input.
     * 1. The map method turns the input into <key1, value1> pairs.
     * 2. The reduce method receives the mapper output and sums the values
     *    that share the same key1 to get each word's count.
     * 3. The result is written to HDFS.
     */

    // Driver: configures the job and submits it to the cluster.
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }

    // Map function: emits <word, 1> for every token in the input line.
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    // Reduce function: sums the 1s collected for each word.
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }
}
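/*
 * For reference, a minimal sketch of the same job written against the newer
 * org.apache.hadoop.mapreduce API (Job/Context instead of JobConf/OutputCollector),
 * assuming Hadoop 2.x or later. The class names TokenizerMapper and IntSumReducer
 * are illustrative, not part of the code above. Reusing the reducer as a combiner
 * is optional but reduces shuffle traffic for this job.
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

public class NewApiWordCount {

    // Map function: same logic as above, but results go through Context.
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reduce function: values arrive as an Iterable rather than an Iterator.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(NewApiWordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class); // local pre-aggregation before the shuffle
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

/*
 * To run either version, package the classes into a jar and submit it with the
 * hadoop CLI; the jar name below is hypothetical, and the input path must exist
 * in HDFS while the output path must not:
 *
 *   hadoop jar wordcount.jar WordCount /user/<you>/input /user/<you>/output
 */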