import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, LongWritable, Text}
import org.apache.hadoop.mapreduce.{Job, Mapper, Reducer}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.util.{Tool, ToolRunner}

import scala.collection.JavaConverters._

/**
 * Classic Hadoop MapReduce word count in Scala, using the "new" (mapreduce) API.
 *
 * User: riley
 * Date: 8/26/13
 * Time: 1:58 PM
 */
object WordCount extends Configured with Tool {

  class Map extends Mapper[LongWritable, Text, Text, IntWritable] {
    private val one = new IntWritable(1)
    // Reuse a single Text instance across map() calls; it must be initialized
    // here, otherwise word.set(...) below throws a NullPointerException.
    private val word = new Text()

    override def map(key: LongWritable, rawLine: Text,
                     context: Mapper[LongWritable, Text, Text, IntWritable]#Context): Unit = {
      val line = rawLine.toString
      if (line.isEmpty) return
      for (token <- line.split(" ")) {
        word.set(token)
        context.write(word, one)  // emit (word, 1) for every token
      }
    }
  }

  class Reduce extends Reducer[Text, IntWritable, Text, IntWritable] {
    private val count = new IntWritable()

    // The values parameter must be java.lang.Iterable, not scala.Iterable;
    // with the Scala type this method does not override Reducer.reduce and
    // is never called by the framework. asScala bridges it back for the loop.
    override def reduce(key: Text, values: java.lang.Iterable[IntWritable],
                        context: Reducer[Text, IntWritable, Text, IntWritable]#Context): Unit = {
      var sum = 0
      for (i <- values.asScala) sum += i.get()
      count.set(sum)
      context.write(key, count)
    }
  }

  def run(args: Array[String]): Int = {
    // new Job(conf, name) is deprecated on Hadoop 2+; Job.getInstance is preferred there.
    val job = new Job(getConf, "WordCount")
    job.setJarByClass(this.getClass)
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])
    job.setMapperClass(classOf[Map])
    job.setReducerClass(classOf[Reduce])
    // Summing counts is associative and commutative, so the reducer
    // can double as a combiner to cut map-side output volume.
    job.setCombinerClass(classOf[Reduce])
    FileInputFormat.addInputPath(job, new Path(args(0)))
    FileOutputFormat.setOutputPath(job, new Path(args(1)))
    if (job.waitForCompletion(true)) 0 else 1
  }

  def main(args: Array[String]): Unit = {
    System.exit(ToolRunner.run(new Configuration(), this, args))
  }
}
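
// A minimal sketch of packaging and submitting the job. The jar name, Scala
// version in the path, and HDFS directories are assumptions -- adjust them to
// your build and cluster:
//
//   sbt package
//   hadoop jar target/scala-2.10/wordcount_2.10-0.1.jar WordCount /user/riley/input /user/riley/output
//
// Because the job goes through ToolRunner, generic options such as
// -D mapreduce.job.reduces=4 are parsed off first, and only the remaining
// arguments reach run() as args(0) (input path) and args(1) (output path).
// The output directory must not already exist, or the job fails at submission.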