1 import java.io.IOException; 2 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.conf.Configured; 5 import org.apache.hadoop.fs.Path; 6 import org.apache.hadoop.io.IntWritable; 7 import org.apache.hadoop.io.LongWritable; 8 import org.apache.hadoop.io.Text; 9 import org.apache.hadoop.mapreduce.Job; 10 import org.apache.hadoop.mapreduce.Mapper; 11 import org.apache.hadoop.mapreduce.Partitioner; 12 import org.apache.hadoop.mapreduce.Reducer; 13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 14 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 15 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 16 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 17 import org.apache.hadoop.util.Tool; 18 import org.apache.hadoop.util.ToolRunner; 19 public class Sort extends Configured implements Tool { 20 /* 21 * 排序 22 * 輸入格式:每個資料佔一行 23 * 輸出格式: 24 * 1 21 25 * 2 32 26 * 3 62 27 * 設計思路: 28 * 使用reduce自帶的預設排序規則。MapReduce按照key值進行排序。如果Key值為Intwritable型別,則按照數字大小排序 29 * 如果key值為Text型別,則按照字典順序對字串進行排序。 30 * 注意:要重寫Partition函式。Reduce排序只能保證自己區域性的資料順序,並不能保證全域性的。 31 * */ 32 public static class Map extends Mapper<LongWritable,Text,IntWritable,IntWritable>{ 33 private IntWritable line=new IntWritable(); 34 public void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException{ 35 line.set(Integer.parseInt(value.toString())); 36 context.write(line, new IntWritable(1)); 37 } 38 39 } 40 41 public static class Reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{ 42 private IntWritable num=new IntWritable(1); 43 public void reduce(IntWritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException{ 44 for(IntWritable var:values){ 45 context.write(num, key); 46 num=new IntWritable(num.get()+1); 47 } 48 } 49 50 } 51 52 public static class Partition extends Partitioner<IntWritable ,IntWritable>{ 53 54 @Override 55 public int getPartition(IntWritable key, IntWritable value, int numPartitions) { 56 // TODO Auto-generated method stub 57 System.out.println(numPartitions); 58 int maxnum=65223; 59 int bound=maxnum/numPartitions+1; 60 for(int i=0;i<numPartitions;i++) 61 { 62 if(key.get()>=bound*(i-1)&&key.get()<=bound*i) 63 { 64 return i; 65 } 66 } 67 return 0; 68 } 69 70 } 71 72 public int run(String[] args)throws Exception{ 73 Configuration conf=new Configuration(); 74 Job job=new Job(conf,"Sort"); 75 job.setJarByClass(Sort.class); 76 77 job.setOutputKeyClass(IntWritable.class); 78 job.setOutputValueClass(IntWritable.class); 79 80 81 job.setMapperClass(Map.class); 82 job.setReducerClass(Reduce.class); 83 job.setPartitionerClass(Partition.class); 84 85 job.setInputFormatClass(TextInputFormat.class); 86 job.setOutputFormatClass(TextOutputFormat.class); 87 88 FileInputFormat.addInputPath(job, new Path(args[0])); 89 FileOutputFormat.setOutputPath(job, new Path(args[1])); 90 91 boolean success=job.waitForCompletion(true); 92 return success?0:1; 93 } 94 95 public static void main(String[] args)throws Exception{ 96 int ret=ToolRunner.run(new Sort(), args); 97 System.exit(ret); 98 } 99 100 }