1 import java.io.IOException; 2 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.conf.Configured; 5 import org.apache.hadoop.fs.Path; 6 import org.apache.hadoop.io.LongWritable; 7 import org.apache.hadoop.io.Text; 8 import org.apache.hadoop.mapreduce.Job; 9 import org.apache.hadoop.mapreduce.Mapper; 10 import org.apache.hadoop.mapreduce.Reducer; 11 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 13 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 15 import org.apache.hadoop.util.Tool; 16 import org.apache.hadoop.util.ToolRunner; 17 public class Dedpu extends Configured implements Tool { 18 /** 19 * 資料去重 20 * 資料樣例: 21 * 輸入資料 22 * 2006-6-9 a 23 * 2006-6-10 b 24 * 2006-6-9 a 25 * 結果資料 26 * 2006-6-9 a 27 * 2006-6-10 b 28 * 設計思路: 29 * Map階段 <時間,字元> 30 * Reduce階段輸入<時間,list<字元>>,去除重複的字元,輸出 31 * 32 * **/ 33 public static class Map extends Mapper<LongWritable,Text,Text,Text>{ 34 public void map(LongWritable key,Text value,Context context)throws IOException, InterruptedException{ 35 String line=value.toString(); 36 Text myvalue=new Text(""); 37 context.write(new Text(line), myvalue); 38 // StringTokenizer tokenizer=new StringTokenizer(line); 39 // String datestr="",datastr=""; 40 // while(tokenizer.hasMoreTokens()) 41 // { 42 // datestr=tokenizer.nextToken(); 43 // datastr=tokenizer.nextToken(); 44 // context.write(new Text(datestr), new Text(datastr)); 45 // 46 // } 47 } 48 49 } 50 51 public static class Reduce extends Reducer<Text,Text,Text,Text>{ 52 public void reduce(Text key,Iterable<Text>values,Context context)throws IOException,InterruptedException{ 53 54 context.write(key, new Text("")); 55 // ArrayList arr=new ArrayList(); 56 // Text mykey=key; 57 // for(Text txt:values) 58 // { 59 // 60 // if(!arr.contains(txt.toString())){ 61 // arr.add(txt.toString()); 62 // } 63 // 64 // 65 // } 66 // for(int i=0;i<arr.size();i++){ 67 // context.write(mykey, new Text(arr.get(i).toString())); 68 // 69 // } 70 71 72 73 } 74 75 } 76 77 public int run(String[] args)throws Exception 78 { 79 Configuration conf=new Configuration(); 80 Job job=new Job(conf,"Data Depution"); 81 job.setJarByClass(Dedpu.class); 82 83 job.setMapperClass(Map.class); 84 job.setCombinerClass(Reduce.class); 85 job.setReducerClass(Reduce.class); 86 87 job.setOutputKeyClass(Text.class); 88 job.setOutputValueClass(Text.class); 89 90 job.setInputFormatClass(TextInputFormat.class); 91 job.setOutputFormatClass(TextOutputFormat.class); 92 93 FileInputFormat.setInputPaths(job, new Path(args[0])); 94 FileOutputFormat.setOutputPath(job, new Path(args[1])); 95 96 boolean success=job.waitForCompletion(true); 97 return success?0:1; 98 99 } 100 101 public static void main(String[] args) throws Exception{ 102 int ret=ToolRunner.run(new Dedpu(), args); 103 System.exit(ret); 104 } 105 }