import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
* 從HDFS批量匯入資料到HBASE
*
* 示例檔案是移動裝置的上網資訊
*/
public class BatchImport {
static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text>{
SimpleDateFormat dateformat1=new SimpleDateFormat("yyyyMMddHHmmss");
Text v2 = new Text();
protected void map(LongWritable key, Text value, Context context) throws java.io.IOException ,InterruptedException {
final String[] splited = value.toString().split("\t");
try {
final Date date = new Date(Long.parseLong(splited[0].trim()));//上網時間
final String dateFormat = dateformat1.format(date);
String rowKey = splited[1]+":"+dateFormat;//將手機號和上網時間作為一個行鍵
v2.set(rowKey+"\t"+value.toString());//將行鍵+行文字內容作為value2
context.write(key, v2);//輸出的是<偏移量,行鍵+行文字>
} catch (NumberFormatException e) {
final Counter counter = context.getCounter("BatchImport", "ErrorFormat");
counter.increment(1L);
System.out.println("出錯了"+splited[0]+" "+e.getMessage());
}
};
}
/**
* 這邊的reduce繼承TableReducer
*/
static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable>{
protected void reduce(LongWritable key, java.lang.Iterable<Text> values, Context context) throws java.io.IOException ,InterruptedException {
for (Text text : values) {
final String[] splited = text.toString().split("\t");
final Put put = new Put(Bytes.toBytes(splited[0]));//引數是行鍵
//引數是列族,列名,值
put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"), Bytes.toBytes(splited[1]));
put.add(Bytes.toBytes("cf"), Bytes.toBytes("msisdn"), Bytes.toBytes(splited[2]));
//省略其他欄位,呼叫put.add(....)即可
context.write(NullWritable.get(), put);
}
};
}
public static void main(String[] args) throws Exception {
final Configuration configuration = new Configuration();
//設定zookeeper
configuration.set("hbase.zookeeper.quorum", "xxc");
//設定hbase表名稱
configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
//將該值改大,防止hbase超時退出
configuration.set("dfs.socket.timeout", "180000");
final Job job = new Job(configuration, "HBaseBatchImport");
job.setMapperClass(BatchImportMapper.class);
job.setReducerClass(BatchImportReducer.class);
//設定map的輸出,不設定reduce的輸出型別
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
//********不再設定輸出路徑,而是設定輸出格式型別**********
job.setOutputFormatClass(TableOutputFormat.class);
FileInputFormat.setInputPaths(job, "hdfs://xxc:9000/input");
job.waitForCompletion(true);
}
}