Spark 與 HBase 整合範例：讀取（newAPIHadoopRDD）與寫入（saveAsHadoopDataset）
package hgs.spark.hbase

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.spark.rdd.NewHadoopRDD
import org.apache.hadoop.hbase.mapreduce.TableInputFormat

/** Reads rows from HBase table "test1" as a Spark RDD through the
  * new-API Hadoop input format and prints columns cf1:col1 and cf1:col2
  * for every row whose key falls in ["h", "x").
  */
object HbaseTest {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf
    conf.setMaster("local").setAppName("local")
    val context = new SparkContext(conf)

    // HBaseConfiguration.create() is the supported factory; the bare
    // `new HBaseConfiguration` constructor is deprecated.
    val hadoopconf = HBaseConfiguration.create()
    hadoopconf.set("hbase.zookeeper.quorum", "bigdata01:2181,bigdata02:2181,bigdata03:2181")
    hadoopconf.set("hbase.zookeeper.property.clientPort", "2181")

    val tableName = "test1"
    hadoopconf.set(TableInputFormat.INPUT_TABLE, tableName)
    // Restrict the scan: row keys in ["h", "x"), family cf1, columns col1/col2.
    hadoopconf.set(TableInputFormat.SCAN_ROW_START, "h")
    hadoopconf.set(TableInputFormat.SCAN_ROW_STOP, "x")
    hadoopconf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf1")
    hadoopconf.set(TableInputFormat.SCAN_COLUMNS, "cf1:col1,cf1:col2")

    /* Alternative: serialize an explicit Scan object into the configuration.
    val startrow = "h"
    val stoprow = "w"
    val scan = new Scan
    scan.setStartRow(startrow.getBytes)
    scan.setStopRow(stoprow.getBytes) // was setStartRow twice: the stop row was never applied
    val proto = ProtobufUtil.toScan(scan)
    val scanToString = Base64.encodeBytes(proto.toByteArray())
    println(scanToString)
    hadoopconf.set(TableInputFormat.SCAN, scanToString)
    */

    val hbaseRdd = context.newAPIHadoopRDD(
      hadoopconf,
      classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    hbaseRdd.foreach { x =>
      // Result.getValue returns null when the cell is absent; wrap in Option
      // so rows missing either column do not throw an NPE in `new String(...)`.
      val col1 = Option(x._2.getValue("cf1".getBytes, "col1".getBytes)).map(new String(_)).getOrElse("")
      val col2 = Option(x._2.getValue("cf1".getBytes, "col2".getBytes)).map(new String(_)).getOrElse("")
      println(col1, col2)
    }

    context.stop()
  }
}
package hgs.spark.hbase

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable

/** Writes a small key/value RDD into HBase table "test1"
  * (family cf1, qualifier col1) through the old-API TableOutputFormat,
  * using each tuple's first element as the row key.
  */
object SparkToHbase {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf
    conf.setMaster("local").setAppName("local")
    val context = new SparkContext(conf)

    val rdd = context.parallelize(List(("aaaaaaa", "aaaaaaa"), ("bbbbb", "bbbbb")), 2)

    // HBaseConfiguration.create() is the supported factory; the bare
    // `new HBaseConfiguration` constructor is deprecated.
    val hadoopconf = HBaseConfiguration.create()
    hadoopconf.set("hbase.zookeeper.quorum", "bigdata01:2181,bigdata02:2181,bigdata03:2181")
    hadoopconf.set("hbase.zookeeper.property.clientPort", "2181")

    // JobConf copies hadoopconf, so OUTPUT_TABLE only needs to be set once,
    // on the JobConf actually handed to saveAsHadoopDataset (the original
    // set it redundantly on both configurations).
    val jobconf = new JobConf(hadoopconf, this.getClass)
    jobconf.set(TableOutputFormat.OUTPUT_TABLE, "test1")
    jobconf.setOutputFormat(classOf[TableOutputFormat])

    val exterrdd = rdd.map { x =>
      val put = new Put(x._1.getBytes)
      // NOTE(review): Put.add is deprecated in HBase 1.x and removed in 2.x;
      // switch to put.addColumn(...) when the cluster/client is >= 1.0.
      put.add("cf1".getBytes, "col1".getBytes, x._2.getBytes)
      (new ImmutableBytesWritable, put)
    }

    exterrdd.saveAsHadoopDataset(jobconf)
    context.stop()
  }
}
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/31506529/viewspace-2220682/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- HBase實操:HBase-Spark-Read-Demo 分享Spark
- spark讀取hbase的資料Spark
- Spark 如何寫入HBase/Redis/MySQL/KafkaSparkRedisMySqlKafka
- MapReduce和Spark讀取HBase快照表Spark
- CDH版Hadoop-zookeeper-hbase-spark安裝文件HadoopSpark
- Spark+Hbase 億級流量分析實戰( PV/UV )Spark
- Spark拉取Kafka的流資料,轉插入HBase中SparkKafka
- hbase與phoenix整合(使用phoenix操作hbase資料)
- 【Spark實戰】Spark操作HBase問題:java.io.IOException: Non-increasing Bloom keysSparkJavaExceptionOOM
- Spark+Hbase 億級流量分析實戰( 留存計算)Spark
- Hbase的安裝與部署
- Spark讀Hbase優化 --手動劃分region提高並行數Spark優化並行
- spark 與flume 1.6.0Spark
- spark與elasticsearch整合SparkElasticsearch
- Spark下載與入門(Spark自學二)Spark
- hive與hbase的聯絡與區別Hive
- Spark 讀取 Hbase 優化 --手動劃分 region 提高並行數Spark優化並行
- spark 與 yarn 結合SparkYarn
- Spark+Hbase 億級流量分析實戰(日誌儲存設計)Spark
- 一文讀懂Hadoop、HBase、Hive、Spark分散式系統架構HadoopHiveSpark分散式架構
- spark與kafaka整合workcount示例 spark-stream-kafkaSparkKafka
- spark學習筆記--Spark調優與除錯Spark筆記除錯
- Spark GraphX簡介與教程Spark
- Hadoop與Spark關係HadoopSpark
- Hbase(二)Hbase常用操作
- Hadoop與HBase中遇到的問題Hadoop
- HBase 教程:什麼是 HBase?
- HBase篇--HBase常用優化優化
- hbase - [04] java訪問hbaseJava
- Spark 安裝部署與快速上手Spark
- 【Spark篇】---SparkStream初始與應用Spark
- Hadoop與Spark的比較HadoopSpark
- Storm與Spark Streaming比較ORMSpark
- HBase篇--HBase操作Api和Java操作Hbase相關ApiAPIJava
- Spark修煉之道(進階篇)——Spark入門到精通:第八節 Spark SQL與DataFrame(一)SparkSQL
- HBase 系列(五)——HBase常用 Shell 命令
- HBase可用性分析與高可用實踐
- 【HBase】start master 與 start master --backup 的區別AST