Integrating Spark with Elasticsearch
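The Maven dependencies used by the examples below (a fragment of the project's pom.xml):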
<dependencies>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-compiler</artifactId>
        <version>${scala.version}</version>
        <scope>compile</scope>
    </dependency>
    <!--
    <dependency>
        <groupId>org.specs2</groupId>
        <artifactId>specs2_${scala.binary.version}</artifactId>
        <version>3.3.1</version>
        <scope>test</scope>
    </dependency>
    -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-hadoop</artifactId>
        <version>${elasticsearch.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.10</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.10</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-core</artifactId>
            </exclusion>
            <exclusion>
                <groupId>cascading</groupId>
                <artifactId>cascading-hadoop</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>redis.clients</groupId>
        <artifactId>jedis</artifactId>
        <version>2.8.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-pool2</artifactId>
        <version>2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.9.0.1</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.jettison</groupId>
        <artifactId>jettison</artifactId>
        <version>1.3.7</version>
    </dependency>
</dependencies>
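The fragment references ${scala.version}, ${scala.binary.version}, ${spark.version} and ${elasticsearch.version} without defining them; a minimal properties block would supply these. The version numbers below are assumptions consistent with the APIs used in this article (Spark 1.x RDD/DataFrame, elasticsearch-hadoop 2.x), not values from the original:
<properties>
    <scala.version>2.10.5</scala.version>
    <scala.binary.version>2.10</scala.binary.version>
    <spark.version>1.6.1</spark.version>
    <elasticsearch.version>2.2.0</elasticsearch.version>
</properties>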
Demo1
package demo.spark.elasticsearch

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.elasticsearch.spark._

/**
 * Created by cao on 16-3-25.
 */
object Demo1 {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("ESDemo1")
    // let the connector create the target index if it does not exist yet
    conf.set("es.index.auto.create", "true")
    val sc = new SparkContext(conf)

    val numbers = Map("one" -> 1, "two" -> 2, "three" -> 3)
    val airports = Map("arrival" -> "Otopeni", "SFO" -> "San Fran")
    // each Map becomes one document in index "spark", type "docs"
    sc.makeRDD(Seq(numbers, airports)).saveToEs("spark/docs")
  }
}
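Searching the index afterwards (e.g. curl 'http://localhost:9200/spark/_search'; host and port assumed to be the defaults) shows the documents that were written. The response below lists 10 of 12 hits (the default page size); the total reflects several runs of the demos against the same index: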
{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":12,"max_score":1.0,"hits":[{"_index":"spark","_type":"docs","_id":"AVOukOOI0OVBGh8ft4am","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOu-vRa0OVBGh8ft4a9","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOu_kMq0OVBGh8ft4a_","_score":1.0,"_source":{"departure":"MUC","arrival":"OTP"}},{"_index":"spark","_type":"docs","_id":"AVOvAVuS0OVBGh8ft4bE","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOujInV0OVBGh8ft4aj","_score":1.0,"_source":{"arrival":"Otopeni","SFO":"San Fran"}},{"_index":"spark","_type":"docs","_id":"AVOujInn0OVBGh8ft4ak","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOumniH0OVBGh8ft4as","_score":1.0,"_source":{"departure":"MUC","arrival":"OTP"}},{"_index":"spark","_type":"docs","_id":"AVOumniH0OVBGh8ft4at","_score":1.0,"_source":{"departure":"OTP","arrival":"SFO"}},{"_index":"spark","_type":"docs","_id":"AVOu_kMq0OVBGh8ft4a-","_score":1.0,"_source":{"departure":"OTP","arrival":"SFO"}},{"_index":"spark","_type":"docs","_id":"AVOvAVuJ0OVBGh8ft4bD","_score":1.0,"_source":{"arrival":"Otopeni","SFO":"San Fran"}}]}}
Demo2
package demo.spark.elasticsearch

import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark.rdd.EsSpark

/**
 * Created by cao on 16-3-26.
 */
// a simple case class; elasticsearch-hadoop maps its fields to document fields
case class Trip(departure: String, arrival: String)

object Demo2 {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("Demo2"))
    val upcomingTrip = Trip("OTP", "SFO")
    val lastWeekTrip = Trip("MUC", "OTP")
    val rdd = sc.makeRDD(Seq(upcomingTrip, lastWeekTrip))
    // equivalent to rdd.saveToEs(...), but via the explicit EsSpark API
    EsSpark.saveToEs(rdd, "spark/docs")
  }
}
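EsSpark.saveToEs also accepts per-write settings. For instance, the standard elasticsearch-hadoop setting es.mapping.id makes the connector use a document field as the Elasticsearch _id instead of an auto-generated one; the field chosen below is purely illustrative:

EsSpark.saveToEs(rdd, "spark/docs", Map("es.mapping.id" -> "departure"))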
Demo3
package demo.spark.elasticsearch

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.elasticsearch.spark.sql._

/**
 * Created by cao on 16-3-26.
 */
// define the Person case class
case class Person(name: String, surname: String, age: Int)

object Demo3 {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("Demo3"))
    // create the SQLContext
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._
    // build a DataFrame from a comma-separated text file and index it
    val people = sc.textFile("file:///home/cao/Desktop/people.txt")
      .map(_.split(","))
      .map(p => Person(p(0), p(1), p(2).trim.toInt))
      .toDF()
    people.saveToEs("spark/people")
  }
}
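The input file is expected to hold one person per line in the form name,surname,age. A hypothetical people.txt (contents not from the original):

Wang,Lei,25
Li,Ming,32
Zhang,San,41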
Demo4
package demo.spark.elasticsearch

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
 * Created by cao on 16-3-26.
 */
object Demo4 {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("Demo4"))
    // create the SQLContext
    val sqlContext = new SQLContext(sc)
    // read the index back as a DataFrame; "pushdown" lets the connector
    // translate Spark SQL filters into Elasticsearch queries
    val options = Map("pushdown" -> "true", "es.nodes" -> "localhost", "es.port" -> "9200")
    val spark14DF = sqlContext.read.format("org.elasticsearch.spark.sql").options(options).load("spark/people")
    spark14DF.select("name", "age").collect().foreach(println)

    // the DataFrame can also be queried with SQL
    spark14DF.registerTempTable("people")
    val results = sqlContext.sql("SELECT name FROM people")
    results.map(t => "Name: " + t(0)).collect().foreach(println)
  }
}
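With pushdown enabled, a DataFrame filter such as the following (an illustrative line, not from the original) is pushed to Elasticsearch as a query rather than evaluated in Spark after a full scan:

spark14DF.filter(spark14DF("age") > 20).show()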
Demo5
package demo.spark.elasticsearch

import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by cao on 16-3-26.
 */
object Demo5 {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("Demo5"))
    // create the SQLContext
    val sqlContext = new SQLContext(sc)
    // expose the Elasticsearch index as a temporary table via Spark SQL DDL
    sqlContext.sql(
      "CREATE TEMPORARY TABLE myPeople " +
      "USING org.elasticsearch.spark.sql " +
      "OPTIONS (resource 'spark/people', nodes 'localhost:9200')")
    sqlContext.sql("SELECT * FROM myPeople").collect.foreach(println)
  }
}
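Once declared, the temporary table behaves like any other Spark SQL source; a filtered query (again just an illustrative variation) would be:

sqlContext.sql("SELECT name FROM myPeople WHERE age > 20").collect.foreach(println)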
Demo6
package demo.spark.elasticsearch

import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark.sql._

/**
 * Created by cao on 16-3-26.
 */
object Demo6 {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("Demo6"))
    // create the SQLContext
    val sqlContext = new SQLContext(sc)
    // esDF reads an index straight into a DataFrame
    val people = sqlContext.esDF("spark/people")
    println(people.schema.treeString)
    // the second argument is an Elasticsearch query; here a URI query
    val wangs = sqlContext.esDF("spark/people", "?q=wang")
    wangs.show()
  }
}
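The query argument does not have to be a URI query; elasticsearch-hadoop also accepts a full query-DSL JSON string (the field name below is assumed from the people mapping):

val wangs2 = sqlContext.esDF("spark/people", """{"query": {"match": {"name": "wang"}}}""")
wangs2.show()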