Spark SQL
引入 hadoop 的core-site.xml
加入與之對應版本的Spark-client
和Spark-Sql 依賴
這是一個簡單的 Spark SQL 方式的 Word Count 例子
```scala
package com.xxx
import org.apache.spark.sql.SparkSession
object WordCount {

  /** Word count via Spark SQL: read a text file (resolved against the default
    * filesystem configured in core-site.xml, e.g. HDFS), register each line as
    * a row of a temp view, then split/explode/group entirely in SQL.
    */
  def main(args: Array[String]): Unit = {
    // FIX: the original used curly "smart" quotes (“local[*]”), which do not compile.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("spark sql word count")
      .getOrCreate()

    // Implicit conversions (RDD -> Dataset) and the sql() shorthand.
    import spark.implicits._
    import spark.sql

    // Load the input file from the default FS (HDFS when core-site.xml is on the classpath).
    val rdd = spark.sparkContext.textFile("/user_info.txt")

    // FIX: replaced deprecated postfix syntax `rdd toDS()` with an explicit call.
    // The resulting Dataset[String] exposes a single column named "value".
    val ds = rdd.toDS()
    ds.printSchema()
    ds.createOrReplaceTempView("line_str")

    // FIX: the original split delimiter was two backticks (``), a markdown-mangled
    // quote pair; words are separated by a space, so split on ' '.
    val wcResult = sql(
      """
        |select word
        |      ,count(1) as count
        |  from (
        |        select explode(split(value, ' ')) as word
        |          from line_str
        |       )
        | group by word
      """.stripMargin)

    wcResult.show()

    // Release the session and its cluster resources.
    spark.stop()
  }
}
```