Calcite 使用原生的RDD 處理Spark
1 通過配置 :
properties.setProperty("spark", "true"); //可以執行呼叫內部的函式
2 需要修改 Calcite-spark 中相關的檔案
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.spark;
import org.apache.calcite.adapter.enumerable.EnumerableRules;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.linq4j.tree.ClassDeclaration;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.ArrayBindable;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.javac.JaninoCompiler;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Calendar;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Implementation of
* {@link org.apache.calcite.jdbc.CalcitePrepare.SparkHandler}. Gives the core
* Calcite engine access to rules that only exist in the Spark module.
*/
public class SparkHandlerImpl implements CalcitePrepare.SparkHandler {
private final HttpServer classServer;
private final AtomicInteger classId;
private final SparkSession spark ;
private
3 直接可以使用例子如下:
package org.apache.calcite.test;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Properties;
import org.apache.calcite.jdbc.CalciteConnection;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.jdbc.CalcitePrepare.SparkHandler;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
/**
* @Auther: caozq
* @Date: 2018/5/14 10:59
* @Description:
*/
public class SparkTest {
public static class Person implements Serializable {
private static final long serialVersionUID = -6259413972682177507L;
private String name;
private int age;
public Person(String name, int age) {
this.name = name;
this.age = age;
}
public String toString() {
return name + ": " + age;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
}
public static void getDate(String sql) {
Properties properties = new Properties();
properties.setProperty("spark", "true");
CalciteConnection calciteConnection = null;
try {
Class.forName("org.apache.calcite.jdbc.Driver");
Connection aConnection = DriverManager.getConnection("jdbc:calcite:", properties);
DatabaseMetaData metaData = aConnection.getMetaData();
System.out.println("productName="+metaData.getDatabaseProductName());
calciteConnection = aConnection.unwrap(CalciteConnection.class);
CalcitePrepare.Context context = calciteConnection.createPrepareContext();
SparkHandler sparkHandler = context.spark();
JavaSparkContext sparkcontext = (JavaSparkContext) sparkHandler.sparkContext();
JavaRDD<String> input = sparkcontext.parallelize(Arrays.asList("abc,1", "test,2"));
JavaRDD<Person> persons = input.map(s -> s.split(",")).map(s -> new Person(s[0], Integer.parseInt(s[1])));
System.out.println(persons.collect());
SparkSession spark = SparkSession.builder().appName("Test").getOrCreate();
Dataset<Row> df = spark.createDataFrame(persons, Person.class);
df.show();
df.printSchema();
SQLContext sqls = new SQLContext(spark);
sqls.registerDataFrameAsTable(df, "person");
sqls.sql("SELECT * FROM person WHERE age>1").show();
sparkcontext.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
calciteConnection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
public static void main(String[] args) {
String sql = "select * from test";
getDate(sql);
}
}
相關文章
- Spark RDD APISparkAPI
- spark-RDDSpark
- Spark 的核心概念 RDDSpark
- Spark - [03] RDD概述Spark
- Spark RDD的預設分割槽數:(spark 2.1.0)Spark
- Spark RDD在Spark中的地位和作用如何?Spark
- spark RDD,reduceByKey vs groupByKeySpark
- 大白話講解Spark中的RDDSpark
- Spark RDD中Runtime流程解析Spark
- SparkSQL /DataFrame /Spark RDD誰快?SparkSQL
- Spark RDD 特徵及其依賴Spark特徵
- spark學習筆記--RDDSpark筆記
- Spark Streaming 流式處理Spark
- Spark(十三) Spark效能調優之RDD持久化Spark持久化
- Spark SQL中的RDD與DataFrame轉換SparkSQL
- Spark學習(二)——RDD基礎Spark
- 【大資料】Spark RDD基礎大資料Spark
- 使用spark-sql處理Doris大表關聯SparkSQL
- Spark RDD詳解 | RDD特性、lineage、快取、checkpoint、依賴關係Spark快取
- Spark從入門到放棄---RDDSpark
- Spark RDD運算元(八)mapPartitions, mapPartitionsWithIndexSparkAPPIndex
- 快取Apache Spark RDD - 效能調優快取ApacheSpark
- spark處理json資料DemoSparkJSON
- 大資料學習—Spark核心概念RDD大資料Spark
- Spark----RDD運算元分類 DAGSpark
- spark學習筆記--RDD鍵對操作Spark筆記
- 深入原始碼理解Spark RDD的資料分割槽原理原始碼Spark
- Spark Task 的執行流程④ - task 結果的處理Spark
- 一文帶你過完Spark RDD的基礎概念Spark
- 《深入理解Spark》之sparkSQL 處理流程SparkSQL
- RDD持久化,不使用RDD持久化的問題的工作原理持久化
- Spark效能優化:對RDD持久化或CheckPoint操作Spark優化持久化
- Calcite(一):javacc語法框架及使用Java框架
- 通過WordCount解析Spark RDD內部原始碼機制Spark原始碼
- spark RDD textFile運算元 分割槽數量詳解Spark
- Spark 叢集執行任務失敗的故障處理Spark
- Spark運算元:統計RDD分割槽中的元素及數量Spark
- Spark SQL:實現日誌離線批處理SparkSQL