Using native Spark RDDs with Calcite
1. Enable Spark support through a connection property:
properties.setProperty("spark", "true"); // enables Calcite to invoke the Spark module's internal functions
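For reference, a minimal sketch of how this property is passed when opening a Calcite JDBC connection (the same pattern the full example in step 3 uses):

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Properties;

Properties properties = new Properties();
properties.setProperty("spark", "true"); // route execution through the Spark adapter
Connection connection = DriverManager.getConnection("jdbc:calcite:", properties);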
2. Modify the relevant file in the calcite-spark module (SparkHandlerImpl.java, shown below):
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.adapter.spark;
import org.apache.calcite.adapter.enumerable.EnumerableRules;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.linq4j.tree.ClassDeclaration;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.ArrayBindable;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.javac.JaninoCompiler;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Calendar;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Implementation of
* {@link org.apache.calcite.jdbc.CalcitePrepare.SparkHandler}. Gives the core
* Calcite engine access to rules that only exist in the Spark module.
*/
public class SparkHandlerImpl implements CalcitePrepare.SparkHandler {
  private final HttpServer classServer;
  private final AtomicInteger classId;
  private final SparkSession spark;
  private final JavaSparkContext sparkContext; // assumed completion; the original excerpt is truncated here
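The post truncates the file at this point. What follows is a minimal sketch of the kind of change described, assuming the handler builds a shared SparkSession and returns its JavaSparkContext from CalcitePrepare.SparkHandler#sparkContext() (the method the example in step 3 casts to JavaSparkContext). The SparkConf settings below are illustrative, not from the original file:

  private SparkHandlerImpl() {
    // Illustrative local configuration; adjust master/appName for a real deployment.
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("calcite");
    spark = SparkSession.builder().config(conf).getOrCreate();
    sparkContext = new JavaSparkContext(spark.sparkContext());
    classServer = null; // class-server wiring elided in this sketch
    classId = new AtomicInteger();
  }

  @Override public Object sparkContext() {
    return sparkContext; // callers cast this to JavaSparkContext
  }

  // ... remainder of SparkHandlerImpl (compile(), registerRules(), etc.) as in the original file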
3. It can then be used directly, as in the following example:
package org.apache.calcite.test;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Properties;
import org.apache.calcite.jdbc.CalciteConnection;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.jdbc.CalcitePrepare.SparkHandler;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
 * @Author: caozq
 * @Date: 2018/5/14 10:59
 * @Description: Gets the Spark context out of a Calcite connection and runs Spark SQL over a native RDD.
 */
public class SparkTest {

  /** Simple serializable bean used to build a typed RDD and, from it, a DataFrame. */
  public static class Person implements Serializable {
    private static final long serialVersionUID = -6259413972682177507L;
    private String name;
    private int age;

    public Person(String name, int age) {
      this.name = name;
      this.age = age;
    }

    public String toString() {
      return name + ": " + age;
    }

    public String getName() {
      return name;
    }

    public void setName(String name) {
      this.name = name;
    }

    public int getAge() {
      return age;
    }

    public void setAge(int age) {
      this.age = age;
    }
  }
  public static void getData(String sql) {
    Properties properties = new Properties();
    properties.setProperty("spark", "true"); // enable Calcite's Spark handler
    CalciteConnection calciteConnection = null;
    try {
      Class.forName("org.apache.calcite.jdbc.Driver");
      Connection aConnection = DriverManager.getConnection("jdbc:calcite:", properties);
      DatabaseMetaData metaData = aConnection.getMetaData();
      System.out.println("productName=" + metaData.getDatabaseProductName());
      calciteConnection = aConnection.unwrap(CalciteConnection.class);

      // Fetch the JavaSparkContext created by the Spark handler.
      CalcitePrepare.Context context = calciteConnection.createPrepareContext();
      SparkHandler sparkHandler = context.spark();
      JavaSparkContext sparkContext = (JavaSparkContext) sparkHandler.sparkContext();

      // Build a native RDD and map it to a typed RDD.
      JavaRDD<String> input = sparkContext.parallelize(Arrays.asList("abc,1", "test,2"));
      JavaRDD<Person> persons = input.map(s -> s.split(","))
          .map(s -> new Person(s[0], Integer.parseInt(s[1])));
      System.out.println(persons.collect());

      // Turn the RDD into a DataFrame and run the SQL passed in over it.
      SparkSession spark = SparkSession.builder().appName("Test").getOrCreate();
      Dataset<Row> df = spark.createDataFrame(persons, Person.class);
      df.show();
      df.printSchema();
      df.createOrReplaceTempView("person");
      spark.sql(sql).show();
      sparkContext.close();
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      if (calciteConnection != null) { // guard against NPE when the connection failed to open
        try {
          calciteConnection.close();
        } catch (SQLException e) {
          e.printStackTrace();
        }
      }
    }
  }
  public static void main(String[] args) {
    String sql = "SELECT * FROM person WHERE age > 1";
    getData(sql);
  }
}
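Running main should print the database product name reported by Calcite, the collected RDD ([abc: 1, test: 2]), the DataFrame rows and schema, and finally the filtered query result; with the sample data above, only the test/2 row satisfies age > 1. Exact console formatting varies by Spark version.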