Calcite 使用 Spark 原生的 RDD 處理資料

強子no2發表於2018-06-28

1   通過配置 :  

properties.setProperty("spark", "true");  // 啟用後即可呼叫 Spark 內部的函式

2  需要修改 Calcite-spark  中相關的檔案

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.calcite.adapter.spark;

import org.apache.calcite.adapter.enumerable.EnumerableRules;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.linq4j.tree.ClassDeclaration;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.ArrayBindable;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.javac.JaninoCompiler;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Calendar;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Implementation of
 * {@link org.apache.calcite.jdbc.CalcitePrepare.SparkHandler}. Gives the core
 * Calcite engine access to rules that only exist in the Spark module.
 */
public class SparkHandlerImpl implements CalcitePrepare.SparkHandler {
  private final HttpServer classServer;
  private final AtomicInteger classId;

  private final SparkSession spark ;
  private

3   可以直接使用,範例如下:

package org.apache.calcite.test;

import java.io.Serializable;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Properties;

import org.apache.calcite.jdbc.CalciteConnection;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.jdbc.CalcitePrepare.SparkHandler;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;


/**
 * @Auther: caozq
 * @Date: 2018/5/14 10:59
 * @Description:
 */
/**
 * Demo: obtains the Spark handler from a Calcite JDBC connection, builds a
 * small RDD of {@link Person} rows, converts it to a DataFrame, registers it
 * as table {@code person}, and runs a hard-coded Spark SQL query against it.
 */
public class SparkTest {

	/** Simple serializable bean used as the row type for the Spark DataFrame. */
	public static class Person implements Serializable {
		private static final long serialVersionUID = -6259413972682177507L;
		private String name;
		private int age;

		public Person(String name, int age) {
			this.name = name;
			this.age = age;
		}

		@Override
		public String toString() {
			return name + ": " + age;
		}

		public String getName() {
			return name;
		}

		public void setName(String name) {
			this.name = name;
		}

		public int getAge() {
			return age;
		}

		public void setAge(int age) {
			this.age = age;
		}
	}

	/**
	 * Opens a Calcite connection with the Spark handler enabled, parallelizes a
	 * two-row data set, registers it as table {@code person}, and prints the
	 * result of a fixed Spark SQL query.
	 *
	 * <p>NOTE(review): the {@code sql} argument is currently unused — the demo
	 * always runs the hard-coded query {@code SELECT * FROM person WHERE age>1}.
	 * Wire it through if a caller-supplied query is intended. The method name
	 * {@code getDate} also looks like a typo for {@code getData}, but is kept
	 * for compatibility with existing callers.
	 *
	 * @param sql SQL text supplied by the caller (currently ignored)
	 */
	public static void getDate(String sql) {

		Properties properties = new Properties();
		// "spark" = "true" tells Calcite to enable its Spark handler.
		properties.setProperty("spark", "true");
		CalciteConnection calciteConnection = null;

		try {
			Class.forName("org.apache.calcite.jdbc.Driver");
			Connection aConnection = DriverManager.getConnection("jdbc:calcite:", properties);

			DatabaseMetaData metaData = aConnection.getMetaData();
			System.out.println("productName=" + metaData.getDatabaseProductName());
			calciteConnection = aConnection.unwrap(CalciteConnection.class);
			CalcitePrepare.Context context = calciteConnection.createPrepareContext();

			// The Spark handler exposes the JavaSparkContext managed by Calcite.
			SparkHandler sparkHandler = context.spark();
			JavaSparkContext sparkcontext = (JavaSparkContext) sparkHandler.sparkContext();

			JavaRDD<String> input = sparkcontext.parallelize(Arrays.asList("abc,1", "test,2"));
			// Each "name,age" line becomes a Person bean.
			JavaRDD<Person> persons = input.map(s -> s.split(",")).map(s -> new Person(s[0], Integer.parseInt(s[1])));

			System.out.println(persons.collect());
			SparkSession spark = SparkSession.builder().appName("Test").getOrCreate();

			Dataset<Row> df = spark.createDataFrame(persons, Person.class);
			df.show();

			df.printSchema();
			SQLContext sqls = new SQLContext(spark);
			sqls.registerDataFrameAsTable(df, "person");

			sqls.sql("SELECT * FROM person WHERE age>1").show();

			sparkcontext.close();

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Bug fix: getConnection()/unwrap() may throw before calciteConnection
			// is assigned; the original code then NPE'd on close(). Guard for null.
			if (calciteConnection != null) {
				try {
					calciteConnection.close();
				} catch (SQLException e) {
					e.printStackTrace();
				}
			}
		}
	}

	public static void main(String[] args) {
		String sql = "select * from test";
		getDate(sql);
	}

}


相關文章