A Minimal Spark SQL Example

Posted by 21ca on 2017-03-30
pom.xml:
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>2.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.10</artifactId>
        <version>2.1.0</version>
    </dependency>
</dependencies>
Java:
import java.io.Serializable;
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;

public class SparkSqlTest {
    // JavaBean used to infer the DataFrame schema; it must be serializable
    // and expose getters/setters for every column.
    public static class Person implements Serializable {
        private static final long serialVersionUID = -6259413972682177507L;
        private String name;
        private int age;

        public Person(String name, int age) {
            this.name = name;
            this.age = age;
        }
        public String toString() {
            return name + ": " + age;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public int getAge() {
            return age;
        }
        public void setAge(int age) {
            this.age = age;
        }
    }

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Reuses the SparkContext created above.
        SparkSession spark = SparkSession.builder().appName("Test").getOrCreate();

        // Parse "name,age" strings into Person objects.
        JavaRDD<String> input = sc.parallelize(Arrays.asList("abc,1", "test,2"));
        JavaRDD<Person> persons = input.map(s -> s.split(","))
                .map(s -> new Person(s[0], Integer.parseInt(s[1])));

        // [abc: 1, test: 2]
        System.out.println(persons.collect());

        // Build a DataFrame from the RDD, inferring the schema from the Person bean.
        Dataset<Row> df = spark.createDataFrame(persons, Person.class);

        /*
        +---+----+
        |age|name|
        +---+----+
        |  1| abc|
        |  2|test|
        +---+----+
         */
        df.show();

        /*
        root
         |-- age: integer (nullable = false)
         |-- name: string (nullable = true)
         */
        df.printSchema();

        // Register the DataFrame as a table so it can be queried with SQL.
        SQLContext sql = new SQLContext(spark);
        sql.registerDataFrameAsTable(df, "person");

        /*
        +---+----+
        |age|name|
        +---+----+
        |  2|test|
        +---+----+
         */
        sql.sql("SELECT * FROM person WHERE age>1").show();

        sc.close();
    }
}
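
In Spark 2.x the extra SQLContext is not strictly needed: a DataFrame can be registered as a temporary view on the SparkSession itself and queried through spark.sql. A minimal sketch of that variant, reusing the spark and df variables from the example above:

Java:
// Register the DataFrame as a temporary view scoped to this SparkSession
// (plays the same role as the SQLContext-based table registration above).
df.createOrReplaceTempView("person");

// Same query, same result as the SQLContext version above.
spark.sql("SELECT * FROM person WHERE age > 1").show();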

Source: "ITPUB Blog", link: http://blog.itpub.net/10742815/viewspace-2136337/. Please credit the source when reposting.
