概述
- Flink 提供 Table API 和 SQL,用於統一流處理和批處理。
- Table API是用於Scala和Java的語言整合查詢API,它允許以非常直觀的方式組合來自關係運算子(例如選擇,過濾和聯接)的查詢。
- Flink的SQL支援基於實現SQL標準的Apache Calcite。無論輸入是批處理輸入(DataSet)還是流輸入(DataStream),在兩個介面中指定的查詢都具有相同的語義並指定相同的結果。
Table API 和 SQL 的功能尚未全部完成,仍在積極開發中,具體支援程度請參閱官方文件。
使用
多表連線案例
pom依賴
flink 版本為:1.9.3
<dependencies>
    <!-- Apache Flink dependencies -->
    <!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Table API / SQL: bridge between the DataStream API and the Table API. -->
    <!-- The Scala suffix uses the same property as flink-streaming-java above so that
         all Scala-dependent artifacts are resolved for one Scala binary version. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Blink planner, selected in code via useBlinkPlanner(). -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
模擬一個實時流
import lombok.Data;
/**
 * Data holder for one record of the simulated real-time stream.
 *
 * <p>Lombok's {@code @Data} supplies the accessors (for example {@code setId} and
 * {@code setSeasonType}) that the custom source uses to populate an instance.
 */
@Data
public class Product {
/** Identifier of the product. */
public Integer id;
/** Season label attached to the product. */
public String seasonType;
}
自定義Source
import common.Product;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.util.ArrayList;
import java.util.Random;
/**
 * Demo source that emits one randomly generated {@link Product} per second
 * until {@link #cancel()} is called.
 */
public class ProductStremingSource implements SourceFunction<Product> {

    /** Season labels a generated product can carry. */
    private static final String[] SEASONS = {"spring", "summer", "autumn", "winter"};

    /** Exclusive upper bound for generated product ids. */
    private static final int MAX_ID = 100;

    /** Pause between two emitted records, in milliseconds. */
    private static final long EMIT_INTERVAL_MS = 1000L;

    /**
     * Loop flag for {@link #run}. It is volatile because, per the SourceFunction
     * contract, {@link #cancel()} is invoked from a different thread than the one
     * executing {@link #run}, and the write must become visible to the loop.
     */
    private volatile boolean isRunning = true;

    /**
     * Emits a new product every second for as long as the source is running.
     *
     * @param ctx context used to emit the generated records
     * @throws Exception if the sleep between records is interrupted
     */
    @Override
    public void run(SourceContext<Product> ctx) throws Exception {
        // One generator per run() call instead of two fresh instances per record.
        Random random = new Random();
        while (isRunning) {
            ctx.collect(generateProduct(random));
            Thread.sleep(EMIT_INTERVAL_MS);
        }
    }

    /**
     * Builds a product with a random id in {@code [0, MAX_ID)} and a random season.
     *
     * @param random generator to draw the id and the season index from
     * @return the freshly populated product
     */
    private Product generateProduct(Random random) {
        Product product = new Product();
        product.setId(random.nextInt(MAX_ID));
        product.setSeasonType(SEASONS[random.nextInt(SEASONS.length)]);
        return product;
    }

    /**
     * Stops the emit loop; {@link #run} returns after its current iteration.
     */
    @Override
    public void cancel() {
        isRunning = false;
    }
}
主程式(注意:以下程式碼使用的 Item 類別(含 name、id 欄位)與 MyStremingSource 未在本文中列出,需自行提供。)
public class TableStremingDemo {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// 使用Blink
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
SingleOutputStreamOperator<Item> source = bsEnv.addSource(new MyStremingSource())
.map(new MapFunction<Item, Item>() {
@Override
public Item map(Item value) throws Exception {
return value;
}
});
// 分割流
final OutputTag<Item> even = new OutputTag<Item>("even") {
};
final OutputTag<Item> old = new OutputTag<Item>("old") {
};
SingleOutputStreamOperator<Item> sideOutputData = source.process(new ProcessFunction<Item, Item>() {
@Override
public void processElement(Item value, Context ctx, Collector<Item> out) throws Exception {
if (value.getId() % 2 == 0) {
ctx.output(even,value);
}else{
ctx.output(old,value);
}
}
});
DataStream<Item> evenStream = sideOutputData.getSideOutput(even);
DataStream<Item> oldStream = sideOutputData.getSideOutput(old);
// 註冊兩個 表 : evenTable,oddTable
bsTableEnv.registerDataStream("evenTable",evenStream , "name,id");
bsTableEnv.registerDataStream("oddTable", oldStream, "name,id");
// 執行sql 輸出Table
Table queryTable = bsTableEnv.sqlQuery("select a.id,a.name,b.id,b.name from evenTable as a join oddTable as b on a.name = b.name");
queryTable.printSchema();;
// 獲取流
DataStream<Tuple2<Boolean, Tuple4<Integer, String, Integer, String>>> dataStream = bsTableEnv.toRetractStream(queryTable, TypeInformation.of(new TypeHint<Tuple4<Integer,String,Integer,String>>(){}));
dataStream.print();
bsEnv.execute("demo");
}
}
結果列印
輸出name相同的元素。
總結
本文簡單介紹了 Flink Table API & SQL,並實現了一個兩表連接(join)的示例。
更多文章:www.ipooli.com
掃碼關注公眾號《ipoo》