1. Background
The requirement: consume data from a Kafka topic, run a WordCount over it with the Table API, and write the result to Elasticsearch through a sink table defined in DDL.
2. Code
Note: the Kafka and ES connection settings in the code could be factored out into a shared utility class; this is just a small demo, and every step can be extended on your own. For example, Kafka payloads are usually JSON, so you can pull in a tool such as fastjson and do the parsing in a rich function, as in the sketch below.
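As an illustration, here is a minimal sketch of such a pre-processing function. It assumes each Kafka record is a JSON object with a "word" field; the class name, the field name, and the choice of fastjson are assumptions for the example, not part of the demo that follows.

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.util.Collector;

public class JsonWordExtractor extends RichFlatMapFunction<String, String> {
    @Override
    public void flatMap(String value, Collector<String> out) {
        try {
            // Parse the raw record; assumes a JSON object such as {"word":"flink"}
            JSONObject obj = JSON.parseObject(value);
            String word = obj.getString("word"); // hypothetical field name
            if (word != null && !word.isEmpty()) {
                out.collect(word);
            }
        } catch (Exception e) {
            // Malformed records are simply dropped here; in a real job,
            // route them to a side output instead.
        }
    }
}

The full demo follows: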
package com.flinksql.test;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Properties;
import static org.apache.flink.table.api.Expressions.$;
/**
* @author: Rango
* @create: 2021-06-20 10:21
* @description: Use Flink SQL to read data from Kafka, compute a word count, and write the result to ES
**/
public class FlinkTableAPI_Test {
public static void main(String[] args) throws Exception {
//1.Create the environment; no checkpointing is configured for this test
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
//2.Read data from Kafka
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop102:9092");
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"BD");
DataStreamSource<String> sourceDS = env
.addSource(new FlinkKafkaConsumer<String>("test", new SimpleStringSchema(), prop));
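        // With checkpointing disabled (as in this demo), the consumer falls back to
        // Kafka's periodic auto-commit for offsets; enable checkpointing in real jobs
        // to get exactly-once reads.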
//3.Use flatMap to turn each line into (word, 1) tuples; flatMap also lets you filter out bad records here
SingleOutputStreamOperator<Tuple2<String, Integer>> flatMapDS = sourceDS
.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
String[] split = value.split(",");
for (String s : split) {
out.collect(new Tuple2<>(s, 1));
                    }
                }
            });
//4.Convert the stream into a Table
Table table = tableEnv.fromDataStream(flatMapDS);
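        // fromDataStream exposes the Tuple2 fields under Flink's default names:
        // f0 is the word and f1 is the count of 1, hence the groupBy on f0 below.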
Table table1 = table
.groupBy($("f0"))
.select($("f0").as("word"), $("f1").sum().as("num"));
tableEnv.toRetractStream(table1, Row.class).print();
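        // toRetractStream emits (Boolean, Row) pairs: true marks an insert/update,
        // false marks the retraction of a previously emitted result.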
//5.Create the sink table via DDL; sink.bulk-flush.max-actions is set to 1 so each record is flushed immediately for the demo (by default the connector buffers and writes in bulk)
tableEnv.executeSql("CREATE TABLE sensor (" +
" word STRING," +
" num BIGINT," +
" PRIMARY KEY (word) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'elasticsearch-7'," +
" 'hosts' = 'http://localhost:9200'," +
" 'index' = 'test'," +
" 'sink.bulk-flush.max-actions' = '1')");
//6.Write the data into the sink
table1.executeInsert("sensor");
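        // Note: executeInsert() submits its own Table job immediately, while
        // env.execute() starts a second job for the DataStream print sink above,
        // so this demo actually runs as two Flink jobs.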
env.execute();
}
}
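Note: running the demo assumes the matching connector jars are on the classpath, i.e. flink-connector-kafka and flink-connector-elasticsearch7 (plus fastjson if you add the JSON pre-processing sketched above); exact artifact names and Scala-version suffixes are version-dependent, so check them against your Flink distribution.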
This post is for learning and exchange; if you spot any problem, feel free to point it out in the comments.