Flink: Consuming from Kafka, WordCount with the Table API, and Writing to ES

Posted by Rango_lhl on 2021-06-20

1. Background

The requirement: consume data from a Kafka topic, run a WordCount over it with the Table API, and write the result into Elasticsearch through a sink table defined in DDL.

2. Code

Note: the Kafka and ES connection settings in this code could be pulled out into a shared utility class; this is just a small demo, and every step can be extended as needed. For instance, Kafka payloads are usually JSON, so you could bring in a parser such as fastjson and do the conversion in a rich function, as sketched below. To run the demo you also need the Kafka and Elasticsearch connector dependencies (flink-connector-kafka and flink-connector-elasticsearch7) on the classpath.
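As a minimal sketch of that JSON idea (illustrative only, not part of the original demo): suppose each Kafka record is a JSON object such as {"words":"hello,flink"}, with fastjson on the classpath. Both the field name "words" and the comma-separated format are assumptions made up for this example.

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical rich function: parses a JSON payload and emits one (word, 1)
// tuple per entry, dropping records that are malformed or missing the field.
public class JsonToWordTuples extends RichFlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        JSONObject obj;
        try {
            obj = JSON.parseObject(value);
        } catch (Exception e) {
            return; // skip malformed records instead of failing the job
        }
        String words = obj.getString("words"); // assumed field name
        if (words == null) {
            return;
        }
        for (String word : words.split(",")) {
            out.collect(new Tuple2<>(word, 1));
        }
    }
}

It would be plugged in with sourceDS.flatMap(new JsonToWordTuples()). The full demo below sticks to plain comma-separated strings for simplicity.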

package com.flinksql.test;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Properties;
import static org.apache.flink.table.api.Expressions.$;

/**
 * @author: Rango
 * @create: 2021-06-20 10:21
 * @description: Use FlinkSQL/the Table API to read data from Kafka, compute a WordCount, and write the result to ES
 **/
public class FlinkTableAPI_Test {
    public static void main(String[] args) throws Exception {
        //1. Create the environments; checkpointing is not configured for this test
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        //2. Read data from the Kafka topic
        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop102:9092");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"BD");
        DataStreamSource<String> sourceDS = env
                .addSource(new FlinkKafkaConsumer<String>("test", new SimpleStringSchema(), prop));

        //3. Use flatMap to split each line into (word, 1) tuples; flatMap also
        // makes it easy to filter out unwanted records
        SingleOutputStreamOperator<Tuple2<String, Integer>> flatMapDS = sourceDS
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Input lines are comma-separated words, e.g. "hello,flink"
                        for (String word : value.split(",")) {
                            out.collect(new Tuple2<>(word, 1));
                        }
                    }
                });

        //4. Convert the stream into a Table, then group by word and sum the counts
        Table table = tableEnv.fromDataStream(flatMapDS);
        Table table1 = table
                .groupBy($("f0"))
                .select($("f0").as("word"), $("f1").sum().as("num"));
        // Print as a retract stream: each update retracts (false) the old row
        // and then adds (true) the new one
        tableEnv.toRetractStream(table1, Row.class).print();

        //5. Create the sink table via DDL. The PRIMARY KEY makes the ES sink upsert on word.
        // For the demo, sink.bulk-flush.max-actions is set to 1 so each record is flushed
        // immediately; by default writes are batched
        tableEnv.executeSql("CREATE TABLE sensor (" +
                "  word STRING," +
                "  num BIGINT," +
                "  PRIMARY KEY (word) NOT ENFORCED" +
                ") WITH (" +
                "  'connector' = 'elasticsearch-7'," +
                "  'hosts' = 'http://localhost:9200'," +
                "  'index' = 'test'," +
                "  'sink.bulk-flush.max-actions' = '1')");
        //6. Write the aggregated data into ES. executeInsert() submits its own
        // job; env.execute() is still needed to run the print sink above.
        table1.executeInsert("sensor");
        env.execute();
    }
}
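To make the retract output concrete: if you send hello,flink and then hello to the test topic, the print sink should emit roughly the following (the exact row formatting differs between Flink versions):

(true,hello,1)
(true,flink,1)
(false,hello,1)
(true,hello,2)

The false record retracts the stale count before the updated row arrives. On the Elasticsearch side the primary key makes the sink upsert, so the test index keeps one document per word with its latest count.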

This post is for learning and discussion; if you spot any problems, please point them out in the comments.
