I. Background
Flink's API is layered into four levels of abstraction. The top two, the Table API and SQL, are relatively simple to write and let you solve small requirements quickly. Drawing on the official documentation and several online tutorials, this article collects source-side and sink-side code that reads from a socket, Kafka, and a text file as sources, and writes the resulting streams to Kafka, Elasticsearch (ES), and MySQL, so it can be referred to later.
II. Code
Note: both the connect style and the DDL style are used here. The connect style covers Flink 1.10 and earlier, while the current official documentation presents everything with DDL; for versions after 1.10 the DDL style is recommended because it is more broadly applicable.
1. Source (reading) side
1.1 Basic environment setup; for easy demonstration the parallelism is 1 and no checkpointing (CK) is configured
//Create the stream execution environment and set parallelism to 1
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setParallelism(1);
//Create the table environment
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
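For reference, the snippets in this article assume imports roughly like the following (class names from Flink 1.12; the descriptor classes come from the connector and format modules listed in the pom in section III):
//Imports assumed by the snippets below (Flink 1.12 package names)
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.*;
import org.apache.flink.types.Row;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.ProducerConfig;
import static org.apache.flink.table.api.Expressions.$;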
1.2 Read data from a socket port and query it with both the Table API and SQL
//Read data from port 9999 on the server and map it to the corresponding JavaBean
SingleOutputStreamOperator<WaterSensor> mapDS = env.socketTextStream("hadoop102", 9999)
.map(value -> {
String[] split = value.split(",");
return new WaterSensor(split[0]
, Long.parseLong(split[1])
, Integer.parseInt(split[2]));});
//Create a table: convert the stream into a dynamic table.
Table table = tableEnv.fromDataStream(mapDS);
//Query the dynamic table, Table API style
Table selectResult = table.where($("id").isEqual("ws_001")).select($("id"), $("ts"), $("vc"));
//Query the dynamic table, SQL style, on the unregistered table
Table sqlResult = tableEnv.sqlQuery("select * from " + table);
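The WaterSensor JavaBean used above is not shown in the original; a minimal sketch, assuming Lombok (which is in the pom below) and the three fields parsed from the socket line, could look like this:
//Minimal sketch of the assumed WaterSensor POJO: id, ts, vc as parsed above
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class WaterSensor {
    private String id;   //sensor id, e.g. "ws_001"
    private Long ts;     //timestamp
    private Integer vc;  //water level value
}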
1.3 Read data from a text file (FileSystem) and query it with the Table API
//Flink 1.10 style using connect: read the txt file and create a temporary table
tableEnv.connect(new FileSystem().path("input/sensor.txt"))
.withFormat(new Csv().fieldDelimiter(',').lineDelimiter("\n"))
.withSchema(new Schema().field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("vc",DataTypes.INT()))
.createTemporaryTable("sensor");
//Convert to a Table object and query it. For the SQL style, see the Socket section
Table table = tableEnv.from("sensor");
Table selectResult = table.groupBy($("id")).aggregate($("id").count().as("id_count"))
.select($("id"), $("id_count"));
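For newer versions a DDL counterpart of this file source is also possible; the following is a sketch assuming the filesystem SQL connector (Flink 1.11+) with the same path and csv format as the connect example:
//Sketch: DDL equivalent of the connect-style file source above
tableEnv.executeSql("CREATE TABLE sensor (" +
" `id` STRING," +
" `ts` BIGINT," +
" `vc` INT" +
") WITH (" +
" 'connector' = 'filesystem'," +
" 'path' = 'input/sensor.txt'," +
" 'format' = 'csv'" +
")");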
1.4 Consume Kafka data and query it with the Table API, using both the connect and DDL styles
//Flink 1.10 style using connect: consume the corresponding Kafka topic and create a temporary table
tableEnv.connect(new Kafka().version("universal")
.topic("sensor")
.startFromLatest()
.property(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop102:9092")
.property(ConsumerConfig.GROUP_ID_CONFIG,"BD"))//consumer group
.withSchema(new Schema().field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("vc",DataTypes.INT()))
.withFormat(new Csv())
.createTemporaryTable("sensor");
//DDL style, for Flink versions after 1.10
tableEnv.executeSql("CREATE TABLE sensor (" +
" `id` STRING," +
" `ts` BIGINT," +
" `vc` INT" +
") WITH (" +
" 'connector' = 'kafka'," +
" 'topic' = 'sensor'," +
" 'properties.bootstrap.servers' = 'hadoop102:9092'," +
" 'properties.group.id' = 'BD'," +
" 'scan.startup.mode' = 'latest-offset'," +
" 'format' = 'csv'" +
")");
//Convert to a Table object and query it. For the SQL style, see the Socket section
Table table = tableEnv.from("sensor");
Table selectResult = table.groupBy($("id")).aggregate($("id").count().as("id_count"))
.select($("id"), $("id_count"));
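To inspect the aggregated result locally, one option (not part of the original code) is to convert the Table back to a retract stream and print it, since a group-by query produces updates; this assumes the Row import listed earlier and a main method that declares throws Exception:
//Sketch: print the updating result of the group-by query as a retract stream
tableEnv.toRetractStream(selectResult, Row.class).print();
env.execute();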
2. Sink (writing) side: selected examples
2.1 Write to a text file
//Create the output table, connect style
tableEnv.connect(new FileSystem().path("out/sensor.txt"))
.withFormat(new Csv())
.withSchema(new Schema().field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("vc",DataTypes.INT()))
.createTemporaryTable("sink_sensor");
//Writing data into the output table performs the sink; selectResult is a result table from the source side above whose schema matches this sink (e.g. the id/ts/vc query from the Socket example)
selectResult.executeInsert("sink_sensor");
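A DDL counterpart of this file sink is sketched below, assuming the same path and csv format; note that the filesystem (and plain Kafka) sinks are append-only, so they take simple select or filter results such as the Socket query in 1.2 rather than the group-by result:
//Sketch: DDL version of the file sink, same path and format as the connect example
tableEnv.executeSql("CREATE TABLE sink_file (" +
" `id` STRING," +
" `ts` BIGINT," +
" `vc` INT" +
") WITH (" +
" 'connector' = 'filesystem'," +
" 'path' = 'out/sensor.txt'," +
" 'format' = 'csv'" +
")");
selectResult.executeInsert("sink_file");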
2.2 Write to Kafka
//connect style
tableEnv.connect(new Kafka().version("universal")
.topic("sensor")
.sinkPartitionerRoundRobin() //round-robin partitioning when writing
.property(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop102:9092"))
.withSchema(new Schema().field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("vc",DataTypes.INT()))
.withFormat(new Json())
.createTemporaryTable("sink_sensor");
//DDL style
tableEnv.executeSql("CREATE TABLE sink_sensor (" +
" `id` STRING," +
" `ts` BIGINT," +
" `vc` INT" +
") WITH (" +
" 'connector' = 'kafka'," +
" 'topic' = 'sensor'," +
" 'properties.bootstrap.servers' = 'hadoop102:9092'," +
" 'format' = 'json'" +
")");
//Writing data into the output table performs the sink; selectResult is an append-only result table from the source side above whose schema matches this sink
selectResult.executeInsert("sink_sensor");
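If the result to be written is an updating one, such as the group-by count from 1.3/1.4, the plain kafka connector will not accept it; Flink 1.12 provides the upsert-kafka connector for that case. A sketch follows (the topic name sensor_count is hypothetical):
//Sketch: upsert-kafka sink for an updating (id, id_count) result; requires a primary key
tableEnv.executeSql("CREATE TABLE sink_sensor_count (" +
" `id` STRING," +
" `id_count` BIGINT," +
" PRIMARY KEY (id) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'upsert-kafka'," +
" 'topic' = 'sensor_count'," +
" 'properties.bootstrap.servers' = 'hadoop102:9092'," +
" 'key.format' = 'json'," +
" 'value.format' = 'json'" +
")");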
2.3 Write to MySQL (via JDBC; mysql-connector-java-5.1.9.jar was imported manually here)
//DDL
tableEnv.executeSql("CREATE TABLE sink_sensor (" +
" id STRING," +
" ts BIGINT," +
" vc INT," +
" PRIMARY KEY (id) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'jdbc'," +
" 'url' = 'jdbc:mysql://hadoop102:3306/test?useSSL=false'," +
" 'table-name' = 'sink_test'," +
" 'username' = 'root'," +
" 'password' = '123456'" +
")");
//Writing data into the output table performs the sink; selectResult is a result table from the source side above whose schema matches this sink. The insert targets the Flink table name sink_sensor, not the MySQL table name sink_test
selectResult.executeInsert("sink_sensor");
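Note that the jdbc connector also needs the flink-connector-jdbc dependency on the classpath, which the pom in section III does not list, in addition to the MySQL driver mentioned above, and the sink_test table must already exist in MySQL. As a quick check (a sketch, not in the original), the JDBC table can be read back through Flink and printed on the client:
//Sketch: query the JDBC table back and print it to verify the sink
tableEnv.executeSql("select * from sink_sensor").print();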
2.4 Write to ES
//connect style
tableEnv.connect(new Elasticsearch()
.index("sensor")
.documentType("_doc")
.version("7")
.host("localhost",9200,"http")
//Set to 1 so every row is flushed immediately, which makes the output easy to see on the client; do not use this in production
.bulkFlushMaxActions(1))
.withSchema(new Schema()
.field("id", DataTypes.STRING())
.field("ts", DataTypes.BIGINT())
.field("vc",DataTypes.INT()))
.withFormat(new Json())
.inAppendMode()
.createTemporaryTable("sink_sensor");
//DDL style
tableEnv.executeSql("CREATE TABLE sink_sensor (" +
" id STRING," +
" ts BIGINT," +
" vc INT," +
" PRIMARY KEY (id) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'elasticsearch-7'," +
" 'hosts' = 'http://localhost:9200'," +
" 'index' = 'users'," +
" 'sink.bulk-flush.max-actions' = '1')";)
//Writing data into the output table performs the sink; selectResult is a result table from the source side above whose schema matches this sink
selectResult.executeInsert("sink_sensor");
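To double-check what was written, a small standalone sketch using the Elasticsearch REST high-level client (already among the dependencies below) can search the sensor index; the host, port, and index name simply mirror the examples above:
//Sketch: verify the ES sink by searching the "sensor" index with the REST high-level client
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;

public class EsSinkCheck {
    public static void main(String[] args) throws Exception {
        //Connect to the local ES node used in the examples above
        RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200, "http")));
        //Search the sensor index and print the number of hits
        SearchResponse response = client.search(new SearchRequest("sensor"), RequestOptions.DEFAULT);
        System.out.println(response.getHits().getTotalHits());
        client.close();
    }
}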
III. Additional notes
pom.xml dependency section
<properties>
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<flink.version>1.12.0</flink.version>
<scala.version>2.12</scala.version>
<hadoop.version>3.1.3</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.8.0</version>
</dependency>
<!-- elasticsearch 的客戶端 -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.8.0</version>
</dependency>
<!-- elasticsearch 依賴 2.x 的 log4j -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.9</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch7_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.16</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-csv</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
</dependencies>
This is shared for learning and exchange; if you have any questions, feel free to point them out in the comments.