1. Pipeline Architecture Diagram
2. Open-Source Frameworks and Version Selection
The services used in this project are: Hadoop, ZooKeeper, Kafka, Maxwell, HBase, Phoenix, and Flink.
3. Developing the Flink Main Program After the Services Are Deployed
3.1 Project structure diagram:
3.2 Code in detail
3.2.1 The pom file
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> <artifactId>gamll-realtime-2024</artifactId> <groupId>org.example</groupId> <version>1.0-SNAPSHOT</version> </parent> <modelVersion>4.0.0</modelVersion> <groupId>com.dianyanyuan</groupId> <artifactId>gmall-realtime</artifactId> <properties> <java.version>1.8</java.version> <maven.compiler.source>${java.version}</maven.compiler.source> <maven.compiler.target>${java.version}</maven.compiler.target> <flink.version>1.13.0</flink.version> <scala.version>2.12</scala.version> <hadoop.version>3.1.3</hadoop.version> </properties> <dependencies> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-java</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-streaming-java_${scala.version}</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-connector-kafka_${scala.version}</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-clients_${scala.version}</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-json</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.68</version> </dependency> <!--如果儲存檢查點到hdfs上,需要引入此依賴--> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>${hadoop.version}</version> </dependency> <dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <version>1.18.20</version> </dependency> <!--Flink預設使用的是slf4j記錄日誌,相當於一個日誌的介面,我們這裡使用log4j作為具體的日誌實現--> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <version>1.7.25</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.25</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-to-slf4j</artifactId> <version>2.14.0</version> </dependency> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-connector-jdbc_${scala.version}</artifactId> <version>${flink.version}</version> </dependency> <dependency> <groupId>com.ververica</groupId> <artifactId>flink-connector-mysql-cdc</artifactId> <version>2.1.0</version> </dependency> <dependency> <groupId>org.apache.phoenix</groupId> <artifactId>phoenix-spark</artifactId> <version>5.0.0-HBase-2.0</version> <exclusions> <exclusion> <groupId>org.glassfish</groupId> <artifactId>javax.el</artifactId> </exclusion> </exclusions> </dependency> <!-- 如果不引入 flink-table 相關依賴,則會報錯: Caused by: java.lang.ClassNotFoundException: org.apache.flink.connector.base.source.reader.RecordEmitter 引入以下依賴可以解決這個問題(引入某些其它的 flink-table相關依賴也可) --> <dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-table-api-java-bridge_2.12</artifactId> <version>1.13.0</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>druid</artifactId> <version>1.1.16</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> <version>3.1.1</version> 
<executions> <execution> <phase>package</phase> <goals> <goal>shade</goal> </goals> <configuration> <artifactSet> <excludes> <exclude>com.google.code.findbugs:jsr305</exclude> <exclude>org.slf4j:*</exclude> <exclude>log4j:*</exclude> <exclude>org.apache.hadoop:*</exclude> </excludes> </artifactSet> <filters> <filter> <!-- Do not copy the signatures in the META-INF folder.Otherwise, this might cause SecurityExceptions when using the JAR. --> <!-- 打包時不復制META-INF下的簽名檔案,避免報非法簽名檔案的SecurityExceptions異常--> <artifact>*:*</artifact> <excludes> <exclude>META-INF/*.SF</exclude> <exclude>META-INF/*.DSA</exclude> <exclude>META-INF/*.RSA</exclude> </excludes> </filter> </filters> <transformers combine.children="append"> <!-- The service transformer is needed to merge META-INF/services files --> <!-- connector和format依賴的工廠類打包時會相互覆蓋,需要使用ServicesResourceTransformer解決--> <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> </transformers> </configuration> </execution> </executions> </plugin> </plugins> </build> </project>
3.2.2 The log4j.properties file
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %10p (%c:%M) - %m%n
log4j.rootLogger=error,stdout
3.2.3 HBase configuration file (this can be copied directly from hbase-site.xml in the conf directory of the HBase installation on the server)
<?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <!-- /** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ --> <configuration> <property> <name>hbase.rootdir</name> <value>hdfs://hadoop101:8020/hbase</value> </property> <property> <name>hbase.cluster.distributed</name> <value>true</value> </property> <property> <name>hbase.zookeeper.quorum</name> <value>hadoop101,hadoop102,hadoop103</value> </property> <property> <name>phoenix.schema.isNamespaceMappingEnabled</name> <value>true</value> </property> <property> <name>phoenix.schema.mapSystemTablesToNamespace</name> <value>true</value> </property> </configuration>
3.2.4 Utility class: Druid connection pool
package com.dianyan.utils;

import com.alibaba.druid.pool.DruidDataSource;
import com.dianyan.common.GmallConfig;

public class DruidDSUtil {

    private static DruidDataSource druidDataSource = null;

    public static DruidDataSource createDataSource() {
        // Create the connection pool
        druidDataSource = new DruidDataSource();
        // Fully qualified driver class name
        druidDataSource.setDriverClassName(GmallConfig.PHOENIX_DRIVER);
        // Connection URL
        druidDataSource.setUrl(GmallConfig.PHOENIX_SERVER);
        // Number of connections created when the pool is initialized
        druidDataSource.setInitialSize(5);
        // Maximum number of active connections
        druidDataSource.setMaxActive(20);
        // Minimum number of idle connections; must be between 0 and maxActive, default 0
        druidDataSource.setMinIdle(1);
        // Max wait time when no connection is free; timing out throws an exception, -1 means wait forever
        druidDataSource.setMaxWait(-1);
        // SQL used to validate that a connection is usable
        druidDataSource.setValidationQuery("select 1");
        // Whether the idle-connection evictor validates connections; failed ones are removed from the pool.
        // Note: defaults to true, and an error is raised if validationQuery is not set
        // ("testWhileIdle is true, validationQuery not set")
        druidDataSource.setTestWhileIdle(true);
        // Whether to test a connection when it is borrowed; false, because testing hurts performance
        druidDataSource.setTestOnBorrow(false);
        // Whether to test a connection when it is returned
        druidDataSource.setTestOnReturn(false);
        // Run the idle-connection evictor every 30 seconds
        druidDataSource.setTimeBetweenEvictionRunsMillis(30 * 1000L);
        // Evict connections that have been idle for more than 30 minutes (also the default)
        druidDataSource.setMinEvictableIdleTimeMillis(30 * 60 * 1000L);
        return druidDataSource;
    }
}
3.2.5 Utility class: Kafka
package com.dianyan.utils;

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;

import java.util.Properties;

public class MyKafkaUtil {

    private static final String KAFKA_SERVER = "hadoop101:9092";

    public static FlinkKafkaConsumer<String> getFlinkKafkaConsumer(String topic, String groupId) {
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_SERVER);

        return new FlinkKafkaConsumer<String>(
                topic,
                new KafkaDeserializationSchema<String>() {
                    @Override
                    public boolean isEndOfStream(String s) {
                        return false;
                    }

                    @Override
                    public String deserialize(ConsumerRecord<byte[], byte[]> consumerRecord) throws Exception {
                        // Return null for empty (tombstone) records instead of throwing
                        if (consumerRecord == null || consumerRecord.value() == null) {
                            return null;
                        } else {
                            return new String(consumerRecord.value());
                        }
                    }

                    @Override
                    public TypeInformation<String> getProducedType() {
                        return BasicTypeInfo.STRING_TYPE_INFO;
                    }
                },
                properties
        );
    }
}
3.2.6 Utility class: Phoenix
package com.dianyan.utils;

import com.alibaba.druid.pool.DruidPooledConnection;
import com.alibaba.fastjson.JSONObject;
import com.dianyan.common.GmallConfig;
import org.apache.commons.lang3.StringUtils;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Collection;
import java.util.Set;

public class PhoenixUtil {

    /**
     * @param connection Phoenix connection
     * @param sinkTable  target table name
     * @param data       row data
     */
    public static void upsertValues(DruidPooledConnection connection, String sinkTable, JSONObject data) throws SQLException {
        // 1. Build the SQL statement, e.g. upsert into db.table(id,name,sex) values ('1001','Zhang San','male')
        Set<String> columns = data.keySet();
        Collection<Object> values = data.values();
        String sql = "upsert into " + GmallConfig.HBASE_SCHEMA + "." + sinkTable + "("
                + StringUtils.join(columns, ",") + ") values ('"
                + StringUtils.join(values, "','") + "')";

        // 2. Prepare the statement
        PreparedStatement preparedStatement = connection.prepareStatement(sql);

        // 3. Execute and commit
        preparedStatement.execute();
        connection.commit();

        // 4. Release resources
        preparedStatement.close();
    }
}
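To make the string concatenation concrete: for the sample record used later in this post ({"id":2,"tm_name":"蘋果","logo_url":"/static/default.jpg"}), and assuming the config table routes base_trademark to a hypothetical Phoenix table named DIM_BASE_TRADEMARK, the generated statement would look like the sketch below (the column order follows the JSON object's internal key order):

-- Illustrative only; the actual table name comes from the table_process config
upsert into GMALL_REALTIME.DIM_BASE_TRADEMARK(id,tm_name,logo_url) values ('2','蘋果','/static/default.jpg')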
3.2.7 Database connection constants
package com.dianyan.common;

public class GmallConfig {

    // Phoenix schema (namespace)
    public static final String HBASE_SCHEMA = "GMALL_REALTIME";

    // Phoenix driver
    public static final String PHOENIX_DRIVER = "org.apache.phoenix.jdbc.PhoenixDriver";

    // Phoenix connection URL
    public static final String PHOENIX_SERVER = "jdbc:phoenix:hadoop101,hadoop102,hadoop103:2181";
}
3.2.8 Table-process configuration bean
package com.dianyan.bean;

import lombok.Data;

@Data
public class TableProcess {
    // source table
    String sourceTable;
    // sink table
    String sinkTable;
    // columns to keep in the sink
    String sinkColumns;
    // primary-key column
    String sinkPk;
    // extension clause appended when creating the table
    String sinkExtend;
}
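For reference, these fields mirror the columns of the MySQL configuration table gmall-config.table_process that Flink CDC reads in the main program. The original project's DDL is not shown in this post, so the following is only a sketch: the column names come from the CDC JSON sample in the next section, while the types and lengths are assumptions.

-- Hypothetical DDL for the config table in the gmall-config database
CREATE TABLE `table_process` (
  `source_table` VARCHAR(200) NOT NULL COMMENT 'source MySQL table',
  `sink_table`   VARCHAR(200)          COMMENT 'target Phoenix table',
  `sink_columns` VARCHAR(2000)         COMMENT 'comma-separated columns to keep',
  `sink_pk`      VARCHAR(200)          COMMENT 'primary-key column, defaults to id',
  `sink_extend`  VARCHAR(200)          COMMENT 'extra clause appended to CREATE TABLE',
  PRIMARY KEY (`source_table`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;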
3.2.9 Simple filtering logic (TableProcessFunction)
package com.dianyan.app.func;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.dianyan.bean.TableProcess;
import com.dianyan.common.GmallConfig;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ReadOnlyBroadcastState;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.util.Collector;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.*;

public class TableProcessFunction extends BroadcastProcessFunction<JSONObject, String, JSONObject> {

    private Connection connection;

    private MapStateDescriptor<String, TableProcess> mapStateDescriptor;

    // Constructor
    public TableProcessFunction(MapStateDescriptor<String, TableProcess> mapStateDescriptor) {
        this.mapStateDescriptor = mapStateDescriptor;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        connection = DriverManager.getConnection(GmallConfig.PHOENIX_SERVER);
    }

    // Sample broadcast (config) record:
    // {"before":null,"after":{"source_table":"3213","sink_table":"22","sink_columns":"33","sink_pk":"44","sink_extend":"55"},
    //  "source":{"version":"1.5.4.Final","connector":"mysql","name":"mysql_binlog_source","ts_ms":1710926254168,"snapshot":"false",
    //  "db":"gmall-config","sequence":null,"table":"table_process","server_id":0,"gtid":null,"file":"","pos":0,"row":0,
    //  "thread":null,"query":null},"op":"r","ts_ms":1710926254171,"transaction":null}
    @Override
    public void processBroadcastElement(String value, Context context, Collector<JSONObject> out) throws Exception {
        // 1. Parse the config record
        JSONObject jsonObject = JSON.parseObject(value);
        TableProcess tableProcess = JSON.parseObject(jsonObject.getString("after"), TableProcess.class);

        // 2. Validate the config and create the Phoenix table
        checkTable(tableProcess.getSinkTable(),
                tableProcess.getSinkColumns(),
                tableProcess.getSinkPk(),
                tableProcess.getSinkExtend());

        // 3. Put the config into broadcast state
        BroadcastState<String, TableProcess> broadcastState = context.getBroadcastState(mapStateDescriptor);
        broadcastState.put(tableProcess.getSourceTable(), tableProcess);
    }

    /**
     * Validate the config and create the table:
     * create table if not exists db.table(id varchar primary key, bb varchar, cc varchar) xxx
     *
     * @param sinkTable   Phoenix table name
     * @param sinkColumns Phoenix table columns
     * @param sinkPk      Phoenix table primary key
     * @param sinkExtend  Phoenix table-creation extension clause
     */
    private void checkTable(String sinkTable, String sinkColumns, String sinkPk, String sinkExtend) {
        PreparedStatement preparedStatement = null;
        try {
            // Handle missing fields, e.g. null values
            if (sinkPk == null || "".equals(sinkPk)) {
                sinkPk = "id";
            }
            if (sinkExtend == null) {
                sinkExtend = "";
            }

            // 1. Build the SQL: create table if not exists db.table(id varchar primary key, bb varchar, cc varchar) xxx
            StringBuilder createTableSql = new StringBuilder("create table if not exists ")
                    .append(GmallConfig.HBASE_SCHEMA)
                    .append(".")
                    .append(sinkTable)
                    .append("(");

            String[] columns = sinkColumns.split(",");
            for (int i = 0; i < columns.length; i++) {
                // Current column
                String column = columns[i];

                // Is it the primary key?
                if (sinkPk.equals(column)) {
                    createTableSql.append(column).append(" varchar primary key");
                } else {
                    createTableSql.append(column).append(" varchar");
                }

                // Append a comma unless this is the last column
                if (i < columns.length - 1) {
                    createTableSql.append(",");
                }
            }

            createTableSql.append(")").append(sinkExtend);

            // 2. Prepare the SQL
            System.out.println("Create-table statement >>>>> " + createTableSql);
            preparedStatement = connection.prepareStatement(createTableSql.toString());

            // 3. Execute the SQL to create the table
            preparedStatement.execute();
        } catch (SQLException e) {
            // Convert the checked exception into a runtime exception
            throw new RuntimeException("Failed to create table: " + sinkTable);
        } finally {
            // 4. Release resources
            if (preparedStatement != null) {
                try {
                    preparedStatement.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    // Sample main-stream (Maxwell) record:
    // {"database":"gmall","table":"base_trademark","type":"bootstrap-insert","ts":1710921861,
    //  "data":{"id":2,"tm_name":"蘋果","logo_url":"/static/default.jpg"}}
    @Override
    public void processElement(JSONObject value, ReadOnlyContext readOnlyContext, Collector<JSONObject> collector) throws Exception {
        // 1. Read the broadcast config state
        ReadOnlyBroadcastState<String, TableProcess> broadcastState = readOnlyContext.getBroadcastState(mapStateDescriptor);
        String table = value.getString("table");
        TableProcess tableProcess = broadcastState.get(table);

        if (tableProcess != null) {
            // 2. Filter the columns
            filterColumn(value.getJSONObject("data"), tableProcess.getSinkColumns());

            // 3. Attach the sink table name and emit downstream
            value.put("sinkTable", tableProcess.getSinkTable());
            collector.collect(value);
        } else {
            System.out.println("No matching key found: " + table);
        }
    }

    /**
     * Keep only the configured columns.
     *
     * @param data        e.g. {"id":2,"tm_name":"蘋果","logo_url":"/static/default.jpg"}
     * @param sinkColumns e.g. "id,tm_name"
     */
    private void filterColumn(JSONObject data, String sinkColumns) {
        // Split the configured column list
        String[] columns = sinkColumns.split(",");
        List<String> columnList = Arrays.asList(columns);

        Set<Map.Entry<String, Object>> entries = data.entrySet();

        // Iterate and remove any column that is not configured
        Iterator<Map.Entry<String, Object>> iterator = entries.iterator();
        while (iterator.hasNext()) {
            Map.Entry<String, Object> next = iterator.next();
            if (!columnList.contains(next.getKey())) {
                iterator.remove();
            }
        }
    }
}
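As an illustration of what checkTable builds: for a hypothetical config row with sink_table = DIM_BASE_TRADEMARK, sink_columns = id,tm_name, sink_pk = id and an empty sink_extend, the statement that gets printed and executed would be:

-- Illustrative only; real names depend on the rows in table_process
create table if not exists GMALL_REALTIME.DIM_BASE_TRADEMARK(id varchar primary key,tm_name varchar)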
3.2.10 The sink (DimSinkFunction)
package com.dianyan.app.func;

import com.alibaba.druid.pool.DruidDataSource;
import com.alibaba.druid.pool.DruidPooledConnection;
import com.alibaba.fastjson.JSONObject;
import com.dianyan.utils.DruidDSUtil;
import com.dianyan.utils.PhoenixUtil;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

public class DimSinkFunction extends RichSinkFunction<JSONObject> {

    private DruidDataSource druidDataSource = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        druidDataSource = DruidDSUtil.createDataSource();
    }

    // Sample input:
    // {"database":"gmall","table":"base_trademark","type":"bootstrap-insert","ts":1710921861,
    //  "data":{"id":2,"tm_name":"蘋果","logo_url":"/static/default.jpg"},"sinkTable":"dim_xxx"}
    @Override
    public void invoke(JSONObject value, Context context) throws Exception {
        // Borrow a connection from the pool
        DruidPooledConnection connection = druidDataSource.getConnection();

        // Write the record
        String sinkTable = value.getString("sinkTable"); // target table
        JSONObject data = value.getJSONObject("data");
        PhoenixUtil.upsertValues(connection, sinkTable, data);

        // Return the connection to the pool
        connection.close();
    }
}
3.2.11 The main program (DimApp)
package com.dianyan.app.dim;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.dianyan.app.func.DimSinkFunction;
import com.dianyan.app.func.TableProcessFunction;
import com.dianyan.bean.TableProcess;
import com.dianyan.utils.MyKafkaUtil;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.BroadcastConnectedStream;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class DimApp {

    public static void main(String[] args) throws Exception {

        //TODO 1. Set up the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1); // parallelism; in production, match it to the number of Kafka partitions

        // 1.1 Enable checkpointing
        // env.enableCheckpointing(5 * 6000L, CheckpointingMode.EXACTLY_ONCE);
        // env.getCheckpointConfig().setCheckpointTimeout(10 * 6000L);
        // env.getCheckpointConfig().setMaxConcurrentCheckpoints(2); // concurrent checkpoints
        // env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000L)); // if the job fails unexpectedly, retry every 5 seconds, at most 3 times

        // 1.2 Configure the state backend
        // env.setStateBackend(new HashMapStateBackend());
        // env.getCheckpointConfig().setCheckpointStorage("hdfs://hadoop101:8020/dianyan");
        // System.setProperty("HADOOP_USER_NAME", "linxueze100");

        //TODO 2. Read the Kafka topic "maxwell" to create the main stream
        String topic = "maxwell";
        String groupid = "dim_app_001";
        DataStreamSource<String> kafkaDS = env.addSource(MyKafkaUtil.getFlinkKafkaConsumer(topic, groupid));

        //TODO 3. Drop non-JSON records, keep insert / update / bootstrap (initial-load) records, and convert them to JSON
        SingleOutputStreamOperator<JSONObject> filterJsonObjDS = kafkaDS.flatMap(new FlatMapFunction<String, JSONObject>() {
            @Override
            public void flatMap(String value, Collector<JSONObject> collector) throws Exception {
                try {
                    // Parse the record as JSON
                    JSONObject jsonObject = JSON.parseObject(value);
                    // Read the operation type
                    String type = jsonObject.getString("type");
                    // Keep inserts, updates and bootstrap inserts
                    if ("insert".equals(type) || "update".equals(type) || "bootstrap-insert".equals(type)) {
                        collector.collect(jsonObject);
                    }
                } catch (Exception e) {
                    System.out.println("Dirty data found >>>>>> " + value);
                }
            }
        });

        //TODO 4. Read the MySQL configuration table with Flink CDC to create the config stream
        MySqlSource<String> mySqlSource = MySqlSource.<String>builder()
                .hostname("hadoop101")
                .port(3306)
                .username("root")
                .password("123456")
                .databaseList("gmall-config")
                .tableList("gmall-config.table_process")
                .startupOptions(StartupOptions.initial())              // startup mode
                .deserializer(new JsonDebeziumDeserializationSchema()) // deserialize the binary binlog into JSON
                .build();
        DataStreamSource<String> mySqlSourceDS = env.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "MysqlSource");

        //TODO 5. Turn the config stream into a broadcast stream
        MapStateDescriptor<String, TableProcess> mapStateDescriptor =
                new MapStateDescriptor<>("map-state", String.class, TableProcess.class);
        BroadcastStream<String> broadcastStream = mySqlSourceDS.broadcast(mapStateDescriptor);

        //TODO 6. Connect the main stream with the config stream
        BroadcastConnectedStream<JSONObject, String> connectedStream = filterJsonObjDS.connect(broadcastStream);

        //TODO 7. Process the connected stream: handle main-stream records according to the config
        SingleOutputStreamOperator<JSONObject> dimDS = connectedStream.process(new TableProcessFunction(mapStateDescriptor));

        //TODO 8. Write the data to Phoenix
        dimDS.addSink(new DimSinkFunction());
        dimDS.print(">>>>>>>>>>>>>>>");

        //TODO 9. Launch the job
        env.execute("DimApp");
    }
}
4. Result
Maxwell monitors the MySQL binlog in real time, captures insert and update events as they happen, and writes them to the corresponding Kafka topic. The Flink job then consumes the data from that topic, performs a simple cleansing step, and writes the results into HBase. Throughout the pipeline, ZooKeeper handles coordination and Phoenix provides convenient SQL access (select queries and so on) over the HBase data.
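For completeness, here is a minimal sketch of the Maxwell configuration this flow assumes. The host names, credentials, and log level are illustrative; the only constraint taken from the project itself is that kafka_topic must match the "maxwell" topic consumed by DimApp.

# config.properties — minimal Maxwell setup (values are assumptions, adjust to your environment)
log_level=info
producer=kafka
kafka.bootstrap.servers=hadoop101:9092
# must match the topic that DimApp consumes
kafka_topic=maxwell
# MySQL instance whose binlog Maxwell reads
host=hadoop101
user=maxwell
password=maxwell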
5. Closing Thoughts
This article is mainly a record of scattered notes from investigating a real-time data warehouse. Because time was limited, many details are not written up here: for example, the Kafka topic that the Flink main program consumes must be the same topic that Maxwell writes its captured data to (see the Maxwell sketch above), and there were also a few issues around the configuration table involved in Maxwell's monitoring of the MySQL binlog.