Apache SeaTunnel除了單表之間的資料同步之外,也支援單表同步到多表,多表同步到單表,以及多表同步到多表,下面簡單舉例說明如何實現這些功能。
單表 to 單表
一個source,一個sink。
從mysql同步到mysql,中間不做區分
env {
# You can set flink configuration here
execution.parallelism = 2
job.mode = "BATCH"
}
source{
Jdbc {
url = "jdbc:mysql://127.0.0.1:3306/test"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "user"
password = "password"
query = "select * from base_region"
}
}
transform {
# If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
# please go to https://seatunnel.apache.org/docs/transform/sql
}
sink {
jdbc {
url = "jdbc:mysql://127.0.0.1:3306/dw"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "user"
password = "password"
query = "insert into base_region(id,region_name) values(?,?)"
}
}
執行任務
./bin/seatunnel.sh --config ./config/mysql2mysql_batch.conf
單表 to 多表
一個source,多個sink。
從MySQL同步到MySQL,將一個使用者表資料同步過去,中間透過2個sql元件分佈將男性使用者和女性使用者分開,在sink階段分別插入到不同的表:
env {
execution.parallelism = 2
job.mode = "BATCH"
}
source {
Jdbc {
url = "jdbc:mysql://127.0.0.1:3306/test"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "user"
password = "password"
result_table_name="t_user"
query = "select * from t_user;"
}
}
transform {
Sql {
source_table_name = "t_user"
result_table_name = "t_user_nan"
query = "select id,name,birth,gender from t_user where gender ='男';"
}
Sql {
source_table_name = "t_user"
result_table_name = "t_user_nv"
query = "select id,name,birth,gender from t_user where gender ='女';"
}
}
sink {
jdbc {
url = "jdbc:mysql://127.0.0.1:3306/dw"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "user"
password = "password"
source_table_name = "t_user_nan"
query = "insert into t_user_nan(id,name,birth,gender) values(?,?,?,?)"
}
jdbc {
url = "jdbc:mysql://127.0.0.1:3306/dw"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "user"
password = "password"
source_table_name = "t_user_nv"
query = "insert into t_user_nv(id,name,birth,gender) values(?,?,?,?)"
}
}
./bin/seatunnel.sh --config ./config/mysql2mysql_1n.conf
多表 to 單表
多個source,一個sink。
假如有一張交換器使用情況表,一張路由器使用情況表,目標表是將這種資料合在一起的olap表。
表結構如下:
-- dw 源表1
CREATE TABLE IF NOT EXISTS ads_device_switch_performance (
`event_time` timestamp COMMENT '業務時間',
`device_id` VARCHAR(32) COMMENT '裝置id',
`device_type` VARCHAR(32) COMMENT '裝置型別',
`device_name` VARCHAR(128) COMMENT '裝置名稱',
`cpu_usage` INT COMMENT 'CPU使用率百分比'
) ;
INSERT INTO `ads_device_switch_performance` VALUES ('2024-01-15 14:25:11', '2001', '2', '交換器1', 49);
INSERT INTO `ads_device_switch_performance` VALUES ('2024-01-17 22:25:40', '2002', '1', '交換器2', 65);
-- dw 源表2
CREATE TABLE IF NOT EXISTS ads_device_router_performance (
`event_time` timestamp COMMENT '業務時間',
`device_id` VARCHAR(32) COMMENT '裝置id',
`device_type` VARCHAR(32) COMMENT '裝置型別',
`device_name` VARCHAR(128) COMMENT '裝置名稱',
`cpu_usage` INT COMMENT 'CPU使用率百分比'
);
INSERT INTO `ads_device_router_performance` VALUES ('2024-01-17 21:23:22', '1001', '1', '路由器1', 35);
INSERT INTO `ads_device_router_performance` VALUES ('2024-01-16 17:23:53', '1002', '2', '路由器2', 46);
-------------------------------------------------------------------------------
-- olap 目標表
CREATE TABLE `device_performance` (
`id` INT NOT NULL AUTO_INCREMENT COMMENT '表主鍵',
`event_time` VARCHAR(32) NOT NULL COMMENT '業務時間',
`device_id` VARCHAR(32) COMMENT '裝置id',
`device_type` VARCHAR(32) COMMENT '裝置型別',
`device_name` VARCHAR(128) NOT NULL COMMENT '裝置名稱',
`cpu_usage` FLOAT NOT NULL COMMENT 'CPU利用率單位是%',
`create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '建立時間',
`update_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '更新時間',
PRIMARY KEY (`id`)
) COMMENT='裝置狀態';
將交換器資料和路由器資料一起同步到olap目標表,總結透過sql元件處理:
env {
job.mode="BATCH"
job.name="device_performance"
}
source {
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/dw?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
result_table_name="switch_src"
query="SELECT `event_time`, `device_id`, `device_type`, `device_name`, `cpu_usage` FROM ads_device_switch_performance;"
}
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/dw?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
result_table_name="router_src"
query="SELECT `event_time`, `device_id`, `device_type`, `device_name`, `cpu_usage` FROM ads_device_router_performance;"
}
}
transform {
Sql {
source_table_name = "switch_src"
result_table_name = "switch_dst"
query = "SELECT event_time , device_id, device_type, device_name, cpu_usage, NOW() AS create_time, NOW() AS update_time FROM switch_src;"
}
Sql {
source_table_name = "router_src"
result_table_name = "router_dst"
query = "SELECT event_time, device_id, device_type, device_name, cpu_usage, NOW() AS create_time, NOW() AS update_time FROM router_src;"
}
}
sink {
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/olap?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
source_table_name = "switch_dst"
query="INSERT INTO device_performance VALUES(null,?, ?, ?, ?, ?, ?, ?) ;"
}
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/olap?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
source_table_name = "router_dst"
query="INSERT INTO device_performance VALUES(null,?, ?, ?, ?, ?, ?, ?) ;"
}
}
執行任務:
./bin/seatunnel.sh --config ./syn_job/mysql2mysql_n1_batch.conf
作業成功!
多表 to 多表
多個source,多個sink。
將交換器使用情況資料和路由器使用情況資料分別同步到對應的目標表,中間sql元件處理
env {
job.mode="BATCH"
job.name="device_performance"
}
source {
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/dw?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
result_table_name="switch_src"
query="SELECT `event_time`, `device_id`, `device_type`, `device_name`, `cpu_usage` FROM ads_device_switch_performance;"
}
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/dw?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
result_table_name="router_src"
query="SELECT `event_time`, `device_id`, `device_type`, `device_name`, `cpu_usage` FROM ads_device_router_performance;"
}
}
transform {
Sql {
source_table_name = "switch_src"
result_table_name = "switch_dst"
query = "SELECT event_time , device_id, device_type, device_name, cpu_usage, NOW() AS create_time, NOW() AS update_time FROM switch_src;"
}
Sql {
source_table_name = "router_src"
result_table_name = "router_dst"
query = "SELECT event_time, device_id, device_type, device_name, cpu_usage, NOW() AS create_time, NOW() AS update_time FROM router_src;"
}
}
sink {
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/olap?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
source_table_name = "switch_dst"
query="INSERT INTO device_performance_switch VALUES(null,?, ?, ?, ?, ?, ?, ?) ;"
}
Jdbc {
url="jdbc:mysql://127.0.0.1:3306/olap?allowMultiQueries=true&characterEncoding=utf-8"
driver="com.mysql.cj.jdbc.Driver"
user = "user"
password = "password"
source_table_name = "router_dst"
query="INSERT INTO device_performance_router VALUES(null,?, ?, ?, ?, ?, ?, ?) ;"
}
}
結語
綜上所述,Apache SeaTunnel多表同步技術具有高效、實時、可靠和靈活的特點,在企業的資料同步領域發揮著重要作用。藉助Apache SeaTunnel多表同步功能,企業能夠更好地實現不同系統和資料庫之間資料的無縫流轉,提升資料管理和利用的效率,為業務發展提供有力支援。希望本文能夠幫助讀者更好地瞭解和應用Apache SeaTunnel多表同步,從而為企業資料同步帶來更多可能性。
原文連結:https://blog.csdn.net/weixin_44586883/article/details/136049897
本文由 白鯨開源 提供釋出支援!