Flink的join操作樣例

優優我心發表於2019-03-28

基於事件時間(event time)的視窗 join 必須依賴視窗及 watermark 操作

/**
 * Sample job: event-time tumbling-window inner join of two Flink streams.
 *
 * <p>Each source emits comma-separated text lines. The first three fields of a line are
 * turned into a {@code Tuple3<String, String, Long>}; the third field is used as the event
 * timestamp. The two streams are joined on the first field inside 10-second tumbling
 * event-time windows, and every matching pair is printed as a {@code |}-separated string.
 */
public class Main3 {
    /**
     * Allowed out-of-orderness: the watermark trails the highest event timestamp seen so far
     * by this amount (same unit as the event timestamps; Flink treats them as milliseconds).
     */
    public static final long DELAY = 3000L;

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // Ask the periodic watermark assigners for a new watermark every 200 ms.
        env.getConfig().setAutoWatermarkInterval(200);

        DataStream<Tuple3<String, String, Long>> src1 =
                parseWithEventTime(env.addSource(FlinkSourceManager.getServSource()), "src1");
        DataStream<Tuple3<String, String, Long>> src2 =
                parseWithEventTime(env.addSource(FlinkSourceManager.getRankSource()), "src2");

        // A window only fires once the watermark of BOTH inputs has passed the window end.
        src1.join(src2)
                .where(new FirstFieldKey())
                .equalTo(new FirstFieldKey())
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .apply(new PipeDelimitedJoin())
                .print();

        env.execute();
    }

    /**
     * Parses raw lines into tuples and attaches event timestamps and periodic watermarks.
     *
     * @param lines        stream of raw comma-separated lines
     * @param operatorName name given to the parsing operator
     * @return the parsed stream with timestamps and watermarks assigned
     */
    private static DataStream<Tuple3<String, String, Long>> parseWithEventTime(
            DataStream<String> lines, String operatorName) {
        return lines
                .flatMap(new CsvLineParser())
                .name(operatorName)
                .assignTimestampsAndWatermarks(new BoundedDelayAssigner());
    }

    /** Splits a line on {@code ,} and emits (field0, field1, field2 as long); shorter lines are dropped. */
    private static final class CsvLineParser
            implements FlatMapFunction<String, Tuple3<String, String, Long>> {
        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(String value, Collector<Tuple3<String, String, Long>> out) throws Exception {
            String[] fields = value.split(",");
            if (fields.length > 2) {
                out.collect(Tuple3.of(fields[0], fields[1], Long.parseLong(fields[2])));
            }
        }
    }

    /**
     * Periodic watermark assigner whose watermark is always
     * {@code (highest timestamp seen) - DELAY}.
     *
     * <p>The maximum is tracked separately from the emitted watermark so that an
     * out-of-order or late element can never pull the watermark backwards or make it
     * lag by more than {@link #DELAY}.
     */
    private static final class BoundedDelayAssigner
            implements AssignerWithPeriodicWatermarks<Tuple3<String, String, Long>> {
        private static final long serialVersionUID = 1L;

        // Starts at MIN_VALUE + DELAY so that subtracting DELAY below cannot underflow.
        private long maxTimestampSeen = Long.MIN_VALUE + DELAY;

        /** Called once per auto-watermark interval to obtain the current watermark. */
        @Nullable
        @Override
        public Watermark getCurrentWatermark() {
            return new Watermark(maxTimestampSeen - DELAY);
        }

        /** Called for every element: returns its event time and advances the tracked maximum. */
        @Override
        public long extractTimestamp(Tuple3<String, String, Long> element, long previousElementTimestamp) {
            long eventTime = element.f2;
            maxTimestampSeen = Math.max(maxTimestampSeen, eventTime);
            return eventTime;
        }
    }

    /** Join key for both inputs: the first tuple field. */
    private static final class FirstFieldKey
            implements KeySelector<Tuple3<String, String, Long>, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public String getKey(Tuple3<String, String, Long> value) throws Exception {
            return value.f0;
        }
    }

    /** Emits {@code key|first.f1|first.f2|second.f1|second.f2} for each joined pair. */
    private static final class PipeDelimitedJoin
            implements FlatJoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public void join(Tuple3<String, String, Long> first, Tuple3<String, String, Long> second,
                         Collector<String> out) throws Exception {
            // The downstream print() sink writes the record, so no extra println is needed here.
            out.collect(first.f0 + "|" + first.f1 + "|" + first.f2 + "|" + second.f1 + "|" + second.f2);
        }
    }
}

注:只有當 watermark 達到視窗結束時間時,才會觸發視窗計算;否則視窗會一直等待資料。若需要在其他時機觸發,可以自行實現視窗的觸發器(Trigger)來觸發視窗計算。

相關文章