Flink-電商使用者行為分析(實時對賬)

Knight_AL發表於2020-11-03

原文網址 : https://blog.csdn.net/qq_46548855/article/details/109466791

連結：https://pan.baidu.com/s/1_DJmEPtNxsCiDnw8KNwmoA 
提取碼：exq9

對於訂單支付事件，使用者支付完成其實並不算完，我們還得確認平臺賬戶上是否到賬了。而這兩類資訊往往來自不同的日誌，所以我們要同時讀入兩條流的資料來做合併處理。這裡我們可以利用connect將兩條流連線起來，然後用CoProcessFunction進行處理，或者直接使用interval join。
接下來我將使用兩種方法(1.connect + CoProcessFunction，2.使用join操作)
connect程式碼實現

import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.co.CoProcessFunction
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector


/**
  * Entry point of the connect-based reconciliation job.
  *
  * Reads order events and receipt events from two CSV resources, keys both streams by
  * transaction id, connects them and lets [[OrderPayTxDetect]] pair them up. Matched pairs
  * are printed from the main output; events that never found a partner are printed from the
  * "unmatched-pays" / "unmatched-receipts" side outputs.
  */
object OrderPayTxMatch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Order log: each line is split on "," into orderId, eventType, txId, eventTime.
    val orderLogPath = getClass.getResource("/OrderLog.csv").getPath
    val orderEventStream = env
      .readTextFile(orderLogPath)
      .map { line =>
        val fields = line.split(",")
        OrderEvent(fields(0).toLong, fields(1), fields(2), fields(3).toLong)
      }
      // Tolerate up to 3 seconds of out-of-order events; eventTime is scaled by 1000
      // (presumably seconds -> milliseconds; confirm against the data file).
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[OrderEvent](Time.seconds(3)) {
          override def extractTimestamp(element: OrderEvent): Long = element.eventTime * 1000L
        })
      // Only events that carry a transaction id can be reconciled.
      .filter(order => order.txId != "")
      .keyBy(order => order.txId)

    // Receipt log: each line is split on "," into txId, payChannel, timestamp.
    val receiptLogPath = getClass.getResource("/ReceiptLog.csv").getPath
    val receiptStream = env
      .readTextFile(receiptLogPath)
      .map { line =>
        val fields = line.split(",")
        ReceiptEvent(fields(0), fields(1), fields(2).toLong)
      }
      .assignAscendingTimestamps(receipt => receipt.timestamp * 1000L)
      .keyBy(receipt => receipt.txId)

    // Side-output tags; the ids must equal the ones used inside OrderPayTxDetect.
    val unmatchedPays = new OutputTag[OrderEvent]("unmatched-pays")
    val unmatchedReceipts = new OutputTag[ReceiptEvent]("unmatched-receipts")

    // Connect the two keyed streams and run the matching function on them.
    val resultStream = orderEventStream
      .connect(receiptStream)
      .process(new OrderPayTxDetect())

    resultStream.print()
    resultStream.getSideOutput(unmatchedPays).print("unmatched-pays")
    resultStream.getSideOutput(unmatchedReceipts).print("unmatched-receipts")

    env.execute("order pay tx match job")
  }

}
/**
  * CoProcessFunction that reconciles payment events (input 1) with receipt events (input 2).
  *
  * It relies on keyed state and timers, so both inputs must be keyed (the job in this file
  * keys them by txId). For every key it buffers whichever side arrives first and waits for the
  * other side:
  *   - a pay waits 5 seconds of event time for its receipt,
  *   - a receipt waits 3 seconds of event time for its pay.
  * A matched pair is emitted on the main output as `(pay, receipt)`. When the wait expires
  * without a partner, the buffered event is emitted to the "unmatched-pays" or
  * "unmatched-receipts" side output.
  *
  * On a successful match the buffered event is removed from state and its timer is deleted;
  * otherwise the timer would later fire and report the already-matched event as unmatched.
  */
class OrderPayTxDetect() extends CoProcessFunction[OrderEvent,ReceiptEvent,(OrderEvent,ReceiptEvent)]{
  // Per-key state holding the pay / receipt event that is still waiting for its partner.
  lazy val payState:ValueState[OrderEvent] = getRuntimeContext.getState(new ValueStateDescriptor[OrderEvent]("pay",
    classOf[OrderEvent]))
  lazy val receiptState:ValueState[ReceiptEvent] = getRuntimeContext.getState(new ValueStateDescriptor[ReceiptEvent]
  ("receipt",classOf[ReceiptEvent]))
  // Side-output tags for events whose partner never arrived in time.
  val unmatchedPays = new OutputTag[OrderEvent]("unmatched-pays")
  val unmatchedReceipts = new OutputTag[ReceiptEvent]("unmatched-receipts")

  // Event-time instant (ms) at which a buffered pay gives up waiting for its receipt.
  private def payTimeout(pay: OrderEvent): Long = pay.eventTime * 1000L + 5000L

  // Event-time instant (ms) at which a buffered receipt gives up waiting for its pay.
  private def receiptTimeout(receipt: ReceiptEvent): Long = receipt.timestamp * 1000L + 3000L

  /**
    * Handles a pay event: emits the pair if the receipt is already buffered, otherwise
    * buffers the pay and registers its timeout timer.
    */
  override def processElement1(pay: OrderEvent, ctx: CoProcessFunction[OrderEvent, ReceiptEvent, (OrderEvent,
    ReceiptEvent)]
    #Context, out: Collector[(OrderEvent, ReceiptEvent)]): Unit = {
    // ValueState.value() returns null when nothing is stored; Option(...) turns that into None.
    Option(receiptState.value()) match {
      case Some(receipt) =>
        // The receipt arrived first: emit the match and release everything kept for it.
        out.collect((pay,receipt))
        receiptState.clear()
        ctx.timerService().deleteEventTimeTimer(receiptTimeout(receipt))
      case None =>
        // No receipt yet: remember the pay and wait 5 seconds for the receipt.
        payState.update(pay)
        ctx.timerService().registerEventTimeTimer(payTimeout(pay))
    }
  }

  /**
    * Handles a receipt event: emits the pair if the pay is already buffered, otherwise
    * buffers the receipt and registers its timeout timer.
    */
  override def processElement2(receipt: ReceiptEvent, ctx: CoProcessFunction[OrderEvent, ReceiptEvent, (OrderEvent,
    ReceiptEvent)]
    #Context, out: Collector[(OrderEvent, ReceiptEvent)]): Unit = {
    Option(payState.value()) match {
      case Some(pay) =>
        // The pay arrived first: emit the match and release everything kept for it.
        out.collect((pay,receipt))
        payState.clear()
        ctx.timerService().deleteEventTimeTimer(payTimeout(pay))
      case None =>
        // No pay yet: remember the receipt and wait 3 seconds for the pay.
        receiptState.update(receipt)
        ctx.timerService().registerEventTimeTimer(receiptTimeout(receipt))
    }
  }

  /**
    * Fires when a wait expired. Whatever is still buffered for the key was never matched
    * (matched events are cleared on match) and is routed to the corresponding side output.
    */
  override def onTimer(timestamp: Long, ctx: CoProcessFunction[OrderEvent, ReceiptEvent, (OrderEvent, ReceiptEvent)]
    #OnTimerContext, out: Collector[(OrderEvent, ReceiptEvent)]): Unit = {
    // A pay still in state means its receipt never came.
    Option(payState.value()).foreach(pay => ctx.output(unmatchedPays, pay))
    // A receipt still in state means its pay never came.
    Option(receiptState.value()).foreach(receipt => ctx.output(unmatchedReceipts, receipt))
    // Clear the state for this key.
    payState.clear()
    receiptState.clear()
  }
}

在這裡插入圖片描述
join程式碼實現

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector

/**
  * One record of the order log.
  *
  * @param orderId   id of the order
  * @param eventType kind of order event (the values are not shown in this file)
  * @param txId      transaction id; the jobs in this file drop events where it is empty
  * @param eventTime event time; the jobs multiply it by 1000 to obtain the record timestamp
  *                  (presumably epoch seconds; confirm against the data file)
  */
case class OrderEvent(
  orderId: Long,
  eventType: String,
  txId: String,
  eventTime: Long
)
/**
  * One record of the receipt log.
  *
  * @param txId       transaction id used to pair the receipt with an order event
  * @param payChannel payment channel of the receipt (the values are not shown in this file)
  * @param timestamp  receipt time; the jobs multiply it by 1000 to obtain the record timestamp
  *                   (presumably epoch seconds; confirm against the data file)
  */
case class ReceiptEvent(
  txId: String,
  payChannel: String,
  timestamp: Long
)

/**
  * Entry point of the interval-join-based reconciliation job.
  *
  * Reads order events and receipt events from two CSV resources, keys both streams by
  * transaction id and interval-joins them: an order is paired with every receipt of the same
  * key whose timestamp lies between 3 seconds before and 5 seconds after the order's
  * timestamp. Only joined pairs are printed.
  */
object OrderPayTxMatchWithJoin {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Order log: each line is split on "," into orderId, eventType, txId, eventTime.
    val orderLogPath = getClass.getResource("/OrderLog.csv").getPath
    val orderEventStream = env
      .readTextFile(orderLogPath)
      .map { line =>
        val fields = line.split(",")
        OrderEvent(fields(0).toLong, fields(1), fields(2), fields(3).toLong)
      }
      // Tolerate up to 3 seconds of out-of-order events; eventTime is scaled by 1000
      // (presumably seconds -> milliseconds; confirm against the data file).
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[OrderEvent](Time.seconds(3)) {
          override def extractTimestamp(element: OrderEvent): Long = element.eventTime * 1000L
        })
      // Only events that carry a transaction id can be reconciled.
      .filter(order => order.txId != "")
      .keyBy(order => order.txId)

    // Receipt log: each line is split on "," into txId, payChannel, timestamp.
    val receiptLogPath = getClass.getResource("/ReceiptLog.csv").getPath
    val receiptEventStream = env
      .readTextFile(receiptLogPath)
      .map { line =>
        val fields = line.split(",")
        ReceiptEvent(fields(0), fields(1), fields(2).toLong)
      }
      .assignAscendingTimestamps(receipt => receipt.timestamp * 1000L)
      .keyBy(receipt => receipt.txId)

    // Join each order with the receipts that fall into [order - 3s, order + 5s].
    val lowerBound = Time.seconds(-3)
    val upperBound = Time.seconds(5)
    val resultStream = orderEventStream
      .intervalJoin(receiptEventStream)
      .between(lowerBound, upperBound)
      .process(new OrderPayTxDetectWithJoin())

    resultStream.print()

    env.execute("order pay tx match with join job")
  }

}
/**
  * ProcessJoinFunction that forwards every joined (order, receipt) pair unchanged as a tuple.
  */
class OrderPayTxDetectWithJoin() extends ProcessJoinFunction[OrderEvent,ReceiptEvent,(OrderEvent,ReceiptEvent)]{
  // Invoked once per joined pair; emits the pair as (left, right).
  override def processElement(left: OrderEvent, right: ReceiptEvent, ctx: ProcessJoinFunction[OrderEvent, ReceiptEvent, (OrderEvent, ReceiptEvent)]
    #Context, out: Collector[(OrderEvent, ReceiptEvent)]): Unit = {
    val matchedPair: (OrderEvent, ReceiptEvent) = left -> right
    out.collect(matchedPair)
  }
}

在這裡插入圖片描述

總結

雖然join很方便、簡單，但是有侷限性：interval join只會輸出匹配上的資料對，沒有匹配上的支付或到賬事件無法輸出；如果需要得到未匹配的事件，就要使用connect + CoProcessFunction配合側輸出流的方式！

Flink SQL結合Kafka、Elasticsearch、Kibana實時分析電商使用者行為
2021-01-16
SQLKafkaElasticsearch
基於flink的電商使用者行為資料分析【3】| 實時流量統計
2020-11-27
基於flink的電商使用者行為資料分析【2】| 實時熱門商品統計
2020-11-24
使用者行為分析模型實踐（四）—— 留存分析模型
2024-04-19
模型
python實現淘寶使用者行為分析
2020-11-15
Python
使用者行為分析模型實踐（一）—— 路徑分析模型
2021-03-15
模型
基於flink的電商使用者行為資料分析【4】| 惡意登入監控
2020-11-28
【資料分析】針對家庭用電資料進行時序分析（1）
2023-09-26
豐網踏踏實實為電商使用者做好快遞服務，贏得電商認可與肯定
2023-04-03
反欺詐（羊毛盾）API 實現使用者行為分析的思路分析
2023-04-13
API
[Flink-原始碼分析]Blink SQL 回撤解密
2021-12-26
原始碼SQL解密
Spark綜合使用及使用者行為案例訪問session統計分析實戰-Spark商業應用實戰
2018-12-24
SparkSession
DT時代，如何成為一名合格的電商資料分析師？
2022-04-12
使用者行為分析，指定操作順序
2020-09-24
2014年天貓使用者行為分析
2020-10-24
如何做好使用者行為分析
2021-12-21
淺談使用者行為分析之“留存”
2022-12-29
YouGov：YouTube和TikTok使用者行為分析
2021-11-19
Go
B站基於Flink的海量使用者行為實時ETL實踐
2023-04-06
2019年澳大利亞電子商務消費者行為及市場分析
2019-05-29
電商企業在實施ERP專案時的應對策略有哪些？
2022-01-25
使用Amazon Pinpoint對使用者行為追蹤
2021-04-04
電商API介面的實踐與案例分析
2024-01-20
API
如今電商時代，傳統電商該如何破局？ Smartbi教你幾個分析方法！
2021-12-15
後電商時代，傳統電商如何破局？這幾個分析方法很重要
2021-11-16
恆訊科技分析：如何避免亞馬遜電商賬戶被封？租用跨境vps
2022-07-20
亞馬遜
B站基於ClickHouse的海量使用者行為分析應用實踐
2023-01-12
機器學習增強的電子商務平臺使用者行為預測
2019-04-08
機器學習
Spark綜合使用及使用者行為案例區域內熱門商品統計分析實戰-Spark商業應用實戰
2018-12-25
Spark
網站使用者行為分析——Linux的安裝
2018-07-05
網站Linux
【大資料之網站使用者行為分析】
2018-03-04
大資料網站
拼多多如何做好使用者行為分析？
2020-12-08
黑馬PM-電商專案-電商使用者端
2024-11-01
電商 SaaS 全渠道實時資料中臺最佳實踐
2023-03-09
電商RPA助力電商運營做好資料分析
2022-08-25
對 Steam 的同理心：使用者的購物行為
2020-05-13
日誌服務之分析使用者訪問行為
2022-04-27
實時計算Flink-獨享模式-Batch（試用）-建立源表——建立CSV源表
2018-11-14
模式BAT

Flink-電商使用者行為分析(實時對賬)

總結

相關文章