【雲星資料---Apache Flink實戰系列(精品版)】:Apache Flink高階特性與高階應用015-Flink中廣播變數和分散式快取001

雲星資料-大資料團隊發表於2017-11-19

1. Flink 中的廣播變數

Flink 支援將變數(資料集)廣播到各個 worker 上,以供程式運算時使用:透過 withBroadcastSet 註冊廣播資料,再於 Rich 函式中以 getBroadcastVariable 讀取。

執行程式

package code.book.batch.sinksource.scala
import java.util
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment, _}
import org.apache.flink.configuration.Configuration

/**
 * Batch example showing how a broadcast variable is consumed inside a rich function.
 *
 * A small `Count` data set is attached to a map step under the name
 * "countWorkInfo"; the mapper reads it in `open` and uses it to turn each
 * worker's monthly salary into a total for the recorded number of months.
 */
object BroadcastVariables001 {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    // 1. Worker records: the input of the map step.
    case class Worker(name: String, salaryPerMonth: Double)
    val workers: DataSet[Worker] = env.fromElements(
      Worker("zhagnsan", 1356.67),
      Worker("lisi", 1476.67)
    )

    // 2. Month counts per worker: attached to the map step via withBroadcastSet.
    case class Count(name: String, month: Int)
    val counts: DataSet[Count] = env.fromElements(
      Count("zhagnsan", 4),
      Count("lisi", 5)
    )

    // 3. Mapper that combines every worker with the broadcast counts.
    val salaryMapper: RichMapFunction[Worker, Worker] = new RichMapFunction[Worker, Worker] {
      // Assigned in open(); stays null until then.
      private var monthCounts: util.List[Count] = null

      override def open(parameters: Configuration): Unit = {
        super.open(parameters)
        // 3.1 Fetch the broadcast data registered under "countWorkInfo".
        monthCounts = getRuntimeContext.getBroadcastVariable[Count]("countWorkInfo")
      }

      override def map(w: Worker): Worker = {
        // 3.2 Scan for the first count whose name equals the worker's name, ignoring case.
        val remaining = monthCounts.iterator()
        var matched: Option[Worker] = None
        while (matched.isEmpty && remaining.hasNext) {
          val count = remaining.next()
          if (count.name.equalsIgnoreCase(w.name)) {
            // Matching entry found: monthly salary multiplied by the month count.
            matched = Some(Worker(w.name, w.salaryPerMonth * count.month))
          }
        }
        // No matching entry: emit the placeholder record.
        matched.getOrElse(Worker("###", 0))
      }
    }

    workers
      .map(salaryMapper)
      .withBroadcastSet(counts, "countWorkInfo")
      .print()
  }
}

執行效果

Worker(zhagnsan,5426.68)
Worker(lisi,7383.35)

相關文章