Spark原始碼分析之Worker啟動通訊機制

happy19870612發表於2017-11-09

Worker是spark的工作節點,主要負責接受Master指令,啟動或者殺掉Executor,Driver等;彙報Driver或者Executor狀態到Master;傳送心跳請求到Master等等

一 重要屬性

RpcEnv rpcEnv: 用於註冊和維護RpcEndpoint和RpcEndpointRef

Int webUiPort: web ui 埠

Int cores: 分配給該worker的CPU核數

Int coresUsed: 該worker使用的CPU核數

Int coresFree = cores - coresUsed 還剩餘的CPU核數

Int memory: 分配給該worker的記憶體容量

Int memoryUsed:該worker使用的記憶體容量

Int memoryFree = memory - memoryUsed  還剩餘的記憶體容量

Array[RpcAddress] masterRpcAddresses: master RpcAddress陣列

String endpointName: worker的rpc終端名字

String workDirPath: 工作目錄

forwordMessageScheduler: 一個後臺排程執行緒,在指定的時間傳送訊息

cleanupThreadExecutor: 一個後臺清理工作目錄的執行緒

Option[RpcEndpointRef] master: master終端

String activeMasterUrl: 當前有效的master url

String activeMasterWebUiUrl: 當前有效的master web ui url

String workerWebUiUrl: worker的web ui url

String workerUri: worker的url

boolean registered: 該worker是否已經註冊

boolean connected: 該worker是否連線到master

String workerId: worker的id

HashMap[String, DriverRunner] drivers: worker維護的一個所有driver id -> DriverRunner的對映

HashMap[String, ExecutorRunner] executors:  worker維護的一個所有executor id -> ExecutorRunner的對映

LinkedHashMap[String, DriverRunner] finishedDrivers: worker維護的一個已經執行完畢的driver id -> DriverRunner的對映

HashMap[String, Seq[String]] appDirectories: worker維護的一個application id -> app目錄的對映

HashSet[String] finishedApps: 該worker已經完成工作的application

HEARTBEAT_MILLIS: 向Master傳送心跳的時間間隔(毫秒)

INITIAL_REGISTRATION_RETRIES: 向master註冊初始重試次數,預設是6次

TOTAL_REGISTRATION_RETRIES: 向master註冊總的嘗試次數

INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS: 初始化的註冊重試間隔

PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS:延長的註冊重試間隔

CLEANUP_ENABLED: 是否啟用cleanup功能

CLEANUP_INTERVAL_MILLIS: cleanup時間間隔

APP_DATA_RETENTION_SECONDS: app資料儲存時間長度

 

二 核心方法

2.1 main方法

/**
 * Entry point of the Worker process.
 *
 * Parses the launch arguments, starts the RPC environment and endpoint, and then
 * blocks until that RPC environment terminates.
 *
 * @param argStrings raw command-line arguments
 */
def main(argStrings: Array[String]): Unit = {
  Utils.initDaemon(log)
  val sparkConf = new SparkConf
  // Parse the launch argument list.
  val parsed = new WorkerArguments(argStrings, sparkConf)
  // Start the RPC environment and the RPC endpoint.
  val env = startRpcEnvAndEndpoint(
    parsed.host,
    parsed.port,
    parsed.webUiPort,
    parsed.cores,
    parsed.memory,
    parsed.masters,
    parsed.workDir,
    conf = sparkConf)
  // Keep the process alive until the RPC environment shuts down.
  env.awaitTermination()
}

2.2 onStart 啟動worker

# 建立工作目錄

# 建立Web UI,並且繫結Web UI

# 向Master註冊

/**
 * Start-up hook of the worker endpoint.
 *
 * In order: creates the work directory, starts the shuffle service if it is enabled,
 * creates and binds the worker web UI, begins registration with the master, and
 * finally registers the worker metrics source and starts the metrics system.
 */
override def onStart() {
  // The worker must not already be registered when it starts.
  assert(!registered)
  logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format(
    host, port, cores, Utils.megabytesToString(memory)))
  logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
  logInfo("Spark home: " + sparkHome)
  // Create the work directory.
  createWorkDir()
  // If the ExternalShuffleService is enabled, call its start method.
  shuffleService.startIfEnabled()
  // Create the worker web UI and bind it.
  webUi = new WorkerWebUI(this, workDir, webUiPort)
  webUi.bind()

  // The URL uses the port the UI actually bound to.
  workerWebUiUrl = s"http://$publicAddress:${webUi.boundPort}"
  // Register with the master.
  registerWithMaster()

  metricsSystem.registerSource(workerSource)
  metricsSystem.start()
  // Attach the worker metrics servlet handler to the web ui after the metrics system is started.
  metricsSystem.getServletHandlers.foreach(webUi.attachHandler)
}

 

2.3 createWorkDir 建立工作目錄

/**
 * Creates the directory in which the worker stores its data.
 *
 * The directory is `workDirPath` when that is non-null, otherwise the "work"
 * directory under `sparkHome`. Example entries noted for this directory:
 *   app-20170613113959-0000
 *   app-20170613114457-0001
 *   app-20170613114710-0002
 *
 * If the directory cannot be created, or an exception is raised while trying,
 * the error is logged and the process exits with status 1.
 */
private def createWorkDir(): Unit = {
  // Resolve the work directory; the fallback is only built when no path was given.
  workDir = Option(workDirPath).fold(new File(sparkHome, "work"))(new File(_))
  try {
    // Attempt the creation first, then verify the outcome.
    workDir.mkdirs()
    val usable = workDir.exists() && workDir.isDirectory
    if (!usable) {
      logError("Failed to create work directory " + workDir)
      System.exit(1)
    }
    assert(workDir.isDirectory)
  } catch {
    case e: Exception =>
      logError("Failed to create work directory " + workDir, e)
      System.exit(1)
  }
}

 

2.4 registerWithMaster():向master註冊

/**
 * Begins registration with the masters, unless a registration retry timer already exists.
 *
 * When no timer exists, this resets the registration state, submits registration
 * attempts to all masters and schedules a recurring `ReregisterWithMaster` message to
 * this endpoint every INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS seconds.
 */
private def registerWithMaster(): Unit = {
  // An existing retry timer means registration was already started: do nothing.
  if (registrationRetryTimer.isEmpty) {
    // Registration starts out as "not registered".
    registered = false
    // Try to register with every master.
    registerMasterFutures = tryRegisterAllMasters()
    // Reset the connection attempt counter.
    connectionAttemptCount = 0
    // Periodic task that asks this endpoint to re-register.
    val retryTask: Runnable = new Runnable {
      override def run(): Unit = Utils.tryLogNonFatalError {
        Option(self).foreach(_.send(ReregisterWithMaster))
      }
    }
    registrationRetryTimer = Some(forwordMessageScheduler.scheduleAtFixedRate(
      retryTask,
      INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
      INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
      TimeUnit.SECONDS))
  }
}

 

2.5 tryRegisterAllMasters 嘗試向叢集內所有的master註冊

/**
 * Submits one registration attempt per configured master address.
 *
 * @return the futures of the submitted attempts, one per entry in `masterRpcAddresses`
 */
private def tryRegisterAllMasters(): Array[JFuture[_]] = {
  masterRpcAddresses.map { masterAddress =>
    val attempt: Runnable = new Runnable {
      override def run(): Unit = {
        try {
          logInfo("Connecting to master " + masterAddress + "...")
          // Build the master endpoint reference used to send messages to that master.
          val endpointRef = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
          // Register with this particular master.
          registerWithMaster(endpointRef)
        } catch {
          case _: InterruptedException => // Cancelled
          case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
        }
      }
    }
    registerMasterThreadPool.submit(attempt)
  }
}

2.6 registerWithMaster(masterEndpoint: RpcEndpointRef)

向master註冊

/**
 * Sends a `RegisterWorker` request to the given master and handles the outcome.
 *
 * On success the reply is passed to `handleRegisterResponse`; on failure the error is
 * logged and the process exits with status 1.
 *
 * @param masterEndpoint reference to the master endpoint to register with
 */
private def registerWithMaster(masterEndpoint: RpcEndpointRef): Unit = {
  // Describe this worker to the master.
  val request = RegisterWorker(workerId, host, port, self, cores, memory, workerWebUiUrl)
  val pendingReply = masterEndpoint.ask[RegisterWorkerResponse](request)
  pendingReply.onComplete {
    // The ask completed: process the master's answer.
    case Success(msg) =>
      Utils.tryLogNonFatalError {
        handleRegisterResponse(msg)
      }
    // The ask failed: give up.
    case Failure(e) =>
      logError(s"Cannot register with master: ${masterEndpoint.address}", e)
      System.exit(1)
  }(ThreadUtils.sameThread)
}

 

2.7 handleRegisterResponse 處理回撥函式的結果

/**
 * Processes the master's reply to a registration request.
 *
 *  - `RegisteredWorker`: marks the worker as registered, switches to that master,
 *    schedules recurring `SendHeartbeat` messages (and `WorkDirCleanup` messages when
 *    CLEANUP_ENABLED) to this endpoint, then sends the master a `WorkerLatestState`
 *    listing the executors and drivers this worker currently holds.
 *  - `RegisterWorkerFailed`: logs the error and exits with status 1, unless the worker
 *    is already registered.
 *  - `MasterInStandby`: ignored.
 *
 * @param msg the registration response received from a master
 */
private def handleRegisterResponse(msg: RegisterWorkerResponse): Unit = synchronized {
  // Task that sends `message` to this endpoint, wrapped in Utils.tryLogNonFatalError.
  def selfSendTask(message: Any): Runnable = new Runnable {
    override def run(): Unit = Utils.tryLogNonFatalError {
      self.send(message)
    }
  }

  msg match {
    case RegisteredWorker(masterRef, masterWebUiUrl) =>
      logInfo("Successfully registered with master " + masterRef.address.toSparkURL)
      registered = true
      changeMaster(masterRef, masterWebUiUrl)
      // Heartbeats start immediately and repeat every HEARTBEAT_MILLIS.
      forwordMessageScheduler.scheduleAtFixedRate(
        selfSendTask(SendHeartbeat), 0, HEARTBEAT_MILLIS, TimeUnit.MILLISECONDS)
      // Periodic work-directory cleanup, only when the feature is switched on.
      if (CLEANUP_ENABLED) {
        logInfo(
          s"Worker cleanup enabled; old application directories will be deleted in: $workDir")
        forwordMessageScheduler.scheduleAtFixedRate(
          selfSendTask(WorkDirCleanup),
          CLEANUP_INTERVAL_MILLIS,
          CLEANUP_INTERVAL_MILLIS,
          TimeUnit.MILLISECONDS)
      }
      // Describe every executor currently held by this worker.
      val execs = executors.values.map { runner =>
        new ExecutorDescription(runner.appId, runner.execId, runner.cores, runner.state)
      }
      // Report this worker's current executors and drivers to the master.
      masterRef.send(WorkerLatestState(workerId, execs.toList, drivers.keys.toSeq))

    case RegisterWorkerFailed(message) =>
      // A failure only matters while the worker has not registered yet.
      if (!registered) {
        logError("Worker registration failed: " + message)
        System.exit(1)
      }

    case MasterInStandby =>
      // Ignore. Master not yet ready.
  }
}

 

2.8 receive 接收訊息,但是不需要返回結果

/**
 * Handles one-way messages sent to this worker endpoint; no reply is produced.
 *
 * Message types handled: heartbeat ticks, work-directory cleanup ticks, master changes,
 * reconnect requests, executor launch/kill requests, executor state changes, driver
 * launch/kill requests, driver state changes, re-registration ticks and finished
 * applications.
 */
override def receive: PartialFunction[Any, Unit] = synchronized {
  // SendHeartbeat: send a heartbeat to the master, but only while connected.
  case SendHeartbeat =>
    if (connected) { sendToMaster(Heartbeat(workerId, self)) }
  // WorkDirCleanup: the work directory needs to be cleaned.
  case WorkDirCleanup =>
    // Ids of the applications that still have executors on this worker.
    val appIds = executors.values.map(_.appId).toSet
    // Find the eligible application directories and delete them recursively; the work
    // runs inside a Future on cleanupThreadExecutor.
    val cleanupFuture = concurrent.Future {
      // List everything under the work directory.
      val appDirs = workDir.listFiles()
      if (appDirs == null) {
        // `appDirs` is null on this branch, so report the directory that was listed.
        throw new IOException("ERROR: Failed to list files in " + workDir)
      }
      appDirs.filter { dir =>
        // The directory name is compared against the application ids collected above.
        val appIdFromDir = dir.getName
        val isAppStillRunning = appIds.contains(appIdFromDir)
        // Keep only directories whose application has no executor here and for which
        // Utils.doesDirectoryContainAnyNewFiles(dir, APP_DATA_RETENTION_SECONDS) is false.
        dir.isDirectory && !isAppStillRunning &&
        !Utils.doesDirectoryContainAnyNewFiles(dir, APP_DATA_RETENTION_SECONDS)
      }.foreach { dir =>
        logInfo(s"Removing directory: ${dir.getPath}")
        Utils.deleteRecursively(dir)
      }
    }(cleanupThreadExecutor)

    // A failed cleanup is only logged.
    cleanupFuture.onFailure {
      case e: Throwable =>
        logError("App dir cleanup failed: " + e.getMessage, e)
    }(cleanupThreadExecutor)
  // MasterChanged: the master has changed.
  case MasterChanged(masterRef, masterWebUiUrl) =>
    logInfo("Master has changed, new master is at " + masterRef.address.toSparkURL)
    // Switch to the new master (see changeMaster).
    changeMaster(masterRef, masterWebUiUrl)
    // Build an ExecutorDescription for each executor on this worker.
    val execs = executors.values.
      map(e => new ExecutorDescription(e.appId, e.execId, e.cores, e.state))
    // Send the new master a WorkerSchedulerStateResponse with the executors and driver ids.
    masterRef.send(WorkerSchedulerStateResponse(workerId, execs.toList, drivers.keys.toSeq))
  // ReconnectWorker: the master asked this worker to reconnect.
  case ReconnectWorker(masterUrl) =>
    logInfo(s"Master with url $masterUrl requested this worker to reconnect.")
    // Register with the master again.
    registerWithMaster()
  // LaunchExecutor: an executor has to be launched.
  case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
    // Only the currently active master may launch executors.
    if (masterUrl != activeMasterUrl) {
      logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
    } else {
      try {
        logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

        // Create the executor directory: <workDir>/<appId>/<execId>.
        val executorDir = new File(workDir, appId + "/" + execId)
        if (!executorDir.mkdirs()) {
          throw new IOException("Failed to create directory " + executorDir)
        }

        // Reuse the application's local directories if already recorded; otherwise
        // create them (one per local root dir, restricted via Utils.chmod700).
        val appLocalDirs = appDirectories.getOrElse(appId,
          Utils.getOrCreateLocalRootDirs(conf).map { dir =>
            val appDir = Utils.createDirectory(dir, namePrefix = "executor")
            Utils.chmod700(appDir)
            appDir.getAbsolutePath()
          }.toSeq)
        appDirectories(appId) = appLocalDirs
        // Create the ExecutorRunner for this executor.
        val manager = new ExecutorRunner(
          appId,
          execId,
          appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
          cores_,
          memory_,
          self,
          workerId,
          host,
          webUi.boundPort,
          publicAddress,
          sparkHome,
          executorDir,
          workerUri,
          conf,
          appLocalDirs, ExecutorState.RUNNING)
        // Track the new runner under the key "<appId>/<execId>".
        executors(appId + "/" + execId) = manager
        // Start the ExecutorRunner.
        manager.start()
        // Account for the cores and memory now in use.
        coresUsed += cores_
        memoryUsed += memory_
        // Tell the master about the executor's state.
        sendToMaster(ExecutorStateChanged(appId, execId, manager.state, None, None))
      } catch {
        case e: Exception =>
          logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
          // Kill and forget the runner if it was already registered.
          if (executors.contains(appId + "/" + execId)) {
            executors(appId + "/" + execId).kill()
            executors -= appId + "/" + execId
          }
          // Report the failure to the master.
          sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
            Some(e.toString), None))
      }
    }
  // ExecutorStateChanged: an executor's state changed.
  case executorStateChanged @ ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
    handleExecutorStateChanged(executorStateChanged)
  // KillExecutor: an executor has to be killed.
  case KillExecutor(masterUrl, appId, execId) =>
    // Only the currently active master may kill executors.
    if (masterUrl != activeMasterUrl) {
      logWarning("Invalid Master (" + masterUrl + ") attempted to kill executor " + execId)
    } else {
      val fullId = appId + "/" + execId
      executors.get(fullId) match {
        case Some(executor) =>
          logInfo("Asked to kill executor " + fullId)
          executor.kill()
        case None =>
          logInfo("Asked to kill unknown executor " + fullId)
      }
    }
  // LaunchDriver: a driver has to be launched.
  case LaunchDriver(driverId, driverDesc) =>
    logInfo(s"Asked to launch driver $driverId")
    // Create the DriverRunner.
    val driver = new DriverRunner(
      conf,
      driverId,
      workDir,
      sparkHome,
      driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
      self,
      workerUri,
      securityMgr)
    // Track it in `drivers`.
    drivers(driverId) = driver
    // Start the driver.
    driver.start()
    // Account for the cores and memory the driver uses.
    coresUsed += driverDesc.cores
    memoryUsed += driverDesc.mem
  // KillDriver: a driver has to be killed.
  case KillDriver(driverId) =>
    logInfo(s"Asked to kill driver $driverId")
    drivers.get(driverId) match {
      case Some(runner) =>
        runner.kill()
      case None =>
        logError(s"Asked to kill unknown driver $driverId")
    }
  // DriverStateChanged: a driver's state changed.
  case driverStateChanged @ DriverStateChanged(driverId, state, exception) =>
    handleDriverStateChanged(driverStateChanged)
  // ReregisterWithMaster: re-registration with the master is due.
  case ReregisterWithMaster =>
    reregisterWithMaster()
  // ApplicationFinished: an application has finished.
  case ApplicationFinished(id) =>
    finishedApps += id
    // The application's directories may now be eligible for cleanup.
    maybeCleanupApplication(id)
}

 

2.9 receiveAndReply 接收訊息,並返回結果

/**
 * Handles messages that expect a reply.
 *
 * `RequestWorkerState` is answered with a `WorkerStateResponse` built from the worker's
 * current fields (running and finished executors and drivers, master URLs, and the
 * total and used cores and memory).
 *
 * @param context call context used to send the reply
 */
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  case RequestWorkerState =>
    // Collect the worker's state into a response object, then reply with it.
    val response = WorkerStateResponse(
      host,
      port,
      workerId,
      executors.values.toList,
      finishedExecutors.values.toList,
      drivers.values.toList,
      finishedDrivers.values.toList,
      activeMasterUrl,
      cores,
      memory,
      coresUsed,
      memoryUsed,
      activeMasterWebUiUrl)
    context.reply(response)
}

 

2.10 changeMaster

獲取新的master的url和master,取消之前那些重新註冊嘗試,因為已經發現新的master

/**
 * Switches this worker to the given master.
 *
 * Records the master's URL, web UI URL and endpoint reference, marks the worker as
 * connected, and cancels the pending registration retry.
 *
 * @param masterRef endpoint reference of the new master
 * @param uiUrl     web UI URL of the new master
 */
private def changeMaster(masterRef: RpcEndpointRef, uiUrl: String): Unit = {
  // Record the new master's URL, web UI URL and endpoint reference.
  activeMasterUrl = masterRef.address.toSparkURL
  activeMasterWebUiUrl = uiUrl
  master = Some(masterRef)
  // The worker is now connected to a master.
  connected = true
  val reverseProxyEnabled = conf.getBoolean("spark.ui.reverseProxy", false)
  if (reverseProxyEnabled) {
    logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId")
  }
  // A master is known now, so earlier re-registration attempts are no longer needed.
  cancelLastRegistrationRetry()
}

 

2.11 handleExecutorStateChanged 處理executor狀態改變

/**
 * Handles a change of an executor's state.
 *
 * The change is always forwarded to the master. If the new state is a finished state,
 * a known executor is moved from `executors` to `finishedExecutors` and its cores and
 * memory are released; an unknown executor is only logged. In both cases
 * `maybeCleanupApplication` is then called for the executor's application.
 *
 * @param executorStateChanged the state-change message
 */
private[worker] def handleExecutorStateChanged(executorStateChanged: ExecutorStateChanged):
  Unit = {
  // Forward the state change to the master first.
  sendToMaster(executorStateChanged)
  val state = executorStateChanged.state
  if (ExecutorState.isFinished(state)) {
    val appId = executorStateChanged.appId
    // Executors are keyed by "<appId>/<execId>".
    val fullId = appId + "/" + executorStateChanged.execId
    // Log suffix shared by the known and unknown executor cases.
    val outcome = " finished with state " + state +
      executorStateChanged.message.map(" message " + _).getOrElse("") +
      executorStateChanged.exitStatus.map(" exitStatus " + _).getOrElse("")
    executors.get(fullId) match {
      case Some(executor) =>
        logInfo("Executor " + fullId + outcome)
        // Move the runner from the active map to the finished map.
        executors -= fullId
        finishedExecutors(fullId) = executor
        trimFinishedExecutorsIfNecessary()
        // Release the cores and memory it was using.
        coresUsed -= executor.cores
        memoryUsed -= executor.memory
      case None =>
        logInfo("Unknown Executor " + fullId + outcome)
    }
    // The application's directories may now be eligible for cleanup.
    maybeCleanupApplication(appId)
  }
}

 

2.12 handleDriverStateChanged 處理driver狀態改變

/**
 * Handles a change of a driver's state.
 *
 * Logs the new state, forwards the change to the master, then moves the driver from
 * `drivers` to `finishedDrivers` and releases its cores and memory.
 *
 * A state change for a driver id that is not in `drivers` is logged as a warning
 * instead of failing with NoSuchElementException, in line with how unknown executors
 * are treated in `handleExecutorStateChanged`. An ERROR state without an attached
 * exception is likewise logged with a placeholder rather than throwing.
 *
 * @param driverStateChanged the state-change message
 */
private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = {
  val driverId = driverStateChanged.driverId
  val exception = driverStateChanged.exception
  val state = driverStateChanged.state
  state match {
    case DriverState.ERROR =>
      // Render the exception if present; never call `.get` on a possibly empty Option.
      val cause = exception.map(e => s"$e").getOrElse("<no exception reported>")
      logWarning(s"Driver $driverId failed with unrecoverable exception: $cause")
    case DriverState.FAILED =>
      logWarning(s"Driver $driverId exited with failure")
    case DriverState.FINISHED =>
      logInfo(s"Driver $driverId exited successfully")
    case DriverState.KILLED =>
      logInfo(s"Driver $driverId was killed by user")
    case _ =>
      logDebug(s"Driver $driverId changed state to $state")
  }
  // Forward the state change to the master.
  sendToMaster(driverStateChanged)
  drivers.remove(driverId) match {
    case Some(driver) =>
      // Move the runner to the finished map.
      finishedDrivers(driverId) = driver
      // Trim the finished-drivers map if necessary.
      trimFinishedDriversIfNecessary()
      // Release the memory and cores the driver was using.
      memoryUsed -= driver.driverDesc.mem
      coresUsed -= driver.driverDesc.cores
    case None =>
      // Nothing to move or release for a driver this worker does not track.
      logWarning(s"Unknown driver $driverId changed state to $state")
  }
}

 

2.13 reregisterWithMaster 重新註冊

有時候會遇到網路異常或者master失敗,這時需要重新向master註冊;如果註冊嘗試超過指定的次數,則worker退出

/**
 * Performs one re-registration attempt.
 *
 * Increments the attempt counter, then:
 *  - if already registered, cancels the pending registration retry;
 *  - if more than TOTAL_REGISTRATION_RETRIES attempts were made, logs an error and
 *    exits with status 1;
 *  - otherwise cancels outstanding registration futures and registers again, with the
 *    known master if there is one, else with all masters. When the attempt count
 *    reaches INITIAL_REGISTRATION_RETRIES, the retry timer is replaced by one using
 *    PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS.
 */
private def reregisterWithMaster(): Unit = {
  Utils.tryOrExit {
    connectionAttemptCount += 1
    if (registered) {
      // Already registered: no further retries are needed.
      cancelLastRegistrationRetry()
    } else if (connectionAttemptCount > TOTAL_REGISTRATION_RETRIES) {
      logError("All masters are unresponsive! Giving up.")
      System.exit(1)
    } else {
      logInfo(s"Retrying connection to master (attempt # $connectionAttemptCount)")
      // Cancel attempts that are still in flight before submitting new ones.
      if (registerMasterFutures != null) {
        registerMasterFutures.foreach(_.cancel(true))
      }
      registerMasterFutures = master match {
        // A master is known but `registered` is false: the master endpoint reference
        // is rebuilt from its address and registration is sent to it again.
        case Some(masterRef) =>
          val masterAddress = masterRef.address
          val reconnect: Runnable = new Runnable {
            override def run(): Unit = {
              try {
                logInfo("Connecting to master " + masterAddress + "...")
                val endpointRef = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
                registerWithMaster(endpointRef)
              } catch {
                case _: InterruptedException => // Cancelled
                case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
              }
            }
          }
          Array(registerMasterThreadPool.submit(reconnect))
        // No master is known yet: try all of them.
        case None =>
          tryRegisterAllMasters()
      }
      // Once the initial retry budget is used up, switch to the prolonged interval.
      if (connectionAttemptCount == INITIAL_REGISTRATION_RETRIES) {
        registrationRetryTimer.foreach(_.cancel(true))
        val prolongedRetry: Runnable = new Runnable {
          override def run(): Unit = Utils.tryLogNonFatalError {
            self.send(ReregisterWithMaster)
          }
        }
        registrationRetryTimer = Some(
          forwordMessageScheduler.scheduleAtFixedRate(
            prolongedRetry,
            PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS,
            PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS,
            TimeUnit.SECONDS))
      }
    }
  }
}

 

相關文章