This section covers master election and the processing flow that follows it.
As described in the previous section, during Master startup the Akka actor's preStart method is invoked first.
override def preStart() {
  logInfo("Starting Spark master at " + masterUrl)
  logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
  // Listen for remote lifecycle events, e.g. disconnects of workers and clients
  context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
  // Bind the master web UI and record its externally visible URL
  webUi.bind()
  masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort
  // Periodically check whether any worker has timed out
  context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut)

  masterMetricsSystem.registerSource(masterSource)
  masterMetricsSystem.start()
  applicationMetricsSystem.start()
  // Attach the master and application metrics servlet handlers to the web UI
  // after the metrics systems have started
  masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
  applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)

  // Pick a persistence engine and a leader election agent according to
  // the configured recovery mode (spark.deploy.recoveryMode)
  val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match {
    case "ZOOKEEPER" =>
      logInfo("Persisting recovery state to ZooKeeper")
      val zkFactory =
        new ZooKeeperRecoveryModeFactory(conf, SerializationExtension(context.system))
      (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
    case "FILESYSTEM" =>
      val fsFactory =
        new FileSystemRecoveryModeFactory(conf, SerializationExtension(context.system))
      (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
    case "CUSTOM" =>
      // A user-supplied factory class named by spark.deploy.recoveryMode.factory
      val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory"))
      val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serialization])
        .newInstance(conf, SerializationExtension(context.system))
        .asInstanceOf[StandaloneRecoveryModeFactory]
      (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
    case _ =>
      // No recovery: persist nothing and make this master leader unconditionally
      (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
  }
  persistenceEngine = persistenceEngine_
  leaderElectionAgent = leaderElectionAgent_
}
The persistenceEngine_ above encapsulates reading and writing the metadata kept in ZooKeeper, along with the serialization/deserialization interface.
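For orientation, the contract such an engine fulfils looks roughly like the sketch below. This is a simplification, not the actual Spark trait (which also offers typed helpers such as addApplication/addWorker): persist deploy metadata under a name, read it back by prefix, and delete it.

trait PersistenceEngineSketch {
  // Serialize obj and store it under the given name (a znode for ZooKeeper)
  def persist(name: String, obj: Object): Unit
  // Remove a previously persisted object
  def unpersist(name: String): Unit
  // Deserialize every stored object whose name starts with prefix
  def read[T](prefix: String): Seq[T]
}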
leaderElectionAgent_ encapsulates the master election process; see the explanation in the code comments below.
private[master] class ZooKeeperLeaderElectionAgent(val masterActor: LeaderElectable,
    conf: SparkConf) extends LeaderLatchListener with LeaderElectionAgent with Logging {

  // znode directory under which Curator's LeaderLatch runs the election
  val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/leader_election"

  private var zk: CuratorFramework = _
  private var leaderLatch: LeaderLatch = _
  private var status = LeadershipStatus.NOT_LEADER

  start()

  private def start() {
    logInfo("Starting ZooKeeper LeaderElection agent")
    zk = SparkCuratorUtil.newClient(conf)
    // Curator's LeaderLatch performs the actual election; this agent registers
    // itself as the listener, so Curator calls isLeader()/notLeader() below
    // whenever leadership changes
    leaderLatch = new LeaderLatch(zk, WORKING_DIR)
    leaderLatch.addListener(this)
    leaderLatch.start()
  }

  override def stop() {
    leaderLatch.close()
    zk.close()
  }

  // Callback from Curator: this node has gained leadership
  override def isLeader() {
    synchronized {
      // Leadership may already have been lost again before this callback ran
      if (!leaderLatch.hasLeadership) {
        return
      }
      logInfo("We have gained leadership")
      updateLeadershipStatus(true)
    }
  }

  // Callback from Curator: this node has lost leadership
  override def notLeader() {
    synchronized {
      if (leaderLatch.hasLeadership) {
        return
      }
      logInfo("We have lost leadership")
      updateLeadershipStatus(false)
    }
  }

  // Notify the master actor only on real transitions: electedLeader() when
  // gaining leadership, revokedLeadership() when losing it
  private def updateLeadershipStatus(isLeader: Boolean) {
    if (isLeader && status == LeadershipStatus.NOT_LEADER) {
      status = LeadershipStatus.LEADER
      masterActor.electedLeader()
    } else if (!isLeader && status == LeadershipStatus.LEADER) {
      status = LeadershipStatus.NOT_LEADER
      masterActor.revokedLeadership()
    }
  }

  private object LeadershipStatus extends Enumeration {
    type LeadershipStatus = Value
    val LEADER, NOT_LEADER = Value
  }
}
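To see the LeaderLatch recipe in isolation, here is a minimal standalone sketch. The connect string and latch path are placeholders (it assumes a ZooKeeper at localhost:2181): every process that opens a latch on the same path competes for leadership, and Curator invokes the listener callbacks as leadership moves.

import org.apache.curator.framework.CuratorFrameworkFactory
import org.apache.curator.framework.recipes.leader.{LeaderLatch, LeaderLatchListener}
import org.apache.curator.retry.ExponentialBackoffRetry

object LeaderLatchDemo extends App {
  // Placeholder connect string: assumes a local ZooKeeper at localhost:2181
  val client = CuratorFrameworkFactory.newClient(
    "localhost:2181", new ExponentialBackoffRetry(1000, 3))
  client.start()

  // All contenders that open a latch on the same path compete for leadership
  val latch = new LeaderLatch(client, "/demo/leader_election")
  latch.addListener(new LeaderLatchListener {
    // Curator fires these from its own threads, which is why the Spark agent
    // above re-checks hasLeadership inside a synchronized block
    override def isLeader(): Unit = println("gained leadership")
    override def notLeader(): Unit = println("lost leadership")
  })
  latch.start()

  // Hold our place in the election for a minute, then step down
  Thread.sleep(60000)
  latch.close()
  client.close()
}

Running two copies of this and killing the current leader makes the survivor print "gained leadership", which is exactly the transition the Spark agent translates into electedLeader().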
Continuing with the logic in the Master:
override def receiveWithLogging: PartialFunction[Any, Unit] = {
  case ElectedLeader => {
    // Read back any persisted apps/drivers/workers. If nothing was stored we
    // can go straight to ALIVE, otherwise we must run recovery first.
    val (storedApps, storedDrivers, storedWorkers) = persistenceEngine.readPersistedData()
    state = if (storedApps.isEmpty && storedDrivers.isEmpty && storedWorkers.isEmpty) {
      RecoveryState.ALIVE
    } else {
      RecoveryState.RECOVERING
    }
    logInfo("I have been elected leader! New state: " + state)
    if (state == RecoveryState.RECOVERING) {
      beginRecovery(storedApps, storedDrivers, storedWorkers)
      // Force recovery to finish after WORKER_TIMEOUT even if some apps or
      // workers never reply
      recoveryCompletionTask = context.system.scheduler.scheduleOnce(WORKER_TIMEOUT millis, self,
        CompleteRecovery)
    }
  }

  case CompleteRecovery => completeRecovery()

  case RevokedLeadership => {
    logError("Leadership has been revoked -- master shutting down.")
    System.exit(0)
  }
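For the ZooKeeper engine, the readPersistedData call above boils down to deserializing the stored znodes by name prefix. Roughly, as a sketch based on the 1.x code (details may vary across versions):

// Apps, drivers and workers are stored under the working directory with
// distinguishing name prefixes, so reading them back is three prefix scans.
def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = {
  (read[ApplicationInfo]("app_"), read[DriverInfo]("driver_"), read[WorkerInfo]("worker_"))
}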
Beginning recovery:
private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo],
    storedWorkers: Seq[WorkerInfo]) {
  for (app <- storedApps) {
    logInfo("Trying to recover app: " + app.id)
    try {
      // Re-register the app, mark it UNKNOWN until its driver responds, and
      // tell the driver who the new master is
      registerApplication(app)
      app.state = ApplicationState.UNKNOWN
      app.driver ! MasterChanged(masterUrl, masterWebUiUrl)
    } catch {
      case e: Exception => logInfo("App " + app.id + " had exception on reconnect")
    }
  }

  for (driver <- storedDrivers) {
    // Here we just read in the list of drivers. Any drivers associated with
    // now-lost workers will be re-launched when we detect that the worker is missing.
    drivers += driver
  }

  for (worker <- storedWorkers) {
    logInfo("Trying to recover worker: " + worker.id)
    try {
      // Same pattern for workers: re-register, mark UNKNOWN, notify
      registerWorker(worker)
      worker.state = WorkerState.UNKNOWN
      worker.actor ! MasterChanged(masterUrl, masterWebUiUrl)
    } catch {
      case e: Exception => logInfo("Worker " + worker.id + " had exception on reconnect")
    }
  }
}
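The recovery handshake above is carried by plain case-class messages defined in DeployMessages.scala. In outline (field names per the 1.x sources, shown here as a sketch):

// Master -> driver/worker: announce the new active master
case class MasterChanged(masterUrl: String, masterWebUiUrl: String)
// Driver -> master: acknowledge the new master
case class MasterChangeAcknowledged(appId: String)
// Worker -> master: report the executors and drivers it is running
case class WorkerSchedulerStateResponse(id: String,
    executors: List[ExecutorDescription], driverIds: Seq[String])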
What happens on the driver side when it receives the MasterChanged message? See AppClient.scala.
Only the active master sends MasterChanged, so the masterUrl here is necessarily that of the newly elected active master.
case MasterChanged(masterUrl, masterWebUiUrl) =>
  logInfo("Master has changed, new master is at " + masterUrl)
  // Point this client at the new master, then acknowledge with our appId
  changeMaster(masterUrl)
  alreadyDisconnected = false
  sender ! MasterChangeAcknowledged(appId)
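changeMaster on the client side essentially repoints the cached actor reference at the new master's Akka URL. A sketch assuming the 1.x helper names (the exact signatures vary across versions):

private def changeMaster(url: String): Unit = {
  // Remember the new active master and repoint the actor selection so that
  // subsequent messages (registrations, heartbeats) reach it
  masterUrl = url
  master = context.actorSelection(Master.toAkkaUrl(masterUrl))
  masterAddress = Master.toAkkaAddress(masterUrl)
}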
The master then receives the acknowledgements sent back by the drivers of all the apps. Let's look at how the master handles the MasterChangeAcknowledged message, whose parameter is the appId:

case MasterChangeAcknowledged(appId) => {
  idToApp.get(appId) match {
    case Some(app) =>
      logInfo("Application has been re-registered: " + appId)
      // The app has confirmed the new master; move it out of UNKNOWN
      app.state = ApplicationState.WAITING
    case None =>
      logWarning("Master change ack from unknown app: " + appId)
  }

  // If every app and worker has now responded, finish recovery early
  if (canCompleteRecovery) { completeRecovery() }
}
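The canCompleteRecovery guard used here (and again in the worker path below) simply checks that no stored worker or app is still in the UNKNOWN state, i.e. every recovered entity has either responded or been ruled out; roughly:

def canCompleteRecovery =
  workers.count(_.state == WorkerState.UNKNOWN) == 0 &&
  apps.count(_.state == ApplicationState.UNKNOWN) == 0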
What happens on the worker side when it receives MasterChanged? See Worker.scala:
case MasterChanged(masterUrl, masterWebUiUrl) =>
  logInfo("Master has changed, new master is at " + masterUrl)
  changeMaster(masterUrl, masterWebUiUrl)

  // Report back everything this worker is currently running so the new
  // master can rebuild its executor and driver bookkeeping
  val execs = executors.values.
    map(e => new ExecutorDescription(e.appId, e.execId, e.cores, e.state))
  sender ! WorkerSchedulerStateResponse(workerId, execs.toList, drivers.keys.toSeq)
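On the worker side, changeMaster records the new active master and repoints the actor selection before the response above is sent. A sketch (field names follow Worker.scala in 1.x; details may vary by version):

private def changeMaster(url: String, uiUrl: String): Unit = {
  // Track the active master's URLs and mark this worker as connected
  activeMasterUrl = url
  activeMasterWebUiUrl = uiUrl
  master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl))
  connected = true
}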
Back to the handling logic in the master:
case WorkerSchedulerStateResponse(workerId, executors, driverIds) => {
  idToWorker.get(workerId) match {
    case Some(worker) =>
      logInfo("Worker has been re-registered: " + workerId)
      worker.state = WorkerState.ALIVE

      // Rebuild executor bookkeeping, keeping only executors whose app is
      // still known to this master
      val validExecutors = executors.filter(exec => idToApp.get(exec.appId).isDefined)
      for (exec <- validExecutors) {
        val app = idToApp.get(exec.appId).get
        val execInfo = app.addExecutor(worker, exec.cores, Some(exec.execId))
        worker.addExecutor(execInfo)
        execInfo.copyState(exec)
      }

      // Re-attach the drivers the worker reported to this worker's record
      for (driverId <- driverIds) {
        drivers.find(_.id == driverId).foreach { driver =>
          driver.worker = Some(worker)
          driver.state = DriverState.RUNNING
          worker.drivers(driverId) = driver
        }
      }
    case None =>
      logWarning("Scheduler state from unknown worker: " + workerId)
  }

  if (canCompleteRecovery) { completeRecovery() }
}
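ExecutorDescription, filtered on above, is just the minimal executor state a worker can report back, enough for the master to rebuild an ExecutorInfo entry; roughly (sketch):

class ExecutorDescription(val appId: String, val execId: Int,
    val cores: Int, val state: ExecutorState.Value)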
Once all of this has been processed, look at the master's completeRecovery. It is scheduled right after beginRecovery is called, with a delay of WORKER_TIMEOUT; under normal circumstances all of the message round-trips above will have finished by then.
private def completeRecovery() {
  // Ensure "only-once" recovery semantics using a short synchronized block
  synchronized {
    if (state != RecoveryState.RECOVERING) { return }
    state = RecoveryState.COMPLETING_RECOVERY
  }

  // Kill off any workers and apps that didn't respond to us
  workers.filter(_.state == WorkerState.UNKNOWN).foreach(removeWorker)
  apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication)

  // Reschedule drivers which were not claimed by any workers
  drivers.filter(_.worker.isEmpty).foreach { d =>
    logWarning(s"Driver ${d.id} was not found after master recovery")
    if (d.desc.supervise) {
      logWarning(s"Re-launching ${d.id}")
      relaunchDriver(d)
    } else {
      removeDriver(d.id, DriverState.ERROR, None)
      logWarning(s"Did not re-launch ${d.id} because it was not supervised")
    }
  }

  state = RecoveryState.ALIVE
  schedule()
  logInfo("Recovery complete - resuming operations!")
}
Reprinted from: http://blog.csdn.net/yueqian_zhu/article/details/47954705