Spark on Yarn Job Submission Flow: Source Code Analysis
A walkthrough of the submission flow, kept here as notes.
The shell script invokes org.apache.spark.deploy.SparkSubmit, which in yarn-cluster mode hands off to org.apache.spark.deploy.yarn.Client.
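For context, a hedged sketch of that hand-off: SparkSubmit swaps the user's main class for the YARN Client and invokes it reflectively (Spark 1.x shape; childArgs stands in for the prepared argument list and is an assumption here, not a quoted identifier).

// Simplified sketch of SparkSubmit's dispatch in yarn-cluster mode.
// childArgs (hypothetical here) carries the user's jar, class, and arguments.
val childMainClass = "org.apache.spark.deploy.yarn.Client"
val mainClass = Class.forName(childMainClass)
val mainMethod = mainClass.getMethod("main", classOf[Array[String]])
mainMethod.invoke(null, childArgs.toArray) // lands in Client.main below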
Start with the main method of org.apache.spark.deploy.yarn.Client:
def main(argStrings: Array[String]) {
  if (!sys.props.contains("SPARK_SUBMIT")) {
    logWarning("WARNING: This client is deprecated and will be removed in a " +
      "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"")
  }

  // Set an env variable indicating we are running in YARN mode.
  // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes
  System.setProperty("SPARK_YARN_MODE", "true")
  val sparkConf = new SparkConf

  val args = new ClientArguments(argStrings, sparkConf)
  // to maintain backwards-compatibility
  if (!Utils.isDynamicAllocationEnabled(sparkConf)) {
    sparkConf.setIfMissing("spark.executor.instances", args.numExecutors.toString)
  }
  new Client(args, sparkConf).run()
}
def run(): Unit = {
  val appId = submitApplication()
  // Whether we wait for completion depends on the deploy mode (client vs. cluster):
  // private val fireAndForget = isClusterMode && !sparkConf.getBoolean("spark.yarn.submit.waitAppCompletion", true)
  if (fireAndForget) {
    val report = getApplicationReport(appId) // ApplicationReport describes the application (its user, queue, name, etc.)
    val state = report.getYarnApplicationState // the application's current state
    logInfo(s"Application report for $appId (state: $state)")
    logInfo(formatReportDetails(report))
    if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { // check the state
      throw new SparkException(s"Application $appId finished with status: $state")
    }
  } else {
    // Client-mode handling (monitorApplication is examined in detail later in this post).
    // Two kinds of state are involved: YarnApplicationState (the task's state as YARN sees it)
    // and FinalApplicationStatus (the task's own final execution status).
    val (yarnApplicationState, finalApplicationStatus) = monitorApplication(appId)
    if (yarnApplicationState == YarnApplicationState.FAILED ||
        finalApplicationStatus == FinalApplicationStatus.FAILED) {
      throw new SparkException(s"Application $appId finished with failed status")
    }
    if (yarnApplicationState == YarnApplicationState.KILLED ||
        finalApplicationStatus == FinalApplicationStatus.KILLED) {
      throw new SparkException(s"Application $appId is killed")
    }
    if (finalApplicationStatus == FinalApplicationStatus.UNDEFINED) {
      throw new SparkException(s"The final status of application $appId is undefined")
    }
  }
}
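As a usage note, the fire-and-forget branch is reached in cluster mode by disabling spark.yarn.submit.waitAppCompletion (a real configuration key); a minimal illustration:

// Minimal sketch: make a cluster-mode submission fire-and-forget,
// so run() returns right after submitApplication() instead of polling.
val conf = new SparkConf()
  .set("spark.yarn.submit.waitAppCompletion", "false")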
Next, the submitApplication() method:

def submitApplication(): ApplicationId = {
  var appId: ApplicationId = null
  try {
    // Setup the credentials before doing anything else,
    // so we don't have issues at any point.
    setupCredentials()
    yarnClient.init(yarnConf)
    yarnClient.start()

    logInfo("Requesting a new application from cluster with %d NodeManagers"
      .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers))

    // Get a new application from our RM
    val newApp = yarnClient.createApplication()
    val newAppResponse = newApp.getNewApplicationResponse()
    appId = newAppResponse.getApplicationId()

    // Verify whether the cluster has enough resources for our AM
    verifyClusterResources(newAppResponse) // memory checks (examined in detail later in this post)

    // Set up the appropriate contexts to launch our AM
    val containerContext = createContainerLaunchContext(newAppResponse) // builds the ApplicationMaster's container (jar paths, userClass, etc.)
    val appContext = createApplicationSubmissionContext(newApp, containerContext)

    // Finally, submit and monitor the application
    logInfo(s"Submitting application ${appId.getId} to ResourceManager")
    yarnClient.submitApplication(appContext)
    appId
  } catch {
    case e: Throwable =>
      if (appId != null) {
        cleanupStagingDir(appId)
      }
      throw e
  }
}
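createApplicationSubmissionContext itself is mostly plumbing over YARN records. A hedged sketch of its essentials (shape as in Spark 1.5; the field values are illustrative):

import org.apache.hadoop.yarn.api.records.Resource
import org.apache.hadoop.yarn.util.Records

// Sketch: what the submission context carries to the ResourceManager.
val appContext = newApp.getApplicationSubmissionContext
appContext.setApplicationName(sparkConf.get("spark.app.name", "Spark"))
appContext.setQueue(args.amQueue)               // --queue
appContext.setAMContainerSpec(containerContext) // AM launch command, env, local resources
appContext.setApplicationType("SPARK")          // marks the job type as spark
val capability = Records.newRecord(classOf[Resource])
capability.setMemory(args.amMemory + amMemoryOverhead)
appContext.setResource(capability)              // AM container size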
The yarnClient field used above is Hadoop's YarnClient, an AbstractService obtained through a factory method:

import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication}

private val yarnClient = YarnClient.createYarnClient

@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class YarnClient extends AbstractService {

  @InterfaceAudience.Public
  public static YarnClient createYarnClient() {
    YarnClient client = new YarnClientImpl();
    return client;
  }

  @InterfaceAudience.Private
  protected YarnClient(String name) {
    super(name);
  }
  ......
}
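To see that lifecycle in isolation, here is a minimal, self-contained sketch of the same handshake against the plain Hadoop API (standard YarnClient calls; error handling omitted):

import org.apache.hadoop.yarn.client.api.YarnClient
import org.apache.hadoop.yarn.conf.YarnConfiguration

object YarnClientHandshake {
  def main(args: Array[String]): Unit = {
    val client = YarnClient.createYarnClient()
    client.init(new YarnConfiguration()) // AbstractService lifecycle: init ...
    client.start()                       // ... then start
    val app = client.createApplication() // RPC to the ResourceManager
    val appId = app.getNewApplicationResponse.getApplicationId
    println(s"New application id from the RM: $appId")
    client.stop()
  }
}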
Back in YarnClientImpl, createApplication wraps a freshly issued ApplicationId in a YarnClientApplication:

public YarnClientApplication createApplication()
    throws YarnException, IOException {
  ApplicationSubmissionContext context =
      (ApplicationSubmissionContext) Records.newRecord(ApplicationSubmissionContext.class);
  GetNewApplicationResponse newApp = getNewApplication();
  ApplicationId appId = newApp.getApplicationId();
  context.setApplicationId(appId);
  return new YarnClientApplication(newApp, context);
}
which delegates to the private getNewApplication():

private GetNewApplicationResponse getNewApplication() throws YarnException, IOException {
  GetNewApplicationRequest request =
      (GetNewApplicationRequest) Records.newRecord(GetNewApplicationRequest.class);
  return this.rmClient.getNewApplication(request);
}
Here rmClient is the ApplicationClientProtocol proxy to the ResourceManager; its getNewApplication(request) method is declared as:
@InterfaceAudience.Public
@InterfaceStability.Stable
@Idempotent
public abstract GetNewApplicationResponse getNewApplication(GetNewApplicationRequest paramGetNewApplicationRequest)
    throws YarnException, IOException;
Finally, return to the run() method and look at how a client-mode submission is handled, stepping into monitorApplication():
def monitorApplication(
    appId: ApplicationId,
    returnOnRunning: Boolean = false,
    logApplicationReport: Boolean = true): (YarnApplicationState, FinalApplicationStatus) = {
  val interval = sparkConf.getLong("spark.yarn.report.interval", 1000) // polling interval for application reports, in ms
  var lastState: YarnApplicationState = null
  while (true) { // hard-coded loop: keeps polling until the application reaches a terminal state
    Thread.sleep(interval)
    val report: ApplicationReport =
      try {
        getApplicationReport(appId)
      } catch {
        case e: ApplicationNotFoundException =>
          logError(s"Application $appId not found.")
          return (YarnApplicationState.KILLED, FinalApplicationStatus.KILLED)
        case NonFatal(e) =>
          logError(s"Failed to contact YARN for application $appId.", e)
          return (YarnApplicationState.FAILED, FinalApplicationStatus.FAILED)
      }
    val state = report.getYarnApplicationState

    if (logApplicationReport) {
      logInfo(s"Application report for $appId (state: $state)")

      // If DEBUG is enabled, log report details every iteration
      // Otherwise, log them every time the application changes state
      if (log.isDebugEnabled) {
        logDebug(formatReportDetails(report))
      } else if (lastState != state) {
        logInfo(formatReportDetails(report))
      }
    }

    if (state == YarnApplicationState.FINISHED ||
        state == YarnApplicationState.FAILED ||
        state == YarnApplicationState.KILLED) {
      cleanupStagingDir(appId)
      return (state, report.getFinalApplicationStatus) // return the final result
    }

    if (returnOnRunning && state == YarnApplicationState.RUNNING) {
      return (state, report.getFinalApplicationStatus)
    }

    lastState = state
  }

  // Never reached, but keeps compiler happy
  throw new SparkException("While loop is depleted! This should never happen...")
}
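Note that client mode does not block here until completion: YarnClientSchedulerBackend polls only until YARN reports RUNNING. A hedged sketch of that call site (Spark 1.x shape):

// Sketch: client mode waits only until the application is RUNNING.
val (state, _) = client.monitorApplication(appId, returnOnRunning = true)
if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) {
  throw new SparkException("Yarn application has already ended! " +
    "It might have been killed or unable to launch application master.")
}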
At this point the submission flow is complete; YARN then uses its distributed-cache mechanism to deploy the application's resources to the compute nodes.
Now a deeper look at the verifyClusterResources(newAppResponse) method:
private def verifyClusterResources(newAppResponse: GetNewApplicationResponse): Unit = {
  val maxMem = newAppResponse.getMaximumResourceCapability().getMemory() // the maximum memory a single container may request
  logInfo("Verifying our application has not requested more than the maximum " +
    s"memory capability of the cluster ($maxMem MB per container)")
  val executorMem = args.executorMemory + executorMemoryOverhead // configured executor memory plus overhead; args.executorMemory defaults to a hard-coded 1024 MB in Spark 1.5
  if (executorMem > maxMem) { // the executor needs more memory than a container can provide
    throw new IllegalArgumentException(s"Required executor memory (${args.executorMemory}" +
      s"+$executorMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster! " +
      "Please increase the value of 'yarn.scheduler.maximum-allocation-mb'.")
  }
  val amMem = args.amMemory + amMemoryOverhead
  // args.amMemory defaults to a hard-coded 512 MB in Spark 1.5; amMemoryOverhead is read from
  //   if (isClusterMode) driverMemOverheadKey else amMemOverheadKey
  // where:
  //   val driverMemOverheadKey = "spark.yarn.driver.memoryOverhead"
  //   val amMemOverheadKey = "spark.yarn.am.memoryOverhead"
  if (amMem > maxMem) {
    throw new IllegalArgumentException(s"Required AM memory (${args.amMemory}" +
      s"+$amMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster! " +
      "Please increase the value of 'yarn.scheduler.maximum-allocation-mb'.")
  }
  logInfo("Will allocate AM container, with %d MB memory including %d MB overhead".format(
    amMem,
    amMemoryOverhead))

  // We could add checks to make sure the entire cluster has enough resources but that involves
  // getting all the node reports and computing ourselves.
}
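To make the check concrete, a worked example, assuming the Spark 1.5 overhead rule overhead = max(0.10 * memory, 384 MB):

// Worked example of the verifyClusterResources arithmetic (values illustrative, in MB).
val maxMem = 8192                                           // yarn.scheduler.maximum-allocation-mb
val executorMemory = 6144                                   // --executor-memory 6g
val overhead = math.max((0.10 * executorMemory).toInt, 384) // 614 MB
val executorMem = executorMemory + overhead                 // 6758 MB <= 8192: passes
// With --executor-memory 8g instead: 8192 + 819 = 9011 MB > 8192,
// so the IllegalArgumentException above would be thrown.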
Summary:
Cluster mode:
Client-side steps:
1. Inside SparkSubmit, yarnClient is initialized from yarnConf and started;
2. A client-side Application is created and its Application ID obtained, then the cluster is checked for enough resources for the executors and the ApplicationMaster, with an IllegalArgumentException thrown if it falls short;
3. Resources and environment variables are set up: the Application's staging directory, local resources (jar files, log4j.properties), the Application's environment variables, the Container launch context, and so on;
4. The Application submission context is set up: the application name, the queue, the Container requested for the AM, and the job type marked as spark;
5. Memory is requested, and the Application is finally submitted to the ResourceManager through submitApplication.
Once the job has been submitted to YARN the client is done and its process exits, because the whole job runs on the YARN cluster and its results are written to HDFS or to the logs.
YARN-side steps:
1. Run the ApplicationMaster's run method;
2. Set up the relevant environment variables;
3. Create amClient and start it;
4. Install the Spark UI's AmIpFilter before the Spark UI starts;
5. In the startUserClass function, start a dedicated thread (named Driver) that runs the user-submitted Application, i.e. start the Driver, inside which the SparkContext is initialized;
6. Wait for SparkContext initialization to complete, for at most spark.yarn.applicationMaster.waitTries attempts (default 10); if the wait exceeds the configured count the process exits, otherwise the SparkContext is used to initialize yarnAllocator.
How does the AM know that SparkContext initialization has completed?
Starting the Application in step 5 initializes the SparkContext, and that initialization creates a YarnClusterScheduler; when SparkContext initialization finishes, the postStartHook method of YarnClusterScheduler is invoked, and that method notifies the ApplicationMaster that the SparkContext is ready.
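The hook itself is tiny; a sketch that follows the Spark 1.x YarnClusterScheduler closely (treat it as illustrative rather than an exact copy):

// Sketch: the TaskScheduler's start hook notifies the ApplicationMaster.
private[spark] class YarnClusterScheduler(sc: SparkContext) extends YarnScheduler(sc) {
  override def postStartHook() {
    ApplicationMaster.sparkContextInitialized(sc) // unblocks the AM's wait from step 6
    super.postStartHook()
    logInfo("YarnClusterScheduler.postStartHook done")
  }
}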
Why wait for SparkContext initialization to complete?
Because each CoarseGrainedExecutorBackend must register with the CoarseGrainedSchedulerBackend once it starts.
7. When SparkContext initialization completes, the ApplicationMaster is registered with the ResourceManager through amClient;
8. Executors are allocated and launched. Before launching them, yarnAllocator must first obtain numExecutors Containers, and the executors are then started inside those Containers. If launching executors fails maxNumExecutorFailures times, the Application fails: its status is marked FAILED and the SparkContext is shut down. Executors are actually launched through ExecutorRunnable, which internally starts CoarseGrainedExecutorBackend, and CoarseGrainedExecutorBackend registers with the SchedulerBackend once it is up. (How does the ResourceManager decide how many containers to allocate? The count follows the arguments passed at spark-submit time; by default two executors are started, as the sketch below shows.)
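A minimal sketch of where that executor count comes from (the default of 2 matches the text above; the key is the real spark.executor.instances, settable via --num-executors):

// Sketch: the number of executor containers the AM will request.
val numExecutors = sparkConf.getInt("spark.executor.instances", 2)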
9. Finally, Tasks run inside CoarseGrainedExecutorBackend, and their progress is reported to CoarseGrainedScheduler over Akka until the job completes.
Client mode:
Client-side steps:
1. The launch function of the SparkSubmit class calls the job's main function directly (via reflection); in cluster mode it would call Client's main function instead.
2. The application's main function always holds a SparkContext and initializes it;
3. SparkContext initialization does the following in order: applies the relevant configuration; registers MapOutputTracker, BlockManagerMaster, and BlockManager; and creates the taskScheduler and dagScheduler, the last two being the important part. When the taskScheduler is created, the Scheduler and SchedulerBackend are chosen according to the master passed in. Since yarn-client mode was chosen, the program picks YarnClientClusterScheduler and YarnClientSchedulerBackend and initializes the YarnClientClusterScheduler with the YarnClientSchedulerBackend instance; both instances are obtained via reflection. YarnClientSchedulerBackend is a subclass of CoarseGrainedSchedulerBackend, while YarnClientClusterScheduler is a subclass of TaskSchedulerImpl that merely overrides TaskSchedulerImpl's getRackForHost method.
4. After the taskScheduler is initialized, the dagScheduler is created and taskScheduler.start() starts the taskScheduler; starting the taskScheduler also calls the SchedulerBackend's start method. During SchedulerBackend startup, a set of parameters is initialized and wrapped in ClientArguments, the wrapped ClientArguments is passed into the Client class, and client.submitApplication() returns the Application ID, as the sketch below shows.
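A hedged sketch of that SchedulerBackend start-up, abbreviated from the Spark 1.x YarnClientSchedulerBackend (names follow that version; details trimmed):

import scala.collection.mutable.ArrayBuffer

// Sketch: client mode packages the driver's coordinates into ClientArguments,
// then reuses the same Client.submitApplication() analyzed above.
override def start() {
  super.start()
  val driverHost = conf.get("spark.driver.host")
  val driverPort = conf.get("spark.driver.port")
  val argsArrayBuf = new ArrayBuffer[String]()
  argsArrayBuf += ("--arg", s"$driverHost:$driverPort") // tells the AM where the driver lives
  val args = new ClientArguments(argsArrayBuf.toArray, conf)
  client = new Client(args, conf)
  appId = client.submitApplication()
  waitForApplication() // returns once YARN reports RUNNING
}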
YARN-side steps:
1. Run the ApplicationMaster's run method (runExecutorLauncher);
2. There is no need to wait for SparkContext initialization (the YarnClientClusterScheduler has already started); the Application is registered with sparkYarnAM;
3. Executors are allocated; the allocation logic is similar to yarn-cluster's and is not repeated here.
4. Tasks then run inside CoarseGrainedExecutorBackend, and their progress is reported to CoarseGrainedScheduler over Akka until the job completes.
5. While the job is running, YarnClientSchedulerBackend fetches the job's status through the client every second and prints the corresponding progress information; once the Application's state is one of FINISHED, FAILED, or KILLED, the program stops waiting and exits.
6. Finally, a thread confirms the Application's state one more time; when it is FINISHED, FAILED, or KILLED, the run is complete and the SparkContext is stopped. That concludes the whole flow.
From the ITPUB blog: http://blog.itpub.net/29754888/viewspace-1815323/ (please credit the source when reproducing).