Spark Source Code - SparkContext Analysis

Posted by Xlucas on 2017-09-24

Key points
1. Analysis of SparkContext in the Spark 1.6.3 source code

The snippet below is part of the SparkContext initialization code. It mainly creates the TaskScheduler and the SchedulerBackend.

// We need to register "HeartbeatReceiver" before "createTaskScheduler" because Executor will
// retrieve "HeartbeatReceiver" in the constructor. (SPARK-6640)
_heartbeatReceiver = env.rpcEnv.setupEndpoint(
  HeartbeatReceiver.ENDPOINT_NAME, new HeartbeatReceiver(this))

// Create and start the scheduler
// Create the TaskScheduler based on the master URL and this SparkContext;
// the call returns the SchedulerBackend together with the TaskScheduler
val (sched, ts) = SparkContext.createTaskScheduler(this, master)
_schedulerBackend = sched
_taskScheduler = ts
_dagScheduler = new DAGScheduler(this)
_heartbeatReceiver.ask[Boolean](TaskSchedulerIsSet)

// start TaskScheduler after taskScheduler sets DAGScheduler reference in DAGScheduler's
// constructor
_taskScheduler.start()

_applicationId = _taskScheduler.applicationId()
_applicationAttemptId = taskScheduler.applicationAttemptId()
_conf.set("spark.app.id", _applicationId)
_ui.foreach(_.setAppId(_applicationId))
_env.blockManager.initialize(_applicationId)
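
For context, everything above runs inside the SparkContext constructor, and the master URL that createTaskScheduler later matches on is whatever the driver program puts into SparkConf. Below is a minimal driver sketch (the app name, master value and job are only examples) showing where that string comes from:

import org.apache.spark.{SparkConf, SparkContext}

object DriverSketch {
  def main(args: Array[String]): Unit = {
    // The master set here ("local[2]" in this example) is the string that
    // SparkContext.createTaskScheduler pattern-matches on below.
    val conf = new SparkConf()
      .setAppName("sparkcontext-demo")
      .setMaster("local[2]")
    val sc = new SparkContext(conf) // runs the initialization code shown above
    try {
      // A trivial job, just to show the context is usable once construction finishes
      println(sc.parallelize(1 to 10).sum())
    } finally {
      sc.stop()
    }
  }
}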

Jumping into the createTaskScheduler method, we see the source below. It matches the master URL of each submission mode to decide which TaskScheduler implementation and which SchedulerBackend (resource scheduling) to use.

/**
 * Create a task scheduler based on a given master URL.
 * Return a 2-tuple of the scheduler backend and the task scheduler.
 */
private def createTaskScheduler(
    sc: SparkContext,
    master: String): (SchedulerBackend, TaskScheduler) = {
  // Import the regular expressions used to match the cluster master URL
  import SparkMasterRegex._

  // When running locally, don't try to re-execute tasks on failure.
  // (by default a failed task is not retried in local mode)
  val MAX_LOCAL_TASK_FAILURES = 1

  master match {
    // Local single-threaded mode: the task scheduler is TaskSchedulerImpl and the backend is LocalBackend
    case "local" =>
      val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
      val backend = new LocalBackend(sc.getConf, scheduler, 1)
      scheduler.initialize(backend)
      (backend, scheduler)
    // Local multi-threaded mode, matching local[N] and local[*]: TaskSchedulerImpl with LocalBackend
    case LOCAL_N_REGEX(threads) =>
      def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
      // local[*] estimates the number of cores on the machine; local[N] uses exactly N threads.
      val threadCount = if (threads == "*") localCpuCount else threads.toInt
      if (threadCount <= 0) {
        throw new SparkException(s"Asked to run locally with $threadCount threads")
      }
      val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
      val backend = new LocalBackend(sc.getConf, scheduler, threadCount)
      scheduler.initialize(backend)
      (backend, scheduler)
    // Local mode matching local[N, M] and local[*, M], where M is the maximum number of task failures: TaskSchedulerImpl with LocalBackend
    case LOCAL_N_FAILURES_REGEX(threads, maxFailures) =>
      def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
      // local[*, M] means the number of cores on the computer with M failures
      // local[N, M] means exactly N threads with M failures
      val threadCount = if (threads == "*") localCpuCount else threads.toInt
      val scheduler = new TaskSchedulerImpl(sc, maxFailures.toInt, isLocal = true)
      val backend = new LocalBackend(sc.getConf, scheduler, threadCount)
      scheduler.initialize(backend)
      (backend, scheduler)
    // Spark Standalone mode: TaskSchedulerImpl with SparkDeploySchedulerBackend
    case SPARK_REGEX(sparkUrl) =>
      val scheduler = new TaskSchedulerImpl(sc)
      val masterUrls = sparkUrl.split(",").map("spark://" + _)
      val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)
      scheduler.initialize(backend)
      (backend, scheduler)
    // Local cluster mode (local-cluster): TaskSchedulerImpl with SparkDeploySchedulerBackend
    case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
      // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang.
      val memoryPerSlaveInt = memoryPerSlave.toInt
      if (sc.executorMemory > memoryPerSlaveInt) {
        throw new SparkException(
          "Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format(
            memoryPerSlaveInt, sc.executorMemory))
      }

      val scheduler = new TaskSchedulerImpl(sc)
      val localCluster = new LocalSparkCluster(
        numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt, sc.conf)
      val masterUrls = localCluster.start()
      val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)
      scheduler.initialize(backend)
      backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => {
        localCluster.stop()
      }
      (backend, scheduler)
//"yarn-standalone"或"yarn-cluster"執行模式,其中taskschedule採用了YarnClusterScheduler 資源排程採用了YarnClusterSchedulerBackend
    case "yarn-standalone" | "yarn-cluster" =>
      if (master == "yarn-standalone") {
        logWarning(
          "\"yarn-standalone\" is deprecated as of Spark 1.0. Use \"yarn-cluster\" instead.")
      }
      val scheduler = try {
        val clazz = Utils.classForName("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
        val cons = clazz.getConstructor(classOf[SparkContext])
        cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl]
      } catch {
        // TODO: Enumerate the exact reasons why it can fail
        // But irrespective of it, it means we cannot proceed !
        case e: Exception => {
          throw new SparkException("YARN mode not available ?", e)
        }
      }
      val backend = try {
        val clazz =
          Utils.classForName("org.apache.spark.scheduler.cluster.YarnClusterSchedulerBackend")
        val cons = clazz.getConstructor(classOf[TaskSchedulerImpl], classOf[SparkContext])
        cons.newInstance(scheduler, sc).asInstanceOf[CoarseGrainedSchedulerBackend]
      } catch {
        case e: Exception => {
          throw new SparkException("YARN mode not available ?", e)
        }
      }
      scheduler.initialize(backend)
      (backend, scheduler)
    // yarn-client mode: YarnScheduler (a subclass of TaskSchedulerImpl) with YarnClientSchedulerBackend
    case "yarn-client" =>
      val scheduler = try {
        val clazz = Utils.classForName("org.apache.spark.scheduler.cluster.YarnScheduler")
        val cons = clazz.getConstructor(classOf[SparkContext])
        cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl]

      } catch {
        case e: Exception => {
          throw new SparkException("YARN mode not available ?", e)
        }
      }

      val backend = try {
        val clazz =
          Utils.classForName("org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend")
        val cons = clazz.getConstructor(classOf[TaskSchedulerImpl], classOf[SparkContext])
        cons.newInstance(scheduler, sc).asInstanceOf[CoarseGrainedSchedulerBackend]
      } catch {
        case e: Exception => {
          throw new SparkException("YARN mode not available ?", e)
        }
      }

      scheduler.initialize(backend)
      (backend, scheduler)
    // Mesos mode: the task scheduler is TaskSchedulerImpl
    case MESOS_REGEX(mesosUrl) =>
      MesosNativeLibrary.load()
      val scheduler = new TaskSchedulerImpl(sc)
      val coarseGrained = sc.conf.getBoolean("spark.mesos.coarse", defaultValue = true)
      // Choose the backend based on whether coarse-grained scheduling is enabled
      val backend = if (coarseGrained) {
        // Coarse-grained mode: CoarseMesosSchedulerBackend
        new CoarseMesosSchedulerBackend(scheduler, sc, mesosUrl, sc.env.securityManager)
      } else {
        // Fine-grained mode: MesosSchedulerBackend
        new MesosSchedulerBackend(scheduler, sc, mesosUrl)
      }
      scheduler.initialize(backend)
      (backend, scheduler)
    // Spark in MapReduce V1 (SIMR) mode: TaskSchedulerImpl with SimrSchedulerBackend
    case SIMR_REGEX(simrUrl) =>
      val scheduler = new TaskSchedulerImpl(sc)
      val backend = new SimrSchedulerBackend(scheduler, sc, simrUrl)
      scheduler.initialize(backend)
      (backend, scheduler)
    // A bare zk:// URL is treated as a ZooKeeper-managed Mesos cluster and delegated to the mesos:// branch
    case zkUrl if zkUrl.startsWith("zk://") =>
      logWarning("Master URL for a multi-master Mesos cluster managed by ZooKeeper should be " +
        "in the form mesos://zk://host:port. Current Master URL will stop working in Spark 2.0.")
      createTaskScheduler(sc, "mesos://" + zkUrl)

    case _ =>
      throw new SparkException("Could not parse Master URL: '" + master + "'")
  }
}
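
To summarize the dispatch above, here is an illustrative sketch (plain Scala, not Spark API; simplified prefix checks stand in for the regex matching, and the helper name chosenClasses is made up) that maps a master URL to the scheduler and backend classes createTaskScheduler would pick:

object SchedulerChoiceSummary {
  // Illustrative summary of the match above; the Mesos branch additionally
  // depends on spark.mesos.coarse at runtime.
  def chosenClasses(master: String): (String, String) =
    if (master == "local" || master.startsWith("local[")) ("TaskSchedulerImpl", "LocalBackend")
    else if (master.startsWith("local-cluster[")) ("TaskSchedulerImpl", "SparkDeploySchedulerBackend")
    else if (master.startsWith("spark://")) ("TaskSchedulerImpl", "SparkDeploySchedulerBackend")
    else if (master == "yarn-cluster" || master == "yarn-standalone") ("YarnClusterScheduler", "YarnClusterSchedulerBackend")
    else if (master == "yarn-client") ("YarnScheduler", "YarnClientSchedulerBackend")
    else if (master.startsWith("mesos://")) ("TaskSchedulerImpl", "CoarseMesosSchedulerBackend or MesosSchedulerBackend")
    else if (master.startsWith("simr://")) ("TaskSchedulerImpl", "SimrSchedulerBackend")
    else throw new IllegalArgumentException(s"Could not parse Master URL: '$master'")

  def main(args: Array[String]): Unit =
    Seq("local[4]", "spark://host:7077", "yarn-client", "mesos://zk://host:2181")
      .foreach(m => println(s"$m -> ${chosenClasses(m)}"))
}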

The regular expressions that match the different submission modes are defined in SparkMasterRegex:

private object SparkMasterRegex {
  // Regular expression used for local[N] and local[*] master formats
  val LOCAL_N_REGEX = """local\[([0-9]+|\*)\]""".r
  // Regular expression for local[N, maxRetries], used in tests with failing tasks;
  // maxRetries is the maximum number of times a failed task is re-executed
  val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+|\*)\s*,\s*([0-9]+)\]""".r
  // Regular expression for simulating a Spark cluster of [N, cores, memory] locally
  // (local-cluster[N, cores, memory] is a pseudo-distributed mode)
  val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r
  // Regular expression for connecting to Spark deploy (Standalone) clusters
  val SPARK_REGEX = """spark://(.*)""".r
  // Regular expression for connecting to a Mesos cluster by mesos:// or mesos://zk:// url
  val MESOS_REGEX = """mesos://(.*)""".r
  // Regular expression for connecting to a Simr (Spark in MapReduce v1) cluster,
  // used for compatibility with older Hadoop clusters
  val SIMR_REGEX = """simr://(.*)""".r
}
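
As a quick illustration of how these extractors behave in a pattern match (plain Scala, no Spark dependency; the sample master strings and the object name are arbitrary):

object SparkMasterRegexDemo {
  // Same patterns as above, repeated here so the snippet compiles on its own
  val LOCAL_N_REGEX = """local\[([0-9]+|\*)\]""".r
  val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+|\*)\s*,\s*([0-9]+)\]""".r
  val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r

  def main(args: Array[String]): Unit = {
    // Matching a String against a Regex extracts the capture groups
    "local[4]" match {
      case LOCAL_N_REGEX(threads) => println(s"local, threads = $threads") // threads = 4
    }
    "local[*, 3]" match {
      case LOCAL_N_FAILURES_REGEX(threads, maxFailures) =>
        println(s"local, threads = $threads, maxFailures = $maxFailures") // threads = *, maxFailures = 3
    }
    "local-cluster[2, 1, 1024]" match {
      case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
        println(s"local-cluster: $numSlaves workers, $coresPerSlave cores, $memoryPerSlave MB each")
    }
  }
}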
