本文主要針對Kafka的原始碼進行分析,版本為kafka-0.8.2.1。由於時間有限,可能更新得比較慢...
Kafka.scala
// Load broker configuration from the properties file given as the first CLI argument.
val props = Utils.loadProps(args(0))
val serverConfig = new KafkaConfig(props)
KafkaMetricsReporter.startReporters(serverConfig.props)
val kafkaServerStartable = new KafkaServerStartable(serverConfig)
// Register a JVM shutdown hook that calls KafkaServerStartable.shutdown when the JVM exits.
Runtime.getRuntime().addShutdownHook(new Thread() {
  override def run() = kafkaServerStartable.shutdown
})
// Start the server and block until shutdown completes.
kafkaServerStartable.startup
kafkaServerStartable.awaitShutdown
Server
實際呼叫類為KafkaServer
def startup() {
  kafkaScheduler.startup()
  // Initialize the required paths in ZooKeeper and obtain a connected client.
  zkClient = initZk()
  // Log manager.
  logManager = createLogManager(zkClient, brokerState)
  logManager.startup()
  socketServer = new SocketServer(...)
  socketServer.startup()
  // Start the replica manager.
  replicaManager = new ReplicaManager(config, time, zkClient, kafkaScheduler, logManager, isShuttingDown)
  // Create the offset manager.
  offsetManager = createOffsetManager()
  // Instantiate the controller.
  kafkaController = new KafkaController(config, zkClient, brokerState)
  // Request handlers (the broker's API layer).
  apis = new KafkaApis(...)
  // Thread pool processing network requests.
  requestHandlerPool = new KafkaRequestHandlerPool(config.brokerId, socketServer.requestChannel, apis, config.numIoThreads)
  brokerState.newState(RunningAsBroker)
  Mx4jLoader.maybeLoad()
  replicaManager.startup()
  kafkaController.startup()
  // Topic configuration manager.
  topicConfigManager = new TopicConfigManager(zkClient, logManager)
  topicConfigManager.startup()
  // Broker heartbeat / health check.
  kafkaHealthcheck = new KafkaHealthcheck(...)
  kafkaHealthcheck.startup()
  registerStats()
  startupComplete.set(true)
  info("started")
}
在KafkaServer的startup中可以看到,主要進行幾個核心服務的初始化和啟動。
private def initZk(): ZkClient = {
  info("Connecting to zookeeper on " + config.zkConnect)
  // Extract the chroot (Kafka's working root inside ZooKeeper) from the connect
  // string, e.g. "host:2181/kafka" -> "/kafka"; empty when no path is present.
  val chroot = {
    val slashIdx = config.zkConnect.indexOf("/")
    if (slashIdx > 0) config.zkConnect.substring(slashIdx) else ""
  }
  // A chrooted client cannot create its own root, so when a chroot is
  // configured, connect without it first and create the path.
  if (chroot.length > 1) {
    val zkConnForChrootCreation = config.zkConnect.substring(0, config.zkConnect.indexOf("/"))
    val zkClientForChrootCreation = new ZkClient(...)
    ZkUtils.makeSurePersistentPathExists(zkClientForChrootCreation, chroot)
    info("Created zookeeper path " + chroot)
    zkClientForChrootCreation.close()
  }
  // Regular client against the (possibly chrooted) connect string.
  val zkClient = new ZkClient(config.zkConnect, config.zkSessionTimeoutMs, config.zkConnectionTimeoutMs, ZKStringSerializer)
  // Create the persistent paths Kafka relies on.
  ZkUtils.setupCommonPaths(zkClient)
  zkClient
}
KafkaScheduler實際為對執行緒池ScheduledThreadPoolExecutor的封裝,這裡不做過多的分析。
KafkaHealthcheck(...) {
  // ZooKeeper path of this broker's ephemeral registration node.
  val brokerIdPath = ZkUtils.BrokerIdsPath + "/" + brokerId
  val sessionExpireListener = new SessionExpireListener
  def startup() {
    // Subscribe a listener for ZooKeeper connection-state events.
    zkClient.subscribeStateChanges(sessionExpireListener)
    // Create an ephemeral node under /brokers/ids/<id> and write the broker info.
    register()
  }
}
IZkStateListener 定義了兩種事件:一種是連線狀態的改變,例如由未連線改變成連線上,連線上改為過期等;
另一種是建立了一個新的session(連線),通常是在session失效後、新的session被建立時觸發。
class SessionExpireListener() extends IZkStateListener {
  // A brand-new session means the previous ephemeral registration is gone,
  // so the broker must register itself again.
  @throws(classOf[Exception])
  def handleNewSession() = { register() }

  // Plain connection-state transitions need no action here.
  @throws(classOf[Exception])
  def handleStateChanged(state: KeeperState) {}
}
ReplicaManager
def startup() {
  // Periodically re-evaluate the ISR of every partition.
  scheduler.schedule("isr-expiration", maybeShrinkIsr, period = config.replicaLagTimeMaxMs, unit = TimeUnit.MILLISECONDS)
}

// Invoked by the scheduler: ask each partition to drop replicas that can no
// longer be considered in-sync (stuck or lagging too far behind).
private def maybeShrinkIsr(): Unit = {
  trace("Evaluating ISR list of partitions to see which replicas can be removed from the ISR")
  for (partition <- allPartitions.values)
    partition.maybeShrinkIsr(config.replicaLagTimeMaxMs, config.replicaLagMaxMessages)
}
這裡呼叫了cluster.Partition中的maybeShrinkIsr來將卡住的或者低效的副本從ISR中去除並更新HighWatermark。
def maybeShrinkIsr(replicaMaxLagTimeMs: Long, replicaMaxLagMessages: Long) {
  inWriteLock(leaderIsrUpdateLock) {
    leaderReplicaIfLocal() match {
      case Some(leaderReplica) =>
        // Identify stuck and slow replicas to be removed from the ISR.
        val outOfSyncReplicas = getOutOfSyncReplicas(leaderReplica, replicaMaxLagTimeMs, replicaMaxLagMessages)
        if(outOfSyncReplicas.size > 0) {
          val newInSyncReplicas = inSyncReplicas -- outOfSyncReplicas
          assert(newInSyncReplicas.size > 0)
          // Persist the shrunken ISR to ZooKeeper.
          updateIsr(newInSyncReplicas)
          // Recompute the high watermark, which may now advance.
          maybeIncrementLeaderHW(leaderReplica)
          replicaManager.isrShrinkRate.mark()
        }
      ...
}
def getOutOfSyncReplicas(leaderReplica: Replica, keepInSyncTimeMs: Long, keepInSyncMessages: Long): Set[Replica] = {
  // Offset of the last message written by the leader.
  val leaderLogEndOffset = leaderReplica.logEndOffset
  // Every ISR member except the leader itself is a removal candidate.
  val candidateReplicas = inSyncReplicas - leaderReplica

  // "Stuck": has not updated its log end offset within keepInSyncTimeMs.
  val stuckReplicas = candidateReplicas.filter { r =>
    (time.milliseconds - r.logEndOffsetUpdateTimeMs) > keepInSyncTimeMs
  }

  // "Slow": has a valid offset (>= 0) but trails the leader by more than
  // keepInSyncMessages messages.
  val slowReplicas = candidateReplicas.filter { r =>
    r.logEndOffset.messageOffset >= 0 &&
      leaderLogEndOffset.messageOffset - r.logEndOffset.messageOffset > keepInSyncMessages
  }

  // Union of stuck and slow replicas.
  stuckReplicas ++ slowReplicas
}
Cluster
Controller