Kubernetes:kubelet 原始碼分析之探針

lubanseven發表於2024-05-20

0. 前言

kubernetes 提供三種探針:存活探針(Liveness)、就緒探針(Readiness)和啟動探針(Startup),用於判斷容器健康狀態。其中,存活探針確定什麼時候重啟容器,就緒探針確定容器何時準備好接受流量請求,啟動探針判斷應用容器何時啟動完成。

本文透過分析 kubelet 原始碼瞭解 kubernetes 的探針是怎麼工作的。

1. kubelet probeManager

kubelet 中的 probeManager 模組提供了探針服務,直接分析 probeManager

// kubernetes/pkg/kubelet/kubelet.go
// NewMainKubelet (excerpt) shows only the probe-related wiring of the Kubelet
// constructor; everything else is elided with "...".
func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,...) (*Kubelet, error) {
    ...
    // One results manager per probe type: liveness, readiness and startup.
    klet.livenessManager = proberesults.NewManager()
	klet.readinessManager = proberesults.NewManager()
	klet.startupManager = proberesults.NewManager()

    ...
	// Prefer a ProbeManager supplied through kubeDeps; otherwise build one
	// from the status manager, the three results managers above, the runner
	// and kubeDeps.Recorder.
	if kubeDeps.ProbeManager != nil {
		klet.probeManager = kubeDeps.ProbeManager
	} else {
		klet.probeManager = prober.NewManager(
			klet.statusManager,
			klet.livenessManager,
			klet.readinessManager,
			klet.startupManager,
			klet.runner,
			kubeDeps.Recorder)
	}
    ...
}

NewMainKubelet 中初始化 probeManager。其中,probeManager 由 statusManager 以及分別對應三種探針的 livenessManager、readinessManager、startupManager 等元件構造而成。

kubelet 處理 pod 時,會將 pod 新增到 probeManager

// kubernetes/pkg/kubelet/kubelet.go
// SyncPod (excerpt) shows only the step relevant to probing: the pod being
// synced is handed to the probe manager. The rest of the method is elided
// with "...".
func (kl *Kubelet) SyncPod(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus) (isTerminal bool, err error) {
	...
	// Ensure the pod is being probed
	kl.probeManager.AddPod(pod)
    ...
}

// kubernetes/pkg/kubelet/prober/prober_manager.go
// AddPod (excerpt) starts probe workers for the containers of pod. Only the
// readiness branch is shown in full; the startup and liveness branches are
// elided with "...".
func (m *manager) AddPod(pod *v1.Pod) {
    ...
    // A worker is keyed by pod UID, container name and probe type; the UID is
    // fixed here and the other two fields are filled in per container/probe.
    key := probeKey{podUID: pod.UID}
    // Walk the regular containers plus whatever getRestartableInitContainers
    // returns for this pod.
    for _, c := range append(pod.Spec.Containers, getRestartableInitContainers(pod)...) {
        key.containerName = c.Name

		if c.StartupProbe != nil {
			...
		}

        if c.ReadinessProbe != nil {
			key.probeType = readiness
			// A worker already registered under this key is logged and makes
			// AddPod return immediately (note: return, not continue, so any
			// remaining containers are not visited).
			if _, ok := m.workers[key]; ok {
				klog.V(8).ErrorS(nil, "Readiness probe already exists for container",
					"pod", klog.KObj(pod), "containerName", c.Name)
				return
			}
			// Create the worker, register it under the key, and run it in its
			// own goroutine.
			w := newWorker(m, readiness, pod, c)
			m.workers[key] = w
			go w.run()
		}

        if c.LivenessProbe != nil {
			...
		}
    }
}

manager.AddPod 中包含三種探針的處理邏輯,這裡以 ReadinessProbe 探針為例進行分析。首先,建立 ReadinessProbe 的 worker,接著開啟一個協程執行該 worker:

// kubernetes/pkg/kubelet/prober/worker.go
// run (excerpt) is the worker's probe loop; the setup and cleanup around the
// loop are elided with "...".
func (w *worker) run() {
	...
probeLoop:
    // doProbe performs one probe check; the loop keeps going only while it returns true.
	for w.doProbe(ctx) {
		// Wait for next probe tick.
		select {
		case <-w.stopCh:
			// A receive on stopCh leaves the labeled loop, not just the select.
			break probeLoop
		case <-probeTicker.C:
			// Ticker fired: fall out of the select and probe again.
		case <-w.manualTriggerCh:
			// continue
		}
	}
}

// doProbe (excerpt) runs a single probe for this worker's container. The
// keepGoing result is the condition of the for loop in run, so returning
// false ends that loop. Code before and after the probe call is elided
// with "...".
func (w *worker) doProbe(ctx context.Context) (keepGoing bool) {
    ...
    // Note, exec probe does NOT have access to pod environment variables or downward API
	result, err := w.probeManager.prober.probe(ctx, w.probeType, w.pod, status, w.container, w.containerID)
	if err != nil {
		// Prober error, throw away the result.
		// Returning true keeps the loop in run going, so the probe is
		// attempted again on a later iteration.
		return true
	}
    ...
}

進入 worker.probeManager.prober.probe 檢視探針是怎麼探測 container 的:

// kubernetes/pkg/kubelet/prober/prober.go
// probe probes the container.
// (Excerpt.) It picks the probe spec matching probeType from the container
// definition and runs it via runProbeWithRetries; the handling of the
// returned result is elided with "...".
func (pb *prober) probe(ctx context.Context, probeType probeType, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (results.Result, error) {
	var probeSpec *v1.Probe
	// Select the spec for the requested probe type; an unrecognized type is
	// reported as a Failure together with an error.
	switch probeType {
	case readiness:
		probeSpec = container.ReadinessProbe
	case liveness:
		probeSpec = container.LivenessProbe
	case startup:
		probeSpec = container.StartupProbe
	default:
		return results.Failure, fmt.Errorf("unknown probe type: %q", probeType)
	}

    // A container without a spec for this probe type is logged and treated
    // as Success.
    if probeSpec == nil {
		klog.InfoS("Probe is nil", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
		return results.Success, nil
	}

    // maxProbeRetries is passed as the retries argument, i.e. the upper bound
    // on attempts made by runProbeWithRetries.
    result, output, err := pb.runProbeWithRetries(ctx, probeType, probeSpec, pod, status, container, containerID, maxProbeRetries)
    ...
}

// runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
// if it never succeeds.
//
// "Succeeds" here means runProbe returned a nil error, regardless of the
// probe.Result value: the first attempt with err == nil is returned
// immediately with a nil error. A non-nil error triggers another attempt, up
// to retries attempts in total; after that the result, output and error of the
// last attempt are returned. With retries <= 0 no probe is run and the zero
// values are returned with a nil error.
func (pb *prober) runProbeWithRetries(ctx context.Context, probeType probeType, p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID, retries int) (probe.Result, string, error) {
	var err error
	var result probe.Result
	var output string
	for i := 0; i < retries; i++ {
		result, output, err = pb.runProbe(ctx, probeType, p, pod, status, container, containerID)
		if err == nil {
			// The probe ran without error; stop retrying.
			return result, output, nil
		}
	}
	// Every attempt returned an error (or none was made): hand back the last
	// values seen.
	return result, output, err
}

// runProbe (excerpt) performs one probe attempt, dispatching on which handler
// of p is set: Exec, HTTPGet, TCPSocket or GRPC. Only the Exec branch is
// shown in full; the others are elided with "...".
func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (probe.Result, string, error) {
	// TimeoutSeconds from the probe spec, converted to a time.Duration.
	timeout := time.Duration(p.TimeoutSeconds) * time.Second
	if p.Exec != nil {
        klog.V(4).InfoS("Exec-Probe runProbe", "pod", klog.KObj(pod), "containerName", container.Name, "execCommand", p.Exec.Command)
		// The command is expanded against container.Env before being run
		// (presumably only statically defined env values are substituted,
		// going by the helper's name; confirm against kubecontainer).
		command := kubecontainer.ExpandContainerCommandOnlyStatic(p.Exec.Command, container.Env)
		return pb.exec.Probe(pb.newExecInContainer(ctx, container, containerID, command, timeout))
    }

    if p.HTTPGet != nil {
        // Build the HTTP request for the probe from the HTTPGet action, the
        // container and the pod IP.
        req, err := httpprobe.NewRequestForHTTPGetAction(p.HTTPGet, &container, status.PodIP, "probe")
        ...
    }

    if p.TCPSocket != nil {
        ...
    }

    if p.GRPC != nil {
        ...
    }
    ...
}

到這裡我們可以看到,runProbe 會根據探針的不同型別執行不同的方法:對於用命令列探測的探針(Exec),執行 prober.exec.Probe 方法;對於 http 型別的探針(HTTPGet),先呼叫 httpprobe.NewRequestForHTTPGetAction 建構探測請求;TCPSocket 與 GRPC 型別的探針同理,各有對應的處理分支。

2. 小結

本文從 kubelet 原始碼層面介紹了 kubernetes 中探針的檢測邏輯,力圖做到知其然,知其所以然。


相關文章