Yolov8-原始碼解析-四十-

绝不原创的飞龙發表於2024-09-05

Yolov8 原始碼解析(四十)

.\yolov8\ultralytics\utils\benchmarks.py

# 從 glob 模組中匯入 glob 函式,用於檔案路徑的模糊匹配
import glob
# 匯入 os 模組,提供了許多與作業系統互動的函式
import os
# 匯入 platform 模組,用於獲取系統平臺資訊
import platform
# 匯入 re 模組,支援正規表示式操作
import re
# 匯入 shutil 模組,提供了高階的檔案操作功能
import shutil
# 匯入 time 模組,提供時間相關的功能
import time
# 從 pathlib 模組中匯入 Path 類,用於操作檔案路徑
from pathlib import Path

# 匯入 numpy 庫,用於數值計算
import numpy as np
# 匯入 torch.cuda 模組,用於 CUDA 相關操作
import torch.cuda
# 匯入 yaml 庫,用於處理 YAML 格式的檔案
import yaml

# 從 ultralytics 包中匯入 YOLO 和 YOLOWorld 類
from ultralytics import YOLO, YOLOWorld
# 從 ultralytics.cfg 模組中匯入 TASK2DATA 和 TASK2METRIC 變數
from ultralytics.cfg import TASK2DATA, TASK2METRIC
# 從 ultralytics.engine.exporter 模組中匯入 export_formats 函式
from ultralytics.engine.exporter import export_formats
# 從 ultralytics.utils 模組中匯入 ARM64, ASSETS, IS_JETSON, IS_RASPBERRYPI 等變數
from ultralytics.utils import ARM64, ASSETS, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, MACOS, TQDM, WEIGHTS_DIR
# 從 ultralytics.utils.checks 模組中匯入 IS_PYTHON_3_12, check_requirements, check_yolo 等函式和變數
from ultralytics.utils.checks import IS_PYTHON_3_12, check_requirements, check_yolo
# 從 ultralytics.utils.downloads 模組中匯入 safe_download 函式
from ultralytics.utils.downloads import safe_download
# 從 ultralytics.utils.files 模組中匯入 file_size 函式
from ultralytics.utils.files import file_size
# 從 ultralytics.utils.torch_utils 模組中匯入 select_device 函式
from ultralytics.utils.torch_utils import select_device


def benchmark(
    model=WEIGHTS_DIR / "yolov8n.pt", data=None, imgsz=160, half=False, int8=False, device="cpu", verbose=False
):
    """
    Benchmark a YOLO model across different formats for speed and accuracy.

    NOTE(review): in this listing the outer body is only `pass` followed by a nested `benchmark` definition that
    is never called, so calling this function currently returns None without benchmarking anything. The nested
    definition looks like a truncated excerpt of the real body (the per-format loop that would fill `y` and define
    `key` is not shown) — TODO restore it from the upstream source.

    Args:
        model (str | Path | optional): Path to the model file or directory. Default is
            Path(SETTINGS['weights_dir']) / 'yolov8n.pt'.
        data (str, optional): Dataset to evaluate on, inherited from TASK2DATA if not passed. Default is None.
        imgsz (int, optional): Image size for the benchmark. Default is 160.
        half (bool, optional): Use half-precision for the model if True. Default is False.
        int8 (bool, optional): Use int8-precision for the model if True. Default is False.
        device (str, optional): Device to run the benchmark on, either 'cpu' or 'cuda'. Default is 'cpu'.
        verbose (bool | float | optional): If True or a float, assert benchmarks pass with given metric.
            Default is False.
    """
    # Placeholder body; the parameters are described in the docstring above.
    pass  # placeholder only: the nested definition below is defined but never invoked from here
    def benchmark(model='yolov8n.pt', imgsz=640):
        """
        Benchmark function to evaluate model performance.

        Args:
            model (str or Path): Path to the model checkpoint.
            imgsz (int): Image size for inference.

        Returns:
            df (pandas.DataFrame): A pandas DataFrame with benchmark results for each format, including file size,
                metric, and inference time.

        Example:
            ```python
            from ultralytics.utils.benchmarks import benchmark

            benchmark(model='yolov8n.pt', imgsz=640)
            ```
        """
        import pandas as pd  # Imported lazily so pandas is only needed when benchmarking
        pd.options.display.max_columns = 10  # Widen the printed table: show up to 10 columns
        pd.options.display.width = 120  # ...and up to 120 characters per line

        # NOTE(review): `device` is assigned in this nested scope, which makes it a local name here; reading it on
        # the right-hand side would raise UnboundLocalError if this function were ever called.
        device = select_device(device, verbose=False)  # Resolve the requested device
        if isinstance(model, (str, Path)):
            model = YOLO(model)  # Load the model when a path/name was given instead of a model object

        is_end2end = getattr(model.model.model[-1], "end2end", False)  # Read the last layer's `end2end` flag (unused below)

        y = []  # Rows of benchmark results; nothing in the visible code appends to it
        t0 = time.time()  # Start time, used for the elapsed-seconds figure in the summary

        check_yolo(device=device)  # Print system information relevant to YOLO

        # Build the results table from the collected rows.
        # NOTE(review): `key` is not defined anywhere in the visible code — presumably the metric name for the
        # model's task (TASK2METRIC is imported by this file); confirm against upstream.
        df = pd.DataFrame(y, columns=["Format", "Status❔", "Size (MB)", key, "Inference time (ms/im)", "FPS"])

        name = Path(model.ckpt_path).name  # File name of the model checkpoint
        # Summary text; `data` and `verbose` below are read from the enclosing function's parameters
        s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz} ({time.time() - t0:.2f}s)\n{df}\n"
        LOGGER.info(s)  # Emit the summary through the project logger

        with open("benchmarks.log", "a", errors="ignore", encoding="utf-8") as f:
            f.write(s)  # Append the same summary to 'benchmarks.log' in the current directory

        if verbose and isinstance(verbose, float):
            metrics = df[key].array  # Values of the metric column
            floor = verbose  # A float `verbose` doubles as the minimum acceptable metric
            # Every non-NaN metric must exceed the floor
            assert all(x > floor for x in metrics if pd.notna(x)), f"Benchmark failure: metric(s) < floor {floor}"

        return df  # Benchmark results table
class RF100Benchmark:
    """
    Download Roboflow-hosted datasets into an `rf-100` working directory and summarise validation logs for them.

    Attributes:
        ds_names (list): Project names of the datasets registered by `parse_dataset`.
        ds_cfg_list (list): Paths to each dataset's `data.yaml`, index-aligned with `ds_names`.
        rf: Roboflow client created by `set_key`; None until `set_key` is called.
        val_metrics (list): Column names of one validation results row.
    """

    def __init__(self):
        """Initialize empty dataset bookkeeping; `set_key` must be called before `parse_dataset`."""
        self.ds_names = []
        self.ds_cfg_list = []
        self.rf = None
        self.val_metrics = ["class", "images", "targets", "precision", "recall", "map50", "map95"]

    def set_key(self, api_key):
        """
        Set Roboflow API key for processing.

        Args:
            api_key (str): The API key.
        """
        # The roboflow package is an optional dependency, so it is checked and imported only here.
        check_requirements("roboflow")
        from roboflow import Roboflow

        self.rf = Roboflow(api_key=api_key)

    def parse_dataset(self, ds_link_txt="datasets_links.txt"):
        """
        Parse dataset links and download the datasets.

        Recreates `rf-100` in the current directory, changes the working directory into it (and does not change
        back), downloads the links file and then downloads every dataset listed in `ds_link_txt`.

        Args:
            ds_link_txt (str): Path to dataset_links file.

        Returns:
            (tuple): `(ds_names, ds_cfg_list)`; both lists only contain datasets that were registered
                successfully, so they stay index-aligned.
        """
        # Always start from a fresh rf-100 working directory.
        if os.path.exists("rf-100"):
            shutil.rmtree("rf-100")
        os.mkdir("rf-100")
        os.chdir("rf-100")
        os.mkdir("ultralytics-benchmarks")
        safe_download("https://github.com/ultralytics/assets/releases/download/v0.0.0/datasets_links.txt")

        with open(ds_link_txt, "r") as file:
            for line in file:
                try:
                    # Expected shape: <scheme>//<host>/<workspace>/<project>/<version>
                    _, _host, workspace, project, version = re.split("/+", line.strip())
                except ValueError:
                    continue  # blank or malformed line: nothing to download

                proj_version = f"{project}-{version}"
                try:
                    if not Path(proj_version).exists():
                        self.rf.workspace(workspace).project(project).version(version).download("yolov8")
                    else:
                        print("Dataset already downloaded.")
                except Exception as e:
                    # Best effort: one failing dataset must not abort the remaining downloads.
                    print(f"Skipping dataset {project}: {e}")
                    continue

                # Register name and config together so the two lists can never drift apart.
                self.ds_names.append(project)
                self.ds_cfg_list.append(Path.cwd() / proj_version / "data.yaml")

        return self.ds_names, self.ds_cfg_list

    @staticmethod
    def fix_yaml(path):
        """
        Function to fix YAML train and val path.

        Args:
            path (str): YAML file path.
        """
        with open(path, "r") as file:
            yaml_data = yaml.safe_load(file)
        # Point both splits at paths relative to the dataset folder.
        yaml_data["train"] = "train/images"
        yaml_data["val"] = "valid/images"
        with open(path, "w") as file:
            yaml.safe_dump(yaml_data, file)

    @staticmethod
    def _parse_metric_row(line, class_names):
        """Return the metrics dict for a per-class or 'all' results row, or None for any other log line."""
        entries = [e.strip("\n") for e in line.split(" ") if e != ""]
        if len(entries) < 7:
            return None  # too short to be a results row
        is_summary = "(AP)" not in entries and "(AR)" not in entries
        if not any(e in class_names or (e == "all" and is_summary) for e in entries):
            return None
        keys = ("class", "images", "targets", "precision", "recall", "map50", "map95")
        return dict(zip(keys, entries[:7]))

    def evaluate(self, yaml_path, val_log_file, eval_log_file, list_ind):
        """
        Model evaluation on validation results.

        Reads the class names from `yaml_path`, extracts the metric rows from `val_log_file` and appends
        "<dataset name>: <map50>" to `eval_log_file`. The "all" row is used when several rows were found, the
        single row when there is exactly one, and 0.0 when none were found.

        Args:
            yaml_path (str): YAML file path.
            val_log_file (str): val_log_file path.
            eval_log_file (str): eval_log_file path.
            list_ind (int): Index for current dataset.
        """
        # Log lines carrying any of these symbols are never result rows.
        skip_symbols = ["🚀", "⚠️", "💡", "❌"]

        with open(yaml_path) as stream:
            class_names = yaml.safe_load(stream)["names"]

        eval_lines = []
        with open(val_log_file, "r", encoding="utf-8") as f:
            for line in f:
                if any(symbol in line for symbol in skip_symbols):
                    continue
                row = self._parse_metric_row(line, class_names)
                if row is not None:
                    eval_lines.append(row)  # exactly one record per matching log line

        map_val = 0.0
        if len(eval_lines) > 1:
            print("There's more dicts")
            for row in eval_lines:
                if row["class"] == "all":
                    map_val = row["map50"]
        elif eval_lines:
            print("There's only one dict res")
            map_val = eval_lines[0]["map50"]
        else:
            # Previously this case crashed with IndexError; report it and fall back to 0.0.
            print(f"No validation metrics found in {val_log_file}")

        with open(eval_log_file, "a") as f:
            f.write(f"{self.ds_names[list_ind]}: {map_val}\n")
    """
    ProfileModels class for profiling different models on ONNX and TensorRT.

    This class profiles the performance of different models, returning results such as model speed and FLOPs.

    Attributes:
        paths (list): Paths of the models to profile.
        num_timed_runs (int): Number of timed runs for the profiling. Default is 100.
        num_warmup_runs (int): Number of warmup runs before profiling. Default is 10.
        min_time (float): Minimum number of seconds to profile for. Default is 60.
        imgsz (int): Image size used in the models. Default is 640.
        half (bool): Flag indicating whether to use half-precision floating point for profiling. Default is True.
        trt (bool): Flag indicating whether to use TensorRT for profiling. Default is True.
        device (torch.device): Device used for profiling. Automatically determined if None.

    Methods:
        profile(): Profiles the models and prints the result.

    Example:
        ```py
        from ultralytics.utils.benchmarks import ProfileModels

        ProfileModels(['yolov8n.yaml', 'yolov8s.yaml'], imgsz=640).profile()
        ```
    """

    def __init__(
        self,
        paths: list,
        num_timed_runs=100,
        num_warmup_runs=10,
        min_time=60,
        imgsz=640,
        half=True,
        trt=True,
        device=None,
    ):
        """
        Initialize the ProfileModels class for profiling models.

        Args:
            paths (list): List of paths of the models to be profiled.
            num_timed_runs (int, optional): Number of timed runs for the profiling. Default is 100.
            num_warmup_runs (int, optional): Number of warmup runs before the actual profiling starts. Default is 10.
            min_time (float, optional): Minimum time in seconds for profiling a model. Default is 60.
            imgsz (int, optional): Size of the image used during profiling. Default is 640.
            half (bool, optional): Flag to indicate whether to use half-precision floating point for profiling. Default is True.
            trt (bool, optional): Flag to indicate whether to profile using TensorRT. Default is True.
            device (torch.device, optional): Device used for profiling. If None, it is determined automatically.
        """
        # 初始化各個屬性,用於儲存傳入的引數和設定預設值
        self.paths = paths
        self.num_timed_runs = num_timed_runs
        self.num_warmup_runs = num_warmup_runs
        self.min_time = min_time
        self.imgsz = imgsz
        self.half = half
        self.trt = trt  # 是否執行 TensorRT 的效能分析
        # 如果 device 為 None,則自動確定使用的裝置
        self.device = device or torch.device(0 if torch.cuda.is_available() else "cpu")
    def profile(self):
        """
        Profile every model file resolved by `get_files` and print a comparison table.

        `.pt`, `.yaml` and `.yml` files are loaded as YOLO models and exported to ONNX (and to a TensorRT engine
        when `self.trt` is set, the device is not the CPU and no engine file exists yet). `.onnx` files are
        profiled as they are. Files with any other suffix are ignored.

        Returns:
            (list[dict] | None): One results dict per profiled model, or None when no files were found.
        """
        candidates = self.get_files()
        if not candidates:
            print("No matching *.pt or *.onnx files found.")
            return

        rows, results = [], []
        for model_path in candidates:
            # Default engine location: next to the model file, with an .engine suffix.
            engine_path = model_path.with_suffix(".engine")
            suffix = model_path.suffix

            if suffix == ".onnx":
                info = self.get_onnx_model_info(model_path)
                onnx_path = model_path
            elif suffix in {".pt", ".yaml", ".yml"}:
                yolo = YOLO(str(model_path))
                yolo.fuse()  # fuse first so model.info() reports the correct params and GFLOPs
                info = yolo.info()
                needs_engine = self.trt and self.device.type != "cpu" and not engine_path.is_file()
                if needs_engine:
                    engine_path = yolo.export(
                        format="engine", half=self.half, imgsz=self.imgsz, device=self.device, verbose=False
                    )
                onnx_path = yolo.export(
                    format="onnx", half=self.half, imgsz=self.imgsz, simplify=True, device=self.device, verbose=False
                )
            else:
                continue

            # TensorRT is timed before ONNX.
            engine_stats = self.profile_tensorrt_model(str(engine_path))
            onnx_stats = self.profile_onnx_model(str(onnx_path))

            stem = model_path.stem
            rows.append(self.generate_table_row(stem, onnx_stats, engine_stats, info))
            results.append(self.generate_results_dict(stem, onnx_stats, engine_stats, info))

        self.print_table(rows)
        return results

    def get_files(self):
        """
        Resolve `self.paths` into a sorted list of model file paths.

        Directories contribute their `*.pt`, `*.onnx` and `*.yaml` files; paths ending in `.pt`, `.yaml` or `.yml`
        are taken as given even if they do not exist; anything else is expanded as a glob pattern.

        Returns:
            (list[Path]): The collected paths, sorted by their string form.
        """
        collected = []
        for entry in self.paths:
            entry = Path(entry)
            if entry.is_dir():
                for pattern in ("*.pt", "*.onnx", "*.yaml"):
                    collected.extend(glob.glob(str(entry / pattern)))
            elif entry.suffix in {".pt", ".yaml", ".yml"}:
                # Kept even when missing on disk, so it can be resolved when the model is loaded.
                collected.append(str(entry))
            else:
                collected.extend(glob.glob(str(entry)))

        ordered = sorted(collected)
        print(f"Profiling: {ordered}")
        return [Path(name) for name in ordered]

    def get_onnx_model_info(self, onnx_file: str):
        """
        Return `(num_layers, num_params, num_gradients, num_flops)` for an ONNX model file.

        Not implemented yet: `onnx_file` is ignored and every value is 0.0.
        """
        num_layers = num_params = num_gradients = num_flops = 0.0
        return num_layers, num_params, num_gradients, num_flops
    def iterative_sigma_clipping(data, sigma=2, max_iters=3):
        """Applies an iterative sigma clipping algorithm to the given data."""
        # 將資料轉換為 NumPy 陣列
        data = np.array(data)
        # 執行最大迭代次數的迴圈
        for _ in range(max_iters):
            # 計算資料的平均值和標準差
            mean, std = np.mean(data), np.std(data)
            # 根據均值和標準差進行 sigma 剪下,並獲取剪下後的資料
            clipped_data = data[(data > mean - sigma * std) & (data < mean + sigma * std)]
            # 如果剪下後的資料和原資料長度相同,則退出迴圈
            if len(clipped_data) == len(data):
                break
            # 更新資料為剪下後的資料,繼續下一次迭代
            data = clipped_data
        # 返回最終剪下後的資料
        return data

    def profile_tensorrt_model(self, engine_file: str, eps: float = 1e-3):
        """
        Time a TensorRT engine and return the mean and standard deviation of its inference times.

        Args:
            engine_file (str): Path to the `.engine` file.
            eps (float): Small constant added to the warmup time to avoid dividing by zero.

        Returns:
            (tuple): `(mean, std)` of the sigma-clipped per-run `speed["inference"]` values, or `(0.0, 0.0)` when
                TensorRT profiling is disabled or the engine file does not exist.
        """
        if not (self.trt and Path(engine_file).is_file()):
            return 0.0, 0.0

        engine = YOLO(engine_file)
        dummy_image = np.random.rand(self.imgsz, self.imgsz, 3).astype(np.float32)  # must be FP32

        # Three warmup rounds; only the duration of the last round is kept for the run-count estimate.
        warmup_seconds = 0.0
        for _ in range(3):
            round_start = time.time()
            for _ in range(self.num_warmup_runs):
                engine(dummy_image, imgsz=self.imgsz, verbose=False)
            warmup_seconds = time.time() - round_start

        # Enough runs to fill `min_time`, but never fewer than 50x the configured number of timed runs.
        runs_for_min_time = round(self.min_time / (warmup_seconds + eps) * self.num_warmup_runs)
        num_runs = max(runs_for_min_time, self.num_timed_runs * 50)

        timings = []
        for _ in TQDM(range(num_runs), desc=engine_file):
            prediction = engine(dummy_image, imgsz=self.imgsz, verbose=False)
            timings.append(prediction[0].speed["inference"])

        timings = self.iterative_sigma_clipping(np.array(timings), sigma=2, max_iters=3)
        return np.mean(timings), np.std(timings)
    def profile_onnx_model(self, onnx_file: str, eps: float = 1e-3):
        """
        Time an ONNX model with ONNX Runtime on the CPU and return the mean and standard deviation of run times.

        Args:
            onnx_file (str): Path to the `.onnx` file.
            eps (float): Small constant added to the warmup time to avoid dividing by zero.

        Returns:
            (tuple): `(mean, std)` of the sigma-clipped per-run wall-clock times in milliseconds.

        Raises:
            ValueError: If the model's first input has a type other than float16/float/double/int64/int32.
        """
        check_requirements("onnxruntime")
        import onnxruntime as ort

        # Full graph optimisation, at most 8 intra-op threads, CPU execution provider only.
        options = ort.SessionOptions()
        options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        options.intra_op_num_threads = 8
        session = ort.InferenceSession(onnx_file, options, providers=["CPUExecutionProvider"])

        model_input = session.get_inputs()[0]
        onnx_type = model_input.type
        # Any non-integer or negative dimension marks a dynamic shape; fall back to one square RGB image.
        is_static = all(isinstance(dim, int) and dim >= 0 for dim in model_input.shape)
        input_shape = model_input.shape if is_static else (1, 3, self.imgsz, self.imgsz)

        # Order matters: "float16" has to be tested before the more general "float".
        dtype_by_marker = (
            ("float16", np.float16),
            ("float", np.float32),
            ("double", np.float64),
            ("int64", np.int64),
            ("int32", np.int32),
        )
        input_dtype = next((dtype for marker, dtype in dtype_by_marker if marker in onnx_type), None)
        if input_dtype is None:
            raise ValueError(f"Unsupported ONNX datatype {onnx_type}")

        feed = np.random.rand(*input_shape).astype(input_dtype)
        feed_name = model_input.name
        fetch_name = session.get_outputs()[0].name

        # Three warmup rounds; only the duration of the last round is kept for the run-count estimate.
        warmup_seconds = 0.0
        for _ in range(3):
            round_start = time.time()
            for _ in range(self.num_warmup_runs):
                session.run([fetch_name], {feed_name: feed})
            warmup_seconds = time.time() - round_start

        # Enough runs to fill `min_time`, but never fewer than the configured number of timed runs.
        runs_for_min_time = round(self.min_time / (warmup_seconds + eps) * self.num_warmup_runs)
        num_runs = max(runs_for_min_time, self.num_timed_runs)

        timings_ms = []
        for _ in TQDM(range(num_runs), desc=onnx_file):
            run_start = time.time()
            session.run([fetch_name], {feed_name: feed})
            timings_ms.append((time.time() - run_start) * 1000)  # seconds -> milliseconds

        timings_ms = self.iterative_sigma_clipping(np.array(timings_ms), sigma=2, max_iters=5)
        return np.mean(timings_ms), np.std(timings_ms)
    def generate_table_row(self, model_name, t_onnx, t_engine, model_info):
        """
        Build one pipe-delimited table row with a model's timings and size figures.

        Args:
            model_name (str): Name shown in the first column (padded to 18 characters).
            t_onnx (tuple): `(mean, std)` ONNX timings.
            t_engine (tuple): `(mean, std)` TensorRT timings.
            model_info (tuple): `(layers, params, gradients, flops)`; only params and flops are printed.

        Returns:
            (str): The formatted row; the third column is always "-".
        """
        _layers, params, _gradients, flops = model_info
        onnx_mean, onnx_std = t_onnx[0], t_onnx[1]
        engine_mean, engine_std = t_engine[0], t_engine[1]
        cells = [
            f"{model_name:18s}",
            f"{self.imgsz}",
            "-",
            f"{onnx_mean:.2f} ± {onnx_std:.2f} ms",
            f"{engine_mean:.2f} ± {engine_std:.2f} ms",
            f"{params / 1e6:.1f}",  # parameters in millions
            f"{flops:.1f}",
        ]
        return "| " + " | ".join(cells) + " |"

    @staticmethod
    # 生成包含模型名稱、引數、GFLOPS和速度指標的字典
    def generate_results_dict(model_name, t_onnx, t_engine, model_info):
        """Generates a dictionary of model details including name, parameters, GFLOPS and speed metrics."""
        layers, params, gradients, flops = model_info
        return {
            "model/name": model_name,
            "model/parameters": params,
            "model/GFLOPs": round(flops, 3),
            "model/speed_ONNX(ms)": round(t_onnx[0], 3),
            "model/speed_TensorRT(ms)": round(t_engine[0], 3),
        }

    @staticmethod
    # 格式化並列印包含不同模型統計和效能資料的比較表格
    def print_table(table_rows):
        """Formats and prints a comparison table for different models with given statistics and performance data."""
        gpu = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "GPU"
        header = (
            f"| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | "
            f"Speed<br><sup>{gpu} TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |"
        )
        separator = (
            "|-------------|---------------------|--------------------|------------------------------|"
            "-----------------------------------|------------------|-----------------|"
        )

        # 列印表格的標題和分隔線
        print(f"\n\n{header}")
        print(separator)
        # 列印每行表格內容
        for row in table_rows:
            print(row)

.\yolov8\ultralytics\utils\callbacks\base.py

# Ultralytics YOLO 🚀, AGPL-3.0 license
"""Base callbacks."""

from collections import defaultdict
from copy import deepcopy

# Trainer callbacks ----------------------------------------------------------------------------------------------------

def on_pretrain_routine_start(trainer):
    """No-op default hook: called before the trainer's pretraining routine starts."""

def on_pretrain_routine_end(trainer):
    """No-op default hook: called after the pretraining routine ends."""

def on_train_start(trainer):
    """No-op default hook: called when training starts."""

def on_train_epoch_start(trainer):
    """No-op default hook: called at the start of each training epoch."""

def on_train_batch_start(trainer):
    """No-op default hook: called at the start of each training batch."""

def optimizer_step(trainer):
    """No-op default hook: called when the optimizer takes a step."""
    pass


def on_before_zero_grad(trainer):
    """
    No-op default for the `on_before_zero_grad` hook.

    `default_callbacks` registers this name, so it has to exist at module level; without it the module raises
    NameError on import.
    """
    pass

def on_train_batch_end(trainer):
    """No-op default hook: called at the end of each training batch."""

def on_train_epoch_end(trainer):
    """No-op default hook: called at the end of each training epoch."""

def on_fit_epoch_end(trainer):
    """No-op default hook: called at the end of each fit epoch (training plus validation)."""

def on_model_save(trainer):
    """No-op default hook: called when the model is saved."""

def on_train_end(trainer):
    """No-op default hook: called when training ends."""

def on_params_update(trainer):
    """No-op default hook: called when the model parameters are updated."""

# 在訓練過程拆除時呼叫
def teardown(trainer):
    pass

# Validator callbacks --------------------------------------------------------------------------------------------------

def on_val_start(validator):
    """No-op default hook: called when validation starts."""

def on_val_batch_start(validator):
    """No-op default hook: called at the start of each validation batch."""

def on_val_batch_end(validator):
    """No-op default hook: called at the end of each validation batch."""

def on_val_end(validator):
    """No-op default hook: called when validation ends."""

# Predictor callbacks --------------------------------------------------------------------------------------------------

def on_predict_start(predictor):
    """No-op default hook: called when prediction starts."""

def on_predict_batch_start(predictor):
    """No-op default hook: called at the start of each prediction batch."""

def on_predict_batch_end(predictor):
    """No-op default hook: called at the end of each prediction batch."""

def on_predict_postprocess_end(predictor):
    """No-op default hook: called after prediction post-processing ends."""

def on_predict_end(predictor):
    """No-op default hook: called when prediction ends."""

# Exporter callbacks ---------------------------------------------------------------------------------------------------

def on_export_start(exporter):
    """No-op default hook: called when model export starts."""

def on_export_end(exporter):
    """No-op default hook: called when model export ends."""
# Registry of default callbacks: every hook name maps to a one-element list holding the module-level function of
# the same name. Insertion order follows the listing below.
default_callbacks = {
    hook.__name__: [hook]
    for hook in (
        # Run in the trainer
        on_pretrain_routine_start,
        on_pretrain_routine_end,
        on_train_start,
        on_train_epoch_start,
        on_train_batch_start,
        optimizer_step,
        on_before_zero_grad,
        on_train_batch_end,
        on_train_epoch_end,
        on_fit_epoch_end,  # fit = train + val
        on_model_save,
        on_train_end,
        on_params_update,
        teardown,
        # Run in the validator
        on_val_start,
        on_val_batch_start,
        on_val_batch_end,
        on_val_end,
        # Run in the predictor
        on_predict_start,
        on_predict_batch_start,
        on_predict_postprocess_end,
        on_predict_batch_end,
        on_predict_end,
        # Run in the exporter
        on_export_start,
        on_export_end,
    )
}


def get_default_callbacks():
    """
    Return an independent copy of `default_callbacks` whose missing keys default to an empty list.

    Returns:
        (defaultdict): Deep copy of `default_callbacks` wrapped in a `defaultdict(list)`.
    """
    # Deep copy so that appending to a returned list never changes the module-level defaults.
    callbacks_copy = deepcopy(default_callbacks)
    return defaultdict(list, callbacks_copy)


def add_integration_callbacks(instance):
    """
    Register the integration callbacks on an instance's `callbacks` mapping.

    The HUB callbacks are always added. The logging integrations (ClearML, Comet, DVC, MLflow, Neptune, Ray Tune,
    TensorBoard, W&B) are added only when the instance's class name contains "Trainer". A callback already
    present for an event is not added a second time.

    Args:
        instance (Trainer, Predictor, Validator, Exporter): Object with a `callbacks` attribute mapping event
            names to lists of callback functions.
    """
    from .hub import callbacks as hub_cb

    sources = [hub_cb]

    if "Trainer" in instance.__class__.__name__:
        # Imported lazily so these modules are only loaded for training.
        from .clearml import callbacks as clear_cb
        from .comet import callbacks as comet_cb
        from .dvc import callbacks as dvc_cb
        from .mlflow import callbacks as mlflow_cb
        from .neptune import callbacks as neptune_cb
        from .raytune import callbacks as tune_cb
        from .tensorboard import callbacks as tb_cb
        from .wb import callbacks as wb_cb

        sources += [clear_cb, comet_cb, dvc_cb, mlflow_cb, neptune_cb, tune_cb, tb_cb, wb_cb]

    for source in sources:
        for event, handler in source.items():
            registered = instance.callbacks[event]
            if handler not in registered:
                registered.append(handler)

.\yolov8\ultralytics\utils\callbacks\clearml.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

# 引入必要的日誌器、設定和測試執行狀態的標誌
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING

# Import ClearML only when logging is wanted; if any check below fails, `clearml` is set to None instead.
try:
    # Do not log while the test suite is running (TESTS_RUNNING flag)
    assert not TESTS_RUNNING
    # The ClearML integration must be explicitly enabled in SETTINGS
    assert SETTINGS["clearml"] is True
    import clearml
    from clearml import Task

    # Require the imported module to expose a `__version__` attribute
    assert hasattr(clearml, "__version__")

except (ImportError, AssertionError):
    clearml = None


def _log_debug_samples(files, title="Debug Samples") -> None:
    """
    Log files (images) as debug samples in the ClearML task.

    Does nothing when there is no current ClearML task. Files that do not exist are skipped. A `_batch<N>` marker
    in the file name supplies the iteration number and is removed from the series name; files without the marker
    are logged under their full name at iteration 0.

    Args:
        files (list): A list of file paths in PosixPath format.
        title (str): A title that groups together images with the same values.
    """
    import re

    task = Task.current_task()
    if not task:
        return

    for f in files:
        if not f.exists():
            continue
        it = re.search(r"_batch(\d+)", f.name)
        if it:
            iteration = int(it.groups()[0])
            series = f.name.replace(it.group(), "")
        else:
            # No batch marker: calling `it.group()` here used to raise AttributeError on None.
            iteration = 0
            series = f.name
        task.get_logger().report_image(title=title, series=series, local_path=str(f), iteration=iteration)


def _log_plot(title, plot_path) -> None:
    """
    Log a saved image as a plot in the plot section of ClearML.

    The image is drawn on a frameless, tick-free Matplotlib axes that fills the whole figure, and the figure is
    reported to the current ClearML task as a non-interactive plot.

    Args:
        title (str): The title of the plot.
        plot_path (str): The path to the saved image file.
    """
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt

    image = mpimg.imread(plot_path)
    figure = plt.figure()
    # Axes spanning the full figure, without frame or ticks
    axes = figure.add_axes([0, 0, 1, 1], frameon=False, aspect="auto", xticks=[], yticks=[])
    axes.imshow(image)

    clearml_logger = Task.current_task().get_logger()
    clearml_logger.report_matplotlib_figure(title=title, series="", figure=figure, report_interactive=False)


def on_pretrain_routine_start(trainer):
    """
    Connect the training run to ClearML at the start of the pretraining routine.

    An existing current task is reused, with ClearML's automatic PyTorch and Matplotlib bindings switched off
    because this integration logs plots and model files itself. Otherwise a new task is initialised. In both cases
    the trainer arguments are connected to the task under the name "General". Any exception is logged as a warning
    and not re-raised.
    """
    try:
        task = Task.current_task()
        if task:
            # WARNING: make sure the automatic pytorch and matplotlib bindings are disabled!
            # We are logging these plots and model files manually in the integration
            from clearml.binding.frameworks.pytorch_bind import PatchPyTorchModelIO
            from clearml.binding.matplotlib_bind import PatchedMatplotlib

            PatchPyTorchModelIO.update_current_task(None)
            PatchedMatplotlib.update_current_task(None)
        else:
            init_options = dict(
                project_name=trainer.args.project or "YOLOv8",
                task_name=trainer.args.name,
                tags=["YOLOv8"],
                output_uri=True,
                reuse_last_task_id=False,
                auto_connect_frameworks={"pytorch": False, "matplotlib": False},
            )
            task = Task.init(**init_options)
            LOGGER.warning(
                "ClearML Initialized a new task. If you want to run remotely, "
                "please add clearml-init and connect your arguments before initializing YOLO."
            )
        task.connect(vars(trainer.args), name="General")
    except Exception as e:
        # Logging must never break training: report the problem and carry on without ClearML.
        LOGGER.warning(f"WARNING ⚠️ ClearML installed but not initialized correctly, not logging this run. {e}")
def on_train_epoch_end(trainer):
    """Log mosaic debug samples when `trainer.epoch == 1` and report train losses and learning rates."""
    task = Task.current_task()
    if not task:
        return  # no active ClearML task, nothing to report

    # Debug samples are only uploaded when trainer.epoch equals 1
    if trainer.epoch == 1:
        mosaics = sorted(trainer.save_dir.glob("train_batch*.jpg"))
        _log_debug_samples(mosaics, "Mosaic")

    # Training loss items, one scalar series per item
    train_losses = trainer.label_loss_items(trainer.tloss, prefix="train")
    for series, value in train_losses.items():
        task.get_logger().report_scalar("train", series, value, iteration=trainer.epoch)

    # Learning rates, one scalar series per entry in trainer.lr
    for series, value in trainer.lr.items():
        task.get_logger().report_scalar("lr", series, value, iteration=trainer.epoch)


def on_fit_epoch_end(trainer):
    """Report the epoch time, validation metrics and (when `trainer.epoch == 0`) model info to ClearML."""
    task = Task.current_task()
    if not task:
        return  # no active ClearML task, nothing to report

    # Duration of the epoch
    task.get_logger().report_scalar(
        title="Epoch Time", series="Epoch Time", value=trainer.epoch_time, iteration=trainer.epoch
    )

    # Validation metrics
    for series, value in trainer.metrics.items():
        task.get_logger().report_scalar("val", series, value, iteration=trainer.epoch)

    # Model info is reported as single values, and only for epoch 0
    if trainer.epoch != 0:
        return

    from ultralytics.utils.torch_utils import model_info_for_loggers

    for key, value in model_info_for_loggers(trainer).items():
        task.get_logger().report_single_value(key, value)


def on_val_end(validator):
    """Log the `val*.jpg` images found in the validator's save directory as "Validation" debug samples."""
    if not Task.current_task():
        return  # no active ClearML task, nothing to log

    validation_images = sorted(validator.save_dir.glob("val*.jpg"))
    _log_debug_samples(validation_images, "Validation")


def on_train_end(trainer):
    """Log the final plots, final metrics and the best model to ClearML when training completes."""
    task = Task.current_task()
    if not task:
        return  # no active ClearML task, nothing to log

    # Candidate result plots: overall results, confusion matrices and the per-metric curves
    plot_names = ["results.png", "confusion_matrix.png", "confusion_matrix_normalized.png"]
    plot_names.extend(f"{metric}_curve.png" for metric in ("F1", "PR", "P", "R"))

    # Keep only the plots that exist in the save directory, then log them
    existing_plots = [trainer.save_dir / name for name in plot_names if (trainer.save_dir / name).exists()]
    for plot in existing_plots:
        _log_plot(title=plot.stem, plot_path=plot)

    # Final metrics as single values
    final_metrics = trainer.validator.metrics.results_dict
    for key, value in final_metrics.items():
        task.get_logger().report_single_value(key, value)

    # Register the best checkpoint as the task's output model
    task.update_output_model(model_path=str(trainer.best), model_name=trainer.args.name, auto_delete_file=False)


# Callbacks are registered only when `clearml` is truthy; otherwise the mapping is empty
if clearml:
    callbacks = {
        "on_pretrain_routine_start": on_pretrain_routine_start,
        "on_train_epoch_end": on_train_epoch_end,
        "on_fit_epoch_end": on_fit_epoch_end,
        "on_val_end": on_val_end,
        "on_train_end": on_train_end,
    }
else:
    callbacks = {}

.\yolov8\ultralytics\utils\callbacks\comet.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

# 匯入必要的模組和變數
from ultralytics.utils import LOGGER, RANK, SETTINGS, TESTS_RUNNING, ops

try:
    # Do not log while pytest is running
    assert not TESTS_RUNNING
    # Verify that the Comet integration is enabled in the settings
    assert SETTINGS["comet"] is True

    # Import comet_ml and verify it exposes a version attribute
    import comet_ml
    assert hasattr(comet_ml, "__version__")

    import os
    from pathlib import Path

    # Tasks for which image-prediction logging is supported
    COMET_SUPPORTED_TASKS = ["detect"]

    # Names of the plots created by YOLOv8 that are logged to Comet
    EVALUATION_PLOT_NAMES = "F1_curve", "P_curve", "R_curve", "PR_curve", "confusion_matrix"
    LABEL_PLOT_NAMES = "labels", "labels_correlogram"

    # Running count of images logged with predictions
    _comet_image_prediction_count = 0

except (ImportError, AssertionError):
    # Import or assertion failure: mark the integration as unavailable
    comet_ml = None


def _get_comet_mode():
    """返回在環境變數中設定的 Comet 模式,如果未設定則預設為 'online'。"""
    return os.getenv("COMET_MODE", "online")


def _get_comet_model_name():
    """返回 Comet 的模型名稱,從環境變數 'COMET_MODEL_NAME' 獲取,如果未設定則預設為 'YOLOv8'。"""
    return os.getenv("COMET_MODEL_NAME", "YOLOv8")


def _get_eval_batch_logging_interval():
    """從環境變數中獲取評估批次的日誌記錄間隔,如果未設定則使用預設值 1。"""
    return int(os.getenv("COMET_EVAL_BATCH_LOGGING_INTERVAL", 1))


def _get_max_image_predictions_to_log():
    """從環境變數中獲取要記錄的最大影像預測數。"""
    return int(os.getenv("COMET_MAX_IMAGE_PREDICTIONS", 100))


def _scale_confidence_score(score):
    """按環境變數中指定的因子對給定的置信度分數進行縮放。"""
    scale = float(os.getenv("COMET_MAX_CONFIDENCE_SCORE", 100.0))
    return score * scale


def _should_log_confusion_matrix():
    """根據環境變數的設定確定是否記錄混淆矩陣。"""
    return os.getenv("COMET_EVAL_LOG_CONFUSION_MATRIX", "false").lower() == "true"


def _should_log_image_predictions():
    """根據指定的環境變數確定是否記錄影像預測。"""
    return os.getenv("COMET_EVAL_LOG_IMAGE_PREDICTIONS", "true").lower() == "true"


def _get_experiment_type(mode, project_name):
    """Return a `comet_ml.OfflineExperiment` when `mode` is 'offline', otherwise a `comet_ml.Experiment`."""
    experiment_cls = comet_ml.OfflineExperiment if mode == "offline" else comet_ml.Experiment
    return experiment_cls(project_name=project_name)


def _create_experiment(args):
    """Create the Comet experiment and log the run arguments; does nothing unless RANK is -1 or 0."""
    if RANK not in (-1, 0):
        return
    try:
        # The project name comes from COMET_PROJECT_NAME, falling back to args.project
        project = os.getenv("COMET_PROJECT_NAME", args.project)
        experiment = _get_experiment_type(_get_comet_mode(), project)

        # Log the run arguments as parameters
        experiment.log_parameters(vars(args))

        # Log the environment-driven logging settings alongside the run
        logging_settings = {
            "eval_batch_logging_interval": _get_eval_batch_logging_interval(),
            "log_confusion_matrix_on_eval": _should_log_confusion_matrix(),
            "log_image_predictions": _should_log_image_predictions(),
            "max_image_predictions": _get_max_image_predictions_to_log(),
        }
        experiment.log_others(logging_settings)
        experiment.log_other("Created from", "yolov8")

    except Exception as e:
        # Initialization problems are reported as a warning instead of being propagated
        LOGGER.warning(f"WARNING ⚠️ Comet installed but not initialized correctly, not logging this run. {e}")
# 返回訓練器的後設資料,包括當前輪次和資產儲存狀態
def _fetch_trainer_metadata(trainer):
    # 獲取當前輪次(加1是因為epoch從0開始計數)
    curr_epoch = trainer.epoch + 1

    # 計算每個輪次的訓練步數
    train_num_steps_per_epoch = len(trainer.train_loader.dataset) // trainer.batch_size
    curr_step = curr_epoch * train_num_steps_per_epoch
    # 判斷是否是最後一個輪次
    final_epoch = curr_epoch == trainer.epochs

    # 讀取訓練器引數
    save = trainer.args.save
    save_period = trainer.args.save_period
    # 判斷是否需要儲存資產
    save_interval = curr_epoch % save_period == 0
    save_assets = save and save_period > 0 and save_interval and not final_epoch

    # 返回後設資料字典
    return dict(curr_epoch=curr_epoch, curr_step=curr_step, save_assets=save_assets, final_epoch=final_epoch)


def _scale_bounding_box_to_original_image_shape(box, resized_image_shape, original_image_shape, ratio_pad):
    """
    Rescale a normalized bounding-box label to the original image shape.

    YOLOv8 resizes images during training and the label values are normalized based on this resized shape. This
    function maps the label back onto the original image and returns it as a list whose first two values are the
    box's top-left corner followed by its width and height.
    """
    height, width = resized_image_shape

    # Normalized xywh -> xyxy at the resized image scale
    xyxy_resized = ops.xywhn2xyxy(box, h=height, w=width)
    # Resized image scale -> original image scale
    xyxy_original = ops.scale_boxes(resized_image_shape, xyxy_resized, original_image_shape, ratio_pad)
    # xyxy -> xywh, the layout used for Comet logging
    xywh = ops.xyxy2xywh(xyxy_original)
    # Shift the center point by half the width/height so it refers to the top-left corner
    xywh[:2] -= xywh[2:] / 2

    return xywh.tolist()


def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None):
    """Build the 'ground_truth' annotation entry for one image of a batch, or None if it has no boxes."""
    # Select the labels that belong to this image of the batch
    selected = batch["batch_idx"] == img_idx
    bboxes = batch["bboxes"][selected]
    if len(bboxes) == 0:
        LOGGER.debug(f"COMET WARNING: Image: {image_path} has no bounding boxes labels")
        return None

    # Class labels, optionally mapped through class_name_map
    cls_labels = batch["cls"][selected].squeeze(1).tolist()
    if class_name_map:
        cls_labels = [str(class_name_map[label]) for label in cls_labels]

    # Shapes and padding ratio needed to map the boxes back onto the original image
    original_image_shape = batch["ori_shape"][img_idx]
    resized_image_shape = batch["resized_shape"][img_idx]
    ratio_pad = batch["ratio_pad"][img_idx]

    data = [
        {
            "boxes": [
                _scale_bounding_box_to_original_image_shape(
                    box, resized_image_shape, original_image_shape, ratio_pad
                )
            ],
            "label": f"gt_{label}",
            "score": _scale_confidence_score(1.0),
        }
        for box, label in zip(bboxes, cls_labels)
    ]

    return {"name": "ground_truth", "data": data}


def _format_prediction_annotations_for_detection(image_path, metadata, class_label_map=None):
    """Build the 'prediction' annotation entry for one image, or None if it has no predictions."""
    # Predictions are keyed by the file stem, converted to int when it is numeric
    stem = image_path.stem
    image_id = int(stem) if stem.isnumeric() else stem

    predictions = metadata.get(image_id)
    if not predictions:
        LOGGER.debug(f"COMET WARNING: Image: {image_path} has no bounding boxes predictions")
        return None

    data = []
    for prediction in predictions:
        box = prediction["bbox"]
        scaled_score = _scale_confidence_score(prediction["score"])
        category = prediction["category_id"]
        # Translate the category id into its label string when a map is provided
        label = str(class_label_map[category]) if class_label_map else category
        data.append({"boxes": [box], "label": label, "score": scaled_score})

    return {"name": "prediction", "data": data}
def _fetch_annotations(img_idx, image_path, batch, prediction_metadata_map, class_label_map):
    """Return `[annotations]` holding the ground-truth and prediction entries for one image, or None if both are missing."""
    ground_truth = _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_label_map)
    prediction = _format_prediction_annotations_for_detection(image_path, prediction_metadata_map, class_label_map)

    # Keep only the entries that were produced
    annotations = []
    for entry in (ground_truth, prediction):
        if entry is not None:
            annotations.append(entry)

    if not annotations:
        return None
    return [annotations]


# 建立基於影像 ID 分組的模型預測後設資料對映
def _create_prediction_metadata_map(model_predictions):
    pred_metadata_map = {}
    for prediction in model_predictions:
        pred_metadata_map.setdefault(prediction["image_id"], [])
        pred_metadata_map[prediction["image_id"]].append(prediction)

    return pred_metadata_map


# 將混淆矩陣記錄到 Comet 實驗中
def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch):
    conf_mat = trainer.validator.confusion_matrix.matrix
    names = list(trainer.data["names"].values()) + ["background"]
    # 記錄混淆矩陣到 Comet 實驗中
    experiment.log_confusion_matrix(
        matrix=conf_mat, labels=names, max_categories=len(names), epoch=curr_epoch, step=curr_step
    )


# 記錄影像到 Comet 實驗中,可以選擇包含註釋
def _log_images(experiment, image_paths, curr_step, annotations=None):
    if annotations:
        # 對於每個影像路徑和對應的註釋,記錄影像到 Comet 實驗中
        for image_path, annotation in zip(image_paths, annotations):
            experiment.log_image(image_path, name=image_path.stem, step=curr_step, annotations=annotation)
    else:
        # 對於每個影像路徑,記錄影像到 Comet 實驗中
        for image_path in image_paths:
            experiment.log_image(image_path, name=image_path.stem, step=curr_step)


# Log individual validation images with their box annotations to the Comet experiment
def _log_image_predictions(experiment, validator, curr_step):
    """
    Log images from the validator's dataloader, with ground-truth and prediction annotations, to Comet.

    Nothing is logged when the validator's task is not in COMET_SUPPORTED_TASKS or when `validator.jdict` is
    empty. Logging stops once the module-level counter `_comet_image_prediction_count` reaches the configured
    maximum number of image predictions.
    """
    global _comet_image_prediction_count

    task = validator.args.task
    if task not in COMET_SUPPORTED_TASKS:
        return

    jdict = validator.jdict
    if not jdict:
        return

    # Group the predictions by image id
    predictions_metadata_map = _create_prediction_metadata_map(jdict)
    dataloader = validator.dataloader
    class_label_map = validator.names

    # Batch logging interval and the maximum number of images to log
    batch_logging_interval = _get_eval_batch_logging_interval()
    max_image_predictions = _get_max_image_predictions_to_log()
    for batch_idx, batch in enumerate(dataloader):
        # Only every `batch_logging_interval`-th batch is logged
        if (batch_idx + 1) % batch_logging_interval != 0:
            continue

        # File paths of the images in this batch
        image_paths = batch["im_file"]

        for img_idx, image_path in enumerate(image_paths):
            # Stop as soon as the number of logged images has reached the maximum
            if _comet_image_prediction_count >= max_image_predictions:
                return

            image_path = Path(image_path)

            # Ground-truth and prediction annotations for this image
            annotations = _fetch_annotations(
                img_idx,
                image_path,
                batch,
                predictions_metadata_map,
                class_label_map,
            )

            # Log the image together with its annotations
            _log_images(
                experiment,
                [image_path],
                curr_step,
                annotations=annotations,
            )

            # Count the image as logged
            _comet_image_prediction_count += 1
def _log_plots(experiment, trainer):
    """Log the evaluation plots (`.png`) and then the label plots (`.jpg`) from the trainer's save directory."""
    for plot_names, extension in ((EVALUATION_PLOT_NAMES, "png"), (LABEL_PLOT_NAMES, "jpg")):
        plot_files = [trainer.save_dir / f"{plot_name}.{extension}" for plot_name in plot_names]
        _log_images(experiment, plot_files, None)


def _log_model(experiment, trainer):
    """Log the trainer's best checkpoint to Comet as 'best.pt' under the configured model name."""
    experiment.log_model(
        _get_comet_model_name(),
        file_or_folder=str(trainer.best),
        file_name="best.pt",
        overwrite=True,
    )


def on_pretrain_routine_start(trainer):
    """Create a Comet experiment at the start of the pretraining routine unless a live global one exists."""
    experiment = comet_ml.get_global_experiment()
    # An existing experiment is reused only when it reports itself as alive
    if experiment and getattr(experiment, "alive", False):
        return
    _create_experiment(trainer.args)


def on_train_epoch_end(trainer):
    """Log the training losses to Comet at the end of each training epoch, plus train batch images on epoch 1."""
    experiment = comet_ml.get_global_experiment()
    if not experiment:
        return

    metadata = _fetch_trainer_metadata(trainer)
    epoch, step = metadata["curr_epoch"], metadata["curr_step"]

    # Training loss items
    train_losses = trainer.label_loss_items(trainer.tloss, prefix="train")
    experiment.log_metrics(train_losses, step=step, epoch=epoch)

    # Training batch images are logged only when the 1-based epoch number is 1
    if epoch == 1:
        _log_images(experiment, trainer.save_dir.glob("train_batch*.jpg"), step)


def on_fit_epoch_end(trainer):
    """Log metrics, learning rates and, when assets are due for this epoch, model assets to Comet."""
    experiment = comet_ml.get_global_experiment()
    if not experiment:
        return

    metadata = _fetch_trainer_metadata(trainer)
    epoch, step = metadata["curr_epoch"], metadata["curr_step"]
    assets_due = metadata["save_assets"]

    # Metrics and learning rates
    experiment.log_metrics(trainer.metrics, step=step, epoch=epoch)
    experiment.log_metrics(trainer.lr, step=step, epoch=epoch)

    # Model info is logged when the 1-based epoch number is 1
    if epoch == 1:
        from ultralytics.utils.torch_utils import model_info_for_loggers
        experiment.log_metrics(model_info_for_loggers(trainer), step=step, epoch=epoch)

    if assets_due:
        # Best model first, then the optional confusion matrix and image predictions
        _log_model(experiment, trainer)

        if _should_log_confusion_matrix():
            _log_confusion_matrix(experiment, trainer, step, epoch)

        if _should_log_image_predictions():
            _log_image_predictions(experiment, trainer.validator, step)


def on_train_end(trainer):
    """Log the final model, plots, confusion matrix and image predictions, then end the Comet experiment."""
    global _comet_image_prediction_count

    experiment = comet_ml.get_global_experiment()
    if not experiment:
        return

    metadata = _fetch_trainer_metadata(trainer)
    epoch, step = metadata["curr_epoch"], metadata["curr_step"]
    plots_enabled = trainer.args.plots

    # Best model
    _log_model(experiment, trainer)

    # Plots are logged only when enabled in the trainer arguments
    if plots_enabled:
        _log_plots(experiment, trainer)

    # Confusion matrix and image predictions
    _log_confusion_matrix(experiment, trainer, step, epoch)
    _log_image_predictions(experiment, trainer.validator, step)

    # Close the experiment and reset the logged-image counter
    experiment.end()
    _comet_image_prediction_count = 0
# Callbacks are registered only when `comet_ml` is truthy; otherwise the mapping is empty
if comet_ml:
    callbacks = {
        "on_pretrain_routine_start": on_pretrain_routine_start,
        "on_train_epoch_end": on_train_epoch_end,
        "on_fit_epoch_end": on_fit_epoch_end,
        "on_train_end": on_train_end,
    }
else:
    callbacks = {}

.\yolov8\ultralytics\utils\callbacks\dvc.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

# 匯入必要的模組和變數
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, checks

try:
    # Do not log while pytest is running
    assert not TESTS_RUNNING
    # Verify that the DVC integration is enabled in the settings
    assert SETTINGS["dvc"] is True
    # Import dvclive
    import dvclive

    # Verify the dvclive version requirement
    assert checks.check_version("dvclive", "2.11.0", verbose=True)

    import os
    import re
    from pathlib import Path

    # DVCLive logger instance
    live = None
    # Plots already processed, mapped to the timestamp they were logged with
    _processed_plots = {}

    # `on_fit_epoch_end` is also called on final validation (probably needs a fix); for now this flag is how
    # the best model's final evaluation is told apart from the last epoch's validation
    _training_epoch = False

except (ImportError, AssertionError, TypeError):
    # Any of these failures marks the integration as unavailable
    dvclive = None


def _log_images(path, prefix=""):
    """Log the image at `path` with DVCLive under an optional `prefix`; does nothing without an active logger."""
    if not live:
        return

    name = path.name

    # Group images by batch so they can be browsed with a slider in the UI: a `_batch<N>` name becomes
    # `<stem with "_batch">/<N><suffix>`
    match = re.search(r"_batch(\d+)", name)
    if match:
        batch_number = match[1]
        grouped_stem = re.sub(r"_batch(\d+)", "_batch", path.stem)
        name = (Path(grouped_stem) / batch_number).with_suffix(path.suffix)

    live.log_image(os.path.join(prefix, name), path)


def _log_plots(plots, prefix=""):
    """Log every plot whose timestamp differs from the one recorded the last time it was processed."""
    for plot_path, params in plots.items():
        stamp = params["timestamp"]
        if _processed_plots.get(plot_path) == stamp:
            continue  # already logged with this timestamp
        _log_images(plot_path, prefix)
        _processed_plots[plot_path] = stamp


def _log_confusion_matrix(validator):
    """Log the validator's confusion matrix with DVCLive as a normalized sklearn 'confusion_matrix' plot."""
    matrix = validator.confusion_matrix.matrix
    names = list(validator.names.values())
    if validator.confusion_matrix.task == "detect":
        names.append("background")

    # Expand the count matrix into parallel target/prediction label lists
    targets, preds = [], []
    for target_idx, counts in enumerate(matrix.T.astype(int)):
        for pred_idx, count in enumerate(counts):
            targets.extend([names[target_idx]] * count)
            preds.extend([names[pred_idx]] * count)

    live.log_sklearn_plot("confusion_matrix", targets, preds, name="cf.json", normalized=True)


def on_pretrain_routine_start(trainer):
    """Create the module-level DVCLive logger when the pretraining routine starts."""
    global live
    try:
        live = dvclive.Live(save_dvc_exp=True, cache_images=True)
        LOGGER.info("DVCLive is detected and auto logging is enabled (run 'yolo settings dvc=False' to disable).")
    except Exception as e:
        # Initialization problems are reported as a warning instead of being propagated
        LOGGER.warning(f"WARNING ⚠️ DVCLive installed but not initialized correctly, not logging this run. {e}")


def on_pretrain_routine_end(trainer):
    """Log the trainer's plots under the 'train' prefix at the end of the pretraining routine."""
    _log_plots(trainer.plots, "train")


def on_train_start(trainer):
    """Log the training arguments with DVCLive when the logger is active."""
    if not live:
        return
    live.log_params(trainer.args)


def on_train_epoch_start(trainer):
    """Set the module-level `_training_epoch` flag to True at the start of each training epoch."""
    global _training_epoch
    # `on_fit_epoch_end` only logs while this flag is set, and clears it afterwards
    _training_epoch = True
def on_fit_epoch_end(trainer):
    """Log metrics, model info and plots for a training epoch with DVCLive, then advance to the next step."""
    global _training_epoch
    # Only log when the logger is active and a training epoch has started since the last call
    if not (live and _training_epoch):
        return

    # Training losses, metrics and learning rates merged into one mapping
    epoch_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics, **trainer.lr}
    for name, value in epoch_metrics.items():
        live.log_metric(name, value)

    # Model info is logged for epoch 0 only, without plotting
    if trainer.epoch == 0:
        from ultralytics.utils.torch_utils import model_info_for_loggers

        for name, value in model_info_for_loggers(trainer).items():
            live.log_metric(name, value, plot=False)

    # Training plots, then validation plots
    _log_plots(trainer.plots, "train")
    _log_plots(trainer.validator.plots, "val")

    # Advance DVCLive to the next step and clear the flag
    live.next_step()
    _training_epoch = False


def on_train_end(trainer):
    """Log the final metrics, plots, confusion matrix and best model with DVCLive, then end the session."""
    if not live:
        return

    # Final training losses, metrics and learning rates, logged without plotting
    final_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics, **trainer.lr}
    for name, value in final_metrics.items():
        live.log_metric(name, value, plot=False)

    # Trainer and validator plots are both logged under the "val" prefix
    _log_plots(trainer.plots, "val")
    _log_plots(trainer.validator.plots, "val")

    # Confusion matrix of the validator
    _log_confusion_matrix(trainer.validator)

    # The best checkpoint is logged as a model artifact when the file exists
    best_checkpoint = trainer.best
    if best_checkpoint.exists():
        live.log_artifact(best_checkpoint, copy=True, type="model")

    # End the DVCLive session
    live.end()


# Callbacks are registered only when `dvclive` is truthy; otherwise the mapping is empty
if dvclive:
    callbacks = {
        "on_pretrain_routine_start": on_pretrain_routine_start,
        "on_pretrain_routine_end": on_pretrain_routine_end,
        "on_train_start": on_train_start,
        "on_train_epoch_start": on_train_epoch_start,
        "on_fit_epoch_end": on_fit_epoch_end,
        "on_train_end": on_train_end,
    }
else:
    callbacks = {}

.\yolov8\ultralytics\utils\callbacks\hub.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

import json
from time import time

from ultralytics.hub import HUB_WEB_ROOT, PREFIX, HUBTrainingSession, events
from ultralytics.utils import LOGGER, RANK, SETTINGS


def on_pretrain_routine_start(trainer):
    """Create a remote Ultralytics HUB session to log local model training."""
    # Only when RANK is -1 or 0
    if RANK not in {-1, 0}:
        return
    # Only when HUB is enabled in the settings and an API key is set
    if SETTINGS["hub"] is not True:
        return
    if not SETTINGS["api_key"]:
        return
    # Only when the trainer has no session yet
    if trainer.hub_session is not None:
        return

    trainer.hub_session = HUBTrainingSession.create_session(trainer.args.model, trainer.args)


def on_pretrain_routine_end(trainer):
    """Start the 'metrics' and 'ckpt' timers used for upload rate limiting on the HUB session, if any."""
    session = getattr(trainer, "hub_session", None)
    if not session:
        return
    # The timers are compared against session.rate_limits by the upload callbacks
    session.timers = dict(metrics=time(), ckpt=time())


def on_fit_epoch_end(trainer):
    """Queue this epoch's metrics for the HUB session and upload the queue once the rate limit has elapsed."""
    session = getattr(trainer, "hub_session", None)
    if not session:
        return

    # Training losses and metrics; model info is added for epoch 0
    payload = dict(trainer.label_loss_items(trainer.tloss, prefix="train"))
    payload.update(trainer.metrics)
    if trainer.epoch == 0:
        from ultralytics.utils.torch_utils import model_info_for_loggers

        payload.update(model_info_for_loggers(trainer))

    # Queue the metrics as JSON under the current epoch
    session.metrics_queue[trainer.epoch] = json.dumps(payload)

    # Metrics whose upload failed earlier are put back into the queue for another attempt
    failed_uploads = session.metrics_upload_failed_queue
    if failed_uploads:
        session.metrics_queue.update(failed_uploads)

    # Upload when the rate limit has elapsed, then reset the timer and the queue
    elapsed = time() - session.timers["metrics"]
    if elapsed > session.rate_limits["metrics"]:
        session.upload_metrics()
        session.timers["metrics"] = time()
        session.metrics_queue = {}


def on_model_save(trainer):
    """Upload the latest checkpoint to Ultralytics HUB, subject to the 'ckpt' rate limit."""
    session = getattr(trainer, "hub_session", None)
    if not session:
        return

    is_best = trainer.best_fitness == trainer.fitness
    # Skip the upload while the checkpoint rate limit has not elapsed
    elapsed = time() - session.timers["ckpt"]
    if not elapsed > session.rate_limits["ckpt"]:
        return

    LOGGER.info(f"{PREFIX}Uploading checkpoint {HUB_WEB_ROOT}/models/{session.model.id}")
    session.upload_model(trainer.epoch, trainer.last, is_best)
    session.timers["ckpt"] = time()  # restart the timer


def on_train_end(trainer):
    """Upload final model and metrics to Ultralytics HUB at the end of training."""
    session = getattr(trainer, "hub_session", None)
    if not session:
        return

    LOGGER.info(f"{PREFIX}Syncing final model...")
    # Upload the best checkpoint together with the final mAP50-95 metric (0 when it is missing)
    final_map = trainer.metrics.get("metrics/mAP50-95(B)", 0)
    session.upload_model(trainer.epoch, trainer.best, map=final_map, final=True)
    # Mark the session as no longer alive
    session.alive = False
    LOGGER.info(f"{PREFIX}Done ✅\n" f"{PREFIX}View model at {session.model_url} 🚀")
# Callback run at train start: passes the trainer's arguments to `events`
def on_train_start(trainer):
    """Run events on train start."""
    events(trainer.args)


# Callback run at validation start: passes the validator's arguments to `events`
def on_val_start(validator):
    """Runs events on validation start."""
    events(validator.args)


# Callback run at predict start: passes the predictor's arguments to `events`
def on_predict_start(predictor):
    """Run events on predict start."""
    events(predictor.args)


# Callback run at export start: passes the exporter's arguments to `events`
def on_export_start(exporter):
    """Run events on export start."""
    events(exporter.args)


# Callbacks are registered only when SETTINGS["hub"] is True; otherwise the mapping is empty
if SETTINGS["hub"] is True:  # verify enabled
    callbacks = {
        "on_pretrain_routine_start": on_pretrain_routine_start,
        "on_pretrain_routine_end": on_pretrain_routine_end,
        "on_fit_epoch_end": on_fit_epoch_end,
        "on_model_save": on_model_save,
        "on_train_end": on_train_end,
        "on_train_start": on_train_start,
        "on_val_start": on_val_start,
        "on_predict_start": on_predict_start,
        "on_export_start": on_export_start,
    }
else:
    callbacks = {}

.\yolov8\ultralytics\utils\callbacks\mlflow.py

# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
MLflow Logging for Ultralytics YOLO.

This module enables MLflow logging for Ultralytics YOLO. It logs metrics, parameters, and model artifacts.
For setting up, a tracking URI should be specified. The logging can be customized using environment variables.

Commands:
    1. To set a project name:
        `export MLFLOW_EXPERIMENT_NAME=<your_experiment_name>` or use the project=<project> argument

    2. To set a run name:
        `export MLFLOW_RUN=<your_run_name>` or use the name=<name> argument

    3. To start a local MLflow server:
        mlflow server --backend-store-uri runs/mlflow
       It will by default start a local server at http://127.0.0.1:5000.
       To specify a different URI, set the MLFLOW_TRACKING_URI environment variable.

    4. To kill all running MLflow server instances:
        ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9
"""

from ultralytics.utils import LOGGER, RUNS_DIR, SETTINGS, TESTS_RUNNING, colorstr

try:
    import os

    # Under pytest, logging is only allowed for tests whose id contains "test_mlflow"
    assert not TESTS_RUNNING or "test_mlflow" in os.environ.get("PYTEST_CURRENT_TEST", "")  # do not log pytest
    assert SETTINGS["mlflow"] is True  # verify integration is enabled
    import mlflow

    assert hasattr(mlflow, "__version__")  # verify package is not directory
    from pathlib import Path

    # Prefix used for the log messages of this integration
    PREFIX = colorstr("MLflow: ")

except (ImportError, AssertionError):
    # Import or assertion failure: mark the integration as unavailable
    mlflow = None


def sanitize_dict(x):
    """Return a copy of `x` with parentheses stripped from the keys and every value converted to float."""
    cleaned = {}
    for key, value in x.items():
        plain_key = key.replace("(", "").replace(")", "")
        cleaned[plain_key] = float(value)
    return cleaned


def on_pretrain_routine_end(trainer):
    """
    Log training parameters to MLflow at the end of the pretraining routine.

    This function sets up MLflow logging based on environment variables and trainer arguments. It sets the tracking URI,
    experiment name, and run name, then starts the MLflow run if not already active. It finally logs the parameters
    from the trainer.

    Args:
        trainer (ultralytics.engine.trainer.BaseTrainer): The training object with arguments and parameters to log.

    Global:
        mlflow: The imported mlflow module to use for logging.

    Environment Variables:
        MLFLOW_TRACKING_URI: The URI for MLflow tracking. If not set, defaults to 'runs/mlflow'.
        MLFLOW_EXPERIMENT_NAME: The name of the MLflow experiment. If not set, defaults to trainer.args.project.
        MLFLOW_RUN: The name of the MLflow run. If not set, defaults to trainer.args.name.
        MLFLOW_KEEP_RUN_ACTIVE: Boolean indicating whether to keep the MLflow run active after the end of training.
    """
    global mlflow

    # Tracking URI: MLFLOW_TRACKING_URI if set, otherwise the 'mlflow' folder under RUNS_DIR
    uri = os.environ.get("MLFLOW_TRACKING_URI") or str(RUNS_DIR / "mlflow")
    LOGGER.debug(f"{PREFIX} tracking uri: {uri}")
    mlflow.set_tracking_uri(uri)

    # Experiment name: MLFLOW_EXPERIMENT_NAME, then trainer.args.project, then '/Shared/YOLOv8'
    experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or trainer.args.project or "/Shared/YOLOv8"
    # Run name: MLFLOW_RUN if set, otherwise trainer.args.name
    run_name = os.environ.get("MLFLOW_RUN") or trainer.args.name

    mlflow.set_experiment(experiment_name)

    # Enable MLflow autologging
    mlflow.autolog()

    try:
        # Reuse the active run if there is one, otherwise start a new run with the chosen name
        active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)

        LOGGER.info(f"{PREFIX}logging run_id({active_run.info.run_id}) to {uri}")

        # For a local directory URI, show how to start a local server to view the results
        if Path(uri).is_dir():
            LOGGER.info(f"{PREFIX}view at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri {uri}'")

        # Show how to disable MLflow logging
        LOGGER.info(f"{PREFIX}disable with 'yolo settings mlflow=False'")

        # Log all trainer arguments as MLflow parameters
        mlflow.log_params(dict(trainer.args))

    except Exception as e:
        # Initialization problems are reported as a warning instead of being propagated
        LOGGER.warning(f"{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n" f"{PREFIX}WARNING ⚠️ Not tracking this run")
def on_train_epoch_end(trainer):
    """Log training metrics at the end of each train epoch to MLflow."""
    if not mlflow:
        return

    # Learning rates first, then the training loss items, both with sanitized keys
    learning_rates = sanitize_dict(trainer.lr)
    train_losses = sanitize_dict(trainer.label_loss_items(trainer.tloss, prefix="train"))
    mlflow.log_metrics(metrics={**learning_rates, **train_losses}, step=trainer.epoch)


# Send the metrics gathered for a fit epoch to MLflow
def on_fit_epoch_end(trainer):
    """
    Log the trainer's current metrics to MLflow when a fit epoch ends.

    Args:
        trainer: Trainer object; its `metrics` and `epoch` attributes are read.
    """
    if not mlflow:
        return  # MLflow integration unavailable, nothing to log

    epoch_metrics = sanitize_dict(trainer.metrics)
    mlflow.log_metrics(metrics=epoch_metrics, step=trainer.epoch)


# Upload model artifacts to MLflow once training has finished
def on_train_end(trainer):
    """
    Upload the weights directory and selected result files to MLflow, then end the run unless told not to.

    The run stays open only when the MLFLOW_KEEP_RUN_ACTIVE environment variable equals "true"
    (compared case-insensitively); otherwise `mlflow.end_run()` is called.

    Args:
        trainer: Trainer object; its `best` and `save_dir` path attributes are read.
    """
    if not mlflow:
        return

    # Parent directory of the best checkpoint (described upstream as the weights folder with best.pt and last.pt)
    weights_dir = trainer.best.parent
    mlflow.log_artifact(str(weights_dir))

    # Direct children of the save directory, filtered by file extension
    loggable_suffixes = {".png", ".jpg", ".csv", ".pt", ".yaml"}
    for path in trainer.save_dir.glob("*"):
        if path.suffix in loggable_suffixes:
            mlflow.log_artifact(str(path))

    if os.environ.get("MLFLOW_KEEP_RUN_ACTIVE", "False").lower() == "true":
        LOGGER.info(f"{PREFIX}mlflow run still alive, remember to close it using mlflow.end_run()")
    else:
        mlflow.end_run()
        LOGGER.debug(f"{PREFIX}mlflow run ended")

    # Final summary: where the results went and how to switch the integration off
    tracking_uri = mlflow.get_tracking_uri()
    LOGGER.info(f"{PREFIX}results logged to {tracking_uri}\n{PREFIX}disable with 'yolo settings mlflow=False'")


# Callback registry for this integration: stays empty unless the module-level `mlflow` name is truthy
callbacks = {}
if mlflow:
    callbacks = {
        "on_pretrain_routine_end": on_pretrain_routine_end,
        "on_train_epoch_end": on_train_epoch_end,
        "on_fit_epoch_end": on_fit_epoch_end,
        "on_train_end": on_train_end,
    }

.\yolov8\ultralytics\utils\callbacks\neptune.py

# Ultralytics YOLO 🚀, AGPL-3.0 license

# 從 ultralytics.utils 模組匯入 LOGGER、SETTINGS 和 TESTS_RUNNING
from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING

# Enable the Neptune integration only if every precondition inside the try block holds
try:
    assert not TESTS_RUNNING  # do not log pytest test runs
    # The integration must be explicitly switched on in SETTINGS
    assert SETTINGS["neptune"] is True  # verify the Neptune integration is enabled
    import neptune
    from neptune.types import File

    # NOTE(review): presumably rejects an imported `neptune` that is not the real package — confirm
    assert hasattr(neptune, "__version__")

    run = None  # NeptuneAI experiment logger instance

except (ImportError, AssertionError):
    # A failed import or assertion disables the integration by setting `neptune` to None
    neptune = None


def _log_scalars(scalars, step=0):
    """
    Append scalar values to their series in the active NeptuneAI run.

    Args:
        scalars (dict): Mapping of series key to the value to append.
        step (int): Step index passed along with every value. Defaults to 0.
    """
    if not run:
        return  # no active run, so nothing is logged
    for name, value in scalars.items():
        run[name].append(value=value, step=step)


def _log_images(imgs_dict, group=""):
    """
    Upload images to the active NeptuneAI run.

    Args:
        imgs_dict (dict): Mapping of image name to the value wrapped in `File(...)`; callers in this
            module pass file path strings.
        group (str): Namespace prefix; every image is stored under "{group}/{name}". Defaults to "".
    """
    if not run:
        return  # no active run, so nothing is uploaded
    for name, image in imgs_dict.items():
        target = f"{group}/{name}"
        run[target].upload(File(image))


def _log_plot(title, plot_path):
    """
    Log plots to the NeptuneAI experiment logger.

    The image file is read, drawn on a borderless, tick-free Matplotlib figure and uploaded under
    "Plots/{title}". The figure is closed afterwards so repeated calls do not leave open figures behind.

    Args:
        title (str): Title of the plot; used as the key suffix under "Plots/".
        plot_path (PosixPath | str): Path to the saved image file.
    """
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt

    # Read the image file from disk
    img = mpimg.imread(plot_path)
    fig = plt.figure()
    try:
        # Axes span the whole figure, with no frame and no ticks
        ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect="auto", xticks=[], yticks=[])
        ax.imshow(img)
        # Upload the rendered figure to the NeptuneAI experiment logger
        run[f"Plots/{title}"].upload(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is explicitly closed
        plt.close(fig)


def on_pretrain_routine_start(trainer):
    """
    Create the NeptuneAI run and record the training hyperparameters before training starts.

    Any exception raised during initialization is caught and reported as a warning.

    Args:
        trainer: Trainer object; its `args` (project, name and all other attributes) are read.
    """
    global run
    try:
        # Start the NeptuneAI run; the project falls back to "YOLOv8" when none is configured
        run = neptune.init_run(project=trainer.args.project or "YOLOv8", name=trainer.args.name, tags=["YOLOv8"])
        # Record every trainer argument, replacing None values with empty strings
        hyperparameters = {}
        for key, value in vars(trainer.args).items():
            hyperparameters[key] = "" if value is None else value
        run["Configuration/Hyperparameters"] = hyperparameters
    except Exception as e:
        # Initialization failed: warn that this run will not be logged
        LOGGER.warning(f"WARNING ⚠️ NeptuneAI installed but not initialized correctly, not logging this run. {e}")


def on_train_epoch_end(trainer):
    """
    Log training losses and learning rates to NeptuneAI when a training epoch ends.

    Scalars are logged with step `trainer.epoch + 1`. Training-batch images are uploaded to the
    "Mosaic" group only when `trainer.epoch` equals 1.

    Args:
        trainer: Trainer object; its `tloss`, `lr`, `epoch` and `save_dir` attributes and its
            `label_loss_items` method are read.
    """
    train_losses = trainer.label_loss_items(trainer.tloss, prefix="train")
    step = trainer.epoch + 1
    _log_scalars(train_losses, step)
    _log_scalars(trainer.lr, step)

    # NOTE(review): the original comment calls this "the first epoch", yet on_fit_epoch_end in this
    # file checks `trainer.epoch == 0` — confirm which epoch is intended
    if trainer.epoch != 1:
        return
    mosaics = {f.stem: str(f) for f in trainer.save_dir.glob("train_batch*.jpg")}
    _log_images(mosaics, "Mosaic")


def on_fit_epoch_end(trainer):
    """
    Log metrics to NeptuneAI when a fit (train+val) epoch ends.

    When a run is active and `trainer.epoch` is 0, model information is also stored under
    "Configuration/Model".

    Args:
        trainer: Trainer object; its `epoch` and `metrics` attributes are read.
    """
    if run:
        if trainer.epoch == 0:
            from ultralytics.utils.torch_utils import model_info_for_loggers

            # Store the model information once, at epoch 0
            run["Configuration/Model"] = model_info_for_loggers(trainer)
    # Metrics are logged with step `trainer.epoch + 1`
    _log_scalars(trainer.metrics, step=trainer.epoch + 1)


def on_val_end(validator):
    """
    Upload validation images to the "Validation" group in NeptuneAI when validation ends.

    Args:
        validator: Validator object; its `save_dir` is searched for files matching "val*.jpg".
    """
    if not run:
        return  # no active run, so nothing is uploaded
    val_images = {}
    for image_path in validator.save_dir.glob("val*.jpg"):
        val_images[image_path.stem] = str(image_path)
    _log_images(val_images, "Validation")


def on_train_end(trainer):
    """
    Upload result plots and the best model weights to NeptuneAI when training ends.

    Args:
        trainer: Trainer object; its `save_dir`, `best` and `args` (name, task) attributes are read.
    """
    if not run:
        return  # no active run, so nothing is uploaded

    # Candidate plot files: results, confusion matrices and the F1/PR/P/R curves
    curve_names = [f"{metric}_curve.png" for metric in ("F1", "PR", "P", "R")]
    candidates = ["results.png", "confusion_matrix.png", "confusion_matrix_normalized.png", *curve_names]

    # Keep only the plots that exist in the save directory, then log each under its file stem
    plot_paths = [trainer.save_dir / filename for filename in candidates]
    existing_plots = [plot for plot in plot_paths if plot.exists()]
    for plot in existing_plots:
        _log_plot(title=plot.stem, plot_path=plot)

    # Upload the best weights; the key uses the run name, or the task when no name is set
    run_label = trainer.args.name or trainer.args.task
    weights_key = f"weights/{run_label}/{trainer.best.name}"
    run[weights_key].upload(File(str(trainer.best)))
# Callback registry for this integration: stays empty unless the module-level `neptune` name is truthy
callbacks = {}
if neptune:
    callbacks = {
        "on_pretrain_routine_start": on_pretrain_routine_start,
        "on_train_epoch_end": on_train_epoch_end,
        "on_fit_epoch_end": on_fit_epoch_end,
        "on_val_end": on_val_end,
        "on_train_end": on_train_end,
    }

相關文章