借用Ultralytics Yolo快速訓練一個物體檢測器
https://github.com/ultralytics/ultralytics
Step-1 準備資料集
你需要一些待檢測物體比如安全帽, 把它從各個角度拍攝一下. 再找一些不相關的背景圖片. 然後把安全帽給放大縮小旋轉等等貼到背景圖片上去, 生成一堆訓練資料.
配置檔案:
# Dataset-generation config (conf.yaml). Structure reconstructed from the code's
# accesses: cfg.extract_cfg.output_dir, cfg.screen_images_path, cfg.manual_scale,
# cfg.data_cfgs, cfg.save_oss_dir, cfg.max_bg_img_sample.
extract_cfg:
  output_dir: '/datasets/images'
  fps: 0.25
screen_images_path: '/datasets/待檢測圖片'
max_scale: 1.0
min_scale: 0.1
manual_scale:
  - {name: 'logo', min_scale: 0.05, max_scale: 0.3}
  # NOTE(review): duplicate 'logo' entry — the loader builds a dict keyed by name,
  # so the first 'logo' entry above is silently overwritten by this one.
  # Presumably this should be another category (e.g. '紅包'); verify.
  - {name: 'logo', min_scale: 0.1, max_scale: 0.5}
  - {name: '箭頭', min_scale: 0.1, max_scale: 0.5}
data_cfgs:
  - {id: 0, name: 'logo', min_scale: 0.05, max_scale: 0.3, gen_num: 2}
  - {id: 1, name: '截圖', min_scale: 0.1, max_scale: 1.0, gen_num: 3, need_full_screen: true}
  - {id: 2, name: '紅包', min_scale: 0.1, max_scale: 0.5, gen_num: 2}
  - {id: 3, name: '箭頭', min_scale: 0.1, max_scale: 0.5, gen_num: 2, rotate_aug: true}
save_oss_dir: /datasets/gen_datasets/
gen_num_per_image: 2
max_bg_img_sample: 1
資料集生成:
from pathlib import Path
import io
import random
import json

import cv2
import numpy as np
from PIL import Image
import hydra
from omegaconf import DictConfig
from tqdm import tqdm

# NOTE(review): `user_root` and `get_oss_bucket` are referenced below but never
# defined in this snippet — they presumably come from a project helper module
# that was stripped when the post was published; confirm before running.


def load_images(background_path, overlay_path):
    """Load a background image (BGR) and an overlay image (alpha preserved)."""
    background = cv2.imread(background_path)
    overlay = cv2.imread(overlay_path, cv2.IMREAD_UNCHANGED)
    return background, overlay


def random_scale_and_position(bg_shape, overlay_shape, max_scale=1.0, min_scale=0.1):
    """Pick a random scale factor and a random top-left position for the overlay.

    The scale is drawn relative to the largest factor at which the overlay
    still fits inside the background; the position is then drawn so the
    scaled overlay lies fully within the background.

    Returns (scale_factor, (x, y)).
    """
    max_height, max_width = bg_shape[:2]
    overlay_height, overlay_width = overlay_shape[:2]
    # Largest factor at which the overlay still fits the background.
    base_scale = min(max_height / overlay_height, max_width / overlay_width)
    scale_factor = random.uniform(min_scale * base_scale, max_scale * base_scale)
    new_height = int(overlay_height * scale_factor)
    new_width = int(overlay_width * scale_factor)
    # Clamp at 0: with max_scale == 1.0 the resized overlay can exactly fill
    # one dimension, which made the original bound negative and crashed
    # random.randint with "empty range".
    max_x = max(0, max_width - new_width - 1)
    max_y = max(0, max_height - new_height - 1)
    position_x = random.randint(0, max_x)
    position_y = random.randint(0, max_y)
    return scale_factor, (position_x, position_y)


def get_resized_overlay(overlay, scale):
    """Resize the overlay by a uniform scale factor."""
    return cv2.resize(overlay, (0, 0), fx=scale, fy=scale)


def rotate_image(img, angle):
    """Rotate a BGRA ndarray (or PIL image) by `angle` degrees.

    The canvas is expanded so nothing is cropped; exposed corners stay
    transparent thanks to the alpha channel. Returns a BGRA ndarray.
    """
    if isinstance(img, np.ndarray):
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA))
    # Ensure the image has an alpha channel (transparency).
    img = img.convert("RGBA")
    rotated_img = img.rotate(angle, resample=Image.BICUBIC, expand=True)
    rotated_img = np.asarray(rotated_img)
    return cv2.cvtColor(rotated_img, cv2.COLOR_RGBA2BGRA)


def overlay_image(background, overlay_resized, position, scale):
    """Alpha-blend `overlay_resized` (BGRA) onto `background` (BGR) at `position`.

    Mutates `background` in place and returns the composite as a PIL RGB
    image. `scale` is unused and kept only for call-site compatibility.
    """
    h, w = overlay_resized.shape[:2]
    x, y = position
    # Per-pixel alpha blending.
    alpha_s = overlay_resized[:, :, 3] / 255.0
    alpha_l = 1.0 - alpha_s
    for c in range(3):
        background[y:y + h, x:x + w, c] = (alpha_s * overlay_resized[:, :, c]
                                           + alpha_l * background[y:y + h, x:x + w, c])
    # Debug helper:
    # cv2.rectangle(background, (x, y), (x + w, y + h), (0, 255, 0), 2)
    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
    return Image.fromarray(background)


class Box:
    """Axis-aligned bounding box in pixel coordinates."""

    def __init__(self, x, y, width, height, category_id, image_width, image_height):
        self.x = x
        self.y = y
        self.width = width
        self.height = height
        self.image_width = image_width
        self.image_height = image_height
        self.category_id = category_id

    def to_yolo_format(self):
        """Return the "<cls> <x_center> <y_center> <w> <h>" YOLO label line,
        coordinates normalized to the image size."""
        x_center = (self.x + self.width / 2) / self.image_width
        y_center = (self.y + self.height / 2) / self.image_height
        width = self.width / self.image_width
        height = self.height / self.image_height
        return f"{self.category_id} {x_center} {y_center} {width} {height}"


class SingleCategoryGen:
    """Generate synthetic training images for a single object category.

    Composites randomly scaled/rotated category images onto backgrounds and
    uploads image + YOLO label pairs to OSS.
    """

    def __init__(self, cfg, data_cfg, output_dir):
        self.output_dir = output_dir
        self.screen_png_images = []
        self.coco_images = []
        self.coco_annotations = []
        screen_images_path = Path(
            cfg.screen_images_path.format(user_root=user_root))
        self.manual_scale = {}
        self.data_cfg = data_cfg
        self.category_id = data_cfg.id
        self.category_name = self.data_cfg.name
        self.max_scale = self.data_cfg.max_scale
        self.min_scale = self.data_cfg.min_scale
        self.gen_num = self.data_cfg.gen_num
        self.rotate_aug = self.data_cfg.get("rotate_aug", False)
        self.need_full_screen = self.data_cfg.get("need_full_screen", False)
        self.category_num = 0
        self.category_names = {}
        self.bucket = get_oss_bucket(cfg.bucket_name)
        self.butcket = self.bucket  # backward-compat alias for the old misspelling
        output_dir = Path(output_dir)
        save_oss_dir = f"{cfg.save_oss_dir}/{output_dir.parent.name}/{output_dir.name}"
        self.save_oss_dir = save_oss_dir
        self.images_save_oss_dir = f"{save_oss_dir}/images"
        self.label_save_oss_dir = f"{save_oss_dir}/labels"
        self.annotations_save_oss_path = f"{save_oss_dir}/annotations.json"
        self.load_screen_png_images_and_category(screen_images_path)

    def load_screen_png_images_and_category(self, screen_images_dir):
        """Index the .png/.jpg files in this category's sub-directory."""
        screen_images_dir = Path(screen_images_dir)
        screen_images_path = screen_images_dir / self.category_name
        img_files = [p for p in screen_images_path.iterdir()
                     if p.suffix in [".png", ".jpg"]]
        img_files.sort(key=lambda x: x.stem)
        for i, img_file in enumerate(img_files):
            self.screen_png_images.append(
                dict(id=i, name=img_file.stem, supercategory=None,
                     path=str(img_file)))

    def add_new_images(self, bg_img_path: Path, gen_image_num=None, subset="train"):
        """Composite this category onto one background `gen_image_num` times
        and upload each composite plus its YOLO label."""
        gen_image_num = gen_image_num or self.gen_num
        background_origin = cv2.imread(str(bg_img_path))
        if background_origin is None:
            print(f"open image {bg_img_path} failed")
            return
        for gen_id in range(gen_image_num):
            background = background_origin.copy()
            category_id = self.category_id
            overlay_img_path = self.sample_category_data()
            overlay = cv2.imread(overlay_img_path, cv2.IMREAD_UNCHANGED)
            if overlay.shape[2] == 3:
                # Add an opaque alpha channel so the blending code can assume BGRA.
                overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2BGRA)
            if self.rotate_aug:
                overlay = rotate_image(overlay, random.uniform(-180, 180))
            scale, position = random_scale_and_position(
                background.shape, overlay.shape, self.max_scale, self.min_scale)
            overlay_resized = get_resized_overlay(overlay, scale)
            merged_img = overlay_image(background, overlay_resized, position, scale)
            filename = f"{bg_img_path.stem}_{category_id}_{gen_id:02d}.png"
            # Keep a local copy for inspection; the OSS copy is authoritative.
            # (Fixed: original referenced an undefined `output_dir` and a
            # garbled filename.)
            merged_img.save(f"{self.output_dir}/{filename}")
            box = Box(*position, overlay_resized.shape[1], overlay_resized.shape[0],
                      category_id, background.shape[1], background.shape[0])
            self.upload_image_to_oss(merged_img, filename, subset, [box])

    def sample_category_data(self):
        """Return the path of a random overlay image of this category."""
        # Fixed: the original had unreachable COCO-bookkeeping code after this
        # return (a copy-paste leftover from ScreenDatasetGen); removed.
        return random.choice(self.screen_png_images)["path"]

    def upload_image_to_oss(self, image, image_name, subset, box_list=None):
        """Upload a PNG and, if boxes are given, its YOLO .txt label file."""
        image_bytesio = io.BytesIO()
        image.save(image_bytesio, format="PNG")
        self.bucket.put_object(
            f"{self.images_save_oss_dir}/{subset}/{image_name}",
            image_bytesio.getvalue())
        if box_list:
            label_str = "\n".join(box.to_yolo_format() for box in box_list)
            label_name = image_name.split(".")[0] + ".txt"
            self.bucket.put_object(
                f"{self.label_save_oss_dir}/{subset}/{label_name}", label_str)

    def upload_full_screen_image(self):
        """Upload the raw category images as full-image samples (80/10/10
        train/val/test split), randomly top-cropped for variety."""
        if not self.need_full_screen:
            return
        name = self.category_name
        category_id = self.category_id
        image_list = self.screen_png_images
        subset_list = ["train" if i % 10 <= 7 else "val" if i % 10 <= 8 else "test"
                       for i in range(len(image_list))]
        for i, image_data in enumerate(image_list):
            subset = subset_list[i]
            image = Image.open(image_data["path"])
            if random.random() < 0.5:
                # Random vertical crop; keep at least a quarter of the height.
                origin_height = image.height
                min_height = origin_height // 4
                new_height = random.randint(min_height, origin_height)
                new_top = random.randint(0, origin_height - new_height)
                image = image.crop(
                    (0, new_top, image.width, new_top + new_height))
            filename = f"{name}_{category_id}_{i:05}.png"
            box = Box(0, 0, image.width, image.height, category_id,
                      image.width, image.height)
            self.upload_image_to_oss(image, filename, subset, [box])


class ScreenDatasetGen:
    """Multi-category variant of SingleCategoryGen: each composite uses a
    randomly chosen category, with optional per-category scale overrides."""

    def __init__(self, cfg, output_dir):
        self.output_dir = output_dir
        self.screen_png_images = {}
        self.coco_images = []
        self.coco_annotations = []
        screen_images_path = Path(
            cfg.screen_images_path.format(user_root=user_root))
        self.max_scale = cfg.max_scale
        self.min_scale = cfg.min_scale
        # Per-category scale overrides (last entry wins on duplicate names).
        self.manual_scale = {}
        for info in cfg.manual_scale:
            self.manual_scale[info.name] = dict(
                max_scale=info.max_scale, min_scale=info.min_scale)
        self.category_num = 0
        self.category_names = {}
        self.category_id_loop = -1
        self.bucket = get_oss_bucket(cfg.bucket_name)
        self.butcket = self.bucket  # backward-compat alias for the old misspelling
        output_dir = Path(output_dir)
        save_oss_dir = f"{cfg.save_oss_dir}/{output_dir.parent.name}/{output_dir.name}"
        self.save_oss_dir = save_oss_dir
        self.images_save_oss_dir = f"{save_oss_dir}/images"
        self.label_save_oss_dir = f"{save_oss_dir}/labels"
        self.annotations_save_oss_path = f"{save_oss_dir}/annotations.json"
        self.load_screen_png_images_and_category(screen_images_path)

    def add_new_images(self, bg_img_path: Path, gen_image_num=1, subset="train"):
        """Composite `gen_image_num` random-category overlays onto one background."""
        background_origin = cv2.imread(str(bg_img_path))
        if background_origin is None:
            print(f"open image {bg_img_path} failed")
            return
        for gen_id in range(gen_image_num):
            background = background_origin.copy()
            category_id = self.get_category_id_loop()
            overlay_img_path = self.sample_category_data(
                category_id, subset=subset)
            overlay = cv2.imread(overlay_img_path, cv2.IMREAD_UNCHANGED)
            if overlay.shape[2] == 3:
                overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2BGRA)
            # Per-category scale override, falling back to the global range.
            category_name = self.category_names[category_id]
            if category_name in self.manual_scale:
                max_scale = self.manual_scale[category_name]["max_scale"]
                min_scale = self.manual_scale[category_name]["min_scale"]
            else:
                max_scale = self.max_scale
                min_scale = self.min_scale
            scale, position = random_scale_and_position(
                background.shape, overlay.shape, max_scale, min_scale)
            overlay_resized = get_resized_overlay(overlay, scale)
            merged_img = overlay_image(
                background, overlay_resized, position, scale)
            filename = f"{bg_img_path.stem}_{category_id}_{gen_id:02d}.png"
            # Local save was disabled in the original; OSS is authoritative.
            # merged_img.save(f"{self.output_dir}/{filename}")
            box = Box(*position, overlay_resized.shape[1], overlay_resized.shape[0],
                      category_id, background.shape[1], background.shape[0])
            self.upload_image_to_oss(merged_img, filename, subset, [box])
            # self.add_image_annotion_to_coco(box, merged_img, filename)

    def upload_full_screen_image(self, category_name=None):
        """Upload raw images of the named categories as full-image samples."""
        if category_name is None:
            return
        if not isinstance(category_name, list):
            category_name = [category_name]
        for category_id in range(self.category_num):
            name = self.category_names[category_id]
            if name not in category_name:
                continue
            image_list = self.screen_png_images[category_id]
            subset_list = ["train" if i % 10 <= 7 else "val" if i % 10 <= 8 else "test"
                           for i in range(len(image_list))]
            for i, image_data in enumerate(image_list):
                subset = subset_list[i]
                image = Image.open(image_data["path"])
                if random.random() < 0.5:
                    origin_height = image.height
                    min_height = origin_height // 4
                    new_height = random.randint(min_height, origin_height)
                    new_top = random.randint(0, origin_height - new_height)
                    image = image.crop(
                        (0, new_top, image.width, new_top + new_height))
                filename = f"{name}_{category_id}_{i:05}.png"
                box = Box(0, 0, image.width, image.height, category_id,
                          image.width, image.height)
                self.upload_image_to_oss(image, filename, subset, [box])

    def load_screen_png_images_and_category(self, screen_images_dir):
        """Each sub-directory of `screen_images_dir` is one category; index
        its .png/.jpg images and record the category names."""
        screen_images_dir = Path(screen_images_dir)
        screen_images_paths = [
            f for f in screen_images_dir.iterdir() if f.is_dir()]
        screen_images_paths.sort(key=lambda x: x.stem)
        for category_id, screen_images_path in enumerate(screen_images_paths):
            img_files = [p for p in screen_images_path.iterdir()
                         if p.suffix in [".png", ".jpg"]]
            img_files.sort(key=lambda x: x.stem)
            self.screen_png_images[category_id] = []
            self.category_names[category_id] = screen_images_path.stem
            print(f"{category_id}: {self.category_names[category_id]}")
            for i, img_file in enumerate(img_files):
                self.screen_png_images[category_id].append(
                    dict(id=i, name=img_file.stem, supercategory=None,
                         path=str(img_file)))
        self.category_num = len(screen_images_paths)
        print(f"category_num: {self.category_num}")

    def get_category_id_loop(self):
        """Pick a random category id (round-robin variant kept for reference)."""
        # self.category_id_loop = (self.category_id_loop + 1) % self.category_num
        self.category_id_loop = random.randint(0, self.category_num - 1)
        return self.category_id_loop

    def sample_category_data(self, category_id, subset):
        """Return a random overlay path for `category_id`.

        `subset` is currently unused; a per-subset split was prototyped and
        abandoned in the original.
        """
        image_data = self.screen_png_images[category_id]
        return random.choice(image_data)["path"]

    def gen_image_id(self):
        """Next COCO image id (1-based, sequential)."""
        return len(self.coco_images) + 1

    def add_image_annotion_to_coco(self, bbox, image: Image.Image, image_name):
        """Record one image entry and one annotation entry in COCO format."""
        image_id = self.gen_image_id()
        image_json = {
            "id": image_id,
            "width": image.width,
            "height": image.height,
            "file_name": image_name,
        }
        self.coco_images.append(image_json)
        annotation_json = {
            "id": image_id,
            "image_id": image_id,
            "category_id": 0,
            "segmentation": None,
            "area": bbox[2] * bbox[3],
            "bbox": bbox,
            "iscrowd": 0,
        }
        self.coco_annotations.append(annotation_json)

    def upload_image_to_oss(self, image, image_name, subset, box_list=None):
        """Upload a PNG and, if boxes are given, its YOLO .txt label file."""
        image_bytesio = io.BytesIO()
        image.save(image_bytesio, format="PNG")
        self.bucket.put_object(
            f"{self.images_save_oss_dir}/{subset}/{image_name}",
            image_bytesio.getvalue())
        if box_list:
            label_str = "\n".join(box.to_yolo_format() for box in box_list)
            label_name = image_name.split(".")[0] + ".txt"
            self.bucket.put_object(
                f"{self.label_save_oss_dir}/{subset}/{label_name}", label_str)

    def dump_coco_json(self):
        """Upload the accumulated COCO annotations json to OSS."""
        # Fixed: the original built categories from
        # self.screen_png_images.values(), whose values are *lists* of image
        # dicts, so `item[key]` raised TypeError. Category entries come from
        # the id -> name mapping instead.
        categories = [
            {"id": cid, "name": name, "supercategory": None}
            for cid, name in self.category_names.items()
        ]
        coco_json = {
            "images": self.coco_images,
            "annotations": self.coco_annotations,
            "categories": categories,
        }
        self.bucket.put_object(
            self.annotations_save_oss_path, json.dumps(coco_json, indent=2))
        # with open(f"{self.output_dir}/coco.json", "w") as fp:
        #     json.dump(coco_json, fp, indent=2)


@hydra.main(version_base=None, config_path=".", config_name="conf")
def main(cfg: DictConfig):
    """Build one generator per data_cfg entry and run it over all backgrounds."""
    output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
    category_generators = [
        SingleCategoryGen(cfg, data_cfg, output_dir) for data_cfg in cfg.data_cfgs
    ]
    bg_img_files = [
        f for f in Path(cfg.extract_cfg.output_dir.format(user_root=user_root)).iterdir()
        if f.suffix in [".png", ".jpg"]
    ]
    if cfg.get("max_bg_img_sample"):
        bg_img_files = random.sample(bg_img_files, cfg.max_bg_img_sample)
    # 80/10/10 train/val/test split keyed on the background image index.
    for img_index, bg_img_file in enumerate(tqdm(bg_img_files)):
        subset = ("train" if img_index % 10 <= 7
                  else "val" if img_index % 10 == 8 else "test")
        for category_generator in category_generators:
            category_generator.add_new_images(
                bg_img_path=bg_img_file, subset=subset)
    for category_generator in category_generators:
        category_generator.upload_full_screen_image()


if __name__ == '__main__':
    main()
執行後, 可以在outputs資料夾下生成符合要求的訓練資料.
image 就是背景+檢測物體
labels 中的內容就是這樣的檔案:
1 0.6701388888888888 0.289453125 0.5736111111111111 0.57421875 # 型別 box
Step-2 訓練模型
這個更簡單, 在官網下載一個模型權重, 比如yolov8s.pt, 對付安全帽這種東西, 幾M大的模型就夠了.
訓練配置檔案:
# dataset.yaml for Ultralytics YOLO training.
path: /outputs
train: images/train
val: images/val
test: images/test

# Class id -> name. Must cover every category id the generator emits:
# the generation config defines ids 0-3, but the original file stopped at 2,
# so label files containing class 3 (箭頭) would be rejected at train time.
names:
  0: logo
  1: 截圖
  2: 紅包
  3: 箭頭
訓練程式碼:
沒錯就這麼一點
from ultralytics import YOLO

# Load the official YOLOv8-small weights. Fixed: the released file is named
# "yolov8s.pt", not "yolo8s.pt" — the latter fails to auto-download.
model = YOLO('./yolov8s.pt')
# Train on the generated dataset; 1280px input keeps small overlays visible.
model.train(data='dataset.yaml', epochs=100, imgsz=1280)
然後就可以自動化訓練了, 結束後會自動儲存模型與評估檢測效果.
Step-3 檢測
class Special_Obj_Detect(object):
    """Thin wrapper around an Ultralytics YOLO model for single-image detection."""

    def __init__(self, cfg) -> None:
        model_path = cfg.model_path
        self.model = YOLO(model_path)
        # NOTE(review): this assigns a plain attribute; it does NOT freeze the
        # model (that would require calling a method on its parameters).
        # Inference via self.model(...) runs without gradients anyway.
        self.model.requires_grad_ = False
        # Class id -> human-readable name; must match the training dataset.yaml.
        self.cls_names = {0: 'logo', 1: '截圖', 2: '紅包'}

    def detect_image(self, img_path):
        """Detect objects in one image.

        Returns (unique class names, per-class detection count,
        per-class area fraction of the image, in [0, 1]).
        """
        results = self.model(img_path)
        objects = []
        objects_cnt = dict()
        objects_area_pct = dict()
        for result in results:
            result = result.cpu()
            for box in result.boxes:
                # Drop low-confidence detections.
                if box.conf < 0.8:
                    continue
                # Fixed: box.cls[0].item() is a float; cast to int so it
                # matches the declared int keys of cls_names explicitly.
                name = self.cls_names[int(box.cls[0].item())]
                objects.append(name)
                objects_cnt[name] = objects_cnt.get(name, 0) + 1
                # Normalized box area (w * h from xywhn), capped at 1.
                # NOTE(review): this overwrites the per-class value on each
                # box rather than accumulating — kept as in the original;
                # confirm whether a per-class sum was intended.
                area_p = sum((xywh[2] * xywh[3]).item() for xywh in box.xywhn)
                objects_area_pct[name] = min(1, area_p)
        objects = list(set(objects))
        return objects, objects_cnt, objects_area_pct
收工.
本文作者:JiangOil
本文連結: https://www.codebonobo.tech/post/14
版權宣告:本部落格所有文章除特別宣告外,均採用 BY-NC-SA 許可協議。轉載請註明出處!