# Quickly Training an Object Detector with Ultralytics YOLO
[Also published at https://www.codebonobo.tech/post/14](https://www.codebonobo.tech/post/14 "https://www.codebonobo.tech/post/14")
Around 2016/17, when deep learning first took off, object detection was still fairly advanced technology. Universities were busy churning out Fast R-CNN / Faster R-CNN papers and taking on contract work like helmet/pedestrian/vehicle detection. By 2024, with the technology mature, object detection is pretty much a dead research direction; presumably the labs are now busy specializing hard-won general-purpose large models back into domain-specific ones... ha.
Back to the topic: object detection training is now fully streamlined, and there is no training code left to rack your brain over. With Ultralytics YOLO you can quickly train a detector for a specific object, such as a safety helmet.
[https://github.com/ultralytics/ultralytics](https://github.com/ultralytics/ultralytics)
![](https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png)
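Installation is a single command: `pip install ultralytics`.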
# Step-1 Prepare the Dataset
You need images of the object to detect, e.g. a safety helmet, shot from various angles, plus some unrelated background images. Then paste the helmet onto the backgrounds with random scaling, rotation, and so on to synthesize a pile of training data.
Config file:
```yaml
extract_cfg:
  output_dir: '/datasets/images'
  fps: 0.25

screen_images_path: '/datasets/待檢測圖片'
max_scale: 1.0
min_scale: 0.1
manual_scale: [ {name: 'logo', min_scale: 0.05, max_scale: 0.3},
                {name: 'logo', min_scale: 0.1, max_scale: 0.5},
                {name: '箭頭', min_scale: 0.1, max_scale: 0.5}
]
data_cfgs: [ {id: 0, name: 'logo', min_scale: 0.05, max_scale: 0.3, gen_num: 2},
             {id: 1, name: '截圖', min_scale: 0.1, max_scale: 1.0, gen_num: 3, need_full_screen: true},
             {id: 2, name: '紅包', min_scale: 0.1, max_scale: 0.5, gen_num: 2},
             {id: 3, name: '箭頭', min_scale: 0.1, max_scale: 0.5, gen_num: 2, rotate_aug: true},
]
save_oss_dir: /datasets/gen_datasets/
gen_num_per_image: 2
max_bg_img_sample:
bucket_name: my-oss-bucket  # assumption: the script below reads cfg.bucket_name; not shown in the original config
```
Dataset generation script:
```python
from pathlib import Path
import io
import random
import cv2
import numpy as np
from PIL import Image
import hydra
from omegaconf import DictConfig
import json
from tqdm import tqdm

# Not in the original post: the {user_root} expansion root and the OSS helper
# below are assumptions added so the script is self-contained; adapt them to
# your environment.
user_root = str(Path.home())

def get_oss_bucket(bucket_name):
    import oss2  # Alibaba Cloud OSS SDK
    auth = oss2.Auth("<access_key_id>", "<access_key_secret>")
    return oss2.Bucket(auth, "<endpoint>", bucket_name)

# Load the background and overlay images
def load_images(background_path, overlay_path):
background = cv2.imread(background_path)
overlay = cv2.imread(overlay_path, cv2.IMREAD_UNCHANGED)
return background, overlay
# Random scale and position for the overlay
def random_scale_and_position(bg_shape, overlay_shape, max_scale=1.0, min_scale=0.1):
max_height, max_width = bg_shape[:2]
overlay_height, overlay_width = overlay_shape[:2]
base_scale = min(max_height / overlay_height, max_width / overlay_width)
    # Pick a random scale
scale_factor = random.uniform(
min_scale * base_scale, max_scale * base_scale)
new_height, new_width = int(
overlay_height * scale_factor), int(overlay_width * scale_factor)
    # Pick a random position
max_x = max_width - new_width - 1
max_y = max_height - new_height - 1
position_x = random.randint(0, max_x)
position_y = random.randint(0, max_y)
return scale_factor, (position_x, position_y)
def get_resized_overlay(overlay, scale):
overlay_resized = cv2.resize(overlay, (0, 0), fx=scale, fy=scale)
return overlay_resized
def rotate_image(img, angle):
if isinstance(img, np.ndarray):
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA))
    # Make sure the image has an alpha channel (transparency)
img = img.convert("RGBA")
    # Rotate the image, expanding the canvas so nothing is clipped
rotated_img = img.rotate(angle, resample=Image.BICUBIC, expand=True)
rotated_img = np.asarray(rotated_img)
return cv2.cvtColor(rotated_img, cv2.COLOR_RGBA2BGRA)
# Composite the overlay onto the background
def overlay_image(background, overlay_resized, position, scale):
h, w = overlay_resized.shape[:2]
x, y = position
    # Alpha blending
alpha_s = overlay_resized[:, :, 3] / 255.0
alpha_l = 1.0 - alpha_s
for c in range(0, 3):
background[y:y + h, x:x + w, c] = (alpha_s * overlay_resized[:, :, c] +
alpha_l * background[y:y + h, x:x + w, c])
    # Draw the box (debugging only)
# print("position", x, y, w, h)
# cv2.rectangle(background, (x, y), (x + w, y + h), (0, 255, 0), 2)
background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
return Image.fromarray(background)
class Box:
def __init__(self, x, y, width, height, category_id, image_width, image_height):
self.x = x
self.y = y
self.width = width
self.height = height
self.image_width = image_width
self.image_height = image_height
self.category_id = category_id
def to_yolo_format(self):
x_center = (self.x + self.width / 2) / self.image_width
y_center = (self.y + self.height / 2) / self.image_height
width = self.width / self.image_width
height = self.height / self.image_height
box_line = f"{self.category_id} {x_center} {y_center} {width} {height}"
return box_line
class SingleCategoryGen:
def __init__(self, cfg, data_cfg, output_dir):
self.output_dir = output_dir
self.screen_png_images = []
self.coco_images = []
self.coco_annotations = []
screen_images_path = Path(
cfg.screen_images_path.format(user_root=user_root))
self.manual_scale = {}
self.data_cfg = data_cfg
self.category_id = data_cfg.id
self.category_name = self.data_cfg.name
self.max_scale = self.data_cfg.max_scale
self.min_scale = self.data_cfg.min_scale
self.gen_num = self.data_cfg.gen_num
self.rotate_aug = self.data_cfg.get("rotate_aug", False)
self.need_full_screen = self.data_cfg.get("need_full_screen", False)
self.category_num = 0
self.category_names = {}
        self.bucket = get_oss_bucket(cfg.bucket_name)
output_dir = Path(output_dir)
save_oss_dir = f"{cfg.save_oss_dir}/{output_dir.parent.name}/{output_dir.name}"
self.save_oss_dir = save_oss_dir
self.images_save_oss_dir = f"{save_oss_dir}/images"
self.label_save_oss_dir = f"{save_oss_dir}/labels"
self.annotations_save_oss_path = f"{save_oss_dir}/annotations.json"
self.load_screen_png_images_and_category(screen_images_path)
def load_screen_png_images_and_category(self, screen_images_dir):
screen_images_dir = Path(screen_images_dir)
category_id = self.category_id
screen_images_path = screen_images_dir / self.category_name
img_files = [p for p in screen_images_path.iterdir() if p.suffix in [
".png", ".jpg"]]
img_files.sort(key=lambda x: x.stem)
for i, img_file in enumerate(img_files):
self.screen_png_images.append(
dict(id=i, name=img_file.stem, supercategory=None, path=str(img_file)))
def add_new_images(self, bg_img_path: Path, gen_image_num=None, subset="train"):
gen_image_num = gen_image_num or self.gen_num
background_origin = cv2.imread(str(bg_img_path))
if background_origin is None:
print(f"open image {bg_img_path} failed")
return
max_box_num = 1
for gen_id in range(gen_image_num):
background = background_origin.copy()
category_id = self.category_id
overlay_img_path = self.sample_category_data()
overlay = cv2.imread(overlay_img_path, cv2.IMREAD_UNCHANGED)
if overlay.shape[2] == 3:
overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2BGRA)
if self.rotate_aug:
overlay = rotate_image(overlay, random.uniform(-180, 180))
            # # Randomly crop the overlay (disabled)
# if random.random() < 0.5:
# origin_height = overlay.shape[0]
# min_height = origin_height // 4
# new_height = random.randint(min_height, origin_height)
# new_top = random.randint(0, origin_height - new_height)
# overlay = overlay[new_top:new_top+new_height, :, :]
box_num = random.randint(1, max_box_num)
            # Get a random scale and position
max_scale = self.max_scale
min_scale = self.min_scale
scale, position = random_scale_and_position(
background.shape, overlay.shape, max_scale, min_scale)
            # Resize the overlay
overlay_resized = get_resized_overlay(overlay, scale)
            # Composited image
merged_img = overlay_image(background, overlay_resized, position, scale)
            # Save the composited image locally
            filename = f"{bg_img_path.stem}_{category_id}_{gen_id:02d}.png"
            merged_img.save(f'{self.output_dir}/{filename}')
            # Build the bounding-box annotation
box = Box(*position, overlay_resized.shape[1], overlay_resized.shape[0], category_id, background.shape[1],
background.shape[0])
self.upload_image_to_oss(merged_img, filename, subset, [box])
    def sample_category_data(self):
        return random.choice(self.screen_png_images)["path"]
def upload_image_to_oss(self, image, image_name, subset, box_list=None):
image_bytesio = io.BytesIO()
image.save(image_bytesio, format="PNG")
        self.bucket.put_object(
            f"{self.images_save_oss_dir}/{subset}/{image_name}", image_bytesio.getvalue())
if box_list:
label_str = "\n".join([box.to_yolo_format() for box in box_list])
label_name = image_name.split(".")[0] + ".txt"
            self.bucket.put_object(
                f"{self.label_save_oss_dir}/{subset}/{label_name}", label_str)
def upload_full_screen_image(self):
if not self.need_full_screen:
return
name = self.category_name
category_id = self.category_id
image_list = self.screen_png_images
subset_list = ["train" if i % 10 <= 7 else "val" if i %
10 <= 8 else "test" for i in range(len(image_list))]
for i in range(len(image_list)):
image_data = image_list[i]
subset = subset_list[i]
overlay_img_path = image_data["path"]
image = Image.open(overlay_img_path)
if random.random() < 0.5:
origin_height = image.height
min_height = origin_height // 4
new_height = random.randint(min_height, origin_height)
new_top = random.randint(0, origin_height - new_height)
image = image.crop(
(0, new_top, image.width, new_top + new_height))
filename = f"{name}_{category_id}_{i:05}.png"
box = Box(0, 0, image.width, image.height,
category_id, image.width, image.height)
self.upload_image_to_oss(image, filename, subset, [box])
class ScreenDatasetGen:
def __init__(self, cfg, output_dir):
self.output_dir = output_dir
self.screen_png_images = {}
self.coco_images = []
self.coco_annotations = []
screen_images_path = Path(
cfg.screen_images_path.format(user_root=user_root))
self.max_scale = cfg.max_scale
self.min_scale = cfg.min_scale
self.manual_scale = {}
for info in cfg.manual_scale:
self.manual_scale[info.name] = dict(
max_scale=info.max_scale, min_scale=info.min_scale)
self.category_num = 0
self.category_names = {}
self.category_id_loop = -1
        self.bucket = get_oss_bucket(cfg.bucket_name)
output_dir = Path(output_dir)
save_oss_dir = f"{cfg.save_oss_dir}/{output_dir.parent.name}/{output_dir.name}"
self.save_oss_dir = save_oss_dir
self.images_save_oss_dir = f"{save_oss_dir}/images"
self.label_save_oss_dir = f"{save_oss_dir}/labels"
self.annotations_save_oss_path = f"{save_oss_dir}/annotations.json"
self.load_screen_png_images_and_category(screen_images_path)
def add_new_images(self, bg_img_path: Path, gen_image_num=1, subset="train"):
background_origin = cv2.imread(str(bg_img_path))
if background_origin is None:
print(f"open image {bg_img_path} failed")
return
max_box_num = 1
for gen_id in range(gen_image_num):
background = background_origin.copy()
category_id = self.get_category_id_loop()
overlay_img_path = self.sample_category_data(
category_id, subset=subset)
overlay = cv2.imread(overlay_img_path, cv2.IMREAD_UNCHANGED)
if overlay.shape[2] == 3:
overlay = cv2.cvtColor(overlay, cv2.COLOR_BGR2BGRA)
            # # Randomly crop the overlay (disabled)
# if random.random() < 0.5:
# origin_height = overlay.shape[0]
# min_height = origin_height // 4
# new_height = random.randint(min_height, origin_height)
# new_top = random.randint(0, origin_height - new_height)
# overlay = overlay[new_top:new_top+new_height, :, :]
box_num = random.randint(1, max_box_num)
            # Get a random scale and position
category_name = self.category_names[category_id]
if category_name in self.manual_scale:
max_scale = self.manual_scale[category_name]["max_scale"]
min_scale = self.manual_scale[category_name]["min_scale"]
else:
max_scale = self.max_scale
min_scale = self.min_scale
scale, position = random_scale_and_position(
background.shape, overlay.shape, max_scale, min_scale)
            # Resize the overlay
overlay_resized = get_resized_overlay(overlay, scale)
            # Composited image
merged_img = overlay_image(
background, overlay_resized, position, scale)
            # Save the composited image locally (disabled; uploaded to OSS below)
filename = f"{bg_img_path.stem}_{category_id}_{gen_id:02d}.png"
# merged_img.save(f'{output_dir}/{filename}')
            # Build the bounding-box annotation
box = Box(*position, overlay_resized.shape[1], overlay_resized.shape[0], category_id, background.shape[1],
background.shape[0])
self.upload_image_to_oss(merged_img, filename, subset, [box])
# self.add_image_annotion_to_coco(box, merged_img, filename)
def upload_full_screen_image(self, category_name=None):
if category_name is None:
return
if not isinstance(category_name, list):
category_name = [category_name]
for category_id in range(self.category_num):
name = self.category_names[category_id]
if name not in category_name:
continue
image_list = self.screen_png_images[category_id]
subset_list = ["train" if i % 10 <= 7 else "val" if i %
10 <= 8 else "test" for i in range(len(image_list))]
for i in range(len(image_list)):
image_data = image_list[i]
subset = subset_list[i]
overlay_img_path = image_data["path"]
image = Image.open(overlay_img_path)
if random.random() < 0.5:
origin_height = image.height
min_height = origin_height // 4
new_height = random.randint(min_height, origin_height)
new_top = random.randint(0, origin_height - new_height)
image = image.crop(
(0, new_top, image.width, new_top + new_height))
filename = f"{name}_{category_id}_{i:05}.png"
box = Box(0, 0, image.width, image.height,
category_id, image.width, image.height)
self.upload_image_to_oss(image, filename, subset, [box])
def load_screen_png_images_and_category(self, screen_images_dir):
screen_images_dir = Path(screen_images_dir)
screen_images_paths = [
f for f in screen_images_dir.iterdir() if f.is_dir()]
screen_images_paths.sort(key=lambda x: x.stem)
for category_id, screen_images_path in enumerate(screen_images_paths):
img_files = [p for p in screen_images_path.iterdir() if p.suffix in [
".png", ".jpg"]]
img_files.sort(key=lambda x: x.stem)
self.screen_png_images[category_id] = []
self.category_names[category_id] = screen_images_path.stem
print(f"{category_id}: {self.category_names[category_id]}")
for i, img_file in enumerate(img_files):
self.screen_png_images[category_id].append(
dict(id=i, name=img_file.stem, supercategory=None, path=str(img_file)))
self.category_num = len(screen_images_paths)
print(f"category_num: {self.category_num}")
def get_category_id_loop(self):
# self.category_id_loop = (self.category_id_loop + 1) % self.category_num
self.category_id_loop = random.randint(0, self.category_num - 1)
return self.category_id_loop
def sample_category_data(self, category_id, subset):
image_data = self.screen_png_images[category_id]
# valid_id = []
# if subset == "train":
# valid_id = [i for i in range(len(image_data)) if i % 10 <= 7]
# elif subset == "val":
# valid_id = [i for i in range(len(image_data)) if i % 10 == 8]
# elif subset == "test":
# valid_id = [i for i in range(len(image_data)) if i % 10 == 9]
# image_data = [image_data[i] for i in valid_id]
return random.choice(image_data)["path"]
def gen_image_id(self):
return len(self.coco_images) + 1
def add_image_annotion_to_coco(self, bbox, image: Image.Image, image_name):
image_id = self.gen_image_id()
image_json = {
"id": image_id,
"width": image.width,
"height": image.height,
"file_name": image_name,
}
self.coco_images.append(image_json)
annotation_json = {
"id": image_id,
"image_id": image_id,
"category_id": 0,
"segmentation": None,
"area": bbox[2] * bbox[3],
"bbox": bbox,
"iscrowd": 0
}
self.coco_annotations.append(annotation_json)
def upload_image_to_oss(self, image, image_name, subset, box_list=None):
image_bytesio = io.BytesIO()
image.save(image_bytesio, format="PNG")
        self.bucket.put_object(
            f"{self.images_save_oss_dir}/{subset}/{image_name}", image_bytesio.getvalue())
if box_list:
label_str = "\n".join([box.to_yolo_format() for box in box_list])
label_name = image_name.split(".")[0] + ".txt"
            self.bucket.put_object(
                f"{self.label_save_oss_dir}/{subset}/{label_name}", label_str)
def dump_coco_json(self):
        categories = [{"id": cid, "name": name, "supercategory": None}
                      for cid, name in self.category_names.items()]
coco_json = {
"images": self.coco_images,
"annotations": self.coco_annotations,
"categories": categories
}
        self.bucket.put_object(
            self.annotations_save_oss_path, json.dumps(coco_json, indent=2))
# with open(f"{self.output_dir}/coco.json", "w") as fp:
# json.dump(coco_json, fp, indent=2)
@hydra.main(version_base=None, config_path=".", config_name="conf")
def main(cfg: DictConfig):
output_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
# get_image_and_annotation(output_dir)
# screen_dataset_gen = ScreenDatasetGen(cfg, output_dir)
category_generators = []
for data_cfg in cfg.data_cfgs:
category_generators.append(SingleCategoryGen(cfg, data_cfg, output_dir))
bg_img_files = [f for f in Path(cfg.extract_cfg.output_dir.format(user_root=user_root)).iterdir() if
f.suffix in [".png", ".jpg"]]
if cfg.get("max_bg_img_sample"):
bg_img_files = random.sample(bg_img_files, cfg.max_bg_img_sample)
img_index = 0
for bg_img_file in tqdm(bg_img_files):
subset = "train" if img_index % 10 <= 7 else "val" if img_index % 10 == 8 else "test"
img_index += 1
for category_generator in category_generators:
category_generator.add_new_images(bg_img_path=bg_img_file, subset=subset)
for category_generator in category_generators:
category_generator.upload_full_screen_image()
if __name__ == '__main__':
main()
```
After it runs, training data in the required format is generated under the outputs folder.
![](https://cdn.nlark.com/yuque/0/2024/png/114633/1730344557819-d66c0669-2275-413f-9194-39dca8bf2908.png)
`images` holds the background + pasted-object composites, and `labels` holds one annotation file per image, like this:
```text
1 0.6701388888888888 0.289453125 0.5736111111111111 0.57421875
# class_id x_center y_center width height (normalized to [0, 1])
```
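Reading that line: the class id comes first, then the box center and size, all normalized by the image width/height (this is what `Box.to_yolo_format` above emits). A quick sketch for mapping a label line back to pixel coordinates, assuming you know the image size:

```python
# Convert one YOLO label line back to pixel coordinates (illustrative sketch)
def yolo_to_pixels(line: str, img_w: int, img_h: int):
    cls_id, xc, yc, w, h = line.split()
    w_px, h_px = float(w) * img_w, float(h) * img_h
    x0 = float(xc) * img_w - w_px / 2  # top-left x
    y0 = float(yc) * img_h - h_px / 2  # top-left y
    return int(cls_id), (round(x0), round(y0), round(w_px), round(h_px))

# Example: the label line above on a hypothetical 1280x1280 image
print(yolo_to_pixels("1 0.6701 0.2895 0.5736 0.5742", 1280, 1280))
```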
# Step-2 Train the Model
This step is even simpler. Grab a pretrained weight from the official repo, e.g. yolov8s.pt; for something like helmet detection, a model a few MB in size is plenty.
Training dataset config (dataset.yaml):
```yaml
names:
  0: logo
  1: 截圖
  2: 紅包
path: /outputs
test: images/test
train: images/train
val: images/val
```
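Ultralytics locates the labels by replacing `images` with `labels` in each image path, so the dataset generated in Step-1 should be laid out like this:

```
/outputs
├── images
│   ├── train/
│   ├── val/
│   └── test/
└── labels
    ├── train/
    ├── val/
    └── test/
```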
Training code (yes, this is really all of it):
```python
from ultralytics import YOLO
model = YOLO('./yolov8s.pt')
model.train(data='dataset.yaml', epochs=100, imgsz=1280)
```
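When training finishes, you can sanity-check the best checkpoint on the val split; `runs/detect/train` is Ultralytics' default output directory (the exact run folder may differ on your machine):

```python
from ultralytics import YOLO

# Load the best checkpoint saved during training and evaluate it
model = YOLO('runs/detect/train/weights/best.pt')
metrics = model.val(data='dataset.yaml')
print(metrics.box.map50)  # mAP@0.5
```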
Training then runs on autopilot; when it ends, the model weights are saved and detection performance is evaluated automatically.
![](https://cdn.nlark.com/yuque/0/2024/png/114633/1730344671489-386881b2-8ccf-4fe7-aa9b-554c78439513.png)
# Step-3 Run Detection
A sketch of the detection code:
```python
from ultralytics import YOLO

class Special_Obj_Detect(object):
def __init__(self, cfg) -> None:
model_path = cfg.model_path
self.model = YOLO(model_path)
        # Inference only: freeze all model weights
        for p in self.model.model.parameters():
            p.requires_grad = False
self.cls_names = {0: 'logo', 1: '截圖', 2: '紅包'}
    # Detect objects in a single image
def detect_image(self, img_path):
results = self.model(img_path)
objects = []
objects_cnt = dict()
objects_area_pct = dict()
for result in results:
result = result.cpu()
boxes = list(result.boxes)
for box in boxes:
if box.conf < 0.8: continue
boxcls = box.cls[0].item()
objects.append(self.cls_names[boxcls])
objects_cnt[self.cls_names[boxcls]] = objects_cnt.get(self.cls_names[boxcls], 0) + 1
area_p = sum([ (xywh[2]*xywh[3]).item() for xywh in box.xywhn])
area_p = min(1, area_p)
objects_area_pct[self.cls_names[boxcls]] = area_p
objects = list(set(objects))
return objects, objects_cnt, objects_area_pct
```
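Usage sketch (the `cfg` object here is a stand-in; point `model_path` at your trained weights):

```python
from types import SimpleNamespace

cfg = SimpleNamespace(model_path='runs/detect/train/weights/best.pt')
detector = Special_Obj_Detect(cfg)
objects, counts, area_pct = detector.detect_image('test.jpg')
print(objects, counts, area_pct)
```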
That's a wrap.