1 xml to txt
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Convert .xml annotation files to .txt files (YOLO format)
import os
import xml.etree.ElementTree as ET

# Object classes to detect
classes = ["ore carrier", "passenger ship",
           "container ship", "bulk cargo carrier",
           "general cargo ship", "fishing boat"]

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
def convert(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0  # (x_min + x_max) / 2.0
    y = (box[2] + box[3]) / 2.0  # (y_min + y_max) / 2.0
    w = box[1] - box[0]  # x_max - x_min
    h = box[3] - box[2]  # y_max - y_min
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)
def convert_annotation(image_id):
    # Path of the source .xml file (replace 地址1 with your xml directory)
    in_file = open('地址1\%s.xml' % (image_id), encoding='UTF-8')
    # Path of the generated .txt file (replace 地址2 with your output directory)
    out_file = open('地址2\%s.txt' % (image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    in_file.close()
    out_file.close()
# Directory containing the .xml files (replace 地址1 with your path)
xml_path = os.path.join(CURRENT_DIR, '地址1/')
# List of .xml files
img_xmls = os.listdir(xml_path)
for img_xml in img_xmls:
    label_name = img_xml.split('.')[0]
    print(label_name)
    convert_annotation(label_name)
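A quick way to sanity-check convert() is to run it on made-up numbers (illustrative values, not from the dataset): for a 640×480 image and a box spanning x 100–300 and y 50–200, the normalized center and size come out as below.

# Sanity check with illustrative numbers (not from the dataset):
# 640x480 image, box with x_min=100, x_max=300, y_min=50, y_max=200
x, y, w, h = convert((640, 480), (100.0, 300.0, 50.0, 200.0))
print(x, y, w, h)  # approximately 0.3125 0.26042 0.3125 0.3125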
2 txt to xml
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import os
import cv2
import xml.etree.ElementTree as ET
def convert_yolo_to_voc(txt_path, img_path, xml_path, classes):
    # Read the image to get its width and height
    img = cv2.imread(img_path)
    height, width, depth = img.shape
    # Read the YOLO .txt file
    with open(txt_path, 'r') as file:
        lines = file.readlines()
    # Create the XML root node
    annotation = ET.Element("annotation")
    # Basic information
    folder = ET.SubElement(annotation, "folder")
    folder.text = os.path.basename(os.path.dirname(img_path))
    filename = ET.SubElement(annotation, "filename")
    filename.text = os.path.basename(img_path)
    path = ET.SubElement(annotation, "path")
    path.text = img_path
    source = ET.SubElement(annotation, "source")
    database = ET.SubElement(source, "database")
    database.text = "Unknown"
    size = ET.SubElement(annotation, "size")
    width_elem = ET.SubElement(size, "width")
    width_elem.text = str(width)
    height_elem = ET.SubElement(size, "height")
    height_elem.text = str(height)
    depth_elem = ET.SubElement(size, "depth")
    depth_elem.text = str(depth)
    segmented = ET.SubElement(annotation, "segmented")
    segmented.text = "0"
    # Parse each object line
    for line in lines:
        data = line.strip().split()
        class_id = int(data[0])
        x_center = float(data[1])
        y_center = float(data[2])
        box_width = float(data[3])
        box_height = float(data[4])
        # Convert normalized coordinates to pixel coordinates
        x_min = int((x_center - box_width / 2) * width)
        y_min = int((y_center - box_height / 2) * height)
        x_max = int((x_center + box_width / 2) * width)
        y_max = int((y_center + box_height / 2) * height)
        # Create the object node
        object_elem = ET.SubElement(annotation, "object")
        name = ET.SubElement(object_elem, "name")
        name.text = classes[class_id]
        pose = ET.SubElement(object_elem, "pose")
        pose.text = "Unspecified"
        truncated = ET.SubElement(object_elem, "truncated")
        truncated.text = "0"
        difficult = ET.SubElement(object_elem, "difficult")
        difficult.text = "0"
        bndbox = ET.SubElement(object_elem, "bndbox")
        xmin = ET.SubElement(bndbox, "xmin")
        xmin.text = str(x_min)
        ymin = ET.SubElement(bndbox, "ymin")
        ymin.text = str(y_min)
        xmax = ET.SubElement(bndbox, "xmax")
        xmax.text = str(x_max)
        ymax = ET.SubElement(bndbox, "ymax")
        ymax.text = str(y_max)
    # Write out the XML file
    # xml_str = ET.tostring(annotation, encoding='unicode')
    xml_str = ET.tostring(annotation, encoding='utf-8')
    xml_str = xml_str.decode('utf-8')  # decode the byte string to a Unicode string
    with open(xml_path, 'w', encoding='utf-8') as file:
        file.write(xml_str)
def convert_all_txt_to_xml(txt_folder, img_folder, output_folder, classes):
    # Create the output folder if it does not exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    # Walk through every .txt file in the folder
    for txt_file in os.listdir(txt_folder):
        if txt_file.endswith('.txt'):
            txt_path = os.path.join(txt_folder, txt_file)
            img_name = txt_file.replace('.txt', '.jpg')  # assumes images are stored as .jpg
            img_path = os.path.join(img_folder, img_name)
            if os.path.exists(img_path):
                xml_file_name = txt_file.replace('.txt', '.xml')
                xml_path = os.path.join(output_folder, xml_file_name)
                convert_yolo_to_voc(txt_path, img_path, xml_path, classes)
                # print(f"Converted {txt_file} to {xml_file_name}")
            else:
                print(f"Image for {txt_file} not found: {img_path}")
# Example usage
txt_folder = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/labels'  # folder of YOLO-format .txt files
img_folder = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/images'  # folder of the corresponding images
output_folder = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/xmls'  # output folder for the .xml files
classes = ['corrosion', 'craze', 'hide_craze', 'surface_attach', 'surface_corrosion', 'surface_eye',
           'surface_injure', 'surface_oil', 'thunderstrike']  # replace with your own class list
convert_all_txt_to_xml(txt_folder, img_folder, output_folder, classes)
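ET.tostring writes the whole annotation on a single line. If you want indented XML for manual inspection, one option (a minimal sketch using the standard-library xml.dom.minidom; here xml_path stands for one of the generated files) is to reformat the file after it has been written:

from xml.dom import minidom

# Re-open a generated .xml and rewrite it with indentation
pretty_str = minidom.parse(xml_path).toprettyxml(indent="    ")
with open(xml_path, 'w', encoding='utf-8') as file:
    file.write(pretty_str)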
3 train / test / val split
import os
import random
import shutil
def split_dataset(images_dir, labels_dir, output_dir, split_ratio=(0.8, 0.1, 0.1)):
    """
    Split an image/label dataset into train, val and test sets.
    :param images_dir: path to the image folder
    :param labels_dir: path to the label folder
    :param output_dir: output directory
    :param split_ratio: split ratio (train, val, test)
    """
    # Make sure the output directories exist
    os.makedirs(output_dir, exist_ok=True)
    for subdir in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_dir, subdir, 'images'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, subdir, 'labels'), exist_ok=True)
    # Collect all image file names
    images = [f for f in os.listdir(images_dir) if f.endswith('.jpg') or f.endswith('.png')]
    labels = [f.replace('.jpg', '.txt').replace('.png', '.txt') for f in images]
    # Shuffle images and labels together
    combined = list(zip(images, labels))
    random.shuffle(combined)
    images[:], labels[:] = zip(*combined)
    # Compute the split points
    num_train = int(len(images) * split_ratio[0])
    num_val = int(len(images) * split_ratio[1])
    # Copy each image/label pair into its subset
    for i, image in enumerate(images):
        label = labels[i]
        if i < num_train:
            subset = 'train'
        elif i < num_train + num_val:
            subset = 'val'
        else:
            subset = 'test'
        shutil.copy(os.path.join(images_dir, image), os.path.join(output_dir, subset, 'images', image))
        shutil.copy(os.path.join(labels_dir, label), os.path.join(output_dir, subset, 'labels', label))
# Example call
split_dataset('/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/images',
              '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/labels',
              '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data')
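To confirm that the 0.8 / 0.1 / 0.1 ratios came out as expected, a small check (a sketch, assuming the same split_data path as in the call above) can count the files that landed in each subset:

import os

split_root = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data'
for subset in ['train', 'val', 'test']:
    n_img = len(os.listdir(os.path.join(split_root, subset, 'images')))
    n_lbl = len(os.listdir(os.path.join(split_root, subset, 'labels')))
    print(f"{subset}: {n_img} images, {n_lbl} labels")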
4 Generate the yaml file
import yaml
import os
def create_yaml(output_dir, train_dir, val_dir, test_dir, class_names, num_classes):
    """
    Create the YOLOv8 dataset configuration file.
    :param output_dir: output directory
    :param train_dir: training set directory
    :param val_dir: validation set directory
    :param test_dir: test set directory
    :param class_names: list of class names
    :param num_classes: number of classes
    """
    data = {
        'train': train_dir,
        'val': val_dir,
        'test': test_dir,
        'nc': num_classes,
        'names': class_names
    }
    with open(os.path.join(output_dir, 'dataset.yaml'), 'w') as f:
        yaml.dump(data, f, default_flow_style=False)
# Example call
create_yaml('/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data',
            '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data/train/images',
            '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data/val/images',
            '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data/test/images',
            ['corrosion', 'craze', 'hide_craze', 'surface_attach', 'surface_corrosion', 'surface_eye',
             'surface_injure', 'surface_oil', 'thunderstrike'], 9)
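To make sure the file was written correctly, you can read it back with yaml.safe_load (a sketch, assuming the same split_data path as above):

import os
import yaml

with open(os.path.join('/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data', 'dataset.yaml')) as f:
    cfg = yaml.safe_load(f)
print(cfg['nc'], cfg['names'])  # should print 9 and the class list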
5 Convert .pt to .onnx
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from ultralytics import YOLO

model = YOLO("best.pt")
success = model.export(format="onnx", half=False, dynamic=True, opset=17)
print(success)  # export result
6 Data augmentation
import cv2
import numpy as np
import os
import glob

# Data augmentation: each transform is saved as a separate image derived from save_path
def augment_data(img, save_path):
    rows, cols, _ = img.shape
    # Horizontal flip (applied with probability 0.5)
    if np.random.random() > 0.5:
        img = cv2.flip(img, 1)
        img_name = os.path.splitext(save_path)[0] + "_flip.png"
        cv2.imwrite(img_name, img)
        print("Saved augmented image:", img_name)
    # Random scaling
    scale = np.random.uniform(0.9, 1.1)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 0, scale)
    img_transformed = cv2.warpAffine(img, M, (cols, rows))
    img_name = os.path.splitext(save_path)[0] + "_transform.png"
    cv2.imwrite(img_name, img_transformed)
    print("Saved augmented image:", img_name)
    # Random rotation
    angle = np.random.randint(-10, 10)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img_rotated = cv2.warpAffine(img, M, (cols, rows))
    img_name = os.path.splitext(save_path)[0] + "_rotated.png"
    cv2.imwrite(img_name, img_rotated)
    print("Saved augmented image:", img_name)
    # Gaussian noise
    mean = 0
    std = np.random.uniform(5, 15)
    noise = np.zeros(img.shape, np.float32)
    cv2.randn(noise, mean, std)
    noise = np.uint8(noise)
    img_noisy = cv2.add(img, noise)
    img_name = os.path.splitext(save_path)[0] + "_noisy.png"
    cv2.imwrite(img_name, img_noisy)
    print("Saved augmented image:", img_name)
    # Random contrast and brightness
    alpha = np.random.uniform(0.8, 1.2)
    beta = np.random.randint(-10, 10)
    img_contrast = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)
    img_name = os.path.splitext(save_path)[0] + "_contrast.png"
    cv2.imwrite(img_name, img_contrast)
    print("Saved augmented image:", img_name)
    return img
# Read every image in the data folder and run the augmentations
data_dir = "data"
save_dir = "result"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
# Use glob to iterate over all images in the data folder
for img_path in glob.glob(os.path.join(data_dir, "*.png")):
    img = cv2.imread(img_path)
    # Build the output file name used for the augmented images
    img_name = os.path.basename(img_path)
    save_path = os.path.join(save_dir, img_name)
    # Run the augmentations
    augment_data(img, save_path)
    # Also save a copy of the original image
    cv2.imwrite(save_path, img)
    print("Saved original image:", save_path)