1 xml to txt
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Convert .xml annotation files to .txt files (YOLO format)
import os
import xml.etree.ElementTree as ET

# Object classes to detect
classes = ["ore carrier", "passenger ship",
           "container ship", "bulk cargo carrier",
           "general cargo ship", "fishing boat"]

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
def convert(size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0  # (x_min + x_max) / 2.0
    y = (box[2] + box[3]) / 2.0  # (y_min + y_max) / 2.0
    w = box[1] - box[0]  # x_max - x_min
    h = box[3] - box[2]  # y_max - y_min
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)
def convert_annotation(image_id):
    # Path of the source .xml file (replace 地址1 with your xml directory)
    in_file = open('地址1\%s.xml' % (image_id), encoding='UTF-8')
    # Path of the generated .txt file (replace 地址2 with your output directory)
    out_file = open('地址2\%s.txt' % (image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    in_file.close()
    out_file.close()
# Directory containing the .xml files (replace 地址1 with your path)
xml_path = os.path.join(CURRENT_DIR, '地址1/')
# List of .xml files
img_xmls = os.listdir(xml_path)
for img_xml in img_xmls:
    label_name = img_xml.split('.')[0]
    print(label_name)
    convert_annotation(label_name)
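A quick way to sanity-check convert() is to run it on made-up numbers (illustrative values, not from the dataset): for a 640×480 image and a box spanning x 100–300 and y 50–200, the normalized center and size come out as below.

# Sanity check with illustrative numbers (not from the dataset):
# 640x480 image, box with x_min=100, x_max=300, y_min=50, y_max=200
x, y, w, h = convert((640, 480), (100.0, 300.0, 50.0, 200.0))
print(x, y, w, h)  # approximately 0.3125 0.26042 0.3125 0.3125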
2 txt to xml
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import os
import cv2
import xml.etree.ElementTree as ET
def convert_yolo_to_voc(txt_path, img_path, xml_path, classes):
    # Read the image to get its width and height
    img = cv2.imread(img_path)
    height, width, depth = img.shape
    # Read the YOLO .txt file
    with open(txt_path, 'r') as file:
        lines = file.readlines()
    # Create the XML root node
    annotation = ET.Element("annotation")
    # Basic information
    folder = ET.SubElement(annotation, "folder")
    folder.text = os.path.basename(os.path.dirname(img_path))
    filename = ET.SubElement(annotation, "filename")
    filename.text = os.path.basename(img_path)
    path = ET.SubElement(annotation, "path")
    path.text = img_path
    source = ET.SubElement(annotation, "source")
    database = ET.SubElement(source, "database")
    database.text = "Unknown"
    size = ET.SubElement(annotation, "size")
    width_elem = ET.SubElement(size, "width")
    width_elem.text = str(width)
    height_elem = ET.SubElement(size, "height")
    height_elem.text = str(height)
    depth_elem = ET.SubElement(size, "depth")
    depth_elem.text = str(depth)
    segmented = ET.SubElement(annotation, "segmented")
    segmented.text = "0"
    # Parse each object line
    for line in lines:
        data = line.strip().split()
        class_id = int(data[0])
        x_center = float(data[1])
        y_center = float(data[2])
        box_width = float(data[3])
        box_height = float(data[4])
        # Convert normalized coordinates to pixel coordinates
        x_min = int((x_center - box_width / 2) * width)
        y_min = int((y_center - box_height / 2) * height)
        x_max = int((x_center + box_width / 2) * width)
        y_max = int((y_center + box_height / 2) * height)
        # Create the object node
        object_elem = ET.SubElement(annotation, "object")
        name = ET.SubElement(object_elem, "name")
        name.text = classes[class_id]
        pose = ET.SubElement(object_elem, "pose")
        pose.text = "Unspecified"
        truncated = ET.SubElement(object_elem, "truncated")
        truncated.text = "0"
        difficult = ET.SubElement(object_elem, "difficult")
        difficult.text = "0"
        bndbox = ET.SubElement(object_elem, "bndbox")
        xmin = ET.SubElement(bndbox, "xmin")
        xmin.text = str(x_min)
        ymin = ET.SubElement(bndbox, "ymin")
        ymin.text = str(y_min)
        xmax = ET.SubElement(bndbox, "xmax")
        xmax.text = str(x_max)
        ymax = ET.SubElement(bndbox, "ymax")
        ymax.text = str(y_max)
    # Write out the XML file
    # xml_str = ET.tostring(annotation, encoding='unicode')
    xml_str = ET.tostring(annotation, encoding='utf-8')
    xml_str = xml_str.decode('utf-8')  # decode the byte string to a Unicode string
    with open(xml_path, 'w', encoding='utf-8') as file:
        file.write(xml_str)
def convert_all_txt_to_xml(txt_folder, img_folder, output_folder, classes):
    # Create the output folder if it does not exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    # Walk through every .txt file in the folder
    for txt_file in os.listdir(txt_folder):
        if txt_file.endswith('.txt'):
            txt_path = os.path.join(txt_folder, txt_file)
            img_name = txt_file.replace('.txt', '.jpg')  # assumes images are stored as .jpg
            img_path = os.path.join(img_folder, img_name)
            if os.path.exists(img_path):
                xml_file_name = txt_file.replace('.txt', '.xml')
                xml_path = os.path.join(output_folder, xml_file_name)
                convert_yolo_to_voc(txt_path, img_path, xml_path, classes)
                # print(f"Converted {txt_file} to {xml_file_name}")
            else:
                print(f"Image for {txt_file} not found: {img_path}")
# Example usage
txt_folder = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/labels'  # folder of YOLO-format .txt files
img_folder = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/images'  # folder of the corresponding images
output_folder = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/xmls'  # output folder for the .xml files
classes = ['corrosion', 'craze', 'hide_craze', 'surface_attach', 'surface_corrosion', 'surface_eye',
           'surface_injure', 'surface_oil', 'thunderstrike']  # replace with your own class list
convert_all_txt_to_xml(txt_folder, img_folder, output_folder, classes)
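ET.tostring writes the whole annotation on a single line. If you want indented XML for manual inspection, one option (a minimal sketch using the standard-library xml.dom.minidom; here xml_path stands for one of the generated files) is to reformat the file after it has been written:

from xml.dom import minidom

# Re-open a generated .xml and rewrite it with indentation
pretty_str = minidom.parse(xml_path).toprettyxml(indent="    ")
with open(xml_path, 'w', encoding='utf-8') as file:
    file.write(pretty_str)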
3 train / test / val split
import os
import random
import shutil
def split_dataset(images_dir, labels_dir, output_dir, split_ratio=(0.8, 0.1, 0.1)):
    """
    Split an image/label dataset into train, val and test sets.
    :param images_dir: path to the image folder
    :param labels_dir: path to the label folder
    :param output_dir: output directory
    :param split_ratio: split ratio (train, val, test)
    """
    # Make sure the output directories exist
    os.makedirs(output_dir, exist_ok=True)
    for subdir in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_dir, subdir, 'images'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, subdir, 'labels'), exist_ok=True)
    # Collect all image file names
    images = [f for f in os.listdir(images_dir) if f.endswith('.jpg') or f.endswith('.png')]
    labels = [f.replace('.jpg', '.txt').replace('.png', '.txt') for f in images]
    # Shuffle images and labels together
    combined = list(zip(images, labels))
    random.shuffle(combined)
    images[:], labels[:] = zip(*combined)
    # Compute the split points
    num_train = int(len(images) * split_ratio[0])
    num_val = int(len(images) * split_ratio[1])
    # Copy each image/label pair into its subset
    for i, image in enumerate(images):
        label = labels[i]
        if i < num_train:
            subset = 'train'
        elif i < num_train + num_val:
            subset = 'val'
        else:
            subset = 'test'
        shutil.copy(os.path.join(images_dir, image), os.path.join(output_dir, subset, 'images', image))
        shutil.copy(os.path.join(labels_dir, label), os.path.join(output_dir, subset, 'labels', label))
# Example call
split_dataset('/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/images',
              '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/origin_data/labels',
              '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data')
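To confirm that the 0.8 / 0.1 / 0.1 ratios came out as expected, a small check (a sketch, assuming the same split_data path as in the call above) can count the files that landed in each subset:

import os

split_root = '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data'
for subset in ['train', 'val', 'test']:
    n_img = len(os.listdir(os.path.join(split_root, subset, 'images')))
    n_lbl = len(os.listdir(os.path.join(split_root, subset, 'labels')))
    print(f"{subset}: {n_img} images, {n_lbl} labels")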
4 Generate the yaml file
import yaml
import os
def create_yaml(output_dir, train_dir, val_dir, test_dir, class_names, num_classes):
    """
    Create the YOLOv8 dataset configuration file.
    :param output_dir: output directory
    :param train_dir: training set directory
    :param val_dir: validation set directory
    :param test_dir: test set directory
    :param class_names: list of class names
    :param num_classes: number of classes
    """
    data = {
        'train': train_dir,
        'val': val_dir,
        'test': test_dir,
        'nc': num_classes,
        'names': class_names
    }
    with open(os.path.join(output_dir, 'dataset.yaml'), 'w') as f:
        yaml.dump(data, f, default_flow_style=False)
# Example call
create_yaml('/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data',
            '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data/train/images',
            '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data/val/images',
            '/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data/test/images',
            ['corrosion', 'craze', 'hide_craze', 'surface_attach', 'surface_corrosion', 'surface_eye',
             'surface_injure', 'surface_oil', 'thunderstrike'], 9)
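To make sure the file was written correctly, you can read it back with yaml.safe_load (a sketch, assuming the same split_data path as above):

import os
import yaml

with open(os.path.join('/home/wyh/artrc_catkin/src/artrc_yolov8/datasets/split_data', 'dataset.yaml')) as f:
    cfg = yaml.safe_load(f)
print(cfg['nc'], cfg['names'])  # should print 9 and the class list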
5 Convert .pt to .onnx
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from ultralytics import YOLO

model = YOLO("best.pt")
success = model.export(format="onnx", half=False, dynamic=True, opset=17)
print(success)  # export result
6 Data augmentation
import cv2
import numpy as np
import os
import glob

# Data augmentation: each transform is saved as a separate image derived from save_path
def augment_data(img, save_path):
    rows, cols, _ = img.shape
    # Horizontal flip (applied with probability 0.5)
    if np.random.random() > 0.5:
        img = cv2.flip(img, 1)
        img_name = os.path.splitext(save_path)[0] + "_flip.png"
        cv2.imwrite(img_name, img)
        print("Saved augmented image:", img_name)
    # Random scaling
    scale = np.random.uniform(0.9, 1.1)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 0, scale)
    img_transformed = cv2.warpAffine(img, M, (cols, rows))
    img_name = os.path.splitext(save_path)[0] + "_transform.png"
    cv2.imwrite(img_name, img_transformed)
    print("Saved augmented image:", img_name)
    # Random rotation
    angle = np.random.randint(-10, 10)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img_rotated = cv2.warpAffine(img, M, (cols, rows))
    img_name = os.path.splitext(save_path)[0] + "_rotated.png"
    cv2.imwrite(img_name, img_rotated)
    print("Saved augmented image:", img_name)
    # Gaussian noise
    mean = 0
    std = np.random.uniform(5, 15)
    noise = np.zeros(img.shape, np.float32)
    cv2.randn(noise, mean, std)
    noise = np.uint8(noise)
    img_noisy = cv2.add(img, noise)
    img_name = os.path.splitext(save_path)[0] + "_noisy.png"
    cv2.imwrite(img_name, img_noisy)
    print("Saved augmented image:", img_name)
    # Random contrast and brightness
    alpha = np.random.uniform(0.8, 1.2)
    beta = np.random.randint(-10, 10)
    img_contrast = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)
    img_name = os.path.splitext(save_path)[0] + "_contrast.png"
    cv2.imwrite(img_name, img_contrast)
    print("Saved augmented image:", img_name)
    return img
# Read every image in the data folder and run the augmentations
data_dir = "data"
save_dir = "result"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
# Use glob to iterate over all images in the data folder
for img_path in glob.glob(os.path.join(data_dir, "*.png")):
    img = cv2.imread(img_path)
    # Build the output file name used for the augmented images
    img_name = os.path.basename(img_path)
    save_path = os.path.join(save_dir, img_name)
    # Run the augmentations
    augment_data(img, save_path)
    # Also save a copy of the original image
    cv2.imwrite(save_path, img)
    print("Saved original image:", save_path)