Many training frameworks use the COCO format, while the original dataset may be in the YOLOv5 data format, so here is a simple conversion tutorial.
The directory format of the yolov5 dataset:
The images directory stores the images, such as 1.jpg, 2.jpg.
The labels directory stores the annotation information for the corresponding images, such as 1.txt, 2.txt.
The information in the txt is as follows:
Each line corresponds to one bbox, with the fields: class_id, xc (the x coordinate of the center of the box), yc (the y coordinate of the center of the box), w (the width of the box), h (the height of the box); all coordinates are normalized to the image size (values in [0, 1]).
The directory of the coco dataset is as follows:
The format of instances_test2017.json is as follows:
The overall structure of COCO is as follows: { "info": info, "licenses": [license], "categories": [categories], "images": [image], "annotations": [annotation], } — the descriptions of info, license, categories, image, and annotation are as follows ----- info = { "year": int, #year "version": str, #dataset version "description": str, #dataset description "contributor": str, #the provider of the dataset "url": str, #the download address of the dataset "date_created": datetime, #the creation date of the dataset } categories = { "id": int, #category id "name": str, #category name "supercategory": str, #large category name } license = { "id": int, "name": str, "url": str, } image = { "id": int, #the index id of the image, specified by yourself "width": int, #image width "height": int, #image height "file_name": str, #the file name of the image "license": int, "flickr_url": str, "coco_url": str, "date_captured": datetime, } annotation = { "id": int, #bounding box index id, specified by yourself "image_id": int, #index id of the image it belongs to "category_id": int, #id of the category it belongs to "segmentation": RLE or [polygon], #segmentation point sequence "area": float, #the area of the bbox "bbox": [x,y,width,height], #important: the coordinates of the upper-left corner plus the width and height of the bbox "iscrowd": 0 or 1, #whether the instance is crowded }
The completed conversion code is as follows:
import json
import os
import shutil  # kept from the original; useful if you also want to copy images
import cv2

# info / licenses / categories initialization.
# The same info, licenses and categories are shared by train.json and test.json.
info = {
    "year": 2022,
    "version": '1.0',
    # Must be a string: the original `2022-10-15` (no quotes) was integer
    # subtraction and silently evaluated to 1997.
    "date_created": '2022-10-15',
}

# COCO expects "licenses" to be a LIST of license dicts (the original used a
# bare dict). info/license content is placeholder-only for this tutorial.
licenses = [
    {
        "id": 1,
        "name": "null",
        "url": "null",
    }
]

# Your own label categories; the ids must match the class ids used by yolov5.
categories = [
    {
        "id": 0,
        "name": 'class_1',
        "supercategory": 'lines',
    },
    {
        "id": 1,
        "name": 'class_2',
        "supercategory": 'lines',
    },
]

# One output dictionary per split; info/licenses/categories are shared.
train_data = {'info': info, 'licenses': licenses, 'categories': categories,
              'images': [], 'annotations': []}
test_data = {'info': info, 'licenses': licenses, 'categories': categories,
             'images': [], 'annotations': []}


def v5_covert_coco_format(image_path, label_path):
    """Convert one yolov5 split into COCO-format image/annotation lists.

    image_path: directory holding the .jpg images (e.g. images/train);
    label_path: directory holding the matching .txt labels (e.g. labels/train).
    Returns a tuple (images, annotations) ready to be placed into the
    'images' / 'annotations' fields of a COCO json.
    """
    images = []
    annotations = []
    # Running annotation id: the original `index * 100 + idx` scheme produced
    # duplicate ids whenever an image had more than 100 boxes.
    ann_id = 0
    # sorted() makes image ids deterministic across runs/filesystems.
    for index, img_file in enumerate(sorted(os.listdir(image_path))):
        if not img_file.endswith('.jpg'):
            continue
        img = cv2.imread(os.path.join(image_path, img_file))
        if img is None:
            # Unreadable/corrupt image: cv2.imread returns None; skip it
            # instead of crashing on `img.shape`.
            continue
        height, width, _ = img.shape
        images.append({
            'id': index,              # image index id, referenced by annotations
            'file_name': img_file,
            'width': width,
            'height': height,
        })

        # Process label information: one "class xc yc w h" line per bbox,
        # all coordinates normalized to the image size.
        label_file = os.path.join(label_path, img_file.replace('.jpg', '.txt'))
        if not os.path.exists(label_file):
            # Image without annotations — valid in yolov5, keep the image entry.
            continue
        with open(label_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                class_num, xs, ys, ws, hs = line.split()
                class_id = int(class_num)
                xc, yc, w, h = float(xs), float(ys), float(ws), float(hs)
                # De-normalize: center/size -> top-left corner + size in pixels.
                xmin = (xc - w / 2) * width
                ymin = (yc - h / 2) * height
                xmax = (xc + w / 2) * width
                ymax = (yc + h / 2) * height
                bbox_w = int(width * w)
                bbox_h = int(height * h)
                annotations.append({
                    'id': ann_id,                 # unique bbox id
                    'image_id': index,            # id of the image it belongs to
                    'category_id': class_id,
                    'bbox': [xmin, ymin, bbox_w, bbox_h],  # [x, y, w, h]
                    'area': bbox_w * bbox_h,
                    # Four-corner polygon of the box, clockwise from top-left.
                    'segmentation': [[xmin, ymin, xmax, ymin,
                                      xmax, ymax, xmin, ymax]],
                    'iscrowd': 0,                 # single instance
                })
                ann_id += 1
    return images, annotations


def gen_json_file(yolov5_data_path, coco_format_path, key):
    """Write annotations/instances_{key}2017.json for one split.

    yolov5_data_path: root directory containing images/{key} and labels/{key};
    coco_format_path: output root (an annotations/ subdirectory is created);
    key: split name — 'train' or 'test' (anything else is reported and skipped).
    """
    json_path = os.path.join(
        coco_format_path, f'annotations/instances_{key}2017.json')
    os.makedirs(os.path.dirname(json_path), exist_ok=True)

    data_path = os.path.join(yolov5_data_path, f'images/{key}')
    label_path = os.path.join(yolov5_data_path, f'labels/{key}')
    images, anns = v5_covert_coco_format(data_path, label_path)

    if key == 'train':
        train_data['images'] = images
        train_data['annotations'] = anns
        with open(json_path, 'w') as f:
            json.dump(train_data, f, indent=2)
    elif key == 'test':
        test_data['images'] = images
        test_data['annotations'] = anns
        with open(json_path, 'w') as f:
            json.dump(test_data, f, indent=2)
    else:
        print(f'key is {key}')
        return
    print(f'generate {key} json success!')


if __name__ == '__main__':
    yolov5_data_path = '/your/yolov5/datasets/path'
    coco_format_path = '/your/coco/datasets/path'
    gen_json_file(yolov5_data_path, coco_format_path, key='train')
    gen_json_file(yolov5_data_path, coco_format_path, key='test')