Extract YOLOv5 labels from the annotations .json file to train a model to recognize traffic signs

1. Extract YOLOv5 labels from annotations.json to use as the dataset labels

Code explanation

1. The keys in the dictionary are the traffic-sign categories, and the values are their class indices.

import json
import os

# Path of the annotation JSON file to convert.
# NOTE(review): the file name looks truncated (the surrounding text refers to
# "annotations.json") - confirm and point this at the real annotation file.
json_file_path = './.json'

# Folder that receives one YOLO label (.txt) file per image.
# Written as a raw string: the value is unchanged, but the backslashes are no
# longer parsed as (invalid) escape sequences.
txt_folder_path = r'D:\execre\Practical_training/out1'

# Mapping from traffic-sign category name to YOLO class index.
category_to_index = {
    'pl40': 0, 'p26': 1, 'p27': 2, 'pne': 3, 'i5': 4,
    'p5': 5, 'ip': 6, 'pn': 7, 'w30': 8, 'p11': 9,
    'p9': 10, 'i4': 11, 'i2': 12, 'pg': 13, 'w59': 14,
    'p3': 15, 'p17': 16, 'p12': 17, 'p22': 18, 'w57': 19,
    'p10': 20, 'w55': 21, 'w13': 22, 'w63': 23, 'p16': 24,
    'w32': 25, 'pb': 26, 'w58': 27, 'p19': 28, 'ps': 29,
    'pr40': 30, 'p6': 31, 'p23': 32, 'ph3.5': 33, 'p18': 34,
    'w3': 35, 'p8': 36, 'w12': 37, 'w47': 38, 'il50': 39,
    'p4': 40, 'w37': 41, 'w46': 42, 'i10': 43, 'w34': 44,
    'i13': 45, 'p1': 46, 'i12': 47, 'w22': 48, 'w16': 49,
    'p14': 50, 'pm10': 51, 'w15': 52, 'p2': 53, 'w45': 54,
    'i3': 55, 'p25': 56, 'p15': 57, 'w21': 58, 'p13': 59,
    'pa10': 60, 'i1': 61, 'w42': 62, 'p21': 63, 'w10': 64,
    'i14': 65, 'w20': 66, 'w35': 67, 'i11': 68, 'w8': 69,
    'w41': 70, 'p20': 71, 'pw3': 72, 'w28': 73, 'p28': 74,
    'w18': 75, 'w5': 76, 'w24': 77, 'i15': 78, 'w38': 79,
    'w2': 80, 'w56': 81, 'w43': 82, 'w66': 83, 'p24': 84,
}


def objects_to_yolo_lines(objects, img_width=2048, img_height=2048, mapping=None):
    """Convert the annotated objects of one image to YOLO label lines.

    Args:
        objects: Iterable of dicts, each with a 'category' name and a 'bbox'
            dict holding 'xmin', 'ymin', 'xmax' and 'ymax' in pixels.
        img_width: Image width in pixels used for normalization.
        img_height: Image height in pixels used for normalization.
        mapping: Category-name -> class-index dict; defaults to
            ``category_to_index``.

    Returns:
        A list of strings "<class> <x_center> <y_center> <width> <height>",
        with all box values normalized by the image size. Objects whose
        category is not in the mapping are skipped.
    """
    if mapping is None:
        mapping = category_to_index

    lines = []
    for obj in objects:
        category_index = mapping.get(obj['category'])
        if category_index is None:
            # Category is not one of the classes being trained on.
            continue

        bbox = obj['bbox']
        normalized_xmin = bbox['xmin'] / img_width
        normalized_ymin = bbox['ymin'] / img_height
        normalized_xmax = bbox['xmax'] / img_width
        normalized_ymax = bbox['ymax'] / img_height

        x_center = (normalized_xmin + normalized_xmax) / 2
        y_center = (normalized_ymin + normalized_ymax) / 2
        width = normalized_xmax - normalized_xmin
        height = normalized_ymax - normalized_ymin

        lines.append(f"{category_index} {x_center} {y_center} {width} {height}")
    return lines


def convert_annotations(json_path, output_dir, img_width=2048, img_height=2048):
    """Write one YOLO label file per image described in the annotation JSON.

    The JSON is expected to have a top-level 'imgs' dict whose values carry a
    'path' (image path) and an 'objects' list. Each label file is named after
    the image file (without directory and extension) and is written even when
    the image has no matching objects, in which case it is empty.

    Args:
        json_path: Path of the annotation JSON file.
        output_dir: Folder for the generated .txt files; created if missing.
        img_width: Image width in pixels used for normalization.
        img_height: Image height in pixels used for normalization.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    os.makedirs(output_dir, exist_ok=True)

    for img_data in data['imgs'].values():
        # File name of the image, excluding path and extension.
        img_name = os.path.splitext(os.path.basename(img_data['path']))[0]
        lines = objects_to_yolo_lines(img_data['objects'], img_width, img_height)

        txt_file_path = os.path.join(output_dir, f'{img_name}.txt')
        with open(txt_file_path, 'w') as f:
            # One object per line, no trailing newline.
            f.write('\n'.join(lines))


if __name__ == '__main__':
    convert_annotations(json_file_path, txt_folder_path)

The final result of a single txt

1. Delete the txt files in the txt folder that have no jpg of the same name in the jpg folder – this removes redundant labels.
import os

# Specify JPG folder path
jpg_folder = 'path/to/your/jpg/folder'

# Specify TXT folder path
txt_folder = 'path/to/your/txt/folder'


def remove_orphan_labels(jpg_dir, txt_dir, image_exts=('.jpg',)):
    """Delete .txt files in ``txt_dir`` that have no same-named image in ``jpg_dir``.

    Args:
        jpg_dir: Folder containing the images.
        txt_dir: Folder containing the label files.
        image_exts: File-name suffixes (case-sensitive) that count as images.

    Returns:
        Sorted list of the label file names that were deleted.
    """
    # A set gives O(1) membership tests; the names are checked once per txt file.
    image_stems = {
        os.path.splitext(name)[0]
        for name in os.listdir(jpg_dir)
        if name.endswith(tuple(image_exts))
    }

    removed = []
    for txt_file in os.listdir(txt_dir):
        if not txt_file.endswith('.txt'):
            continue
        if os.path.splitext(txt_file)[0] not in image_stems:
            os.remove(os.path.join(txt_dir, txt_file))
            removed.append(txt_file)
    return sorted(removed)


if __name__ == '__main__':
    remove_orphan_labels(jpg_folder, txt_folder)
2. If the dataset is too large, you can use code to delete a chosen number of photos together with their label files
import os
import random

# Specify the first folder path (images)
folder1 = './image'

# Specify the second folder path (labels)
folder2 = './labels'


def delete_random_pairs(image_dir, label_dir, k, rng=None):
    """Randomly delete ``k`` image/label pairs that exist in both folders.

    A pair is identified by a base name (file name without extension) that
    occurs in both folders. For each selected base name, ``<name>.jpg`` is
    removed from ``image_dir`` and ``<name>.txt`` from ``label_dir`` when
    those files exist.

    Args:
        image_dir: Folder containing the images.
        label_dir: Folder containing the label files.
        k: Number of pairs to delete.
        rng: Optional ``random.Random`` instance for reproducible selection;
            the module-level generator is used when omitted.

    Returns:
        List of the base names that were selected for deletion.

    Raises:
        ValueError: If ``k`` exceeds the number of shared base names.
    """
    image_stems = {os.path.splitext(name)[0] for name in os.listdir(image_dir)}
    label_stems = {os.path.splitext(name)[0] for name in os.listdir(label_dir)}

    # random.sample() no longer accepts a set (TypeError on Python 3.11+), so
    # sample from a sorted list, which also makes seeded runs reproducible.
    common_files = sorted(image_stems & label_stems)
    if k > len(common_files):
        raise ValueError(
            f'cannot delete {k} pairs: only {len(common_files)} names are '
            f'present in both folders'
        )

    sampler = random if rng is None else rng
    delete_files = sampler.sample(common_files, k)

    for delete_file in delete_files:
        file1_path = os.path.join(image_dir, f'{delete_file}.jpg')
        file2_path = os.path.join(label_dir, f'{delete_file}.txt')
        if os.path.isfile(file1_path):
            os.remove(file1_path)
        if os.path.isfile(file2_path):
            os.remove(file2_path)
    return delete_files


if __name__ == '__main__':
    # k = number of image/label pairs to delete
    delete_random_pairs(folder1, folder2, k=4585)

2. You can use the following link to train the model

YOLOv5 trains its own txt label data set_Where to find yolov5s.txt-CSDN Blog

3. The following are the problems encountered during the training process and their solutions

1. Problem: P, R, mAP and the other metrics are all zero during YOLOv5 training

Solution: Change the cuda version to version 10.2

Go to pytorch official website

Copy the install command but leave out `-c pytorch`; the download is faster if you switch to a mirror source.

Then it is recommended to configure a new environment for training

Finally, there is this result as a reference.