The previous blog post, "[Target Detection] YOLOv5 ran through the VisDrone data set", already introduced the VisDrone data set, so I will not repeat that here. This article focuses on extracting and filtering targets from the VisDrone data set and the CARPK data set.
Description of requirements
The goal is to extract the car and person annotations from the VisDrone data set and merge them: cars are labeled as category 0 and people as category 1, and the labels are converted into the txt format supported by YOLO.
Visdrone Dataset
Convert Visdrone data set to YOLO txt format
First, perform a format conversion on the original data set. The following code continues to use the official conversion script.
from pathlib import Path


def convert_box(size, box):
    """Convert a VisDrone pixel box to a normalized YOLO xywh box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(left, top, width, height)`` of the box in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh


def visdrone2yolo(dir):
    """Convert the VisDrone annotations of one dataset split to YOLO labels.

    For every ``<dir>/annotations/*.txt`` file, the matching
    ``<dir>/images/<name>.jpg`` is opened to read its size, and a label file
    ``<dir>/labels/<name>.txt`` is written with one
    ``cls x_center y_center width height`` line per kept object.

    Args:
        dir: Split directory (``str`` or ``Path``) that contains the
            ``annotations`` and ``images`` sub-folders.
    """
    from PIL import Image
    from tqdm import tqdm

    dir = Path(dir)
    labels_dir = dir / 'labels'
    labels_dir.mkdir(parents=True, exist_ok=True)  # make labels directory

    pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
    for f in pbar:
        # Only the size is needed; the context manager closes the image file.
        with Image.open((dir / 'images' / f.name).with_suffix('.jpg')) as im:
            img_size = im.size

        lines = []
        with open(f, 'r') as file:  # read annotation.txt
            for row in [x.split(',') for x in file.read().strip().splitlines()]:
                if row[4] == '0':  # VisDrone 'ignored regions' class 0
                    continue
                cls = int(row[5]) - 1  # category number - 1
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")

        # Build the target path from the file name instead of string-replacing
        # 'annotations' in the full path, which would also rewrite any parent
        # directory that happens to be called 'annotations'.
        with open(labels_dir / f.name, 'w') as fl:
            fl.writelines(lines)  # write label.txt


if __name__ == '__main__':
    # VisDrone2019 folder directory under the datasets folder
    dataset_root = Path(r'E:\Dataset\VisDrone')
    # Convert VisDrone annotations to YOLO labels for every split
    for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
        visdrone2yolo(dataset_root / d)
Label visualization
Visualize the txt labels to see what they look like before filtering.
import os

import cv2
import numpy as np

# Input image folder (modify to match your data)
img_folder = "image"
# Input label folder with YOLO txt files (modify to match your data)
label_folder = "labels2"
# Output image folder, placed in the current working directory
output_folder = os.path.join(os.getcwd(), "output")

# Box colour (as passed to cv2.rectangle) per class id; any other class id
# is drawn in DEFAULT_COLOR.
CLASS_COLORS = {0: (0, 255, 0), 1: (255, 0, 0)}
DEFAULT_COLOR = (0, 0, 0)


def xywh2xyxy(x, w1, h1, img):
    """Draw one normalized YOLO box on ``img`` and return the image.

    Args:
        x: Sequence ``(label, x_center, y_center, width, height)`` with the
            four box values normalized by the image size.
        w1: Image width in pixels.
        h1: Image height in pixels.
        img: Image array to draw on; ``cv2.rectangle`` modifies it in place.

    Returns:
        The same ``img`` with the rectangle drawn on it.
    """
    label, x_center, y_center, box_w, box_h = x

    # Bounding box denormalization
    x_center, box_w = x_center * w1, box_w * w1
    y_center, box_h = y_center * h1, box_h * h1

    # Centre/size -> top-left and bottom-right corners
    top_left = (int(x_center - box_w / 2), int(y_center - box_h / 2))
    bottom_right = (int(x_center + box_w / 2), int(y_center + box_h / 2))

    color = CLASS_COLORS.get(int(label), DEFAULT_COLOR)
    cv2.rectangle(img, top_left, bottom_right, color, 2)
    return img


if __name__ == '__main__':
    # exist_ok avoids a crash when the script is run a second time
    os.makedirs(output_folder, exist_ok=True)

    for img_name in sorted(os.listdir(img_folder)):
        # Pair image and label by file name rather than by list position, so
        # a missing or extra file cannot shift every following pair.
        stem = os.path.splitext(img_name)[0]
        image_path = os.path.join(img_folder, img_name)
        label_path = os.path.join(label_folder, stem + '.txt')

        # Read image file; cv2.imread returns None when it cannot decode it
        img = cv2.imread(image_path)
        if img is None:
            print('Skipping unreadable image: {}'.format(image_path))
            continue
        h, w = img.shape[:2]

        # Read labels
        if os.path.isfile(label_path):
            with open(label_path, 'r') as f:
                lb = np.array([line.split() for line in f.read().strip().splitlines()],
                              dtype=np.float32)
        else:
            print('No label file for {}, saving it without boxes'.format(image_path))
            lb = []

        # Draw each target
        for box in lb:
            img = xywh2xyxy(box, w, h, img)

        cv2.imwrite(os.path.join(output_folder, '{}.png'.format(stem)), img)
The visualization effect is shown in the figure:
Note: This data set also distinguishes human postures. People who are walking are classified as pedestrians, and other postures (such as lying down or sitting down) are marked as people.
Filter tag
Specific filtering rules:
- Merge car, van, truck, bus into car(0)
- Merge pedestrian, people into person(1)
- Discard other categories
import os

import numpy as np
from tqdm import tqdm

# VisDrone categories (index = class id in the converted YOLO labels)
# names: ['pedestrian', 'people', 'bicycle', 'car', 'van', 'truck',
#         'tricycle', 'awning-tricycle', 'bus', 'motor']

# Input label folder (modify to match your data)
label_folder = "labels"
# Label output folder
label_output = "labels2"

# Source class ids that are merged into each target class
car_set = [3, 4, 5, 8]  # car, van, truck, bus -> 0
person_set = [0, 1]     # pedestrian, people   -> 1

# Source class id -> merged class id; ids that are not listed are discarded
CLASS_MAP = {**{c: 0 for c in car_set}, **{p: 1 for p in person_set}}


def remap_labels(rows):
    """Return the label lines kept after merging classes.

    Args:
        rows: Iterable of 5-value rows ``(cls, x, y, w, h)``.

    Returns:
        A list of newline-terminated ``"cls x y w h"`` strings in which
        ``cls`` has been replaced through ``CLASS_MAP``; rows whose class is
        not in ``CLASS_MAP`` are dropped.
    """
    kept = []
    for obj in rows:
        new_cls = CLASS_MAP.get(int(obj[0]))
        if new_cls is None:
            continue
        kept.append(('%g ' * 5).rstrip() % (new_cls, *obj[1:5]) + '\n')
    return kept


if __name__ == '__main__':
    os.makedirs(label_output, exist_ok=True)

    for label_file in tqdm(os.listdir(label_folder)):
        # Read labels
        with open(os.path.join(label_folder, label_file), 'r') as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()],
                          dtype=np.float32)

        # Write labels. 'w' (not 'a') so that re-running the script replaces
        # the previous output instead of appending duplicate boxes.
        with open(os.path.join(label_output, label_file), 'w') as f:
            f.writelines(remap_labels(lb))
The effect after filtering is shown in the figure:
CARPK data set
The CARPK data set is a car data set captured by a drone at an altitude of about 40 meters; it contains only a single target category: car.
Download address: https://github.com/zstar1003/Dataset
Original label format:
1019 521 1129 571 1
1013 583 1120 634 1
The corresponding meanings are: xmin, ymin, xmax, ymax, cls
Processing script:
import os

import numpy as np
from tqdm import tqdm

# Input label folder (modify to match your data),
# e.g. r"E:\Dataset\CARPK_devkit\data\Annotations"
label_folder = r"annotations"
# Label output folder
label_output = r"labels"

# Image width and height used to normalize the boxes
img_width = 1280
img_height = 720

# Class id written to the YOLO labels. The CARPK annotations store cls = 1,
# but YOLO class ids are zero-based and this article uses car = 0 / person = 1,
# so copying the CARPK value through would label every car as a person.
CAR_CLASS_ID = 0


def carpk_to_yolo(obj, width=img_width, height=img_height, class_index=CAR_CLASS_ID):
    """Convert one CARPK row to a YOLO label line.

    Args:
        obj: Sequence ``(xmin, ymin, xmax, ymax, cls)`` in pixels; the
            trailing ``cls`` value is ignored in favour of ``class_index``.
        width: Image width in pixels.
        height: Image height in pixels.
        class_index: Class id to write at the start of the line.

    Returns:
        ``"cls x_center y_center w h"`` with the four box values relative to
        the image size and rounded to 6 decimal places (no trailing newline).
    """
    xmin, ymin, xmax, ymax = (int(v) for v in obj[:4])

    # Corner coordinates -> centre point and size
    box_w = xmax - xmin
    box_h = ymax - ymin
    xcenter = xmin + box_w / 2
    ycenter = ymin + box_h / 2

    # Absolute -> relative coordinates, 6 decimal places
    values = [
        class_index,
        round(xcenter / width, 6),
        round(ycenter / height, 6),
        round(box_w / width, 6),
        round(box_h / height, 6),
    ]
    return " ".join(str(v) for v in values)


if __name__ == '__main__':
    os.makedirs(label_output, exist_ok=True)

    for label_file in tqdm(os.listdir(label_folder)):
        # Read labels
        with open(os.path.join(label_folder, label_file), 'r') as f:
            lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=int)

        # Write all lines of a file in one go. Opening with 'w' keeps the
        # result the same on every run instead of appending duplicates.
        with open(os.path.join(label_output, label_file), 'w') as f:
            f.write("\n".join(carpk_to_yolo(obj) for obj in lb))
Visually verify the conversion effect: