Convert the Cityscapes images and their annotations in a single pass and store them in VOC format
Most of the code available online for converting Cityscapes to VOC is incomplete, so I wrote this script to perform the complete conversion to VOC format. I hope you will give it a like as encouragement.
Note:
You need to set cityscapes_root yourself; it should point to the directory that contains leftImg8bit_trainvaltest and gtFine_trainvaltest.
Since Cityscapes does not provide the truncated and difficult attributes used by VOC, the truncated and difficult values of every object are set to 0.
For the Cityscapes dataset, the val split is usually used as the test set for evaluating performance, because the official labels of the test split have not been released. The labels are withheld to prevent people from training on the test set to inflate their scores on the benchmark website.
import os
import os.path
from PIL import Image
import json

# The eight Cityscapes classes that are kept in the VOC annotations.
WANTED_LABELS = ('person', 'rider', 'car', 'truck', 'bus', 'train',
                 'motorcycle', 'bicycle')


def position(pos):
    """Return the bounding box of a polygon.

    Args:
        pos: Sequence of points; element 0 of each point is x, element 1 is y.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` of floats.

    Raises:
        ValueError: If ``pos`` is empty (``min``/``max`` of an empty list).
    """
    xs = [point[0] for point in pos]
    ys = [point[1] for point in pos]
    return (float(min(xs)), float(min(ys)), float(max(xs)), float(max(ys)))


def convert_annotation(image_id, xml_dir=None):
    """Write the boxes of one ``*_gtFine_polygons.json`` file to a txt file.

    Reads ``<xml_dir>/<image_id>_gtFine_polygons.json`` and writes
    ``<xml_dir>/<image_id>_leftImg8bit.txt`` with one line per kept object:
    ``label xmin ymin xmax ymax``. Only objects whose label is in
    ``WANTED_LABELS`` are kept.

    Args:
        image_id: Image name without the ``_leftImg8bit.png`` suffix.
        xml_dir: Directory holding the json file. When omitted, the
            module-level ``city_xml_dir`` set by the ``__main__`` loop is
            used, which is how the function originally found its directory.
    """
    if xml_dir is None:
        xml_dir = city_xml_dir  # global assigned in the __main__ section

    json_path = os.path.join(xml_dir, image_id + "_gtFine_polygons.json")
    txt_path = os.path.join(xml_dir, '%s_leftImg8bit.txt' % image_id)

    with open(json_path, 'r') as load_f:
        load_dict = json.load(load_f)

    found_label = False
    # The context manager guarantees the txt file is flushed and closed
    # before transformation() reads it back.
    with open(txt_path, 'w') as out_file:
        for obj in load_dict['objects']:
            label = obj['label']
            if label not in WANTED_LABELS:
                continue
            print(label)
            bb = position(obj['polygon'])
            out_file.write(label + " " + " ".join(str(a) for a in bb) + '\n')
            found_label = True

    if not found_label:
        print('no label json:', "%s_gtFine_polygons.json" % image_id)


def images_id(orgin_picture_dir):
    """Return the image ids found in a directory.

    Each id is the file name with everything from ``_leftImg8bit.png``
    onwards removed.

    Args:
        orgin_picture_dir: Directory containing the images of one city.

    Returns:
        Sorted list of image ids.
    """
    return [filename.split('_leftImg8bit.png')[0]
            for filename in sorted(os.listdir(orgin_picture_dir))]


def transformation(orgin_picture_dir, city_xml_dir, save_dir, images_dir=None):
    """Convert the images of one city to JPEG and write their VOC xml files.

    For every file in ``orgin_picture_dir`` the image is saved as JPEG into
    ``images_dir`` and an xml annotation is written to ``save_dir``. The
    boxes are read from ``<city_xml_dir>/<image name>.txt``, the file
    produced by ``convert_annotation``.

    Args:
        orgin_picture_dir: Directory containing the source PNG images.
        city_xml_dir: Directory containing the txt label files.
        save_dir: Directory that receives the xml annotations.
        images_dir: Directory that receives the JPEG images. When omitted,
            the module-level ``Images_dir`` set in the ``__main__`` section
            is used, which is how the function originally found it.
    """
    if images_dir is None:
        images_dir = Images_dir  # global assigned in the __main__ section

    # (name without suffix, width, height) for every converted image.
    converted = []
    for item in sorted(os.listdir(orgin_picture_dir)):
        path = os.path.join(orgin_picture_dir, item)
        # Image name without its suffix (still contains _leftImg8bit).
        stem, _suffix = os.path.splitext(item)
        with Image.open(path) as image:
            width, height = image.size
            # JPEG cannot store alpha or palette data, so normalise to RGB.
            rgb = image if image.mode == 'RGB' else image.convert('RGB')
            # pnginfo is a PNG-only save option, so it is not passed here.
            rgb.save(os.path.join(images_dir, stem + '.jpg'), format='JPEG')
        converted.append((stem, width, height))
    print([stem for stem, _w, _h in converted])

    for img, width, height in converted:
        # Open the corresponding txt label file.
        with open(os.path.join(city_xml_dir, img + '.txt')) as gt_file:
            gt = gt_file.read().splitlines()

        lines = [
            '<annotation>',
            '\t<folder>CITYSCAPE</folder>',
            # The image stored in JPEGImages is the converted .jpg file.
            '\t<filename>' + str(img) + '.jpg' + '</filename>',
            '\t<size>',
            '\t\t<width>' + str(width) + '</width>',
            '\t\t<height>' + str(height) + '</height>',
            '\t\t<depth>3</depth>',
            '\t</size>',
        ]
        # One <object> element per labelled region.
        for img_each_label in gt:
            if not img_each_label.strip():
                continue  # tolerate blank lines in the label file
            # If the txt is comma separated, split on ',' instead.
            spt = img_each_label.split(' ')
            lines.extend([
                '\t<object>',
                '\t\t<name>' + str(spt[0]) + '</name>',
                '\t\t<pose>Unspecified</pose>',
                '\t\t<truncated>0</truncated>',
                '\t\t<difficult>0</difficult>',
                '\t\t<bndbox>',
                '\t\t\t<xmin>' + str(spt[1]) + '</xmin>',
                '\t\t\t<ymin>' + str(spt[2]) + '</ymin>',
                '\t\t\t<xmax>' + str(spt[3]) + '</xmax>',
                '\t\t\t<ymax>' + str(spt[4]) + '</ymax>',
                '\t\t</bndbox>',
                '\t</object>',
            ])
        lines.append('</annotation>')

        with open(os.path.join(save_dir, img + '.xml'), 'w') as xml_file:
            xml_file.write('\n'.join(lines))


if __name__ == '__main__':
    # Set cityscapes_root to the directory that contains
    # leftImg8bit_trainvaltest and gtFine_trainvaltest.
    cityscapes_root = '/home/lyd/GithubProject/Cityscapes_file/'
    cityscapes_img = os.path.join(cityscapes_root, 'leftImg8bit_trainvaltest/leftImg8bit/')
    cityscapes_xml = os.path.join(cityscapes_root, 'gtFine_trainvaltest/gtFine/')

    # val is normally used as the test set because the official labels of
    # the Cityscapes test split are not released.
    datasets = ['train', 'val']

    # Paths to be created.
    save_root = os.path.join(cityscapes_root, 'VOC2012/')
    Annotations_dir = os.path.join(save_root, 'Annotations')
    Images_dir = os.path.join(save_root, 'JPEGImages')
    ImageSets_dir = os.path.join(save_root, 'ImageSets/Main')

    for directory in (save_root, Annotations_dir, Images_dir, ImageSets_dir):
        os.makedirs(directory, exist_ok=True)

    # Traverse every split and every city inside it.
    for dataset in datasets:
        split_names = []
        xml_dataset_dir = os.path.join(cityscapes_xml, dataset)
        img_dataset_dir = os.path.join(cityscapes_img, dataset)
        city_names = sorted(os.listdir(xml_dataset_dir))
        print(city_names)

        for city in city_names:
            city_xml_dir = os.path.join(xml_dataset_dir, city)  # json path
            city_img_dir = os.path.join(img_dataset_dir, city)  # image path

            for image in sorted(os.listdir(city_img_dir)):
                name, _png = os.path.splitext(image)
                split_names.append(name)

            # Convert the json annotations to txt.
            for image_id in images_id(city_img_dir):
                print(image_id)
                convert_annotation(image_id, city_xml_dir)

            # Convert the txt labels to xml and the images to JPEG.
            transformation(city_img_dir, city_xml_dir, Annotations_dir, Images_dir)

        # Save the ImageSets list of this split.
        save_file = dataset + '.txt'
        with open(os.path.join(ImageSets_dir, save_file), 'w') as f:
            for item in split_names:
                f.write(str(item) + '\n')