Converting Cityscapes to VOC-format object detection labels, complete code included

Converts the images and their labels in one pass and stores them in VOC format.

Most of the Cityscapes-to-VOC conversion code available online is incomplete, so I wrote this script to do the full conversion to VOC format. I hope you'll give it a like as encouragement.

Notes:

cityscapes_root must be changed to your own path: the directory that contains leftImg8bit_trainvaltest and gtFine_trainvaltest (see the layout sketch below).

Since Cityscapes does not provide the truncated and difficult fields used by VOC, both are set to 0 for every object.

For Cityscapes, val is usually used as the test set for evaluating performance, because the official test-set labels are not released (this prevents people from training on the test set to inflate their scores on the benchmark server).
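
For reference, here is the directory layout the script assumes, reconstructed from the paths used below (city and file names are only examples; the intermediate txt labels are written next to the json files):

cityscapes_root/
    leftImg8bit_trainvaltest/leftImg8bit/{train,val}/<city>/<name>_leftImg8bit.png
    gtFine_trainvaltest/gtFine/{train,val}/<city>/<name>_gtFine_polygons.json
    VOC2012/                (created by the script)
        Annotations/        <name>_leftImg8bit.xml
        JPEGImages/         <name>_leftImg8bit.jpg
        ImageSets/Main/     train.txt, val.txt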


import os
import os.path
from PIL import Image
import json


def position(pos):
    # Find xmin, ymin, xmax, ymax of a polygon, i.e. its bounding box (bbox)
    x = []
    y = []
    nums = len(pos)
    for i in range(nums):
        x.append(pos[i][0])
        y.append(pos[i][1])
    x_max = max(x)
    x_min = min(x)
    y_max = max(y)
    y_min = min(y)
    b = (float(x_min), float(y_min), float(x_max), float(y_max))
    return b
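
# A quick illustration of position() on a made-up polygon:
# position([(10, 20), (30, 5), (25, 40)]) returns (10.0, 5.0, 30.0, 40.0),
# i.e. (xmin, ymin, xmax, ymax) over the polygon's vertices.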


def convert_annotation(image_id):
    load_f = open(city_xml_dir + '/' + image_id + "_gtFine_polygons.json", 'r')  # Path of the input json label
    load_dict = json.load(load_f)
    load_f.close()
    out_file = open(city_xml_dir + '/' + '%s_leftImg8bit.txt' % (image_id), 'w')  # Path of the output txt label
    objects = load_dict['objects']
    nums = len(objects)

    cls_id = ''
    for i in range(0, nums):
        labels = objects[i]['label']
        if (labels in ['person',
                       'rider',
                       'car',
                       'truck',
                       'bus',
                       'train',
                       'motorcycle',
                       'bicycle']):  # Only these 8 categories are kept here
            print(labels)
            pos = objects[i]['polygon']
            bb = position(pos)
            cls_id = labels
            out_file.write(cls_id + " " + " ".join([str(a) for a in bb]) + '\n')
    out_file.close()
    if cls_id == '':
        print('no label json:', "%s_gtFine_polygons.json" % (image_id))
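
# Each line of the generated txt has the form "<label> xmin ymin xmax ymax",
# e.g. "car 100.0 200.0 300.0 400.0" (the numbers are made up for illustration).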


def images_id(orgin_picture_dir):  # Get the base name of each image under orgin_picture_dir (the split's image directory), stripping the _leftImg8bit.png suffix
    a = []
    filenames = os.listdir(orgin_picture_dir)
    for filename in filenames:
        filename = filename.split('_leftImg8bit.png')[0]
        a.append(filename)
    return a
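
# For example, a file named "aachen_000000_000019_leftImg8bit.png" yields the
# name "aachen_000000_000019" (the city/frame ids here are only illustrative).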


# Generate the xml annotations and convert the images to JPG, saving both to the output paths
def transformation(orgin_picture_dir, city_xml_dir, save_dir):
    img_basenames = os.listdir(orgin_picture_dir)
    img_names = []
    for item in img_basenames:
        path = os.path.join(orgin_picture_dir, item)
        image = Image.open(path).convert('RGB')  # convert('RGB') guards against non-RGB modes before the JPEG save

        # Split off the image name (still containing _leftImg8bit) and its suffix
        temp1, temp2 = os.path.splitext(item)
        jpg_name = temp1 + '.jpg'

        # Convert the PNG image to JPG format (Images_dir is the global JPEGImages path defined in __main__)
        image.save(os.path.join(Images_dir, jpg_name), 'JPEG')

        img_names.append(temp1)
        print(img_names)
    for img in img_names: # img is the pic name without suffix
        im = Image.open((orgin_picture_dir + img + '.png'))
        width, height = im.size

        # Open the corresponding txt file written by convert_annotation
        gt = open(city_xml_dir + '/' + img + '.txt').read().splitlines()

        # write in xml file
        xml_file = open((save_dir + '/' + img + '.xml'), 'w')
        xml_file.write('<annotation>\n')
        xml_file.write(' <folder>CITYSCAPE</folder>\n')
        xml_file.write(' <filename>' + str(img) + '.jpg' + '</filename>\n')  # the image was saved as .jpg above
        xml_file.write(' <size>\n')
        xml_file.write(' <width>' + str(width) + '</width>\n')
        xml_file.write(' <height>' + str(height) + '</height>\n')
        xml_file.write(' <depth>3</depth>\n')
        xml_file.write(' </size>\n')

        # write the region of image on xml file
        for img_each_label in gt:
            spt = img_each_label.split(' ')  # If your txt were comma-separated, use img_each_label.split(',') instead
            xml_file.write(' <object>\n')
            xml_file.write(' <name>' + str(spt[0]) + '</name>\n')
            xml_file.write(' <pose>Unspecified</pose>\n')
            xml_file.write(' <truncated>0</truncated>\n')
            xml_file.write(' <difficult>0</difficult>\n')
            xml_file.write(' <bndbox>\n')
            xml_file.write(' <xmin>' + str(spt[1]) + '</xmin>\n')
            xml_file.write(' <ymin>' + str(spt[2]) + '</ymin>\n')
            xml_file.write(' <xmax>' + str(spt[3]) + '</xmax>\n')
            xml_file.write(' <ymax>' + str(spt[4]) + '</ymax>\n')
            xml_file.write(' </bndbox>\n')
            xml_file.write(' </object>\n')
        xml_file.write('</annotation>')
        xml_file.close()
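
# The generated file is a standard VOC annotation, roughly (values made up):
# <annotation> <filename>aachen_000000_000019_leftImg8bit.jpg</filename>
#   <size> <width>2048</width> <height>1024</height> <depth>3</depth> </size>
#   <object> <name>car</name> ... <bndbox> <xmin>100.0</xmin> ... </bndbox> </object>
# </annotation>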


if __name__ == '__main__':

    # Paths of the source data
    # cityscapes_root must be set to your own path: the directory that contains leftImg8bit_trainvaltest and gtFine_trainvaltest
    cityscapes_root = '/home/lyd/GithubProject/Cityscapes_file/'
    cityscapes_img = os.path.join(cityscapes_root, 'leftImg8bit_trainvaltest/leftImg8bit/' )
    cityscapes_xml = os.path.join(cityscapes_root, 'gtFine_trainvaltest/gtFine/')
    # For Cityscapes, val is usually used as the test set, because the official test-set labels are not released (to prevent training on the test set to inflate benchmark scores)
    datasets = ['train', 'val']

    # Path to be created
    save_root = os.path.join(cityscapes_root, 'VOC2012/')
    Annotations_dir = os.path.join(save_root, 'Annotations')
    Images_dir = os.path.join(save_root, 'JPEGImages')
    ImageSets_dir = os.path.join(save_root, 'ImageSets/Main')

    # Create the output directories
    os.makedirs(save_root, exist_ok=True)
    os.makedirs(Annotations_dir, exist_ok=True)
    os.makedirs(Images_dir, exist_ok=True)
    os.makedirs(ImageSets_dir, exist_ok=True)

    # Start traversing the dataset and cities
    for dataset in datasets:
        image_set = []  # Base names of all images in this split (written to the ImageSets file later)
        xml_dataset_dir = cityscapes_xml + dataset + '/'
        img_dataset_dir = cityscapes_img + dataset + '/'
        city_names = os.listdir(xml_dataset_dir)
        print(city_names)
        for city in city_names:
            city_xml_dir = xml_dataset_dir + city + '/'  # Path of this city's json labels
            city_img_dir = img_dataset_dir + city + '/'  # Path of this city's Cityscapes images
            for image in os.listdir(city_img_dir):
                name, png = os.path.splitext(image)
                image_set.append(name)

            names = images_id(city_img_dir)

            # Convert to txt format annotation
            for image_id in names:
                print(image_id)
                convert_annotation(image_id)

            # Use the txt files to generate the xml annotations
            transformation(city_img_dir, city_xml_dir, Annotations_dir)

        # Save the ImageSets file for this split
        save_file = dataset + '.txt'
        with open(os.path.join(ImageSets_dir, save_file), 'w') as f:
            for item in image_set:
                f.write(str(item) + '\n')
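
After the conversion finishes, a quick sanity check like the one below can confirm that a generated annotation parses correctly. This is optional and not part of the conversion itself; the annotation file name is only an example, so point ann_path at any xml that actually exists under Annotations.

import os
import xml.etree.ElementTree as ET

# Hypothetical example path; replace it with a real file under VOC2012/Annotations
ann_path = os.path.join('/home/lyd/GithubProject/Cityscapes_file/', 'VOC2012/Annotations',
                        'aachen_000000_000019_leftImg8bit.xml')

tree = ET.parse(ann_path)
root = tree.getroot()
print(root.find('filename').text)          # image file the annotation refers to
for obj in root.findall('object'):
    name = obj.find('name').text           # class label, e.g. 'car'
    box = obj.find('bndbox')
    print(name, [box.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')])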