Convert XML and JSON formats to txt

XML annotations are converted as follows:

  • convert code

    import os
    import xml.etree.ElementTree as ET
    
    # Folder holding the source .xml annotation files (point this at your data).
    input_dir = r'C:\121\Annotations'

    # Folder that receives the generated .txt files (create it beforehand).
    out_dir = r'C:\121\txt'

    # Class names in order of first appearance; get_class() fills this list and
    # readxml() uses each name's position in it as the numeric class id.
    class_list = list()
    
    
    # Get all xml files in the directory
    def file_name(input_dir):
        """Return the base names (extension removed) of all ``.xml`` files under *input_dir*.

        The directory tree is walked recursively with ``os.walk``. Only the
        file stem is kept, so the sub-directory a file was found in is not
        part of the result. The extension match is case-sensitive.
        """
        stems = []
        for _root, _dirs, files in os.walk(input_dir):
            stems.extend(
                stem
                for stem, ext in map(os.path.splitext, files)
                if ext == '.xml'
            )
        return stems
    
    
    # Get all categories
    def get_class(filelist):
        """Collect every distinct object class name found in the given annotations.

        For each base name in *filelist*, the file ``input_dir/<name>.xml`` is
        parsed and the text of each ``object``/``name`` element is appended to
        the module-level ``class_list`` the first time it is seen. A class's
        index in that list therefore follows first-appearance order.

        Args:
            filelist: Iterable of annotation base names without the ``.xml``
                extension (the values ``file_name`` returns).
        """
        for stem in filelist:
            # os.path.join replaces the hand-built separator string, which was
            # an unterminated literal and kept the script from compiling.
            xml_path = os.path.join(input_dir, stem + ".xml")
            # The context manager closes the file even when parsing fails.
            with open(xml_path, encoding='UTF-8') as in_file:
                root = ET.parse(in_file).getroot()
            for obj in root.iter('object'):
                name_node = obj.find('name')
                if name_node is None:
                    # An object element without a name has no class to register.
                    continue
                cls = name_node.text
                if cls not in class_list:
                    class_list.append(cls)
    
    
    def ConverCoordinate(imgshape, bbox):
        """Convert a pixel-space corner box into normalized centre/size values.

        Args:
            imgshape: ``(width, height)`` of the image.
            bbox: ``(xmin, xmax, ymin, ymax)`` of the box.

        Returns:
            ``(x, y, w, h)``: the box centre and its extent, each multiplied by
            the reciprocal of the matching image dimension.
        """
        left, right, top, bottom = bbox

        # Reciprocal of each image dimension, used as the scaling factor.
        scale_x = 1. / imgshape[0]
        scale_y = 1. / imgshape[1]

        centre_x = (left + right) / 2.0 * scale_x
        centre_y = (top + bottom) / 2.0 * scale_y
        box_w = (right - left) * scale_x
        box_h = (bottom - top) * scale_y

        return centre_x, centre_y, box_w, box_h
    
    
    def readxml(i):
        """Convert one XML annotation into a YOLO-style ``.txt`` label file.

        Reads ``input_dir/<i>.xml``, takes the image size from its ``size``
        element and writes one line per ``object`` element to
        ``out_dir/<i>.txt`` in the form
        ``class_id x_center y_center width height``. The four box values are
        normalized by ``ConverCoordinate``. Class ids are positions in the
        module-level ``class_list``, so ``get_class`` must have filled it first.

        Args:
            i: Annotation base name without extension.

        Raises:
            ValueError: If an object's class name is not in ``class_list``.
        """
        # os.path.join replaces the hand-built "\" separator, which was an
        # unterminated string literal.
        xml_path = os.path.join(input_dir, i + ".xml")
        with open(xml_path, encoding='UTF-8') as xml_file:
            root = ET.parse(xml_file).getroot()

        # Get image size
        size = root.find('size')
        imgshape = (int(size.find('width').text), int(size.find('height').text))

        # Convert to yolov5 format, one text line per annotated object
        lines = []
        for obj in root.findall('object'):
            obj_id = class_list.index(obj.find('name').text)

            # Corner coordinates of this object's bounding box
            bbox = obj.find('bndbox')
            xmin = float(bbox.find('xmin').text)
            xmax = float(bbox.find('xmax').text)
            ymin = float(bbox.find('ymin').text)
            ymax = float(bbox.find('ymax').text)

            x, y, w, h = ConverCoordinate(imgshape, (xmin, xmax, ymin, ymax))
            lines.append('{} {} {} {} {}\n'.format(obj_id, x, y, w, h))

        # Open with 'w', not 'a': appending would duplicate every label each
        # time the script is re-run over the same annotations.
        with open(os.path.join(out_dir, i + ".txt"), 'w') as f:
            f.writelines(lines)
    
    
    # Get all files in the folder
    filelist = file_name(input_dir)

    # Get all categories
    get_class(filelist)

    # print class
    print(class_list)

    # Make sure the output folder exists so the writes below cannot fail on a
    # missing directory.
    os.makedirs(out_dir, exist_ok=True)

    # xml to txt
    for i in filelist:
        readxml(i)

    # Generate a class file under out_dir: one class name per line, in id order.
    # Opened with 'w' so a re-run replaces the list instead of appending a
    # second copy of it.
    with open(os.path.join(out_dir, "classes.txt"), 'w') as f:
        f.writelines(name + "\n" for name in class_list)
    

JSON format 1:

[{"image": "3591.jpg", "annotations": [{"label": "boat", "coordinates": {"x": 163.9615384615385, "y": 76.0384615384616, "width": 146.0, "height": 23.0}}, {"label": "boat", "coordinates": {"x": 247.4615384615385, "y": 38.538461538461604, "width": 291.0, "height": 52.0}}, {"label": "boat", "coordinates": {"x": 1756.9615384615386, "y": 32.538461538461604, "width": 136.0, "height": 56.0}}]}]

  • The corresponding conversion code is as follows:

    import os
    import json
    
    # Specify the folder path containing the JSON file
    folder_path = "C:/labels/json/"

    # Get the filenames of all JSON files in the folder
    json_files = [f for f in os.listdir(folder_path) if f.endswith(".json")]

    for json_file in json_files:
        # Build the full path to the JSON file
        json_path = os.path.join(folder_path, json_file)

        # read JSON file
        with open(json_path, "r") as file:
            data = json.load(file)

        # Extract annotation information. The file holds a list of image
        # entries; only the first entry is converted.
        annotations = data[0]['annotations']

        # Convert to text format: one "0 x y width height" line per annotation.
        # The class id is fixed to 0 and the coordinate values are written
        # exactly as stored in the JSON.
        # NOTE(review): the sample input stores pixel values; confirm whether
        # the consumer of these files expects normalized coordinates instead.
        lines = []
        for annotation in annotations:
            coordinates = annotation['coordinates']
            # Alternative, more verbose output format:
            # f"label: {annotation['label']}, x: {coordinates['x']}, y: {coordinates['y']}, width: {coordinates['width']}, height: {coordinates['height']}\n"
            lines.append(
                f"0 {coordinates['x']} {coordinates['y']} "
                f"{coordinates['width']} {coordinates['height']}\n"
            )
        annotations_str = "".join(lines)

        # Build the path and filename of the text file
        txt_file = os.path.splitext(json_file)[0] + ".txt"
        txt_path = os.path.join(folder_path, txt_file)

        # Save the annotation information as a text file
        with open(txt_path, "w") as file:
            file.write(annotations_str)
    
    

JSON format 2:

  • convert code

import json
import os

# Maps each label name used in the JSON files to its numeric class id.
name2id = {
    'boat': 0,
}


def convert(img_size, box):
    """Turn a corner-format box into normalized centre/size values.

    Args:
        img_size: ``(width, height)`` of the image.
        box: ``(x1, y1, x2, y2)`` corner coordinates.

    Returns:
        ``(x, y, w, h)``: the box centre and extent, each multiplied by the
        reciprocal of the matching image dimension.
    """
    inv_w = 1. / (img_size[0])
    inv_h = 1. / (img_size[1])

    # NOTE(review): the centre is shifted by one unit before scaling,
    # presumably a leftover from 1-based pixel coordinates. Confirm that this
    # offset is wanted for the inputs used here.
    centre_x = ((box[0] + box[2]) / 2.0 - 1) * inv_w
    centre_y = ((box[1] + box[3]) / 2.0 - 1) * inv_h
    span_x = (box[2] - box[0]) * inv_w
    span_y = (box[3] - box[1]) * inv_h

    return (centre_x, centre_y, span_x, span_y)


def decode_json(json_floder_path, json_name,
                txt_folder_path='C:/Users/labels/txt/'):
    """Convert the rectangle shapes of one JSON annotation file to a txt file.

    Reads ``json_floder_path/json_name``, takes the image size from its
    ``imageWidth``/``imageHeight`` keys and writes one
    ``class_id x y w h`` line per shape whose ``shape_type`` is
    ``'rectangle'``. The box values come from ``convert`` and the class id
    from ``name2id``. Entries whose name does not end in ``.json`` are ignored.

    Args:
        json_floder_path: Folder containing the JSON file.
        json_name: File name of the JSON file inside that folder.
        txt_folder_path: Folder that receives the ``.txt`` file. It defaults
            to the previously hard-coded location and is created if missing.

    Raises:
        KeyError: If a rectangle's label is not a key of ``name2id``.
    """
    # splitext instead of slicing off five characters: the slice silently
    # mangled the name of any file that did not end in ".json".
    stem, ext = os.path.splitext(json_name)
    if ext.lower() != '.json':
        return

    json_path = os.path.join(json_floder_path, json_name)
    # Context managers close both files; the original leaked the handles.
    with open(json_path, 'r', encoding='gb2312', errors='ignore') as json_file:
        data = json.load(json_file)

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    # Build all lines before touching the output, so a bad label cannot leave
    # a half-written txt file behind.
    lines = []
    for shape in data['shapes']:
        if shape['shape_type'] != 'rectangle':
            continue

        first, second = shape['points'][0], shape['points'][1]
        # The two corners may be stored in either order; sorting them keeps
        # the width and height from coming out negative.
        x1, x2 = sorted((int(first[0]), int(second[0])))
        y1, y2 = sorted((int(first[1]), int(second[1])))

        bbox = convert((img_w, img_h), (x1, y1, x2, y2))
        lines.append(
            str(name2id[shape['label']]) + " "
            + " ".join(str(a) for a in bbox) + '\n'
        )

    os.makedirs(txt_folder_path, exist_ok=True)
    txt_name = os.path.join(txt_folder_path, stem + '.txt')
    with open(txt_name, 'w') as txt_file:
        txt_file.writelines(lines)


if __name__ == "__main__":

    # The absolute path of the json folder
    json_floder_path = 'C:/Users/labels/json/'

    # Keep only .json entries: os.listdir also returns sub-folders and any
    # other files, which are not annotation files and cannot be decoded.
    json_names = [
        name for name in os.listdir(json_floder_path)
        if name.lower().endswith('.json')
    ]
    for json_name in json_names:
        decode_json(json_floder_path, json_name)