ChestX-Det dataset URL: https://github.com/Deepwise-AILab/ChestX-Det-Dataset/tree/main
Dataset JSON content:
[ {<!-- --> "file_name": "36199.png", "syms": [], "boxes": [], "polygons": [] }, {<!-- --> "file_name": "36302.png", "syms": [ "Effusion" ], "boxes": [ [ 799, 666, 937, 761 ] ], "polygons": [ [ [ 799, 678 ], [ 799, 678 ], [ 799, 680 ], [ 801, 681 ], [ 805, 684 ], [ 807, 684 ], [ 809, 685 ], [ 811, 686 ], [ 813, 686 ], [ 814, 686 ], [ 817, 687 ], [ 820, 687 ], [ 824, 690 ], [ 827, 690 ], [ 830, 691 ], [ 832, 691 ], [ 833, 691 ], [ 836, 693 ], [ 837, 693 ], [ 840, 695 ], [ 844, 696 ], [ 848, 696 ], [ 851, 697 ], [ 854, 697 ], [ 855, 697 ], [ 856, 698 ], [ 861, 699 ], [ 864, 699 ], [ 870, 701 ], [ 872, 703 ], [ 875, 704 ], [ 878, 705 ], [ 881, 705 ], [ 886, 707 ], [ 890, 709 ], [ 894, 711 ], [ 896, 713 ], [ 897, 714 ], [ 899, 714 ], [ 902, 716 ], [ 903, 717 ], [ 906, 720 ], [ 908, 721 ], [ 910, 725 ], [ 912, 726 ], [ 914, 728 ], [ 916, 731 ], [ 916, 732 ], [ 917, 733 ], [ 918, 734 ], [ 921, 738 ], [ 922, 740 ], [ 924, 741 ], [ 925, 744 ], [ 927, 745 ], [ 929, 747 ], [ 930, 751 ], [ 931, 752 ], [ 934, 753 ], [ 935, 755 ], [ 935, 756 ], [ 935, 757 ], [ 936, 758 ], [ 937, 759 ], [ 937, 761 ], [ 937, 759 ], [ 937, 757 ], [ 937, 756 ], [ 937, 752 ], [ 937, 750 ], [ 937, 747 ], [ 937, 745 ], [ 937, 744 ], [ 937, 743 ], [ 937, 741 ], [ 937, 740 ], [ 937, 739 ], [ 937, 738 ], [ 937, 737 ], [ 937, 735 ], [ 937, 733 ], [ 937, 731 ], [ 937, 729 ], [ 937, 728 ], [ 937, 726 ], [ 937, 723 ], [ 937, 720 ], [ 937, 717 ], [ 937, 716 ], [ 936, 714 ], [ 935, 710 ], [ 935, 709 ], [ 935, 708 ], [ 934, 705 ], [ 934, 704 ], [ 934, 703 ], [ 934, 702 ], [ 933, 701 ], [ 933, 698 ], [ 933, 696 ], [ 931, 695 ], [ 931, 692 ], [ 931, 691 ], [ 930, 690 ], [ 930, 686 ], [ 930, 685 ], [ 929, 681 ], [ 929, 680 ], [ 929, 679 ], [ 929, 677 ], [ 928, 674 ], [ 928, 673 ], [ 927, 672 ], [ 927, 671 ], [ 925, 671 ], [ 924, 668 ], [ 924, 666 ], [ 924, 667 ], [ 924, 669 ], [ 924, 672 ], [ 924, 674 ], [ 923, 677 ], [ 923, 678 ], [ 923, 679 ], [ 923, 680 ], [ 922, 681 ], [ 921, 683 ], [ 921, 684 ], [ 920, 685 ], 
[ 918, 685 ], [ 918, 686 ], [ 915, 689 ], [ 912, 690 ], [ 910, 691 ], [ 909, 692 ], [ 908, 692 ], [ 906, 692 ], [ 905, 693 ], [ 904, 693 ], [ 902, 695 ], [ 900, 695 ], [ 900, 695 ], [ 899, 695 ], [ 898, 695 ], [ 896, 695 ], [ 894, 693 ], [ 891, 693 ], [ 887, 693 ], [ 886, 693 ], [ 884, 693 ], [ 881, 692 ], [ 879, 692 ], [ 876, 691 ], [ 874, 691 ], [ 870, 690 ], [ 867, 690 ], [ 866, 690 ], [ 863, 690 ], [ 861, 689 ], [ 860, 689 ], [ 857, 689 ], [ 856, 687 ], [ 854, 687 ], [ 851, 687 ], [ 848, 686 ], [ 845, 686 ], [ 842, 686 ], [ 840, 686 ], [ 839, 685 ], [ 837, 685 ], [ 834, 684 ], [ 828, 683 ], [ 825, 683 ], [ 822, 681 ], [ 819, 680 ], [ 815, 679 ], [ 814, 679 ], [ 812, 679 ], [ 811, 679 ], [ 811, 678 ], [ 808, 677 ], [ 806, 675 ], [ 803, 675 ], [ 802, 675 ], [ 801, 675 ] ] ] },
Sample JSON after conversion to COCO format:
The Python code used for the conversion is as follows:
"""Convert ChestX-Det annotations (one JSON list of records) to COCO format.

Each input record looks like::

    {"file_name": "36302.png", "syms": ["Effusion"],
     "boxes": [[x1, y1, x2, y2]], "polygons": [[[x, y], ...]]}

where ``syms``, ``boxes`` and ``polygons`` are parallel lists (entry ``i`` of
each describes the same lesion).
"""
import json
import math
import os
import sys

# NOTE: cv2 and tqdm are imported lazily inside the functions that use them,
# so the pure helpers (polygon_area, build_coco) can be imported and reused
# without OpenCV or tqdm being installed.

che_json = './chetrain.json'
dst_json = './chestrain_coco.json'
test_img = './train_data/train'
# To convert the test split instead, use these settings:
# che_json = './chetest.json'
# dst_json = './chetest_coco.json'
# test_img = './test_data/test'


def polygon_area(vertices):
    """Return the area of a simple polygon using the shoelace formula.

    Args:
        vertices: Sequence of ``[x, y]`` points in drawing order. The polygon
            is closed implicitly (last point connects back to the first).

    Returns:
        The non-negative area as a float; ``0.0`` for an empty sequence.
    """
    if not vertices:
        return 0.0
    # Pair every vertex with its successor, wrapping around to the first.
    successors = list(vertices[1:]) + list(vertices[:1])
    twice_signed_area = 0.0
    for (x1, y1), (x2, y2) in zip(vertices, successors):
        twice_signed_area += x1 * y2 - x2 * y1
    # The sign only encodes the winding direction, so drop it.
    return abs(twice_signed_area) / 2.0


def _read_image_size(image_path):
    """Return ``(height, width)`` of the image at *image_path* using OpenCV.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file. ``cv2.imread``
            signals failure by returning ``None`` instead of raising.
    """
    import cv2

    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"cannot read image: {image_path}")
    height, width = img.shape[:2]
    return height, width


def build_coco(records, image_dir, category_mapping=None,
               read_size=_read_image_size):
    """Build a COCO-style dictionary from ChestX-Det records.

    Args:
        records: Iterable of ChestX-Det record dicts (see module docstring).
        image_dir: Directory containing the images named by ``file_name``.
        category_mapping: Optional ``{category name: id}`` mapping to start
            from. Pass the mapping produced for one split when converting the
            other so that both splits share the same category ids. Names not
            present are assigned the next free ids in order of appearance.
        read_size: Callable taking an image path and returning
            ``(height, width)``.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``. Image and annotation ids start at 1.

    Raises:
        ValueError: If a record has fewer ``syms`` or ``polygons`` entries
            than ``boxes`` entries.
    """
    category_mapping = dict(category_mapping or {})
    # Pre-register seeded categories so they are listed even if unseen here.
    categories = [
        {"supercategory": name, "id": cat_id, "name": name}
        for name, cat_id in sorted(category_mapping.items(),
                                   key=lambda item: item[1])
    ]
    next_category_id = max(category_mapping.values(), default=0) + 1

    images = []
    annotations = []
    for image_id, record in enumerate(records, start=1):
        file_name = record['file_name']
        height, width = read_size(os.path.join(image_dir, file_name))
        images.append({
            'file_name': file_name,
            'height': height,
            'width': width,
            'id': image_id,
        })

        names = record['syms']
        for name in names:
            if name not in category_mapping:
                category_mapping[name] = next_category_id
                categories.append({
                    "supercategory": name,
                    "id": next_category_id,
                    "name": name,
                })
                next_category_id += 1

        boxes = record['boxes']
        polygons = record['polygons']
        if len(names) < len(boxes) or len(polygons) < len(boxes):
            raise ValueError(
                f"{file_name}: 'syms' and 'polygons' must have an entry "
                f"for each of the {len(boxes)} boxes"
            )

        for name, box, polygon in zip(names, boxes, polygons):
            # ChestX-Det boxes are corner pairs; COCO wants [x, y, w, h].
            x_left, y_top, x_right, y_bottom = box
            annotations.append({
                # COCO polygons are flat [x0, y0, x1, y1, ...] lists.
                'segmentation': [[coord for point in polygon
                                  for coord in point[:2]]],
                'image_id': image_id,
                'area': polygon_area(polygon),
                'bbox': [x_left, y_top,
                         x_right - x_left, y_bottom - y_top],
                'category_id': category_mapping[name],
                'id': len(annotations) + 1,
                # Standard COCO field; every lesion is a single instance.
                'iscrowd': 0,
            })

    return {
        "info": {},
        "licenses": [],
        "categories": categories,
        "images": images,
        "annotations": annotations,
    }


def main():
    """Convert the file at ``che_json`` and write the result to ``dst_json``."""
    from tqdm import tqdm

    with open(che_json, 'r', encoding='utf-8') as js:
        json_info = json.load(js)

    coco_data = build_coco(tqdm(json_info), test_img)

    with open(dst_json, 'w', encoding='utf-8') as jsout:
        json.dump(coco_data, jsout)


if __name__ == '__main__':
    main()
Run the conversion on either the training set or the validation set first to generate the contents of category_mapping. Then reuse that same mapping when converting the other split, so that the category IDs of the training set and the validation set are consistent.