[ACDC Data Set]: Preprocess ACDC cardiac 3D MRI image data set to VOC data set format, convert nii to jpg, label to png

[Segment Anything Model] Column link for segmentation, welcome to learn.
[Blogger WeChat]cvxiaoyixiao
This column is the introduction and preprocessing of public data sets and is being updated continuously.

Article directory

1 ACDC data set introduction
2 ACDC data set sample
3 Preprocessing ACDC target
4 Processing result sample
5 Code
6 Divide test set and training set

1 ACDC data set introduction

It is a multi-category cardiac 3D MRI image data set from the 2017 ACDC Challenge (Automated Cardiac Diagnosis
Challenge).

The original data set is obtained and the network disk is permanently valid:
Link: https://pan.baidu.com/s/1F4Xq1crtUSmFcSKxwO4Eaw?pwd=ejfa Extraction code: ejfa
– Sharing from Baidu Netdisk super member V6

2 ACDC data set sample

First of all, let me praise the ACDC data set: its storage format is very regular and the data quality is very high. It is clearly divided into three categories.

There are 100 patients in training and 50 patients in testing. The number of slices per nii file varies from patient to patient, depending on the cardiac region being delineated.

Info contains the metadata.
patient001_4d.nii.gz contains all the slices. I don’t know how to describe it medically; it is the complete image volume of this patient, including the parts without any labeled structure.
patient001_frame01.nii.gz is the compressed nii image of this patient at one point in time (frame 01).
patient001_frame01_gt.nii.gz is the compressed label file for patient001_frame01.nii.gz, containing the outlines of the labeled structures. Four pixel values correspond to four categories, including background.
patient001_frame12.nii.gz is an image of the same patient in another period
patient001_frame12_gt.nii.gz is the corresponding label

In short, each patient has an overall image, as well as two different images and corresponding labels. The following is the image after opening.

3 Preprocessing ACDC target

Goal: Process ACDC’s nii.gz data set into 2D png images in VOC format.
The VOC format is explained in this article.

For example: process patient001_frame01.nii.gz into several images that serve as input to the network, and process patient001_frame01_gt.nii.gz into several pngs that serve as labels. Only the 100 patients from training are processed: the training folder is regarded as the entire data set and is itself divided into training and test sets. Only the first stage (frame01) is processed, not the second stage; the remaining files are ignored. That is enough here.

4 Processing result sample

Processing result sample image:
Images are converted to jpg and labels to png with pixel values 0-3: 0 represents background and 1-3 represent the classes. The pixel values of the label image are too small to be visible here; see the third image, which shows the array.

5 Code

Prepare the VOC folder in advance

"""
Process the ACDC downloaded from the official website to the VOC format required by the network, and call it on demand
author: cvxiayixiao
Wechat: cvxiayixiao
"""
import os
from os.path import join
import nibabel as nib
import gzip
import shutil
import matplotlib.pyplot as plt
import numpy as np

# Root of the extracted ACDC training set; niigz2nii() lists this folder and
# treats each entry in it as one patient folder.
ori_ACDC_train_path = './ACDC_challenge_20170617/training'


def niigz2nii(input_path=None, target='frame01_gt.nii', output_path='ACDC_nii/labels'):
    """
    Copy each patient's matching nii.gz file into a per-patient output folder.

    The archives are copied as they are (they are not decompressed); the
    converters in this file open them with nib.load.
    Files are written to ``<output_path>/<patient>/<file name>``, which is the
    ``<root>/<patient>/<file>`` layout that convert_nii_to_jpg() and
    convert_nii_to_png() walk.

    :param input_path: folder holding one sub-folder per patient; defaults to
        ori_ACDC_train_path
    :param target: substring a file name must contain to be copied.
        Use 'frame01_gt.nii' for the labels (default) or 'frame01.nii'
        together with output_path='ACDC_nii/images' for the images.
    :param output_path: destination root; created when missing
    :return: None
    """
    if input_path is None:
        input_path = ori_ACDC_train_path

    for patient in sorted(os.listdir(input_path)):
        # e.g. ACDC_challenge_20170617/training/patient001
        patient_path = join(input_path, patient)
        if not os.path.isdir(patient_path):
            # Stray files next to the patient folders would break os.listdir below.
            continue
        for niigz in sorted(os.listdir(patient_path)):
            if target not in niigz:
                continue
            patient_out = join(output_path, patient)
            os.makedirs(patient_out, exist_ok=True)
            shutil.copy(join(patient_path, niigz), join(patient_out, niigz))


#niigz2nii()
def convert_nii_to_jpg():
    '''
    Convert every nii volume under ./ACDC_nii/images into per-slice jpg files.

    Expects the layout ./ACDC_nii/images/<patient>/<nii file>. Each slice along
    the third axis is written to
    ./VOCdevkit/VOC2007/JPEGImages/<patient>_<6-digit counter>.jpg.
    The counter starts at 1 and keeps running across all patients.
    :return:
    '''
    image_num = 0
    nii_path = "./ACDC_nii/images"
    output_dir = "./VOCdevkit/VOC2007/JPEGImages"
    os.makedirs(output_dir, exist_ok=True)
    # os.listdir order is arbitrary; sorting makes the slice numbering reproducible.
    for patient in sorted(os.listdir(nii_path)):
        patient_path = join(nii_path, patient)
        for one in sorted(os.listdir(patient_path)):
            one_patient_nii_path = join(patient_path, one)
            # Load the nii file
            nii_img = nib.load(one_patient_nii_path)
            data = nii_img.get_fdata()
            # Traverse each slice of the volume and save it as a .jpg file
            for i in range(data.shape[2]):
                image_num += 1
                # Get the current slice data
                slice_data = data[:, :, i]
                # Create the output file path with a zero-padded running number
                num = f"{image_num}".zfill(6)
                output_path = os.path.join(output_dir, f'{patient}_{num}.jpg')
                # Save the slice as a grayscale-colormapped .jpg file
                plt.imsave(output_path, slice_data, cmap='gray')


# convert_nii_to_jpg()

def convert_nii_to_png():
    '''
    Convert every label nii volume under ./ACDC_nii/labels into per-slice png files in ./tmp.

    Expects the layout ./ACDC_nii/labels/<patient>/<nii file>. Each slice along
    the third axis is written to ./tmp/<patient>_<6-digit counter>.png; the
    counter starts at 1 and keeps running across all patients.
    The saved pixels are gray levels of the original label values, not the
    class ids the network expects, so a further conversion is required.
    :return:
    '''
    image_num = 0
    nii_path = "./ACDC_nii/labels"
    output_dir = "./tmp"
    os.makedirs(output_dir, exist_ok=True)
    # os.listdir order is arbitrary; sorting makes the slice numbering reproducible.
    for patient in sorted(os.listdir(nii_path)):
        patient_path = join(nii_path, patient)
        for one in sorted(os.listdir(patient_path)):
            one_patient_nii_path = join(patient_path, one)
            # Load the nii file
            nii_img = nib.load(one_patient_nii_path)
            data = nii_img.get_fdata()
            # Traverse each slice of the volume and save it as a .png file
            for i in range(data.shape[2]):
                image_num += 1
                # Get the current slice data
                slice_data = data[:, :, i]
                # Create the output file path with a zero-padded running number
                num = f"{image_num}".zfill(6)
                output_path = os.path.join(output_dir, f'{patient}_{num}.png')
                # Pin the colour scale to the label range 0-3. Without vmin/vmax
                # imsave rescales every slice to its own min/max, so a slice that
                # lacks class 3 would map the same class id to a different gray level.
                plt.imsave(output_path, slice_data, cmap='gray', vmin=0, vmax=3)

from PIL import Image


def turnto255():
    """
    Convert every image in ./tmp to an 8-bit single-channel image in ./tmp1.

    Each file keeps its name. The colour channels are averaged into one gray
    value, which is scaled to 0-255 and rounded.
    NOTE: an alpha channel is no longer averaged into the gray value, so the
    output levels differ from earlier runs of this function on RGBA input;
    any later step that maps gray levels to class ids must use the new levels.
    :return:
    """
    from PIL import Image

    os.makedirs("./tmp1", exist_ok=True)
    for i in os.listdir("tmp"):
        output_path = os.path.join("./tmp1", i)
        png_path = join("tmp", i)
        # Read the image; assumes plt.imread yields floats in [0, 1], as it does for png
        image = plt.imread(png_path)

        if image.ndim == 3:
            # Average only the first three (colour) channels; a fourth channel
            # is alpha and must not be mixed into the gray value.
            im_gray = np.mean(image[:, :, :3], axis=2)
        else:
            # Already single-channel
            im_gray = image
        # Round instead of truncating so float error cannot turn e.g. 84.9999 into 84
        im_gray = Image.fromarray(np.rint(im_gray * 255).astype(np.uint8)).convert("L")
        # Save the output image
        im_gray.save(output_path)


def rename(root="VOCdevkit/VOC2007/SegmentationClass"):
    """
    Add a short-named copy of every file in ``root``.

    The new name is the last 10 characters of the old one, e.g.
    'patient001_000001.png' -> '000001.png' (6-digit number plus '.png').
    Files are copied, not moved, so the long-named originals stay in place and
    can still be matched to the source patient.
    Entries whose name is already 10 characters or shorter are skipped, which
    makes the function safe to run more than once.

    :param root: folder to process; defaults to the VOC2007 SegmentationClass folder
    :return: None
    """
    for i in os.listdir(root):
        new_name = i[-10:]
        if new_name == i:
            # Source and target would be the same file (shutil.SameFileError).
            continue
        img_old_path = join(root, i)
        if not os.path.isfile(img_old_path):
            continue
        shutil.copy(img_old_path, join(root, new_name))


#rename()

For the code that converts the 0-255 pixel values to the 0-3 category codes, add WeChat cvxiayixiao.

6 Divide test set and training set

import os
import random

import numpy as np
from PIL import Image
from tqdm import tqdm

# trainval_percent: fraction of all samples used for train+val (the rest go to test).
# train_percent: fraction of train+val used for train (the rest go to val).
# With the values below nothing is held out for test and train:val is 9:1.
trainval_percent = 1
train_percent = 0.9
VOCdevkit_path = 'VOCdevkit'

if __name__ == "__main__":
    random.seed(0)
    print("Generate txt in ImageSets.")
    segfilepath = os.path.join(VOCdevkit_path, 'VOC2007/SegmentationClass')
    saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Segmentation')
    os.makedirs(saveBasePath, exist_ok=True)

    # Sort the names: os.listdir order is arbitrary, so without sorting the
    # fixed random seed would not give a reproducible split.
    total_seg = sorted(seg for seg in os.listdir(segfilepath) if seg.endswith(".png"))

    num = len(total_seg)
    indices = range(num)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval_indices = random.sample(indices, tv)
    train_indices = random.sample(trainval_indices, tr)
    # Sets give O(1) membership tests in the loop below.
    trainval = set(trainval_indices)
    train = set(train_indices)

    print("train and val size", tv)
    print("train size", tr)

    with open(os.path.join(saveBasePath, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(saveBasePath, 'test.txt'), 'w') as ftest, \
            open(os.path.join(saveBasePath, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(saveBasePath, 'val.txt'), 'w') as fval:
        for i in indices:
            # One sample name per line, without the ".png" extension
            name = total_seg[i][:-4] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

Successfully divided