Running SegNeXt on Ubuntu 20 to extract roads and water bodies (4) – solving the problem of the IoU being 0 when training and running inference on your own dataset!

In my previous blog post,
"Ubuntu20 runs SegNeXt code to extract road water bodies (3) – SegNeXt training and inference on your own data set",
the computed IoU was still 0
even after a series of configuration changes.
After many attempts,
I finally worked out the correct configuration!

For specific configuration details, please see this article

1. Initial definition of the data set under mmseg/datasets

I created a new myroaddata.py file
The content inside is:

# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

import mmcv
import numpy as np
from PIL import Image

from .builder import DATASETS
from .custom import CustomDataset


@DATASETS.register_module()
class MyRoadData(CustomDataset):
    """Two-class road segmentation dataset (background / road).

    Images are matched by the ``_sat.tif`` suffix and their segmentation
    maps by the ``_mask.png`` suffix. All other options (``data_root``,
    ``img_dir``, ``ann_dir``, ``pipeline`` ...) are forwarded unchanged to
    ``CustomDataset``.
    """

    # Class names, in label-index order: 0 -> background, 1 -> road.
    CLASSES = ('background', 'road')

    # One RGB colour per class, in the same order as CLASSES.
    PALETTE = [[0, 0, 0], [255, 255, 255]]

    def __init__(self, **kwargs):
        super(MyRoadData, self).__init__(
            img_suffix='_sat.tif', seg_map_suffix='_mask.png', **kwargs)
        # Fail early if the configured image directory is missing.
        assert osp.exists(self.img_dir)

2. Modify __init__.py in the mmseg/datasets/ directory

Add my custom dataset to the original __init__.py

# Copyright (c) OpenMMLab. All rights reserved.
from .ade import ADE20KDataset
from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .chase_db1 import ChaseDB1Dataset
from .cityscapes import CityscapesDataset
from .coco_stuff import COCOStuffDataset
from .custom import CustomDataset
from .dark_zurich import DarkZurichDataset
from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
                               RepeatDataset)
from .drive import DRIVEDataset
from .hrf import HRFDataset
from .isaid import iSAIDDataset
from .isprs import ISPRSDataset
from .loveda import LoveDADataset
from .night_driving import NightDrivingDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59
from .potsdam import PotsdamDataset
from .stare import STAREDataset
from .voc import PascalVOCDataset
from .myroaddata import MyRoadData

# Public names re-exported by the datasets package. Every entry must match a
# name imported above, otherwise `from mmseg.datasets import *` fails.
__all__ = [
    'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
    'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset',
    'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
    'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset',
    'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset',
    'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset', 'MyRoadData'
]

3. Define data loading under configs/_base_/datasets

I created a new myroad.py

The content inside is

# dataset settings
# Name of the dataset class to build; matches the MyRoadData class registered
# in mmseg/datasets/myroaddata.py.
dataset_type = 'MyRoadData'
# Root folder that the img_dir / ann_dir entries below are given relative to.
data_root = 'data/MyRoadData'
# Per-channel mean / std handed to the Normalize step of both pipelines.
# NOTE(review): these values look like they are on a 0-1 scale; the stock
# mmseg dataset configs use 0-255 scale statistics -- confirm which scale
# Normalize expects for this data.
img_norm_cfg = dict(
    mean=[0.5947, 0.5815, 0.5625], std=[0.1173, 0.1169, 0.1157], to_rgb=True)
# Base scale used by Resize (training) and MultiScaleFlipAug (testing).
img_scale = (512, 512)
# Size used by both RandomCrop and Pad in the training pipeline.
crop_size = (256, 256)
# Training pipeline: load image + mask, random rescale (0.5x-2.0x), random
# crop, random flip, photometric jitter, normalise, pad to crop_size, and
# collect the image together with its ground-truth segmentation map.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    # Padded image pixels are filled with 0 and padded label pixels with 255
    # (presumably the ignore index -- confirm against the loss settings).
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
# Test pipeline: single scale (img_ratios is commented out) with flipping
# disabled; only the image is collected, no annotations are loaded.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

# Data loader settings plus the train / val / test dataset definitions.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=8,
    # The training set is wrapped in RepeatDataset.
    # NOTE(review): times=40000 is unusually large next to the 40k schedule
    # this config is combined with -- confirm it is intended.
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    # test reads the same validation folders as val.
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))

4. Select the model parameters you need to modify under configs/

Select the model parameters you need under configs/ to modify them. Taking pspnet as an example, create a new file pspnet_r50-d8_512x1024_40k_myroaddata.py under configs/pspnet/

# Compose this experiment from the base model, the custom road dataset, the
# default runtime and the 40k-iteration schedule.
_base_ = [
    '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/myroad.py',
    '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py'
]
# The base PSPNet model is configured for 19 classes, while MyRoadData has
# only two ('background', 'road'). Override both heads here so the shared base
# file does not have to match this dataset.
model = dict(
    decode_head=dict(num_classes=2), auxiliary_head=dict(num_classes=2))

5. Modify pspnet_r50-d8.py under configs/_base_/models/

# model settings
# Normalisation layer config shared by the backbone and both heads.
# NOTE(review): 'BN' is presumably used instead of a synchronised variant so
# the model can train on a single GPU -- confirm.
norm_cfg = dict(type='BN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    # Backbone: 50-layer ResNetV1c with four stages, all of which are output.
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    # Main head: PSPHead reading backbone output index 3 (2048 channels).
    # NOTE(review): num_classes=19 here, whereas the MyRoadData dataset
    # defines two classes -- confirm this is overridden elsewhere or should
    # be 2.
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # Auxiliary head: FCNHead reading backbone output index 2 (1024 channels),
    # with its loss weighted at 0.4. The same num_classes note applies here.
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

6. Return to tools/train.py for training

python tools/train.py configs/pspnet/pspnet_r50-d8_512x1024_40k_myroaddata.py
You can run now

Result graph:

Customized data set format configuration

Create a new MyRoadData folder under the data folder to store the data

Inside it, create two new folders: annotations and images

Create new training and validation folders under both annotations and images

annotations/training holds the training labels

annotations/validation holds the labels for the validation (prediction) images

Likewise,

images/training holds the original training images

images/validation holds the original images used for validation (prediction)

1. The image format requires 8-bit depth

Note that if the label (mask) images are 24-bit, all of them must be converted to 8-bit!

Otherwise, an error will be reported

The conversion code is as follows

# -*- coding: utf-8 -*-
"""
Created on Wed Oct 4 16:50:20 2022

@author:Laney_Midory
csdn: Laney_Midory
"""
import cv2
import os

import glob
import shutil

import matplotlib.pyplot as plt
import numpy as np

from PIL import Image

import torch
import torch.nn as nn
import torch.utils.data as data
from torch.autograd import Variable as V

import pickle

from time import time



os.environ["CUDA_VISIBLE_DEVICES"] = '0' # Specify the first GPU available

# Disable Pillow's decompression-bomb size limit so very large masks can be
# opened.
Image.MAX_IMAGE_PIXELS = None

# Folder holding the training label masks that are converted in place.
tar = "/home/wangtianni/SegNeXt-main/SegNeXt-main/data/data/MyRoadData/annotations/training/"


def convert_masks_to_8bit(mask_dir):
    """Convert every ``.png`` file in *mask_dir* to 8-bit palette mode in place.

    Each file is re-saved under its own name after ``Image.convert('P')``.

    NOTE(review): ``convert('P')`` stores palette indices rather than the
    original intensities, so the saved pixel values are not guaranteed to be
    0/255 -- the later binarisation step is what normalises labels to 0/1.

    Args:
        mask_dir: Directory containing the mask images.
    """
    for name in sorted(os.listdir(mask_dir)):
        # Match on the real extension; a substring test would also pick up
        # names such as "png_notes.txt".
        if not name.endswith('.png'):
            continue
        path = os.path.join(mask_dir, name)
        # convert() loads the pixel data, so the source file can be closed
        # before it is overwritten below.
        with Image.open(path) as img:
            converted = img.convert('P')
        converted.save(path)
        print("Finish deep change!")


if __name__ == "__main__":
    print('Convert 24-bit depth to 8-bit')
    convert_masks_to_8bit(tar)
   

2. The label masks must contain only the pixel values 0 and 1!

If you don't convert the masks to 0/1 values, training will still run, but the result will be wrong.

Because my road is 255 and the background is 0, now I want to change the road to 1 and the background is 0. The code is as follows:

# -*- coding: utf-8 -*-
"""
Created on Wed Oct 4 16:50:20 2022

@author:Laney_Midory
csdn: Laney_Midory
"""
import cv2
import os

import glob
import shutil

import matplotlib.pyplot as plt
import numpy as np

from PIL import Image

import torch
import torch.nn as nn
import torch.utils.data as data
from torch.autograd import Variable as V

import pickle

from time import time


os.environ["CUDA_VISIBLE_DEVICES"] = '0' # Specify the first GPU available

# Disable Pillow's decompression-bomb size limit so very large masks can be
# opened.
Image.MAX_IMAGE_PIXELS = None


# Folder holding the training label masks that are rewritten in place.
tar = "/home/wangtianni/SegNeXt-main/data/MyRoadData/annotations/training/"


def _to_binary_label(value):
    """Map a pixel value to a class index: 0 stays 0, anything else becomes 1."""
    return 1 if value != 0 else 0


def binarize_masks(mask_dir):
    """Rewrite every ``.png`` mask in *mask_dir* so its pixel values are 0 or 1.

    Background pixels (value 0) are kept and every non-zero pixel (the road,
    255 in the source masks) is set to 1. Files are overwritten in place and
    each processed path is printed.

    Args:
        mask_dir: Directory containing the 8-bit mask images.

    Raises:
        ValueError: If a mask is not a single-band 8-bit image (mode ``L`` or
            ``P``). Comparing a multi-band pixel tuple with 0 would mark every
            pixel as road, so such files are rejected instead of silently
            corrupted.
    """
    for name in sorted(os.listdir(mask_dir)):
        if not name.endswith('.png'):
            continue
        path = os.path.join(mask_dir, name)
        with Image.open(path) as img:
            if img.mode not in ('L', 'P'):
                raise ValueError(
                    "{}: expected an 8-bit single-band mask (mode 'L' or "
                    "'P'), got mode {!r}; convert it to 8-bit first".format(
                        path, img.mode))
            # One lookup-table pass replaces the per-pixel getpixel/putpixel
            # loop; the image mode (and palette, if any) is kept.
            binary = img.point(_to_binary_label)
        print(path)
        binary.save(path)


if __name__ == "__main__":
    binarize_masks(tar)
    print("finish!")
   


If you want to see if the pixel value of your picture is 0 or 1, just print it directly.

3. Modify the mean and standard deviation of the data set! !

This is also very important

Modify myroad.py in SegNeXt-main/configs/_base_/datasets

My modification became

# Per-channel normalisation statistics computed for this dataset's images.
img_norm_cfg = dict(
    mean=[0.5947, 0.5815, 0.5625],
    std=[0.1173, 0.1169, 0.1157],
    to_rgb=True,
)

You need to compute the mean and standard deviation of your own images, because if these values are wrong you still won't get a correct result.

At this point, do you think you can run successfully?

If you think so, you are totally wrong.

Error report in operation result

File “/home/wangtianni/.conda/envs/pytorch/lib/python3.6/site-packages/torch/nn/functional.py”, line 2248, in _verify_batch_size
raise ValueError(“Expected more than 1 value per channel when training, got input size {}”.format(size))
ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 512, 1, 1])

The input size is torch.Size([1, 512, 1, 1])

But batch normalization in training mode needs more than one value per channel

I found this very strange

I have obviously set the image to 8-bit

Why does it still report an error?

After watching it for a long time and going through various experiments

Finally I found the solution:

Run step 1 again! ! ! !

4. Run the code in step 1 again to change the image depth to 8 bits

It can run normally!

The knowledge points of the article match the official knowledge files, and you can further learn relevant knowledge. Python entry skill treeHomepageOverview 384,274 people are learning the system