Realization of Water Body Extraction from Radar Remote Sensing Image Based on U-Net and DeepLabv3+ Model

The code below was generated by ChatGPT (it is not original work) and is intended as a baseline to compare your own models against. Without further ado, here is the code.

1. U-Net model improvements:

Use a deeper network structure, for example by adding more convolutional and pooling layers, to improve the expressiveness and generalization ability of the model.
Use different loss functions, such as Focal Loss or Dice Loss, to improve the model's accuracy along object edges.
Apply data augmentation, such as rotation, flipping, and scaling, to increase the diversity of the data and improve the robustness of the model.

The following is the code of the U-Net model:

import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution stages, each followed by BatchNorm and ReLU.

    Every convolution uses ``padding=1``, so the spatial size of the input is
    preserved; only the channel count changes from ``in_channels`` to
    ``out_channels``.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by both convolution stages.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # The first stage maps in_channels -> out_channels; the second stage
        # keeps the width at out_channels.
        for stage_in in (in_channels, out_channels):
            stages += [
                nn.Conv2d(stage_in, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv/BN/ReLU stages to ``x`` and return the result."""
        stacked = self.conv
        return stacked(x)

class UNet(nn.Module):
    """U-Net style encoder/decoder assembled from ``DoubleConv`` stages.

    The encoder applies one ``DoubleConv`` per entry of ``features`` with a
    2x2 max-pool after each; a bottleneck doubles the last width; the decoder
    mirrors the encoder with a stride-2 transposed convolution followed by a
    ``DoubleConv`` on the concatenation with the matching encoder output.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of channels produced by the final 1x1 convolution.
        features: Channel widths of the encoder levels, shallowest first.
            Any non-empty sequence of ints is accepted.

    Raises:
        ValueError: If ``features`` is empty.
    """

    def __init__(self, in_channels=1, out_channels=1, features=(64, 128, 256, 512)):
        super().__init__()
        # Copy into a list so a caller-owned sequence is never shared, and so
        # one-shot iterables can be traversed twice below.
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one channel width")

        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Down part of U-Net
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Up part of U-Net: self.ups alternates (upsample, DoubleConv) pairs.
        # The DoubleConv takes feature*2 channels because its input is the
        # upsampled map concatenated with the skip connection.
        for feature in reversed(features):
            self.ups.append(nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2))
            self.ups.append(DoubleConv(feature * 2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        """Run the encoder, bottleneck and decoder and return the output map.

        Args:
            x: Input tensor; indexed as (N, C, H, W) by the code below.

        Returns:
            Tensor with ``out_channels`` channels and the spatial size of the
            first encoder level's output.
        """
        skip_connections = []
        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        # Walk the decoder pairs from the deepest level to the shallowest,
        # consuming the encoder outputs in reverse order.
        for upsample, refine, skip in zip(
            self.ups[0::2], self.ups[1::2], reversed(skip_connections)
        ):
            x = upsample(x)

            # The 2x2 max-pool floors odd sizes, so the 2x upsampled map can be
            # smaller than the skip connection; resize it to match before
            # concatenating along the channel dimension.
            if x.shape[2:] != skip.shape[2:]:
                x = F.interpolate(
                    x, size=skip.shape[2:], mode="bilinear", align_corners=False
                )

            x = refine(torch.cat((skip, x), dim=1))

        return self.final_conv(x)

2. DeepLabv3+ model improvements:

Add a spatial pyramid pooling module to improve the model's ability to capture information at different scales.
Use deformable convolutional layers to increase the receptive field of the model and improve its adaptability to the shape of the target.
Use multi-scale training and testing to improve the model's ability to detect objects of different scales.

Below is the code for the DeepLabv3+ model:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import model_zoo

# Maps a backbone name to the URL of a ``.pth`` weights file for it.
# NOTE(review): nothing in the code visible here reads this mapping; it is
# presumably meant for loading backbone weights via ``model_zoo`` - confirm.
model_urls = dict(
    resnet50="https://download.pytorch.org/models/resnet50-19c8e357.pth",
    resnet101="https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
)

class ASPP(nn.Module):
    """Atrous spatial pyramid pooling head with five parallel branches.

    The branches are a 1x1 convolution, three dilated 3x3 convolutions, and a
    global-average-pooled branch. Their outputs are concatenated along the
    channel dimension and fused by a 1x1 convolution.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by every branch and by the
            fused output.
        rates: Dilation rates of the three dilated branches. Only the first
            three entries are used; any indexable sequence is accepted.
    """

    def __init__(self, in_channels, out_channels=256, rates=(6, 12, 18)):
        super().__init__()
        self.conv1x1 = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        # padding == dilation keeps the spatial size of a 3x3 dilated
        # convolution equal to its input size.
        self.atrous_conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, padding=rates[0], dilation=rates[0]
        )
        self.atrous_conv2 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, padding=rates[1], dilation=rates[1]
        )
        self.atrous_conv3 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, padding=rates[2], dilation=rates[2]
        )
        # Image-level branch: squeeze to 1x1, then project the channels.
        self.pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
        )

        # Fuses the five concatenated branch outputs back to out_channels.
        self.conv = nn.Conv2d(out_channels * 5, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply all branches to ``x`` and return the fused feature map.

        The returned tensor has ``out_channels`` channels and the same
        spatial size as ``x``.
        """
        feature_map = self.conv1x1(x)
        atrous_1 = self.atrous_conv1(x)
        atrous_2 = self.atrous_conv2(x)
        atrous_3 = self.atrous_conv3(x)
        # The pooled branch is 1x1 spatially; stretch it back to the size of
        # the other branches so all five can be concatenated.
        pool = F.interpolate(
            self.pool(x), size=feature_map.shape[2:], mode='bilinear', align_corners=True
        )

        x = torch.cat((feature_map, atrous_1, atrous_2, atrous_3, pool), dim=1)
        return self.conv(x)

class DeepLabv3Plus(nn.Module):
    """ResNet encoder, ASPP head and a three-step skip-connection decoder.

    The encoder output of ``layer4`` goes through ASPP; the decoder then
    upsamples three times with transposed convolutions, concatenating the
    saved outputs of ``layer3``, ``layer2`` and ``layer1`` in turn, and a
    1x1 convolution produces the class scores, which are resized to the
    input resolution.

    Args:
        in_channels: Number of channels of the input image. When it is not 3
            the backbone's stem convolution is replaced by a freshly
            initialised one with the same geometry, so pretrained stem
            weights are not used in that case.
        out_channels: Number of output channels (classes).
        backbone: ``'resnet50'`` or ``'resnet101'``.
        pretrained: Forwarded to the backbone constructor.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """

    def __init__(self, in_channels=3, out_channels=21, backbone='resnet50', pretrained=True):
        super().__init__()
        # NOTE(review): ``models`` is not imported in the visible source; it is
        # presumably ``torchvision.models`` - confirm and import it at file level.
        if backbone == 'resnet50':
            resnet = models.resnet50(pretrained=pretrained)
        elif backbone == 'resnet101':
            resnet = models.resnet101(pretrained=pretrained)
        else:
            raise ValueError(
                f"unsupported backbone {backbone!r}; expected 'resnet50' or 'resnet101'"
            )
        # Width of layer4's output for both supported backbones.
        channels = 2048

        if in_channels == 3:
            self.conv1 = resnet.conv1
        else:
            # Assumes resnet.conv1 is an nn.Conv2d (true for torchvision
            # ResNets - TODO confirm); mirror its geometry for the new stem.
            stem = resnet.conv1
            self.conv1 = nn.Conv2d(
                in_channels,
                stem.out_channels,
                kernel_size=stem.kernel_size,
                stride=stem.stride,
                padding=stem.padding,
                bias=stem.bias is not None,
            )
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        self.maxpool = resnet.maxpool
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4

        aspp_channels = 256
        self.aspp = ASPP(channels, out_channels=aspp_channels)

        # Skip widths, assuming the bottleneck ResNet layout in which each
        # layer halves the width going up from layer4 (TODO confirm if the
        # backbone source changes).
        skip3_channels = channels // 2
        skip2_channels = channels // 4
        skip1_channels = channels // 8

        # Each transposed conv (kernel 4, stride 2, padding 1) doubles the
        # spatial size. Its input width is the previous decoder output plus
        # the skip concatenated in ``forward``.
        self.up_conv1 = nn.ConvTranspose2d(
            aspp_channels, channels // 4, kernel_size=4, stride=2, padding=1
        )
        self.up_conv2 = nn.ConvTranspose2d(
            channels // 4 + skip3_channels, channels // 8, kernel_size=4, stride=2, padding=1
        )
        self.up_conv3 = nn.ConvTranspose2d(
            channels // 8 + skip2_channels, channels // 16, kernel_size=4, stride=2, padding=1
        )

        self.final_conv = nn.Conv2d(channels // 16 + skip1_channels, out_channels, kernel_size=1)

    @staticmethod
    def _merge_with_skip(x, skip):
        """Resize ``x`` to ``skip``'s spatial size and concatenate on channels."""
        x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=True)
        return torch.cat((x, skip), dim=1)

    def forward(self, x):
        """Return per-pixel scores with the same spatial size as ``x``.

        Args:
            x: Input tensor; indexed as (N, C, H, W) by the code below.
        """
        x_size = x.size()
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Keep each encoder stage's output: the decoder concatenates them
        # later, so they must not be recomputed from decoder tensors.
        skip1 = self.layer1(x)
        skip2 = self.layer2(skip1)
        skip3 = self.layer3(skip2)
        x = self.layer4(skip3)

        x = self.aspp(x)

        x = self._merge_with_skip(self.up_conv1(x), skip3)
        x = self._merge_with_skip(self.up_conv2(x), skip2)
        x = self._merge_with_skip(self.up_conv3(x), skip1)

        x = self.final_conv(x)
        # Restore the input resolution for per-pixel prediction.
        x = F.interpolate(x, size=x_size[2:], mode='bilinear', align_corners=True)

        return x