Reposted from ChatGPT (not original work); intended as a reference to compare against your own models. Without further ado, here is the code.
1. U-Net model improvements:
- Use a deeper network structure, such as adding more convolutional layers and pooling layers, to improve the expressiveness and generalization ability of the model.
- Use different loss functions, such as Focal Loss or Dice Loss, to improve the model's accuracy on edge (boundary) information.
- Apply data augmentation, such as rotation, flipping, and scaling, to increase the diversity of the data and improve the robustness of the model.
The following is the code of the U-Net model:
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> BatchNorm -> ReLU stages.

    Both convolutions use ``padding=1``, so the spatial size of the input is
    preserved; only the channel count changes (``in_channels`` ->
    ``out_channels``).

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply the two conv/BN/ReLU stages to ``x``."""
        return self.conv(x)


class UNet(nn.Module):
    """U-Net encoder/decoder built from ``DoubleConv`` blocks.

    The encoder applies one ``DoubleConv`` per entry of ``features``, each
    followed by 2x2 max pooling. A bottleneck ``DoubleConv`` doubles the
    deepest width, and the decoder mirrors the encoder with a stride-2
    transposed convolution followed by a ``DoubleConv`` on the concatenation
    of the skip connection and the upsampled tensor.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels of the output map.
        features: Channel widths of the encoder levels, shallowest first.
            Any non-empty sequence of ints is accepted.

    Raises:
        ValueError: If ``features`` is empty.
    """

    def __init__(self, in_channels=1, out_channels=1,
                 features=(64, 128, 256, 512)):
        super().__init__()
        # Copy into a list so a caller-owned sequence is never shared, and so
        # the default can be an immutable tuple.
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one width")

        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Down part of U-Net
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Up part of U-Net: modules are stored in (upsample, DoubleConv)
        # pairs, deepest level first.
        for feature in reversed(features):
            self.ups.append(
                nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2)
            )
            self.ups.append(DoubleConv(feature * 2, feature))

        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        """Run the encoder, bottleneck and decoder on ``x``.

        Returns a tensor with ``out_channels`` channels and the same spatial
        size as ``x``.
        """
        skip_connections = []
        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        # Walk the (upsample, DoubleConv) pairs together with the skip
        # connections from deepest to shallowest.
        decoder_steps = zip(
            self.ups[0::2], self.ups[1::2], reversed(skip_connections)
        )
        for upsample, double_conv, skip_connection in decoder_steps:
            x = upsample(x)
            if x.shape[2:] != skip_connection.shape[2:]:
                # Max pooling floors odd sizes, so the upsampled map can be
                # smaller than the skip connection; resize it to match.
                x = F.interpolate(
                    x,
                    size=skip_connection.shape[2:],
                    mode="bilinear",
                    align_corners=False,
                )
            x = double_conv(torch.cat((skip_connection, x), dim=1))

        return self.final_conv(x)
2. DeepLabv3+ model improvements:
- Add the spatial pyramid pooling module to improve the model’s ability to capture information of different scales.
- Use deformable convolutional layers to enlarge the receptive field of the model and improve its adaptability to the shape of the target.
- Use multi-scale training and testing to improve the model’s ability to detect objects of different scales.
Below is the code for the DeepLabv3+ model:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import model_zoo

# Checkpoint URLs for the supported backbones; the keys double as the list of
# backbone names accepted by DeepLabv3Plus.
model_urls = {
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
}


class ASPP(nn.Module):
    """Atrous spatial pyramid pooling head.

    Runs five parallel branches on the input (a 1x1 convolution, three 3x3
    dilated convolutions and a global-average-pooling branch), concatenates
    them along the channel axis and fuses the result with a 1x1 convolution.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels of every branch and of the output.
        rates: Dilation rates of the three dilated convolutions. Only the
            first three entries are used.
    """

    def __init__(self, in_channels, out_channels=256, rates=(6, 12, 18)):
        super().__init__()
        self.conv1x1 = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        # padding == dilation keeps the spatial size of a 3x3 dilated conv.
        self.atrous_conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                      padding=rates[0], dilation=rates[0])
        self.atrous_conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                      padding=rates[1], dilation=rates[1])
        self.atrous_conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                                      padding=rates[2], dilation=rates[2])
        self.pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, out_channels, kernel_size=1),
        )
        self.conv = nn.Conv2d(out_channels * 5, out_channels, kernel_size=1)

    def forward(self, x):
        """Return the fused multi-branch features, same spatial size as ``x``."""
        feature_map = self.conv1x1(x)
        atrous_1 = self.atrous_conv1(x)
        atrous_2 = self.atrous_conv2(x)
        atrous_3 = self.atrous_conv3(x)
        # The pooled branch is 1x1 spatially; stretch it back to the map size.
        pool = F.interpolate(self.pool(x), size=feature_map.shape[2:],
                             mode='bilinear', align_corners=True)
        x = torch.cat((feature_map, atrous_1, atrous_2, atrous_3, pool), dim=1)
        return self.conv(x)


class DeepLabv3Plus(nn.Module):
    """ResNet encoder + ASPP + three-stage skip-connection decoder.

    The encoder is a torchvision ResNet. The ASPP output is upsampled by
    three stride-2 transposed convolutions; after each one the tensor is
    resized to, and concatenated with, the encoder feature map of the
    matching resolution (``layer3``, ``layer2``, ``layer1`` in that order).
    A final 1x1 convolution produces the class scores, which are resized to
    the input resolution.

    Args:
        in_channels: Number of input channels. For any value other than 3 the
            ResNet stem convolution is replaced by a freshly initialised one,
            so pretrained stem weights are not used in that case.
        out_channels: Number of output channels (classes).
        backbone: ``'resnet50'`` or ``'resnet101'``.
        pretrained: Forwarded to the torchvision model constructor.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """

    def __init__(self, in_channels=3, out_channels=21, backbone='resnet50',
                 pretrained=True):
        super().__init__()
        if backbone not in model_urls:
            raise ValueError(
                "Unsupported backbone %r; expected one of %s"
                % (backbone, sorted(model_urls))
            )

        # torchvision is needed only to build the encoder, so it is imported
        # here rather than at module import time.
        from torchvision import models

        resnet = getattr(models, backbone)(pretrained=pretrained)
        # Output width of layer4 for both supported backbones; layer3, layer2
        # and layer1 emit channels // 2, // 4 and // 8 respectively.
        channels = 2048
        aspp_channels = 256

        if in_channels == 3:
            self.conv1 = resnet.conv1
        else:
            self.conv1 = nn.Conv2d(in_channels, resnet.conv1.out_channels,
                                   kernel_size=7, stride=2, padding=3,
                                   bias=False)
        self.bn1 = resnet.bn1
        self.relu = resnet.relu
        self.maxpool = resnet.maxpool
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4

        self.aspp = ASPP(channels, out_channels=aspp_channels)

        # Each decoder stage consumes the previous stage's output concatenated
        # with one encoder skip, so its input width is the sum of both.
        self.up_conv1 = nn.ConvTranspose2d(
            aspp_channels, channels // 4,
            kernel_size=4, stride=2, padding=1)
        self.up_conv2 = nn.ConvTranspose2d(
            channels // 4 + channels // 2, channels // 8,
            kernel_size=4, stride=2, padding=1)
        self.up_conv3 = nn.ConvTranspose2d(
            channels // 8 + channels // 4, channels // 16,
            kernel_size=4, stride=2, padding=1)
        self.final_conv = nn.Conv2d(
            channels // 16 + channels // 8, out_channels, kernel_size=1)

    def forward(self, x):
        """Return per-pixel scores with the same spatial size as ``x``."""
        input_size = x.shape[2:]

        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        # Keep the intermediate encoder maps: they are the decoder's skips.
        skip1 = self.layer1(x)
        skip2 = self.layer2(skip1)
        skip3 = self.layer3(skip2)
        x = self.aspp(self.layer4(skip3))

        decoder_stages = (
            (self.up_conv1, skip3),
            (self.up_conv2, skip2),
            (self.up_conv3, skip1),
        )
        for up_conv, skip in decoder_stages:
            x = up_conv(x)
            # Strided convolutions round odd sizes, so align to the skip
            # explicitly before concatenating.
            x = F.interpolate(x, size=skip.shape[2:], mode='bilinear',
                              align_corners=True)
            x = torch.cat((x, skip), dim=1)

        x = self.final_conv(x)
        return F.interpolate(x, size=input_size, mode='bilinear',
                             align_corners=True)