Network definition (model body). File name: u2net.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class REBNCONV(nn.Module):
    """Basic unit: 3x3 (optionally dilated) convolution -> BatchNorm -> ReLU.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels.
        dirate: dilation rate; it is also used as the padding, so the spatial
            size of the output equals that of the input.
    """

    def __init__(self, in_ch=3, out_ch=3, dirate=1):
        super(REBNCONV, self).__init__()
        # padding == dilation keeps H and W unchanged for a 3x3 kernel.
        self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate)
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        # inplace=True overwrites the BatchNorm output instead of allocating.
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu_s1(self.bn_s1(self.conv_s1(x)))


def _upsample_like(src, tar):
    """Bilinearly resize `src` to the spatial size (H, W) of `tar`."""
    # tar.shape[2:] drops the batch and channel dimensions.
    # align_corners=False is the library default; passing it explicitly keeps
    # the result identical while avoiding the warning older torch emits.
    return F.interpolate(src, size=tar.shape[2:], mode='bilinear', align_corners=False)


### RSU-7 ###
class RSU7(nn.Module):
    """Residual U-block with 7 encoder levels (5 poolings, dilated bottom).

    Args:
        in_ch: channels of the block input.
        mid_ch: channels used inside the inner U structure.
        out_ch: channels of the block output (and of the residual branch).
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU7, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        # ceil_mode=True rounds the output size up when H or W is odd.
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # Dilated (atrous) convolution at the bottom instead of another pool.
        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # Decoder convs take the concatenation of two mid_ch feature maps.
        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        # hxin is kept for the residual connection at the end.
        hxin = self.rebnconvin(x)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(self.pool1(hx1))
        hx3 = self.rebnconv3(self.pool2(hx2))
        hx4 = self.rebnconv4(self.pool3(hx3))
        hx5 = self.rebnconv5(self.pool4(hx4))
        hx6 = self.rebnconv6(self.pool5(hx5))
        hx7 = self.rebnconv7(hx6)

        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), dim=1))
        hx5d = self.rebnconv5d(torch.cat((_upsample_like(hx6d, hx5), hx5), dim=1))
        hx4d = self.rebnconv4d(torch.cat((_upsample_like(hx5d, hx4), hx4), dim=1))
        hx3d = self.rebnconv3d(torch.cat((_upsample_like(hx4d, hx3), hx3), dim=1))
        hx2d = self.rebnconv2d(torch.cat((_upsample_like(hx3d, hx2), hx2), dim=1))
        hx1d = self.rebnconv1d(torch.cat((_upsample_like(hx2d, hx1), hx1), dim=1))

        # Residual sum.
        return hx1d + hxin


### RSU-6 ###
class RSU6(nn.Module):
    """Residual U-block with 6 encoder levels (4 poolings, dilated bottom).

    Args:
        in_ch: channels of the block input.
        mid_ch: channels used inside the inner U structure.
        out_ch: channels of the block output (and of the residual branch).
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU6, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        # ceil_mode=True rounds the output size up when H or W is odd.
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hxin = self.rebnconvin(x)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(self.pool1(hx1))
        hx3 = self.rebnconv3(self.pool2(hx2))
        hx4 = self.rebnconv4(self.pool3(hx3))
        hx5 = self.rebnconv5(self.pool4(hx4))
        hx6 = self.rebnconv6(hx5)

        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), dim=1))
        hx4d = self.rebnconv4d(torch.cat((_upsample_like(hx5d, hx4), hx4), dim=1))
        hx3d = self.rebnconv3d(torch.cat((_upsample_like(hx4d, hx3), hx3), dim=1))
        hx2d = self.rebnconv2d(torch.cat((_upsample_like(hx3d, hx2), hx2), dim=1))
        hx1d = self.rebnconv1d(torch.cat((_upsample_like(hx2d, hx1), hx1), dim=1))

        return hx1d + hxin


### RSU-5 ###
class RSU5(nn.Module):
    """Residual U-block with 5 encoder levels (3 poolings, dilated bottom).

    Args:
        in_ch: channels of the block input.
        mid_ch: channels used inside the inner U structure.
        out_ch: channels of the block output (and of the residual branch).
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU5, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hxin = self.rebnconvin(x)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(self.pool1(hx1))
        hx3 = self.rebnconv3(self.pool2(hx2))
        hx4 = self.rebnconv4(self.pool3(hx3))
        hx5 = self.rebnconv5(hx4)

        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), dim=1))
        hx3d = self.rebnconv3d(torch.cat((_upsample_like(hx4d, hx3), hx3), dim=1))
        hx2d = self.rebnconv2d(torch.cat((_upsample_like(hx3d, hx2), hx2), dim=1))
        hx1d = self.rebnconv1d(torch.cat((_upsample_like(hx2d, hx1), hx1), dim=1))

        return hx1d + hxin


### RSU-4 ###
class RSU4(nn.Module):
    """Residual U-block with 4 encoder levels (2 poolings, dilated bottom).

    Args:
        in_ch: channels of the block input.
        mid_ch: channels used inside the inner U structure.
        out_ch: channels of the block output (and of the residual branch).
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hxin = self.rebnconvin(x)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(self.pool1(hx1))
        hx3 = self.rebnconv3(self.pool2(hx2))
        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), dim=1))
        hx2d = self.rebnconv2d(torch.cat((_upsample_like(hx3d, hx2), hx2), dim=1))
        hx1d = self.rebnconv1d(torch.cat((_upsample_like(hx2d, hx1), hx1), dim=1))

        return hx1d + hxin


### RSU-4F ###
class RSU4F(nn.Module):
    """Residual block that uses dilation (1, 2, 4, 8) instead of pooling.

    No pooling or upsampling is performed, so every intermediate feature map
    has the same spatial size as the input.

    Args:
        in_ch: channels of the block input.
        mid_ch: channels used inside the block.
        out_ch: channels of the block output (and of the residual branch).
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        super(RSU4F, self).__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self, x):
        hxin = self.rebnconvin(x)

        hx1 = self.rebnconv1(hxin)
        hx2 = self.rebnconv2(hx1)
        hx3 = self.rebnconv3(hx2)
        hx4 = self.rebnconv4(hx3)

        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), dim=1))
        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), dim=1))
        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), dim=1))

        return hx1d + hxin


def _u2net_forward(net, x):
    """Shared forward pass of U2NET and U2NETP.

    `net` must expose stage1..stage6, pool12..pool56, stage5d..stage1d,
    side1..side6 and outconv. Returns a 7-tuple of sigmoid maps
    (fused d0, then side outputs d1..d6), each resized to the size of d1.
    """
    # -------------------- encoder --------------------
    hx1 = net.stage1(x)
    hx2 = net.stage2(net.pool12(hx1))
    hx3 = net.stage3(net.pool23(hx2))
    hx4 = net.stage4(net.pool34(hx3))
    hx5 = net.stage5(net.pool45(hx4))
    hx6 = net.stage6(net.pool56(hx5))

    # -------------------- decoder --------------------
    hx5d = net.stage5d(torch.cat((_upsample_like(hx6, hx5), hx5), 1))
    hx4d = net.stage4d(torch.cat((_upsample_like(hx5d, hx4), hx4), 1))
    hx3d = net.stage3d(torch.cat((_upsample_like(hx4d, hx3), hx3), 1))
    hx2d = net.stage2d(torch.cat((_upsample_like(hx3d, hx2), hx2), 1))
    hx1d = net.stage1d(torch.cat((_upsample_like(hx2d, hx1), hx1), 1))

    # ------------------ side outputs ------------------
    d1 = net.side1(hx1d)
    d2 = _upsample_like(net.side2(hx2d), d1)
    d3 = _upsample_like(net.side3(hx3d), d1)
    d4 = _upsample_like(net.side4(hx4d), d1)
    d5 = _upsample_like(net.side5(hx5d), d1)
    d6 = _upsample_like(net.side6(hx6), d1)

    # Fuse the six side maps with a 1x1 convolution.
    d0 = net.outconv(torch.cat((d1, d2, d3, d4, d5, d6), 1))

    # The model predicts a binary mask, so every map goes through a sigmoid.
    return (torch.sigmoid(d0), torch.sigmoid(d1), torch.sigmoid(d2),
            torch.sigmoid(d3), torch.sigmoid(d4), torch.sigmoid(d5),
            torch.sigmoid(d6))


##### U^2-Net ####
class U2NET(nn.Module):
    """Full-size U^2-Net: a 6-stage encoder/decoder built from RSU blocks.

    Args:
        in_ch: number of input image channels.
        out_ch: number of channels of each predicted map.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(U2NET, self).__init__()

        # encoder
        self.stage1 = RSU7(in_ch, 32, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 32, 128)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(128, 64, 256)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(256, 128, 512)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(512, 256, 512)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(512, 256, 512)

        # decoder
        self.stage5d = RSU4F(1024, 256, 512)
        self.stage4d = RSU4(1024, 128, 256)
        self.stage3d = RSU5(512, 64, 128)
        self.stage2d = RSU6(256, 32, 64)
        self.stage1d = RSU7(128, 16, 64)

        # Six side-output layers, one per decoder stage plus the bottom stage.
        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)

        self.outconv = nn.Conv2d(6, out_ch, 1)

    def forward(self, x):
        """Return (d0, d1, ..., d6): the fused map followed by six side maps."""
        return _u2net_forward(self, x)


### U^2-Net small ###
class U2NETP(nn.Module):
    """Small U^2-Net.

    The structure is the same as U2NET, but every stage uses fewer
    convolution kernels, so the parameter count is lower.

    Args:
        in_ch: number of input image channels.
        out_ch: number of channels of each predicted map.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super(U2NETP, self).__init__()

        # encoder
        self.stage1 = RSU7(in_ch, 16, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 16, 64)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(64, 16, 64)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(64, 16, 64)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(64, 16, 64)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(64, 16, 64)

        # decoder
        self.stage5d = RSU4F(128, 16, 64)
        self.stage4d = RSU4(128, 16, 64)
        self.stage3d = RSU5(128, 16, 64)
        self.stage2d = RSU6(128, 16, 64)
        self.stage1d = RSU7(128, 16, 64)

        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(64, out_ch, 3, padding=1)

        self.outconv = nn.Conv2d(6, out_ch, 1)

    def forward(self, x):
        """Return (d0, d1, ..., d6): the fused map followed by six side maps."""
        # Uses torch.sigmoid (via the shared helper) rather than the
        # deprecated F.sigmoid, matching U2NET.
        return _u2net_forward(self, x)


if __name__ == '__main__':
    u2net = U2NET()
    x = torch.randn(1, 3, 224, 224)
    y = u2net(x)
    print(y[0].shape, y[1].shape, y[2].shape, y[3].shape, y[4].shape, y[5].shape, y[6].shape)
Dataset definition, including data augmentation by random cropping (the same random crop is applied to the image and its label). File name: eye_dataset.py
import os

import cv2
import numpy as np
import torch.random
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.utils import save_image


class Eye_Dataset(Dataset):
    """Image / vessel-mask dataset read from a directory tree.

    Expected layout (as used by the code below):
        <root>/training/images/<NN>...      training images
        <root>/training/1st_manual/<NN>_manual1.gif   training labels
        <root>/test/images/...              test images

    Args:
        root: dataset root directory.
        isTrain: if True read `<root>/training` and return (image, label)
            pairs; otherwise read `<root>/test` and return images only.
        transfrom: callable applied to each PIL image (and, in training
            mode, to the label with identical randomness). If None, the
            PIL images are returned untransformed.
    """

    def __init__(self, root, isTrain=True, transfrom=None):
        super(Eye_Dataset, self).__init__()
        self.isTrain = isTrain
        self.transfrom = transfrom
        if isTrain:
            self.path = root + "/training"
        else:
            self.path = root + "/test"
        # os.listdir returns entries in arbitrary order; sort so that an
        # index always maps to the same file.
        self.img_name = sorted(os.listdir(self.path + "/images"))

    def __len__(self):
        return len(self.img_name)

    def _apply_transform(self, image):
        """Apply the configured transform, or pass through when it is None."""
        if self.transfrom is None:
            return image
        return self.transfrom(image)

    def __getitem__(self, index):
        img_name = self.img_name[index]
        img_path = self.path + "/images/" + img_name
        img_data = cv2.imread(img_path)
        if img_data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cv2.imread could not read image: " + img_path)
        # OpenCV loads BGR; PIL / torchvision expect RGB.
        img_data = cv2.cvtColor(img_data, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img_data)

        if not self.isTrain:
            return self._apply_transform(img)

        # The label file shares the first two characters of the image name.
        file_name = img_name[0:2]
        label_name = file_name + "_manual1.gif"
        label_path = self.path + "/1st_manual/" + label_name
        label = Image.open(label_path)
        label_data = np.array(label)
        # Binarize the mask to {0, 255} with Otsu's threshold.
        ret, label_data = cv2.threshold(label_data, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        label = Image.fromarray(label_data)

        # Make the random crop of the image and of the label coincide:
        # remember the RNG state, transform the image, rewind the RNG, and
        # transform the label with the very same random draws. Unlike
        # reseeding from torch.random.seed(), this keeps any seed the caller
        # set with torch.manual_seed in effect.
        rng_state = torch.random.get_rng_state()
        img = self._apply_transform(img)
        torch.random.set_rng_state(rng_state)
        label = self._apply_transform(label)
        return img, label


if __name__ == '__main__':
    transfrom = transforms.Compose([
        transforms.Resize((320, 320)),
        transforms.RandomCrop(288),
        transforms.ToTensor()
    ])
    eyeDataset = Eye_Dataset(root=r"eye", isTrain=True, transfrom=transfrom)
    # save_image does not create missing directories.
    os.makedirs("img/img", exist_ok=True)
    os.makedirs("img/label", exist_ok=True)
    for i, (a, b) in enumerate(eyeDataset, start=1):
        save_image(a, "img/img/img{0}.jpg".format(i), nrow=1)
        save_image(b, "img/label/label{0}.jpg".format(i), nrow=1)
Training file, file name u2net_train_eye
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from torchvision.utils import save_image

from eye_dataset import Eye_Dataset
from u2net import U2NET

bce_loss = nn.BCELoss(reduction='mean')


def muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, labels_v):
    """Deep-supervision loss: sum of the BCE losses of all seven outputs.

    Args:
        d0: fused prediction.
        d1..d6: side predictions.
        labels_v: target mask, compared against every prediction.

    Returns:
        (loss0, loss): the BCE of the fused output alone, and the sum of
        the BCE losses of d0..d6 (the value to back-propagate).
    """
    losses = [bce_loss(d, labels_v) for d in (d0, d1, d2, d3, d4, d5, d6)]
    loss = losses[0]
    for side_loss in losses[1:]:
        loss = loss + side_loss
    return losses[0], loss


def main(resume_epoch=1102, start_epoch=1021):
    """Train U2NET on the eye dataset until interrupted.

    Each epoch logs the mean loss to TensorBoard ("logs"), saves the weights
    to saved_models/u2net/net<epoch>.pth and writes the fused prediction of
    the last batch to train_img_u2/<epoch>.jpg.

    Args:
        resume_epoch: epoch number of the checkpoint to load, if it exists.
        start_epoch: value the epoch counter starts from.
            NOTE(review): the defaults reproduce the original hard-coded
            values, which differ (1102 vs 1021), so resumed runs overwrite
            checkpoints 1021..1102 — confirm this is intended.
    """
    transfrom = transforms.Compose([
        transforms.Resize((320, 320)),
        transforms.RandomCrop(288),
        transforms.ToTensor()
    ])
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    module = r"saved_models/u2net/net{0}.pth"
    sample_dir = "train_img_u2"
    # torch.save / save_image do not create missing directories.
    os.makedirs(os.path.dirname(module), exist_ok=True)
    os.makedirs(sample_dir, exist_ok=True)

    eyeDataset = Eye_Dataset(root=r"eye", isTrain=True, transfrom=transfrom)
    salobj_dataloader = DataLoader(eyeDataset, batch_size=1, shuffle=True, num_workers=1)
    if len(salobj_dataloader) == 0:
        # Otherwise the average below divides by zero.
        raise ValueError("training dataset is empty")

    net = U2NET()
    net.to(DEVICE)
    # Continue training from a checkpoint if one is available.
    checkpoint = module.format(resume_epoch)
    if os.path.exists(checkpoint):
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        net.load_state_dict(torch.load(checkpoint, map_location=DEVICE))
        print("Loading successful")
    else:
        print("no params")

    optimizer = optim.Adam(net.parameters())
    epoch = start_epoch
    summerWriter = SummaryWriter("logs")
    try:
        # Runs until the process is interrupted.
        while True:
            net.train()
            total_loss = 0.0
            for i, (img, label) in enumerate(salobj_dataloader):
                img = img.to(DEVICE)
                label = label.to(DEVICE)
                d0, d1, d2, d3, d4, d5, d6 = net(img)
                loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            avg_loss = total_loss / len(salobj_dataloader)
            print("loss:", avg_loss, "epoch:", epoch)
            summerWriter.add_scalar("loss", avg_loss, epoch)
            torch.save(net.state_dict(), module.format(epoch))

            # Save the fused prediction of the first sample of the last batch.
            j = d0[0].detach()
            save_image(j.cpu(), f"{sample_dir}/{epoch}.jpg", nrow=1)
            epoch += 1
    finally:
        # Flush pending TensorBoard events even on Ctrl-C or an error.
        summerWriter.close()


if __name__ == "__main__":
    main()
The complete project and dataset are available at the links below. The dataset is the open-source DRIVE retinal image dataset.
Because this project was done a long time ago, I no longer remember it in detail. If you need it, you can download it yourself.
Link: https://pan.baidu.com/s/13zGWfx6tFN3IBkSy4QARaw?pwd=pscq
Extraction code: pscq
Link: https://pan.baidu.com/s/1n4x-9AGW-axzxtSZepWPyA?pwd=vzzr
Extraction code: vzzr