[torch version] Convolutional Neural Networks (CNN) for MNIST handwritten digit classification

The following code was written and run in a Jupyter Notebook.

1. Load data

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

size = 28
num_classes = 10
batch_size = 32
learning_rate = 0.005
num_epochs = 50

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

train_dataset = torchvision.datasets.MNIST(root = 'data',
                                            train=True,
                                            transform = torchvision.transforms.ToTensor(),
                                            download=True)
train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                            batch_size = batch_size,
                                            shuffle = True)
test_dataset = torchvision.datasets.MNIST(root = 'data',
                                            train = False,
                                            transform = torchvision.transforms.ToTensor(),
                                            download=True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                            batch_size = batch_size,
                                            shuffle = True)
print(len(train_loader),len(test_loader))
cuda:0
1875 313
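
As a quick sanity check on the loaders (a minimal sketch, not in the original notebook), one batch should have shape [batch_size, 1, 28, 28], with ToTensor() scaling the pixel values into [0, 1]:

images, labels = next(iter(train_loader))       # grab a single batch from the training loader
print(images.shape, labels.shape)               # torch.Size([32, 1, 28, 28]) torch.Size([32])
print(images.min().item(), images.max().item()) # ToTensor() maps pixels to the range [0, 1]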

2. Define LeNet

class LeNet(nn.Module):
    '''LeNet is a classic convolutional neural network proposed by Yann LeCun et al. in 1998.
    This implementation uses two convolutional layers and three fully connected layers to classify images.
    __init__ defines the layers the model needs; super(LeNet, self).__init__() calls the constructor
    of the parent class nn.Module, the base class of all neural network models in PyTorch.'''
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1,6,3) # convolution layer: 1 input channel, 6 output channels, 3x3 kernel; input shape [1, 28, 28]
        self.pool1 = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(6,16,3)
        self.pool2 = nn.MaxPool2d(2,2)
        self.fc3 = nn.Linear(16*5*5, 120)
        self.fc4 = nn.Linear(120, 84)
        self.fc5 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = torch.relu(x)
        x = self.pool2(x)

        x = x.view(x.size(0), -1) # reshape from [batch_size, channel, height, width] to [batch_size, -1]; -1 lets PyTorch infer that dimension so the total number of elements is unchanged. This flattens the convolutional feature maps into a vector for the fully connected layers.
        x = self.fc3(x)
        x = torch.relu(x)
        x = self.fc4(x)
        x = torch.relu(x)
        x = self.fc5(x)

        return x
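
The 16*5*5 input size of fc3 comes from tracing the feature-map shapes through the two conv/pool stages; a minimal sketch (on CPU, not part of the original notebook) that verifies it:

m = LeNet()
t = torch.randn(1, 1, 28, 28)
t = m.pool1(torch.relu(m.conv1(t))) # 28 -> 26 (3x3 conv, no padding) -> 13 (2x2 max pool)
print(t.shape)                      # torch.Size([1, 6, 13, 13])
t = m.pool2(torch.relu(m.conv2(t))) # 13 -> 11 -> 5
print(t.shape)                      # torch.Size([1, 16, 5, 5])
print(t.view(t.size(0), -1).shape)  # torch.Size([1, 400]), i.e. 16*5*5 features for fc3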

3. Test LeNet

from torchsummary import summary
model = LeNet().to(device)
summary(model, (1,28,28))

x = torch.randn(1,1,28,28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1 [-1, 6, 26, 26] 60
         MaxPool2d-2 [-1, 6, 13, 13] 0
            Conv2d-3 [-1, 16, 11, 11] 880
         MaxPool2d-4 [-1, 16, 5, 5] 0
            Linear-5 [-1, 120] 48,120
            Linear-6 [-1, 84] 10,164
            Linear-7 [-1, 10] 850
================================================================
Total params: 60,074
Trainable params: 60,074
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.23
Estimated Total Size (MB): 0.29
----------------------------------------------------------------
tensor([[ 0.0470, -0.1018, -0.0517, -0.0125, 0.0844, 0.1247, 0.0207, 0.1338,
         -0.0425, -0.0684]], device='cuda:0', grad_fn=<AddmmBackward0>) torch.Size([1, 10])

4. Training function

def train(model, num_epochs, optimizer, save_name, device='cpu'): # device defaults to 'cpu'; parameters with default values must come after the required ones
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        # train
        model.train()
        train_loss = 0.
        for x,y in train_loader:
            x = x.to(device)
            y = y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Epoch: %d/%d, training loss: %.6f,' %(epoch + 1, num_epochs,
                                            train_loss/len(train_loader.dataset)*batch_size), end=' ')
        
        # test
        model.eval() # switch BatchNorm and Dropout to evaluation behaviour
        with torch.no_grad(): # No need to calculate the gradient
            test_loss = 0.
            error = 0.
            for x,y in test_loader:
                x = x.to(device)
                y = y.to(device)
                outputs = model(x)
                loss = criterion(outputs, y)
                test_loss += loss.item()
                pred = torch.argmax(outputs, axis=1)
                error += torch.sum((pred!=y).float()).item()
            test_loss /= len(test_loader.dataset)
            error /= len(test_loader.dataset)
            print('testing loss: %.6f, testing error rate: %.2f%%' %(test_loss, error*100))
        torch.save(model, save_name)
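
Because torch.save(model, save_name) pickles the whole module, the checkpoint can be reloaded later with torch.load; a minimal sketch (assuming 'Lenet.pth' has already been written by the training call below):

restored = torch.load('Lenet.pth', map_location=device) # newer PyTorch versions may require weights_only=False here
restored.eval()
with torch.no_grad():
    xb, yb = next(iter(test_loader))
    pred = torch.argmax(restored(xb.to(device)), dim=1)
    print((pred == yb.to(device)).float().mean().item()) # accuracy on one test batch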

5. Start LeNet training

model2 = LeNet().to(device)
optimizer = torch.optim.SGD(model2.parameters(), lr = learning_rate)
train(model2, num_epochs, optimizer, 'Lenet.pth', device)
Epoch: 1/50, training loss: 2.300986, testing loss: 0.071927, testing error rate: 88.65%
Epoch: 2/50, training loss: 2.294205, testing loss: 0.071585, testing error rate: 88.65%
Epoch: 3/50, training loss: 2.189130, testing loss: 0.048439, testing error rate: 41.12%
Epoch: 4/50, training loss: 0.603036, testing loss: 0.010060, testing error rate: 9.44%
Epoch: 5/50, training loss: 0.295670, testing loss: 0.007042, testing error rate: 6.55%
...
Epoch: 46/50, training loss: 0.017435, testing loss: 0.001094, testing error rate: 1.08%
Epoch: 47/50, training loss: 0.017018, testing loss: 0.001171, testing error rate: 1.22%
Epoch: 48/50, training loss: 0.016646, testing loss: 0.001084, testing error rate: 1.03%
Epoch: 49/50, training loss: 0.015562, testing loss: 0.001299, testing error rate: 1.27%
Epoch: 50/50, training loss: 0.014892, testing loss: 0.001314, testing error rate: 1.30%

6. Define the AlexNet network

class AlexNet(nn.Module):
    '''AlexNet is a convolutional neural network for computer vision tasks. It was proposed by Alex Krizhevsky, Ilya Sutskever and Geoffrey Hinton (University of Toronto) in 2012 and won the ImageNet image recognition competition that year.
    AlexNet has 5 convolutional layers and 3 fully connected layers. It improved on earlier approaches by, among other things, using the ReLU activation function and Dropout regularization. It was also trained on GPUs, which was still uncommon at the time.
    The original AlexNet takes a 224x224 colour image as input and outputs the image's class. On the ImageNet dataset its top-5 error rate of roughly 15% was far lower than that of the runner-up, and it became a milestone in the development of deep learning.
    Reference: http://t.csdn.cn/nSl8r'''
    def __init__(self, out_size = 10, init_weights = False):
        super(AlexNet, self).__init__()
        # Use nn.Sequential() to package the network into a module to simplify the code
        self.features = nn.Sequential( # Convolution layer extracts image features
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2), # input[1, 28, 28]
            nn.ReLU(inplace=True), # inplace=True overwrites the input tensor in place, saving memory and the cost of allocating a new tensor; safe here because the pre-activation values are not needed again
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),

            nn.Conv2d(64, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),

            nn.Conv2d(128, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128*3*3, 1152),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(1152, 1152),
            nn.ReLU(inplace=True),
            nn.Linear(1152, out_size),
        )

        if init_weights: # optionally apply custom initialization (Kaiming for conv layers, normal for linear layers)
            self._initialize_weights()
        
    def forward(self, x):
        x = self.features(x) # Convolution layer extracts features
        x = torch.flatten(x, start_dim=1) # flatten [batch, channel, height, width] to [batch, channel*height*width], keeping the batch dimension
        x = self.classifier(x) # fully connected layer classification
        return x
        
    # Weight initialization; note that PyTorch already applies a default initialization when the layers are created
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d): # If it is a convolutional layer
                '''If m is an instance of nn.Conv2d, this condition is true and the Kaiming
                initialization below is applied to its weights (and its bias is zeroed);
                otherwise this branch is skipped.'''
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                         nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear): # If it is a fully connected layer
                nn.init.normal_(m.weight, 0, 0.01) # normal distribution
                nn.init.constant_(m.bias, 0)
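
The classifier's first Linear layer expects 128*3*3 = 1152 features; a minimal sketch (on CPU, not in the original) that checks this against the features block, whose three pooling stages shrink 28x28 to 14x14, 7x7 and finally 3x3:

m = AlexNet()
feat = m.features(torch.randn(1, 1, 28, 28))
print(feat.shape)                             # torch.Size([1, 128, 3, 3])
print(torch.flatten(feat, start_dim=1).shape) # torch.Size([1, 1152])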

7. Test AlexNet

from torchsummary import summary
model = AlexNet().to(device)
summary(model, (1,28,28))

x = torch.randn(1,1,28,28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1 [-1, 16, 28, 28] 416
              ReLU-2 [-1, 16, 28, 28] 0
         MaxPool2d-3 [-1, 16, 14, 14] 0
            Conv2d-4 [-1, 32, 14, 14] 12,832
              ReLU-5 [-1, 32, 14, 14] 0
         MaxPool2d-6 [-1, 32, 7, 7] 0
            Conv2d-7 [-1, 64, 7, 7] 51,264
              ReLU-8 [-1, 64, 7, 7] 0
            Conv2d-9 [-1, 128, 7, 7] 204,928
             ReLU-10 [-1, 128, 7, 7] 0
           Conv2d-11 [-1, 128, 7, 7] 409,728
             ReLU-12 [-1, 128, 7, 7] 0
        MaxPool2d-13 [-1, 128, 3, 3] 0
          Dropout-14 [-1, 1152] 0
           Linear-15 [-1, 1152] 1,328,256
             ReLU-16 [-1, 1152] 0
          Dropout-17 [-1, 1152] 0
           Linear-18 [-1, 1152] 1,328,256
             ReLU-19 [-1, 1152] 0
           Linear-20 [-1, 10] 11,530
================================================================
Total params: 3,347,210
Trainable params: 3,347,210
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.62
Params size (MB): 12.77
Estimated Total Size (MB): 13.40
----------------------------------------------------------------
tensor([[ 0.0068, 0.0046, 0.0296, -0.0015, -0.0043, -0.0235, 0.0388, 0.0007,
          0.0145, -0.0453]], device='cuda:0', grad_fn=<AddmmBackward0>) torch.Size([1, 10])

8. Training AlexNet

model3 = AlexNet(out_size=10,init_weights=True).to(device)
print(model3)
optimizer = torch.optim.SGD(model3.parameters(), lr = learning_rate)
train(model3, num_epochs, optimizer, 'AlexNet.pth', device)
Epoch: 1/50, training loss: 2.301477, testing loss: 0.071999, testing error rate: 88.65%
Epoch: 2/50, training loss: 2.299148, testing loss: 0.071852, testing error rate: 86.17%
Epoch: 3/50, training loss: 2.273426, testing loss: 0.069350, testing error rate: 80.80%
Epoch: 4/50, training loss: 1.308875, testing loss: 0.010009, testing error rate: 9.43%
Epoch: 5/50, training loss: 0.289133, testing loss: 0.004225, testing error rate: 4.40%
...
Epoch: 46/50, training loss: 0.015937, testing loss: 0.000644, testing error rate: 0.70%
Epoch: 47/50, training loss: 0.014567, testing loss: 0.000742, testing error rate: 0.69%
Epoch: 48/50, training loss: 0.014157, testing loss: 0.000673, testing error rate: 0.64%
Epoch: 49/50, training loss: 0.013786, testing loss: 0.000747, testing error rate: 0.82%
Epoch: 50/50, training loss: 0.013212, testing loss: 0.000781, testing error rate: 0.73%

9. Define the InceptionNet network

class InceptionA(nn.Module):
    def __init__(self):
        super(InceptionA, self).__init__()
        self.conv11 = nn.Conv2d(12,6,1)
        self.conv31 = nn.Conv2d(6,6,3,padding=1,stride=2)
        self.conv12 = nn.Conv2d(12,8,1)
        self.conv32 = nn.Conv2d(8,8,3,padding=1)
        self.conv33 = nn.Conv2d(8,8,3,padding=1,stride=2)
        self.pool = nn.MaxPool2d(2,2)
        self.conv13 = nn.Conv2d(12,4,1)

    def forward(self, x):
        '''x: b*12*14*14
        out:b*18*7*7'''
        out1 = self.conv11(x)
        out1 = self.conv31(out1) #(b,6,7,7)
        
        out2 = self.conv12(x)
        out2 = self.conv32(out2)
        out2 = self.conv33(out2) #8
        
        out3 = self.pool(x)
        out3 = self.conv13(out3) #4
        
        return torch.cat([out1,out2,out3], 1) #(b,18,7,7) concatenated along the channel dimension
    
class InceptionB(nn.Module):

    def __init__(self):
        super(InceptionB, self).__init__()
        self.conv11 = nn.Conv2d(18,6,1)
        self.conv12 = nn.Conv2d(18,8,1)
        self.conv31 = nn.Conv2d(8,8,3,padding=1)
        self.conv13 = nn.Conv2d(18,8,1)
        self.conv32 = nn.Conv2d(8,8,3,padding=1)
        self.conv33 = nn.Conv2d(8,8,3,padding=1)
        self.pool = nn.MaxPool2d(3,1,padding=1)
        self.conv14 = nn.Conv2d(18,4,1)
        
    def forward(self, x):
        '''x: b*18*7*7
        out:b*26*7*7'''
        out1 = self.conv11(x)
        
        out2 = self.conv12(x)
        out2 = self.conv31(out2)
        
        out3 = self.conv13(x)
        out3 = self.conv32(out3)
        out3 = self.conv33(out3)
        
        out4 = self.pool(x)
        out4 = self.conv14(out4)
        
        return torch.cat([out1,out2,out3,out4], 1)

class InceptionNet(nn.Module):

    def __init__(self):
        super(InceptionNet, self).__init__()
        self.conv1 = nn.Conv2d(1,12,3,padding=1)
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(12,12,3,padding=1)
        self.inception1 = InceptionA()
        self.inception2 = InceptionB()
        self.conv3 = nn.Conv2d(26,32,3)
        self.avg_pool = nn.AvgPool2d(5)
        self.fc = nn.Linear(32, 10)
    
    def forward(self, x):
        out = self.conv1(x)
        out = self.pool(out)
        out = self.conv2(out)
        out = self.inception1(out)
        out = self.inception2(out)
        out = self.conv3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = F.softmax(out, 0) # Note: this softmax is over dim 0 (the batch dimension) rather than the class dimension, and CrossEntropyLoss already applies log-softmax; this is why training stalls in section 11 (see the corrected sketch after those results)
        return out
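
The channel counts stated in the docstrings follow from torch.cat along dim 1: InceptionA concatenates 6 + 8 + 4 = 18 channels and InceptionB 6 + 8 + 8 + 4 = 26. A minimal sketch (not in the original) confirming the shapes:

branch_a, branch_b = InceptionA(), InceptionB()
xa = torch.randn(1, 12, 14, 14)  # the shape InceptionA receives inside InceptionNet
ya = branch_a(xa)
print(ya.shape)                  # torch.Size([1, 18, 7, 7])
print(branch_b(ya).shape)        # torch.Size([1, 26, 7, 7])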

10. Test InceptionNet

from torchsummary import summary
model = InceptionNet().to(device)
summary(model, (1,28,28))

x = torch.randn(1,1,28,28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1 [-1, 12, 28, 28] 120
         MaxPool2d-2 [-1, 12, 14, 14] 0
            Conv2d-3 [-1, 12, 14, 14] 1,308
            Conv2d-4 [-1, 6, 14, 14] 78
            Conv2d-5 [-1, 6, 7, 7] 330
            Conv2d-6 [-1, 8, 14, 14] 104
            Conv2d-7 [-1, 8, 14, 14] 584
            Conv2d-8 [-1, 8, 7, 7] 584
         MaxPool2d-9 [-1, 12, 7, 7] 0
           Conv2d-10 [-1, 4, 7, 7] 52
       InceptionA-11 [-1, 18, 7, 7] 0
           Conv2d-12 [-1, 6, 7, 7] 114
           Conv2d-13 [-1, 8, 7, 7] 152
           Conv2d-14 [-1, 8, 7, 7] 584
           Conv2d-15 [-1, 8, 7, 7] 152
           Conv2d-16 [-1, 8, 7, 7] 584
           Conv2d-17 [-1, 8, 7, 7] 584
        MaxPool2d-18 [-1, 18, 7, 7] 0
           Conv2d-19 [-1, 4, 7, 7] 76
       InceptionB-20 [-1, 26, 7, 7] 0
           Conv2d-21 [-1, 32, 5, 5] 7,520
        AvgPool2d-22 [-1, 32, 1, 1] 0
           Linear-23 [-1, 10] 330
================================================================
Total params: 13,256
Trainable params: 13,256
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.20
Params size (MB): 0.05
Estimated Total Size (MB): 0.25
----------------------------------------------------------------
tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], device='cuda:0',
       grad_fn=<SoftmaxBackward0>) torch.Size([1, 10])

11. Training InceptionNet

model4 = InceptionNet().to(device)
optimizer = torch.optim.SGD(model4.parameters(), lr=0.001)

train(model4, num_epochs, optimizer, 'InceptionNet.pth', device)
Epoch: 1/50, training loss: 2.302589, testing loss: 0.072071, testing error rate: 88.53%
Epoch: 2/50, training loss: 2.302589, testing loss: 0.072071, testing error rate: 88.55%
Epoch: 3/50, training loss: 2.302589, testing loss: 0.072071, testing error rate: 88.34%
Epoch: 4/50, training loss: 2.302588, testing loss: 0.072071, testing error rate: 88.34%
Epoch: 5/50, training loss: 2.302588, testing loss: 0.072071, testing error rate: 88.28%
...
Epoch: 46/50, training loss: 2.302578, testing loss: 0.072071, testing error rate: 86.97%
Epoch: 47/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.93%
Epoch: 48/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.95%
Epoch: 49/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.63%
Epoch: 50/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.65%
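
The all-ones test output in section 10 and the stalled loss above (2.3026 ≈ ln 10) both trace back to the head of forward(): F.softmax(out, 0) normalizes over the batch dimension instead of the 10 classes, and nn.CrossEntropyLoss then applies its own log-softmax on top. A minimal sketch of the symptom and the fix (my own correction, not the original code):

logits = torch.randn(1, 10)
print(F.softmax(logits, dim=0)) # with batch size 1, every entry is exactly 1.0 (the section 10 output)
print(F.softmax(logits, dim=1)) # a proper probability distribution over the 10 classes
# Fix: have InceptionNet.forward return self.fc(out) (raw logits) and drop the softmax entirely;
# CrossEntropyLoss expects logits, and probabilities can be recovered later with F.softmax(..., dim=1).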

12. Define the ResNet residual network

# 3x3 convolution; bias=False because each conv is followed by BatchNorm, which has its own shift term
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)

#residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
    
#ResNet
class ResNet(nn.Module):
    def __init__(self, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(1, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(16, layers[0])
        self.layer2 = self.make_layer(32, layers[1], 2)
        self.layer3 = self.make_layer(64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(7)
        self.fc = nn.Linear(64, num_classes)
        
    def make_layer(self, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(ResidualBlock(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = F.softmax(out, dim=1) # Note: CrossEntropyLoss applies its own log-softmax, so this extra softmax is redundant and keeps the reported training loss from falling below about 1.46, although the network still learns
        return out
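
make_layer attaches a conv3x3 + BatchNorm downsample branch whenever the stride or channel count changes, so the shortcut can still be added to the block output. A minimal sketch (on CPU, not in the original) tracing the feature maps down to the 7x7 size consumed by AvgPool2d(7):

m = ResNet([2, 2, 2])
t = m.relu(m.bn(m.conv(torch.randn(1, 1, 28, 28))))
for layer in (m.layer1, m.layer2, m.layer3):
    t = layer(t)
    print(t.shape)          # [1, 16, 28, 28] -> [1, 32, 14, 14] -> [1, 64, 7, 7]
print(m.avg_pool(t).shape)  # torch.Size([1, 64, 1, 1]), flattened into fc(64, 10)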

13. Test ResNet

from torchsummary import summary
model = ResNet([2,2,2]).to(device)
summary(model, (1,28,28))

x = torch.randn(1,1,28,28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1 [-1, 16, 28, 28] 144
       BatchNorm2d-2 [-1, 16, 28, 28] 32
              ReLU-3 [-1, 16, 28, 28] 0
            Conv2d-4 [-1, 16, 28, 28] 2,304
       BatchNorm2d-5 [-1, 16, 28, 28] 32
              ReLU-6 [-1, 16, 28, 28] 0
            Conv2d-7 [-1, 16, 28, 28] 2,304
       BatchNorm2d-8 [-1, 16, 28, 28] 32
              ReLU-9 [-1, 16, 28, 28] 0
    ResidualBlock-10 [-1, 16, 28, 28] 0
           Conv2d-11 [-1, 16, 28, 28] 2,304
      BatchNorm2d-12 [-1, 16, 28, 28] 32
             ReLU-13 [-1, 16, 28, 28] 0
           Conv2d-14 [-1, 16, 28, 28] 2,304
      BatchNorm2d-15 [-1, 16, 28, 28] 32
             ReLU-16 [-1, 16, 28, 28] 0
    ResidualBlock-17 [-1, 16, 28, 28] 0
           Conv2d-18 [-1, 32, 14, 14] 4,608
      BatchNorm2d-19 [-1, 32, 14, 14] 64
             ReLU-20 [-1, 32, 14, 14] 0
           Conv2d-21 [-1, 32, 14, 14] 9,216
      BatchNorm2d-22 [-1, 32, 14, 14] 64
           Conv2d-23 [-1, 32, 14, 14] 4,608
      BatchNorm2d-24 [-1, 32, 14, 14] 64
             ReLU-25 [-1, 32, 14, 14] 0
    ResidualBlock-26 [-1, 32, 14, 14] 0
           Conv2d-27 [-1, 32, 14, 14] 9,216
      BatchNorm2d-28 [-1, 32, 14, 14] 64
             ReLU-29 [-1, 32, 14, 14] 0
           Conv2d-30 [-1, 32, 14, 14] 9,216
      BatchNorm2d-31 [-1, 32, 14, 14] 64
             ReLU-32 [-1, 32, 14, 14] 0
    ResidualBlock-33 [-1, 32, 14, 14] 0
           Conv2d-34 [-1, 64, 7, 7] 18,432
      BatchNorm2d-35 [-1, 64, 7, 7] 128
             ReLU-36 [-1, 64, 7, 7] 0
           Conv2d-37 [-1, 64, 7, 7] 36,864
      BatchNorm2d-38 [-1, 64, 7, 7] 128
           Conv2d-39 [-1, 64, 7, 7] 18,432
      BatchNorm2d-40 [-1, 64, 7, 7] 128
             ReLU-41 [-1, 64, 7, 7] 0
    ResidualBlock-42 [-1, 64, 7, 7] 0
           Conv2d-43 [-1, 64, 7, 7] 36,864
      BatchNorm2d-44 [-1, 64, 7, 7] 128
             ReLU-45 [-1, 64, 7, 7] 0
           Conv2d-46 [-1, 64, 7, 7] 36,864
      BatchNorm2d-47 [-1, 64, 7, 7] 128
             ReLU-48 [-1, 64, 7, 7] 0
    ResidualBlock-49 [-1, 64, 7, 7] 0
        AvgPool2d-50 [-1, 64, 1, 1] 0
           Linear-51 [-1, 10] 650
================================================================
Total params: 195,450
Trainable params: 195,450
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 2.78
Params size (MB): 0.75
Estimated Total Size (MB): 3.52
----------------------------------------------------------------
tensor([[0.0604, 0.0980, 0.1416, 0.0921, 0.1122, 0.0830, 0.1059, 0.1051, 0.0831,
         0.1187]], device='cuda:0', grad_fn=<SoftmaxBackward0>) torch.Size([1, 10])

14. Training ResNet

model5 = ResNet([2, 2, 2]).to(device)
print(model5)
optimizer = torch.optim.SGD(model5.parameters(), lr=learning_rate)

train(model5, num_epochs, optimizer, 'ResNet.pth', device)
Epoch: 1/50, training loss: 2.189773, testing loss: 0.063730, testing error rate: 57.93%
Epoch: 2/50, training loss: 1.909820, testing loss: 0.055589, testing error rate: 27.88%
Epoch: 3/50, training loss: 1.652832, testing loss: 0.047946, testing error rate: 3.05%
Epoch: 4/50, training loss: 1.528145, testing loss: 0.047026, testing error rate: 2.33%
Epoch: 5/50, training loss: 1.506562, testing loss: 0.046566, testing error rate: 1.52%
...
Epoch: 46/50, training loss: 1.464764, testing loss: 0.045982, testing error rate: 0.67%
Epoch: 47/50, training loss: 1.464779, testing loss: 0.045961, testing error rate: 0.53%
Epoch: 48/50, training loss: 1.464784, testing loss: 0.045953, testing error rate: 0.50%
Epoch: 49/50, training loss: 1.464439, testing loss: 0.045962, testing error rate: 0.56%
Epoch: 50/50, training loss: 1.464311, testing loss: 0.045947, testing error rate: 0.47%