[PyTorch version: Convolutional Neural Networks (CNNs) for MNIST handwritten digit classification]
The following code was written and run in a Jupyter Notebook.
1. Load data
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

size = 28
num_classes = 10
batch_size = 32
learning_rate = 0.005
num_epochs = 50
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

train_dataset = torchvision.datasets.MNIST(root='data', train=True,
                                           transform=torchvision.transforms.ToTensor(),
                                           download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, shuffle=True)
test_dataset = torchvision.datasets.MNIST(root='data', train=False,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, shuffle=True)
print(len(train_loader), len(test_loader))
cuda:0
1875 313
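Before defining any model, it can be worth sanity-checking one batch from the loader. The following is a minimal sketch (not part of the original notebook) that prints the shapes and label range of a single batch:

# Sanity check (optional): inspect one batch from the training loader.
x, y = next(iter(train_loader))
print(x.shape)  # expected: torch.Size([32, 1, 28, 28]) -> [batch, channel, height, width]
print(y.shape)  # expected: torch.Size([32])
print(y.min().item(), y.max().item())  # labels should lie in 0..9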
2. Define LeNet
class LeNet(nn.Module):
    '''LeNet is a classic convolutional neural network proposed by Yann LeCun
    et al. in 1998. This implementation uses two convolutional layers and
    three fully connected layers to classify images. __init__ defines the
    layers the model needs; super(LeNet, self).__init__() calls the
    initializer of the parent class nn.Module, the base class of all neural
    network models.'''
    def __init__(self):
        super(LeNet, self).__init__()
        # Convolution layer with 1 input channel, 6 output channels and a
        # 3x3 kernel. Input: [1, 28, 28]
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc3 = nn.Linear(16 * 5 * 5, 120)
        self.fc4 = nn.Linear(120, 84)
        self.fc5 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = torch.relu(x)
        x = self.pool2(x)
        # Reshape from [batch_size, channel, height, width] to
        # [batch_size, -1]; the -1 lets PyTorch infer that dimension so the
        # total number of elements is unchanged. This is the usual way to
        # turn a convolutional feature map into the input of a fully
        # connected layer for classification or regression.
        x = x.view(x.size(0), -1)
        x = self.fc3(x)
        x = torch.relu(x)
        x = self.fc4(x)
        x = torch.relu(x)
        x = self.fc5(x)
        return x
3. Test LeNet
from torchsummary import summary

model = LeNet().to(device)
summary(model, (1, 28, 28))
x = torch.randn(1, 1, 28, 28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [-1, 6, 26, 26]              60
         MaxPool2d-2            [-1, 6, 13, 13]               0
            Conv2d-3           [-1, 16, 11, 11]             880
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 120]          48,120
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
================================================================
Total params: 60,074
Trainable params: 60,074
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.23
Estimated Total Size (MB): 0.29
----------------------------------------------------------------
tensor([[ 0.0470, -0.1018, -0.0517, -0.0125,  0.0844,  0.1247,  0.0207,  0.1338,
         -0.0425, -0.0684]], device='cuda:0', grad_fn=<AddmmBackward0>) torch.Size([1, 10])
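The total of 60,074 parameters can be cross-checked by hand: Conv2d-1, for instance, has (3·3·1 + 1)·6 = 60 parameters (weights plus biases for 6 filters over 1 input channel). A minimal sketch (not in the original notebook) to recompute the totals directly from the model:

# Cross-check the parameter counts reported by torchsummary.
total = sum(p.numel() for p in model.parameters())
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(total, trainable)  # expected: 60074 60074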
4. Training function
def train(model, num_epochs, optimizer, save_name, device='cpu'):
    # device defaults to CPU; parameters with default values are placed last
    # so that positional calls do not raise errors
    criterion = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # train
        model.train()
        train_loss = 0.
        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Epoch: %d/%d, training loss: %.6f,'
              % (epoch + 1, num_epochs, train_loss / len(train_loader.dataset) * batch_size),
              end=' ')
        # test
        model.eval()  # disable batch norm and dropout
        with torch.no_grad():  # no need to compute gradients
            test_loss = 0.
            error = 0.
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)
                outputs = model(x)
                loss = criterion(outputs, y)
                test_loss += loss.item()
                pred = torch.argmax(outputs, axis=1)
                error += torch.sum((pred != y).float()).item()
            test_loss /= len(test_loader.dataset)
            error /= len(test_loader.dataset)
            print('testing loss: %.6f, testing error rate: %.2f%%' % (test_loss, error * 100))
    torch.save(model, save_name)
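Note that torch.save(model, save_name) pickles the entire module object, which ties the checkpoint to the exact class definition. Saving the state_dict is the more portable idiom; a minimal sketch (not in the original notebook):

# Alternative: save only the parameters rather than the whole module.
torch.save(model.state_dict(), save_name)

# To restore, rebuild the architecture first, then load the weights.
model = LeNet().to(device)
model.load_state_dict(torch.load(save_name, map_location=device))
model.eval()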
5. Train LeNet
model2 = LeNet().to(device)
optimizer = torch.optim.SGD(model2.parameters(), lr=learning_rate)
train(model2, num_epochs, optimizer, 'Lenet.pth', device)
Epoch: 1/50, training loss: 2.300986, testing loss: 0.071927, testing error rate: 88.65%
Epoch: 2/50, training loss: 2.294205, testing loss: 0.071585, testing error rate: 88.65%
Epoch: 3/50, training loss: 2.189130, testing loss: 0.048439, testing error rate: 41.12%
Epoch: 4/50, training loss: 0.603036, testing loss: 0.010060, testing error rate: 9.44%
Epoch: 5/50, training loss: 0.295670, testing loss: 0.007042, testing error rate: 6.55%
...
Epoch: 46/50, training loss: 0.017435, testing loss: 0.001094, testing error rate: 1.08%
Epoch: 47/50, training loss: 0.017018, testing loss: 0.001171, testing error rate: 1.22%
Epoch: 48/50, training loss: 0.016646, testing loss: 0.001084, testing error rate: 1.03%
Epoch: 49/50, training loss: 0.015562, testing loss: 0.001299, testing error rate: 1.27%
Epoch: 50/50, training loss: 0.014892, testing loss: 0.001314, testing error rate: 1.30%
6. Define the AlexNet network
class AlexNet(nn.Module):
    '''AlexNet is a convolutional neural network for computer vision tasks,
    proposed by Alex Krizhevsky, Ilya Sutskever and Geoffrey Hinton in 2012,
    and is the network that won the ImageNet image recognition competition
    that year. AlexNet has 5 convolutional layers and 3 fully connected
    layers, and introduced several improvements over earlier methods, such
    as the ReLU activation function and Dropout regularization. In addition,
    AlexNet was trained on GPUs, which was still a novel practice at the
    time. The original AlexNet takes a 224x224 color image as input and
    outputs the class of the image; on ImageNet it lowered the top-5 error
    rate by roughly 10 percentage points compared with the runner-up, making
    it a milestone in the development of deep learning.
    Reference: http://t.csdn.cn/nSl8r'''
    def __init__(self, out_size=10, init_weights=False):
        super(AlexNet, self).__init__()
        # nn.Sequential() packages the layers into a single module to simplify the code
        self.features = nn.Sequential(
            # Convolution layers extract image features
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),  # input [1, 28, 28]
            # inplace=True overwrites the input tensor instead of allocating
            # a new one; this saves memory and the time of repeatedly
            # allocating and freeing it, and is safe as long as the original
            # values are not needed again.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 3 * 3, 1152),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(1152, 1152),
            nn.ReLU(inplace=True),
            nn.Linear(1152, out_size),
        )
        if init_weights:
            # Kaiming initialization for the convolutional layers (see below)
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)  # convolution layers extract features
        # Tensors in PyTorch are usually laid out as [batch, channel, height,
        # width]; flatten everything after the batch dimension.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)  # fully connected layers classify
        return x

    # Explicit weight initialization; PyTorch also initializes weights
    # automatically when the layers are constructed.
    def _initialize_weights(self):
        for m in self.modules():
            # isinstance(m, nn.Conv2d) is true exactly when m is an instance
            # of nn.Conv2d, in which case the branch below runs; otherwise it
            # is skipped.
            if isinstance(m, nn.Conv2d):  # convolutional layer
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):  # fully connected layer
                nn.init.normal_(m.weight, 0, 0.01)  # normal distribution
                nn.init.constant_(m.bias, 0)
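Since AlexNet includes Dropout layers, model.train() and model.eval() now genuinely change its behavior: dropout is active only in training mode. A small sketch (not in the original notebook) to observe this:

# Dropout is stochastic in training mode and disabled in eval mode.
model = AlexNet().to(device)
x = torch.randn(1, 1, 28, 28).to(device)

model.train()
print(model(x)[0, :3], model(x)[0, :3])  # two calls usually differ

model.eval()
print(model(x)[0, :3], model(x)[0, :3])  # two calls are identical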
7. Test AlexNet
from torchsummary import summary

model = AlexNet().to(device)
summary(model, (1, 28, 28))
x = torch.randn(1, 1, 28, 28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 16, 28, 28]             416
              ReLU-2           [-1, 16, 28, 28]               0
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Conv2d-4           [-1, 32, 14, 14]          12,832
              ReLU-5           [-1, 32, 14, 14]               0
         MaxPool2d-6             [-1, 32, 7, 7]               0
            Conv2d-7             [-1, 64, 7, 7]          51,264
              ReLU-8             [-1, 64, 7, 7]               0
            Conv2d-9            [-1, 128, 7, 7]         204,928
             ReLU-10            [-1, 128, 7, 7]               0
           Conv2d-11            [-1, 128, 7, 7]         409,728
             ReLU-12            [-1, 128, 7, 7]               0
        MaxPool2d-13            [-1, 128, 3, 3]               0
          Dropout-14                 [-1, 1152]               0
           Linear-15                 [-1, 1152]       1,328,256
             ReLU-16                 [-1, 1152]               0
          Dropout-17                 [-1, 1152]               0
           Linear-18                 [-1, 1152]       1,328,256
             ReLU-19                 [-1, 1152]               0
           Linear-20                   [-1, 10]          11,530
================================================================
Total params: 3,347,210
Trainable params: 3,347,210
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.62
Params size (MB): 12.77
Estimated Total Size (MB): 13.40
----------------------------------------------------------------
tensor([[ 0.0068,  0.0046,  0.0296, -0.0015, -0.0043, -0.0235,  0.0388,  0.0007,
          0.0145, -0.0453]], device='cuda:0', grad_fn=<AddmmBackward0>) torch.Size([1, 10])
8. Train AlexNet
model3 = AlexNet(out_size=10, init_weights=True).to(device)
print(model3)
optimizer = torch.optim.SGD(model3.parameters(), lr=learning_rate)
train(model3, num_epochs, optimizer, 'AlexNet.pth', device)
Epoch: 1/50, training loss: 2.301477, testing loss: 0.071999, testing error rate: 88.65%
Epoch: 2/50, training loss: 2.299148, testing loss: 0.071852, testing error rate: 86.17%
Epoch: 3/50, training loss: 2.273426, testing loss: 0.069350, testing error rate: 80.80%
Epoch: 4/50, training loss: 1.308875, testing loss: 0.010009, testing error rate: 9.43%
Epoch: 5/50, training loss: 0.289133, testing loss: 0.004225, testing error rate: 4.40%
...
Epoch: 46/50, training loss: 0.015937, testing loss: 0.000644, testing error rate: 0.70%
Epoch: 47/50, training loss: 0.014567, testing loss: 0.000742, testing error rate: 0.69%
Epoch: 48/50, training loss: 0.014157, testing loss: 0.000673, testing error rate: 0.64%
Epoch: 49/50, training loss: 0.013786, testing loss: 0.000747, testing error rate: 0.82%
Epoch: 50/50, training loss: 0.013212, testing loss: 0.000781, testing error rate: 0.73%
9. Define the InceptionNet network
class InceptionA(nn.Module):
    def __init__(self):
        super(InceptionA, self).__init__()
        self.conv11 = nn.Conv2d(12, 6, 1)
        self.conv31 = nn.Conv2d(6, 6, 3, padding=1, stride=2)
        self.conv12 = nn.Conv2d(12, 8, 1)
        self.conv32 = nn.Conv2d(8, 8, 3, padding=1)
        self.conv33 = nn.Conv2d(8, 8, 3, padding=1, stride=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv13 = nn.Conv2d(12, 4, 1)

    def forward(self, x):
        '''x: b*12*14*14, out: b*18*7*7'''
        out1 = self.conv11(x)
        out1 = self.conv31(out1)  # (b, 6, 7, 7)
        out2 = self.conv12(x)
        out2 = self.conv32(out2)
        out2 = self.conv33(out2)  # (b, 8, 7, 7)
        out3 = self.pool(x)
        out3 = self.conv13(out3)  # (b, 4, 7, 7)
        # concatenate along the channel dimension: (b, 18, 7, 7)
        return torch.cat([out1, out2, out3], 1)

class InceptionB(nn.Module):
    def __init__(self):
        super(InceptionB, self).__init__()
        self.conv11 = nn.Conv2d(18, 6, 1)
        self.conv12 = nn.Conv2d(18, 8, 1)
        self.conv31 = nn.Conv2d(8, 8, 3, padding=1)
        self.conv13 = nn.Conv2d(18, 8, 1)
        self.conv32 = nn.Conv2d(8, 8, 3, padding=1)
        self.conv33 = nn.Conv2d(8, 8, 3, padding=1)
        self.pool = nn.MaxPool2d(3, 1, padding=1)
        self.conv14 = nn.Conv2d(18, 4, 1)

    def forward(self, x):
        '''x: b*18*7*7, out: b*26*7*7'''
        out1 = self.conv11(x)
        out2 = self.conv12(x)
        out2 = self.conv31(out2)
        out3 = self.conv13(x)
        out3 = self.conv32(out3)
        out3 = self.conv33(out3)
        out4 = self.pool(x)
        out4 = self.conv14(out4)
        return torch.cat([out1, out2, out3, out4], 1)

class InceptionNet(nn.Module):
    def __init__(self):
        super(InceptionNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 12, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(12, 12, 3, padding=1)
        self.inception1 = InceptionA()
        self.inception2 = InceptionB()
        self.conv3 = nn.Conv2d(26, 32, 3)
        self.avg_pool = nn.AvgPool2d(5)
        self.fc = nn.Linear(32, 10)

    def forward(self, x):
        out = self.conv1(x)
        out = self.pool(out)
        out = self.conv2(out)
        out = self.inception1(out)
        out = self.inception2(out)
        out = self.conv3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        # BUG: this softmax normalizes over dim 0 (the batch dimension)
        # rather than the class dimension, and nn.CrossEntropyLoss already
        # applies log-softmax internally, so raw logits should be returned.
        out = F.softmax(out, 0)
        return out
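The channel arithmetic of the concatenation can be verified directly; here is a minimal sketch (not in the original notebook) feeding a dummy b*12*14*14 tensor through InceptionA:

# Verify the branch shapes of InceptionA: 6 + 8 + 4 channels = 18.
block = InceptionA()
dummy = torch.randn(2, 12, 14, 14)
print(block(dummy).shape)  # expected: torch.Size([2, 18, 7, 7])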
10. Test InceptionNet
from torchsummary import summary

model = InceptionNet().to(device)
summary(model, (1, 28, 28))
x = torch.randn(1, 1, 28, 28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 12, 28, 28]             120
         MaxPool2d-2           [-1, 12, 14, 14]               0
            Conv2d-3           [-1, 12, 14, 14]           1,308
            Conv2d-4            [-1, 6, 14, 14]              78
            Conv2d-5              [-1, 6, 7, 7]             330
            Conv2d-6            [-1, 8, 14, 14]             104
            Conv2d-7            [-1, 8, 14, 14]             584
            Conv2d-8              [-1, 8, 7, 7]             584
         MaxPool2d-9             [-1, 12, 7, 7]               0
           Conv2d-10              [-1, 4, 7, 7]              52
       InceptionA-11             [-1, 18, 7, 7]               0
           Conv2d-12              [-1, 6, 7, 7]             114
           Conv2d-13              [-1, 8, 7, 7]             152
           Conv2d-14              [-1, 8, 7, 7]             584
           Conv2d-15              [-1, 8, 7, 7]             152
           Conv2d-16              [-1, 8, 7, 7]             584
           Conv2d-17              [-1, 8, 7, 7]             584
        MaxPool2d-18             [-1, 18, 7, 7]               0
           Conv2d-19              [-1, 4, 7, 7]              76
       InceptionB-20             [-1, 26, 7, 7]               0
           Conv2d-21             [-1, 32, 5, 5]           7,520
        AvgPool2d-22             [-1, 32, 1, 1]               0
           Linear-23                   [-1, 10]             330
================================================================
Total params: 13,256
Trainable params: 13,256
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.20
Params size (MB): 0.05
Estimated Total Size (MB): 0.25
----------------------------------------------------------------
tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], device='cuda:0',
       grad_fn=<SoftmaxBackward0>) torch.Size([1, 10])
11. Train InceptionNet
model4 = InceptionNet().to(device)
optimizer = torch.optim.SGD(model4.parameters(), lr=0.001)
train(model4, num_epochs, optimizer, 'InceptionNet.pth', device)
Epoch: 1/50, training loss: 2.302589, testing loss: 0.072071, testing error rate: 88.53%
Epoch: 2/50, training loss: 2.302589, testing loss: 0.072071, testing error rate: 88.55%
Epoch: 3/50, training loss: 2.302589, testing loss: 0.072071, testing error rate: 88.34%
Epoch: 4/50, training loss: 2.302588, testing loss: 0.072071, testing error rate: 88.34%
Epoch: 5/50, training loss: 2.302588, testing loss: 0.072071, testing error rate: 88.28%
...
Epoch: 46/50, training loss: 2.302578, testing loss: 0.072071, testing error rate: 86.97%
Epoch: 47/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.93%
Epoch: 48/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.95%
Epoch: 49/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.63%
Epoch: 50/50, training loss: 2.302577, testing loss: 0.072071, testing error rate: 86.65%

Training stalls: the loss stays pinned at ln(10) ≈ 2.3026 because F.softmax(out, 0) normalizes over the batch dimension rather than the class dimension, so the outputs carry almost no class information and CrossEntropyLoss sees near-uniform predictions throughout. The same bug explains the all-ones test output above: with a batch of size one, softmax over dim 0 maps every class score to exactly 1.
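One way to fix this is to return the raw logits and let nn.CrossEntropyLoss apply log-softmax itself. A minimal sketch (the subclass name InceptionNetFixed is just an illustrative label, not part of the original notebook):

class InceptionNetFixed(InceptionNet):
    # Same architecture; only the head changes: return raw logits so that
    # nn.CrossEntropyLoss can apply log-softmax internally.
    def forward(self, x):
        out = self.conv1(x)
        out = self.pool(out)
        out = self.conv2(out)
        out = self.inception1(out)
        out = self.inception2(out)
        out = self.conv3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        return self.fc(out)  # no softmax here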
12. Define the ResNet residual network
# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)

# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet
class ResNet(nn.Module):
    def __init__(self, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(1, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(16, layers[0])
        self.layer2 = self.make_layer(32, layers[1], 2)
        self.layer3 = self.make_layer(64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(7)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, out_channels, blocks, stride=1):
        downsample = None
        # An identity shortcut only works when the shapes match; otherwise
        # project the input with a strided 3x3 conv plus batch norm.
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(ResidualBlock(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        # As with InceptionNet above, this softmax is redundant because
        # CrossEntropyLoss applies log-softmax internally. Here it at least
        # normalizes the class dimension, so the model still trains, but it
        # caps the achievable training loss (see the log below).
        out = F.softmax(out, dim=1)
        return out
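The shortcut logic can be checked in isolation; a small sketch (not in the original notebook) runs one block with and without a projection shortcut:

# Identity shortcut: input and output shapes already match.
block = ResidualBlock(16, 16)
print(block(torch.randn(2, 16, 28, 28)).shape)  # torch.Size([2, 16, 28, 28])

# Projection shortcut: channels and spatial size change, so the input
# must be projected before the addition.
down = nn.Sequential(conv3x3(16, 32, stride=2), nn.BatchNorm2d(32))
block = ResidualBlock(16, 32, stride=2, downsample=down)
print(block(torch.randn(2, 16, 28, 28)).shape)  # torch.Size([2, 32, 14, 14])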
13. Test ResNet
from torchsummary import summary

model = ResNet([2, 2, 2]).to(device)
summary(model, (1, 28, 28))
x = torch.randn(1, 1, 28, 28).to(device)
out = model(x)
print(out, out.shape)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 16, 28, 28]             144
       BatchNorm2d-2           [-1, 16, 28, 28]              32
              ReLU-3           [-1, 16, 28, 28]               0
            Conv2d-4           [-1, 16, 28, 28]           2,304
       BatchNorm2d-5           [-1, 16, 28, 28]              32
              ReLU-6           [-1, 16, 28, 28]               0
            Conv2d-7           [-1, 16, 28, 28]           2,304
       BatchNorm2d-8           [-1, 16, 28, 28]              32
              ReLU-9           [-1, 16, 28, 28]               0
    ResidualBlock-10           [-1, 16, 28, 28]               0
           Conv2d-11           [-1, 16, 28, 28]           2,304
      BatchNorm2d-12           [-1, 16, 28, 28]              32
             ReLU-13           [-1, 16, 28, 28]               0
           Conv2d-14           [-1, 16, 28, 28]           2,304
      BatchNorm2d-15           [-1, 16, 28, 28]              32
             ReLU-16           [-1, 16, 28, 28]               0
    ResidualBlock-17           [-1, 16, 28, 28]               0
           Conv2d-18           [-1, 32, 14, 14]           4,608
      BatchNorm2d-19           [-1, 32, 14, 14]              64
             ReLU-20           [-1, 32, 14, 14]               0
           Conv2d-21           [-1, 32, 14, 14]           9,216
      BatchNorm2d-22           [-1, 32, 14, 14]              64
           Conv2d-23           [-1, 32, 14, 14]           4,608
      BatchNorm2d-24           [-1, 32, 14, 14]              64
             ReLU-25           [-1, 32, 14, 14]               0
    ResidualBlock-26           [-1, 32, 14, 14]               0
           Conv2d-27           [-1, 32, 14, 14]           9,216
      BatchNorm2d-28           [-1, 32, 14, 14]              64
             ReLU-29           [-1, 32, 14, 14]               0
           Conv2d-30           [-1, 32, 14, 14]           9,216
      BatchNorm2d-31           [-1, 32, 14, 14]              64
             ReLU-32           [-1, 32, 14, 14]               0
    ResidualBlock-33           [-1, 32, 14, 14]               0
           Conv2d-34             [-1, 64, 7, 7]          18,432
      BatchNorm2d-35             [-1, 64, 7, 7]             128
             ReLU-36             [-1, 64, 7, 7]               0
           Conv2d-37             [-1, 64, 7, 7]          36,864
      BatchNorm2d-38             [-1, 64, 7, 7]             128
           Conv2d-39             [-1, 64, 7, 7]          18,432
      BatchNorm2d-40             [-1, 64, 7, 7]             128
             ReLU-41             [-1, 64, 7, 7]               0
    ResidualBlock-42             [-1, 64, 7, 7]               0
           Conv2d-43             [-1, 64, 7, 7]          36,864
      BatchNorm2d-44             [-1, 64, 7, 7]             128
             ReLU-45             [-1, 64, 7, 7]               0
           Conv2d-46             [-1, 64, 7, 7]          36,864
      BatchNorm2d-47             [-1, 64, 7, 7]             128
             ReLU-48             [-1, 64, 7, 7]               0
    ResidualBlock-49             [-1, 64, 7, 7]               0
        AvgPool2d-50             [-1, 64, 1, 1]               0
           Linear-51                   [-1, 10]             650
================================================================
Total params: 195,450
Trainable params: 195,450
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 2.78
Params size (MB): 0.75
Estimated Total Size (MB): 3.52
----------------------------------------------------------------
tensor([[0.0604, 0.0980, 0.1416, 0.0921, 0.1122, 0.0830, 0.1059, 0.1051,
         0.0831, 0.1187]], device='cuda:0', grad_fn=<SoftmaxBackward0>) torch.Size([1, 10])
14. Train ResNet
model5 = ResNet([2, 2, 2]).to(device)
print(model5)
optimizer = torch.optim.SGD(model5.parameters(), lr=learning_rate)
train(model5, num_epochs, optimizer, 'ResNet.pth', device)
Epoch: 1/50, training loss: 2.189773, testing loss: 0.063730, testing error rate: 57.93%
Epoch: 2/50, training loss: 1.909820, testing loss: 0.055589, testing error rate: 27.88%
Epoch: 3/50, training loss: 1.652832, testing loss: 0.047946, testing error rate: 3.05%
Epoch: 4/50, training loss: 1.528145, testing loss: 0.047026, testing error rate: 2.33%
Epoch: 5/50, training loss: 1.506562, testing loss: 0.046566, testing error rate: 1.52%
...
Epoch: 46/50, training loss: 1.464764, testing loss: 0.045982, testing error rate: 0.67%
Epoch: 47/50, training loss: 1.464779, testing loss: 0.045961, testing error rate: 0.53%
Epoch: 48/50, training loss: 1.464784, testing loss: 0.045953, testing error rate: 0.50%
Epoch: 49/50, training loss: 1.464439, testing loss: 0.045962, testing error rate: 0.56%
Epoch: 50/50, training loss: 1.464311, testing loss: 0.045947, testing error rate: 0.47%

The testing error rate falls to 0.47%, the best of the four models, but note that the training loss plateaus near 1.464 rather than approaching zero: because forward() outputs probabilities, CrossEntropyLoss applies log-softmax a second time, and even a perfectly confident prediction then yields a per-sample loss of -log(e/(e + 9)) ≈ 1.461 for 10 classes. Returning raw logits, as in the InceptionNet fix above, would remove this floor.