A CIFAR-10 classification model generated by GPT4 and subsequent improvement experiments

1. Model generated by GPT4

The CIFAR-10 classification model was generated with GPT4. The first model it produced turned out, after some searching, to be almost identical to the contents of a certain textbook, which suggests it was reproducing ready-made material behind the scenes. I had it regenerate the model; the structure looked a bit odd and contained a few errors, so after some small adjustments and training, the accuracy reached about 82%.

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# Set hyperparameters: learning rate, number of epochs, batch size, etc.
learning_rate = 0.001
num_epochs = 100
batch_size = 128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the CIFAR10 data and apply data augmentation and preprocessing
transform = transforms.Compose(
    [transforms.RandomCrop(32, padding=4),
     transforms.RandomHorizontalFlip(),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Define the model class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(256 * 4 * 4, 1024)
        self.dropout = nn.Dropout()
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = self.bn1(x)
        x = self.pool1(x)
        x = nn.functional.relu(self.conv2(x))
        x = self.bn2(x)
        x = self.pool2(x)
        x = nn.functional.relu(self.conv3(x))
        x = self.bn3(x)
        x = self.pool3(x)
        x = x.view(-1, 256 * 4 * 4)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define the model, loss function and optimizer used
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)

# train the model
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        # print(inputs.shape, outputs.shape, labels.shape)
        # print(labels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 50 == 49:  # Output statistics every 50 batches
            print('[%d, %d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0

# Test the model accuracy on the test set
net.eval()  # put the BatchNorm and Dropout layers into inference mode before evaluating
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

After running it for a while longer, the accuracy reached 85%:

...
[100, 300] loss: 0.120
[100, 350] loss: 0.133
Accuracy of the network on the 10000 test images: 85 %

2. An “ideal” CNN model I made

The feature maps follow the progression 3x32x32 -> 64x16x16 -> 128x8x8 -> 256x4x4 -> 512x2x2 -> 1024x1x1, followed by fc1 -> fc2 -> fc3. The code is as follows:

# Define the model class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2)
        
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = self.pool1(x)

        x = nn.functional.relu(self.conv2(x))
        x = self.pool2(x)

        x = nn.functional.relu(self.conv3(x))
        x = self.pool3(x)

        x = nn.functional.relu(self.conv4(x))
        x = self.pool4(x)

        x = nn.functional.relu(self.conv5(x))
        x = self.pool5(x)

        x = x.view(-1, 1024)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

After training for a long time, I tested the results. Unfortunately, the accuracy was only 10%, i.e. no better than random guessing: the network had learned nothing. Analyzing the cause, I first suspected the learning rate was too small, but experimenting with different learning rates made the training process far too long, so I gave up on that approach and instead tried to speed up convergence by adding BN layers.

Supplement: after studying this model more carefully, I found that the learning rate actually has to be reduced substantially for it to converge. After a night of training, the recognition accuracy rose to 82%. This is lower than the GPT4-provided model even though this model is considerably more complex; the exact reason still needs further investigation.
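For reference, changing the learning rate only requires rebuilding the optimizer, optionally with a scheduler that decays it further during training. The snippet below is only a sketch reusing the imports and the net defined above; the concrete values (a 1e-4 starting rate, a 10x decay every 30 epochs) are illustrative assumptions, not the settings actually used:

# Illustrative sketch only: smaller starting rate plus a step decay (values are assumptions)
optimizer = optim.Adam(params=net.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(num_epochs):
    # ... same inner batch loop as in section 1 ...
    scheduler.step()  # shrink the learning rate by 10x every 30 epochs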

3. Add BN layer

The BN layer standardizes each layer's outputs (normalizing every channel over the batch), which shrinks the range of activations the following layers have to adapt to and thereby speeds up convergence. The model with BN layers added is as follows; a minimal sketch of what each BatchNorm2d layer actually computes appears after the class definition.

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(2, 2)
        
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(512)
        self.pool4 = nn.MaxPool2d(2, 2)
        
        self.conv5 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(1024)
        self.pool5 = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = self.bn1(x)
        x = self.pool1(x)

        x = nn.functional.relu(self.conv2(x))
        x = self.bn2(x)
        x = self.pool2(x)

        x = nn.functional.relu(self.conv3(x))
        x = self.bn3(x)
        x = self.pool3(x)

        x = nn.functional.relu(self.conv4(x))
        x = self.bn4(x)
        x = self.pool4(x)

        x = nn.functional.relu(self.conv5(x))
        x = self.bn5(x)
        x = self.pool5(x)

        x = x.view(-1, 1024)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x
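To make the effect concrete, the following self-contained sketch reproduces what a single nn.BatchNorm2d layer computes on a batch of feature maps during training: each channel is normalized to zero mean and unit variance over the batch, then scaled and shifted by learned parameters (which start at 1 and 0). The tensor shape and offsets here are arbitrary illustration values.

import torch
import torch.nn as nn

x = torch.randn(8, 64, 16, 16) * 3.0 + 5.0  # a batch of 64-channel feature maps, deliberately off-center
bn = nn.BatchNorm2d(64)
bn.train()
y = bn(x)

# The same normalization done by hand (gamma=1, beta=0 at initialization, eps taken from the layer)
mean = x.mean(dim=(0, 2, 3), keepdim=True)
var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)
y_manual = (x - mean) / torch.sqrt(var + bn.eps)

print(torch.allclose(y, y_manual, atol=1e-5))  # True
print(round(y.mean().item(), 3), round(y.std().item(), 3))  # roughly 0 and 1

Because the activations are re-standardized at every stage, a change in one layer's weights no longer shifts the input distribution of the layers above it as strongly, which is why training converges faster.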

Sure enough, it lived up to expectations and gave a recognition accuracy of 87%.

Accuracy of the network on the 10000 test images: 87 %

Supplement: after a longer training run, the maximum accuracy was confirmed to reach 88%. Exploring how to push the accuracy further forces us to think about many details of how the neural network actually works, and pushes us toward a deeper understanding of this kind of technology.