Using PyTorch to build a ResNet18 network and train it on the CIFAR-100 dataset

Notes

1. To extract only specific classes for training, you can use the approach below (an alternative based on Subset is sketched after the snippet):

class_indices = list(range(10, 20)) + list(range(50, 60)) + list(range(80, 90)) # Define the class numbers to extract
# Extract training and test samples based on these class numbers.
# CIFAR-100 labels start from 0, so class numbers [10, 19] correspond to label indices [10, 20),
# class numbers [50, 59] to [50, 60), and so on. Make sure the class numbers you define exist in your dataset.
# train_sampler and test_sampler are custom samplers. A sampler decides which samples of the dataset
# are drawn for training and testing; here they select according to train_indices and test_indices respectively.
train_indices = [i for i in range(len(train_data)) if train_data.targets[i] in class_indices]
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices) # Custom sampler that draws only samples of the selected classes

test_indices = [i for i in range(len(test_data)) if test_data.targets[i] in class_indices]
test_sampler = torch.utils.data.sampler.SubsetRandomSampler(test_indices)

print(len(train_sampler))
print(len(test_sampler))

#Use dataloader to load the data set
train = DataLoader(train_data, batch_size=64, sampler=train_sampler) # When a sampler is passed, shuffle must be left at its default; the sampler already controls which samples are drawn
test = DataLoader(test_data, batch_size=64, sampler=test_sampler)
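An equivalent way to restrict the loaders to the selected classes is torch.utils.data.Subset, which wraps the dataset together with the index list directly. A minimal sketch (the names train_subset and train_loader are illustrative):

from torch.utils.data import Subset, DataLoader

train_subset = Subset(train_data, train_indices) # dataset view containing only the selected classes
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True) # shuffle is allowed here because no sampler is passed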

2. Each sample in the original dataset has two parts: data (the image) and targets (the label), accessed via train_data.data and train_data.targets. Each image has shape (32, 32, 3). Test code (a sketch for looking up class names follows the snippet):

# Test output
print(train_data.targets[0]) #Output the first label value, which is 19, corresponding to the cow label
print(type(train_data.targets)) # <class 'list'>, the data set label type is a list
print(train_data.data[0].shape) #(32, 32, 3) Dimensions of the original data set image
plt.imshow(train_data.data[0]) #Output the picture of the cow
plt.show()
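To look up the class name behind a numeric label, the torchvision CIFAR-100 dataset also exposes a classes list and a class_to_idx mapping. A small sketch, assuming the standard torchvision attributes:

print(train_data.classes[train_data.targets[0]]) # 'cattle', the class name behind label 19
print(train_data.class_to_idx['cattle']) # 19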

3. After the dataset has been processed by the DataLoader, the shape of each image becomes torch.Size([3, 32, 32]), which is the shape and type the neural network expects. If you want to display images after they have passed through the DataLoader, you therefore need to transpose the dimensions back. The specific steps are as follows (a de-normalization sketch for nicer colors follows the snippet):

examples = enumerate(test) # enumerate wraps an iterable (e.g. a list, tuple or string) into an indexed iterator that yields (index, item) pairs; it is usually used in for loops
batch_idx, (example_imgs, example_labels) = next(examples) # next() returns the next item of the iterator, i.e. it fetches the first batch from the test loader
print(batch_idx) # 0
print(example_imgs[0].shape) # torch.Size([3, 32, 32]): after the DataLoader, each image changes from (32, 32, 3) to (3, 32, 32), the layout the network expects
print(example_labels[0].shape) #torch.Size([])
fig = plt.figure()
for i in range(64):
    img=example_imgs[i] # (3, 32, 32): tensor layout produced by the DataLoader
    img=np.transpose(img,(1,2,0)) # convert to (32, 32, 3) so it can be displayed as a 3-channel (color) image
    plt.subplot(8, 8, i + 1)
    plt.imshow(img)
# plt.savefig('CIFAR100')
plt.show()
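Note that the tensors coming out of the DataLoader have also been normalized, so the colors in the grid above look washed out. A sketch of undoing the normalization inside the loop before plotting (it reuses the mean and std lists defined in the full code below):

    img = example_imgs[i] # normalized tensor of shape (3, 32, 32)
    img = img * torch.tensor(std).view(3, 1, 1) + torch.tensor(mean).view(3, 1, 1) # undo Normalize(mean, std)
    img = img.permute(1, 2, 0).clamp(0, 1).numpy() # back to (32, 32, 3) with values in [0, 1] for imshow
    plt.imshow(img)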

4. A standard ResNet18 does not reach high accuracy on CIFAR-100, so this article makes several changes: the kernel of the first convolution layer and the parameters of the max-pooling layer are modified, and the choice of optimizer, the learning rate, and the Dropout settings are tuned (a sketch contrasting the original and modified first layer follows the diagram note).

The network structure diagram is shown below (the parameters in the diagram are the unmodified ones; refer to the code for the actual parameter settings):
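For reference, here is a minimal sketch contrasting the standard ImageNet-style ResNet18 stem with the CIFAR-adapted stem used in this article (the second block matches the conv1 block in the code below; the first is kept there only as a commented-out alternative):

from torch import nn

# Original ImageNet stem: a 7x7 convolution with stride 2 plus 3x3 max pooling,
# which would shrink a 32x32 CIFAR image to 8x8 before the first residual block
stem_imagenet = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# CIFAR-adapted stem used here: a 3x3 convolution with stride 1 and no effective pooling,
# so the 32x32 resolution is preserved for the residual layers
stem_cifar = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm2d(64),
    nn.ReLU(),
)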

Specific code

The specific code is as follows:

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pickle
import time

if torch.cuda.is_available():
    device=torch.device("cuda")
else:
    device=torch.device("cpu")

mean = [0.5070751592371323, 0.48654887331495095, 0.4409178433670343]
std = [0.2673342858792401, 0.2564384629170883, 0.27615047132568404]

transforms_fn=torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean, std)
])
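# The normalization statistics above are the commonly used per-channel mean/std of the CIFAR-100
# training images. As an optional sketch, they can be re-derived from the raw pixel data
# (this check only verifies the hard-coded values and can be removed):
raw_train = torchvision.datasets.CIFAR100('./cifar100_data', train=True, download=True)
raw_pixels = raw_train.data.astype(np.float32) / 255.0 # numpy array of shape (50000, 32, 32, 3), values in [0, 1]
print(raw_pixels.mean(axis=(0, 1, 2))) # approximately the mean list above
print(raw_pixels.std(axis=(0, 1, 2))) # approximately the std list above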

#100 classes containing 600 images each. There are 500 training images and 100 testing images per class.
#Training set
train_data=torchvision.datasets.CIFAR100('./cifar100_data',train=True,transform=transforms_fn,download=True)
#test set
test_data=torchvision.datasets.CIFAR100('./cifar100_data',train=False,transform=transforms_fn,download=True)

train_data_size=len(train_data)
test_data_size=len(test_data)
print("The length of the training data set is {}".format(train_data_size))
print("The length of the test data set is {}".format(test_data_size))

#Test output
print(train_data.targets[0]) #Output the first label value, which is 19, corresponding to the cow label
print(type(train_data.targets)) # <class 'list'>, the data set label type is a list
print(train_data.data[0].shape) #(32, 32, 3) Dimensions of the original data set image
plt.imshow(train_data.data[0]) #Output the picture of the cow
plt.show()

class_indices = list(range(10, 20)) + list(range(50, 60)) + list(range(80, 90)) # Define the class numbers to extract
# Extract training and test samples based on these class numbers.
# CIFAR-100 labels start from 0, so class numbers [10, 19] correspond to label indices [10, 20),
# class numbers [50, 59] to [50, 60), and so on. Make sure the class numbers you define exist in your dataset.
# train_sampler and test_sampler are custom samplers. A sampler decides which samples of the dataset
# are drawn for training and testing; here they select according to train_indices and test_indices respectively.
train_indices = [i for i in range(len(train_data)) if train_data.targets[i] in class_indices]
train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices) # Custom sampler that draws only samples of the selected classes

test_indices = [i for i in range(len(test_data)) if test_data.targets[i] in class_indices]
test_sampler = torch.utils.data.sampler.SubsetRandomSampler(test_indices)

print(len(train_sampler))
print(len(test_sampler))

#Use dataloader to load the data set
train = DataLoader(train_data, batch_size=64, sampler=train_sampler) # When a sampler is passed, shuffle must be left at its default; the sampler already controls which samples are drawn
test = DataLoader(test_data, batch_size=64, sampler=test_sampler)



examples = enumerate(test) # enumerate wraps an iterable (e.g. a list, tuple or string) into an indexed iterator that yields (index, item) pairs; it is usually used in for loops
batch_idx, (example_imgs, example_labels) = next(examples) # next() returns the next item of the iterator, i.e. it fetches the first batch from the test loader
print(batch_idx) # 0
print(example_imgs[0].shape) # torch.Size([3, 32, 32]): after the DataLoader, each image changes from (32, 32, 3) to (3, 32, 32), the layout the network expects
print(example_labels[0].shape) #torch.Size([])
fig = plt.figure()
for i in range(64):
    img=example_imgs[i] # (3, 32, 32): tensor layout produced by the DataLoader
    img=np.transpose(img,(1,2,0)) # convert to (32, 32, 3) so it can be displayed as a 3-channel (color) image
    plt.subplot(8, 8, i + 1)
    plt.imshow(img)
# plt.savefig('CIFAR100')
plt.show()

# examples = enumerate(test_data) # enumerate wraps an iterable into an indexed iterator yielding (index, item) pairs
# batch_idx, (example_data, example_targets) = next(examples)
# fig = plt.figure()
# for i in range(100):
#     plt.subplot(4, 25, i + 1)
#     plt.tight_layout()
#     plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
#     plt.title("Ground Truth: {}".format(example_targets[i]))
#     plt.xticks([])
#     plt.yticks([])
# plt.show()

#Define the residual block ResBlock
class ResBlock(nn.Module):
    def __init__(self,inchannel,outchannel,stride=1):
        super(ResBlock, self).__init__()
        #Define two consecutive convolutional layers in the residual block
        self.block_conv=nn.Sequential(
            nn.Conv2d(inchannel,outchannel,kernel_size=3,stride=stride,padding=1),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(),
            #nn.MaxPool2d(2),
            nn.Conv2d(outchannel,outchannel,kernel_size=3,stride=1,padding=1),
            nn.BatchNorm2d(outchannel)
        )

        # shortcut part
        # Because the dimensions of the input and the main-path output may differ, different cases are handled.
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                # A 1x1 convolution changes the channel count (and, via its stride, the spatial size).
                # The shortcut needs this projection whenever stride != 1 or the number of channels changes.
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self,x):
        out1=self.block_conv(x)
        out2=self.shortcut(x) + out1
        out2=F.relu(out2) # F.relu() is a function call, typically used inside forward(); nn.ReLU() is a module, typically used when defining the network layers.
        return out2
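# Quick shape check of the residual block (an optional sketch, not needed for training):
# a stride-2 block halves the spatial size while the 1x1 shortcut matches the channel count.
_test_block = ResBlock(64, 128, stride=2)
print(_test_block(torch.randn(1, 64, 32, 32)).shape) # torch.Size([1, 128, 16, 16])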


# Build ResNet18
class ResNet_18(nn.Module):
    def __init__(self,ResBlock,num_classes):
        super(ResNet_18, self).__init__()

        self.in_channels = 64 # number of channels entering layer1
        # First, a standalone convolution layer
        self.conv1=nn.Sequential(
            # output size: (n - f + 2*p)/s + 1, with n = 32 for CIFAR images
            # nn.Conv2d(in_channels=3,out_channels=64,kernel_size=7,stride=2,padding=3),
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1), #64
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
            nn.MaxPool2d(kernel_size=1, stride=1, padding=0) #64
            #nn.Dropout(0.25)
        )

        self.layer1=self.make_layer(ResBlock,64,2,stride=1) #64
        self.layer2 = self.make_layer(ResBlock, 128, 2, stride=2) #32
        self.layer3 = self.make_layer(ResBlock, 256, 2, stride=2) #16
        self.layer4 = self.make_layer(ResBlock, 512, 2, stride=2) #8


        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # nn.AdaptiveAvgPool2d takes the desired output height and width; the channel count is unchanged.
                                                    # Here every feature map is pooled down to 1x1.
        # self.linear=nn.Linear(2*2*512,512)
        # self.linear2=nn.Linear(512,100)

        self.linear=nn.Linear(512*1*1,num_classes)

        self.dropout = nn.Dropout(0.3)

    # Helper that stacks num_blocks residual blocks; the first block uses the given stride, the rest use stride 1
    def make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        x=self.conv1(x)
        # x=self.dropout(x)
        x=self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x=self.avgpool(x)
        x = x.view(x.size(0), -1)
        x=self.linear(x)
        x=self.dropout(x)

        return x

# Network model
model=ResNet_18(ResBlock,num_classes=100)
model.to(device)
print(model)
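# Sanity check with a dummy CIFAR-sized batch (an optional sketch, can be removed):
# two 3x32x32 images go in, two 100-dimensional score vectors come out.
print(model(torch.randn(2, 3, 32, 32).to(device)).shape) # torch.Size([2, 100])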
#Loss function
loss_fn=nn.CrossEntropyLoss() # CrossEntropyLoss first applies log_softmax to the input and then feeds the result to nll_loss
# For multi-class problems with nn.CrossEntropyLoss(), do NOT add a softmax layer to the model's output!
# If you use F.nll_loss instead, you must apply log_softmax to the output yourself.
loss_fn.to(device)
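# A quick numeric check of the comment above: CrossEntropyLoss on raw scores equals
# log_softmax followed by nll_loss (a sketch with random tensors, can be removed):
_scores = torch.randn(4, 100).to(device)
_labels = torch.randint(0, 100, (4,)).to(device)
print(loss_fn(_scores, _labels).item())
print(F.nll_loss(F.log_softmax(_scores, dim=1), _labels).item()) # same value as the line above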

learning_rate=0.01

optimizer=torch.optim.SGD(params=model.parameters(),lr=learning_rate, momentum=0.9,weight_decay=0.0001)
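# Note 4 mentions tuning the learning rate. One common option (an illustrative assumption, not
# used for the results in this article) is a step-decay schedule; if enabled, scheduler.step()
# would be called once per epoch at the end of the training loop below:
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)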


train_acc_list = []
train_loss_list = []
test_acc_list = []
test_loss_list=[]
epochs=50

for epoch in range(epochs):
    print("-----The {}th round of training begins------".format(epoch + 1))
    train_loss=0.0
    test_loss=0.0
    train_sum,train_cor,test_sum,test_cor=0,0,0,0

    #Training step starts
    model.train()
    for batch_idx,(data,target) in enumerate(train):
        data,target=data.to(device),target.to(device)

        optimizer.zero_grad() # Clear gradients; otherwise PyTorch accumulates the new gradients onto those from the previous step
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step() # Update all parameters

        # Accumulate the training loss of this epoch
        train_loss += loss.item()

        _, predicted = torch.max(output.data, 1) # The column index of the largest score is the predicted class
        train_cor += (predicted == target).sum().item() # Number of correctly classified samples
        train_sum += target.size(0) # Total number of samples seen (equivalently predicted.shape[0] per batch)

    #Test step begins
    model.eval()
    with torch.no_grad(): # No gradients are needed during evaluation, which saves memory and computation
        for batch_idx1, (data, target) in enumerate(test):
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_fn(output, target)
            test_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            test_cor += (predicted == target).sum().item()
            test_sum += target.size(0)

    print("Train loss:{} Train accuracy:{}% Test loss:{} Test accuracy:{}%".format(train_loss/batch_idx,100*train_cor/train_sum,
                                                                                       test_loss/batch_idx1,100*test_cor/test_sum))
    train_loss_list.append(train_loss / batch_idx)
    train_acc_list.append(100 * train_cor / train_sum)
    test_acc_list.append(100 * test_cor/ test_sum)
    test_loss_list.append(test_loss / batch_idx1)

# Save the network
torch.save(model,"CIFAR100_epoch{}.pth".format(epochs))
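# Reloading the saved network (a sketch): torch.save(model, ...) pickles the whole module, so the
# ResBlock / ResNet_18 class definitions must be available when loading (newer PyTorch versions
# may also require weights_only=False for full-module checkpoints).
reloaded = torch.load("CIFAR100_epoch{}.pth".format(epochs), map_location=device)
reloaded.eval()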



plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
fig=plt.figure()
plt.plot(range(len(train_loss_list)),train_loss_list,'blue')
plt.plot(range(len(test_loss_list)),test_loss_list,'red')
plt.legend(['training loss','test loss'],fontsize=14,loc='best')
plt.xlabel('Number of training rounds',fontsize=14)
plt.ylabel('Loss value',fontsize=14)
plt.grid()
# plt.savefig('CIFAR100_figLOSS_6')
plt.show()

fig=plt.figure()
plt.plot(range(len(train_acc_list)),train_acc_list,'blue')
plt.plot(range(len(test_acc_list)),test_acc_list,'red')
plt.legend(['Training accuracy','Test accuracy'],fontsize=14,loc='best')
plt.xlabel('Number of training rounds',fontsize=14)
plt.ylabel('Accuracy (%)',fontsize=14)
plt.grid()
# plt.savefig('CIFAR100_figAccuracy_6')
plt.show()

Training results