Code Implementation of the ResNet Residual Network

Table of Contents

What is the ResNet residual network

Code Implementation

File Download

Import required libraries

Define the ResNet18 model

Load training and test data

Training the model

Run result


What is the ResNet residual network

ResNet is a deep residual network proposed by Microsoft researchers. It adopts residual learning: by introducing "skip connections", it alleviates the vanishing and exploding gradient problems of deep networks, making it practical to improve performance by increasing network depth.

The core idea: rather than asking a stack of layers to fit a desired mapping H(x) directly, let them fit the residual F(x) = H(x) - x; the skip connection then adds the input back, so the block outputs F(x) + x. If the identity mapping is close to optimal, it is much easier to push the residual toward zero than to learn the identity from scratch through stacked nonlinear layers.
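The following is a minimal sketch of a basic residual block, to illustrate the idea (not the exact block used inside torchvision's ResNet18, which also handles stride and channel changes):

import torch
from torch import nn

class BasicResidualBlock(nn.Module):
    """A minimal residual block: output = relu(F(x) + x)."""
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # F(x): two conv-bn stages with a ReLU in between
        residual = self.bn2(self.conv2(self.relu(self.bn1(self.conv1(x)))))
        # The skip connection adds the input back before the final ReLU
        return self.relu(residual + x)

# Shapes are preserved, so such blocks can be stacked arbitrarily deep
x = torch.randn(1, 64, 32, 32)
print(BasicResidualBlock(64)(x).shape)  # torch.Size([1, 64, 32, 32])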

For a more detailed explanation, see the linked article.

Code Implementation

File Download

food_dataset2, train.txt, test.txt
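The dataset class defined below expects each line of train.txt and test.txt to contain an image path and an integer class label (0 to 19) separated by a single space. For example (hypothetical paths):

food_dataset2/train/apple/0001.jpg 0
food_dataset2/train/banana/0002.jpg 1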

Import required libraries

# Import Python's time module for timing
import time
# Import PyTorch, the deep learning framework used throughout
import torch
# Import the nn module, PyTorch's neural network building blocks
from torch import nn
# Import the dataset and data loader utilities
from torch.utils.data import Dataset, DataLoader
# Import NumPy for numerical work on multi-dimensional arrays
import numpy as np
# Import the Image module from PIL for image loading
from PIL import Image
# Import torchvision's models module, which includes pre-trained networks
import torchvision.models as models
# Import torchvision's transforms module for image preprocessing
from torchvision import transforms

Define the ResNet18 model

# Load the ResNet18 model with the default pre-trained weights
resnet_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
# Freeze all parameters: with requires_grad=False they will not be updated during training
for param in resnet_model.parameters():
    param.requires_grad = False

# Get the number of input features of the final fully connected layer
in_features = resnet_model.fc.in_features
# Replace the final layer with a new fully connected layer that outputs 20 classes
resnet_model.fc = nn.Linear(in_features, 20)

# Collect the parameters that still require gradients; after freezing,
# these are only the parameters of the new fully connected layer
params_to_update = []
for param in resnet_model.parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
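A quick way to verify the freezing (a minimal sketch): the new fully connected layer contributes exactly two trainable tensors, its weight and its bias, so the list should have length 2.

# Only the fc layer's weight and bias should remain trainable
print(len(params_to_update))  # expected: 2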
  
# Define the data transformations: resizing, rotation, cropping, flipping,
# color jitter, random grayscale, tensor conversion and normalization
data_transforms = { # You could also augment the data manually with PIL, or rebalance it with SMOTE
    'train':
        transforms.Compose([
        transforms.Resize([300,300]), # Resize the image to 300x300
        transforms.RandomRotation(45), # Random rotation between -45 and 45 degrees
        transforms.CenterCrop(256), # Crop a 256x256 patch from the center
        transforms.RandomHorizontalFlip(p=0.5), # Flip horizontally with probability 0.5
        transforms.RandomVerticalFlip(p=0.5), # Flip vertically with probability 0.5
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1), # Jitter brightness, contrast, saturation and hue
        transforms.RandomGrayscale(p=0.1), # Convert to grayscale with probability 0.1 (3 channels with R=G=B)
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # Normalize with the ImageNet mean and standard deviation
    ]),
    'valid':
        transforms.Compose([
        transforms.Resize([256,256]),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

Load training and test data

# Define a custom dataset class named food_dataset, which inherits from
# torch.utils.data.Dataset and can be adapted to your own data
class food_dataset(Dataset):
    # Initialization method, called when an instance of the class is created
    def __init__(self, file_path, transform=None):
        # Save the annotation file path
        self.file_path = file_path
        # Initialize empty image and label lists
        self.imgs = []
        self.labels = []
        # Save the data transformation, default is None
        self.transform = transform
        # Open the annotation file and read it line by line; each line holds
        # an image path and a label separated by a space
        with open(self.file_path) as f:
            samples = [x.strip().split(' ') for x in f.readlines()]
            # For each sample, store the image path and the label
            for img_path, label in samples:
                self.imgs.append(img_path)
                self.labels.append(label)

    # Return the size of the dataset, i.e. the number of images
    def __len__(self):
        return len(self.imgs)

    # Fetch a single sample by index and return the image and its label
    def __getitem__(self, idx):
        # Open the image; convert to RGB so every image has 3 channels,
        # matching the 3-channel normalization in the transforms
        image = Image.open(self.imgs[idx]).convert('RGB')
        # Apply the data transformation if one was given
        if self.transform:
            image = self.transform(image)
        # The label was read as a string; convert it to a 64-bit integer tensor
        label = self.labels[idx]
        label = torch.from_numpy(np.array(label, dtype = np.int64))
        # Return the image and its label as a tuple
        return image, label
  
# Create the training dataset from 'train.txt' using the training transforms
training_data = food_dataset(file_path = 'train.txt', transform = data_transforms['train'])
# Create the test dataset from 'test.txt' using the validation transforms
test_data = food_dataset(file_path = 'test.txt', transform = data_transforms['valid'])
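A quick sanity check (a minimal sketch; it assumes train.txt and the images it lists are actually present):

# Fetch one transformed sample; the spatial size reflects CenterCrop(256) above
image, label = training_data[0]
print(image.shape, label)  # expected: torch.Size([3, 256, 256]) and an int64 scalar tensor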
  
# Select the device based on device availability. If CUDA is available, use CUDA. Otherwise, if MPS is available, use MPS. Otherwise, use CPU.
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")
  
# Move the ResNet18 model defined above to the selected device
model = resnet_model.to(device)
  
# Create a cross-entropy loss function for computing the loss during training
loss_fn = nn.CrossEntropyLoss()

# Create an Adam optimizer over the parameters that need updating, with learning rate 0.001
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

def train(dataloader, model, loss_fn, optimizer):
    # PyTorch provides two mode switches: model.train() for training and
    # model.eval() for evaluation; call model.train() before training starts
    model.train()
    for X, y in dataloader: # Iterate over the data one batch at a time
        X, y = X.to(device), y.to(device) # Move the batch of images and labels to the selected device
        pred = model(X) # Forward pass
        loss = loss_fn(pred, y) # Compute the loss with the cross-entropy loss function
        # Each batch triggers one gradient computation and one parameter update
        optimizer.zero_grad() # Reset the gradients to zero
        loss.backward() # Backpropagate to compute the gradient of each parameter
        optimizer.step() # Update the network parameters using the gradients
best_acc = 0
def test(dataloader, model, loss_fn):
    global best_acc
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval() # Switch to evaluation mode
    test_loss, correct = 0, 0
    with torch.no_grad(): # Context manager that disables gradient computation; since Tensor.backward() will not be called here, this reduces memory consumption
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item() # Accumulate the batch loss
            correct += (pred.argmax(1) == y).type(torch.float).sum().item() # argmax(1) gives the index of the maximum in each row, i.e. the predicted class
    test_loss /= num_batches
    correct /= size
    print(f"Test result:\n Accuracy: {(100*correct)}%, Avg loss: {test_loss}")
    acc_s.append(correct)
    loss_s.append(test_loss)

    if correct > best_acc:
        best_acc = correct
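The function above only records the best accuracy. If you also want to keep the weights that achieved it (the original code does not do this), a minimal sketch of the usual pattern, with a hypothetical file name:

    if correct > best_acc:
        best_acc = correct
        # Hypothetical addition: checkpoint the weights of the best epoch
        torch.save(model.state_dict(), 'best_resnet18.pth')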

Training the model

epochs = 20
acc_s = []
loss_s = []
# Create the data loaders once; with shuffle=True the training data is reshuffled every epoch
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False) # No need to shuffle the test set
start_time = time.time()
for t in range(epochs):
    print(f'Epoch {t + 1}\n--------------Training--------------')
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
    end_time_epochs = time.time()
    all_time = end_time_epochs - start_time
    print(f'Total elapsed time: {all_time:.2f} seconds')

print('The best test accuracy achieved is:', best_acc)
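The acc_s and loss_s lists are filled during testing but never displayed. A minimal sketch for plotting them, assuming matplotlib is installed:

import matplotlib.pyplot as plt

# Plot the test accuracy and average test loss recorded after each epoch
epochs_range = range(1, epochs + 1)
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc_s)
plt.xlabel('epoch')
plt.ylabel('test accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss_s)
plt.xlabel('epoch')
plt.ylabel('avg test loss')
plt.show()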

Run result