Table of Contents
1. Download the cpp and python version codes of MNIST-demo
2. Read the pytorch code in five minutes
3. Download the MNIST data set and train the model
4. Model serialization and visual analysis
This article uses mnist, a relatively simple deep learning task, to start explaining how libtorch deploys the model. So this is a hands-on tutorial on how to write libtorch code.
1. Download the cpp and python version codes of MNIST-demo
Go to https://github.com/pytorch and open the `examples` repository, as shown below:
Download the code under the above example to the local computer, and you can see the python version of mnist in the root directory: main.py; open cpp/mnist, and you can see the c++ version of mnist (implemented by libtorch): mnist.cpp. The above two code files will be used below.
mnist is a demo for handwritten digit recognition, as shown below, to quickly understand the principle.
2. Read the pytorch code in five minutes
Read the Python version of the code above thoroughly. Opening main.py, we can see that the input image size is 28*28. The following is the entire official MNIST Python example, i.e. the training code, with detailed comments added. As shown in the picture below, when it is executed the handwritten digit data set is downloaded automatically and then training begins. (Tested with torch 1.13.1.)
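Note: If the dataset cannot be downloaded automatically, please see Chapter 3. The listing below passes download=False to datasets.MNIST because the data was placed manually as described there; pass download=True to let torchvision fetch it instead.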
main.py:
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR


class Net(nn.Module):
    """Small CNN for MNIST: two 3x3 convolutions, one max-pool, two linear layers.

    The shape comments below assume an input batch of shape (N, 1, 28, 28);
    ``fc1`` expects exactly 64 * 12 * 12 = 9216 flattened features.
    """

    def __init__(self):
        # self is the instance being initialised (not the class itself) and is
        # not a keyword. super() runs nn.Module's own set-up, see
        # https://www.runoob.com/python/python-func-super.html
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10)."""
        x = F.relu(self.conv1(x))           # 28 - 3 + 1 = 26 -> (N, 32, 26, 26)
        x = F.relu(self.conv2(x))           # 26 - 3 + 1 = 24 -> (N, 64, 24, 24)
        x = F.max_pool2d(x, kernel_size=2)  # -> (N, 64, 12, 12)
        x = self.dropout1(x)                # dropout does not change the shape
        x = torch.flatten(x, start_dim=1)   # -> (N, 9216)
        x = F.relu(self.fc1(x))             # fc1 weight is 128 x 9216 -> (N, 128)
        x = self.dropout2(x)
        x = self.fc2(x)                     # fc2 weight is 10 x 128 -> (N, 10)
        return F.log_softmax(x, dim=1)      # log of the softmax over the classes


def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        args: parsed options; ``log_interval`` and ``dry_run`` are read here.
        model: the network being optimised.
        device: torch device every batch is moved to.
        train_loader: iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length is used in the progress line).
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, only used in the progress line.
    """
    # model.train() puts layers such as dropout (and batch normalisation, when
    # a model has it) into training behaviour; model.eval() is used for testing.
    model.train()
    # The convolution kernel sizes can be inspected with:
    #   model.conv1.weight.shape -> torch.Size([32, 1, 3, 3])   32 kernels, 1 input channel
    #   model.conv2.weight.shape -> torch.Size([64, 32, 3, 3])  64 kernels, 32 input channels
    for batch_idx, (data, target) in enumerate(train_loader):
        # For MNIST: train_loader.dataset.data.shape is torch.Size([60000, 28, 28]),
        # data is (batch_size, 1, 28, 28) and target is (batch_size,).
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)  # (batch_size, 10)
        # nll_loss on log_softmax output is similar to cross entropy, see
        # https://blog.csdn.net/qq_22210253/article/details/85229988
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # To look at the learned kernels: print(model.conv2._parameters)
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Args:
        model: the network to evaluate; it is switched to eval mode.
        device: torch device every batch is moved to.
        test_loader: iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (its length normalises the loss and accuracy).
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) per batch so the division below gives a per-sample average.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


# Evaluation helper, not a unit test: keep pytest from collecting it when this
# module is star-imported into a test file.
test.__test__ = False


def main():
    """Parse the command line, train on MNIST, evaluate each epoch, save the model."""
    # Imported here so that `from main import Net` only requires torch.
    from torchvision import datasets, transforms

    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=16, metavar='N',
                        help='input batch size for training (default: 16)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=2, metavar='N',
                        help='number of epochs to train (default: 2)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    # default=True together with store_true means the model is always saved.
    parser.add_argument('--save-model', action='store_true', default=True,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       # Page-locked host memory speeds up host-to-GPU copies.
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    # torchvision.transforms is the image preprocessing package; Compose chains
    # several steps together.
    transform = transforms.Compose([
        transforms.ToTensor(),  # (H x W x C), [0, 255] -> (C x H x W), [0.0, 1.0]
        transforms.Normalize((0.1307,), (0.3081,)),  # normalise the data
    ])
    # download=False: the data is expected to be under ./data already (see
    # chapter 3); pass download=True to let torchvision fetch it.
    dataset1 = datasets.MNIST('data', train=True, download=False,
                              transform=transform)
    dataset2 = datasets.MNIST('data', train=False,
                              transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # Fixed-step decay: the learning rate is multiplied by gamma after every
    # epoch. Reference: https://zhuanlan.zhihu.com/p/93624972
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        # Saves the whole pickled module rather than only the weights
        # (torch.save(model.state_dict(), "pytorch_mnist.pt")), so any script
        # that loads this file must be able to import Net.
        torch.save(model, "pytorch_mnist.pth")


if __name__ == '__main__':
    main()
3. Download the MNIST data set and train the model
How to download the data set: there are a lot of tutorials on the Internet, but they did not seem to help. What actually happens is that pytorch (torchvision) downloads the data from a fixed URL and creates a new folder locally. The data is stored under the following relative path:
Looking at the underlying code, I saw that the downloaded data also has its MD5 checksum verified. In other words, if you download a compressed package shared by someone else, pytorch may fail to load it. But then I found a data set in .pt format: it turns out that after pytorch reads the compressed .gz data, it converts it into .pt files and stores them in the processed folder, as shown below:
Download link: https://github.com/MorvanZhou/PyTorch-Tutorial/tree/master/tutorial-contents-notebooks/mnist/processed
After downloading, just put it in the above-mentioned processed folder. You don’t need to worry about the data set in the original compressed package format. The data in .pt format can be copied and shared with others. The final project directory is as shown below:
Run main.py, execute python code training, and save the model file pytorch_mnist.pth. The test print information is as follows:
......
Train Epoch: 2 [54400/60000 (91%)] Loss: 0.003272
Train Epoch: 2 [55040/60000 (92%)] Loss: 0.236524
Train Epoch: 2 [55680/60000 (93%)] Loss: 0.087931
Train Epoch: 2 [56320/60000 (94%)] Loss: 0.013646
Train Epoch: 2 [56960/60000 (95%)] Loss: 0.027721
Train Epoch: 2 [57600/60000 (96%)] Loss: 0.100714
Train Epoch: 2 [58240/60000 (97%)] Loss: 0.155445
Train Epoch: 2 [58880/60000 (98%)] Loss: 0.113110
Train Epoch: 2 [59520/60000 (99%)] Loss: 0.039872

Test set: Average loss: 0.0398, Accuracy: 9864/10000 (99%)
Below is a separate piece of model testing code for an environment with pytorch and python-opencv. It reads the model file saved by the training run above and then runs inference on an image. Two small sample images are given below.
sample graph:
infer.py
import cv2
import torch
from PIL import Image
from torchvision import datasets, transforms

# Net must be importable here: pytorch_mnist.pth holds the whole pickled
# module, and torch.load needs the class definition to rebuild it.
from main import Net

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE: on PyTorch >= 2.6 torch.load defaults to weights_only=True, and
    # loading a whole pickled module then needs weights_only=False.
    model = torch.load('pytorch_mnist.pth', map_location=device)  # load model
    model = model.to(device)
    model.eval()  # switch the model to test mode (disables dropout)

    img = cv2.imread("9.jpg", 0)  # flag 0: read the image to predict as grayscale
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise FileNotFoundError("could not read image '9.jpg'")
    cv2.imshow("img", img)
    cv2.waitKey(100)

    # Same preprocessing as in training.
    # NOTE(review): the image is not resized, so it has to be 28x28 already for
    # Net.fc1 (9216 input features) to match.
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    img = trans(Image.fromarray(img))
    # Add the batch dimension: [batch_size, channel, height, width], batch_size=1.
    img = img.unsqueeze(0).to(device)

    with torch.no_grad():  # inference only, no gradients needed
        output = model(img)
    pred = output.max(1, keepdim=True)[1]  # index of the largest log-probability
    print('The detection result is: %d' % pred.item())
4. Model serialization and visual analysis
Based on the above work, we have obtained the model file in .pth format. To load the model in libtorch, we still need to serialize it. The following is the serialization code for the python environment. Note: an image has to be read, because it serves as the example input for tracing. The end result is that the .pth file is converted into a .pt file.
import cv2
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import datasets, transforms

# Net must be importable here: pytorch_mnist.pth holds the whole pickled
# module, and torch.load needs the class definition to rebuild it.
from main import Net

if __name__ == '__main__':
    device = torch.device('cpu')  # use the CPU for tracing/inference
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE: on PyTorch >= 2.6 torch.load defaults to weights_only=True, and
    # loading a whole pickled module then needs weights_only=False.
    model = torch.load('pytorch_mnist.pth', map_location=device)  # load model
    model = model.to(device)
    model.eval()  # test mode, so dropout is not recorded as active in the trace

    img = cv2.imread("9.jpg", 0)  # flag 0: read the example image as grayscale
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise FileNotFoundError("could not read image '9.jpg'")

    # Same preprocessing as in training.
    # NOTE(review): the image is not resized, so it has to be 28x28 already for
    # Net.fc1 (9216 input features) to match.
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    img = trans(Image.fromarray(img))
    # Add the batch dimension: [batch_size, channel, height, width].
    img = img.unsqueeze(0).to(device)

    # Tracing runs the model once on the example input and records the executed
    # operations as a TorchScript module.
    traced_net = torch.jit.trace(model, img)
    traced_net.save("pytorch_mnist.pt")

    print("Model serialization export successful")
The generated .pt file will be deployed to the libtorch environment, which will be explained in detail in the next section. Finally, we use https://netron.app/ to visualize the serialized model file (i.e. pytorch_mnist.pt), as shown in the figure (the network definition code from above is pasted again here for easy comparison):
class Net(nn.Module):
    """Small CNN for MNIST: two 3x3 convolutions, one max-pool, two linear layers.

    The shape comments below assume an input batch of shape (N, 1, 28, 28);
    ``fc1`` expects exactly 64 * 12 * 12 = 9216 flattened features.
    """

    def __init__(self):
        # self is the instance being initialised (not the class itself) and is
        # not a keyword. super() runs nn.Module's own set-up, see
        # https://www.runoob.com/python/python-func-super.html
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10)."""
        x = F.relu(self.conv1(x))           # 28 - 3 + 1 = 26 -> (N, 32, 26, 26)
        x = F.relu(self.conv2(x))           # 26 - 3 + 1 = 24 -> (N, 64, 24, 24)
        x = F.max_pool2d(x, kernel_size=2)  # -> (N, 64, 12, 12)
        x = self.dropout1(x)                # dropout does not change the shape
        x = torch.flatten(x, start_dim=1)   # -> (N, 9216)
        x = F.relu(self.fc1(x))             # fc1 weight is 128 x 9216 -> (N, 128)
        x = self.dropout2(x)
        x = self.fc2(x)                     # fc2 weight is 10 x 128 -> (N, 10)
        return F.log_softmax(x, dim=1)      # log of the softmax over the classes