A 176-class leaf classifier (ResNet-34) built with PyTorch

Dataset: Classify Leaves | Kaggle — "Train models to predict the plant species": https://www.kaggle.com/competitions/classify-leaves/data. It is recommended to run the code in this article in a Jupyter notebook.

The following libraries are required to train this model:

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
import torchvision.models as models
from tqdm import tqdm
import seaborn as sns

View general information

# Change these paths to wherever you downloaded the data set.
train = 'D:/microsoft_edge/kaggle/uesd_dataset/leaves_train.csv'
test = 'D:/microsoft_edge/kaggle/uesd_dataset/leaves_test.csv'

train_ = pd.read_csv(train)
test_ = pd.read_csv(test)
# The exploration code further down refers to the training table as
# `labels_dataframe`; bind that name here so it does not raise NameError.
labels_dataframe = train_
# Row counts: training CSV, test CSV, and both combined.
print(len(train_), len(test_), len(train_) + len(test_))

You can view how many samples each category has as follows:

def barw(ax):
    """Annotate every patch of ``ax`` with its width, rounded to two decimals.

    The text is anchored at ``x + width`` horizontally and at the vertical
    midpoint of the patch (``y + height / 2``), which suits horizontal bars.
    """
    for bar in ax.patches:
        width = bar.get_width()
        anchor_x = bar.get_x() + width
        anchor_y = bar.get_y() + bar.get_height() / 2
        ax.annotate(round(width, 2), (anchor_x, anchor_y))

# Horizontal count plot of the 'label' column, with the labels ordered by
# how often they occur (the order produced by value_counts()).
plt.figure(figsize=(15, 30))
label_column = labels_dataframe['label']
ax0 = sns.countplot(y=label_column, order=label_column.value_counts().index)
barw(ax0)  # write each bar's count next to it
plt.show()

# Collect the distinct labels in sorted order.
leaves_labels = sorted(set(labels_dataframe['label']))
n_classes = len(leaves_labels)
print(leaves_labels[:10])
print(n_classes)

# Label name -> integer index, following the sorted order above.
class_to_num = {name: idx for idx, name in enumerate(leaves_labels)}

# Inverse lookup, integer index -> label name, used later to decode predictions.
num_to_class = {idx: name for name, idx in class_to_num.items()}

Write your own Dataset class:

# Custom data set built on top of PyTorch's Dataset.
class LeavesData(Dataset):
    """Leaf images listed in a CSV file, exposed as a train, valid or test view.

    The CSV is read with its first line as the header. Column 0 holds the
    image name (appended to ``file_path`` to locate the file) and, for the
    'train' and 'valid' modes, column 1 holds the string label.

    Args:
        csv_path: Path of the CSV file that lists the images.
        file_path: Prefix prepended, by plain string concatenation, to every
            image name.
        mode: One of 'train', 'valid' or 'test'.
        valid_ratio: Fraction of the rows, taken from the end of the CSV,
            that forms the validation split.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """

    _MODES = ('train', 'valid', 'test')

    def __init__(self, csv_path, file_path, mode, valid_ratio=0.2):
        # Fail early instead of hitting an AttributeError on image_arr later.
        if mode not in self._MODES:
            raise ValueError(
                f"mode must be one of {self._MODES}, got {mode!r}"
            )

        self.file_path = file_path
        self.mode = mode

        # read_csv already consumes the header line, so every row of
        # data_info is a real sample: no "- 1" and no slicing from row 1.
        # Skipping a row here would drop the first sample and, in test mode,
        # produce one prediction fewer than the test CSV has rows.
        self.data_info = pd.read_csv(csv_path)
        self.data_len = len(self.data_info.index)
        self.train_len = int(self.data_len * (1 - valid_ratio))

        if mode == 'train':
            self.train_image = np.asarray(self.data_info.iloc[:self.train_len, 0])
            self.train_label = np.asarray(self.data_info.iloc[:self.train_len, 1])
            self.image_arr = self.train_image
            self.label_arr = self.train_label
        elif mode == 'valid':
            self.valid_image = np.asarray(self.data_info.iloc[self.train_len:, 0])
            self.valid_label = np.asarray(self.data_info.iloc[self.train_len:, 1])
            self.image_arr = self.valid_image
            self.label_arr = self.valid_label
        else:  # 'test': no label column is read
            self.test_image = np.asarray(self.data_info.iloc[:, 0])
            self.image_arr = self.test_image

        self.real_len = len(self.image_arr)
        # Built on first use so the pipeline is created once, not per item.
        self._transform = None

        print(f'Successfully read {mode} data set {self.real_len} data')

    def _build_transform(self):
        """Return the preprocessing pipeline for this data set's mode."""
        steps = [transforms.Resize((224, 224))]
        if self.mode == 'train':
            # Augmentation is applied to the training split only.
            steps.append(transforms.RandomHorizontalFlip(p=0.5))
        steps.append(transforms.ToTensor())
        return transforms.Compose(steps)

    def __getitem__(self, index):
        """Return the image tensor at ``index`` (plus its class index unless in test mode)."""
        single_image_name = self.image_arr[index]

        # Convert to RGB so every sample has three channels, and close the
        # file handle as soon as the pixel data has been copied.
        with Image.open(self.file_path + single_image_name) as raw_image:
            img_as_img = raw_image.convert('RGB')

        if self._transform is None:
            self._transform = self._build_transform()
        img_as_img = self._transform(img_as_img)

        if self.mode == 'test':
            return img_as_img

        # Map the string label to its integer class index.
        label = self.label_arr[index]
        number_label = class_to_num[label]
        return img_as_img, number_label

    def __len__(self):
        """Return the number of samples in this split."""
        return self.real_len

# The image names stored in the CSV already contain the image folder, so
# img_path only needs to point one level above it.
train_path = 'D:/microsoft_edge/kaggle/uesd_dataset/leaves_train.csv'
test_path = 'D:/microsoft_edge/kaggle/uesd_dataset/leaves_test.csv'
img_path = 'D:/microsoft_edge/kaggle/uesd_dataset/'

dataset = {x: LeavesData(train_path, img_path, mode=x) for x in ['train', 'valid']}
test_dataset = LeavesData(test_path, img_path, mode='test')

# Batch loaders used by the preview, training and inference code below.
# Only the training split is shuffled; the test loader must keep the CSV
# order so predictions line up with the submission rows.
batch_size = 32
dataloader = {
    x: DataLoader(dataset[x], batch_size=batch_size, shuffle=(x == 'train'))
    for x in ['train', 'valid']
}
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

You can verify that the DataLoader reads the data in batches:

# Helper for displaying a tensor as an image.
def im_convert(tensor):
    """Turn an image tensor into a NumPy array that ``plt.imshow`` can draw.

    The tensor is moved to the CPU, copied and detached, size-1 dimensions
    are dropped, axis 0 is moved to the end and the values are clipped to
    the range [0, 1]. Assumes the squeezed array is laid out as
    (channels, height, width).
    """
    pixels = tensor.to("cpu").clone().detach().numpy()
    channels_last = pixels.squeeze().transpose(1, 2, 0)
    return channels_last.clip(0, 1)

# Preview the first rows * columns images of one validation batch in a grid,
# each titled with its class name.
columns = 4
rows = 2
fig = plt.figure(figsize=(20, 12))

inputs, classes = next(iter(dataloader['valid']))

for idx in range(rows * columns):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    class_name = num_to_class[int(classes[idx])]
    ax.set_title(class_name)
    plt.imshow(im_convert(inputs[idx]))
plt.show()

Start the training module:

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# False: every layer is trained. True: the existing weights are frozen and
# only the newly added final layer is trained.
feature_extracting = False

# Freeze the model's existing weights when only a new head should be trained.
def set_parameter_requires_grad(model, feature_extracting):
    """Turn off gradients for every parameter of ``model`` when requested.

    If ``feature_extracting`` is falsy the model is left untouched. The
    model is modified in place and nothing is returned.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False


# ResNet-34 with a replacement classification layer.
def initialize_model(num_classes, feature_extracting, use_pretrained=True):
    """Build a ResNet-34 whose final layer has ``num_classes`` outputs.

    Args:
        num_classes: Output size of the new final layer.
        feature_extracting: Passed to ``set_parameter_requires_grad`` before
            the final layer is replaced, so the new layer stays trainable.
        use_pretrained: Forwarded to ``models.resnet34`` as ``pretrained``.

    Returns:
        The modified model.
    """
    backbone = models.resnet34(pretrained=use_pretrained)
    set_parameter_requires_grad(backbone, feature_extracting)

    # The new layer is wrapped in nn.Sequential, so its state-dict keys are
    # "fc.0.weight" / "fc.0.bias".
    head = nn.Linear(backbone.fc.in_features, num_classes)
    backbone.fc = nn.Sequential(head)
    return backbone

Some variables:

The model is not complicated and does not require weight decay. Of course, you can also try it to see if it has any impact on the accuracy.

learning_rate = 3e-4
weight_decay = 0
num_epoch = 20
# Raw string: '\m' and '\k' are not valid escape sequences, and a plain
# literal triggers a warning for them. The runtime value is unchanged.
model_path = r'D:\microsoft_edge\kaggle\model/leaf_cls.ckpt'

model = initialize_model(n_classes, feature_extracting)
criterion = nn.CrossEntropyLoss()
optimizer_fn = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

Training module:

The tqdm library can visualize the training process

def train_model(model, num_epoch, detaloader, criterion, optimizer, model_path):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Every epoch runs a 'train' phase followed by a 'valid' phase. Loss and
    accuracy are averaged over the batches of each phase and printed. When
    the validation accuracy beats the best value so far, the model's
    ``state_dict`` is saved to ``model_path``.

    Args:
        model: The network to train; it is moved to the module-level ``device``.
        num_epoch: Number of epochs to run.
        detaloader: Mapping with 'train' and 'valid' keys whose values yield
            ``(images, labels)`` batches. The misspelled name is kept so
            existing keyword callers keep working.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped after each training batch.
        model_path: Destination passed to ``torch.save``.

    Returns:
        The best validation accuracy seen, as a float (0.0 if no validation
        phase ever improved on it).
    """
    # Use the loaders that were passed in rather than a module-level global.
    loaders = detaloader
    best_acc = 0.0
    model = model.to(device)

    for epoch in range(num_epoch):
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = []
            running_acc = []

            for imgs, labels in tqdm(loaders[phase]):
                imgs = imgs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()
                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(imgs)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                acc = (outputs.argmax(dim=-1) == labels).float().mean()
                running_loss.append(loss.item())
                # Store a plain float so no device tensors are kept around and
                # best_acc stays a Python number.
                running_acc.append(acc.item())

            if not running_loss:
                # An empty loader would otherwise cause a division by zero.
                print(f"[{phase}|{epoch + 1:02d}/{num_epoch}] no batches, skipped")
                continue

            epoch_loss = sum(running_loss) / len(running_loss)
            epoch_acc = sum(running_acc) / len(running_acc)

            print(f"[{phase}|{epoch + 1:02d}/{num_epoch}] loss:{epoch_loss:.4f}, acc:{epoch_acc:.4f}")

            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), model_path)
                print(f'[Accuracy {best_acc:.4f} model has been saved]')

    return best_acc

train_model(model, num_epoch, dataloader, criterion, optimizer_fn, model_path)

# Raw string: '\m', '\k' and '\c' are not valid escape sequences. The runtime
# value is the same as before.
save_file = r'D:\microsoft_edge\kaggle\csv_submission/leaves_sub.csv'

# Rebuild the architecture with the same class count used for training.
# Pretrained weights are not requested because the checkpoint loaded below
# overwrites every parameter anyway.
model = initialize_model(n_classes, feature_extracting, use_pretrained=False)
model = model.to(device)
# map_location lets a checkpoint saved on one device load on another.
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

predictions = []

with torch.no_grad():
    for batch in tqdm(test_loader):
        imgs = batch.to(device)
        outputs = model(imgs)
        predictions.extend(outputs.argmax(dim=-1).cpu().numpy().tolist())

# Decode class indices back to label names.
pre = [num_to_class[i] for i in predictions]

test_data = pd.read_csv(test_path)
# A length mismatch would otherwise be hidden: assigning a shorter Series
# aligns on the index and silently fills the missing rows with NaN.
if len(pre) != len(test_data):
    raise ValueError(
        f"got {len(pre)} predictions for {len(test_data)} test rows"
    )
test_data['label'] = pd.Series(pre)
submission = pd.concat([test_data['image'], test_data['label']], axis=1)
submission.to_csv(save_file, index=False)
print("Done")

Finish

The topics covered in this article match the official knowledge base, where you can learn more about them: Python beginner skill tree · Artificial intelligence · Deep learning. 385,338 people are currently studying this track.