Image similarity comparison based on Aidlux

Seal verification process:

Use a deep neural network to extract deep features of seals and learn the similarity between them: a seal should be similar to itself and dissimilar to other seals.

1. Siamese Network

The Siamese network is a commonly used deep-learning method for similarity measurement. It consists of two CNN branches that share weights (in effect they are a single network, and only one network is built in code). The two inputs are mapped into the same feature space through shared convolutional and fully connected layers, which output feature-vector representations; the distance or similarity between the two vectors is then computed.
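
A minimal PyTorch sketch of the shared-weight idea (the tiny backbone below is a hypothetical placeholder for illustration, not the VGG16 backbone used in deployment):

    import torch
    import torch.nn as nn

    class SiameseNet(nn.Module):
        def __init__(self, embed_dim=128):
            super().__init__()
            # One backbone only: both inputs pass through the SAME layers and weights.
            self.backbone = nn.Sequential(
                nn.Conv2d(3, 32, 3, stride=2, padding=1), nn.ReLU(),
                nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
                nn.AdaptiveAvgPool2d(1), nn.Flatten(),
                nn.Linear(64, embed_dim),
            )

        def forward(self, x1, x2):
            f1 = self.backbone(x1)  # feature vector of input 1
            f2 = self.backbone(x2)  # feature vector of input 2 (shared weights)
            # Similarity is then derived from the distance between the two vectors.
            return torch.pairwise_distance(f1, f2)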

2.Triplet Loss Network

A triplet-loss network is trained by comparing the similarity among three samples. It consists of three weight-sharing CNN branches that process the anchor, positive, and negative samples respectively; the positive sample is similar to the anchor, while the negative sample is not. Through triplet training, the network learns to map samples of the same category to nearby regions of the feature space and samples of different categories to distant regions.
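
For reference, the standard triplet loss is only a few lines (PyTorch also ships it as nn.TripletMarginLoss):

    import torch.nn.functional as F

    def triplet_loss(anchor, positive, negative, margin=1.0):
        # Pull anchor and positive together, push anchor and negative apart,
        # until the negative is at least `margin` farther away than the positive.
        d_ap = F.pairwise_distance(anchor, positive)
        d_an = F.pairwise_distance(anchor, negative)
        return F.relu(d_ap - d_an + margin).mean()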

3. Method of this article

This article uses a Siamese network that takes a genuine seal and a seal under test as simultaneous inputs for learning. The similarity between two genuine seals is labeled 1; the similarity between a genuine seal and a fake one is labeled 0. A loss function combining BCELoss and Contrastive Loss is designed for model training.
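
The article does not spell out how the two terms are combined; the sketch below is one plausible formulation, using the classic contrastive-loss form and an assumed weighting factor alpha:

    import torch.nn.functional as F

    def combined_loss(sim_logit, f1, f2, label, margin=1.0, alpha=0.5):
        # label is a float tensor: 1.0 for a genuine/genuine pair, 0.0 for genuine/fake.
        bce = F.binary_cross_entropy_with_logits(sim_logit, label)
        # Contrastive term: small distance for matching pairs, at least
        # `margin` of distance for non-matching pairs.
        d = F.pairwise_distance(f1, f2)
        contrastive = (label * d.pow(2) + (1 - label) * F.relu(margin - d).pow(2)).mean()
        return bce + alpha * contrastive  # alpha is an assumption, not from the article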

Training steps:

1. Arrange the dataset in the format described above and place it under the dataset folder.

2. Set train_own_data in train.py to True.

3. Run train.py to start training; you can observe the accuracy on the training and validation sets at each step.

Convert the trained .pth file to an onnx model, then convert the onnx model to tflite and dlc models through AIMO.
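
A sketch of the .pth-to-onnx step (the path and tensor names are assumptions; if the .pth stores only a state_dict, instantiate the network first and call load_state_dict):

    import torch

    model = torch.load("model_data/best.pth", map_location="cpu")  # assumed path
    model.eval()

    # Two 112x112 RGB inputs, matching the deployment input shape used below.
    dummy_1 = torch.randn(1, 3, 112, 112)
    dummy_2 = torch.randn(1, 3, 112, 112)
    torch.onnx.export(model, (dummy_1, dummy_2), "vgg16_fixed.onnx",
                      input_names=["img_1", "img_2"], output_names=["sim"],
                      opset_version=11)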

Aidlux platform deployment

  1. tflite deployment
    import aidlite_gpu
    import cv2
    from cvs import *
    import numpy as np
    import os
    import time
    from PIL import Image
    from contrast_utils.utils import letterbox_image, preprocess_input, cvtColor
    
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    if __name__ == "__main__":
    
        # 1. Initialize the aidlite class and create the aidlite object
        aidlite = aidlite_gpu.aidlite()
        print("ok")
    
        # 2. Load model
        w = h = 112
        input_shape = [w, h]
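        # RGB has 3 channels and one float32 takes 4 bytes, so each buffer size
        # below is in bytes (the trailing 4 is bytes per element).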
        in_shape = [1 * w * h * 3 * 4, 1 * w * h * 3 * 4]
        out_shape = [1 * 1 * 1 * 4]
    
        model_path = "/home/aidlux/model/tflite/vgg16_fixed_fp32.tflite"
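        # ANNModel(model_path, in_shape, out_shape, numberOfThreads, enableNNAPI);
        # 4 threads, 0 selects GPU inference (see the dlc section for all modes).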
        value = aidlite.ANNModel(model_path, in_shape, out_shape, 4, 0)
        print("gpu:", value)
    
        img1_pth = "/home/aidlux/test_imgs/test/false/beijing_2019-11-21_10406_200_200_seal.jpg"
        img2_pth = "/home/aidlux/test_imgs/test/true/beijing_0905_61269575.jpg"
        out = "result"
        os.makedirs(out, exist_ok=True)
    
        img10 = cv2.imread(img1_pth)
        img20 = cv2.imread(img2_pth)
    
        img1 = Image.fromarray(cv2.cvtColor(img10, cv2.COLOR_BGR2RGB))
        img2 = Image.fromarray(cv2.cvtColor(img20, cv2.COLOR_BGR2RGB))
    
        image_1 = letterbox_image(img1, [input_shape[1], input_shape[0]], False)
        image_2 = letterbox_image(img2, [input_shape[1], input_shape[0]], False)
    
        photo_1 = preprocess_input(np.array(image_1, np.float32))
        photo_2 = preprocess_input(np.array(image_2, np.float32))
    
        photo_1 = np.expand_dims(np.transpose(photo_1, (2, 0, 1)), 0)
        photo_2 = np.expand_dims(np.transpose(photo_2, (2, 0, 1)), 0)
        
        # 3. Pass in model input data
        # input_data = np.array([photo_1, photo_2])
        aidlite.setInput_Float32(photo_1, index=0)
        aidlite.setInput_Float32(photo_2, index=1)
    
        # 4. Perform inference
        start = time.time()
        aidlite.invoke()
        end = time.time()
        timerValue = (end - start) * 1000
        print("infer time(ms):{}".format(timerValue))
    
        # 5. Get output
        pred = aidlite.getOutput_Float32(0)[0]
        print(pred)
        outs = round(sigmoid(pred), 9)
        print(outs)
    
        # Stack the two inputs side by side and annotate with the similarity score.
        img_pair = np.hstack((cv2.resize(img10, (112, 112)), cv2.resize(img20, (112, 112))))
        h, w = img_pair.shape[:2]
        cv2.putText(img_pair, 'sim:{}'.format(outs), (0, h), cv2.FONT_ITALIC, 1, (255, 255, 0), 2)
    
        cvs.imshow(img_pair)
        cv2.imwrite("/home/aidlux/res/adilux_tflite_img_pair.jpg", img_pair)
  2. dlc deployment
    import aidlite_gpu
    import cv2
    from cvs import *
    import numpy as np
    import os
    import time
    from PIL import Image
    from contrast_utils.utils import letterbox_image, preprocess_input, cvtColor
    
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    if __name__ == "__main__":
    
        # 1. Initialize the aidlite class and create the aidlite object
        aidlite = aidlite_gpu.aidlite()
        print("ok")
    
        # 2. Load model
        w = h = 112
        input_shape = [w, h]
        # RGB has 3 channels; one float32 is 32 bits = 4 bytes, so the trailing 4 is the number of bytes per element
        in_shape = [1 * w * h * 3 * 4, 1 * w * h * 3 * 4]
        out_shape = [1 * 1 * 1 * 4]
    
        model_path = "/home/aidlux/model/dlc/vgg16_fixed.dlc"
        # value = aidlite.ANNModel(model_path, in_shape, out_shape, numberOfThreads, enableNNAPI)
        # numberOfThreads - int. Number of cores used to load data and the model; valid values are 1, 2, 3 and 4.
        # enableNNAPI - int. Selects the inference mode: -1 (default): CPU, 0: GPU, 1: mixed mode, 2: DSP.
    
        value = aidlite.ANNModel(model_path, in_shape, out_shape, 4, 0) #Does not support multiple inputs
        # value = aidlite.FAST_ANNModel(model_path, in_shape, out_shape, 4, 0)
        print("gpu:", value)
    
        img1_pth = "/home/aidlux/test_imgs/test/false/beijing_2019-11-21_10406_200_200_seal.jpg"
        img2_pth = "/home/aidlux/test_imgs/test/true/beijing_0905_61269575.jpg"
        out = "result"
        os.makedirs(out, exist_ok=True)
    
        img10 = cv2.imread(img1_pth)
        img20 = cv2.imread(img2_pth)
    
        img1 = Image.fromarray(cv2.cvtColor(img10, cv2.COLOR_BGR2RGB))
        img2 = Image.fromarray(cv2.cvtColor(img20, cv2.COLOR_BGR2RGB))
    
        image_1 = letterbox_image(img1, [input_shape[1], input_shape[0]], False)
        image_2 = letterbox_image(img2, [input_shape[1], input_shape[0]], False)
    
        photo_1 = preprocess_input(np.array(image_1, np.float32))
        photo_2 = preprocess_input(np.array(image_2, np.float32))
    
        photo_1 = np.expand_dims(np.transpose(photo_1, (2, 0, 1)), 0)
        photo_2 = np.expand_dims(np.transpose(photo_2, (2, 0, 1)), 0)
        
        # 3. Pass in model input data
        aidlite.setInput_Float32(photo_1, index=0)
        aidlite.setInput_Float32(photo_2, index=1)
    
        # 4. Perform inference
        start = time.time()
        aidlite.invoke()
        end = time.time()
        timerValue = (end - start) * 1000
        print("infer time(ms):{}".format(timerValue))
    
        # 5. Get output
        pred = aidlite.getOutput_Float32(0)[0]
        print(pred)
        outs = round(sigmoid(float(pred)), 9)
        print(outs)
    
        # Stack the two inputs side by side and annotate with the similarity score.
        img_pair = np.hstack((cv2.resize(img10, (112, 112)), cv2.resize(img20, (112, 112))))
        h, w = img_pair.shape[:2]
        cv2.putText(img_pair, 'sim:{}'.format(outs), (0, h), cv2.FONT_ITALIC, 1, (0, 0, 255), 2)
    
        cvs.imshow(img_pair)
        cv2.imwrite("/home/aidlux/res/adilux_dlc_img_pair.jpg", img_pair)

    Effect videos:

  3. pth to onnx, onnx inference, tflite inference, and the tflite/dlc conversion process: model conversion and inference process (Bilibili)

  4. tflite deployment: Image similarity comparison based on Aidlux – tflite deployment (Bilibili)

    dlc deployment: Image similarity comparison based on Aidlux – dlc deployment (Bilibili)