“Image merge” creates data out of nothing

When working on a new project, there is often a lack of real data, which makes it impossible to train a model. In that case, algorithm engineers need to create some data themselves. For example, in the bag detection task shared in this article, there is no real data showing bags inside the detection area.

In that case, you can use image splicing to paste collected bag images into the scene image [premise: the target image is usually a “photo sticker”]. Of course, not every location in the scene image is suitable for pasting; generally only specific areas are, such as the ground, walls, or certain equipment. Therefore, an annotation tool is needed to mark these target areas. The algorithm reads the corresponding target area, randomly picks a coordinate point inside it, and pastes the target image into the scene image at that point.

Of course, not just any form of target image can be pasted. To blend better into the scene, the target needs to be extracted from the photo sticker; that is, everything except the target itself is made transparent, so that the spliced result looks “more real” [still very fake!!!].

The specific workflow is shown below, and the code is attached at the end of each step. Apologies for any bugs!

1. Make a negative template

The author uses the LabelImg annotation tool to mark the areas of the background image (the “film”) where targets may be pasted.

display interface of labelImg

Contents of the txt file produced by the annotation

2. Process the corresponding photo sticker (target) image

Cut the target out of the jpg image. The pixel-based cutout used here simply makes the white areas transparent. This is not a universal approach; readers with different targets can modify the code themselves. The result is saved as png because the png format can store an alpha channel.

# !/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2023.11
# @Author : green feather
# @Email: [email protected]
# @Blog: https://blog.csdn.net/ViatorSun
# @Note: jpg2png.py



import os
import cv2
import numpy as np
import os.path as osp



# Convert every image under file_path into a PNG whose near-white pixels are
# fully transparent, writing the results into a sibling "<file_path>_out" folder.
file_path = "/media/yinzhe/DataYZ/DataSet/DataSet/bag_masknew"
save_path = file_path + "_out"

# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(save_path, exist_ok=True)

# Extensions are compared lower-cased so ".JPG", ".PNG", ".JPEG" also match.
image_exts = {".jpg", ".jpeg", ".png"}

for path, dirs, files in os.walk(file_path):
    for file in files:
        # splitext keeps dots inside the base name ("a.b.jpg" -> "a.b"), so two
        # different inputs no longer collapse onto the same output name.
        stem, ext = osp.splitext(file)
        if ext.lower() not in image_exts:
            continue

        img_dir = osp.join(path, file)
        img = cv2.imread(img_dir)
        if img is None:
            # imread signals an unreadable/corrupt file by returning None.
            print("skip unreadable image:", img_dir)
            continue

        # The thresholds are identical for all three channels, so the mask can
        # be computed directly on the BGR image without an RGB round trip.
        white_mask = cv2.inRange(img, (200, 200, 200), (255, 255, 255))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

        # Zero the alpha channel wherever the pixel was classified as white.
        img[white_mask == 255, 3] = 0

        # NOTE: outputs from all sub-directories land in the same folder, so
        # files sharing a base name overwrite each other.
        cv2.imwrite(osp.join(save_path, stem + ".png"), img)

3. Overlay and fuse the film with the transparent target

There are many ways to overlay and blend images. Here we only use the simplest overlay method; you can also consider cv2.seamlessClone.
The final result is shown below.

In addition to generating synthetic images, annotation information is also generated simultaneously.

Attention! The annotation information here is saved according to the size of the annotation box [a known bug is deliberately left here for later optimization].
Tip: You can save based on the non-transparent area of the target area

# !/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2023.10
# @Author : green feather
# @Email: [email protected]
# @Blog: https://blog.csdn.net/ViatorSun
# @Note: ps_merge_img.py



import os
import cv2
import random
from random import sample
import numpy as np
import argparse




def read_label_txt(label_dir):
    """Read a label file into a list of per-line token lists.

    Args:
        label_dir: Path of the text file to read.

    Returns:
        One list of string tokens per non-blank line, in file order. Tokens
        are split on any run of whitespace, so blank lines and repeated or
        trailing spaces never produce empty-string entries.
    """
    labels = []
    with open(label_dir) as fp:
        for line in fp:
            fields = line.split()
            if fields:  # skip blank / whitespace-only lines
                labels.append(fields)
    return labels

def rescale_yolo_labels(labels, img_shape):
    """Convert normalised centre-format boxes to integer corner coordinates.

    Args:
        labels: Iterable of rows ``[cls, x_c, y_c, w, h]`` where the four
            geometry fields are numbers (or numeric strings) that are scaled
            by the image width/height.
        img_shape: Image shape whose first two entries are ``(height, width)``;
            any further entries (e.g. a channel count) are ignored, so both
            colour and single-channel shapes are accepted.

    Returns:
        A list of ``[cls, x_left, y_top, x_right, y_bottom]`` rows. ``cls`` is
        passed through untouched; the coordinates are truncated with ``int``.
    """
    height, width = img_shape[:2]
    rescale_boxes = []
    for box in labels:
        x_c = float(box[1]) * width
        y_c = float(box[2]) * height
        half_w = float(box[3]) * width * .5
        half_h = float(box[4]) * height * .5
        rescale_boxes.append([
            box[0],
            int(x_c - half_w),
            int(y_c - half_h),
            int(x_c + half_w),
            int(y_c + half_h),
        ])
    return rescale_boxes

def xyxy2xywh(image, bboxes):
    """Convert pixel corner boxes to normalised centre-format boxes.

    Args:
        image: Array-like exposing ``.shape`` whose first two entries are
            ``(height, width)``.
        bboxes: Iterable of rows ``[cls, x_min, y_min, x_max, y_max]`` in
            pixel units. Rows with fewer than five fields are skipped.

    Returns:
        A list of ``[cls, x_c, y_c, w, h]`` rows with ``cls`` cast to ``int``
        and the geometry divided by the image width/height.
    """
    height, width = image.shape[:2]
    boxes = []
    # Iterate the *input* boxes; looping over the empty result list would
    # make the function return [] unconditionally.
    for box in bboxes:
        if len(box) < 5:  # indices 0..4 are all read below
            continue
        cls = int(box[0])
        x_min, y_min, x_max, y_max = box[1:5]
        x_c = (x_min + x_max) / 2.0 / width
        y_c = (y_min + y_max) / 2.0 / height
        w = float(x_max - x_min) / width
        h = float(y_max - y_min) / height
        boxes.append([cls, x_c, y_c, w, h])
    return boxes

def cast_color(img, value):
    """Return a copy of ``img`` with its HSV value channel doubled and offset.

    Args:
        img: Image passed to ``cv2.cvtColor(..., COLOR_BGR2HSV)``; presumably
            an 8-bit BGR image - TODO confirm against callers.
        value: Offset added to the doubled V channel.

    Returns:
        The image converted back with ``COLOR_HSV2BGR`` after V has been
        replaced by ``clip(2 * V + value, 0, 255)``.
    """
    img_t = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(img_t)
    # Widen before multiplying: 2 * v computed in uint8 wraps modulo 256, so
    # bright pixels would turn dark before the clip ever sees them.
    v2 = np.clip(2 * v.astype(np.int32) + value, 0, 255).astype(np.uint8)
    img2 = cv2.merge((h, s, v2))
    img_cast = cv2.cvtColor(img2, cv2.COLOR_HSV2BGR)
    return img_cast

def brightness(img, value):
    """Return a copy of ``img`` with ``value`` added to its HSV value channel.

    The image is converted with ``COLOR_BGR2HSV``, the V plane is offset by
    ``value`` via ``cv2.add`` and clipped to 0-255, and the result is
    converted back with ``COLOR_HSV2BGR``.
    """
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

    # Offset the value plane, then keep it inside the 8-bit range.
    shifted = cv2.add(1 * val, value)
    shifted = np.clip(shifted, 0, 255)

    hsv_out = np.uint8(cv2.merge((hue, sat, shifted)))
    return cv2.cvtColor(hsv_out, cv2.COLOR_HSV2BGR)


def add_alpha_channel(img):
    """ Return a 4-channel copy of a 3-channel image with alpha set to 255 """

    # Unpacking into three names requires exactly three channels.
    first, second, third = cv2.split(img)

    # Alpha plane: same shape and dtype as a colour plane, every entry 255.
    opaque = np.ones_like(first) * 255

    return cv2.merge((first, second, third, opaque))


def merge_img(jpg_img, png_img, y1, y2, x1, x2):
    """Alpha-blend a 4-channel overlay onto a background image.

    Args:
        jpg_img: Background array. A 3-channel input is first extended with
            an alpha channel via ``add_alpha_channel`` (producing a new
            array); an input that already has another channel count is
            written to in place.
        png_img: Overlay array; channel 3 is read as alpha in the 0-255 range
            and channels 0-2 are blended into the background.
        y1, y2, x1, x2: Placement rectangle in background coordinates, with
            exclusive end values. The clipping arithmetic assumes the
            rectangle has the same height/width as ``png_img``.

    Returns:
        The background with the overlay blended into channels 0-2. Parts of
        the rectangle outside the background are cropped; if nothing
        overlaps, the background is returned unchanged.
    """
    # Determine whether the background is already 4 channels
    if jpg_img.shape[2] == 3:
        jpg_img = add_alpha_channel(jpg_img)

    bg_h, bg_w = jpg_img.shape[:2]
    fg_h, fg_w = png_img.shape[:2]

    # (yy1:yy2, xx1:xx2) is the part of the overlay that stays visible after
    # the placement rectangle has been cropped to the background.
    yy1, yy2 = 0, fg_h
    xx1, xx2 = 0, fg_w

    if x1 < 0:
        xx1 = -x1
        x1 = 0
    if y1 < 0:
        yy1 = -y1
        y1 = 0
    if x2 > bg_w:
        xx2 = fg_w - (x2 - bg_w)
        x2 = bg_w
    if y2 > bg_h:
        yy2 = fg_h - (y2 - bg_h)
        y2 = bg_h

    # A rectangle lying completely outside the background leaves an empty (or
    # negative-ended, hence wrapping) slice; blending that raises a shape
    # mismatch, so there is simply nothing to draw.
    if x1 >= x2 or y1 >= y2 or xx1 >= xx2 or yy1 >= yy2:
        return jpg_img

    # Per-pixel overlay weight in 0-1 and its complement for the background.
    alpha_png = png_img[yy1:yy2, xx1:xx2, 3] / 255.0
    alpha_jpg = 1 - alpha_png

    # Blend the three colour channels; the background alpha is left as is.
    for c in range(3):
        jpg_img[y1:y2, x1:x2, c] = (alpha_jpg * jpg_img[y1:y2, x1:x2, c]
                                    + alpha_png * png_img[yy1:yy2, xx1:xx2, c])

    return jpg_img


def random_add_patches_on_objects(image, mask_lst, rescale_boxes, paste_number):
    """Paste each mask at random positions inside a randomly chosen region.

    Args:
        image: Background image array; it is copied, never modified.
        mask_lst: Overlay images (expected to carry an alpha channel, as
            required by ``merge_img``). Mask ``i`` is paired with the ``i``-th
            region after shuffling; surplus masks or regions are ignored.
        rescale_boxes: Rows ``[cls, x_min, y_min, x_max, y_max]`` with integer
            pixel coordinates describing where pasting is allowed. The
            caller's list is left in its original order.
        paste_number: Number of paste attempts per mask/region pair.

    Returns:
        ``(img, new_bboxes)`` where ``img`` is the composited image and
        ``new_bboxes`` holds one ``[0, x_left, y_top, x_right, y_bottom]`` row
        (inclusive corners) per successful paste.
    """
    img = image.copy()
    new_bboxes = []
    cl = 0  # class id written into every generated box

    # Shuffle a copy so the caller's list is not reordered as a side effect.
    target_boxes = list(rescale_boxes)
    random.shuffle(target_boxes)

    for p_img, rescale_bbox in zip(mask_lst, target_boxes):
        bbox_h, bbox_w = p_img.shape[:2]

        obj_xmin, obj_ymin, obj_xmax, obj_ymax = rescale_bbox[1:]
        obj_w = obj_xmax - obj_xmin + 1  # target region size
        obj_h = obj_ymax - obj_ymin + 1

        # Start from the mask's own size so the values are defined even when
        # the mask already fits, then shrink until it is strictly smaller than
        # the region. Stop at zero so a degenerate region cannot loop forever.
        new_bbox_w, new_bbox_h = bbox_w, bbox_h
        while (new_bbox_w > 0 and new_bbox_h > 0
               and not (new_bbox_w < obj_w and new_bbox_h < obj_h)):
            new_bbox_w = int(new_bbox_w * random.uniform(0.5, 0.8))
            new_bbox_h = int(new_bbox_h * random.uniform(0.5, 0.8))

        if new_bbox_w < 1 or new_bbox_h < 1:
            print('============== mask cannot fit target box, skipped ================')
            continue

        # The size is fixed for this pair, so resize once outside the loop.
        patch = cv2.resize(p_img, (new_bbox_w, new_bbox_h))

        # Largest admissible top-left corner inside the region.
        x_hi = obj_xmax - new_bbox_w - 1
        y_hi = obj_ymax - new_bbox_h - 1

        for _ in range(paste_number):
            if obj_xmin >= x_hi or obj_ymin >= y_hi:
                print('============== center_search_space error!!!! ================')
                continue

            x_left = random.randint(obj_xmin, x_hi)
            y_top = random.randint(obj_ymin, y_hi)
            x_right = x_left + new_bbox_w - 1
            y_bottom = y_top + new_bbox_h - 1
            new_bboxes.append([cl, int(x_left), int(y_top), int(x_right), int(y_bottom)])

            # merge_img takes exclusive end coordinates, hence the +1.
            img = merge_img(img, patch, y_top, y_bottom + 1, x_left, x_right + 1)

    return img, new_bboxes




if __name__ == "__main__":
    # Generate `num` synthetic images (plus YOLO-style label files) for every
    # labelled background image by pasting randomly chosen RGBA masks into
    # its annotated regions.
    parser = argparse.ArgumentParser(description='PS')
    parser.add_argument('-i', '--images', default= '/media/yinzhe/DataYZ/DataSet/DataSet/bag_model', type=str, help='path of images')
    parser.add_argument('-t', '--mask', default= '/media/yinzhe/DataYZ/DataSet/DataSet/bag_mask', type=str, help='path of masks')
    parser.add_argument('-s', '--saveImage',default= '/media/yinzhe/DataYZ/DataSet/DataSet/bag_save', type=str, help='path of saved images and labels')
    parser.add_argument('-c', '--scale', default= 0.2, type=float, help='masks pasted per image, as a fraction of its labelled regions')
    parser.add_argument('-n', '--num', default= 5, type=int, help='number of img')

    args = parser.parse_args()

    mask_filedirs = args.mask
    images_path = args.images
    save_path = args.saveImage
    scale, num = args.scale, args.num
    mask_paths = []

    os.makedirs(save_path, exist_ok=True)

    # Read all mask templates. Anything that is unreadable or not 4-channel
    # is skipped individually; stopping at the first such file would silently
    # drop every mask listed after it.
    mask_lst = []
    for t_path in os.listdir(mask_filedirs):
        mask = cv2.imread(os.path.join(mask_filedirs, t_path), cv2.IMREAD_UNCHANGED)
        if mask is None or mask.ndim != 3 or mask.shape[2] != 4:
            continue
        mask_lst.append(mask)

    # Values used when writing labels. NOTE(review): the class id and the 0.6
    # shrink factor are hard-coded in the original script - confirm they match
    # the target dataset.
    out_cls = 3
    box_shrink = 0.6

    for image_path in os.listdir(images_path):
        pre_name, ext = os.path.splitext(image_path)
        if ext.lower() == ".txt":
            continue

        image = cv2.imread(os.path.join(images_path, image_path))
        label_path = os.path.join(images_path, pre_name + ".txt")
        # A missing label file is treated like an empty one instead of crashing.
        bbox_lst = read_label_txt(label_path) if os.path.isfile(label_path) else []

        if image is None or not bbox_lst:
            print("empty image!!! or empty label!!!")
            continue

        # yolo txt converted to x1y1x2y2
        rescale_bboxes = rescale_yolo_labels(bbox_lst, image.shape)

        # sample() raises if asked for more items than exist, so cap the count.
        mask_count = min(int(len(bbox_lst) * scale), len(mask_lst))

        for i in range(num):
            masks = sample(mask_lst, mask_count)
            img, bboxes = random_add_patches_on_objects(image, masks, rescale_bboxes, 1)
            boxes = xyxy2xywh(img, bboxes)
            img_name = pre_name + '_' + str(i) + '.jpg'

            print('handle img:', img_name)
            cv2.imwrite(os.path.join(save_path, img_name), img)

            # 'w' keeps the label file in step with the (overwritten) image on
            # re-runs; appending would accumulate stale boxes.
            with open(os.path.join(save_path, pre_name + '_' + str(i) + ".txt"), 'w') as f:
                for box in boxes:
                    fields = (out_cls, box[1], box[2], box[3] * box_shrink, box[4] * box_shrink)
                    # One box per line - the terminating newline is required.
                    f.write(" ".join(str(v) for v in fields) + "\n")