Implementing Projective Transformation in OpenCV

Table of Contents

Introduction

Technical Background

Transformation Process

Complete Code Display

Run Results


Introduction

Projective transformation is one of the most commonly used techniques in computer vision and image processing. It maps an image from one perspective to another; common applications include image rectification, viewpoint changes, and the creation of virtual-reality content. This article introduces how to use OpenCV's cv2.warpPerspective function to perform a projective transformation.

Technical Background

The core of a projective transformation is a 3×3 transformation matrix that maps points in the source image to corresponding points in the target image. This matrix is computed from known point pairs, i.e., matching feature points in the source and target images; four point pairs, no three of them collinear, are enough to determine it.
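To make the mapping concrete, here is a minimal sketch of how a 3×3 matrix maps a source point (x, y) through homogeneous coordinates; the matrix values below are hypothetical, chosen only for illustration:

import numpy as np

# Hypothetical 3x3 perspective matrix (illustrative values only)
H = np.array([[1.2, 0.1, 30.0],
              [0.0, 1.1, 15.0],
              [0.001, 0.0, 1.0]])

def map_point(H, x, y):
    # Lift (x, y) to homogeneous coordinates, apply H, then divide by w
    px, py, w = H @ np.array([x, y, 1.0])
    return px / w, py / w

print(map_point(H, 100, 50))  # the warped position of the source point (100, 50)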

Transformation Process

  1. Read the image: use the cv2.imread function to load the input image.

  2. Define feature points: define four corresponding feature points in the source and target images. These point pairs determine the projective transformation.

  3. Compute the perspective transformation matrix: use the cv2.getPerspectiveTransform function to compute the 3×3 matrix.

  4. Apply the perspective transformation: use the cv2.warpPerspective function to warp the source image into the target view.

  5. Display the images: use cv2.imshow (wrapped in the cv_show helper in the complete code) to display the original and transformed images.

  6. Core code:

def four_point_transform(image, pts):
    # Order the four input coordinate points
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Compute the output width and height from the edge lengths of the input quadrilateral
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Corresponding coordinate position after transformation
    dst = np.array([[0, 0], [maxWidth - 1, 0],
                    [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]], dtype="float32")

    # Perspective transformation:
    # cv2.getPerspectiveTransform(src, dst[, solveMethod]) -> M computes the 3x3 transformation matrix
    # cv2.warpPerspective(src, M, dsize[, dst[, flags[, borderMode[, borderValue]]]]) -> dst
    # Parameters:
    # src: coordinates of the quadrilateral vertices in the source image
    # dst: coordinates of the corresponding vertices in the output image
    # M: the 3x3 perspective transformation matrix
    # dsize: size of the output image, a (width, height) tuple
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the transformed result
    return warped
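As a quick illustration, with four_point_transform and order_points defined as above, the function could be called like this (the file name and corner coordinates below are hypothetical; order_points accepts the four corners in any order):

import cv2
import numpy as np

img = cv2.imread("document.jpg")  # hypothetical input image
pts = np.array([[73, 239], [356, 117], [475, 265], [187, 443]], dtype="float32")  # hypothetical corners
warped = four_point_transform(img, pts)
cv2.imwrite("warped.jpg", warped)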

Complete Code Display

import numpy as np
import argparse
import cv2

# Parse command-line arguments; pass the image to process with -i/--image <path>
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="Path to the image to be scanned")
args = vars(ap.parse_args())


# Helper: display an image in a window and wait for a key press
def cv_show(name, img):
    cv2.imshow(name, img)
    cv2.waitKey(0)


def order_points(pts):
    # A total of 4 coordinate points
    rect = np.zeros((4, 2), dtype="float32")

    # Order the points: index 0 = top-left, 1 = top-right, 2 = bottom-right, 3 = bottom-left
    # The top-left point has the smallest x + y sum, the bottom-right the largest
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # The top-right point has the smallest y - x difference, the bottom-left the largest
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect


def four_point_transform(image, pts):
    # Order the four input coordinate points
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Compute the output width and height from the edge lengths of the input quadrilateral
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Corresponding coordinate position after transformation
    dst = np.array([[0, 0], [maxWidth - 1, 0],
                    [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]], dtype="float32")

    # Perspective transformation:
    # cv2.getPerspectiveTransform(src, dst[, solveMethod]) -> M computes the 3x3 transformation matrix
    # cv2.warpPerspective(src, M, dsize[, dst[, flags[, borderMode[, borderValue]]]]) -> dst
    # Parameters:
    # src: coordinates of the quadrilateral vertices in the source image
    # dst: coordinates of the corresponding vertices in the output image
    # M: the 3x3 perspective transformation matrix
    # dsize: size of the output image, a (width, height) tuple
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the transformed result
    return warped


# Resize an image to a target width or height, preserving the aspect ratio
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    dim = None
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        r = width / float(w)
        dim = (width, int(h * r))
    resized = cv2.resize(image, dim, interpolation=inter)
    return resized


# Read input
image = cv2.imread(args["image"])
cv_show('image', image)

# The image is too large; shrink it for processing
ratio = image.shape[0] / 500.0  # Ratio used later to map contour coordinates back to the original image
orig = image.copy()
image = resize(orig, height=500)

# Contour detection
print("STEP 1: Contour detection")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

edged = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]  # Otsu's method picks the threshold automatically
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[0]  # [0] assumes the OpenCV 4.x return order (contours, hierarchy)
image_contours = cv2.drawContours(image.copy(), cnts, -1, (0, 0, 255), 1)
cv_show('image_contours', image_contours)

print("STEP 2: Get the maximum contour")
screenCnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0] # Get the contour with the largest area

peri = cv2.arcLength(screenCnt, True)  # Contour perimeter
screenCnt = cv2.approxPolyDP(screenCnt, 0.02 * peri, True)  # Polygon approximation; assumed to yield the 4 vertices used below
image_contour = cv2.drawContours(image.copy(), [screenCnt], -1, (0, 255, 0), 2)

cv2.imshow("image_contour", image_contour)
cv2.waitKey(0)

# Perspective transformation
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
cv2.imwrite('invoice_new.jpg', warped)
cv2.namedWindow("warped", cv2.WINDOW_NORMAL)
cv2.imshow("warped", warped)
cv2.waitKey(0)

# Binary processing
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
ref = cv2.threshold(warped, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

kernel = np.ones((2, 2), np.uint8)  # Set the kernel size
ref_new = cv2.morphologyEx(ref, cv2.MORPH_CLOSE, kernel)  # Closing operation: dilate first, then erode
cv2.namedWindow("ref_new", cv2.WINDOW_NORMAL)
cv2.imshow("ref_new", ref_new)
cv2.waitKey(0)
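One portability note: cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but (contours, hierarchy) in OpenCV 4.x, so the [0] index in STEP 1 assumes OpenCV 4.x. A small version-agnostic sketch:

res = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# OpenCV 4.x returns (contours, hierarchy); OpenCV 3.x returns (image, contours, hierarchy)
cnts = res[0] if len(res) == 2 else res[1]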

Run Results

On the left is the original image; on the right is the image after the projective transformation: