Core functionality: Mask operations on matrices (OpenCV v4.8.0)

Previous tutorial : How to scan an image, lookup table and measure time using OpenCV

Next tutorial: Image manipulation

Original author Bernát Gábor
Compatibility OpenCV >= 3.0

Masking operations on matrices are very simple. The principle is that we recalculate the value of each pixel in the image based on the mask matrix (also called a kernel). The values in this mask matrix adjust how much neighboring pixels (and the current pixel) influence the new pixel value. From a mathematical point of view, we do a weighted average using the specified values.

Our test cases

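Let us consider an image contrast enhancement method. Basically, we want to apply the following formula to every pixel of the image:

$$I(i,j) = 5 \cdot I(i,j) - \left[ I(i-1,j) + I(i+1,j) + I(i,j-1) + I(i,j+1) \right]$$

$$\iff I(i,j) * M, \quad \text{where } M = \begin{bmatrix} 0 & -1 & 0 \\ -1 & 5 & -1 \\ 0 & -1 & 0 \end{bmatrix}$$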

The first notation uses a formula, while the second is a more compact version of the first that uses a mask. To apply the mask, place the center of the mask matrix (the element at the zero-zero index) on the pixel you want to compute, multiply each overlapped pixel value by the corresponding mask value, and sum the products. Both notations describe the same operation, but for larger matrices the mask notation is much easier to read.

Code

C++

You can download the source code here, or view samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp in the examples directory of the OpenCV source code repository.

#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
using namespace std;
using namespace cv;
/**
 * Prints a short description of the sample and its command-line usage to stdout.
 *
 * @param progName Program name shown at the start of the usage line.
 */
static void help(char* progName)
{
    // The first literal ends with a space so the two concatenated pieces do not
    // run together as "thefilter2d".
    std::cout << std::endl
              << "This program shows how to filter images with mask: the write it yourself and the "
              << "filter2d way. " << std::endl
              << "Usage:" << std::endl
              << progName << " [image_path -- default lena.jpg] [G -- grayscale] " << std::endl << std::endl;
}
void Sharpen(const Mat & amp; myImage,Mat & amp; Result);
int main(int argc, char* argv[])
{<!-- -->
 help(argv[0]);
 const char* filename = argc >=2 ? argv[1] : "lena.jpg";
 Mat src, dst0, dst1;
 if (argc >= 3 & amp; & amp; !strcmp("G", argv[2]))
 src = imread( samples::findFile( filename ), IMREAD_GRAYSCALE);
 else
 src = imread( samples::findFile( filename ), IMREAD_COLOR);
 if (src.empty())
 {<!-- -->
 cerr << "Can't open image [" << filename << "]" << endl;
 return EXIT_FAILURE;
 }
 namedWindow("Input", WINDOW_AUTOSIZE);
 namedWindow("Output", WINDOW_AUTOSIZE);
 imshow( "Input", src );
 double t = (double)getTickCount();
 Sharpen(src, dst0);
 t = ((double)getTickCount() - t)/getTickFrequency();
 cout << "Hand written function time passed in seconds: " << t << endl;
 imshow( "Output", dst0 );
 waitKey();
 Mat kernel = (Mat_<char>(3,3) << 0, -1, 0,
 -1, 5, -1,
 0, -1, 0);
 t = (double)getTickCount();
 filter2D( src, dst1, src.depth(), kernel );
 t = ((double)getTickCount() - t)/getTickFrequency();
 cout << "Built-in filter2D time passed in seconds: " << t << endl;
 imshow( "Output", dst1 );
 waitKey();
 return EXIT_SUCCESS;
}
void Sharpen(const Mat & amp; myImage,Mat & amp; Result)
{<!-- -->
 CV_Assert(myImage.depth() == CV_8U); // accept only uchar images
 const int nChannels = myImage.channels();
 Result.create(myImage.size(),myImage.type());
 for(int j = 1; j < myImage.rows-1; + + j)
 {<!-- -->
 const uchar* previous = myImage.ptr<uchar>(j - 1);
 const uchar* current = myImage.ptr<uchar>(j);
 const uchar* next = myImage.ptr<uchar>(j + 1);
 uchar* output = Result.ptr<uchar>(j);
 for(int i= nChannels;i < nChannels*(myImage.cols-1); + + i)
 {<!-- -->
 output[i] = saturate_cast<uchar>(5*current[i]
 -current[i-nChannels] - current[i + nChannels] - previous[i] - next[i]);
 }
 }
 Result.row(0).setTo(Scalar(0));
 Result.row(Result.rows-1).setTo(Scalar(0));
 Result.col(0).setTo(Scalar(0));
 Result.col(Result.cols-1).setTo(Scalar(0));
}

Java

You can download the source code here, or view samples/java/tutorial_code/core/mat_mask_operations/MatMaskOperations.java in the examples directory of the OpenCV source code repository.

import org.opencv.core.Core;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.Scalar;
import org.opencv.highgui.HighGui;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
class MatMaskOperationsRun {<!-- -->
 public void run(String[] args) {<!-- -->
 String filename = "../data/lena.jpg";
 int img_codec = Imgcodecs.IMREAD_COLOR;
 if (args.length != 0) {<!-- -->
 filename = args[0];
 if (args.length >= 2 & amp; & args[1].equals("G"))
 img_codec = Imgcodecs.IMREAD_GRAYSCALE;
 }
 Mat src = Imgcodecs.imread(filename, img_codec);
 if (src.empty()) {<!-- -->
 System.out.println("Can't open image [" + filename + "]");
 System.out.println("Program Arguments: [image_path -- default ../data/lena.jpg] [G -- grayscale]");
 System.exit(-1);
 }
 HighGui.namedWindow("Input", HighGui.WINDOW_AUTOSIZE);
 HighGui.namedWindow("Output", HighGui.WINDOW_AUTOSIZE);
 HighGui.imshow( "Input", src );
 double t = System.currentTimeMillis();
 Mat dst0 = sharpen(src, new Mat());
 t = ((double) System.currentTimeMillis() - t) / 1000;
 System.out.println("Hand written function time passed in seconds: " + t);
 HighGui.imshow( "Output", dst0 );
 HighGui.moveWindow("Output", 400, 400);
 HighGui.waitKey();
 Mat kern = new Mat(3, 3, CvType.CV_8S);
 int row = 0, col = 0;
 kern.put(row, col, 0, -1, 0, -1, 5, -1, 0, -1, 0);
 t = System.currentTimeMillis();
 Mat dst1 = new Mat();
 Imgproc.filter2D(src, dst1, src.depth(), kern);
 t = ((double) System.currentTimeMillis() - t) / 1000;
 System.out.println("Built-in filter2D time passed in seconds: " + t);
 HighGui.imshow( "Output", dst1 );
 HighGui.waitKey();
 System.exit(0);
 }
 public static double saturate(double x) {<!-- -->
 return x > 255.0 ? 255.0 : (x < 0.0 ? 0.0 : x);
 }
 public Mat sharpen(Mat myImage, Mat Result) {<!-- -->
 myImage.convertTo(myImage, CvType.CV_8U);
 int nChannels = myImage.channels();
 Result.create(myImage.size(), myImage.type());
 for (int j = 1; j < myImage.rows() - 1; + + j) {<!-- -->
 for (int i = 1; i < myImage.cols() - 1; + + i) {<!-- -->
 double sum[] = new double[nChannels];
 for (int k = 0; k < nChannels; + + k) {<!-- -->
 double top = -myImage.get(j - 1, i)[k];
 double bottom = -myImage.get(j + 1, i)[k];
 double center = (5 * myImage.get(j, i)[k]);
 double left = -myImage.get(j, i - 1)[k];
 double right = -myImage.get(j, i + 1)[k];
 sum[k] = saturate(top + bottom + center + left + right);
 }
 Result.put(j, i, sum);
 }
 }
 Result.row(0).setTo(new Scalar(0));
 Result.row(Result.rows() - 1).setTo(new Scalar(0));
 Result.col(0).setTo(new Scalar(0));
 Result.col(Result.cols() - 1).setTo(new Scalar(0));
 return Result;
 }
}
public class MatMaskOperations {<!-- -->
 public static void main(String[] args) {<!-- -->
 // Load the native library.
 System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
 new MatMaskOperationsRun().run(args);
 }
}

Python

You can download the source code here, or view samples/python/tutorial_code/core/mat_mask_operations/mat_mask_operations.py in the examples directory of the OpenCV source code repository.

from __future__ import print_function
import sys
import time
import numpy as np
import cv2 as cv
def is_grayscale(my_image):
    """Return True when the image's shape has fewer than three entries."""
    n_dims = len(my_image.shape)
    return n_dims < 3
def saturated(sum_value):
    """Clamp sum_value to the range [0, 255] and return it."""
    if sum_value > 255:
        sum_value = 255
    if sum_value < 0:
        sum_value = 0
    return sum_value
def sharpen(my_image):
    """Sharpen an image with the mask [[0, -1, 0], [-1, 5, -1], [0, -1, 0]].

    Uses explicit per-pixel loops (the "write it yourself" variant).

    :param my_image: 2-D (grayscale) or 3-D (multi-channel) NumPy image array.
    :return: uint8 array of the same shape; the outermost rows and columns are
        left at 0 because the mask is not evaluated there.
    """
    # Make sure we work on unsigned 8-bit data. cv.cvtColor expects a
    # colour-conversion code, not a depth constant such as cv.CV_8U, so it
    # cannot be used for this.
    my_image = my_image.astype(np.uint8, copy=False)

    grayscale = is_grayscale(my_image)
    if grayscale:
        height, width = my_image.shape
    else:
        height, width, n_channels = my_image.shape
    result = np.zeros(my_image.shape, my_image.dtype)

    # int() widens every sample to a Python int before the arithmetic, so the
    # weighted sum cannot wrap around in uint8 before it is saturated.
    for j in range(1, height - 1):
        for i in range(1, width - 1):
            if grayscale:
                sum_value = 5 * int(my_image[j, i]) - int(my_image[j + 1, i]) \
                            - int(my_image[j - 1, i]) - int(my_image[j, i + 1]) \
                            - int(my_image[j, i - 1])
                result[j, i] = saturated(sum_value)
            else:
                for k in range(0, n_channels):
                    sum_value = 5 * int(my_image[j, i, k]) - int(my_image[j + 1, i, k]) \
                                - int(my_image[j - 1, i, k]) - int(my_image[j, i + 1, k]) \
                                - int(my_image[j, i - 1, k])
                    result[j, i, k] = saturated(sum_value)

    return result
def main(argv):
    """Run the demo: sharpen an image by hand and with cv.filter2D, timing both.

    :param argv: command-line arguments without the program name; argv[0] is the
        optional image path (default 'lena.jpg'), argv[1] == "G" loads grayscale.
    :return: 0 on success, -1 when the image cannot be loaded.
    """
    filename = 'lena.jpg'
    img_codec = cv.IMREAD_COLOR
    if argv:
        # Read from the argv that was passed in rather than from sys.argv, so
        # the function does not depend on how the caller sliced the arguments.
        filename = argv[0]
        if len(argv) >= 2 and argv[1] == "G":
            img_codec = cv.IMREAD_GRAYSCALE

    src = cv.imread(cv.samples.findFile(filename), img_codec)
    if src is None:
        print("Can't open image [" + filename + "]")
        print("Usage:")
        print("mat_mask_operations.py [image_path -- default lena.jpg] [G -- grayscale]")
        return -1

    cv.namedWindow("Input", cv.WINDOW_AUTOSIZE)
    cv.namedWindow("Output", cv.WINDOW_AUTOSIZE)
    cv.imshow("Input", src)

    # Time the hand-written implementation. The start time is not rounded:
    # rounding it would distort the measured duration by up to half a second.
    t = time.time()
    dst0 = sharpen(src)
    t = (time.time() - t)
    print("Hand written function time passed in seconds: %s" % t)
    cv.imshow("Output", dst0)
    cv.waitKey()

    # Time the built-in implementation.
    t = time.time()

    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]], np.float32)  # kernel should be floating point type

    dst1 = cv.filter2D(src, -1, kernel)
    # ddepth = -1, means destination image has depth same as input image

    t = (time.time() - t)
    print("Built-in filter2D time passed in seconds: %s" % t)
    cv.imshow("Output", dst1)
    cv.waitKey(0)
    cv.destroyAllWindows()
    return 0
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    sys.exit(main(sys.argv[1:]))

Basic Method

Now let’s see how to achieve this using basic pixel access methods or the filter2D() function.

The following function can achieve this functionality:

C++

void Sharpen(const Mat & amp; myImage,Mat & amp; Result)
{<!-- -->
 CV_Assert(myImage.depth() == CV_8U); // accept only uchar images
 const int nChannels = myImage.channels();
 Result.create(myImage.size(),myImage.type());
 for(int j = 1; j < myImage.rows-1; + + j)
 {<!-- -->
 const uchar* previous = myImage.ptr<uchar>(j - 1);
 const uchar* current = myImage.ptr<uchar>(j);
 const uchar* next = myImage.ptr<uchar>(j + 1);
 uchar* output = Result.ptr<uchar>(j);
 for(int i= nChannels;i < nChannels*(myImage.cols-1); + + i)
 {<!-- -->
 output[i] = saturate_cast<uchar>(5*current[i]
 -current[i-nChannels] - current[i + nChannels] - previous[i] - next[i]);
 }
 }
 Result.row(0).setTo(Scalar(0));
 Result.row(Result.rows-1).setTo(Scalar(0));
 Result.col(0).setTo(Scalar(0));
 Result.col(Result.cols-1).setTo(Scalar(0));
}

Java

public static double saturate(double x) {<!-- -->
 return x > 255.0 ? 255.0 : (x < 0.0 ? 0.0 : x);
 }
 public Mat sharpen(Mat myImage, Mat Result) {<!-- -->
 myImage.convertTo(myImage, CvType.CV_8U);
 int nChannels = myImage.channels();
 Result.create(myImage.size(), myImage.type());
 for (int j = 1; j < myImage.rows() - 1; + + j) {<!-- -->
 for (int i = 1; i < myImage.cols() - 1; + + i) {<!-- -->
 double sum[] = new double[nChannels];
 for (int k = 0; k < nChannels; + + k) {<!-- -->
 double top = -myImage.get(j - 1, i)[k];
 double bottom = -myImage.get(j + 1, i)[k];
 double center = (5 * myImage.get(j, i)[k]);
 double left = -myImage.get(j, i - 1)[k];
 double right = -myImage.get(j, i + 1)[k];
 sum[k] = saturate(top + bottom + center + left + right);
 }
 Result.put(j, i, sum);
 }
 }
 Result.row(0).setTo(new Scalar(0));
 Result.row(Result.rows() - 1).setTo(new Scalar(0));
 Result.col(0).setTo(new Scalar(0));
 Result.col(Result.cols() - 1).setTo(new Scalar(0));
 return Result;
 }

Python

def is_grayscale(my_image):
    """Return True when the image's shape has fewer than three entries."""
    n_dims = len(my_image.shape)
    return n_dims < 3
def saturated(sum_value):
    """Clamp sum_value to the range [0, 255] and return it."""
    if sum_value > 255:
        sum_value = 255
    if sum_value < 0:
        sum_value = 0
    return sum_value
def sharpen(my_image):
    """Sharpen an image with the mask [[0, -1, 0], [-1, 5, -1], [0, -1, 0]].

    Uses explicit per-pixel loops (the "write it yourself" variant).

    :param my_image: 2-D (grayscale) or 3-D (multi-channel) NumPy image array.
    :return: uint8 array of the same shape; the outermost rows and columns are
        left at 0 because the mask is not evaluated there.
    """
    # Make sure we work on unsigned 8-bit data. cv.cvtColor expects a
    # colour-conversion code, not a depth constant such as cv.CV_8U, so it
    # cannot be used for this.
    my_image = my_image.astype(np.uint8, copy=False)

    grayscale = is_grayscale(my_image)
    if grayscale:
        height, width = my_image.shape
    else:
        height, width, n_channels = my_image.shape
    result = np.zeros(my_image.shape, my_image.dtype)

    # int() widens every sample to a Python int before the arithmetic, so the
    # weighted sum cannot wrap around in uint8 before it is saturated.
    for j in range(1, height - 1):
        for i in range(1, width - 1):
            if grayscale:
                sum_value = 5 * int(my_image[j, i]) - int(my_image[j + 1, i]) \
                            - int(my_image[j - 1, i]) - int(my_image[j, i + 1]) \
                            - int(my_image[j, i - 1])
                result[j, i] = saturated(sum_value)
            else:
                for k in range(0, n_channels):
                    sum_value = 5 * int(my_image[j, i, k]) - int(my_image[j + 1, i, k]) \
                                - int(my_image[j - 1, i, k]) - int(my_image[j, i + 1, k]) \
                                - int(my_image[j, i - 1, k])
                    result[j, i, k] = saturated(sum_value)

    return result

First, we need to ensure that the input image data is in unsigned 8-bit format.

C++

 CV_Assert(myImage.depth() == CV_8U); // Only accepts uchar images

Java

myImage.convertTo(myImage, CvType.CV_8U);

Python

# Convert the pixel data to unsigned 8-bit. cv.cvtColor expects a
# colour-conversion code, not a depth constant, so it cannot be used for this.
my_image = my_image.astype(np.uint8, copy=False)

The output image we create is the same size and type as the input image. As you can see in the storage section, depending on the number of channels, we may have one or more subcolumns.

C++

 const int nChannels = myImage.channels();
 Result.create(myImage.size(),myImage.type());

Java

 int nChannels = myImage.channels();
 Result.create(myImage.size(), myImage.type());

Python

height, width, n_channels = my_image.shape
result = np.zeros(my_image.shape, my_image.dtype)

We’ll use the plain C [] operator to access pixels. Because we need to access multiple rows simultaneously, we acquire a pointer to each of them (previous row, current row, and next row). We also need another pointer to the row where we save the calculation results. Then we simply use the [] operator to access the right elements; the output row is indexed the same way (output[i]), so no separate output pointer has to be advanced:

C++

 for(int j = 1; j < myImage.rows-1; + + j)
 {<!-- -->
 const uchar* previous = myImage.ptr<uchar>(j - 1);
 const uchar* current = myImage.ptr<uchar>(j);
 const uchar* next = myImage.ptr<uchar>(j + 1);
 uchar* output = Result.ptr<uchar>(j);
 for(int i= nChannels;i < nChannels*(myImage.cols-1); + + i)
 {<!-- -->
 output[i] = saturate_cast<uchar>(5*current[i]
 -current[i-nChannels] - current[i + nChannels] - previous[i] - next[i]);
 }
 }

Java

 // Visit every interior pixel; the mask needs all four direct neighbours.
 for (int j = 1; j < myImage.rows() - 1; ++j) {
     for (int i = 1; i < myImage.cols() - 1; ++i) {
         double sum[] = new double[nChannels];
         for (int k = 0; k < nChannels; ++k) {
             double top = -myImage.get(j - 1, i)[k];
             double bottom = -myImage.get(j + 1, i)[k];
             double center = (5 * myImage.get(j, i)[k]);
             double left = -myImage.get(j, i - 1)[k];
             double right = -myImage.get(j, i + 1)[k];
             sum[k] = saturate(top + bottom + center + left + right);
         }
         Result.put(j, i, sum);
     }
 }

Python

 # int() widens every sample to a Python int before the arithmetic, so the
 # weighted sum cannot wrap around in uint8 before it is saturated.
 for j in range(1, height - 1):
     for i in range(1, width - 1):
         if is_grayscale(my_image):
             sum_value = 5 * int(my_image[j, i]) - int(my_image[j + 1, i]) \
                         - int(my_image[j - 1, i]) - int(my_image[j, i + 1]) \
                         - int(my_image[j, i - 1])
             result[j, i] = saturated(sum_value)
         else:
             for k in range(0, n_channels):
                 sum_value = 5 * int(my_image[j, i, k]) - int(my_image[j + 1, i, k]) \
                             - int(my_image[j - 1, i, k]) - int(my_image[j, i + 1, k]) \
                             - int(my_image[j, i - 1, k])
                 result[j, i, k] = saturated(sum_value)

filter2D function

In image processing, it is very common to apply such filters, so OpenCV provides a function to handle the application of masks (also called kernels in some places). To do this, you first need to define an object that holds the mask:

C++

Mat kernel = (Mat_<char>(3,3) << 0, -1, 0,
 -1, 5, -1,
 0, -1, 0);

Java

 Mat kern = new Mat(3, 3, CvType.CV_8S);
 int row = 0, col = 0;
 kern.put(row, col, 0, -1, 0, -1, 5, -1, 0, -1, 0);

Python

 kernel = np.array([[0, -1, 0],
 [-1, 5, -1],
 [0, -1, 0]], np.float32) # The kernel should be of floating point type

Then call the filter2D() function, specifying the input, output image, and kernel to use:

C++

 filter2D( src, dst1, src.depth(), kernel );

Java

 Imgproc.filter2D(src, dst1, src.depth(), kern);

Python

 dst1 = cv.filter2D(src, -1, kernel)
 # ddepth =-1, indicating that the depth of the target image is the same as the input image

The function even has a fifth optional parameter that specifies the center (anchor) of the kernel, a sixth that adds an optional value to the filtered pixels before they are stored in the destination image, and a seventh that determines what to do in the regions where the operation is undefined (the borders).

This function is shorter, less verbose, and, because of some optimizations, usually faster than the hand-coded approach. For example, in my tests, the second function took only 13 milliseconds, while the first function took about 31 milliseconds. The difference is quite big.

For example