Application Utility – Video Input using OpenCV and Similarity Measurement OpenCV v4.8.0

Previous tutorial: Reading geospatial raster files using GDAL

Next tutorial: Creating a video using OpenCV

Original author Bernát Gábor
Compatibility OpenCV >= 3.0

Goals

Today, owning a digital video recording system has become a common phenomenon. So what you end up dealing with is no longer a batch of images, but a stream of video. These video streams may come in two forms: a live image feed (such as a webcam) or a file that is pre-recorded and stored on your hard drive. Fortunately, OpenCV handles both streams in the same way using the same C++ classes. Here’s what this tutorial is about:

  • How to open and read video stream
  • Two ways to check image similarity: PSNR and SSIM

Source code

As a test case to demonstrate these methods using OpenCV, I created a small program that reads in two video files and performs a similarity check on them. You can use it to check whether the new video compression algorithm works. Suppose there is a reference (original) video, such as this little clip from Megamind, and a compressed version of it. You can also find the source code and these video files in the samples/data folder of the OpenCV source code repository.
C++

#include <iostream>  // for standard I/O
#include <string>    // for strings
#include <iomanip>   // for controlling floating point print precision
#include <sstream>   // string to number conversion
#include <opencv2/core.hpp>     // Basic OpenCV structures (cv::Mat, Scalar)
#include <opencv2/imgproc.hpp>  // Gaussian blur
#include <opencv2/videoio.hpp>  // Video capture (cv::VideoCapture)
#include <opencv2/highgui.hpp>  // OpenCV window I/O
using namespace std;
using namespace cv;
// Forward declarations of the two frame-similarity metrics implemented below.
double getPSNR (const Mat& I1, const Mat& I2);
Scalar getMSSIM(const Mat& I1, const Mat& I2);
// Print a short description and the expected command-line usage.
static void help()
{
    cout
        << "------------------------------------------------------------------------------------" << endl
        << "This program shows how to read a video file with OpenCV. In addition, it "
        << "tests the similarity of two input videos first with PSNR, and for the frames "
        << "below a PSNR trigger value, also with MSSIM." << endl
        << "Usage:" << endl
        << "./video-input-psnr-ssim <referenceVideo> <useCaseTestVideo> <PSNR_Trigger_Value> <Wait_Between_Frames> " << endl
        << "------------------------------------------------------------------------------" << endl
        << endl;
}
int main(int argc, char *argv[])
{<!-- -->
 help();
 if (argc != 5)
 {<!-- -->
 cout << "Not enough parameters" << endl;
 return -1;
 }
 stringstream conv;
 const string sourceReference = argv[1], sourceCompareWith = argv[2];
 int psnrTriggerValue, delay;
 conv << argv[3] << endl << argv[4]; // Input string
 conv >> psnrTriggerValue >> delay; // Delete numbers
 int frameNum = -1; // frame counter
 VideoCapture captRefrnc(sourceReference), captUndTst(sourceCompareWith);
 if (!captRefrnc.isOpened())
 {<!-- -->
 cout << "Could not open reference " << sourceReference << endl;
 return -1;
 }
 if (!captUndTst.isOpened())
 {<!-- -->
 cout << "Could not open case test " << sourceCompareWith << endl;
 return -1;
 }
 Size refS = Size((int) captRefrnc.get(CAP_PROP_FRAME_WIDTH),
 (int) captRefrnc.get(CAP_PROP_FRAME_HEIGHT)),
 uTSi = Size((int) captUndTst.get(CAP_PROP_FRAME_WIDTH),
 (int) captUndTst.get(CAP_PROP_FRAME_HEIGHT));
 if (refS != uTSi)
 {<!-- -->
 cout << "Inputs have different size!!! Closing." << endl;
 return -1;
 }
 const char* WIN_UT = "Under Test";
 const char* WIN_RF = "Reference";
 // window
 namedWindow(WIN_RF, WINDOW_AUTOSIZE);
 namedWindow(WIN_UT, WINDOW_AUTOSIZE);
 moveWindow(WIN_RF, 400, 0); //750, 2 (bernat =0)
 moveWindow(WIN_UT, refS.width, 0); //1500, 2
 cout << "Reference frame resolution: Width=" << refS.width << " Height=" << refS.height
 << " of nr#: " << captRefrnc.get(CAP_PROP_FRAME_COUNT) << endl;
 cout << "PSNR trigger value " << setiosflags(ios::fixed) << setprecision(3)
 << psnrTriggerValue << endl;
 Mat frameReference, frameUnderTest;
 double psnrV;
 Scalar mssimV;
 for(;;) //Display captured image in window and repeat
 {<!-- -->
 captRefrnc >> frameReference;
 captUndTst >> frameUnderTest;
 if (frameReference.empty() || frameUnderTest.empty())
 {<!-- -->
 cout << " < < < Game over! > > > ";
 break;
 }
  + + frameNum;
 cout << "Frame: " << frameNum << "# ";
 psnrV = getPSNR(frameReference,frameUnderTest);
 cout << setiosflags(ios::fixed) << setprecision(3) << psnrV << "dB";
 if (psnrV < psnrTriggerValue & amp; & amp; psnrV)
 {<!-- -->
 mssimV = getMSSIM(frameReference, frameUnderTest);
 cout << " MSSIM: "
 << " R " << setiosflags(ios::fixed) << setprecision(2) << mssimV.val[2] * 100 << "%"
 << " G " << setiosflags(ios::fixed) << setprecision(2) << mssimV.val[1] * 100 << "%"
 << " B " << setiosflags(ios::fixed) << setprecision(2) << mssimV.val[0] * 100 << "%";
 }
 cout << endl;
 imshow(WIN_RF, frameReference);
 imshow(WIN_UT, frameUnderTest);
 char c = (char)waitKey(delay);
 if (c == 27) break;
 }
 return 0;
}
// ![get-psnr]
double getPSNR(const Mat & amp; I1, const Mat & amp; I2)
{<!-- -->
 Mat s1;
 absdiff(I1, I2, s1); // |I1 - I2|
 s1.convertTo(s1, CV_32F); // Cannot perform squaring operation on 8 bits
 s1 = s1.mul(s1); // |I1 - I2|^2
 Scalar s = sum(s1); // Sum of elements for each channel
 double sse = s.val[0] + s.val[1] + s.val[2]; // Channel sum
 if( sse <= 1e-10) // For decimal values, return 0
 return 0;
 else
 {<!-- -->
 double mse = sse / (double)(I1.channels() * I1.total());
 double psnr = 10.0 * log10((255 * 255) / mse);
 return psnr;
 }
}
// ![get-psnr]
// ![get-mssim]
/**
 * Structural Similarity (SSIM) index between two same-sized images.
 * Returns one similarity value per channel in [0, 1] (1 = perfect match),
 * computed with 11x11 Gaussian windows, sigma = 1.5.
 */
Scalar getMSSIM(const Mat& i1, const Mat& i2)
{
    // Stabilizing constants: (0.01*255)^2 and (0.03*255)^2.
    const double C1 = 6.5025, C2 = 58.5225;
    /***************************** INITS **********************************/
    int d = CV_32F;
    Mat I1, I2;
    i1.convertTo(I1, d); // cannot calculate on one-byte-large values
    i2.convertTo(I2, d);
    Mat I2_2  = I2.mul(I2); // I2^2
    Mat I1_2  = I1.mul(I1); // I1^2
    Mat I1_I2 = I1.mul(I2); // I1 * I2
    /*************************** END INITS ********************************/
    Mat mu1, mu2; // preliminary computing: local means
    GaussianBlur(I1, mu1, Size(11, 11), 1.5);
    GaussianBlur(I2, mu2, Size(11, 11), 1.5);
    Mat mu1_2 = mu1.mul(mu1);
    Mat mu2_2 = mu2.mul(mu2);
    Mat mu1_mu2 = mu1.mul(mu2);
    // Local variances and covariance: E[X^2]-E[X]^2 and E[XY]-E[X]E[Y].
    Mat sigma1_2, sigma2_2, sigma12;
    GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5);
    sigma1_2 -= mu1_2;
    GaussianBlur(I2_2, sigma2_2, Size(11, 11), 1.5);
    sigma2_2 -= mu2_2;
    GaussianBlur(I1_I2, sigma12, Size(11, 11), 1.5);
    sigma12 -= mu1_mu2;
    Mat t1, t2, t3;
    t1 = 2 * mu1_mu2 + C1;
    t2 = 2 * sigma12 + C2;
    t3 = t1.mul(t2); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
    t1 = mu1_2 + mu2_2 + C1;
    t2 = sigma1_2 + sigma2_2 + C2;
    t1 = t1.mul(t2); // t1 = ((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
    Mat ssim_map;
    divide(t3, t1, ssim_map);      // ssim_map = t3 ./ t1;
    Scalar mssim = mean(ssim_map); // mssim = average of the SSIM map
    return mssim;
}
// ![get-mssim]

Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2 as cv
import argparse
import sys
#[get-psnr]
def getPSNR(I1, I2):
    """Return the PSNR (dB) between two same-shaped images, or 0 when they
    are practically identical (the metric is undefined for zero MSE)."""
    s1 = cv.absdiff(I1, I2)  # |I1 - I2|
    s1 = np.float32(s1)      # cannot square 8-bit values without overflow
    s1 = s1 * s1             # |I1 - I2|^2
    sse = s1.sum()           # sum over all elements and channels
    if sse <= 1e-10:         # for tiny differences return zero
        return 0
    else:
        shape = I1.shape     # assumes a 3-channel (H, W, C) image
        mse = 1.0 * sse / (shape[0] * shape[1] * shape[2])
        psnr = 10.0 * np.log10((255 * 255) / mse)  # 255 = max 8-bit pixel value
        return psnr
#[get-psnr]
#[get-mssim]
def getMSSISM(i1, i2):
    """Return the per-channel SSIM between two same-shaped images as a
    4-tuple (cv.mean scalar); each value is in [0, 1], 1 = perfect match."""
    # Stabilizing constants: (0.01*255)^2 and (0.03*255)^2.
    C1 = 6.5025
    C2 = 58.5225
    # INITS
    I1 = np.float32(i1)  # cannot calculate on one-byte-large values
    I2 = np.float32(i2)
    # NOTE: squares/products must use the float32 copies (I1/I2), not the raw
    # uint8 inputs, otherwise the multiplications overflow.
    I2_2 = I2 * I2    # I2^2
    I1_2 = I1 * I1    # I1^2
    I1_I2 = I1 * I2   # I1 * I2
    # END INITS
    # Preliminary computing: local means
    mu1 = cv.GaussianBlur(I1, (11, 11), 1.5)
    mu2 = cv.GaussianBlur(I2, (11, 11), 1.5)
    mu1_2 = mu1 * mu1
    mu2_2 = mu2 * mu2
    mu1_mu2 = mu1 * mu2
    # Local variances and covariance: E[X^2]-E[X]^2 and E[XY]-E[X]E[Y].
    sigma1_2 = cv.GaussianBlur(I1_2, (11, 11), 1.5)
    sigma1_2 -= mu1_2
    sigma2_2 = cv.GaussianBlur(I2_2, (11, 11), 1.5)
    sigma2_2 -= mu2_2
    sigma12 = cv.GaussianBlur(I1_I2, (11, 11), 1.5)
    sigma12 -= mu1_mu2
    t1 = 2 * mu1_mu2 + C1
    t2 = 2 * sigma12 + C2
    t3 = t1 * t2  # t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
    t1 = mu1_2 + mu2_2 + C1
    t2 = sigma1_2 + sigma2_2 + C2
    t1 = t1 * t2  # t1 = ((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
    ssim_map = cv.divide(t3, t1)  # ssim_map = t3./t1
    mssim = cv.mean(ssim_map)     # mssim = mean of the SSIM map
    return mssim
#[get-mssim]
def main():
    """Read two videos frame by frame and compare them: PSNR on every frame,
    MSSIM on the frames whose PSNR falls below the trigger value."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--delay", type=int, default=30, help=" Time delay")
    parser.add_argument("-v", "--psnrtriggervalue", type=int, default=30, help="PSNR Trigger Value")
    parser.add_argument("-r", "--ref", type=str, default="Megamind.avi", help="Path to reference video")
    parser.add_argument("-t", "--undertest", type=str, default="Megamind_bugy.avi",
                        help="Path to the video to be tested")
    args = parser.parse_args()
    sourceReference = args.ref
    sourceCompareWith = args.undertest
    delay = args.delay
    psnrTriggerValue = args.psnrtriggervalue
    framenum = -1  # frame counter
    captRefrnc = cv.VideoCapture(cv.samples.findFileOrKeep(sourceReference))
    captUndTst = cv.VideoCapture(cv.samples.findFileOrKeep(sourceCompareWith))
    if not captRefrnc.isOpened():
        print("Could not open the reference " + sourceReference)
        sys.exit(-1)
    if not captUndTst.isOpened():
        print("Could not open case test " + sourceCompareWith)
        sys.exit(-1)
    # A per-pixel comparison only makes sense for equally-sized inputs.
    refS = (int(captRefrnc.get(cv.CAP_PROP_FRAME_WIDTH)), int(captRefrnc.get(cv.CAP_PROP_FRAME_HEIGHT)))
    uTSi = (int(captUndTst.get(cv.CAP_PROP_FRAME_WIDTH)), int(captUndTst.get(cv.CAP_PROP_FRAME_HEIGHT)))
    if refS != uTSi:
        print("Inputs have different size!!! Closing.")
        sys.exit(-1)
    WIN_UT = "Under Test"
    WIN_RF = "Reference"
    cv.namedWindow(WIN_RF, cv.WINDOW_AUTOSIZE)
    cv.namedWindow(WIN_UT, cv.WINDOW_AUTOSIZE)
    cv.moveWindow(WIN_RF, 400, 0)      # 750,  2 (bernat =0)
    cv.moveWindow(WIN_UT, refS[0], 0)  # 1500, 2
    print("Reference frame resolution: Width={} Height={} of nr#: {}".format(refS[0], refS[1],
          captRefrnc.get(cv.CAP_PROP_FRAME_COUNT)))
    print("PSNR trigger value {}".format(psnrTriggerValue))
    while True:  # show the captured images in the windows and repeat
        _, frameReference = captRefrnc.read()
        _, frameUnderTest = captUndTst.read()
        # read() returns None frames once a stream is closed or exhausted.
        if frameReference is None or frameUnderTest is None:
            print(" < < < Game over! > > > ")
            break
        framenum += 1
        psnrv = getPSNR(frameReference, frameUnderTest)
        print("Frame: {}# {}dB".format(framenum, round(psnrv, 3)), end=" ")
        # SSIM is expensive; compute it only when the PSNR indicates a
        # noticeable difference (and the PSNR itself is defined, i.e. non-zero).
        if (psnrv < psnrTriggerValue and psnrv):
            mssimv = getMSSISM(frameReference, frameUnderTest)
            print("MSSISM: R {}% G {}% B {}%".format(round(mssimv[2] * 100, 2), round(mssimv[1] * 100, 2),
                                                     round(mssimv[0] * 100, 2)), end=" ")
        print()
        cv.imshow(WIN_RF, frameReference)
        cv.imshow(WIN_UT, frameUnderTest)
        k = cv.waitKey(delay)
        if k == 27:  # ESC quits early
            break
    sys.exit(0)

if __name__ == "__main__":
    main()

How to read video stream (online camera or offline file)?

Basically, all the functionality needed for video processing is integrated in the cv::VideoCapture C++ class. The class itself is based on the FFmpeg open source library. This is a basic dependency of OpenCV, so you don’t need to worry about it. Video consists of consecutive images, which we refer to in the literature as frames. Video files have a frame rate that specifies the time between frames. For cameras, there is usually a limit to the number of frames per second that can be digitized, but this feature is less important because the camera sees a snapshot of the current world at any time.

The first thing you need to do is assign a source to the cv::VideoCapture class. You can do this via cv::VideoCapture::VideoCapture or its cv::VideoCapture::open function. If the parameter is an integer, then you will bind the class to a camera, a device. The number passed here is the device ID assigned by the operating system. If you only have one camera connected to your system, its ID will most likely be 0 and then increase from there. If the parameter passed in is a string, it refers to the video file, and the string points to the location and name of the file. For example, a valid command line for the upper source code is

./video-input-psnr-ssim video/Megamind.avi video/Megamind_bug.avi 35 10

We perform a similarity check. This requires a reference file and a test case video file. The first two parameters refer to this file. Here we use relative addresses. This means that the application will look in the current working directory and open the Videos folder, trying to find Megamind.avi and Megamind_bug.avi there.

const string sourceReference = argv[1], sourceCompareWith = argv[2];
VideoCapture captRefrnc(sourceReference);
// or
VideoCapture captUndTst;
captUndTst.open(sourceCompareWith);

To check whether the class binding to the video source was successful, use the cv::VideoCapture::isOpened function:

if ( !captRefrnc.isOpened())
 {
 cout << "Could not open reference " << sourceReference << endl;
 return -1;
 }

The video automatically closes when the object’s destructor is called. However, if you want to close the video before then, you need to call the cv::VideoCapture::release function. Frames of a video are just simple images. So we just extract them from the cv::VideoCapture object and put them into a Mat object. The video stream is continuous. You can get frames one by one using cv::VideoCapture::read or the overloaded >> operator:

Mat frameReference, frameUnderTest;
captRefrnc >> frameReference;
captUndTst.read(frameUnderTest);

If the frame cannot be obtained (the video stream has been closed or the video file has ended), the upper read operation will leave the Mat object empty. We can check this with a simple if:

if( frameReference.empty() || frameUnderTest.empty())
{
 // exit the program
}

The reading method consists of frame grabbing and decoding. You can call these two functions explicitly using the cv::VideoCapture::grab and cv::VideoCapture::retrieve functions.

In addition to the content of the frame, a lot of information is attached to the video. This information is usually numeric, but in some cases may be a short sequence of characters (4 bytes or less). So, to get this information, there is a generic function called cv::VideoCapture::get that returns a double value containing these properties. Decodes characters from double type and conversion using bitwise operations where valid values are only integers. Its single parameter is the ID of the property being queried. For example, here we can get the size of the frames in the reference file and the test case video file, as well as the number of frames in the reference file.

Size refS = Size((int) captRefrnc.get(CAP_PROP_FRAME_WIDTH),
 (int) captRefrnc.get(CAP_PROP_FRAME_HEIGHT)),
cout << "Reference frame resolution: width=" << refS.width << " height=" << refS.height
 << " nr#: " << captRefrnc.get(CAP_PROP_FRAME_COUNT) << endl;

When working with video, you may often want to control these values yourself. For this purpose, we provide a cv::VideoCapture::set function. Its first parameter is still the name of the property to be changed, and the second parameter is of type double containing the value to be set. Returns true if setting is successful, false otherwise. Finding a given time or frame in a video file is a good example:

captRefrnc.set(CAP_PROP_POS_MSEC, 1.2); // Go to 1.2 seconds in the video
captRefrnc.set(CAP_PROP_POS_FRAMES, 10); // Go to frame 10 of the video
// The read operation will now read the frame at the set position.

See the documentation for the cv::VideoCapture::get and cv::VideoCapture::set functions for the properties that can be read and changed.

Image similarity – PSNR and SSIM

We want to check how imperceptible video conversion operations are, so we need a system to check for similarities or differences frame by frame. The most commonly used algorithm is PSNR (also known as Peak Signal-to-Noise Ratio). The simplest definition starts with the mean squared error. Suppose there are two images, I1 and I2, each with dimensions i by j and consisting of c channels.

MSE = \frac{1}{c \cdot i \cdot j} \sum{(I_1-I_2)^2}
Then the PSNR is expressed as

PSNR = 10 \cdot \log_{10} \left( \frac{MAX_I^2}{MSE} \right)
where MAX_I is the maximum valid pixel value. For simple single-byte images, the maximum valid value per channel per pixel is 255. When the two images are identical, the MSE will be zero, which would make the division in the PSNR formula invalid. In this case, PSNR is undefined and we need to handle this case separately. The transition to a logarithmic scale occurs because the dynamic range of pixel values is very large. All these steps converted to OpenCV look like this:
C++

double getPSNR(const Mat & amp; I1, const Mat & amp; I2)
{<!-- -->
 Mat s1;
 absdiff(I1, I2, s1); // |I1 - I2|
 s1.convertTo(s1, CV_32F); // cannot make a square on 8 bits
 s1 = s1.mul(s1); // |I1 - I2|^2
 Scalar s = sum(s1); // sum elements per channel
 double sse = s.val[0] + s.val[1] + s.val[2]; // sum channels
 if( sse <= 1e-10) // for small values return zero
 return 0;
 else
 {<!-- -->
 double mse = sse / (double)(I1.channels() * I1.total());
 double psnr = 10.0 * log10((255 * 255) / mse);
 return psnr;
 }
}

Python

def getPSNR(I1, I2):
    """PSNR (dB) between two same-shaped images; 0 when practically identical."""
    s1 = cv.absdiff(I1, I2)  # |I1 - I2|
    s1 = np.float32(s1)      # cannot make a square on 8 bits
    s1 = s1 * s1             # |I1 - I2|^2
    sse = s1.sum()           # sum elements over all channels
    if sse <= 1e-10:         # for small values return zero
        return 0
    else:
        shape = I1.shape
        mse = 1.0 * sse / (shape[0] * shape[1] * shape[2])
        psnr = 10.0 * np.log10((255 * 255) / mse)
        return psnr

Typically, video compression results in values between 30 and 50, with higher being better. If the images are very different, the resulting value will be much lower, like 15, etc. This similarity check is simple and fast to calculate, but may be inconsistent with human eye perception in practical applications. The Structural Similarity algorithm is designed to correct this.

Describing these methods is well beyond the purpose of this tutorial. To do this, I invite you to read the article introducing this algorithm. However, you can get an idea of the algorithm by looking at the OpenCV implementation below.

Comments
SSIM is described in more depth in: Z. Wang, A. C. Bovik, H. R. Sheikh and E. P. Simoncelli,
"Image quality assessment: from error visibility to structural similarity,"
IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, 2004.

C++

// Per-channel SSIM in [0, 1] between two same-sized images (1 = perfect match).
Scalar getMSSIM(const Mat& i1, const Mat& i2)
{
    const double C1 = 6.5025, C2 = 58.5225; // (0.01*255)^2, (0.03*255)^2
    /***************************** INITS **********************************/
    int d = CV_32F;
    Mat I1, I2;
    i1.convertTo(I1, d); // cannot calculate on one byte large values
    i2.convertTo(I2, d);
    Mat I2_2  = I2.mul(I2); // I2^2
    Mat I1_2  = I1.mul(I1); // I1^2
    Mat I1_I2 = I1.mul(I2); // I1 * I2
    /*************************** END INITS ********************************/
    Mat mu1, mu2; // PRELIMINARY COMPUTING
    GaussianBlur(I1, mu1, Size(11, 11), 1.5);
    GaussianBlur(I2, mu2, Size(11, 11), 1.5);
    Mat mu1_2 = mu1.mul(mu1);
    Mat mu2_2 = mu2.mul(mu2);
    Mat mu1_mu2 = mu1.mul(mu2);
    Mat sigma1_2, sigma2_2, sigma12;
    GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5);
    sigma1_2 -= mu1_2;
    GaussianBlur(I2_2, sigma2_2, Size(11, 11), 1.5);
    sigma2_2 -= mu2_2;
    GaussianBlur(I1_I2, sigma12, Size(11, 11), 1.5);
    sigma12 -= mu1_mu2;
    Mat t1, t2, t3;
    t1 = 2 * mu1_mu2 + C1;
    t2 = 2 * sigma12 + C2;
    t3 = t1.mul(t2); // t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
    t1 = mu1_2 + mu2_2 + C1;
    t2 = sigma1_2 + sigma2_2 + C2;
    t1 = t1.mul(t2); // t1 = ((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
    Mat ssim_map;
    divide(t3, t1, ssim_map);      // ssim_map = t3 ./ t1;
    Scalar mssim = mean(ssim_map); // mssim = average of ssim map
    return mssim;
}

Python

def getMSSISM(i1, i2):
    """Per-channel SSIM in [0, 1] between two same-shaped images (1 = match)."""
    C1 = 6.5025
    C2 = 58.5225
    # INITS
    I1 = np.float32(i1)  # cannot calculate on one byte large values
    I2 = np.float32(i2)
    I2_2 = I2 * I2    # I2^2
    I1_2 = I1 * I1    # I1^2
    I1_I2 = I1 * I2   # I1 * I2
    # END INITS
    # PRELIMINARY COMPUTING
    mu1 = cv.GaussianBlur(I1, (11, 11), 1.5)
    mu2 = cv.GaussianBlur(I2, (11, 11), 1.5)
    mu1_2 = mu1 * mu1
    mu2_2 = mu2 * mu2
    mu1_mu2 = mu1 * mu2
    sigma1_2 = cv.GaussianBlur(I1_2, (11, 11), 1.5)
    sigma1_2 -= mu1_2
    sigma2_2 = cv.GaussianBlur(I2_2, (11, 11), 1.5)
    sigma2_2 -= mu2_2
    sigma12 = cv.GaussianBlur(I1_I2, (11, 11), 1.5)
    sigma12 -= mu1_mu2
    t1 = 2 * mu1_mu2 + C1
    t2 = 2 * sigma12 + C2
    t3 = t1 * t2  # t3 = ((2*mu1_mu2 + C1).*(2*sigma12 + C2))
    t1 = mu1_2 + mu2_2 + C1
    t2 = sigma1_2 + sigma2_2 + C2
    t1 = t1 * t2  # t1 = ((mu1_2 + mu2_2 + C1).*(sigma1_2 + sigma2_2 + C2))
    ssim_map = cv.divide(t3, t1)  # ssim_map = t3./t1
    mssim = cv.mean(ssim_map)     # mssim = average of ssim map
    return mssim

This returns the similarity index for each channel of the image. The value is between 0 and 1, where 1 represents a perfect fit. Unfortunately, many Gaussian blurs are quite expensive, so while PSNR can work in a real-time-like environment (24 frames per second), getting similar performance results will take significantly longer than PSNR.

Therefore, the source code introduced at the beginning of this tutorial will perform PSNR measurements on every frame and SSIM measurements only on frames where the PSNR is lower than the input value. For visualization, we display both images in an OpenCV window and print the PSNR and MSSIM values to the console. Expect to see something like this:

You can see a running example on YouTube.