2D Features framework – AKAZE and ORB planar tracking (OpenCV v4.8.0)

Previous tutorial: AKAZE local features matching

Next tutorial: Basic concepts of the homography explained with code

Original author Fedor Morozov
Compatibility OpenCV >= 3.0

Introduction

In this tutorial, we will compare AKAZE and ORB local features, using them to find matching points between video frames and track the movement of a planar object.

The algorithm is as follows:

  • Detect and describe keypoints on the first frame, manually set the object boundaries
  • For every next frame:
    1. Detect and describe keypoints
    2. Match them using a brute-force matcher
    3. Estimate the homography transformation using RANSAC
    4. Filter inliers from all the matches
    5. Apply the homography transformation to the bounding box to find the object
    6. Draw the bounding box and inliers, and compute the inlier ratio as an evaluation metric

Data

To do tracking we need the video and the object position on the first frame.

You can download sample videos and data here.

To run the code, you must specify an input (camera ID or video file). Then, select a bounding box with the mouse and press any key to start tracking.

./planar_tracking blais.mp4

Source code

#include <opencv2/features2d.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/highgui.hpp> //for imshow
#include <vector>
#include <iostream>
#include <iomanip>
#include "stats.h" // Statistics structure definition
#include "utils.h" // Drawing and printing functions
using namespace std;
using namespace cv;
const double akaze_thresh = 3e-4; // AKAZE detection threshold set to locate approximately 1000 keypoints
const double ransac_thresh = 2.5f; // RANSAC inlier threshold
const double nn_match_ratio = 0.8f; // nearest neighbor matching ratio
const int bb_min_inliers = 100; // Minimal number of inliers to draw the bounding box
const int stats_update_period = 10; // On-screen statistics are updated every 10 frames
namespace example {
class Tracker
{
public:
 Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher):
 detector(_detector),
 matcher(_matcher)
 {}
 void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats);
 Mat process(const Mat frame, Stats& stats);
 Ptr<Feature2D> getDetector() {
 return detector;
 }
protected:
 Ptr<Feature2D> detector;
 Ptr<DescriptorMatcher> matcher;
 Mat first_frame, first_desc;
 vector<KeyPoint> first_kp;
 vector<Point2f> object_bb;
};
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
 cv::Point *ptMask = new cv::Point[bb.size()];
 const Point* ptContain = { &ptMask[0] };
 int iSize = static_cast<int>(bb.size());
 for (size_t i = 0; i < bb.size(); i++) {
 ptMask[i].x = static_cast<int>(bb[i].x);
 ptMask[i].y = static_cast<int>(bb[i].y);
 }
 first_frame = frame.clone();
 cv::Mat matMask = cv::Mat::zeros(frame.size(), CV_8UC1);
 cv::fillPoly(matMask, &ptContain, &iSize, 1, cv::Scalar::all(255));
 detector->detectAndCompute(first_frame, matMask, first_kp, first_desc);
 stats.keypoints = (int)first_kp.size();
 drawBoundingBox(first_frame, bb);
 putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
 object_bb = bb;
 delete[] ptMask;
}
Mat Tracker::process(const Mat frame, Stats& stats)
{
 TickMeter tm;
 vector<KeyPoint> kp;
 Mat desc;
 tm.start();
 detector->detectAndCompute(frame, noArray(), kp, desc);
 stats.keypoints = (int)kp.size();
 vector< vector<DMatch> > matches;
 vector<KeyPoint> matched1, matched2;
 matcher->knnMatch(first_desc, desc, matches, 2);
 for(unsigned i = 0; i < matches.size(); i++) {
 if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
 matched1.push_back(first_kp[matches[i][0].queryIdx]);
 matched2.push_back( kp[matches[i][0].trainIdx]);
 }
 }
 stats.matches = (int)matched1.size();
 Mat inlier_mask, homography;
 vector<KeyPoint> inliers1, inliers2;
 vector<DMatch> inlier_matches;
 if(matched1.size() >= 4) {
 homography = findHomography(Points(matched1), Points(matched2),
 RANSAC, ransac_thresh, inlier_mask);
 }
 tm.stop();
 stats.fps = 1./tm.getTimeSec();
 if(matched1.size() < 4 || homography.empty()) {
 Mat res;
 hconcat(first_frame, frame, res);
 stats.inliers = 0;
 stats.ratio = 0;
 return res;
 }
 for(unsigned i = 0; i < matched1.size(); i++) {
 if(inlier_mask.at<uchar>(i)) {
 int new_i = static_cast<int>(inliers1.size());
 inliers1.push_back(matched1[i]);
 inliers2.push_back(matched2[i]);
 inlier_matches.push_back(DMatch(new_i, new_i, 0));
 }
 }
 stats.inliers = (int)inliers1.size();
 stats.ratio = stats.inliers * 1.0 / stats.matches;
 vector<Point2f> new_bb;
 perspectiveTransform(object_bb, new_bb, homography);
 Mat frame_with_bb = frame.clone();
 if(stats.inliers >= bb_min_inliers) {
 drawBoundingBox(frame_with_bb, new_bb);
 }
 Mat res;
 drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
 inlier_matches, res,
 Scalar(255, 0, 0), Scalar(255, 0, 0));
 return res;
}
}
int main(int argc, char **argv)
{
 CommandLineParser parser(argc, argv, "{@input_path |0|input path can be a camera id, like 0,1,2 or a video filename}");
 parser.printMessage();
 string input_path = parser.get<string>(0);
 string video_name = input_path;
 VideoCapture video_in;
 if ( ( isdigit(input_path[0]) && input_path.size() == 1 ) )
 {
 int camera_no = input_path[0] - '0';
 video_in.open(camera_no);
 }
 else {
 video_in.open(video_name);
 }
 if(!video_in.isOpened()) {
 cerr << "Couldn't open " << video_name << endl;
 return 1;
 }
 Stats stats, akaze_stats, orb_stats;
 Ptr<AKAZE> akaze = AKAZE::create();
 akaze->setThreshold(akaze_thresh);
 Ptr<ORB> orb = ORB::create();
 Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
 example::Tracker akaze_tracker(akaze, matcher);
 example::Tracker orb_tracker(orb, matcher);
 Mat frame;
 namedWindow(video_name, WINDOW_NORMAL);
 cout << "\nPress any key to stop the video and select a bounding box" << endl;
 while (waitKey(1) < 1)
 {
 video_in >> frame;
 cv::resizeWindow(video_name, frame.size());
 imshow(video_name, frame);
 }
 vector<Point2f>bb;
 cv::Rect uBox = cv::selectROI(video_name, frame);
 bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y)));
 bb.push_back(cv::Point2f(static_cast<float>(uBox.x + uBox.width), static_cast<float>(uBox.y)));
 bb.push_back(cv::Point2f(static_cast<float>(uBox.x + uBox.width), static_cast<float>(uBox.y + uBox.height)));
 bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y + uBox.height)));
 akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats);
 orb_tracker.setFirstFrame(frame, bb, "ORB", stats);
 Stats akaze_draw_stats, orb_draw_stats;
 Mat akaze_res, orb_res, res_frame;
 int i = 0;
 for(;;) {
 i++;
 bool update_stats = (i % stats_update_period == 0);
 video_in >> frame;
 // If there are no more images, stop the program
 if(frame.empty()) break;
 akaze_res = akaze_tracker.process(frame, stats);
 akaze_stats += stats;
 if(update_stats) {
 akaze_draw_stats = stats;
 }
 orb->setMaxFeatures(stats.keypoints);
 orb_res = orb_tracker.process(frame, stats);
 orb_stats += stats;
 if(update_stats) {
 orb_draw_stats = stats;
 }
 drawStatistics(akaze_res, akaze_draw_stats);
 drawStatistics(orb_res, orb_draw_stats);
 vconcat(akaze_res, orb_res, res_frame);
 cv::imshow(video_name, res_frame);
 if(waitKey(1)==27) break; //Press ESC key to exit
 }
 akaze_stats /= i - 1;
 orb_stats /= i - 1;
 printStatistics("AKAZE", akaze_stats);
 printStatistics("ORB", orb_stats);
 return 0;
}

Description

Tracker class

This class implements the algorithm described above using the given feature detector and descriptor matcher.
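As the main function above shows, both detectors can share a single brute-force Hamming matcher, since AKAZE and ORB both produce binary descriptors. A minimal usage sketch of the class (frame and next_frame stand for two frames grabbed from the video; bb is the four-corner bounding box built in main; Stats comes from stats.h):

Ptr<AKAZE> akaze = AKAZE::create();
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
example::Tracker tracker(akaze, matcher);
Stats stats;
tracker.setFirstFrame(frame, bb, "AKAZE", stats); // detect inside bb and store as reference
Mat vis = tracker.process(next_frame, stats); // match, estimate homography, draw the result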

  • Set first frame
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
 first_frame = frame.clone();
 detector->detectAndCompute(first_frame, noArray(), first_kp, first_desc);
 stats.keypoints = (int)first_kp.size();
 drawBoundingBox(first_frame, bb);
 putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
 object_bb = bb;
}

We compute and store the keypoints and descriptors from the first frame and prepare it for the output.

We need to save the number of detected keypoints to ensure that both detectors find approximately the same number of keypoints.
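In the main loop above, this balancing is done by capping ORB's feature count at the number of keypoints AKAZE detected on the same frame:

orb->setMaxFeatures(stats.keypoints);
orb_res = orb_tracker.process(frame, stats);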

  • Processing frames
  1. Locate keypoints and compute descriptors
detector->detectAndCompute(frame, noArray(), kp, desc);

To find the matching points between frames, we must first detect the keypoints.

In this tutorial, the detector is set up to find approximately 1000 keypoints on each frame.
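For AKAZE this is controlled by its detection threshold, which main sets before tracking starts (akaze_thresh = 3e-4 is defined at the top of the source):

Ptr<AKAZE> akaze = AKAZE::create();
akaze->setThreshold(akaze_thresh); // tuned to yield roughly 1000 keypoints per frame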

  2. Find correspondences using the 2-nn matcher
matcher->knnMatch(first_desc, desc, matches, 2);
for(unsigned i = 0; i < matches.size(); i++) {
 if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
 matched1.push_back( first_kp[matches[i][0].queryIdx]);
 matched2.push_back(kp[matches[i][0].trainIdx]);
 }
}

The closest match counts as a match only if its distance is less than nn_match_ratio times the distance of the second closest match. For example, with nn_match_ratio = 0.8, a best match at distance 40 is kept only if the second-best match is farther away than 50.

  3. Estimate the homography transformation using RANSAC
homography = findHomography(Points(matched1), Points(matched2),
 RANSAC, ransac_thresh, inlier_mask);

If there are at least four matching points, we can use random sample consensus (RANSAC) to estimate the image transformation.
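When fewer than four matches survive the ratio test, or findHomography fails and returns an empty matrix, process falls back to showing the two frames side by side with zeroed statistics, as in the full source above:

if(matched1.size() < 4 || homography.empty()) {
 Mat res;
 hconcat(first_frame, frame, res);
 stats.inliers = 0;
 stats.ratio = 0;
 return res;
}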

  4. Save the inliers
for(unsigned i = 0; i < matched1.size(); i++) {
 if(inlier_mask.at<uchar>(i)) {
 int new_i = static_cast<int>(inliers1.size());
 inliers1.push_back(matched1[i]);
 inliers2.push_back(matched2[i]);
 inlier_matches.push_back(DMatch(new_i, new_i, 0));
 }
}

Since findHomography computes the inliers, we only need to save the chosen points and matches.

  5. Project the object bounding box
perspectiveTransform(object_bb, new_bb, homography);

If there is a reasonable number of inliers, we can use the estimated transformation to locate the object.
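In the full source above, the box is drawn only when at least bb_min_inliers (100) matches survive RANSAC, which suppresses spurious boxes on frames where the estimate is unreliable:

if(stats.inliers >= bb_min_inliers) {
 drawBoundingBox(frame_with_bb, new_bb);
}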

Results

You can watch the resulting video on YouTube.

AKAZE Statistics:

Matches 626
Inliers 410
Inlier ratio 0.58
Keypoints 1117

ORB statistics:

Matches 504
Inliers 319
Inlier ratio 0.56
Keypoints 1112