My goal is to write a program that creates a point cloud from feature points found in a monocular video (I later plan on triangulating the points and, based on the camera positions, creating a depth map for each frame).
Points are found using Lucas and Kanade's optical flow algorithm as implemented in OpenCV (https://docs.opencv.org/3.4/d4/dee/tutorial_optical_flow.html).
The points are saved into a measurement matrix "W", which is later factorized to extract the camera rotation matrix "R" and the shape matrix "S". That, however, comes with the problem of finding a correcting matrix "Q" in order to satisfy the metric constraints (https://ecommons.cornell.edu/server/api/core/bitstreams/47872efb-b0b7-4993-9d49-7b3ea8e9f3ed/content, chapter 3.3).
I'm unsure whether there's an easy and/or elegant solution to that (through OpenCV or another library).
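To restate the factorization model from chapter 3 of the paper: after the per-row means are subtracted, W is a 2FxP measurement matrix of rank 3 that factors as W = R*S, where R (2Fx3) stacks the camera axes and S (3xP) holds the 3D points. Splitting the SVD as R = U3*sqrt(Sigma3), S = sqrt(Sigma3)*V3t determines the factors only up to an invertible 3x3 matrix Q, since (R*Q)(Q^-1*S) = R*S, which is where the metric constraints come in.
Here's my code so far: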
#include <iostream>
#include <string>
#include "rclcpp/rclcpp.hpp"
#include <vector>
#include <algorithm>
#include <cmath>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/video/tracking.hpp>
cv::Mat lucasKanadeOF(std::string filepath)
{
auto capture = cv::VideoCapture();
capture.open(filepath);
if (!capture.isOpened())
{
std::cerr << "Can't open video: " << filepath << "\n";
return cv::Mat(); // empty matrix signals failure
}
std::vector<cv::Point2f> corners1, corners2;
auto frame1 = cv::Mat();
auto gray1 = cv::Mat();
if (!capture.read(frame1))
{
return cv::Mat(); // could not read the first frame
}
cv::cvtColor(frame1, gray1, cv::COLOR_BGR2GRAY);
cv::goodFeaturesToTrack(gray1, corners1, 500, 0.01, 10);
std::cout << "Found " << corners1.size() << " initial feature points." << std::endl;
int total_frames = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_COUNT));
// W stores one row pair per frame: row 2f holds the x-coordinates and
// row 2f + 1 the y-coordinates of every tracked point at frame f
auto W = cv::Mat(2 * total_frames, static_cast<int>(corners1.size()), CV_32F, cv::Scalar(0));
for (size_t i = 0; i < corners1.size(); i++)
{
W.at<float>(0, i) = corners1[i].x;
W.at<float>(1, i) = corners1[i].y;
}
cv::Mat mask = cv::Mat::zeros(frame1.size(), frame1.type());
// frame_iter indexes the row pair of W that the current frame's points are written into
int frame_iter = 1;
std::vector<bool> valid_point_mask(corners1.size(), true);
// main loop; frame 0 was already consumed above, so start at 1 and read
// sequentially, which keeps frame_iter within W's 2 * total_frames rows
for (int frm = 1; frm < total_frames; frm++)
{
auto frame2 = cv::Mat();
auto gray2 = cv::Mat();
auto success = capture.read(frame2);
if (!success || frame2.empty())
{
break;
}
cv::cvtColor(frame2, gray2, cv::COLOR_BGR2GRAY);
std::vector<uchar> status;
std::vector<float> error;
auto criteria = cv::TermCriteria((cv::TermCriteria::COUNT) + (cv::TermCriteria::EPS), 10, 0.03);
cv::calcOpticalFlowPyrLK(gray1, gray2, corners1, corners2, status, error, cv::Size(15, 15), 2, criteria);
int valid_points = 0;
for (size_t i = 0; i < corners1.size(); i++)
{
if (status[i] == 1)
{
W.at<float>(2 * frame_iter, i) = corners2[i].x;
W.at<float>(2 * frame_iter + 1, i) = corners2[i].y;
valid_points++;
cv::line(mask, corners2[i], corners1[i], cv::Scalar(0, 255, 0), 2);
cv::circle(frame2, corners2[i], 3, cv::Scalar(255, 0, 0), -1);
}
else
{
// points that get lost are marked as invalid and removed later
valid_point_mask[i] = false;
}
}
std::cout << "Frame " << frm << ": " << valid_points << " valid tracked points." << std::endl;
auto img = cv::Mat();
cv::add(frame2, mask, img);
// Show the frame with the tracked points and lines
cv::imshow("Optical Flow", img);
if (cv::waitKey(1) == 27)
{
break;
}
gray1 = gray2.clone();
corners1 = corners2;
frame_iter++;
}
int valid_count = std::count(valid_point_mask.begin(), valid_point_mask.end(), true);
cv::Mat W_filtered = cv::Mat::zeros(W.rows, valid_count, CV_32F);
int col_idx = 0;
for (size_t i = 0; i < valid_point_mask.size(); i++)
{
if (valid_point_mask[i])
{
W.col(i).copyTo(W_filtered.col(col_idx));
col_idx++;
}
}
capture.release();
return W_filtered; // keep only the points that were tracked through every frame
}
void tomasiKanadeFactorize(cv::Mat &W)
{
if (W.empty() || W.cols < 3)
{
std::cerr << "W matrix is empty or has too few columns." << std::endl;
return;
}
// Subtract centroid
cv::Mat W_centered = W.clone();
for (int i = 0; i < W.rows; i++)
{
cv::Scalar mean = cv::mean(W.row(i));
W_centered.row(i) -= mean[0];
}
// Reorder W so all X-coordinate rows come first, followed by all Y-coordinate rows (inputting them interleaved and reordering here is easier)
cv::Mat W_reordered = cv::Mat::zeros(W.rows, W.cols, W.type());
int num_frames = W.rows / 2;
for (int i = 0; i < num_frames; i++)
{
W_centered.row(2 * i).copyTo(W_reordered.row(i));
}
for (int i = 0; i < num_frames; i++)
{
W_centered.row(2 * i + 1).copyTo(W_reordered.row(num_frames + i));
}
W = W_reordered.clone();
/*
W is a 2FxP matrix. cv::SVD::compute returns the economy-size decomposition
W = U * diag(Sigma) * Vt, where (with m = min(2F, P)):
*U is a 2F x m matrix
*Sigma is an m x 1 vector of singular values in descending order
*Vt is an m x P matrix
*/
cv::Mat U, Sigma, Vt;
cv::SVD::compute(W, Sigma, U, Vt, cv::SVD::MODIFY_A);
std::cout << "SVD computation complete." << std::endl;
int rank = std::min(3, std::min(U.cols, Sigma.rows));
std::cout << "Selected rank: " << rank << std::endl;
// Extract top 3 components
cv::Mat U3 = U.colRange(0, rank);
cv::Mat Sigma3 = cv::Mat::diag(Sigma.rowRange(0, rank));
cv::Mat V3 = Vt.rowRange(0, rank);
// Rank-3 approximation: W ~ U3 * Sigma3 * V3 = (U3 * sqrt(Sigma3)) * (sqrt(Sigma3) * V3)
cv::Mat Sigma3_sqrt;
cv::sqrt(Sigma3, Sigma3_sqrt);
// Candidate R and S; the decomposition is not unique, so this equal split need not yield the true ones
cv::Mat R = U3 * Sigma3_sqrt;
cv::Mat S = Sigma3_sqrt * V3;
/*
(R * Q)(Q^-1 * S) = R * (Q * Q^-1) * S = R * S = W
To find Q, observe that the rows of the true rotation matrix R are unit
vectors and that each of the first F rows (the i_f) is orthogonal to the
corresponding row in the second half of R (the j_f). These metric
constraints yield the over-constrained quadratic system:
i_f^T * (Q * Q^T) * i_f = 1
j_f^T * (Q * Q^T) * j_f = 1
i_f^T * (Q * Q^T) * j_f = 0
I'm not sure how to properly perform this; a possible least-squares
formulation is sketched in solveMetricConstraints below, though I haven't
verified it.
*/
return;
}
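// ---------------------------------------------------------------------------
// A possible (unverified) sketch for solving the metric constraints, assuming
// the row layout produced above: rows 0..F-1 of R are the i_f axes and rows
// F..2F-1 are the j_f axes. Each constraint a^T * (Q * Q^T) * b = c is linear
// in the six unknown entries of the symmetric matrix L = Q * Q^T, so all 3F
// constraints stack into one least-squares system G * l = c. Q is then
// recovered from the eigendecomposition of L, which has to come out positive
// semi-definite (with noisy tracks, negative eigenvalues are clamped to 0).
// constraintRow and solveMetricConstraints are hypothetical helper names, not
// OpenCV API.
// ---------------------------------------------------------------------------
static cv::Mat constraintRow(const cv::Mat &a, const cv::Mat &b)
{
// row of G for the constraint a^T * L * b, with the symmetric L
// parameterized as l = [L00, L01, L02, L11, L12, L22]
cv::Mat row(1, 6, CV_32F);
row.at<float>(0, 0) = a.at<float>(0) * b.at<float>(0);
row.at<float>(0, 1) = a.at<float>(0) * b.at<float>(1) + a.at<float>(1) * b.at<float>(0);
row.at<float>(0, 2) = a.at<float>(0) * b.at<float>(2) + a.at<float>(2) * b.at<float>(0);
row.at<float>(0, 3) = a.at<float>(1) * b.at<float>(1);
row.at<float>(0, 4) = a.at<float>(1) * b.at<float>(2) + a.at<float>(2) * b.at<float>(1);
row.at<float>(0, 5) = a.at<float>(2) * b.at<float>(2);
return row;
}
cv::Mat solveMetricConstraints(const cv::Mat &R)
{
int F = R.rows / 2;
cv::Mat G(3 * F, 6, CV_32F);
cv::Mat c(3 * F, 1, CV_32F);
for (int f = 0; f < F; f++)
{
cv::Mat i_f = R.row(f);
cv::Mat j_f = R.row(F + f);
constraintRow(i_f, i_f).copyTo(G.row(3 * f));     // |i_f|^2 = 1
constraintRow(j_f, j_f).copyTo(G.row(3 * f + 1)); // |j_f|^2 = 1
constraintRow(i_f, j_f).copyTo(G.row(3 * f + 2)); // i_f orthogonal to j_f
c.at<float>(3 * f) = 1.0f;
c.at<float>(3 * f + 1) = 1.0f;
c.at<float>(3 * f + 2) = 0.0f;
}
// least-squares solution of the over-constrained linear system in l
cv::Mat l;
cv::solve(G, c, l, cv::DECOMP_SVD);
cv::Mat L = (cv::Mat_<float>(3, 3) <<
l.at<float>(0), l.at<float>(1), l.at<float>(2),
l.at<float>(1), l.at<float>(3), l.at<float>(4),
l.at<float>(2), l.at<float>(4), l.at<float>(5));
// L = Q * Q^T; cv::eigen stores the eigenvectors of the symmetric L as rows
cv::Mat vals, vecs;
cv::eigen(L, vals, vecs);
cv::Mat sqrtVals = cv::Mat::zeros(3, 3, CV_32F);
for (int k = 0; k < 3; k++)
sqrtVals.at<float>(k, k) = std::sqrt(std::max(vals.at<float>(k), 0.0f));
// then: R_metric = R * Q, S_metric = Q.inv() * S
return vecs.t() * sqrtVals;
}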
int main(int argc, char **argv)
{
// I have used a cropped version of the medusa head recording, as shown in: https://youtu.be/0BVZDyRrYtQ?t=1090
auto filepath = "test.mp4";
auto W = lucasKanadeOF(filepath);
tomasiKanadeFactorize(W);
return 0;
}