
Posted by User Bot


26 Mar, 2025

Updated at 18 May, 2025

Structure from Motion - how to find the correction matrix

My goal is to write a program that will create a point cloud from feature points found in a monocular video (I later plan to triangulate the points and, based on the camera positions, create a depth map for each frame).

The points are found using Lucas and Kanade's optical flow algorithm as implemented in OpenCV (https://docs.opencv.org/3.4/d4/dee/tutorial_optical_flow.html).

The points are saved into a matrix "W", which is later factorized to extract the camera rotation matrix "R" and the shape matrix "S". That, however, comes with the problem of finding a correction matrix "Q" in order to satisfy the metric constraints (https://ecommons.cornell.edu/server/api/core/bitstreams/47872efb-b0b7-4993-9d49-7b3ea8e9f3ed/content, chapter 3.3).

I'm not sure if there's an easy and/or elegant solution to this, whether through OpenCV or another library.

#include <iostream>
#include <vector>
#include "rclcpp/rclcpp.hpp"
#include <string>
#include <algorithm>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/video.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>

cv::Mat lucasKanadeOF(std::string filepath)
{
  auto capture = cv::VideoCapture();
  capture.open(filepath);
  if (!capture.isOpened())
  {
    std::cerr << "Can't open video: " << filepath << "\n";
    return cv::Mat();
  }

  std::vector<cv::Point2f> corners1, corners2;
  auto frame1 = cv::Mat();
  auto gray1 = cv::Mat();

  if (!capture.read(frame1))
  {
    return cv::Mat();
  }

  cv::cvtColor(frame1, gray1, cv::COLOR_BGR2GRAY);
  cv::goodFeaturesToTrack(gray1, corners1, 500, 0.01, 10);

  std::cout << "Found " << corners1.size() << " initial feature points." << std::endl;

  int total_frames = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_COUNT));

  auto W = cv::Mat(2 * total_frames, static_cast<int>(corners1.size()), CV_32F, cv::Scalar(0));
  for (size_t i = 0; i < corners1.size(); i++)
  {
    W.at<float>(0, i) = corners1[i].x;
    W.at<float>(1, i) = corners1[i].y;
  }

  cv::Mat mask = cv::Mat::zeros(frame1.size(), frame1.type());

  // used for inserting frames into W
  int frame_iter = 1;

  std::vector valid_point_mask(corners1.size(), true);

  // main loop (frame 0 is already stored in rows 0 and 1 of W, so tracking starts at
  // frame 1; starting at 0 would re-read the first frame and write past the end of W)
  for (int frm = 1; frm < total_frames; frm++)
  {
    if (!capture.set(cv::CAP_PROP_POS_FRAMES, frm))
      break;

    auto frame2 = cv::Mat();
    auto gray2 = cv::Mat();
    auto success = capture.read(frame2);

    if (!success || frame2.empty())
    {
      break;
    }

    cv::cvtColor(frame2, gray2, cv::COLOR_BGR2GRAY);

    std::vector<uchar> status;
    std::vector<float> error;
    auto criteria = cv::TermCriteria((cv::TermCriteria::COUNT) + (cv::TermCriteria::EPS), 10, 0.03);

    cv::calcOpticalFlowPyrLK(gray1, gray2, corners1, corners2, status, error, cv::Size(15, 15), 2, criteria);

    int valid_points = 0;
    for (size_t i = 0; i < corners1.size(); i++)
    {
      if (status[i] == 1)
      {
        W.at<float>(2 * frame_iter, i) = corners2[i].x;
        W.at<float>(2 * frame_iter + 1, i) = corners2[i].y;
        valid_points++;

        cv::line(mask, corners2[i], corners1[i], cv::Scalar(0, 255, 0), 2);
        cv::circle(frame2, corners2[i], 3, cv::Scalar(255, 0, 0), -1);
      }
      else
      {
        // in case points are lost, they're marked as invalid and are later on removed
        valid_point_mask[i] = false;
      }
    }

    std::cout << "Frame " << frm << ": " << valid_points << " valid tracked points." << std::endl;

    auto img = cv::Mat();

    cv::add(frame2, mask, img);

    // Show the frame with the tracked points and lines
    cv::imshow("Optical Flow", img);
    if (cv::waitKey(1) == 27)
    {
      break;
    }

    gray1 = gray2.clone();
    corners1 = corners2;
    frame_iter++;
  }

  int valid_count = std::count(valid_point_mask.begin(), valid_point_mask.end(), true);
  cv::Mat W_filtered = cv::Mat::zeros(W.rows, valid_count, CV_32F);

  int col_idx = 0;
  for (size_t i = 0; i < valid_point_mask.size(); i++)
  {
    if (valid_point_mask[i])
    {
      W.col(i).copyTo(W_filtered.col(col_idx));
      col_idx++;
    }
  }

  capture.release();
  // return only the columns of points that were successfully tracked through every frame
  return W_filtered;
}

void tomasiKanadeFactorize(cv::Mat &W)
{
  if (W.empty() || W.cols < 3)
  {
    std::cerr << "W matrix is empty or has too few columns." << std::endl;
    return;
  }

  // Subtract centroid
  cv::Mat W_centered = W.clone();
  for (int i = 0; i < W.rows; i++)
  {
    cv::Scalar mean = cv::mean(W.row(i));
    W_centered.row(i) -= mean[0];
  }

  // Reorder W matrix to have X-coordinates first, followed by Y-coordinates (it's easier to input them this way and then reorder)
  cv::Mat W_reordered = cv::Mat::zeros(W.rows, W.cols, W.type());

  int num_frames = W.rows / 2;

  for (int i = 0; i < num_frames; i++)
  {
    W_centered.row(2 * i).copyTo(W_reordered.row(i));
  }

  for (int i = 0; i < num_frames; i++)
  {
    W_centered.row(2 * i + 1).copyTo(W_reordered.row(num_frames + i));
  }
  W = W_reordered.clone();

  /*
  W is a 2FxP matrix and mathematically W = U*Sigma*Vt, where:
    * U is 2Fx2F
    * Sigma is 2FxP (diagonal)
    * Vt is PxP
  OpenCV's cv::SVD::compute returns the thin decomposition, with Sigma as a column
  vector of singular values, which is what the rowRange/colRange calls below rely on.
  */

  cv::Mat U, Sigma, Vt;
  cv::SVD::compute(W, Sigma, U, Vt, cv::SVD::MODIFY_A);
  std::cout << "SVD computation complete." << std::endl;

  int rank = std::min(3, std::min(U.cols, Sigma.rows));
  std::cout << "Selected rank: " << rank << std::endl;

  // Extract top 3 components
  cv::Mat U3 = U.colRange(0, rank);
  cv::Mat Sigma3 = cv::Mat::diag(Sigma.rowRange(0, rank));
  cv::Mat V3 = Vt.rowRange(0, rank);



  // W = U3 * sqrt(Sigma3) * sqrt(Sigma3) * V3
  cv::Mat Sigma3_sqrt;
  cv::sqrt(Sigma3, Sigma3_sqrt);

  // Potential R and S, but the decomposition is not unique, equal split may not yield them
  cv::Mat R = U3 * Sigma3_sqrt;
  cv::Mat S = Sigma3_sqrt * V3;

  /*
     (R * Q)(Q^-1 * S) = R(Q*Q^-1)S = R*S = W

     "In order to find Q we observe that the rows of the true rotation matrix R
     are unit vectors and the first F are orthogonal to the corresponding F in
     the second half of R. These metric constraints yield the over-constrained
     quadratic system:
        i_f^T * Q*Q^T * i_f = 1
        j_f^T * Q*Q^T * j_f = 1
        i_f^T * Q*Q^T * j_f = 0"

     I'm not sure how to properly perform this (a rough, untested attempt is
     sketched after this function).
  */

  return;
}
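
// Rough sketch of one way the metric constraints above could be solved (untested,
// and the helper names below are my own, not something from OpenCV or the paper):
// write L = Q*Q^T, a symmetric 3x3 matrix with 6 unknowns. Every constraint
//   i_f^T * L * i_f = 1,   j_f^T * L * j_f = 1,   i_f^T * L * j_f = 0
// is linear in those unknowns, so stacking them gives an over-constrained linear
// system A*l = c that cv::solve can handle in a least-squares sense. Q is then
// recovered from L via an eigendecomposition (a Cholesky factorization would also
// work whenever L comes out positive definite).

// Builds one row of A for the constraint a^T * L * b, with the unknowns ordered
// as l = (L11, L12, L13, L22, L23, L33).
cv::Mat metricConstraintRow(const cv::Mat &a, const cv::Mat &b)
{
  cv::Mat row(1, 6, CV_32F);
  row.at<float>(0, 0) = a.at<float>(0) * b.at<float>(0);
  row.at<float>(0, 1) = a.at<float>(0) * b.at<float>(1) + a.at<float>(1) * b.at<float>(0);
  row.at<float>(0, 2) = a.at<float>(0) * b.at<float>(2) + a.at<float>(2) * b.at<float>(0);
  row.at<float>(0, 3) = a.at<float>(1) * b.at<float>(1);
  row.at<float>(0, 4) = a.at<float>(1) * b.at<float>(2) + a.at<float>(2) * b.at<float>(1);
  row.at<float>(0, 5) = a.at<float>(2) * b.at<float>(2);
  return row;
}

// R_hat is the 2Fx3 matrix U3*sqrt(Sigma3): after the X-first/Y-second reordering,
// its first F rows are the i_f vectors and its last F rows are the j_f vectors.
cv::Mat computeMetricCorrection(const cv::Mat &R_hat)
{
  int F = R_hat.rows / 2;
  cv::Mat A(3 * F, 6, CV_32F);
  cv::Mat c(3 * F, 1, CV_32F);
  for (int f = 0; f < F; f++)
  {
    cv::Mat i_f = R_hat.row(f);
    cv::Mat j_f = R_hat.row(F + f);
    metricConstraintRow(i_f, i_f).copyTo(A.row(3 * f));
    metricConstraintRow(j_f, j_f).copyTo(A.row(3 * f + 1));
    metricConstraintRow(i_f, j_f).copyTo(A.row(3 * f + 2));
    c.at<float>(3 * f) = 1.0f;
    c.at<float>(3 * f + 1) = 1.0f;
    c.at<float>(3 * f + 2) = 0.0f;
  }

  // Least-squares solution of A*l = c
  cv::Mat l;
  cv::solve(A, c, l, cv::DECOMP_SVD);

  cv::Mat L = (cv::Mat_<float>(3, 3) <<
               l.at<float>(0), l.at<float>(1), l.at<float>(2),
               l.at<float>(1), l.at<float>(3), l.at<float>(4),
               l.at<float>(2), l.at<float>(4), l.at<float>(5));

  // L should equal Q*Q^T; recover Q from its eigendecomposition, clamping
  // negative eigenvalues that noise can introduce.
  cv::Mat evals, evecs;
  cv::eigen(L, evals, evecs); // eigenvectors are stored as rows
  for (int k = 0; k < 3; k++)
    evals.at<float>(k) = std::max(evals.at<float>(k), 0.0f);
  cv::sqrt(evals, evals);
  return evecs.t() * cv::Mat::diag(evals); // Q, so that R = R_hat*Q and S = Q^-1*S_hat
}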

int main(int argc, char **argv)
{
  // I have used a cropped version of the medusa head recording, as shown in: https://youtu.be/0BVZDyRrYtQ?t=1090 
  auto filepath = "test.mp4";
  auto W = lucasKanadeOF(filepath);
  tomasiKanadeFactorize(W);
  return 0;
}