Faster camera matrix undistortion.

This commit is contained in:
Daniel
2025-03-03 11:01:50 +01:00
parent 2e1d401dd4
commit 7b99a38ce2
3 changed files with 81 additions and 77 deletions

View File

@ -294,26 +294,26 @@ Results of the model in various experiments on different datasets. \
```json ```json
{ {
"img_loading": 0.0424103, "img_loading": 0.0419109,
"demosaicing": 0.000724716, "demosaicing": 0.000697378,
"avg_time_2d": 0.01494, "avg_time_2d": 0.0148942,
"avg_time_3d": 0.000128772, "avg_time_3d": 0.000115663,
"fps": 63.3173 "fps": 63.6649
} }
{ {
"triangulator_calls": 301, "triangulator_calls": 301,
"init_time": 1.60891e-06, "init_time": 2.90153e-06,
"undistort_time": 2.57178e-05, "undistort_time": 1.41506e-05,
"project_time": 2.22848e-06, "project_time": 2.27745e-06,
"match_time": 8.41567e-06, "match_time": 8.70995e-06,
"pairs_time": 4.53139e-06, "pairs_time": 4.43637e-06,
"pair_scoring_time": 2.67118e-05, "pair_scoring_time": 2.61534e-05,
"grouping_time": 4.63213e-06, "grouping_time": 4.53385e-06,
"full_time": 2.72313e-05, "full_time": 2.64184e-05,
"merge_time": 1.03292e-05, "merge_time": 1.0161e-05,
"post_time": 7.36791e-06, "post_time": 6.56943e-06,
"convert_time": 1.27439e-07, "convert_time": 1.31748e-07,
"total_time": 0.00011914 "total_time": 0.000106678
} }
{ {
"person_nums": { "person_nums": {
@ -332,10 +332,10 @@ Results of the model in various experiments on different datasets. \
}, },
"mpjpe": { "mpjpe": {
"count": 477, "count": 477,
"mean": 0.047983, "mean": 0.048001,
"median": 0.042569, "median": 0.042569,
"std": 0.01486, "std": 0.014925,
"sem": 0.000681, "sem": 0.000684,
"min": 0.03012, "min": 0.03012,
"max": 0.116311, "max": 0.116311,
"recall-0.025": 0.0, "recall-0.025": 0.0,
@ -346,11 +346,11 @@ Results of the model in various experiments on different datasets. \
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 477, "num_labels": 477,
"ap-0.025": 0.0, "ap-0.025": 0.0,
"ap-0.05": 0.389102, "ap-0.05": 0.389107,
"ap-0.1": 0.729848, "ap-0.1": 0.731078,
"ap-0.15": 0.747198, "ap-0.15": 0.74682,
"ap-0.25": 0.747198, "ap-0.25": 0.74682,
"ap-0.5": 0.747198 "ap-0.5": 0.74682
}, },
"head": { "head": {
"count": 477, "count": 477,
@ -390,7 +390,7 @@ Results of the model in various experiments on different datasets. \
"median": 0.045796, "median": 0.045796,
"std": 0.02312, "std": 0.02312,
"sem": 0.00106, "sem": 0.00106,
"min": 0.005349, "min": 0.005348,
"max": 0.147448, "max": 0.147448,
"recall-0.025": 0.100629, "recall-0.025": 0.100629,
"recall-0.05": 0.561845, "recall-0.05": 0.561845,
@ -450,26 +450,26 @@ Results of the model in various experiments on different datasets. \
}, },
"wrist_right": { "wrist_right": {
"count": 477, "count": 477,
"mean": 0.059177, "mean": 0.059427,
"median": 0.054405, "median": 0.054405,
"std": 0.033566, "std": 0.034224,
"sem": 0.001538, "sem": 0.001569,
"min": 0.009618, "min": 0.009618,
"max": 0.371666, "max": 0.371666,
"recall-0.025": 0.115304, "recall-0.025": 0.113208,
"recall-0.05": 0.415094, "recall-0.05": 0.415094,
"recall-0.1": 0.899371, "recall-0.1": 0.899371,
"recall-0.15": 0.981132, "recall-0.15": 0.979036,
"recall-0.25": 0.997904, "recall-0.25": 0.997904,
"recall-0.5": 1.0, "recall-0.5": 1.0,
"num_labels": 477 "num_labels": 477
}, },
"hip_left": { "hip_left": {
"count": 477, "count": 477,
"mean": 0.048042, "mean": 0.048028,
"median": 0.042252, "median": 0.042252,
"std": 0.026486, "std": 0.026459,
"sem": 0.001214, "sem": 0.001213,
"min": 0.006475, "min": 0.006475,
"max": 0.145904, "max": 0.145904,
"recall-0.025": 0.190776, "recall-0.025": 0.190776,
@ -562,7 +562,7 @@ Results of the model in various experiments on different datasets. \
}, },
"joint_recalls": { "joint_recalls": {
"num_labels": 6201, "num_labels": 6201,
"recall-0.025": 0.21158, "recall-0.025": 0.21142,
"recall-0.05": 0.61538, "recall-0.05": 0.61538,
"recall-0.1": 0.94275, "recall-0.1": 0.94275,
"recall-0.15": 0.98645, "recall-0.15": 0.98645,

View File

@ -85,13 +85,23 @@
CameraInternal::CameraInternal(const Camera &cam) CameraInternal::CameraInternal(const Camera &cam)
{ {
this->cam = cam; this->cam = cam;
this->invK = invert3x3(cam.K);
this->invR = transpose3x3(cam.R); this->invR = transpose3x3(cam.R);
// Camera center: // Camera center:
// C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T // C = -(Rᵀ * t) = -(Rᵀ * (R * (T * -1))) = -(Rᵀ * (R * -T)) = -(Rᵀ * -R * T) = -(-T) = T
this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]}; this->center = {cam.T[0][0], cam.T[1][0], cam.T[2][0]};
// Undistort camera matrix
// As with the undistortion, our own implementation avoids some overhead compared to OpenCV
if (cam.type == "fisheye")
{
newK = calc_optimal_camera_matrix_fisheye(1.0, {cam.width, cam.height});
}
else
{
newK = calc_optimal_camera_matrix_pinhole(1.0, {cam.width, cam.height});
}
this->invK = invert3x3(newK);
} }
// ================================================================================================= // =================================================================================================
@ -366,13 +376,19 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
} }
// Define key points // Define key points
const size_t N = 9; // Calculate only the contour points of the image, and use fewer points;
// the corners and edge midpoints should be enough if the camera has no strange distortions
const size_t N = 3;
std::vector<std ::array<float, 2>> pts; std::vector<std ::array<float, 2>> pts;
pts.reserve(N * N); pts.reserve(4 * (N - 1));
for (size_t y = 0; y < N; ++y) for (size_t y = 0; y < N; ++y)
{ {
for (size_t x = 0; x < N; ++x) for (size_t x = 0; x < N; ++x)
{ {
if (x != 0 && x != N - 1 && y != 0 && y != N - 1)
{
continue;
}
pts.push_back({x * (w - 1) / (N - 1), y * (h - 1) / (N - 1)}); pts.push_back({x * (w - 1) / (N - 1), y * (h - 1) / (N - 1)});
} }
} }
@ -406,22 +422,34 @@ std::array<std::array<float, 3>, 3> CameraInternal::calc_optimal_camera_matrix_p
{ {
for (size_t x = 0; x < N; ++x) for (size_t x = 0; x < N; ++x)
{ {
if (x != 0 && x != N - 1 && y != 0 && y != N - 1)
{
continue;
}
auto &pt = pts[k]; auto &pt = pts[k];
k += 1; k += 1;
oX0 = std::min(oX0, pt[0]);
oX1 = std::max(oX1, pt[0]);
oY0 = std::min(oY0, pt[1]);
oY1 = std::max(oY1, pt[1]);
if (x == 0) if (x == 0)
{
oX0 = std::min(oX0, pt[0]);
iX0 = std::max(iX0, pt[0]); iX0 = std::max(iX0, pt[0]);
}
if (x == N - 1) if (x == N - 1)
{
oX1 = std::max(oX1, pt[0]);
iX1 = std::min(iX1, pt[0]); iX1 = std::min(iX1, pt[0]);
}
if (y == 0) if (y == 0)
{
oY0 = std::min(oY0, pt[1]);
iY0 = std::max(iY0, pt[1]); iY0 = std::max(iY0, pt[1]);
}
if (y == N - 1) if (y == N - 1)
{
oY1 = std::max(oY1, pt[1]);
iY1 = std::min(iY1, pt[1]); iY1 = std::min(iY1, pt[1]);
}
} }
} }
float inner_width = iX1 - iX0; float inner_width = iX1 - iX0;
@ -916,29 +944,14 @@ void TriangulatorInternal::print_stats()
void TriangulatorInternal::undistort_poses( void TriangulatorInternal::undistort_poses(
std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam) std::vector<std::vector<std::array<float, 3>>> &poses_2d, CameraInternal &icam)
{ {
int width = icam.cam.width;
int height = icam.cam.height;
// Undistort camera matrix
// As with the undistortion, our own implementation avoids some overhead compared to OpenCV
std::array<std::array<float, 3>, 3> newK;
if (icam.cam.type == "fisheye")
{
newK = icam.calc_optimal_camera_matrix_fisheye(1.0, {width, height});
}
else
{
newK = icam.calc_optimal_camera_matrix_pinhole(1.0, {width, height});
}
float ifx_old = 1.0 / icam.cam.K[0][0]; float ifx_old = 1.0 / icam.cam.K[0][0];
float ify_old = 1.0 / icam.cam.K[1][1]; float ify_old = 1.0 / icam.cam.K[1][1];
float cx_old = icam.cam.K[0][2]; float cx_old = icam.cam.K[0][2];
float cy_old = icam.cam.K[1][2]; float cy_old = icam.cam.K[1][2];
float fx_new = newK[0][0]; float fx_new = icam.newK[0][0];
float fy_new = newK[1][1]; float fy_new = icam.newK[1][1];
float cx_new = newK[0][2]; float cx_new = icam.newK[0][2];
float cy_new = newK[1][2]; float cy_new = icam.newK[1][2];
// Undistort all the points // Undistort all the points
size_t num_persons = poses_2d.size(); size_t num_persons = poses_2d.size();
@ -971,6 +984,8 @@ void TriangulatorInternal::undistort_poses(
} }
// Mask out points that are far outside the image (points slightly outside are still valid) // Mask out points that are far outside the image (points slightly outside are still valid)
int width = icam.cam.width;
int height = icam.cam.height;
float mask_offset = (width + height) / 20.0; float mask_offset = (width + height) / 20.0;
for (size_t i = 0; i < num_persons; ++i) for (size_t i = 0; i < num_persons; ++i)
{ {
@ -985,18 +1000,6 @@ void TriangulatorInternal::undistort_poses(
} }
} }
} }
// Update the camera intrinsics
icam.cam.K = newK;
icam.invK = CameraInternal::invert3x3(newK);
if (icam.cam.type == "fisheye")
{
icam.cam.DC = {0.0, 0.0, 0.0, 0.0};
}
else
{
icam.cam.DC = {0.0, 0.0, 0.0, 0.0, 0.0};
}
} }
// ================================================================================================= // =================================================================================================
@ -1017,7 +1020,7 @@ TriangulatorInternal::project_poses(
all_dists.resize(num_persons); all_dists.resize(num_persons);
// Get camera parameters // Get camera parameters
const std::array<std::array<float, 3>, 3> &K = icam.cam.K; const std::array<std::array<float, 3>, 3> &K = icam.newK;
const std::array<std::array<float, 3>, 3> &R = icam.cam.R; const std::array<std::array<float, 3>, 3> &R = icam.cam.R;
const std::array<std::array<float, 1>, 3> &T = icam.cam.T; const std::array<std::array<float, 1>, 3> &T = icam.cam.T;

View File

@ -15,9 +15,10 @@ public:
Camera cam; Camera cam;
std::array<std::array<float, 3>, 3> invK;
std::array<std::array<float, 3>, 3> invR; std::array<std::array<float, 3>, 3> invR;
std::array<float, 3> center; std::array<float, 3> center;
std::array<std::array<float, 3>, 3> newK;
std::array<std::array<float, 3>, 3> invK;
static std::array<std::array<float, 3>, 3> transpose3x3( static std::array<std::array<float, 3>, 3> transpose3x3(
const std::array<std::array<float, 3>, 3> &M); const std::array<std::array<float, 3>, 3> &M);