ed721729fd
Add end-to-end RGB-D reconstruction support across the C++ core and Python API. - add a native merge_rgbd_views path, view-aware 3D pose containers, and nanobind bindings - expose Python helpers to sample aligned depth, apply per-joint offsets, lift UVD poses to world space, and run reconstruct_rgbd - add RGB-D regression tests for merging, manual pipeline parity, symmetric depth sampling windows, and out-of-bounds joints - bump the project version from 0.1.0 to 0.2.0 for the new feature surface
198 lines
6.4 KiB
Python
import numpy as np
|
|
|
|
import rpt
|
|
|
|
# Canonical joint ordering shared by the 2-D fixtures and the triangulation
# config: "nose" first, then eight left/right joint pairs, then the three
# synthetic mid/head joints appended by the pose model (20 names total).
_PAIRED_PARTS = ("eye", "ear", "shoulder", "elbow", "wrist", "hip", "knee", "ankle")

JOINT_NAMES = (
    ["nose"]
    + [f"{part}_{side}" for part in _PAIRED_PARTS for side in ("left", "right")]
    + ["hip_middle", "shoulder_middle", "head"]
)
|
|
|
|
|
|
def make_camera(name: str) -> rpt.Camera:
    """Build a 256x256 pinhole test camera posed at the world origin.

    The intrinsic matrix uses focal length 1000 with a (0, 0) principal
    point, no distortion, identity rotation, and zero translation, so the
    camera frame coincides with the world frame.
    """
    k_matrix = [[1000, 0, 0], [0, 1000, 0], [0, 0, 1]]
    dist_coeffs = [0, 0, 0, 0, 0]
    r_identity = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    t_zero = [[0], [0], [0]]
    side = 256  # square 256x256 sensor
    return rpt.make_camera(
        name,
        k_matrix,
        dist_coeffs,
        r_identity,
        t_zero,
        side,
        side,
        rpt.CameraModel.PINHOLE,
    )
|
|
|
|
|
|
def make_config(num_views: int) -> rpt.TriangulationConfig:
    """Build a triangulation config with *num_views* identical origin cameras.

    All views share the joint list in ``JOINT_NAMES``; the float32 2x3 array
    presumably describes the reconstruction volume extents — confirm against
    ``rpt.make_triangulation_config``.
    """
    cameras = []
    for view_idx in range(num_views):
        cameras.append(make_camera(f"Camera {view_idx}"))
    volume = np.asarray([[10.0, 10.0, 10.0], [0.0, 0.0, 0.0]], dtype=np.float32)
    return rpt.make_triangulation_config(cameras, volume, JOINT_NAMES)
|
|
|
|
|
|
def make_body_2d() -> np.ndarray:
    """Return a (20, 3) float32 array of 2-D joints for one synthetic person.

    Rows follow ``JOINT_NAMES`` order; columns are (u, v, confidence) with
    every confidence fixed at 1.0.
    """
    # (u, v) pixel positions, one pair per joint in JOINT_NAMES order.
    uv_pairs = [
        (150, 50),    # nose
        (145, 48), (155, 48),    # eyes
        (138, 50), (162, 50),    # ears
        (135, 80), (165, 80),    # shoulders
        (125, 115), (175, 115),  # elbows
        (115, 150), (185, 150),  # wrists
        (145, 130), (155, 130),  # hips
        (145, 175), (155, 175),  # knees
        (145, 220), (155, 220),  # ankles
        (150, 130),  # hip_middle
        (150, 80),   # shoulder_middle
        (150, 50),   # head
    ]
    body = np.ones((len(uv_pairs), 3), dtype=np.float32)  # confidence column stays 1.0
    body[:, :2] = uv_pairs
    return body
|
|
|
|
|
|
def test_sample_depth_for_poses_respects_person_counts_and_scores():
    """Zero-score joints and persons beyond the per-view count come back zeroed."""
    detections = np.zeros((1, 2, 2, 3), dtype=np.float32)
    detections[0, 0, 0] = [5, 6, 0.8]
    detections[0, 0, 1] = [7, 8, 0.0]  # zero confidence: joint should be dropped
    counts = np.asarray([1], dtype=np.uint32)  # view 0 contains only one real person

    depth = np.full((16, 16), 3000, dtype=np.float32)
    depth[0, 0] = 1234  # unrelated pixel; must not influence the sample at (5, 6)

    sampled = rpt.sample_depth_for_poses(detections, counts, [depth])

    # Valid joint keeps (u, v, score) and picks up the 3000 depth value.
    np.testing.assert_allclose(sampled[0, 0, 0], [5.0, 6.0, 3000.0, 0.8], rtol=1e-6, atol=1e-6)
    # Zero-score joint is emitted as all zeros.
    np.testing.assert_array_equal(sampled[0, 0, 1], np.zeros((4,), dtype=np.float32))
    # The second person slot is beyond the declared count and stays zeroed.
    np.testing.assert_array_equal(sampled[0, 1], np.zeros((2, 4), dtype=np.float32))
|
|
|
|
|
|
def test_sample_depth_for_poses_uses_symmetric_window():
    """With window_size=3 the sample around (5, 5) must ignore nearby outliers."""
    joints = np.zeros((1, 1, 1, 3), dtype=np.float32)
    joints[0, 0, 0] = [5, 5, 1.0]
    counts = np.asarray([1], dtype=np.uint32)

    depth = np.zeros((16, 16), dtype=np.float32)
    depth[5, 5] = 1000.0
    # Plant large decoy depths just outside a symmetric window centred on
    # (5, 5); an asymmetric or oversized window would pick some of them up.
    for row, col in ((3, 5), (5, 2), (5, 3), (5, 7), (5, 8)):
        depth[row, col] = 5000.0

    sampled = rpt.sample_depth_for_poses(joints, counts, [depth], window_size=3)

    # Only the centre pixel's 1000.0 may contribute to the sampled depth.
    np.testing.assert_allclose(sampled[0, 0, 0], [5.0, 5.0, 1000.0, 1.0], rtol=1e-6, atol=1e-6)
|
|
|
|
|
|
def test_sample_depth_for_poses_ignores_out_of_bounds_joints():
    """A joint outside the 16x16 depth image must produce an all-zero output."""
    joints = np.zeros((1, 1, 1, 3), dtype=np.float32)
    joints[0, 0, 0] = [99, -4, 0.7]  # u beyond width, v negative
    counts = np.asarray([1], dtype=np.uint32)
    depth = np.full((16, 16), 3000, dtype=np.float32)

    sampled = rpt.sample_depth_for_poses(joints, counts, [depth])

    np.testing.assert_array_equal(sampled[0, 0, 0], np.zeros((4,), dtype=np.float32))
|
|
|
|
|
|
def test_apply_depth_offsets_uses_joint_name_mapping():
    """Per-joint depth offsets are looked up by name; unknown names are untouched."""
    uvd = np.zeros((1, 1, 3, 4), dtype=np.float32)
    uvd[0, 0, :, 2] = 3000.0  # identical depth for all three joints
    uvd[0, 0, :, 3] = 1.0

    shifted = rpt.apply_depth_offsets(uvd, ["nose", "shoulder_left", "unknown_joint"])

    # Expected offsets: nose +5, shoulder_left +30, unrecognised name +0
    # (offset table lives in the native rpt module).
    np.testing.assert_allclose(shifted[0, 0, :, 2], [3005.0, 3030.0, 3000.0], rtol=1e-6, atol=1e-6)
    # The input array must not be mutated in place.
    np.testing.assert_allclose(uvd[0, 0, :, 2], [3000.0, 3000.0, 3000.0], rtol=1e-6, atol=1e-6)
|
|
|
|
|
|
def test_lift_depth_poses_to_world_matches_camera_projection():
    """Lifting (u, v, depth) through the pinhole camera must invert projection."""
    uvd = np.zeros((1, 1, 2, 4), dtype=np.float32)
    uvd[0, 0, 0] = [100.0, 200.0, 3000.0, 0.9]
    uvd[0, 0, 1] = [0.0, 0.0, 0.0, 0.0]  # invalid joint stays zero

    lifted = rpt.lift_depth_poses_to_world(uvd, [make_camera("Camera 1")])

    # With fx = fy = 1000 and zero principal point, (100, 200) at depth 3000
    # lifts to (0.3, 0.6, 3.0) — depth appears to be scaled by 1/1000
    # (mm -> m, presumably); the confidence 0.9 is carried through.
    np.testing.assert_allclose(lifted[0, 0, 0], [0.3, 0.6, 3.0, 0.9], rtol=1e-6, atol=1e-6)
    np.testing.assert_array_equal(lifted[0, 0, 1], np.zeros((4,), dtype=np.float32))
|
|
|
|
|
|
def test_merge_rgbd_views_merges_identical_world_poses():
    """Two views seeing the same person at the same depth merge into one pose."""
    config = make_config(2)
    body = make_body_2d()

    detections = np.zeros((2, 1, len(JOINT_NAMES), 3), dtype=np.float32)
    detections[0, 0] = body
    detections[1, 0] = body  # identical detection in the second view
    counts = np.asarray([1, 1], dtype=np.uint32)
    depths = [np.full((256, 256), 3000, dtype=np.float32) for _ in range(2)]

    per_view = rpt.lift_depth_poses_to_world(
        rpt.apply_depth_offsets(
            rpt.sample_depth_for_poses(detections, counts, depths), JOINT_NAMES
        ),
        config.cameras,
    )
    merged = rpt.merge_rgbd_views(per_view, counts, config)

    assert merged.shape == (1, len(JOINT_NAMES), 4)
    # All joints except the last ("head") should match view 0's lifted pose.
    np.testing.assert_allclose(merged[0, :-1], per_view[0, 0, :-1], rtol=1e-5, atol=1e-5)
    # The merged "head" is expected at the midpoint of the ear joints
    # (indices 3 and 4), carrying the smaller of their confidences.
    ear_left, ear_right = per_view[0, 0, 3], per_view[0, 0, 4]
    expected_head = (ear_left + ear_right) * 0.5
    expected_head[3] = min(ear_left[3], ear_right[3])
    np.testing.assert_allclose(merged[0, -1], expected_head, rtol=1e-5, atol=1e-5)
|
|
|
|
|
|
def test_reconstruct_rgbd_matches_manual_pipeline_and_single_view_person():
    """reconstruct_rgbd must equal sample -> offset -> lift -> merge run by hand,
    and must keep a person detected in only one of the two views."""
    config = make_config(2)

    detections = np.zeros((2, 1, len(JOINT_NAMES), 3), dtype=np.float32)
    detections[0, 0] = make_body_2d()  # person visible only in view 0
    counts = np.asarray([1, 0], dtype=np.uint32)
    depths = [
        np.full((256, 256), 3000, dtype=np.float32),
        np.zeros((256, 256), dtype=np.float32),  # empty second view
    ]

    # Run the four pipeline stages explicitly as the reference result.
    sampled = rpt.sample_depth_for_poses(detections, counts, depths)
    offset = rpt.apply_depth_offsets(sampled, JOINT_NAMES)
    lifted = rpt.lift_depth_poses_to_world(offset, config.cameras)
    manual = rpt.merge_rgbd_views(lifted, counts, config)

    reconstructed = rpt.reconstruct_rgbd(detections, counts, depths, config)

    assert reconstructed.shape == (1, len(JOINT_NAMES), 4)
    np.testing.assert_allclose(reconstructed, manual, rtol=1e-5, atol=1e-5)
    # The single-view person survives merging with a useful number of joints.
    assert np.count_nonzero(reconstructed[0, :, 3] > 0.0) >= 7
|