# NOTE(review): the following listing metadata was accidentally pasted from a
# file-browser view; preserved here as a comment so the module stays valid Python.
#   Files: zed-playground/workspaces/py_workspace/compare_pose_sets.py
#   2026-03-06 17:17:59 +08:00 — 503 lines, 15 KiB, Python
#!/usr/bin/env python3
"""
Compare two camera pose sets from different world frames using rigid alignment.
Assumes both pose sets are in world_from_cam convention.
"""
import json
import sys
from pathlib import Path
from typing import Final
import click
import numpy as np
import plotly.graph_objects as go
def parse_pose(pose_str: str, context: str = "") -> np.ndarray:
    """Parse a whitespace-separated 16-value string into a validated 4x4 pose.

    The result is checked to be a rigid homogeneous transform: bottom row
    [0, 0, 0, 1], orthonormal 3x3 rotation block, and determinant ~= +1.

    Args:
        pose_str: 16 numbers (row-major 4x4 matrix) separated by whitespace.
        context: Human-readable provenance string included in error messages.

    Returns:
        The pose as a (4, 4) numpy array.

    Raises:
        ValueError: on a wrong value count or any failed rigid-transform check.
    """
    values = [float(token) for token in pose_str.split()]
    if len(values) != 16:
        raise ValueError(f"[{context}] Expected 16 values for pose, got {len(values)}")
    pose = np.array(values).reshape((4, 4))

    # A homogeneous rigid transform must have [0, 0, 0, 1] as its bottom row.
    bottom_row = pose[3, :]
    if not np.allclose(bottom_row, np.array([0, 0, 0, 1], dtype=float), atol=1e-5):
        raise ValueError(
            f"[{context}] Invalid last row in transformation matrix: {bottom_row}. "
            f"Expected [0, 0, 0, 1]"
        )

    # Orthonormality of the rotation block: R @ R.T must be the identity.
    rot = pose[:3, :3]
    if not np.allclose(rot @ rot.T, np.eye(3), atol=1e-3):
        raise ValueError(
            f"[{context}] Rotation block is not orthonormal (R @ R.T != I)."
        )

    # det(R) == +1 rules out reflections and scaling.
    det = np.linalg.det(rot)
    if not np.allclose(det, 1.0, atol=1e-3):
        raise ValueError(
            f"[{context}] Rotation block determinant is {det:.6f}, expected 1.0 (improper rotation or scaling)."
        )
    return pose
def load_poses_from_json(path: str) -> dict[str, np.ndarray]:
    """
    Heuristically load poses from a JSON file.
    Supports:
    1) flat: {"serial": {"pose": "..."}}
    2) nested Fusion: {"serial": {"FusionConfiguration": {"pose": "..."}}}
    """
    with open(path, "r") as f:
        data = json.load(f)

    poses: dict[str, np.ndarray] = {}
    for serial, entry in data.items():
        if not isinstance(entry, dict):
            continue
        context = f"File: {path}, Serial: {serial}"
        fusion = entry.get("FusionConfiguration")
        if isinstance(fusion, dict):
            # Nested layout takes precedence; a FusionConfiguration dict
            # without a "pose" key deliberately skips this serial (it does
            # NOT fall back to a flat "pose" at the top level).
            if "pose" in fusion:
                poses[str(serial)] = parse_pose(fusion["pose"], context=context)
        elif "pose" in entry:
            # Flat layout: pose string sits directly on the entry.
            poses[str(serial)] = parse_pose(entry["pose"], context=context)

    if not poses:
        raise click.UsageError(
            f"No parsable poses found in {path}.\n"
            "Expected formats:\n"
            ' 1) Flat: {"serial": {"pose": "..."}}\n'
            ' 2) Nested: {"serial": {"FusionConfiguration": {"pose": "..."}}}'
        )
    return poses
def serialize_pose(pose: np.ndarray) -> str:
    """Flatten a 4x4 pose into a space-separated string with 6 decimal places."""
    return " ".join(format(value, ".6f") for value in pose.flatten())
def rigid_transform_3d(A: np.ndarray, B: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Find rigid alignment (R, t) such that R*A + t approx B.
    A, B are (N, 3) arrays of points.
    Uses Kabsch algorithm.
    """
    assert A.shape == B.shape
    mean_a = np.mean(A, axis=0)
    mean_b = np.mean(B, axis=0)
    # Cross-covariance of the mean-centered point sets.
    covariance = (A - mean_a).T @ (B - mean_b)
    U, _, Vt = np.linalg.svd(covariance)
    rotation = Vt.T @ U.T
    # Reflection guard: flip the last singular direction if det(R) < 0
    # so the result is a proper rotation.
    if np.linalg.det(rotation) < 0:
        Vt[2, :] *= -1
        rotation = Vt.T @ U.T
    translation = mean_b - rotation @ mean_a
    return rotation, translation
def get_camera_center(pose: np.ndarray) -> np.ndarray:
    """Return the camera center (translation column) of a world_from_cam pose."""
    translation = pose[:3, 3]
    return translation
def get_camera_up(pose: np.ndarray) -> np.ndarray:
    """Return the camera's world-space up direction.

    In the OpenCV convention Y points down, so up is the negated second
    rotation column (the camera's y-axis expressed in world coordinates).
    """
    y_axis_world = pose[:3, 1]
    return -y_axis_world
def rotation_error_deg(R1: np.ndarray, R2: np.ndarray) -> float:
    """Return the geodesic angle in degrees between rotations R1 and R2.

    Uses the relative rotation's trace: cos(theta) = (trace(R1.T @ R2) - 1) / 2.

    Args:
        R1, R2: 3x3 rotation matrices.

    Returns:
        Angle in [0, 180] degrees as a Python float.
    """
    R_rel = R1.T @ R2
    cos_theta = (np.trace(R_rel) - 1.0) / 2.0
    # Clip so floating-point round-off cannot push arccos out of its domain.
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    # Cast: np.degrees yields np.float64, but the signature promises float.
    return float(np.degrees(np.arccos(cos_theta)))
def angle_between_vectors_deg(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the unsigned angle in degrees between vectors v1 and v2.

    Args:
        v1, v2: Nonzero 3-vectors (a zero vector would divide by zero when
            normalizing; callers here pass camera up-axes, which are unit-ish).

    Returns:
        Angle in [0, 180] degrees as a Python float.
    """
    v1_u = v1 / np.linalg.norm(v1)
    v2_u = v2 / np.linalg.norm(v2)
    # Clip so floating-point round-off cannot push arccos out of its domain.
    cos_theta = np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)
    # Cast: np.degrees yields np.float64, but the signature promises float.
    return float(np.degrees(np.arccos(cos_theta)))
def add_camera_trace(
    fig: go.Figure,
    pose: np.ndarray,
    label: str,
    scale: float = 0.2,
    frustum_scale: float = 0.5,
    fov_deg: float = 60.0,
    color: str = "blue",
):
    """
    Adds a camera frustum and axes to the Plotly figure.

    Draws three traces: the wireframe frustum, a labeled marker at the camera
    center, and the camera's local axes colored RGB = XYZ.
    """
    rotation = pose[:3, :3]
    center = pose[:3, 3]

    # Frustum corners in camera-local coordinates (OpenCV: X right, Y down,
    # Z forward); the image-plane rectangle sits at z = frustum_scale.
    half_w = frustum_scale * np.tan(np.radians(fov_deg) / 2.0)
    half_h = half_w * 0.75  # 4:3 aspect ratio assumption
    corners_local = np.array(
        [
            [0, 0, 0],                          # Apex (camera center)
            [-half_w, -half_h, frustum_scale],  # Top-Left
            [half_w, -half_h, frustum_scale],   # Top-Right
            [half_w, half_h, frustum_scale],    # Bottom-Right
            [-half_w, half_h, frustum_scale],   # Bottom-Left
        ]
    )
    corners_world = (rotation @ corners_local.T).T + center

    # One polyline per figure trace; None entries break the line between
    # segments. Segments: four apex rays, then the image-plane rectangle.
    xs: list = []
    ys: list = []
    zs: list = []
    for i, j in [(0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4), (4, 1)]:
        xs.extend([corners_world[i, 0], corners_world[j, 0], None])
        ys.extend([corners_world[i, 1], corners_world[j, 1], None])
        zs.extend([corners_world[i, 2], corners_world[j, 2], None])

    fig.add_trace(
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode="lines",
            line=dict(color=color, width=2),
            name=f"{label} Frustum",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Labeled marker at the camera center.
    fig.add_trace(
        go.Scatter3d(
            x=[center[0]],
            y=[center[1]],
            z=[center[2]],
            mode="markers+text",
            marker=dict(size=4, color="black"),
            text=[label],
            textposition="top center",
            name=label,
            showlegend=True,
        )
    )

    # Camera local axes in world coordinates (rotation columns), RGB = XYZ.
    for column, axis_color in zip(range(3), ["red", "green", "blue"]):
        tip = center + rotation[:, column] * scale
        fig.add_trace(
            go.Scatter3d(
                x=[center[0], tip[0]],
                y=[center[1], tip[1]],
                z=[center[2], tip[2]],
                mode="lines",
                line=dict(color=axis_color, width=3),
                showlegend=False,
                hoverinfo="skip",
            )
        )
@click.command()
@click.option(
    "--pose-a-json",
    type=click.Path(exists=True),
    required=True,
    help="Pose set A. Supports flat {'serial': {'pose': '...'}} or nested FusionConfiguration format.",
)
@click.option(
    "--pose-b-json",
    type=click.Path(exists=True),
    required=True,
    help="Pose set B. Supports flat {'serial': {'pose': '...'}} or nested FusionConfiguration format.",
)
@click.option(
    "--report-json",
    type=click.Path(),
    required=True,
    help="Output path for comparison report",
)
@click.option(
    "--aligned-pose-b-json",
    type=click.Path(),
    help="Output path for aligned pose B set",
)
@click.option(
    "--plot-output",
    type=click.Path(),
    help="Output path for visualization (HTML or PNG)",
)
@click.option(
    "--show-plot",
    is_flag=True,
    default=False,
    help="Show the plot interactively",
)
@click.option(
    "--frustum-scale",
    type=float,
    default=0.3,
    help="Scale of the camera frustum",
)
@click.option(
    "--axis-scale",
    type=float,
    default=0.1,
    help="Scale of the camera axes",
)
def main(
    pose_a_json: str,
    pose_b_json: str,
    report_json: str,
    aligned_pose_b_json: str | None,
    plot_output: str | None,
    show_plot: bool,
    frustum_scale: float,
    axis_scale: float,
):
    """
    Compare two camera pose sets from different world frames using rigid alignment.
    Both are treated as T_world_from_cam.
    Supports symmetric, heuristic input parsing for both A and B:
    1) flat: {"serial": {"pose": "..."}}
    2) nested Fusion: {"serial": {"FusionConfiguration": {"pose": "..."}}}
    """
    poses_a = load_poses_from_json(pose_a_json)
    poses_b = load_poses_from_json(pose_b_json)

    # Only cameras present in both sets can be compared; sort for stable output.
    shared_serials = sorted(list(set(poses_a.keys()) & set(poses_b.keys())))
    if len(shared_serials) < 3:
        # 3D rigid alignment needs at least 3 point correspondences.
        click.echo(
            f"Error: Found only {len(shared_serials)} shared serials ({shared_serials}). Need at least 3.",
            err=True,
        )
        sys.exit(1)

    # Camera centers of each shared camera, expressed in each world frame.
    pts_b = np.array([get_camera_center(poses_b[s]) for s in shared_serials])
    pts_a = np.array([get_camera_center(poses_a[s]) for s in shared_serials])

    # Align B to A: R_align * pts_b + t_align approx pts_a
    R_align, t_align = rigid_transform_3d(pts_b, pts_a)
    T_align = np.eye(4)
    T_align[:3, :3] = R_align
    T_align[:3, 3] = t_align

    # Residual per-camera errors after mapping each B pose into A's world frame.
    per_cam_results = []
    pos_errors = []
    rot_errors = []
    up_errors = []
    for s in shared_serials:
        T_b = poses_b[s]
        T_a = poses_a[s]
        # T_world_a_from_cam = T_world_a_from_world_b * T_world_b_from_cam
        T_b_aligned = T_align @ T_b
        pos_err = np.linalg.norm(
            get_camera_center(T_b_aligned) - get_camera_center(T_a)
        )
        rot_err = rotation_error_deg(T_b_aligned[:3, :3], T_a[:3, :3])
        # Up-vector consistency: angle between the two cameras' world-space
        # up directions (-Y in the OpenCV camera convention).
        up_b = get_camera_up(T_b_aligned)
        up_a = get_camera_up(T_a)
        up_err = angle_between_vectors_deg(up_b, up_a)
        per_cam_results.append(
            {
                "serial": s,
                "position_error_m": float(pos_err),
                "rotation_error_deg": float(rot_err),
                "up_consistency_error_deg": float(up_err),
            }
        )
        pos_errors.append(pos_err)
        rot_errors.append(rot_err)
        up_errors.append(up_err)

    # JSON report: the recovered alignment plus per-camera and summary stats.
    report = {
        "shared_serials": shared_serials,
        "alignment": {
            "R_align": R_align.tolist(),
            "t_align": t_align.tolist(),
            "T_align": T_align.tolist(),
        },
        "per_camera": per_cam_results,
        "summary": {
            "mean_position_error_m": float(np.mean(pos_errors)),
            "max_position_error_m": float(np.max(pos_errors)),
            "mean_rotation_error_deg": float(np.mean(rot_errors)),
            "max_rotation_error_deg": float(np.max(rot_errors)),
            "mean_up_consistency_error_deg": float(np.mean(up_errors)),
            "max_up_consistency_error_deg": float(np.max(up_errors)),
        },
    }
    Path(report_json).parent.mkdir(parents=True, exist_ok=True)
    with open(report_json, "w") as f:
        json.dump(report, f, indent=4)
    click.echo(f"Report written to {report_json}")

    # Optionally write out EVERY B pose (not only shared serials) mapped into
    # A's world frame, in the flat {"serial": {"pose": "..."}} format.
    if aligned_pose_b_json:
        aligned_data = {}
        for s, T_b in poses_b.items():
            T_b_aligned = T_align @ T_b
            aligned_data[s] = {"pose": serialize_pose(T_b_aligned)}
        Path(aligned_pose_b_json).parent.mkdir(parents=True, exist_ok=True)
        with open(aligned_pose_b_json, "w") as f:
            json.dump(aligned_data, f, indent=4)
        click.echo(f"Aligned pose B set written to {aligned_pose_b_json}")

    if plot_output or show_plot:
        fig = go.Figure()
        # World-frame axes at the origin (RGB = XYZ); compile-time toggle.
        show_axis: Final[bool] = True
        if show_axis:
            for axis, color in zip(
                [np.eye(3)[:, 0], np.eye(3)[:, 1], np.eye(3)[:, 2]],
                ["red", "green", "blue"],
            ):
                fig.add_trace(
                    go.Scatter3d(
                        x=[0, axis[0] * axis_scale],
                        y=[0, axis[1] * axis_scale],
                        z=[0, axis[2] * axis_scale],
                        mode="lines",
                        line=dict(color=color, width=4),
                        name=f"World {'XYZ'[np.argmax(axis)]}",
                        showlegend=True,
                    )
                )
        # Optional translucent ground plane at y == 0; disabled by default.
        show_ground: Final[bool] = False
        if show_ground:
            ground_size = 5.0
            half_size = ground_size / 2.0
            x_grid = np.linspace(-half_size, half_size, 2)
            z_grid = np.linspace(-half_size, half_size, 2)
            x_mesh, z_mesh = np.meshgrid(x_grid, z_grid)
            y_mesh = np.zeros_like(x_mesh)
            fig.add_trace(
                go.Surface(
                    x=x_mesh,
                    y=y_mesh,
                    z=z_mesh,
                    showscale=False,
                    opacity=0.1,
                    colorscale=[[0, "gray"], [1, "gray"]],
                    name="Ground Plane",
                    hoverinfo="skip",
                )
            )
        # Draw set A in blue and aligned set B in orange (all cameras in each
        # set, not just the shared serials used for alignment).
        for s in sorted(poses_a.keys()):
            add_camera_trace(
                fig,
                poses_a[s],
                f"a_{s}",
                scale=axis_scale,
                frustum_scale=frustum_scale,
                color="blue",
            )
        for s in sorted(poses_b.keys()):
            T_b_aligned = T_align @ poses_b[s]
            add_camera_trace(
                fig,
                T_b_aligned,
                f"b_{s}",
                scale=axis_scale,
                frustum_scale=frustum_scale,
                color="orange",
            )
        # Scene axis labels follow the OpenCV convention used throughout;
        # the viewer's up is -Y to match "Y down".
        fig.update_layout(
            title="Pose A vs Aligned Pose B",
            scene=dict(
                xaxis_title="X (Right)",
                yaxis_title="Y (Down)",
                zaxis_title="Z (Forward)",
                aspectmode="data",
                camera=dict(
                    up=dict(x=0, y=-1, z=0),
                    eye=dict(x=1.5, y=-1.5, z=1.5),
                ),
            ),
            margin=dict(l=0, r=0, b=0, t=40),
        )
        if plot_output:
            if plot_output.endswith(".html"):
                fig.write_html(plot_output)
                click.echo(f"Plot saved to {plot_output}")
            else:
                # Static image export requires the kaleido package; on failure
                # fall back to writing an HTML file alongside the requested path.
                try:
                    fig.write_image(plot_output)
                    click.echo(f"Plot saved to {plot_output}")
                except Exception as e:
                    click.echo(f"Error saving image (ensure kaleido is installed): {e}")
                    if not plot_output.endswith(".html"):
                        html_out = str(Path(plot_output).with_suffix(".html"))
                        fig.write_html(html_out)
                        click.echo(f"Fallback: Plot saved to {html_out}")
        if show_plot:
            fig.show()
if __name__ == "__main__":
    # Click parses CLI arguments itself, so no explicit arguments are passed.
    main()  # pylint: disable=no-value-for-parameter