# File: zed-playground/py_workspace/visualize_extrinsics.py (Python, 550 lines, 16 KiB)

"""
Utility script to visualize camera extrinsics from a JSON file using Plotly.
"""
import json
import click
import numpy as np
import plotly.graph_objects as go
from typing import Any, Dict, Optional, List, Tuple
import configparser
from pathlib import Path
import re
import sys
# Resolution names accepted by the --resolution option; each name maps to itself.
RESOLUTION_MAP: Dict[str, str] = {
    name: name for name in ("FHD1200", "FHD", "2K", "HD", "SVGA", "VGA")
}
def parse_pose(pose_str: str) -> np.ndarray:
    """Parse a 16-value pose string into a 4x4 matrix.

    Values are read in row-major order. Whitespace is the primary separator;
    commas, semicolons and square brackets are also treated as separators so
    that the ``str()`` of a flat or nested list of numbers parses as well.

    Args:
        pose_str: Text holding exactly 16 numbers.

    Returns:
        A ``(4, 4)`` float array.

    Raises:
        ValueError: If the input is not a string, a token is not a number,
            or the number of values is not 16.
    """
    if not isinstance(pose_str, str):
        # Keep the documented failure type for non-string input.
        raise ValueError(
            f"Failed to parse pose string: expected str, got {type(pose_str).__name__}"
        )

    tokens = re.sub(r"[\[\],;]", " ", pose_str).split()
    try:
        vals = [float(tok) for tok in tokens]
    except ValueError as err:
        # Chain the cause so the offending token stays visible in tracebacks.
        raise ValueError(f"Failed to parse pose string: {err}") from err

    if len(vals) != 16:
        raise ValueError(
            f"Failed to parse pose string: Expected 16 values, got {len(vals)}"
        )
    return np.array(vals).reshape((4, 4))
def load_zed_configs(
    paths: List[str], resolution: str, eye: str
) -> Dict[str, Dict[str, float]]:
    """Load camera intrinsics from ZED ``SN<serial>.conf`` files.

    Each entry of ``paths`` is either a config file or a directory, in which
    case every ``SN*.conf`` file directly inside it is used. Intrinsics are
    read from the section ``<EYE>_CAM_<RESOLUTION>`` (e.g. ``LEFT_CAM_FHD1200``).

    Args:
        paths: Config files and/or directories to scan.
        resolution: Resolution name; matched case-insensitively against the
            known aliases, otherwise upper-cased and used as-is.
        eye: ``"left"`` or ``"right"``; upper-cased to build the section name.

    Returns:
        ``{"all": intrinsics}`` when exactly one file was considered and it
        yielded usable intrinsics. Otherwise a mapping from serial string to
        an intrinsics dict (``fx``, ``fy``, ``cx``, ``cy``), plus a
        ``"default"`` entry holding the last usable intrinsics parsed.
        Files that cannot be read, lack the section, or have a non-positive
        focal length are skipped with a warning.
    """
    # Map resolution to section suffix
    res_map = {
        "1200": "FHD1200",
        "fhd": "FHD",
        "2k": "2K",
        "hd": "HD",
        "svga": "SVGA",
        "vga": "VGA",
    }
    res_suffix = res_map.get(resolution.lower(), resolution.upper())
    section_name = f"{eye.upper()}_CAM_{res_suffix}"

    all_files: List[Path] = []
    for p in paths:
        path = Path(p)
        if path.is_dir():
            # Sorted so that the "default" entry does not depend on glob order.
            all_files.extend(sorted(path.glob("SN*.conf")))
        else:
            all_files.append(path)

    configs: Dict[str, Dict[str, float]] = {}
    for f in all_files:
        # Extract serial from filename SN<serial>.conf
        match = re.search(r"SN(\d+)", f.name)
        serial = match.group(1) if match else None

        parser = configparser.ConfigParser()
        try:
            # ConfigParser.read() skips unreadable files silently and returns
            # the list of files it actually parsed.
            if not parser.read(f):
                print(f"Warning: Could not read config {f}")
                continue
            if section_name not in parser:
                print(f"Warning: Section [{section_name}] not found in {f}")
                continue
            sect = parser[section_name]
            intrinsics = {
                key: float(sect.get(key, 0)) for key in ("fx", "fy", "cx", "cy")
            }
        except (configparser.Error, ValueError, OSError) as e:
            print(f"Warning: Failed to parse config {f}: {e}")
            continue

        # A missing focal length defaults to 0 and would cause a division by
        # zero when the frustum is built from these intrinsics.
        if intrinsics["fx"] <= 0 or intrinsics["fy"] <= 0:
            print(f"Warning: Ignoring config {f}: non-positive focal length")
            continue

        if serial:
            configs[serial] = intrinsics
        # Always store as default in case it's the only file
        configs["default"] = intrinsics

    # If only one config was provided, apply to all
    if len(all_files) == 1 and "default" in configs:
        return {"all": configs["default"]}
    return configs
def get_frustum_points(
    intrinsics: Optional[Dict[str, float]],
    frustum_scale: float,
    fov_deg: float,
) -> np.ndarray:
    """Return the five frustum vertices in local camera coordinates.

    Local frame follows the OpenCV convention: +X right, +Y down, +Z forward.
    The far plane sits at ``Z = frustum_scale``.

    Args:
        intrinsics: Optional dict with ``fx``, ``fy``, ``cx``, ``cy``. Used
            only when all four are present and strictly positive; otherwise
            the FOV fallback is taken.
        frustum_scale: Distance from the camera center to the far plane.
        fov_deg: Horizontal field of view in degrees for the fallback; the
            vertical extent assumes a 4:3 aspect ratio.

    Returns:
        A ``(5, 3)`` array: camera center, then the top-left, top-right,
        bottom-right and bottom-left corners of the far plane.
    """
    keys = ("fx", "fy", "cx", "cy")
    # Zero or negative values (e.g. defaults for missing config entries) would
    # divide by zero or collapse the frustum, so they trigger the FOV fallback.
    usable = (
        bool(intrinsics)
        and all(k in intrinsics for k in keys)
        and all(intrinsics[k] > 0 for k in keys)
    )

    if usable:
        # Back-project a pixel offset to the far plane: x = (u - cx) * Z / fx.
        # Image dimensions are unknown, so the principal point is taken as the
        # image center, i.e. the half-extent in pixels is cx (resp. cy).
        w = (intrinsics["cx"] / intrinsics["fx"]) * frustum_scale
        h = (intrinsics["cy"] / intrinsics["fy"]) * frustum_scale
    else:
        fov_rad = np.radians(fov_deg)
        # Assuming horizontal FOV
        w = frustum_scale * np.tan(fov_rad / 2.0)
        h = w * 0.75  # 4:3 aspect ratio assumption

    # With +Y pointing down, negative Y is the top of the image.
    return np.array(
        [
            [0, 0, 0],  # Center
            [-w, -h, frustum_scale],  # Top-Left
            [w, -h, frustum_scale],  # Top-Right
            [w, h, frustum_scale],  # Bottom-Right
            [-w, h, frustum_scale],  # Bottom-Left
        ]
    )
def add_camera_trace(
    fig: go.Figure,
    pose: np.ndarray,
    label: str,
    scale: float = 0.2,
    convention: str = "world_from_cam",
    frustum_scale: float = 0.5,
    fov_deg: float = 60.0,
    intrinsics: Optional[Dict[str, float]] = None,
    color: str = "blue",
):
    """
    Draw one camera into ``fig``: a wireframe frustum, a labelled center
    marker, and its three local axes (X red, Y green, Z blue).

    ``pose`` is a 4x4 matrix read according to ``convention``: for
    "cam_from_world" it is inverted to obtain the camera center and
    orientation in world coordinates; any other value treats it as
    world-from-camera. ``scale`` is the axis length, ``color`` the frustum
    line color. Five traces are appended to ``fig``.
    """
    rot, trans = pose[:3, :3], pose[:3, 3]
    if convention == "cam_from_world":
        # Invert the rigid transform: C = -R^T t, R_world_from_cam = R^T
        origin = -rot.T @ trans
        cam_to_world = rot.T
    else:
        origin = trans
        cam_to_world = rot

    # Frustum vertices (apex first, then the four far-plane corners) in world.
    corners_local = get_frustum_points(intrinsics, frustum_scale, fov_deg)
    verts = (cam_to_world @ corners_local.T).T + origin

    # Four pyramid sides from the apex, followed by the far-plane rectangle.
    edges = [(0, k) for k in range(1, 5)] + [(1, 2), (2, 3), (3, 4), (4, 1)]
    xs, ys, zs = [], [], []
    for a, b in edges:
        # A None entry ends the segment so edges are not joined together.
        xs += [verts[a, 0], verts[b, 0], None]
        ys += [verts[a, 1], verts[b, 1], None]
        zs += [verts[a, 2], verts[b, 2], None]

    fig.add_trace(
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode="lines",
            line=dict(color=color, width=2),
            name=f"{label} Frustum",
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Labelled marker at the camera center; the only trace shown in the legend.
    fig.add_trace(
        go.Scatter3d(
            x=[origin[0]],
            y=[origin[1]],
            z=[origin[2]],
            mode="markers+text",
            marker=dict(size=4, color="black"),
            text=[label],
            textposition="top center",
            name=label,
            showlegend=True,
        )
    )

    # Local axes in the world frame are the columns of the rotation (RGB = XYZ).
    for axis_color, direction in zip(("red", "green", "blue"), cam_to_world.T):
        fig.add_trace(
            go.Scatter3d(
                x=[origin[0], origin[0] + direction[0] * scale],
                y=[origin[1], origin[1] + direction[1] * scale],
                z=[origin[2], origin[2] + direction[2] * scale],
                mode="lines",
                line=dict(color=axis_color, width=3),
                showlegend=False,
                hoverinfo="skip",
            )
        )
def run_diagnostics(poses: Dict[str, np.ndarray], convention: str):
    """
    Print numerical sanity checks for a set of camera poses.

    Args:
        poses: Mapping from camera identifier to a 4x4 pose matrix.
        convention: "cam_from_world" inverts each pose to get the camera
            center and world-from-camera rotation; any other value uses the
            pose as world-from-camera directly.

    The report (written to stdout) covers:
      1. Rotation orthonormality (residual of R R^T - I and determinant).
      2. Coplanarity of camera centers via singular values (3+ cameras).
      3. Agreement of the camera forward (+Z) axes with their mean.
      4. Alignment of the camera Y axes with world +Y.
      5. Per-axis spread of the camera centers.

    With no poses only the header is printed.
    """
    print("\n--- Diagnostics ---")
    print(f"Pose Convention: {convention}")

    if not poses:
        # The array slicing below needs at least one rotation matrix.
        print("No poses to diagnose.")
        return

    centers = []
    rotations = []
    serials = []
    for serial, pose in poses.items():
        serials.append(serial)
        R = pose[:3, :3]
        t = pose[:3, 3]
        if convention == "cam_from_world":
            c = -R.T @ t
            R_world = R.T
        else:
            c = t
            R_world = R
        centers.append(c)
        rotations.append(R_world)
    centers = np.array(centers)
    rotations = np.array(rotations)

    # 1. Orthonormality check
    print("\n[Rotation Orthonormality]")
    max_resid = 0.0
    for serial, R_mat in zip(serials, rotations):
        resid = np.linalg.norm(R_mat @ R_mat.T - np.eye(3))
        det = np.linalg.det(R_mat)
        max_resid = max(max_resid, resid)
        if resid > 1e-3 or abs(det - 1.0) > 1e-3:
            print(
                f" WARN: Camera {serial} rotation invalid! Resid={resid:.6f}, Det={det:.6f}"
            )
    print(f" Max orthonormality residual: {max_resid:.6e}")

    # 2. Coplanarity of centers
    if len(centers) >= 3:
        print("\n[Center Coplanarity]")
        # Singular values of the mean-centered points; only the values are
        # needed, so the U / V^T factors are not computed.
        centered = centers - np.mean(centers, axis=0)
        s = np.linalg.svd(centered, compute_uv=False)
        print(f" Singular values: {s}")
        # If planar, smallest singular value should be small
        planarity_ratio = s[2] / (s[0] + 1e-9)
        print(f" Planarity ratio (s3/s1): {planarity_ratio:.4f}")
        if planarity_ratio < 0.05:
            print(" -> Centers appear roughly coplanar.")
        else:
            print(" -> Centers are NOT coplanar.")

    # 3. Forward consistency (Z axis)
    print("\n[Forward Axis Consistency]")
    z_axes = rotations[:, :, 2]  # All Z axes
    mean_z = np.mean(z_axes, axis=0)
    mean_norm = np.linalg.norm(mean_z)
    if mean_norm < 1e-9:
        # Forward axes cancel out (e.g. cameras facing each other).
        # Normalising would yield NaN and silently skip the warning below.
        print(" Mean forward direction: undefined (forward axes cancel out)")
        print(" WARN: Cameras pointing in significantly different directions.")
    else:
        mean_z = mean_z / mean_norm
        min_dot = np.min(z_axes @ mean_z)
        print(f" Mean forward direction: {mean_z}")
        print(f" Min alignment with mean: {min_dot:.4f}")
        if min_dot < 0.8:
            print(" WARN: Cameras pointing in significantly different directions.")

    # 4. Up consistency
    # In the OpenCV camera frame +Y points down, so for an upright camera in a
    # Y-up world the camera Y axis is anti-parallel to world +Y (dot ~ -1).
    print("\n[Up Axis Consistency]")
    y_axes = rotations[:, :, 1]
    world_up = np.array([0, 1, 0])
    mean_y_dot = np.mean(y_axes @ world_up)
    print(f" Mean alignment of Camera Y (down) with World Y (up): {mean_y_dot:.4f}")
    if mean_y_dot < -0.8:
        print(" -> Cameras appear upright (Camera Y points down).")
    elif mean_y_dot > 0.8:
        print(" -> Cameras appear upside-down (Camera Y points up).")
    else:
        print(" -> Cameras have mixed or horizontal orientation.")

    # 5. Center spread
    print("\n[Center Spread]")
    spread = np.max(centers, axis=0) - np.min(centers, axis=0)
    print(f" Range X: {spread[0]:.3f} m")
    print(f" Range Y: {spread[1]:.3f} m")
    print(f" Range Z: {spread[2]:.3f} m")
@click.command()
@click.option("--input", "-i", required=True, help="Path to input JSON file.")
@click.option(
    "--output", "-o", help="Path to save the output visualization (HTML or PNG)."
)
@click.option("--show", is_flag=True, help="Show the plot interactively.")
@click.option("--scale", type=float, default=0.2, help="Scale of the camera axes.")
@click.option(
    "--birdseye",
    is_flag=True,
    help="Show a top-down bird-eye view (X-Z plane).",
)
@click.option(
    "--pose-convention",
    type=click.Choice(["world_from_cam", "cam_from_world"]),
    default="world_from_cam",
    help="Interpretation of the pose matrix in JSON. Defaults to 'world_from_cam'.",
)
@click.option(
    "--frustum-scale", type=float, default=0.5, help="Scale of the camera frustum."
)
@click.option(
    "--fov",
    type=float,
    default=60.0,
    help="Horizontal FOV in degrees for frustum visualization.",
)
@click.option(
    "--zed-configs",
    multiple=True,
    help="Path to ZED config file(s) or directory containing SN*.conf files.",
)
@click.option(
    "--resolution",
    type=click.Choice(list(RESOLUTION_MAP)),
    default="FHD1200",
    help="Resolution suffix to use from ZED config.",
)
@click.option(
    "--eye",
    type=click.Choice(["left", "right"]),
    default="left",
    help="Which eye's intrinsics to use from ZED config.",
)
@click.option(
    "--diagnose",
    is_flag=True,
    help="Run numerical diagnostics on the poses.",
)
def main(
    input: str,
    output: Optional[str],
    show: bool,
    scale: float,
    birdseye: bool,
    pose_convention: str,
    frustum_scale: float,
    fov: float,
    zed_configs: List[str],
    resolution: str,
    eye: str,
    diagnose: bool,
):
    """Visualize camera extrinsics from JSON using Plotly."""
    # NOTE: click uses the docstring above as the command's --help text, so
    # further documentation is kept in comments.
    #
    # Flow: load the JSON -> parse each entry's "pose" -> optionally print
    # diagnostics -> optionally load ZED intrinsics -> build the 3D figure ->
    # save and/or show it. Problems are reported on stdout and the command
    # returns without raising.
    try:
        with open(input, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"Error reading input file: {e}")
        return

    # The expected layout is an object mapping serial -> {"pose": ...}; any
    # other top-level JSON value (list, number, ...) contains no cameras.
    if not isinstance(data, dict):
        print("No valid camera poses found in the input file.")
        return

    # Parse poses
    poses = {}
    for serial, cam_data in data.items():
        if not isinstance(cam_data, dict) or "pose" not in cam_data:
            continue
        try:
            poses[serial] = parse_pose(str(cam_data["pose"]))
        except ValueError as e:
            print(f"Warning: Skipping camera {serial} due to error: {e}")
    if not poses:
        print("No valid camera poses found in the input file.")
        return

    if diagnose:
        run_diagnostics(poses, pose_convention)

    # Load ZED configs if provided
    zed_intrinsics = {}
    if zed_configs:
        zed_intrinsics = load_zed_configs(list(zed_configs), resolution, eye)
        matched_count = sum(
            1
            for serial in poses
            if "all" in zed_intrinsics or serial in zed_intrinsics
        )
        print(
            f"ZED Configs: matched {matched_count}/{len(poses)} cameras (fallback: {len(poses) - matched_count})"
        )

    # Create Plotly figure
    fig = go.Figure()
    for serial, pose in poses.items():
        # A single shared config ("all") takes precedence over per-serial ones.
        cam_intrinsics = zed_intrinsics.get("all") or zed_intrinsics.get(str(serial))
        add_camera_trace(
            fig,
            pose,
            str(serial),
            scale=scale,
            convention=pose_convention,
            frustum_scale=frustum_scale,
            fov_deg=fov,
            intrinsics=cam_intrinsics,
        )

    # Configure layout
    scene_dict: Dict[str, Any] = dict(
        xaxis_title="X (m)",
        yaxis_title="Y (m)",
        zaxis_title="Z (m)",
        aspectmode="data",  # Important for correct proportions
    )
    if birdseye:
        # Top-down view of the X-Z plane: the viewer sits on the +Y axis and
        # looks toward the origin, with +Z as the on-screen "up" direction.
        scene_dict["camera"] = dict(
            projection=dict(type="orthographic"),
            up=dict(x=0, y=0, z=1),
            eye=dict(x=0, y=2.5, z=0),
        )
    fig.update_layout(
        title=f"Camera Extrinsics ({pose_convention})",
        scene=scene_dict,
        margin=dict(l=0, r=0, b=0, t=40),
        legend=dict(x=0, y=1),
    )

    if output:
        # Match the extension case-insensitively so e.g. "OUT.PNG" is written
        # as an image instead of becoming "OUT.PNG.html".
        lowered = output.lower()
        if lowered.endswith(".html"):
            fig.write_html(output)
            print(f"Saved interactive plot to {output}")
        elif lowered.endswith((".png", ".jpg", ".jpeg")):
            try:
                # Requires kaleido
                fig.write_image(output)
                print(f"Saved static image to {output}")
            except Exception as e:
                print(f"Error saving image (ensure kaleido is installed): {e}")
        else:
            # Default to HTML if unknown extension
            out_path = output + ".html"
            fig.write_html(out_path)
            print(f"Saved interactive plot to {out_path}")

    if show:
        fig.show()
    elif not output and not diagnose:
        print(
            "No output path specified and --show not passed. Plot not saved or shown."
        )
# Script entry point: click reads the command-line arguments and supplies
# them to main(), which is why it is called here without any.
if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter