0bfeec77e4
Implement the next tracker tranche around a recursive articulated state rather than per-frame ad hoc updates. Track state now propagates full pose/velocity/shape covariance, uses process noise during prediction, and drives active-to-lost transitions from both miss counts and recursive score thresholds. The multiview update path replaces the generic SciPy least_squares call with a bounded LM/GN loop that returns parameter and beta covariance blocks, accepted-joint counts, mean reprojection error, and iteration diagnostics. Lost-track handling is stricter and safer: proposal-based reacquisition now requires same-frame 2D support and articulated refinement before a track can return to active. Proposal clusters retain contributing detection indices, the tracker searches broadly within contributing views, and proposal-compatible lost frames are surfaced explicitly instead of silently reviving a track. Old scene JSONs with imgpaths now default to the RPT camera-pose convention so proposal reprojection gating works on the sample scenes. Add ActualTest support diagnostics that summarize event counts, accepted support, reprojection quality, and tracker diagnostics, plus focused regressions for camera conventions, score-driven demotion, covariance behavior, proposal-compatible lost handling, and broader proposal-backed matching.
293 lines
12 KiB
Python
293 lines
12 KiB
Python
from collections import Counter
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
import click
|
|
import cv2
|
|
import numpy as np
|
|
import pyarrow.parquet as pq
|
|
from beartype import beartype
|
|
from loguru import logger
|
|
|
|
from pose_tracking_exp.common.normalization import infer_bbox_from_keypoints, normalize_rtmpose_body20
|
|
from pose_tracking_exp.schema import (
|
|
CameraCalibration,
|
|
CameraFrame,
|
|
FrameBundle,
|
|
PoseDetection,
|
|
SceneConfig,
|
|
TrackerConfig,
|
|
TrackerDiagnostics,
|
|
TrackedFrameResult,
|
|
)
|
|
from pose_tracking_exp.tracking import PoseTracker
|
|
|
|
# Nominal spacing between consecutive bundles, in nanoseconds
# (33_333_333 ns is approximately 1/30 s). Bundle timestamps are synthesized as
# ``bundle_index * _NOMINAL_FRAME_PERIOD_NS`` when segment bundles are built.
_NOMINAL_FRAME_PERIOD_NS = 33_333_333
|
|
|
|
|
|
@dataclass(slots=True)
class ActualTestTrackingSummary:
    """Aggregate statistics for one tracker run over a list of frame results.

    The field comments describe the values as ``summarize_tracking_results``
    populates them.
    """

    # Number of per-bundle results that were summarized.
    bundle_count: int
    # Number of results whose ``active_tracks`` is non-empty.
    active_frames: int
    # Number of results whose ``proposals`` is non-empty.
    proposal_frames: int
    # Largest ``len(active_tracks)`` over all results (0 when there are none).
    max_active_tracks: int
    # Largest ``len(lost_tracks)`` over all results (0 when there are none).
    max_lost_tracks: int
    # Occurrences of each update-event ``action`` across all results.
    update_action_counts: dict[str, int]
    # Mean ``accepted_view_count`` over events where it is positive;
    # ``inf`` when there is no such event.
    mean_accepted_views: float
    # Mean ``accepted_joint_count`` over events where it is positive;
    # ``inf`` when there is no such event.
    mean_accepted_joints: float
    # Mean of the finite per-event ``mean_reprojection_error`` values;
    # ``inf`` when no event carries a finite value.
    mean_reprojection_error: float
    # Tracker diagnostics object passed through unchanged.
    diagnostics: TrackerDiagnostics
|
|
|
|
|
|
def _finite_mean(values: list[float]) -> float:
|
|
finite = [value for value in values if np.isfinite(value)]
|
|
if not finite:
|
|
return np.inf
|
|
return float(np.mean(np.asarray(finite, dtype=np.float64)))
|
|
|
|
|
|
@beartype
def summarize_tracking_results(
    results: list[TrackedFrameResult],
    diagnostics: TrackerDiagnostics,
) -> ActualTestTrackingSummary:
    """Condense per-bundle tracker results into one summary record.

    Args:
        results: Per-bundle tracker outputs, in processing order.
        diagnostics: Tracker diagnostics to attach to the summary unchanged.

    Returns:
        A summary with frame/track counts, per-action event counts and mean
        support statistics. The accepted-view and accepted-joint means only
        consider events with a positive count; the reprojection mean only
        considers finite values. Each mean is ``inf`` when it has no samples.
    """
    # Flatten the update events of every frame into one list.
    events = []
    for frame_result in results:
        events.extend(frame_result.update_events)

    view_counts = [
        float(event.accepted_view_count)
        for event in events
        if event.accepted_view_count > 0
    ]
    joint_counts = [
        float(event.accepted_joint_count)
        for event in events
        if event.accepted_joint_count > 0
    ]
    reprojection_errors = [float(event.mean_reprojection_error) for event in events]

    active_sizes = [len(frame_result.active_tracks) for frame_result in results]
    lost_sizes = [len(frame_result.lost_tracks) for frame_result in results]

    return ActualTestTrackingSummary(
        bundle_count=len(results),
        active_frames=sum(bool(frame_result.active_tracks) for frame_result in results),
        proposal_frames=sum(bool(frame_result.proposals) for frame_result in results),
        max_active_tracks=max(active_sizes, default=0),
        max_lost_tracks=max(lost_sizes, default=0),
        update_action_counts=dict(Counter(event.action for event in events)),
        mean_accepted_views=_finite_mean(view_counts),
        mean_accepted_joints=_finite_mean(joint_counts),
        mean_reprojection_error=_finite_mean(reprojection_errors),
        diagnostics=diagnostics,
    )
|
|
|
|
|
|
@beartype
def format_frame_summary_lines(results: list[TrackedFrameResult]) -> tuple[str, ...]:
    """Render one human-readable summary line per frame result.

    Each line lists the bundle index, the proposal count, the ids of the
    active / lost / tentative tracks, the per-action event counts, and the
    mean of the finite per-event reprojection errors (``"nan"`` when the
    frame has no finite value).
    """
    template = (
        "bundle={} proposals={} active_ids={} lost_ids={} tentative_ids={} actions={} mean_event_reproj={}"
    )

    def _describe(frame_result) -> str:
        # Format a single frame result with the shared template.
        events = frame_result.update_events
        actions = dict(Counter(event.action for event in events))
        errors = np.asarray(
            [
                float(event.mean_reprojection_error)
                for event in events
                if np.isfinite(event.mean_reprojection_error)
            ],
            dtype=np.float64,
        )
        if errors.size:
            reprojection_text = "{:.2f}".format(float(np.mean(errors)))
        else:
            reprojection_text = "nan"
        return template.format(
            frame_result.bundle_index,
            len(frame_result.proposals),
            [track.track_id for track in frame_result.active_tracks],
            [track.track_id for track in frame_result.lost_tracks],
            [track.track_id for track in frame_result.tentative_tracks],
            actions,
            reprojection_text,
        )

    return tuple(_describe(frame_result) for frame_result in results)
|
|
|
|
|
|
@beartype
def load_actual_test_scene(root: Path) -> SceneConfig:
    """Load the camera calibrations under ``root`` into a ``SceneConfig``.

    Reads ``camera_params/camera_params.parquet`` below ``root`` and turns
    every row into a ``CameraCalibration``. The cameras are returned sorted by
    name, together with a fixed room size and room center.
    """
    # The ActualTest parquet is produced on the ChArUco/OpenCV side, so the
    # stored `rvec` / `tvec` are world->camera extrinsics. The RPT-facing camera
    # pose is derived later from this canonical OpenCV form.
    params_path = root / "camera_params" / "camera_params.parquet"

    def _to_calibration(row) -> CameraCalibration:
        # Convert one parquet row into a calibration object.
        extrinsic = row["extrinsic"]
        intrinsic = row["intrinsic"]
        resolution = row["resolution"]
        rvec = np.asarray(extrinsic["rvec"], dtype=np.float64).reshape(3)
        # cv2.Rodrigues returns (rotation_matrix, jacobian); only the matrix is used.
        rotation_matrix = cv2.Rodrigues(rvec.reshape(3, 1))[0]
        return CameraCalibration.from_opencv_extrinsics(
            name=str(row["port"]),
            width=int(resolution["width"]),
            height=int(resolution["height"]),
            K=np.asarray(intrinsic["camera_matrix"], dtype=np.float64),
            DC=np.asarray(intrinsic["distortion_coefficients"], dtype=np.float64).reshape(-1),
            R=np.asarray(rotation_matrix, dtype=np.float64),
            T=np.asarray(extrinsic["tvec"], dtype=np.float64).reshape(3),
            rvec=rvec,
        )

    calibrations = [_to_calibration(row) for row in pq.read_table(params_path).to_pylist()]
    calibrations.sort(key=lambda calibration: calibration.name)
    return SceneConfig(
        room_size=np.asarray([20.0, 20.0, 8.0], dtype=np.float64),
        room_center=np.asarray([0.0, 0.0, 2.0], dtype=np.float64),
        cameras=tuple(calibrations),
    )
|
|
|
|
|
|
@beartype
def load_actual_test_segment_bundles(
    root: Path,
    segment_name: str,
    *,
    frame_start: int = 690,
    frame_stop: int | None = None,
    max_frames: int | None = None,
    min_cameras_with_rows: int = 1,
    min_visible_joints: int = 6,
) -> list[FrameBundle]:
    """Load the 2D detections of one segment and group them into frame bundles.

    Args:
        root: Dataset root; the segment is read from ``root / segment_name``
            and the camera calibration via ``load_actual_test_scene(root)``.
        segment_name: Directory holding one ``<camera>_detected.parquet`` per camera.
        frame_start: First frame index to keep (inclusive).
        frame_stop: First frame index to drop (exclusive); ``None`` means no upper bound.
        max_frames: Keep at most this many frames after all other filtering.
        min_cameras_with_rows: When greater than 1, keep only frames for which
            at least this many cameras have a parquet row.
        min_visible_joints: Minimum number of joints with confidence above 0.15
            a detection needs in order to be kept.

    Returns:
        One ``FrameBundle`` per kept frame in ascending frame order, each with
        one view per scene camera. Empty when the segment has no detection files.

    Raises:
        ValueError: If a row's ``boxes``, ``kps`` and ``kps_scores`` lengths differ.
    """
    by_camera: dict[str, dict[int, tuple[PoseDetection, ...]]] = {}

    for table_path in sorted((root / segment_name).glob("*_detected.parquet")):
        camera_name = table_path.name.removesuffix("_detected.parquet")
        per_frame: dict[int, tuple[PoseDetection, ...]] = {}

        for row in pq.read_table(table_path).to_pylist():
            frame_index = int(row["frame_index"])
            too_early = frame_index < frame_start
            too_late = frame_stop is not None and frame_index >= frame_stop
            if too_early or too_late:
                continue

            # These three names are echoed verbatim by the `{...=}` fields below.
            boxes = row["boxes"]
            keypoints_batch = row["kps"]
            confidence_batch = row["kps_scores"]
            if len(boxes) != len(keypoints_batch) or len(keypoints_batch) != len(confidence_batch):
                raise ValueError(
                    f"Mismatched detection arrays for camera {camera_name} frame {frame_index}: "
                    f"{len(boxes)=}, {len(keypoints_batch)=}, {len(confidence_batch)=}."
                )

            kept: list[PoseDetection] = []
            for box, raw_xy, raw_scores in zip(boxes, keypoints_batch, confidence_batch, strict=True):
                pose = normalize_rtmpose_body20(
                    np.asarray(raw_xy, dtype=np.float64),
                    np.asarray(raw_scores, dtype=np.float64),
                )
                # Discard detections with too few confidently detected joints.
                if np.count_nonzero(pose[:, 2] > 0.15) < min_visible_joints:
                    continue

                # Use the stored box when it has four entries, else derive one from the pose.
                if len(box) == 4:
                    bbox = np.asarray(box, dtype=np.float64)
                else:
                    bbox = infer_bbox_from_keypoints(pose)

                positive_scores = pose[pose[:, 2] > 0.0, 2]
                if positive_scores.size:
                    bbox_confidence = float(np.mean(positive_scores))
                else:
                    bbox_confidence = 0.0
                kept.append(
                    PoseDetection(
                        bbox=bbox,
                        bbox_confidence=bbox_confidence,
                        keypoints=pose,
                    )
                )

            per_frame[frame_index] = tuple(kept)

        by_camera[camera_name] = per_frame

    if not by_camera:
        return []

    # Union of every frame index seen by any camera, in ascending order.
    seen_frames: set[int] = set()
    for per_frame in by_camera.values():
        seen_frames.update(per_frame)
    selected_frames = sorted(seen_frames)

    if min_cameras_with_rows > 1:
        # Each camera contributes a frame index at most once, so the count is
        # the number of cameras that have a row for that frame.
        coverage = Counter(
            frame_index for per_frame in by_camera.values() for frame_index in per_frame
        )
        selected_frames = [
            frame_index
            for frame_index in selected_frames
            if coverage[frame_index] >= min_cameras_with_rows
        ]
    if max_frames is not None:
        selected_frames = selected_frames[:max_frames]

    scene = load_actual_test_scene(root)
    camera_by_name = {camera.name: camera for camera in scene.cameras}
    ordered_camera_names = [camera.name for camera in scene.cameras]

    bundles: list[FrameBundle] = []
    for bundle_index, frame_index in enumerate(selected_frames):
        # Timestamps are synthetic: evenly spaced by bundle position.
        timestamp_unix_ns = bundle_index * _NOMINAL_FRAME_PERIOD_NS
        views = tuple(
            CameraFrame(
                camera_name=camera_name,
                frame_index=frame_index,
                timestamp_unix_ns=timestamp_unix_ns,
                detections=by_camera.get(camera_name, {}).get(frame_index, ()),
                source_size=(camera_by_name[camera_name].width, camera_by_name[camera_name].height),
            )
            for camera_name in ordered_camera_names
        )
        bundles.append(
            FrameBundle(
                bundle_index=bundle_index,
                timestamp_unix_ns=timestamp_unix_ns,
                views=views,
            )
        )
    return bundles
|
|
|
|
|
|
# CLI entry point: load an ActualTest segment, run the tracker over it and log
# the aggregate summary (plus optional per-frame lines) to stderr.
# NOTE: deliberately no docstring on `main` -- click would surface it as the
# command's --help text.
@click.command()
@click.argument("root_path", type=click.Path(path_type=Path, exists=True, file_okay=False))
@click.option("--segment", "segment_name", default="Segment_1", show_default=True)
@click.option("--frame-start", default=690, type=int, show_default=True)
@click.option("--frame-stop", type=int)
@click.option("--max-frames", type=click.IntRange(min=1))
@click.option("--min-camera-rows", default=1, type=click.IntRange(min=1), show_default=True)
@click.option("--max-active-tracks", default=1, type=click.IntRange(min=1), show_default=True)
@click.option("--verbose-frames/--no-verbose-frames", default=False, show_default=True)
def main(
    root_path: Path,
    segment_name: str,
    frame_start: int,
    frame_stop: int | None,
    max_frames: int | None,
    min_camera_rows: int,
    max_active_tracks: int,
    verbose_frames: bool,
) -> None:
    def _two_decimals_or_nan(value: float) -> str:
        # Non-finite means (the "no samples" sentinel) are reported as "nan".
        if np.isfinite(value):
            return "{:.2f}".format(value)
        return "nan"

    # Replace loguru's default sink with a stderr sink using a compact format.
    logger.remove()
    logger.add(
        click.get_text_stream("stderr"),
        level="INFO",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    )

    scene = load_actual_test_scene(root_path)
    bundles = load_actual_test_segment_bundles(
        root_path,
        segment_name,
        frame_start=frame_start,
        frame_stop=frame_stop,
        max_frames=max_frames,
        min_cameras_with_rows=min_camera_rows,
    )

    tracker = PoseTracker(scene, TrackerConfig(max_active_tracks=max_active_tracks))
    results = tracker.run(bundles)
    summary = summarize_tracking_results(results, tracker.diagnostics_snapshot())
    tracker_diagnostics = summary.diagnostics

    logger.info(
        "actual_test bundles={} active_frames={} proposal_frames={} max_active_tracks={} max_lost_tracks={} "
        "mean_accepted_views={} mean_accepted_joints={} mean_reprojection_error={}",
        summary.bundle_count,
        summary.active_frames,
        summary.proposal_frames,
        summary.max_active_tracks,
        summary.max_lost_tracks,
        _two_decimals_or_nan(summary.mean_accepted_views),
        _two_decimals_or_nan(summary.mean_accepted_joints),
        _two_decimals_or_nan(summary.mean_reprojection_error),
    )
    logger.info(
        "actual_test actions={} promotions={} reacquisitions={} predict_only_updates={} proposal_reacquisition_attempts={} "
        "proposal_compatible_lost_frames={} nonlinear_refinements={} lm_iterations={}",
        summary.update_action_counts,
        tracker_diagnostics.promotions,
        tracker_diagnostics.reacquisitions,
        tracker_diagnostics.predict_only_updates,
        tracker_diagnostics.proposal_reacquisition_attempts,
        tracker_diagnostics.proposal_compatible_lost_frames,
        tracker_diagnostics.nonlinear_refinements,
        tracker_diagnostics.lm_iterations,
    )

    if not verbose_frames:
        return
    for frame_line in format_frame_summary_lines(results):
        logger.info("actual_test_frame {}", frame_line)
|
|
|
|
|
|
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
|