diff --git a/README.md b/README.md index 6506675..2a010bf 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,11 @@ Fast triangulation of multiple persons from multiple camera views. ./run_container.sh ``` +- Build triangulator: + ```bash + cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd .. + ``` + - Test with samples: ```bash python3 /RapidPoseTriangulation/scripts/test_triangulate.py @@ -37,11 +42,3 @@ Fast triangulation of multiple persons from multiple camera views. export CUDA_VISIBLE_DEVICES=0 python3 /RapidPoseTriangulation/scripts/test_skelda_dataset.py ``` - -
- -## Debugging - -```bash -cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py -``` diff --git a/data/.gitignore b/data/.gitignore index 8975752..4ef61ca 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -5,3 +5,4 @@ *.json !*/*.json testoutput/ +trt_cache/ diff --git a/Dockerfile b/dockerfile similarity index 56% rename from Dockerfile rename to dockerfile index b2b00d1..1ddf0e7 100644 --- a/Dockerfile +++ b/dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/pytorch:23.02-py3 +FROM nvcr.io/nvidia/tensorrt:24.10-py3 ARG DEBIAN_FRONTEND=noninteractive ENV LANG=C.UTF-8 @@ -15,31 +15,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends python3-tk # Update pip to allow installation of skelda in editable mode RUN pip3 install --upgrade --no-cache-dir pip -# Install MMPose -ENV FORCE_CUDA="1" -ENV MMCV_WITH_OPS=1 -RUN pip3 install --upgrade --no-cache-dir openmim -RUN mim install mmengine -RUN mim install "mmcv>=2,<2.2.0" -RUN mim install "mmdet>=3" -RUN mim install "mmpose>=1.1.0" -# Fix an error when importing mmpose -RUN pip3 install --upgrade --no-cache-dir numpy scipy -RUN git clone --depth=1 --branch=main https://github.com/open-mmlab/mmpose.git - -# Download pretrained model -COPY scripts/utils_2d_pose.py / -RUN python3 -c "from utils_2d_pose import load_model; load_model();" -RUN python3 -c "from utils_2d_pose import load_wb_model; load_wb_model();" - -# Fix an undefined symbol error with ompi -RUN echo "ldconfig" >> ~/.bashrc - # Install swig and later dependencies RUN apt-get update && apt-get install -y --no-install-recommends build-essential RUN apt-get update && apt-get install -y --no-install-recommends swig RUN apt-get update && apt-get install -y --no-install-recommends libopencv-dev +# Install ONNX runtime +RUN pip3 install --upgrade --no-cache-dir onnxruntime-gpu + +# Install skelda +RUN pip3 install --upgrade --no-cache-dir scipy COPY ./skelda/ /skelda/ RUN pip3 install --no-cache-dir -e 
/skelda/ diff --git a/extras/easypose/README.md b/extras/easypose/README.md new file mode 100644 index 0000000..b9c769d --- /dev/null +++ b/extras/easypose/README.md @@ -0,0 +1,18 @@ +# Test ONNX with EasyPose + +Code files originally from: https://github.com/Dominic23331/EasyPose.git + +
class BaseModel(ABC):
    """Shared ONNX Runtime plumbing for the detection and pose models.

    The constructor opens an inference session for ``model_path``, records the
    name, shape and NumPy dtype of the first model input, and optionally runs a
    few warm-up inferences. Subclasses supply ``preprocess`` and
    ``postprocess``; ``forward`` / ``__call__`` chain them around
    ``session.run``.
    """

    # ONNX Runtime type strings -> NumPy dtypes supported as model input.
    # ONNX Runtime reports float32 inputs as 'tensor(float)'; the historical
    # 'tensor(float32)' spelling is kept so nothing that matched before breaks.
    _ORT_TO_NUMPY = {
        'tensor(float)': np.float32,
        'tensor(float32)': np.float32,
        'tensor(float16)': np.float16,
        'tensor(uint8)': np.uint8,
    }

    def __init__(self, model_path: str, device: str = 'CUDA', warmup: int = 30):
        """Create the session and run ``warmup`` warm-up inferences.

        Args:
            model_path: Path of the ONNX file handed to ``ort.InferenceSession``.
            device: ``'CUDA'`` or ``'CPU'``. ``'CUDA'`` falls back to the CPU
                provider (with a ``UserWarning``) when the CUDA provider is not
                available.
            warmup: Number of warm-up runs; values ``<= 0`` skip the warm-up.

        Raises:
            ValueError: If ``device`` is neither ``'CUDA'`` nor ``'CPU'``, or
                if the model's input type is not one of the supported tensors.
        """
        self.opt = ort.SessionOptions()

        if device == 'CUDA':
            provider = 'CUDAExecutionProvider'
            if provider not in ort.get_available_providers():
                warnings.warn("No CUDAExecutionProvider found, switched to CPUExecutionProvider.", UserWarning)
                provider = 'CPUExecutionProvider'
        elif device == 'CPU':
            provider = 'CPUExecutionProvider'
        else:
            raise ValueError('Provider {} does not exist.'.format(device))

        self.session = ort.InferenceSession(model_path,
                                            providers=[provider],
                                            sess_options=self.opt)

        model_input = self.session.get_inputs()[0]
        self.input_name = model_input.name
        self.input_shape = model_input.shape

        try:
            self.input_type = self._ORT_TO_NUMPY[model_input.type]
        except KeyError:
            # Format the type into the message instead of passing it as a
            # second exception argument (which renders as a tuple).
            raise ValueError('Unknown input type: {}'.format(model_input.type)) from None

        if warmup > 0:
            self.warmup(warmup)

    @abstractmethod
    def preprocess(self, image: np.ndarray):
        """Turn ``image`` into the tensor fed to the session."""
        pass

    @abstractmethod
    def postprocess(self, tensor: List[np.ndarray]):
        """Turn the list of raw session outputs into the model's result."""
        pass

    def forward(self, image: np.ndarray):
        """Run ``preprocess``, the ONNX session and ``postprocess`` on ``image``."""
        tensor = self.preprocess(image)
        result = self.session.run(None, {self.input_name: tensor})
        output = self.postprocess(result)
        return output

    def _warmup_shape(self):
        """Return ``input_shape`` with every non-integer dimension replaced by 1.

        Models exported with dynamic axes report those axes as strings or
        ``None``, which cannot be used to allocate the dummy warm-up tensor.
        """
        return tuple(dim if isinstance(dim, int) else 1 for dim in self.input_shape)

    def warmup(self, epoch: int = 30):
        """Run ``epoch`` inferences on a random tensor of the input shape/dtype."""
        print('{} start warmup!'.format(self.__class__.__name__))
        tensor = np.random.random(self._warmup_shape()).astype(self.input_type)
        for _ in tqdm(range(epoch)):
            self.session.run(None, {self.input_name: tensor})

    def __call__(self, image: np.ndarray, *args, **kwargs):
        """Alias for ``forward``; extra arguments are accepted and ignored."""
        return self.forward(image)
class Yolov8(BaseModel):
    """YOLOv8 detector wrapper that returns person boxes in image coordinates.

    ``preprocess`` letterboxes the image to the model input size and remembers
    the padding offsets and scale; ``postprocess`` decodes the raw output,
    applies confidence filtering and NMS, keeps class 0 and undoes the
    letterbox transform.
    """

    def __init__(self,
                 model_path: str,
                 conf_threshold: float,
                 iou_threshold: float,
                 device: str = 'CUDA',
                 warmup: int = 30):
        """Store the thresholds and initialise the letterbox state.

        Args:
            model_path: Path of the ONNX model, forwarded to ``BaseModel``.
            conf_threshold: Minimum confidence handed to ``nms``.
            iou_threshold: IoU threshold handed to ``nms``.
            device: Forwarded to ``BaseModel``.
            warmup: Forwarded to ``BaseModel``.
        """
        super(Yolov8, self).__init__(model_path, device, warmup)
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        # Letterbox parameters of the most recent preprocess() call; they are
        # read back in postprocess() to map boxes onto the original image.
        self.dx = 0
        self.dy = 0
        self.scale = 0

    def preprocess(self, image):
        """Letterbox ``image``, scale it to [0, 1] and return a float32 NCHW batch of one."""
        # input_shape[2:] is used as (height, width) of the model input.
        th, tw = self.input_shape[2:]
        image, self.dx, self.dy, self.scale = letterbox(image, (tw, th))
        tensor = image / 255.
        tensor = np.expand_dims(tensor, axis=0).transpose((0, 3, 1, 2)).astype(np.float32)
        return tensor

    def postprocess(self, tensor):
        """Decode the raw model output into person boxes.

        Args:
            tensor: Session outputs; only ``tensor[0]`` is used. It is squeezed
                on axis 0 and transposed, so it presumably has the layout
                ``(1, 4 + num_classes, num_candidates)`` — TODO confirm
                against the exported model.

        Returns:
            Array of ``(x1, y1, x2, y2)`` rows for class 0 in original-image
            coordinates, or the empty NMS result when nothing survives.
        """
        # The module header imports only letterbox, nms_optimized and
        # xywh2xyxy, so the bare name `nms` used below was undefined and this
        # method raised NameError. Import it locally to keep the fix
        # self-contained.
        from .utils import nms

        feature_map = tensor[0]
        feature_map = np.squeeze(feature_map, axis=0).transpose((1, 0))

        # Columns 0-3 hold the box as (cx, cy, w, h); the rest are class scores.
        pred_class = feature_map[..., 4:]
        pred_conf = np.max(pred_class, axis=-1, keepdims=True)
        pred_class = np.argmax(pred_class, axis=-1, keepdims=True)
        boxes = np.concatenate([feature_map[..., :4], pred_conf, pred_class], axis=-1)

        boxes = xywh2xyxy(boxes)
        boxes = nms(boxes, self.iou_threshold, self.conf_threshold)

        if boxes.shape[0] == 0:
            return boxes

        # Keep class 0 only and drop the confidence/class columns.
        human_class = boxes[..., -1] == 0
        boxes = boxes[human_class][..., :4]

        # Undo the letterbox: remove the padding offset, clamp, then rescale.
        boxes[:, 0] -= self.dx
        boxes[:, 2] -= self.dx
        boxes[:, 1] -= self.dy
        boxes[:, 3] -= self.dy
        boxes = np.clip(boxes, a_min=0, a_max=None)
        boxes[:, :4] /= self.scale
        return boxes
def get_pose_model(pose_model_path, pose_model_decoder, device, warmup):
    """Instantiate the pose model class that matches ``pose_model_decoder``.

    ``'Dark'`` selects ``pose.Heatmap`` with ``dark=True``; any other value is
    looked up by name as an attribute of the ``pose`` module.
    """
    if pose_model_decoder == 'Dark':
        return pose.Heatmap(pose_model_path, dark=True, device=device, warmup=warmup)

    decoder_cls = getattr(pose, pose_model_decoder)
    return decoder_cls(pose_model_path, device=device, warmup=warmup)
class TopDown:
    """Two-stage pipeline: a person detector followed by a pose estimator.

    Both models may be given either as a key of the model repository
    (``AvailablePoseModels`` / ``AvailableDetModels``) or as a path ending in
    ``.onnx``. Repository models are downloaded when the cached file is
    missing or cannot be loaded; explicit ``.onnx`` files are loaded as-is.
    """

    def __init__(self,
                 pose_model_name,
                 pose_model_decoder,
                 det_model_name,
                 conf_threshold=0.6,
                 iou_threshold=0.6,
                 device='CUDA',
                 warmup=30):
        """Load the pose and detection models.

        Args:
            pose_model_name: Repository key or path to an ``.onnx`` pose model.
            pose_model_decoder: Decoder name passed to ``get_pose_model``; for
                repository models it also selects the model file.
            det_model_name: Repository key or path to an ``.onnx`` detector.
            conf_threshold: Detector confidence threshold.
            iou_threshold: Detector NMS IoU threshold.
            device: Forwarded to the model constructors.
            warmup: Forwarded to the model constructors.

        Raises:
            ValueError: If a repository key or decoder is unknown, or the
                detector type cannot be inferred from an ``.onnx`` file name.
            FileNotFoundError: If an explicit ``.onnx`` path does not exist.
        """
        pose_is_file = pose_model_name.endswith('.onnx')
        det_is_file = det_model_name.endswith('.onnx')

        if not pose_is_file:
            if pose_model_name not in AvailablePoseModels.POSE_MODELS:
                raise ValueError(
                    'The {} human pose estimation model is not in the model repository.'.format(pose_model_name))
            if pose_model_decoder not in AvailablePoseModels.POSE_MODELS[pose_model_name]:
                raise ValueError(
                    'No {} decoding head for the {} model was found in the model repository.'.format(pose_model_decoder,
                                                                                                  pose_model_name))
        # Whether the detector needs a repository lookup depends on the
        # detector argument itself, not on how the pose model was specified.
        if not det_is_file and det_model_name not in AvailableDetModels.DET_MODELS:
            raise ValueError(
                'The {} detection model is not in the model repository.'.format(det_model_name))

        def load_pose(path):
            return get_pose_model(path, pose_model_decoder, device, warmup)

        if pose_is_file:
            self.pose_model = self._load_local(load_pose, pose_model_name)
        else:
            self.pose_model = self._load_from_repository(
                load_pose,
                AvailablePoseModels.POSE_MODELS[pose_model_name][pose_model_decoder],
                detection_model=False,
            )

        if det_is_file:
            det_model_type = self._infer_det_model_type(det_model_name)
        else:
            det_model_type = AvailableDetModels.DET_MODELS[det_model_name]['model_type']

        def load_det(path):
            return get_det_model(path, det_model_type, conf_threshold, iou_threshold, device, warmup)

        if det_is_file:
            self.det_model = self._load_local(load_det, det_model_name)
        else:
            self.det_model = self._load_from_repository(
                load_det,
                AvailableDetModels.DET_MODELS[det_model_name]['file_name'],
                detection_model=True,
            )

    @staticmethod
    def _infer_det_model_type(det_model_path):
        """Map an explicit detector file name to its class name in ``detection``."""
        lowered = det_model_path.lower()
        if 'rtmdet' in lowered:
            return 'RTMDet'
        if 'yolov8' in lowered:
            return 'Yolov8'
        # Previously this case left the type unbound and failed later with
        # UnboundLocalError; fail early with an actionable message instead.
        raise ValueError(
            "Cannot infer the detection model type from {}; "
            "expected 'rtmdet' or 'yolov8' in the file name.".format(det_model_path))

    @staticmethod
    def _load_local(load, model_path):
        """Load a user-supplied model file; there is no download fallback for it."""
        if not os.path.exists(model_path):
            raise FileNotFoundError('Model file {} does not exist.'.format(model_path))
        return load(model_path)

    @staticmethod
    def _load_from_repository(load, file_name, detection_model):
        """Load a repository model, downloading it when missing or unloadable."""
        model_dir = get_model_path(file_name, detection_model=detection_model)
        model_path = os.path.join(model_dir, file_name)

        if os.path.exists(model_path):
            try:
                return load(model_path)
            except Exception:
                # Best effort kept from the original flow: a cached file that
                # fails to load is presumably corrupt or incomplete, so fetch
                # it again and retry once below.
                pass

        download(get_url(file_name, detection_model=detection_model), model_dir)
        return load(model_path)

    def predict(self, image):
        """Detect persons in ``image`` and estimate keypoints for each of them.

        Returns:
            A list with one ``Person`` per detected box. ``box`` holds the
            padded, aspect-corrected crop box and ``keypoints`` holds the pose
            model output with its first two columns mapped to image coordinates.
        """
        results = []
        for det_box in self.det_model(image):
            p = Person()
            region, p.box, _ = region_of_interest_warped(image, det_box)
            kp = self.pose_model(region)

            # See: /mmpose/models/pose_estimators/topdown.py - add_pred_to_datasample()
            # Normalise by the crop size, scale to the crop box size, then
            # shift by the box centre minus half its size (i.e. its top-left).
            th, tw = region.shape[:2]
            bw, bh = [p.box[2] - p.box[0], p.box[3] - p.box[1]]
            kp[:, :2] /= np.array([tw, th])
            kp[:, :2] *= np.array([bw, bh])
            kp[:, :2] += np.array([p.box[0] + bw / 2, p.box[1] + bh / 2])
            kp[:, :2] -= 0.5 * np.array([bw, bh])

            p.keypoints = kp
            results.append(p)
        return results
class CustomTopDown:
    """Top-down pipeline built from ready-made models or repository names.

    Each of ``pose_model`` / ``det_model`` is either an instance of
    ``model.BaseModel`` (used as-is) or a string key of the model repository
    (loaded, and downloaded when the cached file is missing or unloadable).
    """

    def __init__(self,
                 pose_model,
                 det_model,
                 pose_decoder=None,
                 device='CUDA',
                 iou_threshold=0.6,
                 conf_threshold=0.6,
                 warmup=30):
        """Resolve and store the pose and detection models.

        Args:
            pose_model: ``model.BaseModel`` instance or repository key.
            det_model: ``model.BaseModel`` instance or repository key.
            pose_decoder: Decoder name; required when ``pose_model`` is a key.
            device: Forwarded to the model constructors.
            iou_threshold: Detector NMS IoU threshold.
            conf_threshold: Detector confidence threshold.
            warmup: Forwarded to the model constructors.

        Raises:
            ValueError: If a repository key or the decoder name is unknown.
            TypeError: If a model is neither a ``BaseModel`` nor a string.
        """
        if isinstance(pose_model, model.BaseModel):
            self.pose_model = pose_model
        elif isinstance(pose_model, str):
            if pose_model not in AvailablePoseModels.POSE_MODELS:
                raise ValueError(
                    'The {} human pose estimation model is not in the model repository.'.format(pose_model))
            # The decoder name must be looked up here. The previous check
            # tested the model name against its own decoder table, which
            # rejected every string pose model.
            if pose_decoder not in AvailablePoseModels.POSE_MODELS[pose_model]:
                raise ValueError(
                    'No {} decoding head for the {} model was found in the model repository.'.format(pose_decoder,
                                                                                                  pose_model))

            self.pose_model = self._load_from_repository(
                lambda path: get_pose_model(path, pose_decoder, device, warmup),
                AvailablePoseModels.POSE_MODELS[pose_model][pose_decoder],
                detection_model=False,
            )
        else:
            raise TypeError("Invalid type for pose model, Please write a custom model based on 'BaseModel'.")

        if isinstance(det_model, model.BaseModel):
            self.det_model = det_model
        elif isinstance(det_model, str):
            if det_model not in AvailableDetModels.DET_MODELS:
                raise ValueError(
                    'The {} detection model is not in the model repository.'.format(det_model))

            det_model_type = AvailableDetModels.DET_MODELS[det_model]['model_type']
            self.det_model = self._load_from_repository(
                lambda path: get_det_model(path,
                                           det_model_type,
                                           conf_threshold,
                                           iou_threshold,
                                           device,
                                           warmup),
                AvailableDetModels.DET_MODELS[det_model]['file_name'],
                detection_model=True,
            )
        else:
            raise TypeError("Invalid type for detection model, Please write a custom model based on 'BaseModel'.")

    @staticmethod
    def _load_from_repository(load, file_name, detection_model):
        """Load a repository model, downloading it when missing or unloadable."""
        model_dir = get_model_path(file_name, detection_model=detection_model)
        model_path = os.path.join(model_dir, file_name)

        if os.path.exists(model_path):
            try:
                return load(model_path)
            except Exception:
                # Best effort kept from the original flow: a cached file that
                # fails to load is presumably corrupt or incomplete, so fetch
                # it again and retry once below.
                pass

        download(get_url(file_name, detection_model=detection_model), model_dir)
        return load(model_path)

    def predict(self, image):
        """Detect persons in ``image`` and estimate keypoints for each box.

        Returns:
            A list with one ``Person`` per detected box, whose ``keypoints``
            are the pose model output passed through ``restore_keypoints``.
        """
        results = []
        for det_box in self.det_model(image):
            p = Person()
            p.box = det_box
            region = region_of_interest(image, p.box)
            kp = self.pose_model(region)
            p.keypoints = restore_keypoints(p.box, kp)
            results.append(p)
        return results
class Heatmap(BaseModel):
    """Pose model whose output is one heatmap per keypoint.

    Keypoints are taken from the heatmap maxima, refined (optionally with the
    DARK variant) and mapped back to the size of the image given to
    ``preprocess``.
    """

    def __init__(self,
                 model_path: str,
                 dark: bool = False,
                 device: str = 'CUDA',
                 warmup: int = 30):
        """Initialise the session via ``BaseModel`` and store the refinement mode."""
        super().__init__(model_path, device, warmup)
        self.use_dark = dark
        # (height, width) of the last image seen by preprocess().
        self.img_size = ()

    def preprocess(self, image: np.ndarray):
        """Letterbox and normalise ``image``; return a float32 NCHW batch of one."""
        target_h, target_w = self.input_shape[2:]
        self.img_size = image.shape[:2]

        padded = letterbox(image, (target_w, target_h))[0]

        # Per-channel mean / std normalisation.
        mean = np.array((103.53, 116.28, 123.675))
        std = np.array((57.375, 57.12, 58.395))
        normalised = (padded - mean) / std

        batch = normalised[np.newaxis].transpose((0, 3, 1, 2))
        return batch.astype(np.float32)

    def postprocess(self, tensor: List[np.ndarray]):
        """Decode the heatmaps in ``tensor[0]`` into keypoints in image coordinates."""
        heatmaps = np.squeeze(tensor[0], axis=0)
        keypoints = get_heatmap_points(heatmaps)

        if not self.use_dark:
            keypoints = refine_keypoints(keypoints, heatmaps)
        else:
            keypoints = refine_keypoints_dark(keypoints, heatmaps, 11)

        return get_real_keypoints(keypoints, heatmaps, self.img_size)
def intersection_over_union(box1: np.ndarray, box2: np.ndarray):
    """Return the IoU of two boxes given as ``(x1, y1, x2, y2, ...)``.

    Only the first four entries of each box are read. Boxes that do not
    overlap yield exactly ``0.0``.
    """
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Clamp each extent at zero. Without it, boxes that are disjoint on both
    # axes give two negative extents whose product is a positive "overlap".
    inter_w = max(0.0, x2 - x1)
    inter_h = max(0.0, y2 - y1)

    intersection = inter_w * inter_h
    union = area1 + area2 - intersection
    # The epsilon guards against division by zero for degenerate boxes.
    iou = intersection / (union + 1e-6)

    return iou


def xywh2xyxy(boxes):
    """Convert ``(cx, cy, w, h)`` rows to ``(x1, y1, x2, y2)`` in place.

    Only columns 0-3 are touched; ``boxes`` is modified and also returned.
    """
    boxes[:, 0] -= boxes[:, 2] / 2
    boxes[:, 1] -= boxes[:, 3] / 2
    # Columns 0/1 now hold the top-left corner, so adding them to w/h yields
    # the bottom-right corner.
    boxes[:, 2] += boxes[:, 0]
    boxes[:, 3] += boxes[:, 1]
    return boxes


def nms(boxes: np.ndarray, iou_threshold: float, conf_threshold: float):
    """Class-aware greedy non-maximum suppression.

    Args:
        boxes: Rows of ``(x1, y1, x2, y2, confidence, ..., class)``; the
            confidence is read from column 4 and the class from the last
            column.
        iou_threshold: A box is suppressed when it has the same class as an
            already selected box and their IoU is not below this value.
        conf_threshold: Rows whose confidence is not above this value are
            discarded before suppression.

    Returns:
        Array of the kept rows ordered by descending confidence; an empty
        array when nothing is kept.
    """
    confident = boxes[boxes[..., 4] > conf_threshold]
    candidates = sorted(confident, key=lambda row: row[4], reverse=True)

    result = []
    while candidates:
        # Take the highest-scoring remaining box. The list is sorted in
        # descending order, so it sits at the front; popping from the back
        # would let the weakest box suppress stronger ones.
        chosen_box = candidates.pop(0)
        result.append(chosen_box)

        candidates = [
            box for box in candidates
            if box[-1] != chosen_box[-1]
            or intersection_over_union(chosen_box, box) < iou_threshold
        ]

    return np.array(result)
def refine_keypoints(keypoints: np.ndarray, heatmaps: np.ndarray):
    """Shift each keypoint a quarter pixel towards the higher neighbouring value.

    Args:
        keypoints: Array indexed as ``[n, k, :]`` whose first two columns are
            the ``(x, y)`` heatmap coordinates; it is modified in place.
        heatmaps: Array indexed as ``[k, y, x]``.

    Returns:
        The same ``keypoints`` array, with x/y moved by ``+-0.25`` along each
        axis where the heatmap differs between the two neighbouring cells.
        Points too close to the border are left unchanged on that axis.
    """
    N, K = keypoints.shape[:2]
    # heatmaps is indexed [k, y, x] below, so height/width are its last two
    # axes. Taking shape[:2] here (as before) yields (K, H) and checks the
    # bounds against the wrong sizes.
    H, W = heatmaps.shape[1:]

    for n, k in product(range(N), range(K)):
        x, y = keypoints[n, k, :2].astype(int)

        if 1 < x < W - 1 and 0 < y < H:
            dx = heatmaps[k, y, x + 1] - heatmaps[k, y, x - 1]
        else:
            dx = 0.

        if 1 < y < H - 1 and 0 < x < W:
            dy = heatmaps[k, y + 1, x] - heatmaps[k, y - 1, x]
        else:
            dy = 0.

        # Only the coordinate columns move; any further columns (e.g. the
        # score) are left untouched.
        shift = np.sign(np.array([dx, dy], dtype=np.float32)) * 0.25
        keypoints[n, k, :2] += shift

    return keypoints
def simcc_decoder(
    simcc_x: np.ndarray,
    simcc_y: np.ndarray,
    input_size: Sequence[int],
    dx: int,
    dy: int,
    scale: float,
):
    """Decode SimCC axis classifications into ``(x, y, confidence)`` keypoints.

    For each axis the index of the largest bin (along the last axis) is
    converted to a fraction of the bin count and multiplied by the matching
    entry of ``input_size`` (``input_size[1]`` for x, ``input_size[0]`` for y).
    The confidence is the smaller of the two peak values. Finally the
    coordinates are shifted by ``-dx`` / ``-dy`` and divided by ``scale``.
    """
    # See: /mmpose/codecs/utils/post_processing.py - get_simcc_maximum()

    def locate_peak(bins, extent):
        # Position of the strongest bin, expressed in `extent` units, plus
        # the value of that bin.
        position = np.argmax(bins, axis=-1, keepdims=True).astype(np.float32)
        position /= bins.shape[-1]
        position *= extent
        return position, np.max(bins, axis=-1, keepdims=True)

    pos_x, peak_x = locate_peak(simcc_x, input_size[1])
    pos_y, peak_y = locate_peak(simcc_y, input_size[0])

    decoded = np.concatenate([pos_x, pos_y, np.minimum(peak_x, peak_y)], axis=-1)

    decoded[..., 0] -= dx
    decoded[..., 1] -= dy
    decoded[..., :2] /= scale

    return decoded
https://mmpose.readthedocs.io/en/latest/user_guides/inference.html#basic-usage""" + + new_poses = [] + for i in range(len(imgs)): + img = imgs[i] + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + poses = [] + dets = model.predict(img) + for pose in dets: + pose = pose.keypoints + pose = np.asarray(pose) + + scores = pose[:, 2].reshape(-1, 1) + scores = np.clip(scores, 0, 1) + pose = np.concatenate((pose[:, :2], scores), axis=-1) + + poses.append(pose) + + if len(poses) == 0: + poses.append(np.zeros([num_joints, 3])) + + poses = np.array(poses) + new_poses.append(poses) + + return new_poses diff --git a/extras/mmdeploy/README.md b/extras/mmdeploy/README.md new file mode 100644 index 0000000..c382915 --- /dev/null +++ b/extras/mmdeploy/README.md @@ -0,0 +1,122 @@ +# Exporting MMPose models + +```bash +docker build --progress=plain -f extras/mmdeploy/dockerfile -t rpt_mmdeploy . + +./extras/mmdeploy/run_container.sh +``` + +
+ +## ONNX + +```bash +cd /mmdeploy/ +export withFP16="_fp16" +cp /RapidPoseTriangulation/extras/mmdeploy/configs/detection_onnxruntime_static-320x320"$withFP16".py configs/mmdet/detection/ + +python3 ./tools/deploy.py \ + configs/mmdet/detection/detection_onnxruntime_static-320x320"$withFP16".py \ + /mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py \ + https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth \ + /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ + --work-dir work_dir \ + --show + +mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x3x320x320"$withFP16".onnx +``` + +```bash +cd /mmdeploy/ +export withFP16="_fp16" +cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py configs/mmpose/ +cp /RapidPoseTriangulation/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py configs/mmpose/ + +python3 ./tools/deploy.py \ + configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \ + /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \ + https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \ + /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ + --work-dir work_dir \ + --show +mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx + +python3 ./tools/deploy.py \ + configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \ + /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \ + https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth \ + /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ + 
--work-dir work_dir \ + --show +mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx3x384x288"$withFP16".onnx + +python3 ./tools/deploy.py \ + configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \ + /mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \ + https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \ + /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ + --work-dir work_dir \ + --show +mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-l_wb_1x3x384x288"$withFP16".onnx + +python3 ./tools/deploy.py \ + configs/mmpose/pose-detection_simcc_onnxruntime_dynamic-384x288"$withFP16".py \ + /mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py \ + https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth \ + /mmpose/projects/rtmpose/examples/onnxruntime/human-pose.jpeg \ + --work-dir work_dir \ + --show +mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-l_wb_Bx3x384x288"$withFP16".onnx +``` + +```bash +python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs.py +``` + +```bash +python3 /RapidPoseTriangulation/extras/mmdeploy/add_extra_steps.py +``` + +
+ +## TensorRT + +Run this directly in the inference container, because the TensorRT version that builds the engine must be the same as the one that later loads it. + +```bash +export withFP16="_fp16" + +trtexec --fp16 \ + --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.onnx \ + --saveEngine=end2end.engine + +mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3"$withFP16"_extra-steps.engine + +trtexec --fp16 \ + --onnx=/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_Bx384x288x3"$withFP16"_extra-steps.onnx \ + --saveEngine=end2end.engine \ + --minShapes=image_input:1x384x288x3 \ + --optShapes=image_input:1x384x288x3 \ + --maxShapes=image_input:1x384x288x3 + +mv ./end2end.engine /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x384x288x3"$withFP16"_extra-steps.engine +``` + +
+ +## Benchmark + +```bash +cd /mmdeploy/ +export withFP16="_fp16" + +python3 ./tools/profiler.py \ + configs/mmpose/pose-detection_simcc_onnxruntime_static-384x288"$withFP16".py \ + /mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py \ + /RapidPoseTriangulation/extras/mmdeploy/testimages/ \ + --model /RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_1x3x384x288"$withFP16".onnx \ + --shape 384x288 \ + --device cuda \ + --warmup 50 \ + --num-iter 200 +``` diff --git a/extras/mmdeploy/add_extra_steps.py b/extras/mmdeploy/add_extra_steps.py new file mode 100644 index 0000000..4eb822b --- /dev/null +++ b/extras/mmdeploy/add_extra_steps.py @@ -0,0 +1,145 @@ +import re + +import numpy as np +import onnx +from onnx import TensorProto, helper, numpy_helper + +# ================================================================================================== + +base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/" +det_model_path = base_path + "rtmdet-nano_1x3x320x320.onnx" +pose_model_path1 = base_path + "rtmpose-m_Bx3x384x288.onnx" +pose_model_path2 = base_path + "rtmpose-m_1x3x384x288.onnx" +pose_model_path3 = base_path + "rtmpose-l_wb_Bx3x384x288.onnx" +pose_model_path4 = base_path + "rtmpose-l_wb_1x3x384x288.onnx" + +norm_mean = -1 * (np.array([0.485, 0.456, 0.406]) * 255) +norm_std = 1.0 / (np.array([0.229, 0.224, 0.225]) * 255) + + +# ================================================================================================== + + +def add_steps_to_onnx(model_path): + + # Load existing model + model = onnx.load(model_path) + graph = model.graph + + mean = norm_mean.astype(np.float32) + std = norm_std.astype(np.float32) + + mean = np.reshape(mean, (1, 3, 1, 1)).astype(np.float32) + std = np.reshape(std, (1, 3, 1, 1)).astype(np.float32) + + use_fp16 = bool("fp16" in model_path) + if use_fp16: + mean = mean.astype(np.float16) + std = std.astype(np.float16) + + # Add the initializers to the graph + 
mean_initializer = numpy_helper.from_array(mean, name="norm_mean") + std_initializer = numpy_helper.from_array(std, name="norm_std") + graph.initializer.extend([mean_initializer, std_initializer]) + + # Define layer names, assuming the first input is the image tensor + input_name = graph.input[0].name + + # Cast to internal type + # This has to be the first node, because tensorrt does not support uint8 layers + cast_type = 10 if use_fp16 else 1 + casted_output = "casted_output" + cast_node = helper.make_node( + "Cast", + inputs=[input_name], + outputs=[casted_output], + to=cast_type, + ) + + # Node to transpose + transpose_output = "transpose_output" + transpose_node = helper.make_node( + "Transpose", + inputs=[casted_output], + outputs=[transpose_output], + perm=[0, 3, 1, 2], + name="Transpose", + ) + + # Node to add mean + mean_added_output = "mean_added_output" + mean_add_node = helper.make_node( + "Add", + inputs=[transpose_output, "norm_mean"], + outputs=[mean_added_output], + name="Mean_Addition", + ) + + # Node to multiply by std + std_mult_output = "std_mult_output" + std_mul_node = helper.make_node( + "Mul", + inputs=[mean_added_output, "norm_std"], + outputs=[std_mult_output], + name="Std_Multiplication", + ) + + # Replace original input of the model with the output of normalization + for node in graph.node: + for idx, input_name_in_node in enumerate(node.input): + if input_name_in_node == input_name: + node.input[idx] = std_mult_output + + # Add the new nodes to the graph + graph.node.insert(0, cast_node) + graph.node.insert(1, transpose_node) + graph.node.insert(2, mean_add_node) + graph.node.insert(3, std_mul_node) + + # Transpose the input shape + input_shape = graph.input[0].type.tensor_type.shape.dim + dims = [dim.dim_value for dim in input_shape] + for i, j in enumerate([0, 3, 1, 2]): + input_shape[j].dim_value = dims[i] + + # Set the batch size to a defined string + input_shape = graph.input[0].type.tensor_type.shape.dim + if 
input_shape[0].dim_value == 0: + input_shape[0].dim_param = "batch_size" + + # Rename the input tensor + main_input_image_name = model.graph.input[0].name + for node in model.graph.node: + for idx, name in enumerate(node.input): + if name == main_input_image_name: + node.input[idx] = "image_input" + model.graph.input[0].name = "image_input" + + # Set input image type to uint8 + model.graph.input[0].type.tensor_type.elem_type = TensorProto.UINT8 + + path = re.sub(r"(x)(\d+)x(\d+)x(\d+)", r"\1\3x\4x\2", model_path) + path = path.replace(".onnx", "_extra-steps.onnx") + onnx.save(model, path) + + +# ================================================================================================== + + +def main(): + add_steps_to_onnx(det_model_path) + add_steps_to_onnx(pose_model_path1) + add_steps_to_onnx(pose_model_path2) + add_steps_to_onnx(pose_model_path3) + add_steps_to_onnx(pose_model_path4) + add_steps_to_onnx(det_model_path.replace(".onnx", "_fp16.onnx")) + add_steps_to_onnx(pose_model_path1.replace(".onnx", "_fp16.onnx")) + add_steps_to_onnx(pose_model_path2.replace(".onnx", "_fp16.onnx")) + add_steps_to_onnx(pose_model_path3.replace(".onnx", "_fp16.onnx")) + add_steps_to_onnx(pose_model_path4.replace(".onnx", "_fp16.onnx")) + + +# ================================================================================================== + +if __name__ == "__main__": + main() diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py new file mode 100644 index 0000000..d7d5b57 --- /dev/null +++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320.py @@ -0,0 +1,18 @@ +_base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime.py"] + +onnx_config = dict( + input_shape=[320, 320], +) + +codebase_config = dict( + # For later TensorRT inference, the number of output boxes needs to be as stable as possible, + # because a drop in the box count leads to a re-optimization
which takes a lot of time, + # therefore reduce the maximum number of output boxes to the smallest usable value and sort out + # low confidence boxes outside the model. + post_processing=dict( + score_threshold=0.0, + confidence_threshold=0.0, + iou_threshold=0.5, + max_output_boxes_per_class=10, + ), +) diff --git a/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py new file mode 100644 index 0000000..1dd243b --- /dev/null +++ b/extras/mmdeploy/configs/detection_onnxruntime_static-320x320_fp16.py @@ -0,0 +1,18 @@ +_base_ = ["../_base_/base_static.py", "../../_base_/backends/onnxruntime-fp16.py"] + +onnx_config = dict( + input_shape=[320, 320], +) + +codebase_config = dict( + # For later TensorRT inference, the number of output boxes needs to be as stable as possible, + # because a drop in the box count leads to a re-optimization which takes a lot of time, + # therefore reduce the maximum number of output boxes to the smallest usable value and sort out + # low confidence boxes outside the model. 
+ post_processing=dict( + score_threshold=0.0, + confidence_threshold=0.0, + iou_threshold=0.5, + max_output_boxes_per_class=10, + ), +) diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288.py new file mode 100644 index 0000000..3d52547 --- /dev/null +++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288.py @@ -0,0 +1,19 @@ +_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime.py"] + +onnx_config = dict( + input_shape=[288, 384], + output_names=["kpts", "scores"], + dynamic_axes={ + "input": { + 0: "batch", + }, + "kpts": { + 0: "batch", + }, + "scores": { + 0: "batch", + }, + }, +) + +codebase_config = dict(export_postprocess=True) # export get_simcc_maximum diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288_fp16.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288_fp16.py new file mode 100644 index 0000000..fe0ca45 --- /dev/null +++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_dynamic-384x288_fp16.py @@ -0,0 +1,19 @@ +_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"] + +onnx_config = dict( + input_shape=[288, 384], + output_names=["kpts", "scores"], + dynamic_axes={ + "input": { + 0: "batch", + }, + "kpts": { + 0: "batch", + }, + "scores": { + 0: "batch", + }, + }, +) + +codebase_config = dict(export_postprocess=True) # export get_simcc_maximum diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288.py new file mode 100644 index 0000000..bfa43b4 --- /dev/null +++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288.py @@ -0,0 +1,8 @@ +_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime.py"] + +onnx_config = dict( + input_shape=[288, 384], + output_names=["kpts", 
"scores"], +) + +codebase_config = dict(export_postprocess=True) # export get_simcc_maximum diff --git a/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py new file mode 100644 index 0000000..6263dac --- /dev/null +++ b/extras/mmdeploy/configs/pose-detection_simcc_onnxruntime_static-384x288_fp16.py @@ -0,0 +1,8 @@ +_base_ = ["./pose-detection_static.py", "../_base_/backends/onnxruntime-fp16.py"] + +onnx_config = dict( + input_shape=[288, 384], + output_names=["kpts", "scores"], +) + +codebase_config = dict(export_postprocess=True) # export get_simcc_maximum diff --git a/extras/mmdeploy/dockerfile b/extras/mmdeploy/dockerfile new file mode 100644 index 0000000..b285ba6 --- /dev/null +++ b/extras/mmdeploy/dockerfile @@ -0,0 +1,38 @@ +FROM openmmlab/mmdeploy:ubuntu20.04-cuda11.8-mmdeploy1.3.1 + +ARG DEBIAN_FRONTEND=noninteractive +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 +WORKDIR / + +RUN apt-get update && apt-get install -y --no-install-recommends feh + +RUN git clone https://github.com/open-mmlab/mmdeploy.git --depth=1 +RUN cd mmdeploy/; python3 tools/scripts/build_ubuntu_x64_ort.py + +# Install MMPose +ENV FORCE_CUDA="1" +ENV MMCV_WITH_OPS=1 +RUN pip3 install --upgrade --no-cache-dir openmim +RUN mim install mmengine +RUN mim install "mmcv>=2,<2.2.0" +RUN mim install "mmdet>=3" +RUN mim install "mmpose>=1.1.0" +# Fix an error when importing mmpose +RUN pip3 install --upgrade --no-cache-dir "numpy<2" scipy +RUN git clone --depth=1 --branch=main https://github.com/open-mmlab/mmpose.git + +RUN echo 'export PYTHONPATH=/mmdeploy/build/lib:$PYTHONPATH' >> ~/.bashrc +RUN echo 'export LD_LIBRARY_PATH=/mmdeploy/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1/lib/:$LD_LIBRARY_PATH' >> ~/.bashrc + +# Show images +RUN apt-get update && apt-get install -y --no-install-recommends python3-tk + +# Tool for fp16 conversion +RUN pip3 install --upgrade --no-cache-dir 
onnxconverter_common + +# Fix an error when profiling +RUN pip3 install --upgrade --no-cache-dir "onnxruntime-gpu<1.17" + +WORKDIR /mmdeploy/ +CMD ["/bin/bash"] diff --git a/extras/mmdeploy/exports/.gitignore b/extras/mmdeploy/exports/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/extras/mmdeploy/exports/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/extras/mmdeploy/make_extra_graphs.py b/extras/mmdeploy/make_extra_graphs.py new file mode 100644 index 0000000..0a920f7 --- /dev/null +++ b/extras/mmdeploy/make_extra_graphs.py @@ -0,0 +1,338 @@ +import cv2 +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.ops import roi_align + +# ================================================================================================== + +base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/" +det_target_size = (320, 320) +pose_target_size = (384, 288) + +# ================================================================================================== + + +class Letterbox(nn.Module): + def __init__(self, target_size, fill_value=128): + """Resize and pad image while keeping aspect ratio""" + super(Letterbox, self).__init__() + + self.target_size = target_size + self.fill_value = fill_value + + def calc_params(self, ishape): + ih, iw = ishape[1], ishape[2] + th, tw = self.target_size + + scale = torch.min(tw / iw, th / ih) + nw = torch.round(iw * scale) + nh = torch.round(ih * scale) + + pad_w = tw - nw + pad_h = th - nh + pad_left = pad_w // 2 + pad_top = pad_h // 2 + pad_right = pad_w - pad_left + pad_bottom = pad_h - pad_top + paddings = (pad_left, pad_right, pad_top, pad_bottom) + + return paddings, scale, (nw, nh) + + def forward(self, img): + paddings, _, (nw, nh) = self.calc_params(img.shape) + + # Resize the image + img = img.to(torch.float32) + img = img.permute(0, 3, 1, 2) + img = F.interpolate( + img, + size=(nh, nw), + mode="bilinear", + align_corners=False, + ) + img = 
img.permute(0, 2, 3, 1) + img = img.round() + + # Pad the image + img = F.pad( + img.permute(0, 3, 1, 2), + pad=paddings, + mode="constant", + value=self.fill_value, + ) + img = img.permute(0, 2, 3, 1) + + return img + + +# ================================================================================================== + + +class BoxCrop(nn.Module): + def __init__(self, target_size): + """Crop bounding box from image""" + super(BoxCrop, self).__init__() + + self.target_size = target_size + self.padding_scale = 1.25 + + def calc_params(self, bbox): + start_x, start_y, end_x, end_y = bbox[0, 0], bbox[0, 1], bbox[0, 2], bbox[0, 3] + target_h, target_w = self.target_size + + # Calculate original bounding box width, height and center + bbox_w = end_x - start_x + bbox_h = end_y - start_y + center_x = (start_x + end_x) / 2.0 + center_y = (start_y + end_y) / 2.0 + + # Calculate the aspect ratios + bbox_aspect = bbox_w / bbox_h + target_aspect = target_w / target_h + + # Adjust the scaled bounding box to match the target aspect ratio + if bbox_aspect > target_aspect: + adjusted_h = bbox_w / target_aspect + adjusted_w = bbox_w + else: + adjusted_w = bbox_h * target_aspect + adjusted_h = bbox_h + + # Scale the bounding box by the padding_scale + scaled_bbox_w = adjusted_w * self.padding_scale + scaled_bbox_h = adjusted_h * self.padding_scale + + # Calculate scaled bounding box coordinates + new_start_x = center_x - scaled_bbox_w / 2.0 + new_start_y = center_y - scaled_bbox_h / 2.0 + new_end_x = center_x + scaled_bbox_w / 2.0 + new_end_y = center_y + scaled_bbox_h / 2.0 + + # Define the new box coordinates + new_box = torch.stack((new_start_x, new_start_y, new_end_x, new_end_y), dim=0) + new_box = new_box.unsqueeze(0) + scale = torch.stack( + ((target_w / scaled_bbox_w), (target_h / scaled_bbox_h)), dim=0 + ) + + return scale, new_box + + def forward(self, img, bbox): + _, bbox = self.calc_params(bbox) + + batch_indices = torch.zeros(bbox.shape[0], 1) + rois = 
torch.cat([batch_indices, bbox], dim=1) + + # Resize and crop + img = img.to(torch.float32) + img = img.permute(0, 3, 1, 2) + img = roi_align( + img, + rois, + output_size=self.target_size, + spatial_scale=1.0, + sampling_ratio=0, + ) + img = img.permute(0, 2, 3, 1) + img = img.round() + + return img + + +# ================================================================================================== + + +class DetPreprocess(nn.Module): + def __init__(self, target_size, fill_value=114): + super(DetPreprocess, self).__init__() + self.letterbox = Letterbox(target_size, fill_value) + + def forward(self, img): + # img: torch.Tensor of shape [batch, H, W, C], dtype=torch.uint8 + img = self.letterbox(img) + return img + + +# ================================================================================================== + + +class DetPostprocess(nn.Module): + def __init__(self, target_size): + super(DetPostprocess, self).__init__() + + self.target_size = target_size + self.letterbox = Letterbox(target_size) + + def forward(self, img, boxes): + paddings, scale, _ = self.letterbox.calc_params(img.shape) + + boxes = boxes.float() + boxes[:, :, 0] -= paddings[0] + boxes[:, :, 2] -= paddings[0] + boxes[:, :, 1] -= paddings[2] + boxes[:, :, 3] -= paddings[2] + + zero = torch.tensor(0) + boxes = torch.max(boxes, zero) + + th, tw = self.target_size + pad_w = paddings[0] + paddings[1] + pad_h = paddings[2] + paddings[3] + max_w = tw - pad_w - 1 + max_h = th - pad_h - 1 + b0 = boxes[:, :, 0] + b1 = boxes[:, :, 1] + b2 = boxes[:, :, 2] + b3 = boxes[:, :, 3] + b0 = torch.min(b0, max_w) + b1 = torch.min(b1, max_h) + b2 = torch.min(b2, max_w) + b3 = torch.min(b3, max_h) + boxes[:, :, 0] = b0 + boxes[:, :, 1] = b1 + boxes[:, :, 2] = b2 + boxes[:, :, 3] = b3 + + boxes[:, :, 0:4] /= scale + return boxes + + +# ================================================================================================== + + +class PosePreprocess(nn.Module): + def __init__(self, target_size, 
fill_value=114): + super(PosePreprocess, self).__init__() + self.boxcrop = BoxCrop(target_size) + + def forward(self, img, bbox): + # img: torch.Tensor of shape [1, H, W, C], dtype=torch.uint8 + # bbox: torch.Tensor of shape [1, 4], dtype=torch.int32 (as traced in main(); TODO confirm) + img = self.boxcrop(img, bbox) + return img + + +# ================================================================================================== + + +class PosePostprocess(nn.Module): + def __init__(self, target_size): + super(PosePostprocess, self).__init__() + self.boxcrop = BoxCrop(target_size) + self.target_size = target_size + + def forward(self, img, bbox, keypoints): + scale, bbox = self.boxcrop.calc_params(bbox) + + kp = keypoints.float() + kp[:, :, 0:2] /= scale + kp[:, :, 0] += bbox[0, 0] + kp[:, :, 1] += bbox[0, 1] + + zero = torch.tensor(0) + kp = torch.max(kp, zero) + + max_w = img.shape[2] - 1 + max_h = img.shape[1] - 1 + k0 = kp[:, :, 0] + k1 = kp[:, :, 1] + k0 = torch.min(k0, max_w) + k1 = torch.min(k1, max_h) + kp[:, :, 0] = k0 + kp[:, :, 1] = k1 + + return kp + + +# ================================================================================================== + + +def main(): + + img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg" + image = cv2.imread(img_path, 3) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # Initialize the DetPreprocess module + preprocess_model = DetPreprocess(target_size=det_target_size) + det_dummy_input_a0 = torch.from_numpy(image).unsqueeze(0) + + # Export to ONNX + torch.onnx.export( + preprocess_model, + det_dummy_input_a0, + base_path + "det_preprocess.onnx", + opset_version=11, + input_names=["input_image"], + output_names=["preprocessed_image"], + dynamic_axes={ + "input_image": {0: "batch_size", 1: "height", 2: "width"}, + "preprocessed_image": {0: "batch_size"}, + }, + ) + + # Initialize the DetPostprocess module + postprocess_model = DetPostprocess(target_size=det_target_size) + det_dummy_input_b0 =
torch.from_numpy(image).unsqueeze(0) + det_dummy_input_b1 = torch.rand(1, 10, 5) + + # Export to ONNX + torch.onnx.export( + postprocess_model, + (det_dummy_input_b0, det_dummy_input_b1), + base_path + "det_postprocess.onnx", + opset_version=11, + input_names=["input_image", "boxes"], + output_names=["output_boxes"], + dynamic_axes={ + "input_image": {0: "batch_size", 1: "height", 2: "width"}, + "boxes": {0: "batch_size", 1: "num_boxes"}, + "output_boxes": {0: "batch_size", 1: "num_boxes"}, + }, + ) + + # Initialize the PosePreprocess module + preprocess_model = PosePreprocess(target_size=pose_target_size) + det_dummy_input_c0 = torch.from_numpy(image).unsqueeze(0) + det_dummy_input_c1 = torch.tensor([[352, 339, 518, 594]]).to(torch.int32) + + # Export to ONNX + torch.onnx.export( + preprocess_model, + (det_dummy_input_c0, det_dummy_input_c1), + base_path + "pose_preprocess.onnx", + opset_version=11, + input_names=["input_image", "bbox"], + output_names=["preprocessed_image"], + dynamic_axes={ + "input_image": {0: "batch_size", 1: "height", 2: "width"}, + "preprocessed_image": {0: "batch_size"}, + }, + ) + + # Initialize the PosePostprocess module + postprocess_model = PosePostprocess(target_size=pose_target_size) + det_dummy_input_d0 = torch.from_numpy(image).unsqueeze(0) + det_dummy_input_d1 = torch.tensor([[352, 339, 518, 594]]).to(torch.int32) + det_dummy_input_d2 = torch.rand(1, 17, 2) + + # Export to ONNX + torch.onnx.export( + postprocess_model, + (det_dummy_input_d0, det_dummy_input_d1, det_dummy_input_d2), + base_path + "pose_postprocess.onnx", + opset_version=11, + input_names=["input_image", "bbox", "keypoints"], + output_names=["output_keypoints"], + dynamic_axes={ + "input_image": {0: "batch_size", 1: "height", 2: "width"}, + "output_keypoints": {0: "batch_size"}, + }, + ) + + +# ================================================================================================== + +if __name__ == "__main__": + main() diff --git 
a/extras/mmdeploy/run_container.sh b/extras/mmdeploy/run_container.sh new file mode 100755 index 0000000..7353774 --- /dev/null +++ b/extras/mmdeploy/run_container.sh @@ -0,0 +1,9 @@ +#! /bin/bash + +xhost + +docker run --privileged --rm --network host -it \ + --gpus all --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \ + --volume "$(pwd)"/:/RapidPoseTriangulation/ \ + --volume /tmp/.X11-unix:/tmp/.X11-unix \ + --env DISPLAY --env QT_X11_NO_MITSHM=1 \ + rpt_mmdeploy diff --git a/extras/mmdeploy/testimages/human-pose.jpeg b/extras/mmdeploy/testimages/human-pose.jpeg new file mode 100644 index 0000000..8de4015 Binary files /dev/null and b/extras/mmdeploy/testimages/human-pose.jpeg differ diff --git a/media/RESULTS.md b/media/RESULTS.md index 14e4605..9f3d197 100644 --- a/media/RESULTS.md +++ b/media/RESULTS.md @@ -6,9 +6,9 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.05260277602632167, - "avg_time_3d": 0.0003263081534434173, - "avg_fps": 18.8932042845038 + "avg_time_2d": 0.009281002464940992, + "avg_time_3d": 0.00031142558081675384, + "avg_fps": 104.24889248371795 } { "person_nums": { @@ -27,53 +27,53 @@ Results of the model in various experiments on different datasets. 
}, "mpjpe": { "count": 600, - "mean": 0.067113, - "median": 0.059352, - "std": 0.027739, - "sem": 0.001133, - "min": 0.043176, - "max": 0.190437, + "mean": 0.066564, + "median": 0.05836, + "std": 0.028051, + "sem": 0.001146, + "min": 0.043283, + "max": 0.190505, "recall-0.025": 0.0, - "recall-0.05": 0.036667, - "recall-0.1": 0.935, + "recall-0.05": 0.06, + "recall-0.1": 0.931667, "recall-0.15": 0.95, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600, "ap-0.025": 0.0, - "ap-0.05": 0.002419, - "ap-0.1": 0.895969, - "ap-0.15": 0.916293, + "ap-0.05": 0.005904, + "ap-0.1": 0.88884, + "ap-0.15": 0.912818, "ap-0.25": 1.0, "ap-0.5": 1.0 }, "nose": { "count": 600, - "mean": 0.117755, - "median": 0.101583, - "std": 0.04303, - "sem": 0.001758, - "min": 0.045229, - "max": 0.270864, + "mean": 0.113269, + "median": 0.097949, + "std": 0.041516, + "sem": 0.001696, + "min": 0.030433, + "max": 0.295413, "recall-0.025": 0.0, - "recall-0.05": 0.006667, - "recall-0.1": 0.485, - "recall-0.15": 0.803333, - "recall-0.25": 0.995, + "recall-0.05": 0.01, + "recall-0.1": 0.521667, + "recall-0.15": 0.821667, + "recall-0.25": 0.993333, "recall-0.5": 1.0, "num_labels": 600 }, "shoulder_left": { "count": 600, - "mean": 0.033354, - "median": 0.025457, - "std": 0.032091, - "sem": 0.001311, - "min": 0.003327, - "max": 0.181867, - "recall-0.025": 0.486667, - "recall-0.05": 0.871667, - "recall-0.1": 0.948333, + "mean": 0.034321, + "median": 0.025349, + "std": 0.032202, + "sem": 0.001316, + "min": 0.002801, + "max": 0.182024, + "recall-0.025": 0.49, + "recall-0.05": 0.856667, + "recall-0.1": 0.945, "recall-0.15": 0.965, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -81,15 +81,15 @@ Results of the model in various experiments on different datasets. 
}, "shoulder_right": { "count": 600, - "mean": 0.048518, - "median": 0.033796, - "std": 0.042293, - "sem": 0.001728, - "min": 0.004099, - "max": 0.247429, - "recall-0.025": 0.23, - "recall-0.05": 0.751667, - "recall-0.1": 0.9, + "mean": 0.046987, + "median": 0.033798, + "std": 0.041045, + "sem": 0.001677, + "min": 0.003907, + "max": 0.249492, + "recall-0.025": 0.22, + "recall-0.05": 0.773333, + "recall-0.1": 0.923333, "recall-0.15": 0.943333, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -97,178 +97,178 @@ Results of the model in various experiments on different datasets. }, "elbow_left": { "count": 600, - "mean": 0.042901, - "median": 0.035019, - "std": 0.034783, - "sem": 0.001421, - "min": 0.002822, - "max": 0.195221, - "recall-0.025": 0.271667, - "recall-0.05": 0.811667, - "recall-0.1": 0.943333, - "recall-0.15": 0.955, + "mean": 0.042771, + "median": 0.034443, + "std": 0.034408, + "sem": 0.001406, + "min": 0.002812, + "max": 0.194372, + "recall-0.025": 0.25, + "recall-0.05": 0.805, + "recall-0.1": 0.945, + "recall-0.15": 0.958333, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "elbow_right": { "count": 600, - "mean": 0.043838, - "median": 0.032965, - "std": 0.035921, - "sem": 0.001468, - "min": 0.006648, - "max": 0.282707, - "recall-0.025": 0.243333, - "recall-0.05": 0.803333, - "recall-0.1": 0.93, + "mean": 0.044192, + "median": 0.03208, + "std": 0.037392, + "sem": 0.001528, + "min": 0.006571, + "max": 0.342018, + "recall-0.025": 0.231667, + "recall-0.05": 0.79, + "recall-0.1": 0.926667, "recall-0.15": 0.945, - "recall-0.25": 0.998333, + "recall-0.25": 0.996667, "recall-0.5": 1.0, "num_labels": 600 }, "wrist_left": { "count": 600, - "mean": 0.040735, - "median": 0.024289, - "std": 0.042165, - "sem": 0.001723, - "min": 0.001529, - "max": 0.186262, - "recall-0.025": 0.515, - "recall-0.05": 0.745, - "recall-0.1": 0.911667, - "recall-0.15": 0.945, - "recall-0.25": 1.0, - "recall-0.5": 1.0, - "num_labels": 600 - }, - "wrist_right": { - "count": 600, - 
"mean": 0.043487, - "median": 0.025787, - "std": 0.048637, - "sem": 0.001987, - "min": 0.002077, - "max": 0.425327, - "recall-0.025": 0.476667, - "recall-0.05": 0.768333, - "recall-0.1": 0.89, - "recall-0.15": 0.915, + "mean": 0.042137, + "median": 0.023445, + "std": 0.047176, + "sem": 0.001928, + "min": 0.002601, + "max": 0.32204, + "recall-0.025": 0.533333, + "recall-0.05": 0.748333, + "recall-0.1": 0.891667, + "recall-0.15": 0.926667, "recall-0.25": 0.998333, "recall-0.5": 1.0, "num_labels": 600 }, + "wrist_right": { + "count": 597, + "mean": 0.046115, + "median": 0.027472, + "std": 0.053855, + "sem": 0.002206, + "min": 0.002569, + "max": 0.482775, + "recall-0.025": 0.455, + "recall-0.05": 0.748333, + "recall-0.1": 0.888333, + "recall-0.15": 0.898333, + "recall-0.25": 0.991667, + "recall-0.5": 0.995, + "num_labels": 600 + }, "hip_left": { "count": 600, - "mean": 0.089948, - "median": 0.087326, - "std": 0.032876, - "sem": 0.001343, - "min": 0.013928, - "max": 0.236753, - "recall-0.025": 0.005, - "recall-0.05": 0.04, - "recall-0.1": 0.823333, - "recall-0.15": 0.95, + "mean": 0.089983, + "median": 0.086035, + "std": 0.033001, + "sem": 0.001348, + "min": 0.010666, + "max": 0.237121, + "recall-0.025": 0.008333, + "recall-0.05": 0.041667, + "recall-0.1": 0.808333, + "recall-0.15": 0.951667, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "hip_right": { "count": 600, - "mean": 0.114678, - "median": 0.11506, - "std": 0.026245, - "sem": 0.001072, - "min": 0.046225, - "max": 0.234061, + "mean": 0.113405, + "median": 0.111782, + "std": 0.026025, + "sem": 0.001063, + "min": 0.040386, + "max": 0.235156, "recall-0.025": 0.0, - "recall-0.05": 0.001667, - "recall-0.1": 0.25, - "recall-0.15": 0.948333, + "recall-0.05": 0.011667, + "recall-0.1": 0.236667, + "recall-0.15": 0.946667, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 600 }, "knee_left": { "count": 600, - "mean": 0.06065, - "median": 0.045019, - "std": 0.060798, - "sem": 0.002484, - "min": 0.019487, - 
"max": 0.420267, - "recall-0.025": 0.046667, - "recall-0.05": 0.601667, - "recall-0.1": 0.918333, - "recall-0.15": 0.923333, - "recall-0.25": 0.98, + "mean": 0.059449, + "median": 0.04496, + "std": 0.056659, + "sem": 0.002315, + "min": 0.017049, + "max": 0.40245, + "recall-0.025": 0.041667, + "recall-0.05": 0.593333, + "recall-0.1": 0.921667, + "recall-0.15": 0.93, + "recall-0.25": 0.981667, "recall-0.5": 1.0, "num_labels": 600 }, "knee_right": { "count": 600, - "mean": 0.051123, - "median": 0.043066, - "std": 0.035171, - "sem": 0.001437, - "min": 0.017039, - "max": 0.258443, + "mean": 0.05269, + "median": 0.043861, + "std": 0.037853, + "sem": 0.001547, + "min": 0.01358, + "max": 0.250648, "recall-0.025": 0.036667, - "recall-0.05": 0.731667, - "recall-0.1": 0.943333, - "recall-0.15": 0.945, + "recall-0.05": 0.74, + "recall-0.1": 0.933333, + "recall-0.15": 0.936667, "recall-0.25": 0.998333, "recall-0.5": 1.0, "num_labels": 600 }, "ankle_left": { - "count": 598, - "mean": 0.098132, - "median": 0.086223, - "std": 0.049848, - "sem": 0.00204, - "min": 0.039526, - "max": 0.495917, + "count": 597, + "mean": 0.095195, + "median": 0.084927, + "std": 0.047605, + "sem": 0.00195, + "min": 0.046243, + "max": 0.497966, "recall-0.025": 0.0, "recall-0.05": 0.001667, - "recall-0.1": 0.841667, - "recall-0.15": 0.938333, + "recall-0.1": 0.84, + "recall-0.15": 0.94, "recall-0.25": 0.98, - "recall-0.5": 0.996667, + "recall-0.5": 0.995, "num_labels": 600 }, "ankle_right": { "count": 600, - "mean": 0.08584, - "median": 0.068684, - "std": 0.059413, - "sem": 0.002428, - "min": 0.033422, - "max": 0.462949, + "mean": 0.080212, + "median": 0.068059, + "std": 0.047808, + "sem": 0.001953, + "min": 0.032293, + "max": 0.453794, "recall-0.025": 0.0, - "recall-0.05": 0.016667, - "recall-0.1": 0.883333, - "recall-0.15": 0.905, - "recall-0.25": 0.978333, + "recall-0.05": 0.026667, + "recall-0.1": 0.905, + "recall-0.15": 0.925, + "recall-0.25": 0.988333, "recall-0.5": 1.0, "num_labels": 600 }, 
"joint_recalls": { "num_labels": 7800, - "recall-0.025": 0.17769, - "recall-0.05": 0.47295, - "recall-0.1": 0.81987, - "recall-0.15": 0.92872, + "recall-0.025": 0.1741, + "recall-0.05": 0.47231, + "recall-0.1": 0.82154, + "recall-0.15": 0.92949, "recall-0.25": 0.99397, - "recall-0.5": 0.99974 + "recall-0.5": 0.99923 } } { "total_parts": 8400, - "correct_parts": 8084, - "pcp": 0.962381 + "correct_parts": 8094, + "pcp": 0.963571 } ``` @@ -276,90 +276,90 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.08998977933143013, - "avg_time_3d": 0.0006411935865264578, - "avg_fps": 11.033755545195868 + "avg_time_2d": 0.025720739692347155, + "avg_time_3d": 0.0007782072545736516, + "avg_fps": 37.737348657781304 } { "person_nums": { "total_frames": 301, "total_labels": 477, - "total_preds": 814, + "total_preds": 828, "considered_empty": 0, "valid_preds": 477, - "invalid_preds": 337, + "invalid_preds": 351, "missing": 0, - "invalid_fraction": 0.414, - "precision": 0.586, + "invalid_fraction": 0.42391, + "precision": 0.57609, "recall": 1.0, - "f1": 0.73896, - "non_empty": 814 + "f1": 0.73103, + "non_empty": 828 }, "mpjpe": { "count": 477, - "mean": 0.048298, - "median": 0.043007, - "std": 0.01475, - "sem": 0.000676, - "min": 0.029813, - "max": 0.10567, + "mean": 0.048363, + "median": 0.043254, + "std": 0.014966, + "sem": 0.000686, + "min": 0.028857, + "max": 0.115145, "recall-0.025": 0.0, - "recall-0.05": 0.681342, - "recall-0.1": 0.989518, + "recall-0.05": 0.689727, + "recall-0.1": 0.987421, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 477, "ap-0.025": 0.0, - "ap-0.05": 0.349635, - "ap-0.1": 0.705917, - "ap-0.15": 0.717347, - "ap-0.25": 0.717347, - "ap-0.5": 0.717347 + "ap-0.05": 0.36895, + "ap-0.1": 0.701837, + "ap-0.15": 0.712076, + "ap-0.25": 0.712076, + "ap-0.5": 0.712076 }, "head": { "count": 477, - "mean": 0.05524, - "median": 0.049786, - "std": 0.026004, - "sem": 0.001192, - "min": 0.006316, - 
"max": 0.147291, + "mean": 0.053754, + "median": 0.050005, + "std": 0.024723, + "sem": 0.001133, + "min": 0.006821, + "max": 0.180711, "recall-0.025": 0.077568, - "recall-0.05": 0.503145, - "recall-0.1": 0.91195, - "recall-0.15": 1.0, + "recall-0.05": 0.498952, + "recall-0.1": 0.939203, + "recall-0.15": 0.997904, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 477 }, "shoulder_left": { "count": 477, - "mean": 0.04431, - "median": 0.038744, - "std": 0.020582, - "sem": 0.000943, - "min": 0.006704, - "max": 0.138498, - "recall-0.025": 0.150943, - "recall-0.05": 0.702306, - "recall-0.1": 0.983229, - "recall-0.15": 1.0, + "mean": 0.043087, + "median": 0.038111, + "std": 0.020649, + "sem": 0.000946, + "min": 0.003582, + "max": 0.154806, + "recall-0.025": 0.155136, + "recall-0.05": 0.721174, + "recall-0.1": 0.987421, + "recall-0.15": 0.997904, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 477 }, "shoulder_right": { "count": 477, - "mean": 0.050558, - "median": 0.045659, - "std": 0.024497, - "sem": 0.001123, - "min": 0.006395, - "max": 0.150544, - "recall-0.025": 0.106918, - "recall-0.05": 0.555556, - "recall-0.1": 0.947589, + "mean": 0.050634, + "median": 0.047764, + "std": 0.024316, + "sem": 0.001115, + "min": 0.006442, + "max": 0.16236, + "recall-0.025": 0.111111, + "recall-0.05": 0.526205, + "recall-0.1": 0.951782, "recall-0.15": 0.997904, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -367,31 +367,31 @@ Results of the model in various experiments on different datasets. 
}, "elbow_left": { "count": 477, - "mean": 0.040842, - "median": 0.034008, - "std": 0.025918, - "sem": 0.001188, - "min": 0.005511, - "max": 0.150408, - "recall-0.025": 0.301887, - "recall-0.05": 0.748428, - "recall-0.1": 0.958071, - "recall-0.15": 0.997904, - "recall-0.25": 1.0, + "mean": 0.041879, + "median": 0.03294, + "std": 0.029247, + "sem": 0.001341, + "min": 0.004935, + "max": 0.32041, + "recall-0.025": 0.280922, + "recall-0.05": 0.733753, + "recall-0.1": 0.953878, + "recall-0.15": 0.995807, + "recall-0.25": 0.997904, "recall-0.5": 1.0, "num_labels": 477 }, "elbow_right": { "count": 477, - "mean": 0.053537, - "median": 0.044085, - "std": 0.041924, - "sem": 0.001922, - "min": 0.002875, - "max": 0.235324, - "recall-0.025": 0.262055, - "recall-0.05": 0.555556, - "recall-0.1": 0.890985, + "mean": 0.05292, + "median": 0.044952, + "std": 0.0411, + "sem": 0.001884, + "min": 0.001458, + "max": 0.243877, + "recall-0.025": 0.264151, + "recall-0.05": 0.557652, + "recall-0.1": 0.90566, "recall-0.15": 0.958071, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -399,47 +399,47 @@ Results of the model in various experiments on different datasets. 
}, "wrist_left": { "count": 477, - "mean": 0.061478, - "median": 0.053374, - "std": 0.045984, - "sem": 0.002108, - "min": 0.003731, - "max": 0.359386, - "recall-0.025": 0.1174, - "recall-0.05": 0.412998, - "recall-0.1": 0.91195, - "recall-0.15": 0.962264, - "recall-0.25": 0.981132, + "mean": 0.06003, + "median": 0.052808, + "std": 0.039768, + "sem": 0.001823, + "min": 0.008619, + "max": 0.326556, + "recall-0.025": 0.129979, + "recall-0.05": 0.436059, + "recall-0.1": 0.907757, + "recall-0.15": 0.964361, + "recall-0.25": 0.989518, "recall-0.5": 1.0, "num_labels": 477 }, "wrist_right": { "count": 477, - "mean": 0.058056, - "median": 0.053748, - "std": 0.030081, - "sem": 0.001379, - "min": 0.009012, - "max": 0.202262, - "recall-0.025": 0.109015, - "recall-0.05": 0.446541, - "recall-0.1": 0.899371, - "recall-0.15": 0.989518, - "recall-0.25": 1.0, + "mean": 0.059214, + "median": 0.054442, + "std": 0.033352, + "sem": 0.001529, + "min": 0.007306, + "max": 0.362927, + "recall-0.025": 0.102725, + "recall-0.05": 0.42348, + "recall-0.1": 0.909853, + "recall-0.15": 0.981132, + "recall-0.25": 0.997904, "recall-0.5": 1.0, "num_labels": 477 }, "hip_left": { "count": 477, - "mean": 0.0478, - "median": 0.042957, - "std": 0.026301, - "sem": 0.001206, - "min": 0.00569, - "max": 0.140699, - "recall-0.025": 0.213836, - "recall-0.05": 0.616352, - "recall-0.1": 0.960168, + "mean": 0.048916, + "median": 0.04267, + "std": 0.02644, + "sem": 0.001212, + "min": 0.008106, + "max": 0.140729, + "recall-0.025": 0.174004, + "recall-0.05": 0.597484, + "recall-0.1": 0.953878, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -447,30 +447,30 @@ Results of the model in various experiments on different datasets. 
}, "hip_right": { "count": 477, - "mean": 0.057985, - "median": 0.056151, - "std": 0.025557, - "sem": 0.001171, - "min": 0.005195, - "max": 0.136188, - "recall-0.025": 0.106918, - "recall-0.05": 0.421384, - "recall-0.1": 0.930818, - "recall-0.15": 1.0, + "mean": 0.059511, + "median": 0.057006, + "std": 0.025342, + "sem": 0.001162, + "min": 0.002541, + "max": 0.163207, + "recall-0.025": 0.085954, + "recall-0.05": 0.341719, + "recall-0.1": 0.93501, + "recall-0.15": 0.997904, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 477 }, "knee_left": { "count": 477, - "mean": 0.039986, - "median": 0.037254, - "std": 0.024249, - "sem": 0.001111, - "min": 0.005439, - "max": 0.186508, - "recall-0.025": 0.262055, - "recall-0.05": 0.767296, + "mean": 0.041094, + "median": 0.038775, + "std": 0.024729, + "sem": 0.001133, + "min": 0.003901, + "max": 0.195774, + "recall-0.025": 0.249476, + "recall-0.05": 0.733753, "recall-0.1": 0.974843, "recall-0.15": 0.989518, "recall-0.25": 1.0, @@ -479,15 +479,15 @@ Results of the model in various experiments on different datasets. }, "knee_right": { "count": 477, - "mean": 0.039442, - "median": 0.034843, - "std": 0.023364, - "sem": 0.001071, - "min": 0.006866, - "max": 0.186557, - "recall-0.025": 0.303983, - "recall-0.05": 0.725367, - "recall-0.1": 0.979036, + "mean": 0.039762, + "median": 0.035645, + "std": 0.023311, + "sem": 0.001068, + "min": 0.005335, + "max": 0.191021, + "recall-0.025": 0.312369, + "recall-0.05": 0.706499, + "recall-0.1": 0.981132, "recall-0.15": 0.997904, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -495,14 +495,14 @@ Results of the model in various experiments on different datasets. 
}, "ankle_left": { "count": 477, - "mean": 0.037161, - "median": 0.031079, - "std": 0.030814, - "sem": 0.001412, - "min": 0.0017, - "max": 0.224459, + "mean": 0.036697, + "median": 0.028189, + "std": 0.031601, + "sem": 0.001448, + "min": 0.00396, + "max": 0.233662, "recall-0.025": 0.436059, - "recall-0.05": 0.825996, + "recall-0.05": 0.813417, "recall-0.1": 0.943396, "recall-0.15": 0.983229, "recall-0.25": 1.0, @@ -511,27 +511,27 @@ Results of the model in various experiments on different datasets. }, "ankle_right": { "count": 477, - "mean": 0.041477, - "median": 0.031838, - "std": 0.03801, - "sem": 0.001742, - "min": 0.00475, - "max": 0.298395, - "recall-0.025": 0.310273, - "recall-0.05": 0.790356, + "mean": 0.041215, + "median": 0.03085, + "std": 0.037902, + "sem": 0.001737, + "min": 0.002008, + "max": 0.269991, + "recall-0.025": 0.314465, + "recall-0.05": 0.800839, "recall-0.1": 0.922432, - "recall-0.15": 0.966457, + "recall-0.15": 0.968553, "recall-0.25": 0.997904, "recall-0.5": 1.0, "num_labels": 477 }, "joint_recalls": { "num_labels": 6201, - "recall-0.025": 0.21109, - "recall-0.05": 0.62038, - "recall-0.1": 0.93872, - "recall-0.15": 0.98742, - "recall-0.25": 0.99823, + "recall-0.025": 0.20593, + "recall-0.05": 0.60603, + "recall-0.1": 0.94211, + "recall-0.15": 0.98629, + "recall-0.25": 0.99871, "recall-0.5": 1.0 } } @@ -546,269 +546,269 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.04950667327305056, - "avg_time_3d": 0.0003714437754649036, - "avg_fps": 20.04887231463288 + "avg_time_2d": 0.010491762521131983, + "avg_time_3d": 0.00032040083183432525, + "avg_fps": 92.48842875886173 } { "person_nums": { "total_frames": 222, "total_labels": 376, - "total_preds": 472, + "total_preds": 444, "considered_empty": 0, "valid_preds": 376, - "invalid_preds": 96, + "invalid_preds": 68, "missing": 0, - "invalid_fraction": 0.20339, - "precision": 0.79661, + "invalid_fraction": 0.15315, + "precision": 0.84685, "recall": 1.0, - "f1": 0.88679, - "non_empty": 472 + "f1": 0.91707, + "non_empty": 444 }, "mpjpe": { "count": 376, - "mean": 0.078549, - "median": 0.07594, - "std": 0.01906, - "sem": 0.000984, - "min": 0.049002, - "max": 0.158403, + "mean": 0.077253, + "median": 0.074373, + "std": 0.017545, + "sem": 0.000906, + "min": 0.044498, + "max": 0.156198, "recall-0.025": 0.0, - "recall-0.05": 0.010638, - "recall-0.1": 0.888298, - "recall-0.15": 0.989362, + "recall-0.05": 0.015957, + "recall-0.1": 0.896277, + "recall-0.15": 0.99734, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376, "ap-0.025": 0.0, - "ap-0.05": 0.000331, - "ap-0.1": 0.780083, - "ap-0.15": 0.897845, - "ap-0.25": 0.906537, - "ap-0.5": 0.906537 + "ap-0.05": 0.000397, + "ap-0.1": 0.792696, + "ap-0.15": 0.931482, + "ap-0.25": 0.934769, + "ap-0.5": 0.934769 }, "head": { "count": 376, - "mean": 0.05927, - "median": 0.060888, - "std": 0.025271, - "sem": 0.001305, - "min": 0.006924, - "max": 0.162204, - "recall-0.025": 0.095745, - "recall-0.05": 0.364362, - "recall-0.1": 0.954787, - "recall-0.15": 0.994681, - "recall-0.25": 1.0, + "mean": 0.068707, + "median": 0.065641, + "std": 0.035104, + "sem": 0.001813, + "min": 0.009953, + "max": 0.310029, + "recall-0.025": 0.079787, + "recall-0.05": 0.324468, + "recall-0.1": 0.867021, + "recall-0.15": 0.973404, + "recall-0.25": 0.99734, "recall-0.5": 1.0, "num_labels": 376 }, "shoulder_left": { "count": 376, - "mean": 0.061546, - 
"median": 0.057277, - "std": 0.02834, - "sem": 0.001463, - "min": 0.005091, - "max": 0.154175, - "recall-0.025": 0.050532, - "recall-0.05": 0.398936, - "recall-0.1": 0.904255, - "recall-0.15": 0.989362, + "mean": 0.065959, + "median": 0.060667, + "std": 0.028962, + "sem": 0.001496, + "min": 0.017576, + "max": 0.164082, + "recall-0.025": 0.031915, + "recall-0.05": 0.292553, + "recall-0.1": 0.87766, + "recall-0.15": 0.986702, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "shoulder_right": { "count": 376, - "mean": 0.063647, - "median": 0.058123, - "std": 0.026883, - "sem": 0.001388, - "min": 0.016798, - "max": 0.185589, + "mean": 0.074319, + "median": 0.070225, + "std": 0.034792, + "sem": 0.001797, + "min": 0.005962, + "max": 0.196825, "recall-0.025": 0.039894, - "recall-0.05": 0.367021, - "recall-0.1": 0.893617, - "recall-0.15": 0.992021, + "recall-0.05": 0.268617, + "recall-0.1": 0.789894, + "recall-0.15": 0.965426, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "elbow_left": { "count": 376, - "mean": 0.085856, - "median": 0.069603, - "std": 0.047402, - "sem": 0.002448, - "min": 0.00761, - "max": 0.211541, - "recall-0.025": 0.026596, - "recall-0.05": 0.263298, - "recall-0.1": 0.646277, - "recall-0.15": 0.861702, + "mean": 0.08632, + "median": 0.072742, + "std": 0.048086, + "sem": 0.002483, + "min": 0.005631, + "max": 0.229685, + "recall-0.025": 0.039894, + "recall-0.05": 0.25, + "recall-0.1": 0.675532, + "recall-0.15": 0.87234, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "elbow_right": { "count": 376, - "mean": 0.070473, - "median": 0.06101, - "std": 0.03507, - "sem": 0.001811, - "min": 0.005479, - "max": 0.234956, - "recall-0.025": 0.023936, - "recall-0.05": 0.321809, - "recall-0.1": 0.789894, - "recall-0.15": 0.976064, + "mean": 0.08024, + "median": 0.070943, + "std": 0.043195, + "sem": 0.002231, + "min": 0.011528, + "max": 0.199802, + "recall-0.025": 0.042553, + "recall-0.05": 0.287234, + "recall-0.1": 0.718085, + 
"recall-0.15": 0.904255, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "wrist_left": { "count": 376, - "mean": 0.115006, - "median": 0.100073, - "std": 0.063704, - "sem": 0.00329, - "min": 0.010303, - "max": 0.427552, - "recall-0.025": 0.013298, - "recall-0.05": 0.090426, + "mean": 0.113533, + "median": 0.100003, + "std": 0.065084, + "sem": 0.003361, + "min": 0.009611, + "max": 0.404522, + "recall-0.025": 0.021277, + "recall-0.05": 0.143617, "recall-0.1": 0.5, - "recall-0.15": 0.74734, - "recall-0.25": 0.957447, + "recall-0.15": 0.787234, + "recall-0.25": 0.941489, "recall-0.5": 1.0, "num_labels": 376 }, "wrist_right": { "count": 376, - "mean": 0.098105, - "median": 0.089775, - "std": 0.042373, - "sem": 0.002188, - "min": 0.014046, - "max": 0.309484, - "recall-0.025": 0.007979, - "recall-0.05": 0.095745, - "recall-0.1": 0.603723, - "recall-0.15": 0.87766, - "recall-0.25": 0.99734, + "mean": 0.102733, + "median": 0.093528, + "std": 0.046502, + "sem": 0.002401, + "min": 0.018379, + "max": 0.258203, + "recall-0.025": 0.005319, + "recall-0.05": 0.071809, + "recall-0.1": 0.569149, + "recall-0.15": 0.851064, + "recall-0.25": 0.994681, "recall-0.5": 1.0, "num_labels": 376 }, "hip_left": { "count": 376, - "mean": 0.081631, - "median": 0.082941, - "std": 0.029603, - "sem": 0.001529, - "min": 0.018896, - "max": 0.178965, - "recall-0.025": 0.023936, - "recall-0.05": 0.167553, - "recall-0.1": 0.707447, - "recall-0.15": 0.992021, + "mean": 0.072164, + "median": 0.073167, + "std": 0.02975, + "sem": 0.001536, + "min": 0.007894, + "max": 0.173529, + "recall-0.025": 0.077128, + "recall-0.05": 0.231383, + "recall-0.1": 0.829787, + "recall-0.15": 0.994681, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "hip_right": { "count": 376, - "mean": 0.087096, - "median": 0.084144, - "std": 0.026379, - "sem": 0.001362, - "min": 0.022777, - "max": 0.207022, - "recall-0.025": 0.00266, - "recall-0.05": 0.06117, - "recall-0.1": 0.75, - "recall-0.15": 0.968085, + "mean": 
0.079834, + "median": 0.07783, + "std": 0.032293, + "sem": 0.001668, + "min": 0.014083, + "max": 0.2319, + "recall-0.025": 0.021277, + "recall-0.05": 0.146277, + "recall-0.1": 0.787234, + "recall-0.15": 0.965426, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "knee_left": { "count": 376, - "mean": 0.055903, - "median": 0.052238, - "std": 0.027881, - "sem": 0.00144, - "min": 0.007642, - "max": 0.228951, - "recall-0.025": 0.079787, - "recall-0.05": 0.462766, - "recall-0.1": 0.933511, - "recall-0.15": 0.989362, + "mean": 0.054967, + "median": 0.050785, + "std": 0.026446, + "sem": 0.001366, + "min": 0.009112, + "max": 0.241942, + "recall-0.025": 0.093085, + "recall-0.05": 0.489362, + "recall-0.1": 0.946809, + "recall-0.15": 0.99734, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "knee_right": { "count": 376, - "mean": 0.059496, - "median": 0.056779, - "std": 0.026291, - "sem": 0.001358, - "min": 0.017162, - "max": 0.210972, - "recall-0.025": 0.055851, - "recall-0.05": 0.37234, - "recall-0.1": 0.930851, - "recall-0.15": 0.989362, + "mean": 0.055966, + "median": 0.051957, + "std": 0.028328, + "sem": 0.001463, + "min": 0.004386, + "max": 0.216135, + "recall-0.025": 0.095745, + "recall-0.05": 0.465426, + "recall-0.1": 0.944149, + "recall-0.15": 0.981383, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 376 }, "ankle_left": { - "count": 371, - "mean": 0.090595, - "median": 0.059925, - "std": 0.098272, - "sem": 0.005109, - "min": 0.007433, - "max": 0.493218, - "recall-0.025": 0.06383, - "recall-0.05": 0.295213, - "recall-0.1": 0.819149, - "recall-0.15": 0.882979, - "recall-0.25": 0.904255, - "recall-0.5": 0.986702, + "count": 372, + "mean": 0.076281, + "median": 0.052812, + "std": 0.077449, + "sem": 0.004021, + "min": 0.008833, + "max": 0.461738, + "recall-0.025": 0.098404, + "recall-0.05": 0.460106, + "recall-0.1": 0.824468, + "recall-0.15": 0.922872, + "recall-0.25": 0.93883, + "recall-0.5": 0.989362, "num_labels": 376 }, "ankle_right": { - 
"count": 371, - "mean": 0.081171, - "median": 0.052557, - "std": 0.092463, - "sem": 0.004807, - "min": 0.006365, - "max": 0.495104, - "recall-0.025": 0.071809, - "recall-0.05": 0.465426, - "recall-0.1": 0.843085, - "recall-0.15": 0.898936, - "recall-0.25": 0.914894, - "recall-0.5": 0.986702, + "count": 375, + "mean": 0.067406, + "median": 0.048563, + "std": 0.071275, + "sem": 0.003686, + "min": 0.008871, + "max": 0.477389, + "recall-0.025": 0.077128, + "recall-0.05": 0.521277, + "recall-0.1": 0.898936, + "recall-0.15": 0.941489, + "recall-0.25": 0.957447, + "recall-0.5": 0.99734, "num_labels": 376 }, "joint_recalls": { "num_labels": 4888, - "recall-0.025": 0.04194, - "recall-0.05": 0.2856, - "recall-0.1": 0.78928, - "recall-0.15": 0.93412, - "recall-0.25": 0.9822, - "recall-0.5": 0.99755 + "recall-0.025": 0.05462, + "recall-0.05": 0.30278, + "recall-0.1": 0.7856, + "recall-0.15": 0.93249, + "recall-0.25": 0.9865, + "recall-0.5": 0.99877 } } { "total_parts": 5264, - "correct_parts": 4958, - "pcp": 0.941869 + "correct_parts": 4973, + "pcp": 0.944719 } ``` @@ -816,205 +816,205 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.04502476590515531, - "avg_time_3d": 0.0003439047417463509, - "avg_fps": 22.041642079021162 + "avg_time_2d": 0.012221919508088194, + "avg_time_3d": 0.0004397906086171386, + "avg_fps": 78.97827313868461 } { "person_nums": { "total_frames": 629, "total_labels": 1061, - "total_preds": 1002, + "total_preds": 1121, "considered_empty": 0, - "valid_preds": 761, - "invalid_preds": 241, - "missing": 300, - "invalid_fraction": 0.24052, - "precision": 0.75948, - "recall": 0.71725, - "f1": 0.73776, - "non_empty": 1002 + "valid_preds": 798, + "invalid_preds": 323, + "missing": 263, + "invalid_fraction": 0.28814, + "precision": 0.71186, + "recall": 0.75212, + "f1": 0.73144, + "non_empty": 1121 }, "mpjpe": { - "count": 761, - "mean": 0.11541, - "median": 0.095862, - "std": 0.066157, - "sem": 0.0024, - "min": 0.041161, - "max": 0.498007, + "count": 798, + "mean": 0.117827, + "median": 0.096904, + "std": 0.070807, + "sem": 0.002508, + "min": 0.038678, + "max": 0.498595, "recall-0.025": 0.0, - "recall-0.05": 0.005655, - "recall-0.1": 0.38737, - "recall-0.15": 0.590952, - "recall-0.25": 0.690858, - "recall-0.5": 0.717248, + "recall-0.05": 0.006598, + "recall-0.1": 0.404336, + "recall-0.15": 0.622055, + "recall-0.25": 0.713478, + "recall-0.5": 0.752121, "num_labels": 1061, "ap-0.025": 0.0, - "ap-0.05": 9.7e-05, - "ap-0.1": 0.249015, - "ap-0.15": 0.488567, - "ap-0.25": 0.61615, - "ap-0.5": 0.650766 + "ap-0.05": 0.000224, + "ap-0.1": 0.255146, + "ap-0.15": 0.509265, + "ap-0.25": 0.636374, + "ap-0.5": 0.678757 }, "head": { - "count": 760, - "mean": 0.065582, - "median": 0.050303, - "std": 0.059248, - "sem": 0.002151, - "min": 0.005632, - "max": 0.481845, - "recall-0.025": 0.085768, - "recall-0.05": 0.354383, - "recall-0.1": 0.62771, - "recall-0.15": 0.668238, - "recall-0.25": 0.69934, - "recall-0.5": 0.716305, + "count": 794, + "mean": 0.068485, + "median": 0.05105, + "std": 0.066023, + "sem": 0.002345, + "min": 0.004799, + "max": 0.492519, + "recall-0.025": 
0.089538, + "recall-0.05": 0.360038, + "recall-0.1": 0.647502, + "recall-0.15": 0.693685, + "recall-0.25": 0.722903, + "recall-0.5": 0.748351, "num_labels": 1061 }, "shoulder_left": { - "count": 759, - "mean": 0.063578, - "median": 0.043996, - "std": 0.065252, - "sem": 0.00237, - "min": 0.003158, - "max": 0.488608, - "recall-0.025": 0.154571, - "recall-0.05": 0.415646, - "recall-0.1": 0.606032, - "recall-0.15": 0.664467, - "recall-0.25": 0.696513, - "recall-0.5": 0.715363, + "count": 789, + "mean": 0.065716, + "median": 0.043353, + "std": 0.068506, + "sem": 0.00244, + "min": 0.001079, + "max": 0.489695, + "recall-0.025": 0.158341, + "recall-0.05": 0.409991, + "recall-0.1": 0.622997, + "recall-0.15": 0.688973, + "recall-0.25": 0.717248, + "recall-0.5": 0.743638, "num_labels": 1061 }, "shoulder_right": { - "count": 758, - "mean": 0.060929, - "median": 0.040328, - "std": 0.066816, - "sem": 0.002428, - "min": 0.001826, - "max": 0.496539, - "recall-0.025": 0.151744, - "recall-0.05": 0.444863, - "recall-0.1": 0.618285, - "recall-0.15": 0.670123, - "recall-0.25": 0.6918, - "recall-0.5": 0.71442, + "count": 795, + "mean": 0.063226, + "median": 0.042399, + "std": 0.068681, + "sem": 0.002437, + "min": 0.003966, + "max": 0.474844, + "recall-0.025": 0.141376, + "recall-0.05": 0.448633, + "recall-0.1": 0.647502, + "recall-0.15": 0.694628, + "recall-0.25": 0.723845, + "recall-0.5": 0.749293, "num_labels": 1061 }, "elbow_left": { - "count": 748, - "mean": 0.103193, - "median": 0.075916, - "std": 0.085353, - "sem": 0.003123, - "min": 0.005412, - "max": 0.482031, - "recall-0.025": 0.068803, - "recall-0.05": 0.226202, - "recall-0.1": 0.428841, - "recall-0.15": 0.557964, - "recall-0.25": 0.653157, - "recall-0.5": 0.704995, + "count": 785, + "mean": 0.106529, + "median": 0.074354, + "std": 0.093211, + "sem": 0.003329, + "min": 0.004606, + "max": 0.498545, + "recall-0.025": 0.069746, + "recall-0.05": 0.241282, + "recall-0.1": 0.462771, + "recall-0.15": 0.567389, + "recall-0.25": 
0.673893, + "recall-0.5": 0.739868, "num_labels": 1061 }, "elbow_right": { - "count": 750, - "mean": 0.083231, - "median": 0.0574, - "std": 0.077519, - "sem": 0.002832, - "min": 0.003695, - "max": 0.498082, - "recall-0.025": 0.095193, - "recall-0.05": 0.302545, - "recall-0.1": 0.524976, - "recall-0.15": 0.614515, - "recall-0.25": 0.671065, - "recall-0.5": 0.70688, + "count": 786, + "mean": 0.084779, + "median": 0.061571, + "std": 0.076862, + "sem": 0.002743, + "min": 0.006062, + "max": 0.487586, + "recall-0.025": 0.094251, + "recall-0.05": 0.29689, + "recall-0.1": 0.551367, + "recall-0.15": 0.643732, + "recall-0.25": 0.698398, + "recall-0.5": 0.740811, "num_labels": 1061 }, "wrist_left": { - "count": 751, - "mean": 0.126967, - "median": 0.090359, - "std": 0.10256, - "sem": 0.003745, - "min": 0.002476, - "max": 0.497523, - "recall-0.025": 0.06409, - "recall-0.05": 0.177191, - "recall-0.1": 0.377945, - "recall-0.15": 0.469369, - "recall-0.25": 0.614515, - "recall-0.5": 0.707823, + "count": 784, + "mean": 0.123596, + "median": 0.083634, + "std": 0.106397, + "sem": 0.003802, + "min": 0.005352, + "max": 0.495659, + "recall-0.025": 0.081998, + "recall-0.05": 0.220547, + "recall-0.1": 0.408106, + "recall-0.15": 0.515551, + "recall-0.25": 0.640905, + "recall-0.5": 0.738926, "num_labels": 1061 }, "wrist_right": { - "count": 746, - "mean": 0.112793, - "median": 0.077753, - "std": 0.10017, - "sem": 0.00367, - "min": 0.005759, - "max": 0.485122, - "recall-0.025": 0.072573, - "recall-0.05": 0.239397, - "recall-0.1": 0.423186, - "recall-0.15": 0.525919, - "recall-0.25": 0.62017, - "recall-0.5": 0.70311, + "count": 789, + "mean": 0.109537, + "median": 0.074858, + "std": 0.099662, + "sem": 0.00355, + "min": 0.006797, + "max": 0.498114, + "recall-0.025": 0.091423, + "recall-0.05": 0.26673, + "recall-0.1": 0.458058, + "recall-0.15": 0.558907, + "recall-0.25": 0.660697, + "recall-0.5": 0.743638, "num_labels": 1061 }, "hip_left": { - "count": 748, - "mean": 0.189277, - "median": 
0.17076, - "std": 0.085049, - "sem": 0.003112, - "min": 0.022675, - "max": 0.499384, + "count": 778, + "mean": 0.191328, + "median": 0.174682, + "std": 0.084633, + "sem": 0.003036, + "min": 0.014967, + "max": 0.488984, "recall-0.025": 0.000943, - "recall-0.05": 0.004713, - "recall-0.1": 0.065975, - "recall-0.15": 0.258247, - "recall-0.25": 0.579642, - "recall-0.5": 0.704995, + "recall-0.05": 0.002828, + "recall-0.1": 0.070688, + "recall-0.15": 0.276155, + "recall-0.25": 0.580584, + "recall-0.5": 0.73327, "num_labels": 1061 }, "hip_right": { - "count": 743, - "mean": 0.180773, - "median": 0.168347, - "std": 0.076963, - "sem": 0.002825, - "min": 0.036386, - "max": 0.491672, + "count": 783, + "mean": 0.188995, + "median": 0.173099, + "std": 0.084094, + "sem": 0.003007, + "min": 0.025132, + "max": 0.498747, "recall-0.025": 0.0, - "recall-0.05": 0.00377, - "recall-0.1": 0.071631, - "recall-0.15": 0.272385, - "recall-0.25": 0.590009, - "recall-0.5": 0.700283, + "recall-0.05": 0.004713, + "recall-0.1": 0.073516, + "recall-0.15": 0.262017, + "recall-0.25": 0.61263, + "recall-0.5": 0.737983, "num_labels": 1061 }, "joint_recalls": { "num_labels": 9549, - "recall-0.025": 0.07645, - "recall-0.05": 0.24065, - "recall-0.1": 0.41565, - "recall-0.15": 0.52215, - "recall-0.25": 0.64572, - "recall-0.5": 0.70761 + "recall-0.025": 0.08032, + "recall-0.05": 0.25008, + "recall-0.1": 0.43753, + "recall-0.15": 0.54414, + "recall-0.25": 0.66981, + "recall-0.5": 0.74112 } } { "total_parts": 10610, - "correct_parts": 5850, - "pcp": 0.551367 + "correct_parts": 6084, + "pcp": 0.573421 } ``` @@ -1022,269 +1022,269 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.10050282943539503, - "avg_time_3d": 0.0007078095180232351, - "avg_fps": 9.880384219886661 + "avg_time_2d": 0.03297888011467166, + "avg_time_3d": 0.0010675581490121237, + "avg_fps": 29.371647990170736 } { "person_nums": { "total_frames": 420, "total_labels": 1466, - "total_preds": 1544, + "total_preds": 1533, "considered_empty": 0, - "valid_preds": 1464, - "invalid_preds": 80, - "missing": 2, - "invalid_fraction": 0.05181, - "precision": 0.94819, - "recall": 0.99864, - "f1": 0.97276, - "non_empty": 1544 + "valid_preds": 1463, + "invalid_preds": 70, + "missing": 3, + "invalid_fraction": 0.04566, + "precision": 0.95434, + "recall": 0.99795, + "f1": 0.97566, + "non_empty": 1533 }, "mpjpe": { - "count": 1464, - "mean": 0.042772, - "median": 0.038146, - "std": 0.020549, - "sem": 0.000537, - "min": 0.01198, - "max": 0.147968, - "recall-0.025": 0.15075, - "recall-0.05": 0.747613, - "recall-0.1": 0.982947, - "recall-0.15": 0.998636, - "recall-0.25": 0.998636, - "recall-0.5": 0.998636, + "count": 1463, + "mean": 0.042352, + "median": 0.037848, + "std": 0.018473, + "sem": 0.000483, + "min": 0.013587, + "max": 0.17143, + "recall-0.025": 0.119372, + "recall-0.05": 0.736698, + "recall-0.1": 0.989768, + "recall-0.15": 0.997271, + "recall-0.25": 0.997954, + "recall-0.5": 0.997954, "num_labels": 1466, - "ap-0.025": 0.063091, - "ap-0.05": 0.672365, - "ap-0.1": 0.962571, - "ap-0.15": 0.984321, - "ap-0.25": 0.984321, - "ap-0.5": 0.984321 + "ap-0.025": 0.037816, + "ap-0.05": 0.637618, + "ap-0.1": 0.976343, + "ap-0.15": 0.987949, + "ap-0.25": 0.988806, + "ap-0.5": 0.988806 }, "nose": { "count": 1462, - "mean": 0.016241, - "median": 0.012408, - "std": 0.019893, - "sem": 0.00052, - "min": 0.001343, - "max": 0.277555, - "recall-0.025": 0.902939, - "recall-0.05": 0.963773, - "recall-0.1": 0.994532, - "recall-0.15": 0.995215, - "recall-0.25": 0.995899, + "mean": 0.015402, + "median": 0.011989, + "std": 0.018581, + "sem": 0.000486, + "min": 0.000669, + "max": 
0.354785, + "recall-0.025": 0.90499, + "recall-0.05": 0.977444, + "recall-0.1": 0.995215, + "recall-0.15": 0.995899, + "recall-0.25": 0.996582, "recall-0.5": 0.999316, "num_labels": 1463 }, "shoulder_left": { - "count": 1464, - "mean": 0.021443, - "median": 0.01904, - "std": 0.011261, - "sem": 0.000294, - "min": 0.002666, - "max": 0.09584, - "recall-0.025": 0.701228, - "recall-0.05": 0.978172, - "recall-0.1": 0.998636, - "recall-0.15": 0.998636, - "recall-0.25": 0.998636, - "recall-0.5": 0.998636, + "count": 1463, + "mean": 0.022093, + "median": 0.020177, + "std": 0.011304, + "sem": 0.000296, + "min": 0.002578, + "max": 0.125843, + "recall-0.025": 0.687585, + "recall-0.05": 0.973397, + "recall-0.1": 0.995907, + "recall-0.15": 0.997954, + "recall-0.25": 0.997954, + "recall-0.5": 0.997954, "num_labels": 1466 }, "shoulder_right": { - "count": 1463, - "mean": 0.022816, - "median": 0.019628, - "std": 0.014811, - "sem": 0.000387, - "min": 0.00121, - "max": 0.146291, - "recall-0.025": 0.709215, - "recall-0.05": 0.949488, - "recall-0.1": 0.993857, - "recall-0.15": 0.998635, - "recall-0.25": 0.998635, - "recall-0.5": 0.998635, + "count": 1462, + "mean": 0.022878, + "median": 0.020916, + "std": 0.011567, + "sem": 0.000303, + "min": 0.00177, + "max": 0.140607, + "recall-0.025": 0.678498, + "recall-0.05": 0.972696, + "recall-0.1": 0.99727, + "recall-0.15": 0.997952, + "recall-0.25": 0.997952, + "recall-0.5": 0.997952, "num_labels": 1465 }, "elbow_left": { - "count": 1463, - "mean": 0.026209, - "median": 0.017669, - "std": 0.028978, - "sem": 0.000758, - "min": 0.001782, - "max": 0.326031, - "recall-0.025": 0.709215, - "recall-0.05": 0.883959, - "recall-0.1": 0.97884, - "recall-0.15": 0.986348, - "recall-0.25": 0.996587, - "recall-0.5": 0.998635, + "count": 1462, + "mean": 0.026327, + "median": 0.019138, + "std": 0.023445, + "sem": 0.000613, + "min": 0.001473, + "max": 0.204516, + "recall-0.025": 0.657338, + "recall-0.05": 0.88942, + "recall-0.1": 0.980887, + "recall-0.15": 
0.991809, + "recall-0.25": 0.997952, + "recall-0.5": 0.997952, "num_labels": 1465 }, "elbow_right": { "count": 1462, - "mean": 0.023224, - "median": 0.018238, - "std": 0.01727, - "sem": 0.000452, - "min": 0.00127, - "max": 0.177372, - "recall-0.025": 0.689679, - "recall-0.05": 0.929597, - "recall-0.1": 0.994532, - "recall-0.15": 0.998633, + "mean": 0.023441, + "median": 0.01854, + "std": 0.017002, + "sem": 0.000445, + "min": 0.001862, + "max": 0.192752, + "recall-0.025": 0.704033, + "recall-0.05": 0.917977, + "recall-0.1": 0.995899, + "recall-0.15": 0.997949, "recall-0.25": 0.999316, "recall-0.5": 0.999316, "num_labels": 1463 }, "wrist_left": { - "count": 1429, - "mean": 0.034919, - "median": 0.016782, - "std": 0.050228, - "sem": 0.001329, - "min": 0.000584, - "max": 0.397932, - "recall-0.025": 0.675035, - "recall-0.05": 0.824965, - "recall-0.1": 0.909344, - "recall-0.15": 0.958856, - "recall-0.25": 0.98675, - "recall-0.5": 0.996513, + "count": 1432, + "mean": 0.035253, + "median": 0.017377, + "std": 0.050312, + "sem": 0.00133, + "min": 0.000839, + "max": 0.397588, + "recall-0.025": 0.668759, + "recall-0.05": 0.841004, + "recall-0.1": 0.914923, + "recall-0.15": 0.950488, + "recall-0.25": 0.988145, + "recall-0.5": 0.998605, "num_labels": 1434 }, "wrist_right": { "count": 1456, - "mean": 0.023978, - "median": 0.016342, - "std": 0.022697, - "sem": 0.000595, - "min": 0.001965, - "max": 0.19539, - "recall-0.025": 0.703297, - "recall-0.05": 0.89217, - "recall-0.1": 0.980769, - "recall-0.15": 0.996566, - "recall-0.25": 1.0, + "mean": 0.025795, + "median": 0.016112, + "std": 0.032044, + "sem": 0.00084, + "min": 0.001692, + "max": 0.302436, + "recall-0.025": 0.70261, + "recall-0.05": 0.895604, + "recall-0.1": 0.973214, + "recall-0.15": 0.985577, + "recall-0.25": 0.995192, "recall-0.5": 1.0, "num_labels": 1456 }, "hip_left": { - "count": 1463, - "mean": 0.059374, - "median": 0.05336, - "std": 0.030044, - "sem": 0.000786, - "min": 0.005242, - "max": 0.179926, - 
"recall-0.025": 0.068259, - "recall-0.05": 0.423891, - "recall-0.1": 0.893515, - "recall-0.15": 0.969283, - "recall-0.25": 0.998635, - "recall-0.5": 0.998635, + "count": 1462, + "mean": 0.060254, + "median": 0.054373, + "std": 0.028581, + "sem": 0.000748, + "min": 0.004693, + "max": 0.17666, + "recall-0.025": 0.053242, + "recall-0.05": 0.3843, + "recall-0.1": 0.895563, + "recall-0.15": 0.982935, + "recall-0.25": 0.997952, + "recall-0.5": 0.997952, "num_labels": 1465 }, "hip_right": { - "count": 1464, - "mean": 0.058577, - "median": 0.054454, - "std": 0.029025, - "sem": 0.000759, - "min": 0.003587, - "max": 0.299462, - "recall-0.025": 0.05116, - "recall-0.05": 0.421555, - "recall-0.1": 0.912688, - "recall-0.15": 0.986357, - "recall-0.25": 0.997954, - "recall-0.5": 0.998636, + "count": 1463, + "mean": 0.058329, + "median": 0.055457, + "std": 0.027832, + "sem": 0.000728, + "min": 0.005918, + "max": 0.45971, + "recall-0.025": 0.050477, + "recall-0.05": 0.414734, + "recall-0.1": 0.929741, + "recall-0.15": 0.991132, + "recall-0.25": 0.997271, + "recall-0.5": 0.997954, "num_labels": 1466 }, "knee_left": { - "count": 1463, - "mean": 0.051904, - "median": 0.042771, - "std": 0.042625, - "sem": 0.001115, - "min": 0.002056, - "max": 0.34988, - "recall-0.025": 0.186348, - "recall-0.05": 0.633447, - "recall-0.1": 0.925597, + "count": 1462, + "mean": 0.051563, + "median": 0.044941, + "std": 0.038014, + "sem": 0.000995, + "min": 0.002692, + "max": 0.452543, + "recall-0.025": 0.159044, + "recall-0.05": 0.604096, + "recall-0.1": 0.934471, "recall-0.15": 0.968601, - "recall-0.25": 0.983618, - "recall-0.5": 0.998635, + "recall-0.25": 0.993174, + "recall-0.5": 0.997952, "num_labels": 1465 }, "knee_right": { - "count": 1457, - "mean": 0.048364, - "median": 0.042642, - "std": 0.029664, - "sem": 0.000777, - "min": 0.003548, - "max": 0.324346, - "recall-0.025": 0.213845, - "recall-0.05": 0.589445, - "recall-0.1": 0.938314, - "recall-0.15": 0.992461, - "recall-0.25": 0.996573, - 
"recall-0.5": 0.998629, + "count": 1456, + "mean": 0.049185, + "median": 0.041163, + "std": 0.037287, + "sem": 0.000978, + "min": 0.001378, + "max": 0.466556, + "recall-0.025": 0.204935, + "recall-0.05": 0.601097, + "recall-0.1": 0.947224, + "recall-0.15": 0.985607, + "recall-0.25": 0.991775, + "recall-0.5": 0.997944, "num_labels": 1459 }, "ankle_left": { - "count": 1454, - "mean": 0.082966, - "median": 0.037696, - "std": 0.105229, - "sem": 0.002761, - "min": 0.000696, - "max": 0.49576, - "recall-0.025": 0.362269, - "recall-0.05": 0.585783, - "recall-0.1": 0.730007, - "recall-0.15": 0.822283, - "recall-0.25": 0.912509, - "recall-0.5": 0.993848, + "count": 1456, + "mean": 0.080698, + "median": 0.03627, + "std": 0.100866, + "sem": 0.002644, + "min": 0.002069, + "max": 0.497911, + "recall-0.025": 0.355434, + "recall-0.05": 0.593301, + "recall-0.1": 0.752563, + "recall-0.15": 0.826384, + "recall-0.25": 0.909774, + "recall-0.5": 0.995215, "num_labels": 1463 }, "ankle_right": { - "count": 1450, - "mean": 0.078982, - "median": 0.03334, - "std": 0.105487, - "sem": 0.002771, - "min": 0.001095, - "max": 0.498037, - "recall-0.025": 0.367808, - "recall-0.05": 0.682192, + "count": 1452, + "mean": 0.07488, + "median": 0.032819, + "std": 0.097791, + "sem": 0.002567, + "min": 0.000904, + "max": 0.498364, + "recall-0.025": 0.371918, + "recall-0.05": 0.664384, "recall-0.1": 0.771233, - "recall-0.15": 0.811644, - "recall-0.25": 0.895205, - "recall-0.5": 0.993151, + "recall-0.15": 0.837671, + "recall-0.25": 0.913699, + "recall-0.5": 0.994521, "num_labels": 1460 }, "joint_recalls": { "num_labels": 18990, - "recall-0.025": 0.48694, - "recall-0.05": 0.75013, - "recall-0.1": 0.92459, - "recall-0.15": 0.96003, - "recall-0.25": 0.98136, - "recall-0.5": 0.99763 + "recall-0.025": 0.47615, + "recall-0.05": 0.74797, + "recall-0.1": 0.92917, + "recall-0.15": 0.96198, + "recall-0.25": 0.98225, + "recall-0.5": 0.99747 } } { "total_parts": 20444, - "correct_parts": 19972, - "pcp": 0.976913 + 
"correct_parts": 19987, + "pcp": 0.977646 } ``` @@ -1292,269 +1292,269 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.06035172067037443, - "avg_time_3d": 0.00048815215506204746, - "avg_fps": 16.43658925568811 + "avg_time_2d": 0.019059100383665505, + "avg_time_3d": 0.0005858217797628263, + "avg_fps": 50.90373948447781 } { "person_nums": { "total_frames": 420, "total_labels": 1466, - "total_preds": 1432, + "total_preds": 1429, "considered_empty": 0, - "valid_preds": 1416, - "invalid_preds": 16, - "missing": 50, - "invalid_fraction": 0.01117, - "precision": 0.98883, - "recall": 0.96589, - "f1": 0.97723, - "non_empty": 1432 + "valid_preds": 1417, + "invalid_preds": 12, + "missing": 49, + "invalid_fraction": 0.0084, + "precision": 0.9916, + "recall": 0.96658, + "f1": 0.97893, + "non_empty": 1429 }, "mpjpe": { - "count": 1416, - "mean": 0.075878, - "median": 0.065403, - "std": 0.043901, - "sem": 0.001167, - "min": 0.021024, - "max": 0.442732, - "recall-0.025": 0.004775, - "recall-0.05": 0.271487, - "recall-0.1": 0.794679, - "recall-0.15": 0.911323, - "recall-0.25": 0.951569, - "recall-0.5": 0.965894, + "count": 1417, + "mean": 0.073091, + "median": 0.062219, + "std": 0.040479, + "sem": 0.001076, + "min": 0.020245, + "max": 0.448508, + "recall-0.025": 0.005457, + "recall-0.05": 0.279673, + "recall-0.1": 0.8206, + "recall-0.15": 0.929059, + "recall-0.25": 0.955662, + "recall-0.5": 0.966576, "num_labels": 1466, - "ap-0.025": 0.001729, - "ap-0.05": 0.180768, - "ap-0.1": 0.732669, - "ap-0.15": 0.890679, - "ap-0.25": 0.938419, - "ap-0.5": 0.9553 + "ap-0.025": 0.0005, + "ap-0.05": 0.166228, + "ap-0.1": 0.769371, + "ap-0.15": 0.915595, + "ap-0.25": 0.948334, + "ap-0.5": 0.959393 }, "nose": { - "count": 1415, - "mean": 0.023812, - "median": 0.018331, - "std": 0.017907, - "sem": 0.000476, - "min": 0.001934, - "max": 0.234974, - "recall-0.025": 0.632262, - "recall-0.05": 0.907724, - "recall-0.1": 0.959672, - "recall-0.15": 
0.965824, - "recall-0.25": 0.967191, - "recall-0.5": 0.967191, + "count": 1416, + "mean": 0.023231, + "median": 0.018076, + "std": 0.018379, + "sem": 0.000489, + "min": 0.001971, + "max": 0.187154, + "recall-0.025": 0.659604, + "recall-0.05": 0.911141, + "recall-0.1": 0.958988, + "recall-0.15": 0.96514, + "recall-0.25": 0.967874, + "recall-0.5": 0.967874, "num_labels": 1463 }, "shoulder_left": { - "count": 1416, - "mean": 0.02429, - "median": 0.021182, - "std": 0.018533, - "sem": 0.000493, - "min": 0.002092, - "max": 0.315725, - "recall-0.025": 0.571623, - "recall-0.05": 0.914734, - "recall-0.1": 0.961801, - "recall-0.15": 0.963847, - "recall-0.25": 0.963847, - "recall-0.5": 0.965894, + "count": 1417, + "mean": 0.024898, + "median": 0.022244, + "std": 0.015731, + "sem": 0.000418, + "min": 0.002236, + "max": 0.178628, + "recall-0.025": 0.581173, + "recall-0.05": 0.888813, + "recall-0.1": 0.962483, + "recall-0.15": 0.965894, + "recall-0.25": 0.966576, + "recall-0.5": 0.966576, "num_labels": 1466 }, "shoulder_right": { - "count": 1415, - "mean": 0.03001, - "median": 0.024274, - "std": 0.024761, - "sem": 0.000658, - "min": 0.003865, - "max": 0.302059, - "recall-0.025": 0.502389, - "recall-0.05": 0.868942, - "recall-0.1": 0.950171, - "recall-0.15": 0.959044, - "recall-0.25": 0.96314, - "recall-0.5": 0.96587, + "count": 1416, + "mean": 0.030089, + "median": 0.024773, + "std": 0.020273, + "sem": 0.000539, + "min": 0.002884, + "max": 0.211554, + "recall-0.025": 0.486007, + "recall-0.05": 0.857338, + "recall-0.1": 0.952218, + "recall-0.15": 0.962457, + "recall-0.25": 0.966553, + "recall-0.5": 0.966553, "num_labels": 1465 }, "elbow_left": { - "count": 1415, - "mean": 0.036771, - "median": 0.023677, - "std": 0.036234, - "sem": 0.000964, - "min": 0.001509, - "max": 0.35915, - "recall-0.025": 0.511263, - "recall-0.05": 0.759727, - "recall-0.1": 0.894881, - "recall-0.15": 0.950171, - "recall-0.25": 0.963823, - "recall-0.5": 0.96587, + "count": 1416, + "mean": 0.037455, + 
"median": 0.024659, + "std": 0.034006, + "sem": 0.000904, + "min": 0.002134, + "max": 0.293637, + "recall-0.025": 0.488055, + "recall-0.05": 0.744027, + "recall-0.1": 0.898294, + "recall-0.15": 0.956997, + "recall-0.25": 0.965188, + "recall-0.5": 0.966553, "num_labels": 1465 }, "elbow_right": { - "count": 1415, - "mean": 0.037368, - "median": 0.022564, - "std": 0.043946, - "sem": 0.001169, - "min": 0.001064, - "max": 0.417028, - "recall-0.025": 0.548189, - "recall-0.05": 0.784689, - "recall-0.1": 0.903623, - "recall-0.15": 0.924812, - "recall-0.25": 0.964457, - "recall-0.5": 0.967191, + "count": 1416, + "mean": 0.034477, + "median": 0.022965, + "std": 0.031753, + "sem": 0.000844, + "min": 0.001941, + "max": 0.286945, + "recall-0.025": 0.534518, + "recall-0.05": 0.781955, + "recall-0.1": 0.922761, + "recall-0.15": 0.956254, + "recall-0.25": 0.966507, + "recall-0.5": 0.967874, "num_labels": 1463 }, "wrist_left": { - "count": 1380, - "mean": 0.057057, - "median": 0.023098, - "std": 0.074873, - "sem": 0.002016, - "min": 0.001825, - "max": 0.467986, - "recall-0.025": 0.505579, - "recall-0.05": 0.638075, - "recall-0.1": 0.799163, - "recall-0.15": 0.866806, - "recall-0.25": 0.933752, - "recall-0.5": 0.962343, + "count": 1386, + "mean": 0.051722, + "median": 0.025793, + "std": 0.058808, + "sem": 0.00158, + "min": 0.001897, + "max": 0.46638, + "recall-0.025": 0.475593, + "recall-0.05": 0.654812, + "recall-0.1": 0.812413, + "recall-0.15": 0.889819, + "recall-0.25": 0.958856, + "recall-0.5": 0.966527, "num_labels": 1434 }, "wrist_right": { - "count": 1407, - "mean": 0.052599, - "median": 0.028103, - "std": 0.067423, - "sem": 0.001798, - "min": 0.000678, - "max": 0.453996, - "recall-0.025": 0.4375, - "recall-0.05": 0.646291, - "recall-0.1": 0.846154, - "recall-0.15": 0.908654, - "recall-0.25": 0.942308, - "recall-0.5": 0.966346, + "count": 1408, + "mean": 0.046451, + "median": 0.028597, + "std": 0.048597, + "sem": 0.001296, + "min": 0.002471, + "max": 0.331108, + 
"recall-0.025": 0.430632, + "recall-0.05": 0.669643, + "recall-0.1": 0.856456, + "recall-0.15": 0.929945, + "recall-0.25": 0.953984, + "recall-0.5": 0.967033, "num_labels": 1456 }, "hip_left": { - "count": 1414, - "mean": 0.069345, - "median": 0.063987, - "std": 0.033986, - "sem": 0.000904, - "min": 0.004843, - "max": 0.206544, - "recall-0.025": 0.03959, - "recall-0.05": 0.257338, - "recall-0.1": 0.843686, - "recall-0.15": 0.916724, - "recall-0.25": 0.965188, - "recall-0.5": 0.965188, + "count": 1415, + "mean": 0.07154, + "median": 0.066976, + "std": 0.033658, + "sem": 0.000895, + "min": 0.002043, + "max": 0.210973, + "recall-0.025": 0.030717, + "recall-0.05": 0.230717, + "recall-0.1": 0.840273, + "recall-0.15": 0.925597, + "recall-0.25": 0.96587, + "recall-0.5": 0.96587, "num_labels": 1465 }, "hip_right": { - "count": 1415, - "mean": 0.079898, - "median": 0.071309, - "std": 0.038974, - "sem": 0.001036, - "min": 0.010034, - "max": 0.299462, - "recall-0.025": 0.013643, - "recall-0.05": 0.208049, - "recall-0.1": 0.752387, - "recall-0.15": 0.894952, - "recall-0.25": 0.963165, - "recall-0.5": 0.965211, + "count": 1416, + "mean": 0.079315, + "median": 0.074191, + "std": 0.034495, + "sem": 0.000917, + "min": 0.011248, + "max": 0.295907, + "recall-0.025": 0.012278, + "recall-0.05": 0.172578, + "recall-0.1": 0.776262, + "recall-0.15": 0.914052, + "recall-0.25": 0.965211, + "recall-0.5": 0.965894, "num_labels": 1466 }, "knee_left": { - "count": 1398, - "mean": 0.083457, - "median": 0.055943, - "std": 0.079924, - "sem": 0.002138, - "min": 0.006682, - "max": 0.499765, - "recall-0.025": 0.094881, - "recall-0.05": 0.392491, - "recall-0.1": 0.741297, - "recall-0.15": 0.802048, - "recall-0.25": 0.91058, - "recall-0.5": 0.954266, + "count": 1410, + "mean": 0.085701, + "median": 0.057942, + "std": 0.077613, + "sem": 0.002068, + "min": 0.004836, + "max": 0.483791, + "recall-0.025": 0.086689, + "recall-0.05": 0.361092, + "recall-0.1": 0.736519, + "recall-0.15": 0.797952, + 
"recall-0.25": 0.921502, + "recall-0.5": 0.962457, "num_labels": 1465 }, "knee_right": { - "count": 1396, - "mean": 0.082016, - "median": 0.055961, - "std": 0.07626, - "sem": 0.002042, - "min": 0.005483, - "max": 0.495027, - "recall-0.025": 0.07745, - "recall-0.05": 0.381768, - "recall-0.1": 0.749143, - "recall-0.15": 0.829335, - "recall-0.25": 0.91501, - "recall-0.5": 0.95682, + "count": 1404, + "mean": 0.08149, + "median": 0.057414, + "std": 0.074943, + "sem": 0.002001, + "min": 0.004282, + "max": 0.472221, + "recall-0.025": 0.091844, + "recall-0.05": 0.370802, + "recall-0.1": 0.758739, + "recall-0.15": 0.823852, + "recall-0.25": 0.917066, + "recall-0.5": 0.962303, "num_labels": 1459 }, "ankle_left": { - "count": 1324, - "mean": 0.162633, - "median": 0.132173, - "std": 0.140662, - "sem": 0.003867, - "min": 0.000753, - "max": 0.499578, - "recall-0.025": 0.197539, - "recall-0.05": 0.293233, - "recall-0.1": 0.374573, - "recall-0.15": 0.501025, - "recall-0.25": 0.649351, - "recall-0.5": 0.90499, + "count": 1325, + "mean": 0.159001, + "median": 0.117656, + "std": 0.137725, + "sem": 0.003785, + "min": 0.001738, + "max": 0.499244, + "recall-0.025": 0.186603, + "recall-0.05": 0.295284, + "recall-0.1": 0.390294, + "recall-0.15": 0.516063, + "recall-0.25": 0.671907, + "recall-0.5": 0.905673, "num_labels": 1463 }, "ankle_right": { - "count": 1317, - "mean": 0.151178, - "median": 0.10686, - "std": 0.132903, - "sem": 0.003664, - "min": 0.001721, - "max": 0.496921, - "recall-0.025": 0.158904, - "recall-0.05": 0.295205, - "recall-0.1": 0.426712, - "recall-0.15": 0.539041, - "recall-0.25": 0.679452, - "recall-0.5": 0.902055, + "count": 1318, + "mean": 0.138723, + "median": 0.091835, + "std": 0.130726, + "sem": 0.003602, + "min": 0.001879, + "max": 0.498441, + "recall-0.025": 0.173288, + "recall-0.05": 0.343151, + "recall-0.1": 0.482192, + "recall-0.15": 0.578082, + "recall-0.25": 0.70137, + "recall-0.5": 0.90274, "num_labels": 1460 }, "joint_recalls": { "num_labels": 18990, - 
"recall-0.025": 0.32944, - "recall-0.05": 0.56482, - "recall-0.1": 0.78468, - "recall-0.15": 0.8475, - "recall-0.25": 0.906, - "recall-0.5": 0.95424 + "recall-0.025": 0.32549, + "recall-0.05": 0.55972, + "recall-0.1": 0.79579, + "recall-0.15": 0.85982, + "recall-0.25": 0.91411, + "recall-0.5": 0.95613 } } { "total_parts": 20444, - "correct_parts": 18352, - "pcp": 0.897672 + "correct_parts": 18561, + "pcp": 0.907895 } ``` @@ -1562,57 +1562,57 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.18815787420040223, - "avg_time_3d": 0.001592859989259301, - "avg_fps": 5.270071835403125 + "avg_time_2d": 0.05933877607671226, + "avg_time_3d": 0.002668114987815299, + "avg_fps": 16.127239776613678 } { "person_nums": { "total_frames": 420, "total_labels": 1466, - "total_preds": 1493, + "total_preds": 1495, "considered_empty": 0, "valid_preds": 1465, - "invalid_preds": 28, + "invalid_preds": 30, "missing": 1, - "invalid_fraction": 0.01875, - "precision": 0.98125, + "invalid_fraction": 0.02007, + "precision": 0.97993, "recall": 0.99932, - "f1": 0.9902, - "non_empty": 1493 + "f1": 0.98953, + "non_empty": 1495 }, "mpjpe": { "count": 1465, - "mean": 0.035896, - "median": 0.03129, - "std": 0.017035, - "sem": 0.000445, - "min": 0.012795, - "max": 0.13346, - "recall-0.025": 0.238745, - "recall-0.05": 0.858799, - "recall-0.1": 0.991132, + "mean": 0.03645, + "median": 0.032668, + "std": 0.014574, + "sem": 0.000381, + "min": 0.014928, + "max": 0.116577, + "recall-0.025": 0.191678, + "recall-0.05": 0.844475, + "recall-0.1": 0.998636, "recall-0.15": 0.999318, "recall-0.25": 0.999318, "recall-0.5": 0.999318, "num_labels": 1466, - "ap-0.025": 0.138828, - "ap-0.05": 0.833731, - "ap-0.1": 0.980837, - "ap-0.15": 0.990118, - "ap-0.25": 0.990118, - "ap-0.5": 0.990118 + "ap-0.025": 0.106858, + "ap-0.05": 0.816491, + "ap-0.1": 0.991669, + "ap-0.15": 0.992363, + "ap-0.25": 0.992363, + "ap-0.5": 0.992363 }, "nose": { "count": 1462, - "mean": 0.011192, - 
"median": 0.010004, - "std": 0.006886, - "sem": 0.00018, - "min": 0.001687, - "max": 0.130269, - "recall-0.025": 0.976077, - "recall-0.05": 0.995215, + "mean": 0.011726, + "median": 0.010385, + "std": 0.007192, + "sem": 0.000188, + "min": 0.001875, + "max": 0.112909, + "recall-0.025": 0.974026, + "recall-0.05": 0.992481, "recall-0.1": 0.998633, "recall-0.15": 0.999316, "recall-0.25": 0.999316, @@ -1621,14 +1621,14 @@ Results of the model in various experiments on different datasets. }, "shoulder_left": { "count": 1465, - "mean": 0.018481, - "median": 0.016756, - "std": 0.008734, - "sem": 0.000228, - "min": 0.001637, - "max": 0.082437, - "recall-0.025": 0.817872, - "recall-0.05": 0.991814, + "mean": 0.019132, + "median": 0.017724, + "std": 0.008239, + "sem": 0.000215, + "min": 0.002015, + "max": 0.058517, + "recall-0.025": 0.806276, + "recall-0.05": 0.993861, "recall-0.1": 0.999318, "recall-0.15": 0.999318, "recall-0.25": 0.999318, @@ -1637,14 +1637,14 @@ Results of the model in various experiments on different datasets. }, "shoulder_right": { "count": 1464, - "mean": 0.019939, - "median": 0.018826, - "std": 0.008763, - "sem": 0.000229, - "min": 0.002124, - "max": 0.085937, - "recall-0.025": 0.822526, - "recall-0.05": 0.989761, + "mean": 0.020556, + "median": 0.019632, + "std": 0.008084, + "sem": 0.000211, + "min": 0.001145, + "max": 0.068167, + "recall-0.025": 0.788396, + "recall-0.05": 0.991809, "recall-0.1": 0.999317, "recall-0.15": 0.999317, "recall-0.25": 0.999317, @@ -1653,15 +1653,15 @@ Results of the model in various experiments on different datasets. 
}, "elbow_left": { "count": 1464, - "mean": 0.019246, - "median": 0.017385, - "std": 0.010174, - "sem": 0.000266, - "min": 0.002091, - "max": 0.094785, - "recall-0.025": 0.779522, - "recall-0.05": 0.985666, - "recall-0.1": 0.999317, + "mean": 0.021035, + "median": 0.018377, + "std": 0.011932, + "sem": 0.000312, + "min": 0.001799, + "max": 0.101956, + "recall-0.025": 0.730375, + "recall-0.05": 0.967918, + "recall-0.1": 0.997952, "recall-0.15": 0.999317, "recall-0.25": 0.999317, "recall-0.5": 0.999317, @@ -1669,63 +1669,63 @@ Results of the model in various experiments on different datasets. }, "elbow_right": { "count": 1462, - "mean": 0.017685, - "median": 0.015792, - "std": 0.010015, - "sem": 0.000262, - "min": 0.001923, - "max": 0.135256, - "recall-0.025": 0.863295, - "recall-0.05": 0.980861, - "recall-0.1": 0.998633, - "recall-0.15": 0.999316, + "mean": 0.019353, + "median": 0.017394, + "std": 0.012002, + "sem": 0.000314, + "min": 0.001696, + "max": 0.163336, + "recall-0.025": 0.824334, + "recall-0.05": 0.97471, + "recall-0.1": 0.997266, + "recall-0.15": 0.997949, "recall-0.25": 0.999316, "recall-0.5": 0.999316, "num_labels": 1463 }, "wrist_left": { "count": 1433, - "mean": 0.018201, - "median": 0.012466, - "std": 0.020397, - "sem": 0.000539, - "min": 0.000969, - "max": 0.200232, - "recall-0.025": 0.830544, - "recall-0.05": 0.954672, + "mean": 0.018366, + "median": 0.013219, + "std": 0.019445, + "sem": 0.000514, + "min": 0.00096, + "max": 0.198571, + "recall-0.025": 0.829149, + "recall-0.05": 0.958159, "recall-0.1": 0.984658, - "recall-0.15": 0.995119, + "recall-0.15": 0.993724, "recall-0.25": 0.999303, "recall-0.5": 0.999303, "num_labels": 1434 }, "wrist_right": { "count": 1456, - "mean": 0.017871, - "median": 0.012179, - "std": 0.019072, - "sem": 0.0005, - "min": 0.00073, - "max": 0.212529, - "recall-0.025": 0.819368, - "recall-0.05": 0.95467, - "recall-0.1": 0.987637, - "recall-0.15": 0.99794, + "mean": 0.018426, + "median": 0.012462, + "std": 0.020209, + 
"sem": 0.00053, + "min": 0.001522, + "max": 0.219788, + "recall-0.025": 0.824863, + "recall-0.05": 0.942308, + "recall-0.1": 0.985577, + "recall-0.15": 0.995879, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 1456 }, "hip_left": { "count": 1464, - "mean": 0.048017, - "median": 0.046762, - "std": 0.017825, - "sem": 0.000466, - "min": 0.006375, - "max": 0.150217, - "recall-0.025": 0.065529, - "recall-0.05": 0.613652, - "recall-0.1": 0.976792, + "mean": 0.050518, + "median": 0.049164, + "std": 0.018844, + "sem": 0.000493, + "min": 0.007522, + "max": 0.158307, + "recall-0.025": 0.055973, + "recall-0.05": 0.527645, + "recall-0.1": 0.976109, "recall-0.15": 0.998635, "recall-0.25": 0.999317, "recall-0.5": 0.999317, @@ -1733,98 +1733,98 @@ Results of the model in various experiments on different datasets. }, "hip_right": { "count": 1465, - "mean": 0.048174, - "median": 0.047527, - "std": 0.015467, - "sem": 0.000404, - "min": 0.007141, - "max": 0.112096, - "recall-0.025": 0.050477, - "recall-0.05": 0.555252, - "recall-0.1": 0.994543, - "recall-0.15": 0.999318, + "mean": 0.050893, + "median": 0.04962, + "std": 0.016422, + "sem": 0.000429, + "min": 0.00903, + "max": 0.186674, + "recall-0.025": 0.042974, + "recall-0.05": 0.506821, + "recall-0.1": 0.993179, + "recall-0.15": 0.997954, "recall-0.25": 0.999318, "recall-0.5": 0.999318, "num_labels": 1466 }, "knee_left": { "count": 1464, - "mean": 0.037422, - "median": 0.032992, - "std": 0.02345, - "sem": 0.000613, - "min": 0.003004, - "max": 0.221549, - "recall-0.025": 0.312628, - "recall-0.05": 0.787031, - "recall-0.1": 0.977474, - "recall-0.15": 0.993857, + "mean": 0.038603, + "median": 0.034967, + "std": 0.021055, + "sem": 0.00055, + "min": 0.003825, + "max": 0.157831, + "recall-0.025": 0.261433, + "recall-0.05": 0.762457, + "recall-0.1": 0.98157, + "recall-0.15": 0.997952, "recall-0.25": 0.999317, "recall-0.5": 0.999317, "num_labels": 1465 }, "knee_right": { "count": 1458, - "mean": 0.039185, - "median": 0.034295, - 
"std": 0.024486, - "sem": 0.000641, - "min": 0.001345, - "max": 0.207304, - "recall-0.025": 0.27416, - "recall-0.05": 0.766964, - "recall-0.1": 0.968472, - "recall-0.15": 0.991775, + "mean": 0.039233, + "median": 0.034251, + "std": 0.024067, + "sem": 0.000631, + "min": 0.003005, + "max": 0.235912, + "recall-0.025": 0.272104, + "recall-0.05": 0.764907, + "recall-0.1": 0.975326, + "recall-0.15": 0.995202, "recall-0.25": 0.999315, "recall-0.5": 0.999315, "num_labels": 1459 }, "ankle_left": { - "count": 1461, - "mean": 0.087203, - "median": 0.044541, - "std": 0.101451, - "sem": 0.002655, - "min": 0.001014, - "max": 0.485626, - "recall-0.025": 0.300068, - "recall-0.05": 0.539303, - "recall-0.1": 0.717703, - "recall-0.15": 0.826384, - "recall-0.25": 0.909091, - "recall-0.5": 0.998633, + "count": 1462, + "mean": 0.083044, + "median": 0.045752, + "std": 0.088433, + "sem": 0.002314, + "min": 0.003234, + "max": 0.476361, + "recall-0.025": 0.308271, + "recall-0.05": 0.520848, + "recall-0.1": 0.710185, + "recall-0.15": 0.835954, + "recall-0.25": 0.924129, + "recall-0.5": 0.999316, "num_labels": 1463 }, "ankle_right": { - "count": 1449, - "mean": 0.080071, - "median": 0.040897, - "std": 0.095903, - "sem": 0.00252, - "min": 0.001231, - "max": 0.478944, - "recall-0.025": 0.314384, - "recall-0.05": 0.570548, - "recall-0.1": 0.756164, - "recall-0.15": 0.845205, - "recall-0.25": 0.907534, - "recall-0.5": 0.992466, + "count": 1457, + "mean": 0.081561, + "median": 0.043811, + "std": 0.091636, + "sem": 0.002402, + "min": 0.001943, + "max": 0.49569, + "recall-0.025": 0.293836, + "recall-0.05": 0.549315, + "recall-0.1": 0.75137, + "recall-0.15": 0.826712, + "recall-0.25": 0.910959, + "recall-0.5": 0.997945, "num_labels": 1460 }, "joint_recalls": { "num_labels": 18990, - "recall-0.025": 0.55508, - "recall-0.05": 0.82138, - "recall-0.1": 0.95029, - "recall-0.15": 0.9723, - "recall-0.25": 0.98499, - "recall-0.5": 0.99847 + "recall-0.025": 0.5386, + "recall-0.05": 0.80348, + "recall-0.1": 
0.94982, + "recall-0.15": 0.97167, + "recall-0.25": 0.98647, + "recall-0.5": 0.99884 } } { "total_parts": 20444, - "correct_parts": 20225, - "pcp": 0.989288 + "correct_parts": 20259, + "pcp": 0.990951 } ``` @@ -1833,269 +1833,269 @@ Results of the model in various experiments on different datasets. (duration 00:01:45) ```json { - "avg_time_2d": 0.04642030628505292, - "avg_time_3d": 0.00031231766316428114, - "avg_fps": 21.39832766737141 + "avg_time_2d": 0.013423044290115585, + "avg_time_3d": 0.0004526440268640579, + "avg_fps": 72.06849686702047 } { "person_nums": { "total_frames": 479, "total_labels": 479, - "total_preds": 713, - "considered_empty": 0, - "valid_preds": 421, - "invalid_preds": 292, - "missing": 58, - "invalid_fraction": 0.40954, - "precision": 0.59046, - "recall": 0.87891, - "f1": 0.70638, - "non_empty": 713 + "total_preds": 585, + "considered_empty": 1, + "valid_preds": 434, + "invalid_preds": 150, + "missing": 45, + "invalid_fraction": 0.25685, + "precision": 0.74315, + "recall": 0.90605, + "f1": 0.81656, + "non_empty": 584 }, "mpjpe": { - "count": 421, - "mean": 0.09014, - "median": 0.07045, - "std": 0.07175, - "sem": 0.003501, - "min": 0.018751, - "max": 0.440594, - "recall-0.025": 0.035491, - "recall-0.05": 0.317328, - "recall-0.1": 0.628392, - "recall-0.15": 0.73904, - "recall-0.25": 0.839248, - "recall-0.5": 0.878914, + "count": 434, + "mean": 0.074807, + "median": 0.061202, + "std": 0.044993, + "sem": 0.002162, + "min": 0.020954, + "max": 0.318899, + "recall-0.025": 0.022965, + "recall-0.05": 0.329854, + "recall-0.1": 0.7119, + "recall-0.15": 0.835073, + "recall-0.25": 0.899791, + "recall-0.5": 0.906054, "num_labels": 479, - "ap-0.025": 0.001831, - "ap-0.05": 0.124046, - "ap-0.1": 0.42434, - "ap-0.15": 0.57369, - "ap-0.25": 0.700444, - "ap-0.5": 0.739409 + "ap-0.025": 0.001009, + "ap-0.05": 0.11566, + "ap-0.1": 0.503757, + "ap-0.15": 0.682624, + "ap-0.25": 0.791403, + "ap-0.5": 0.803673 }, "head": { - "count": 395, - "mean": 0.0376, - 
"median": 0.027562, - "std": 0.041483, - "sem": 0.00209, - "min": 0.003359, - "max": 0.46272, - "recall-0.025": 0.358407, - "recall-0.05": 0.716814, - "recall-0.1": 0.836283, - "recall-0.15": 0.860619, - "recall-0.25": 0.867257, - "recall-0.5": 0.873894, + "count": 412, + "mean": 0.034681, + "median": 0.03062, + "std": 0.018246, + "sem": 0.0009, + "min": 0.002045, + "max": 0.119132, + "recall-0.025": 0.287611, + "recall-0.05": 0.75885, + "recall-0.1": 0.900442, + "recall-0.15": 0.911504, + "recall-0.25": 0.911504, + "recall-0.5": 0.911504, "num_labels": 452 }, "shoulder_left": { - "count": 399, - "mean": 0.046979, - "median": 0.036816, - "std": 0.042295, - "sem": 0.00212, - "min": 0.012772, - "max": 0.419987, - "recall-0.025": 0.088106, - "recall-0.05": 0.656388, - "recall-0.1": 0.839207, - "recall-0.15": 0.854626, - "recall-0.25": 0.867841, - "recall-0.5": 0.878855, + "count": 412, + "mean": 0.042275, + "median": 0.037554, + "std": 0.019264, + "sem": 0.00095, + "min": 0.014265, + "max": 0.186331, + "recall-0.025": 0.081498, + "recall-0.05": 0.700441, + "recall-0.1": 0.89207, + "recall-0.15": 0.905286, + "recall-0.25": 0.907489, + "recall-0.5": 0.907489, "num_labels": 454 }, "shoulder_right": { - "count": 411, - "mean": 0.035064, - "median": 0.027722, - "std": 0.032686, - "sem": 0.001614, - "min": 0.004672, - "max": 0.435386, - "recall-0.025": 0.321199, - "recall-0.05": 0.783726, - "recall-0.1": 0.85439, - "recall-0.15": 0.869379, - "recall-0.25": 0.873662, - "recall-0.5": 0.880086, + "count": 426, + "mean": 0.030066, + "median": 0.02809, + "std": 0.010452, + "sem": 0.000507, + "min": 0.010825, + "max": 0.072284, + "recall-0.025": 0.299786, + "recall-0.05": 0.862955, + "recall-0.1": 0.912206, + "recall-0.15": 0.912206, + "recall-0.25": 0.912206, + "recall-0.5": 0.912206, "num_labels": 467 }, "elbow_left": { - "count": 386, - "mean": 0.039094, - "median": 0.02174, - "std": 0.052536, - "sem": 0.002677, - "min": 0.003292, - "max": 0.397852, + "count": 401, + "mean": 
0.031452, + "median": 0.0227, + "std": 0.030449, + "sem": 0.001522, + "min": 0.001723, + "max": 0.355998, "recall-0.025": 0.505721, - "recall-0.05": 0.723112, - "recall-0.1": 0.810069, - "recall-0.15": 0.837529, - "recall-0.25": 0.867277, - "recall-0.5": 0.883295, + "recall-0.05": 0.79405, + "recall-0.1": 0.887872, + "recall-0.15": 0.906178, + "recall-0.25": 0.915332, + "recall-0.5": 0.91762, "num_labels": 437 }, "elbow_right": { - "count": 387, - "mean": 0.037503, - "median": 0.021018, - "std": 0.058759, - "sem": 0.002991, - "min": 0.002712, - "max": 0.484533, - "recall-0.025": 0.519274, - "recall-0.05": 0.743764, - "recall-0.1": 0.823129, - "recall-0.15": 0.84127, - "recall-0.25": 0.863946, - "recall-0.5": 0.877551, + "count": 405, + "mean": 0.025408, + "median": 0.022343, + "std": 0.015524, + "sem": 0.000772, + "min": 0.001272, + "max": 0.110877, + "recall-0.025": 0.521542, + "recall-0.05": 0.854875, + "recall-0.1": 0.913832, + "recall-0.15": 0.918367, + "recall-0.25": 0.918367, + "recall-0.5": 0.918367, "num_labels": 441 }, "wrist_left": { - "count": 381, - "mean": 0.038795, - "median": 0.022729, - "std": 0.058788, - "sem": 0.003016, - "min": 0.002346, - "max": 0.465752, - "recall-0.025": 0.488532, - "recall-0.05": 0.729358, - "recall-0.1": 0.81422, - "recall-0.15": 0.844037, - "recall-0.25": 0.855505, - "recall-0.5": 0.873853, + "count": 397, + "mean": 0.032045, + "median": 0.025504, + "std": 0.028908, + "sem": 0.001453, + "min": 0.001469, + "max": 0.236119, + "recall-0.025": 0.438073, + "recall-0.05": 0.793578, + "recall-0.1": 0.892202, + "recall-0.15": 0.894495, + "recall-0.25": 0.91055, + "recall-0.5": 0.91055, "num_labels": 436 }, "wrist_right": { - "count": 391, - "mean": 0.035983, - "median": 0.019868, - "std": 0.054692, - "sem": 0.002769, - "min": 0.001743, - "max": 0.43689, - "recall-0.025": 0.540359, - "recall-0.05": 0.76009, - "recall-0.1": 0.818386, - "recall-0.15": 0.836323, - "recall-0.25": 0.863229, - "recall-0.5": 0.876682, + "count": 409, + 
"mean": 0.028878, + "median": 0.022987, + "std": 0.029473, + "sem": 0.001459, + "min": 0.002706, + "max": 0.317473, + "recall-0.025": 0.495516, + "recall-0.05": 0.856502, + "recall-0.1": 0.896861, + "recall-0.15": 0.899103, + "recall-0.25": 0.912556, + "recall-0.5": 0.91704, "num_labels": 446 }, "hip_left": { - "count": 271, - "mean": 0.109969, - "median": 0.095268, - "std": 0.059502, - "sem": 0.003621, - "min": 0.020961, - "max": 0.487803, - "recall-0.025": 0.003185, - "recall-0.05": 0.025478, - "recall-0.1": 0.487261, - "recall-0.15": 0.761146, - "recall-0.25": 0.834395, - "recall-0.5": 0.863057, + "count": 291, + "mean": 0.105429, + "median": 0.098572, + "std": 0.036751, + "sem": 0.002158, + "min": 0.02895, + "max": 0.301591, + "recall-0.025": 0.0, + "recall-0.05": 0.019108, + "recall-0.1": 0.484076, + "recall-0.15": 0.843949, + "recall-0.25": 0.917197, + "recall-0.5": 0.926752, "num_labels": 314 }, "hip_right": { - "count": 216, - "mean": 0.103974, - "median": 0.088237, - "std": 0.065246, - "sem": 0.00445, - "min": 0.032087, - "max": 0.49864, + "count": 238, + "mean": 0.089163, + "median": 0.082764, + "std": 0.03617, + "sem": 0.002349, + "min": 0.027785, + "max": 0.342392, "recall-0.025": 0.0, - "recall-0.05": 0.034615, - "recall-0.1": 0.553846, - "recall-0.15": 0.753846, - "recall-0.25": 0.8, - "recall-0.5": 0.830769, + "recall-0.05": 0.046154, + "recall-0.1": 0.665385, + "recall-0.15": 0.880769, + "recall-0.25": 0.907692, + "recall-0.5": 0.915385, "num_labels": 260 }, "knee_left": { - "count": 230, - "mean": 0.14298, - "median": 0.107524, - "std": 0.106874, - "sem": 0.007062, - "min": 0.010361, - "max": 0.481016, - "recall-0.025": 0.025362, - "recall-0.05": 0.105072, - "recall-0.1": 0.369565, - "recall-0.15": 0.57971, - "recall-0.25": 0.699275, - "recall-0.5": 0.833333, + "count": 246, + "mean": 0.134326, + "median": 0.096455, + "std": 0.107329, + "sem": 0.006857, + "min": 0.012137, + "max": 0.4698, + "recall-0.025": 0.014493, + "recall-0.05": 0.163043, + 
"recall-0.1": 0.452899, + "recall-0.15": 0.637681, + "recall-0.25": 0.757246, + "recall-0.5": 0.891304, "num_labels": 276 }, "knee_right": { - "count": 173, - "mean": 0.134986, - "median": 0.098413, - "std": 0.108928, - "sem": 0.008306, - "min": 0.015211, - "max": 0.484078, - "recall-0.025": 0.018605, - "recall-0.05": 0.148837, - "recall-0.1": 0.418605, - "recall-0.15": 0.576744, - "recall-0.25": 0.683721, - "recall-0.5": 0.804651, + "count": 182, + "mean": 0.113341, + "median": 0.072459, + "std": 0.102217, + "sem": 0.007598, + "min": 0.013657, + "max": 0.47956, + "recall-0.025": 0.027907, + "recall-0.05": 0.24186, + "recall-0.1": 0.534884, + "recall-0.15": 0.683721, + "recall-0.25": 0.748837, + "recall-0.5": 0.846512, "num_labels": 215 }, "ankle_left": { - "count": 64, - "mean": 0.341273, - "median": 0.366004, - "std": 0.10975, - "sem": 0.013827, - "min": 0.064431, - "max": 0.483117, - "recall-0.025": 0.0, - "recall-0.05": 0.0, - "recall-0.1": 0.022727, + "count": 77, + "mean": 0.354595, + "median": 0.383447, + "std": 0.105654, + "sem": 0.012119, + "min": 0.023655, + "max": 0.49579, + "recall-0.025": 0.007576, + "recall-0.05": 0.007576, + "recall-0.1": 0.015152, "recall-0.15": 0.037879, - "recall-0.25": 0.098485, - "recall-0.5": 0.484848, + "recall-0.25": 0.106061, + "recall-0.5": 0.583333, "num_labels": 132 }, "ankle_right": { - "count": 57, - "mean": 0.374257, - "median": 0.395853, - "std": 0.092552, - "sem": 0.012368, - "min": 0.118346, - "max": 0.491711, + "count": 70, + "mean": 0.381482, + "median": 0.400315, + "std": 0.091015, + "sem": 0.010957, + "min": 0.066901, + "max": 0.498563, "recall-0.025": 0.0, "recall-0.05": 0.0, - "recall-0.1": 0.0, + "recall-0.1": 0.015748, "recall-0.15": 0.015748, - "recall-0.25": 0.055118, - "recall-0.5": 0.448819, + "recall-0.25": 0.03937, + "recall-0.5": 0.551181, "num_labels": 127 }, "joint_recalls": { "num_labels": 4457, - "recall-0.025": 0.28337, - "recall-0.05": 0.52973, - "recall-0.1": 0.69082, - "recall-0.15": 0.75836, 
- "recall-0.25": 0.79403, - "recall-0.5": 0.84205 + "recall-0.025": 0.26475, + "recall-0.05": 0.58896, + "recall-0.1": 0.75881, + "recall-0.15": 0.82006, + "recall-0.25": 0.84429, + "recall-0.5": 0.88804 } } { "total_parts": 4313, - "correct_parts": 3411, - "pcp": 0.790865 + "correct_parts": 3712, + "pcp": 0.860654 } ``` @@ -2103,269 +2103,269 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.060682081548791185, - "avg_time_3d": 0.00043821836772717926, - "avg_fps": 16.36117626002912 + "avg_time_2d": 0.014957478171900698, + "avg_time_3d": 0.0004402857077749152, + "avg_fps": 64.94449504580058 } { "person_nums": { "total_frames": 390, "total_labels": 780, - "total_preds": 777, + "total_preds": 779, "considered_empty": 0, - "valid_preds": 777, + "valid_preds": 779, "invalid_preds": 0, - "missing": 3, + "missing": 1, "invalid_fraction": 0.0, "precision": 1.0, - "recall": 0.99615, - "f1": 0.99807, - "non_empty": 777 + "recall": 0.99872, + "f1": 0.99936, + "non_empty": 779 }, "mpjpe": { - "count": 777, - "mean": 0.068242, - "median": 0.065186, - "std": 0.013462, - "sem": 0.000483, - "min": 0.051948, - "max": 0.217357, + "count": 779, + "mean": 0.067871, + "median": 0.065561, + "std": 0.010735, + "sem": 0.000385, + "min": 0.049899, + "max": 0.184121, "recall-0.025": 0.0, - "recall-0.05": 0.0, - "recall-0.1": 0.976923, - "recall-0.15": 0.987179, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "recall-0.05": 0.001282, + "recall-0.1": 0.985897, + "recall-0.15": 0.994872, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780, "ap-0.025": 0.0, - "ap-0.05": 0.0, - "ap-0.1": 0.971991, - "ap-0.15": 0.987001, - "ap-0.25": 0.996154, - "ap-0.5": 0.996154 + "ap-0.05": 2e-06, + "ap-0.1": 0.981711, + "ap-0.15": 0.994842, + "ap-0.25": 0.998718, + "ap-0.5": 0.998718 }, "head": { - "count": 777, - "mean": 0.037924, - "median": 0.025795, - "std": 0.041221, - "sem": 0.00148, - "min": 0.003652, - "max": 0.338066, - 
"recall-0.025": 0.469231, - "recall-0.05": 0.835897, - "recall-0.1": 0.939744, - "recall-0.15": 0.961538, - "recall-0.25": 0.991026, - "recall-0.5": 0.996154, + "count": 779, + "mean": 0.037653, + "median": 0.025952, + "std": 0.042255, + "sem": 0.001515, + "min": 0.002742, + "max": 0.384391, + "recall-0.025": 0.476923, + "recall-0.05": 0.821795, + "recall-0.1": 0.95641, + "recall-0.15": 0.967949, + "recall-0.25": 0.988462, + "recall-0.5": 0.998718, "num_labels": 780 }, "shoulder_left": { - "count": 777, - "mean": 0.068426, - "median": 0.068295, - "std": 0.02725, - "sem": 0.000978, - "min": 0.016575, - "max": 0.357954, + "count": 779, + "mean": 0.067036, + "median": 0.068602, + "std": 0.019672, + "sem": 0.000705, + "min": 0.021109, + "max": 0.167744, "recall-0.025": 0.005128, - "recall-0.05": 0.24359, - "recall-0.1": 0.937179, - "recall-0.15": 0.985897, - "recall-0.25": 0.99359, - "recall-0.5": 0.996154, + "recall-0.05": 0.221795, + "recall-0.1": 0.957692, + "recall-0.15": 0.994872, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780 }, "shoulder_right": { - "count": 775, - "mean": 0.077543, - "median": 0.074897, - "std": 0.031788, - "sem": 0.001143, - "min": 0.010535, - "max": 0.485894, - "recall-0.025": 0.007692, - "recall-0.05": 0.075641, - "recall-0.1": 0.898718, - "recall-0.15": 0.982051, - "recall-0.25": 0.987179, - "recall-0.5": 0.99359, + "count": 779, + "mean": 0.075031, + "median": 0.073829, + "std": 0.020872, + "sem": 0.000748, + "min": 0.020224, + "max": 0.274582, + "recall-0.025": 0.003846, + "recall-0.05": 0.087179, + "recall-0.1": 0.910256, + "recall-0.15": 0.99359, + "recall-0.25": 0.997436, + "recall-0.5": 0.998718, "num_labels": 780 }, "elbow_left": { - "count": 776, - "mean": 0.060755, - "median": 0.05218, - "std": 0.033518, - "sem": 0.001204, - "min": 0.016563, - "max": 0.417809, - "recall-0.025": 0.002564, - "recall-0.05": 0.432051, - "recall-0.1": 0.932051, - "recall-0.15": 0.966667, - "recall-0.25": 0.989744, - "recall-0.5": 
0.994872, + "count": 778, + "mean": 0.058524, + "median": 0.051953, + "std": 0.025781, + "sem": 0.000925, + "min": 0.019215, + "max": 0.363847, + "recall-0.025": 0.007692, + "recall-0.05": 0.419231, + "recall-0.1": 0.94359, + "recall-0.15": 0.979487, + "recall-0.25": 0.996154, + "recall-0.5": 0.997436, "num_labels": 780 }, "elbow_right": { - "count": 777, - "mean": 0.066781, - "median": 0.065762, - "std": 0.020878, - "sem": 0.000749, - "min": 0.027359, - "max": 0.224832, - "recall-0.025": 0.0, - "recall-0.05": 0.194872, - "recall-0.1": 0.941026, - "recall-0.15": 0.983333, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "count": 779, + "mean": 0.067468, + "median": 0.066149, + "std": 0.022096, + "sem": 0.000792, + "min": 0.024053, + "max": 0.227375, + "recall-0.025": 0.001282, + "recall-0.05": 0.167949, + "recall-0.1": 0.953846, + "recall-0.15": 0.979487, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780 }, "wrist_left": { - "count": 774, - "mean": 0.049563, - "median": 0.042911, - "std": 0.042084, - "sem": 0.001514, - "min": 0.006736, - "max": 0.497826, - "recall-0.025": 0.171795, - "recall-0.05": 0.679487, - "recall-0.1": 0.939744, - "recall-0.15": 0.967949, - "recall-0.25": 0.982051, - "recall-0.5": 0.992308, + "count": 777, + "mean": 0.048868, + "median": 0.042971, + "std": 0.036964, + "sem": 0.001327, + "min": 0.00944, + "max": 0.429845, + "recall-0.025": 0.158974, + "recall-0.05": 0.660256, + "recall-0.1": 0.952564, + "recall-0.15": 0.975641, + "recall-0.25": 0.985897, + "recall-0.5": 0.996154, "num_labels": 780 }, "wrist_right": { - "count": 776, - "mean": 0.064678, - "median": 0.062017, - "std": 0.041104, - "sem": 0.001477, - "min": 0.008905, - "max": 0.430827, - "recall-0.025": 0.042308, - "recall-0.05": 0.369231, - "recall-0.1": 0.928205, - "recall-0.15": 0.952564, - "recall-0.25": 0.985897, - "recall-0.5": 0.994872, + "count": 778, + "mean": 0.065639, + "median": 0.062986, + "std": 0.039452, + "sem": 0.001415, + "min": 
0.002032, + "max": 0.426451, + "recall-0.025": 0.034615, + "recall-0.05": 0.360256, + "recall-0.1": 0.917949, + "recall-0.15": 0.965385, + "recall-0.25": 0.987179, + "recall-0.5": 0.997436, "num_labels": 780 }, "hip_left": { - "count": 777, - "mean": 0.109321, - "median": 0.109336, - "std": 0.017307, - "sem": 0.000621, - "min": 0.065587, - "max": 0.157263, + "count": 779, + "mean": 0.110424, + "median": 0.110035, + "std": 0.01942, + "sem": 0.000696, + "min": 0.063461, + "max": 0.362567, "recall-0.025": 0.0, "recall-0.05": 0.0, - "recall-0.1": 0.325641, - "recall-0.15": 0.987179, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "recall-0.1": 0.292308, + "recall-0.15": 0.984615, + "recall-0.25": 0.997436, + "recall-0.5": 0.998718, "num_labels": 780 }, "hip_right": { - "count": 777, - "mean": 0.134702, - "median": 0.134688, - "std": 0.014986, - "sem": 0.000538, - "min": 0.086478, - "max": 0.195378, + "count": 779, + "mean": 0.136579, + "median": 0.136493, + "std": 0.015262, + "sem": 0.000547, + "min": 0.092681, + "max": 0.196212, "recall-0.025": 0.0, "recall-0.05": 0.0, - "recall-0.1": 0.010256, - "recall-0.15": 0.858974, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "recall-0.1": 0.008974, + "recall-0.15": 0.821795, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780 }, "knee_left": { - "count": 777, - "mean": 0.050968, - "median": 0.049197, - "std": 0.018325, - "sem": 0.000658, - "min": 0.007513, - "max": 0.198729, - "recall-0.025": 0.026923, - "recall-0.05": 0.535897, - "recall-0.1": 0.957692, - "recall-0.15": 0.99359, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "count": 779, + "mean": 0.05077, + "median": 0.048872, + "std": 0.018041, + "sem": 0.000647, + "min": 0.008028, + "max": 0.196653, + "recall-0.025": 0.017949, + "recall-0.05": 0.553846, + "recall-0.1": 0.967949, + "recall-0.15": 0.996154, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780 }, "knee_right": { - "count": 777, - "mean": 0.056941, 
- "median": 0.054107, - "std": 0.016649, - "sem": 0.000598, - "min": 0.018899, - "max": 0.125673, + "count": 779, + "mean": 0.05791, + "median": 0.055397, + "std": 0.017026, + "sem": 0.00061, + "min": 0.015979, + "max": 0.132415, "recall-0.025": 0.002564, - "recall-0.05": 0.396154, - "recall-0.1": 0.984615, - "recall-0.15": 0.996154, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "recall-0.05": 0.385897, + "recall-0.1": 0.982051, + "recall-0.15": 0.998718, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780 }, "ankle_left": { - "count": 777, - "mean": 0.061821, - "median": 0.059825, - "std": 0.017211, - "sem": 0.000618, - "min": 0.031091, - "max": 0.319784, + "count": 779, + "mean": 0.061342, + "median": 0.059413, + "std": 0.017221, + "sem": 0.000617, + "min": 0.02767, + "max": 0.310603, "recall-0.025": 0.0, - "recall-0.05": 0.144872, - "recall-0.1": 0.973077, - "recall-0.15": 0.991026, - "recall-0.25": 0.994872, - "recall-0.5": 0.996154, + "recall-0.05": 0.157692, + "recall-0.1": 0.979487, + "recall-0.15": 0.99359, + "recall-0.25": 0.997436, + "recall-0.5": 0.998718, "num_labels": 780 }, "ankle_right": { - "count": 777, - "mean": 0.04225, - "median": 0.03841, - "std": 0.014422, - "sem": 0.000518, - "min": 0.015158, - "max": 0.170293, - "recall-0.025": 0.033333, - "recall-0.05": 0.8, + "count": 779, + "mean": 0.041889, + "median": 0.038262, + "std": 0.015597, + "sem": 0.000559, + "min": 0.01118, + "max": 0.189348, + "recall-0.025": 0.038462, + "recall-0.05": 0.802564, "recall-0.1": 0.992308, - "recall-0.15": 0.994872, - "recall-0.25": 0.996154, - "recall-0.5": 0.996154, + "recall-0.15": 0.996154, + "recall-0.25": 0.998718, + "recall-0.5": 0.998718, "num_labels": 780 }, "joint_recalls": { "num_labels": 10140, - "recall-0.025": 0.05799, - "recall-0.05": 0.36164, - "recall-0.1": 0.82702, - "recall-0.15": 0.97022, - "recall-0.25": 0.99211, - "recall-0.5": 0.99546 + "recall-0.025": 0.05671, + "recall-0.05": 0.35611, + "recall-0.1": 0.83116, + 
"recall-0.15": 0.97249, + "recall-0.25": 0.99536, + "recall-0.5": 0.99832 } } { "total_parts": 10920, - "correct_parts": 10328, - "pcp": 0.945788 + "correct_parts": 10410, + "pcp": 0.953297 } ``` @@ -2374,74 +2374,74 @@ Results of the model in various experiments on different datasets. (duration 00:01:51) ```json { - "avg_time_2d": 0.11152931094169617, - "avg_time_3d": 0.0007880079746246338, - "avg_fps": 8.903346426431572 + "avg_time_2d": 0.0297801673412323, + "avg_time_3d": 0.001048978567123413, + "avg_fps": 32.4368376267267 } { "person_nums": { "total_frames": 210, "total_labels": 630, - "total_preds": 630, + "total_preds": 635, "considered_empty": 0, "valid_preds": 630, - "invalid_preds": 0, + "invalid_preds": 5, "missing": 0, - "invalid_fraction": 0.0, - "precision": 1.0, + "invalid_fraction": 0.00787, + "precision": 0.99213, "recall": 1.0, - "f1": 1.0, - "non_empty": 630 + "f1": 0.99605, + "non_empty": 635 }, "mpjpe": { "count": 630, - "mean": 0.0559, - "median": 0.051047, - "std": 0.018383, - "sem": 0.000733, - "min": 0.029149, - "max": 0.144155, + "mean": 0.055877, + "median": 0.051176, + "std": 0.018299, + "sem": 0.00073, + "min": 0.031247, + "max": 0.141346, "recall-0.025": 0.0, - "recall-0.05": 0.446032, - "recall-0.1": 0.947619, + "recall-0.05": 0.452381, + "recall-0.1": 0.949206, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 630, "ap-0.025": 0.0, - "ap-0.05": 0.23272, - "ap-0.1": 0.928677, + "ap-0.05": 0.242557, + "ap-0.1": 0.926768, "ap-0.15": 1.0, "ap-0.25": 1.0, "ap-0.5": 1.0 }, "head": { "count": 598, - "mean": 0.040407, - "median": 0.038843, - "std": 0.013204, - "sem": 0.00054, - "min": 0.011342, - "max": 0.090798, - "recall-0.025": 0.113712, - "recall-0.05": 0.777592, - "recall-0.1": 1.0, - "recall-0.15": 1.0, + "mean": 0.04182, + "median": 0.040665, + "std": 0.013596, + "sem": 0.000556, + "min": 0.014832, + "max": 0.177972, + "recall-0.025": 0.075251, + "recall-0.05": 0.784281, + "recall-0.1": 0.998328, + "recall-0.15": 
0.998328, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 598 }, "shoulder_left": { "count": 630, - "mean": 0.062874, - "median": 0.060357, - "std": 0.019325, - "sem": 0.000771, - "min": 0.018974, - "max": 0.139185, - "recall-0.025": 0.003175, - "recall-0.05": 0.284127, - "recall-0.1": 0.971429, + "mean": 0.063347, + "median": 0.060158, + "std": 0.019514, + "sem": 0.000778, + "min": 0.016405, + "max": 0.139417, + "recall-0.025": 0.006349, + "recall-0.05": 0.265079, + "recall-0.1": 0.968254, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -2449,15 +2449,15 @@ Results of the model in various experiments on different datasets. }, "shoulder_right": { "count": 630, - "mean": 0.066032, - "median": 0.065873, - "std": 0.019679, - "sem": 0.000785, - "min": 0.026785, - "max": 0.143453, - "recall-0.025": 0.0, - "recall-0.05": 0.269841, - "recall-0.1": 0.938095, + "mean": 0.066393, + "median": 0.06475, + "std": 0.019847, + "sem": 0.000791, + "min": 0.019761, + "max": 0.14019, + "recall-0.025": 0.003175, + "recall-0.05": 0.253968, + "recall-0.1": 0.94127, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -2465,63 +2465,63 @@ Results of the model in various experiments on different datasets. 
}, "elbow_left": { "count": 630, - "mean": 0.05202, - "median": 0.048773, - "std": 0.020244, - "sem": 0.000807, - "min": 0.011727, - "max": 0.140534, - "recall-0.025": 0.052381, - "recall-0.05": 0.528571, - "recall-0.1": 0.974603, - "recall-0.15": 1.0, + "mean": 0.053278, + "median": 0.049935, + "std": 0.021931, + "sem": 0.000874, + "min": 0.006737, + "max": 0.191912, + "recall-0.025": 0.057143, + "recall-0.05": 0.501587, + "recall-0.1": 0.968254, + "recall-0.15": 0.996825, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 630 }, "elbow_right": { "count": 629, - "mean": 0.055264, - "median": 0.048723, - "std": 0.031566, - "sem": 0.00126, - "min": 0.00774, - "max": 0.230646, - "recall-0.025": 0.100159, - "recall-0.05": 0.527822, - "recall-0.1": 0.922099, - "recall-0.15": 0.972973, + "mean": 0.055845, + "median": 0.050976, + "std": 0.027262, + "sem": 0.001088, + "min": 0.001851, + "max": 0.173129, + "recall-0.025": 0.079491, + "recall-0.05": 0.470588, + "recall-0.1": 0.917329, + "recall-0.15": 0.987281, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 629 }, "wrist_left": { "count": 630, - "mean": 0.047936, - "median": 0.041232, - "std": 0.026637, - "sem": 0.001062, - "min": 0.003013, - "max": 0.213528, - "recall-0.025": 0.133333, - "recall-0.05": 0.650794, - "recall-0.1": 0.953968, - "recall-0.15": 0.988889, - "recall-0.25": 1.0, + "mean": 0.048568, + "median": 0.042759, + "std": 0.031439, + "sem": 0.001254, + "min": 0.000561, + "max": 0.453954, + "recall-0.025": 0.157143, + "recall-0.05": 0.652381, + "recall-0.1": 0.947619, + "recall-0.15": 0.990476, + "recall-0.25": 0.996825, "recall-0.5": 1.0, "num_labels": 630 }, "wrist_right": { "count": 625, - "mean": 0.052226, - "median": 0.046644, - "std": 0.025647, - "sem": 0.001027, - "min": 0.007235, - "max": 0.226558, - "recall-0.025": 0.0864, - "recall-0.05": 0.552, - "recall-0.1": 0.9472, + "mean": 0.054121, + "median": 0.049075, + "std": 0.025533, + "sem": 0.001022, + "min": 0.005199, + "max": 0.16283, + 
"recall-0.025": 0.0688, + "recall-0.05": 0.5216, + "recall-0.1": 0.9408, "recall-0.15": 0.9952, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -2529,31 +2529,31 @@ Results of the model in various experiments on different datasets. }, "hip_left": { "count": 630, - "mean": 0.056856, - "median": 0.053491, - "std": 0.020361, - "sem": 0.000812, - "min": 0.014996, - "max": 0.170692, - "recall-0.025": 0.019048, - "recall-0.05": 0.404762, - "recall-0.1": 0.960317, - "recall-0.15": 0.995238, + "mean": 0.058094, + "median": 0.056051, + "std": 0.019776, + "sem": 0.000789, + "min": 0.008159, + "max": 0.177207, + "recall-0.025": 0.015873, + "recall-0.05": 0.369841, + "recall-0.1": 0.971429, + "recall-0.15": 0.993651, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 630 }, "hip_right": { "count": 629, - "mean": 0.055372, - "median": 0.050585, - "std": 0.023477, - "sem": 0.000937, - "min": 0.004628, - "max": 0.146936, - "recall-0.025": 0.060413, - "recall-0.05": 0.492846, - "recall-0.1": 0.957075, + "mean": 0.052648, + "median": 0.049613, + "std": 0.021304, + "sem": 0.00085, + "min": 0.008816, + "max": 0.143029, + "recall-0.025": 0.081081, + "recall-0.05": 0.503975, + "recall-0.1": 0.976153, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -2561,82 +2561,82 @@ Results of the model in various experiments on different datasets. 
}, "knee_left": { "count": 628, - "mean": 0.045803, - "median": 0.034827, - "std": 0.046081, - "sem": 0.00184, - "min": 0.002265, - "max": 0.364051, - "recall-0.025": 0.288217, - "recall-0.05": 0.734076, - "recall-0.1": 0.941083, - "recall-0.15": 0.963376, + "mean": 0.045731, + "median": 0.034191, + "std": 0.04676, + "sem": 0.001867, + "min": 0.004196, + "max": 0.36669, + "recall-0.025": 0.305732, + "recall-0.05": 0.745223, + "recall-0.1": 0.944268, + "recall-0.15": 0.960191, "recall-0.25": 0.984076, "recall-0.5": 1.0, "num_labels": 628 }, "knee_right": { "count": 629, - "mean": 0.053706, - "median": 0.036714, - "std": 0.07183, - "sem": 0.002866, - "min": 0.002678, - "max": 0.494376, - "recall-0.025": 0.27027, - "recall-0.05": 0.710652, - "recall-0.1": 0.933227, - "recall-0.15": 0.952305, - "recall-0.25": 0.958665, + "mean": 0.053798, + "median": 0.036361, + "std": 0.072963, + "sem": 0.002912, + "min": 0.001079, + "max": 0.493215, + "recall-0.025": 0.246423, + "recall-0.05": 0.712242, + "recall-0.1": 0.945946, + "recall-0.15": 0.950715, + "recall-0.25": 0.955485, "recall-0.5": 1.0, "num_labels": 629 }, "ankle_left": { "count": 619, - "mean": 0.065155, - "median": 0.050506, - "std": 0.071537, - "sem": 0.002878, - "min": 0.014226, - "max": 0.482565, - "recall-0.025": 0.035541, - "recall-0.05": 0.494346, - "recall-0.1": 0.945073, - "recall-0.15": 0.959612, - "recall-0.25": 0.962843, + "mean": 0.062433, + "median": 0.048333, + "std": 0.071526, + "sem": 0.002877, + "min": 0.01437, + "max": 0.495154, + "recall-0.025": 0.059774, + "recall-0.05": 0.537964, + "recall-0.1": 0.954766, + "recall-0.15": 0.961228, + "recall-0.25": 0.964459, "recall-0.5": 1.0, "num_labels": 619 }, "ankle_right": { "count": 601, - "mean": 0.054338, - "median": 0.047418, - "std": 0.043058, - "sem": 0.001758, - "min": 0.009373, - "max": 0.4761, - "recall-0.025": 0.098071, - "recall-0.05": 0.525723, - "recall-0.1": 0.930868, - "recall-0.15": 0.946945, + "mean": 0.051519, + "median": 0.046201, + 
"std": 0.041673, + "sem": 0.001701, + "min": 0.005756, + "max": 0.44468, + "recall-0.025": 0.135048, + "recall-0.05": 0.553055, + "recall-0.1": 0.935691, + "recall-0.15": 0.951768, "recall-0.25": 0.954984, "recall-0.5": 0.966238, "num_labels": 622 }, "joint_recalls": { "num_labels": 8129, - "recall-0.025": 0.09632, - "recall-0.05": 0.5334, - "recall-0.1": 0.95104, - "recall-0.15": 0.98216, - "recall-0.25": 0.98905, + "recall-0.025": 0.09866, + "recall-0.05": 0.52663, + "recall-0.1": 0.95411, + "recall-0.15": 0.98278, + "recall-0.25": 0.98868, "recall-0.5": 0.99742 } } { "total_parts": 8725, - "correct_parts": 8587, - "pcp": 0.984183 + "correct_parts": 8590, + "pcp": 0.984527 } ``` @@ -2646,9 +2646,9 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.05200650064568771, - "avg_time_3d": 0.000292415367929559, - "avg_fps": 19.120855195920814 + "avg_time_2d": 0.010956774259868421, + "avg_time_3d": 0.00037081367091128703, + "avg_fps": 88.28004744794484 } { "person_nums": { @@ -2667,52 +2667,52 @@ Results of the model in various experiments on different datasets. 
}, "mpjpe": { "count": 200, - "mean": 0.02515, - "median": 0.023689, - "std": 0.010002, - "sem": 0.000709, - "min": 0.01255, - "max": 0.107625, - "recall-0.025": 0.56, - "recall-0.05": 0.98, + "mean": 0.026036, + "median": 0.024095, + "std": 0.011081, + "sem": 0.000785, + "min": 0.011995, + "max": 0.122074, + "recall-0.025": 0.535, + "recall-0.05": 0.97, "recall-0.1": 0.995, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 200, - "ap-0.025": 0.45717, - "ap-0.05": 0.979363, - "ap-0.1": 0.994825, + "ap-0.025": 0.442381, + "ap-0.05": 0.969201, + "ap-0.1": 0.995, "ap-0.15": 1.0, "ap-0.25": 1.0, "ap-0.5": 1.0 }, "nose": { "count": 200, - "mean": 0.043369, - "median": 0.03534, - "std": 0.033311, - "sem": 0.002361, - "min": 0.004699, - "max": 0.19073, - "recall-0.025": 0.37, - "recall-0.05": 0.715, - "recall-0.1": 0.935, - "recall-0.15": 0.985, + "mean": 0.038082, + "median": 0.029124, + "std": 0.029016, + "sem": 0.002057, + "min": 0.001677, + "max": 0.148616, + "recall-0.025": 0.415, + "recall-0.05": 0.74, + "recall-0.1": 0.945, + "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 200 }, "shoulder_left": { "count": 200, - "mean": 0.01896, - "median": 0.016589, - "std": 0.010208, - "sem": 0.000724, - "min": 0.003772, - "max": 0.069414, - "recall-0.025": 0.785, - "recall-0.05": 0.975, + "mean": 0.020331, + "median": 0.017805, + "std": 0.011914, + "sem": 0.000845, + "min": 0.002701, + "max": 0.090556, + "recall-0.025": 0.755, + "recall-0.05": 0.97, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -2721,14 +2721,14 @@ Results of the model in various experiments on different datasets. 
}, "shoulder_right": { "count": 200, - "mean": 0.026151, - "median": 0.023182, - "std": 0.012763, - "sem": 0.000905, - "min": 0.004487, - "max": 0.075418, - "recall-0.025": 0.575, - "recall-0.05": 0.945, + "mean": 0.026801, + "median": 0.022421, + "std": 0.012865, + "sem": 0.000912, + "min": 0.004534, + "max": 0.092643, + "recall-0.025": 0.555, + "recall-0.05": 0.93, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -2737,30 +2737,30 @@ Results of the model in various experiments on different datasets. }, "elbow_left": { "count": 200, - "mean": 0.01524, - "median": 0.011716, - "std": 0.0131, - "sem": 0.000929, - "min": 0.001021, - "max": 0.098887, - "recall-0.025": 0.87, - "recall-0.05": 0.98, - "recall-0.1": 1.0, - "recall-0.15": 1.0, + "mean": 0.016996, + "median": 0.012308, + "std": 0.02022, + "sem": 0.001433, + "min": 0.001208, + "max": 0.218149, + "recall-0.025": 0.84, + "recall-0.05": 0.97, + "recall-0.1": 0.99, + "recall-0.15": 0.995, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 200 }, "elbow_right": { "count": 200, - "mean": 0.020908, - "median": 0.014473, - "std": 0.019387, - "sem": 0.001374, - "min": 0.000895, - "max": 0.12409, - "recall-0.025": 0.73, - "recall-0.05": 0.92, + "mean": 0.022677, + "median": 0.014901, + "std": 0.021292, + "sem": 0.001509, + "min": 0.001874, + "max": 0.138266, + "recall-0.025": 0.72, + "recall-0.05": 0.9, "recall-0.1": 0.99, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -2769,30 +2769,30 @@ Results of the model in various experiments on different datasets. 
}, "wrist_left": { "count": 200, - "mean": 0.024447, - "median": 0.017245, - "std": 0.026566, - "sem": 0.001883, - "min": 0.000603, - "max": 0.261032, - "recall-0.025": 0.72, - "recall-0.05": 0.905, - "recall-0.1": 0.975, - "recall-0.15": 0.995, - "recall-0.25": 0.995, + "mean": 0.025241, + "median": 0.018451, + "std": 0.02671, + "sem": 0.001893, + "min": 0.002053, + "max": 0.242582, + "recall-0.025": 0.67, + "recall-0.05": 0.895, + "recall-0.1": 0.98, + "recall-0.15": 0.99, + "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 200 }, "wrist_right": { "count": 200, - "mean": 0.032044, - "median": 0.023165, - "std": 0.029318, - "sem": 0.002078, - "min": 0.001198, - "max": 0.230805, - "recall-0.025": 0.53, - "recall-0.05": 0.83, + "mean": 0.031542, + "median": 0.024933, + "std": 0.026264, + "sem": 0.001862, + "min": 0.001388, + "max": 0.160837, + "recall-0.025": 0.5, + "recall-0.05": 0.835, "recall-0.1": 0.97, "recall-0.15": 0.995, "recall-0.25": 1.0, @@ -2801,14 +2801,14 @@ Results of the model in various experiments on different datasets. }, "hip_left": { "count": 200, - "mean": 0.036425, - "median": 0.034978, - "std": 0.0142, - "sem": 0.001007, - "min": 0.006217, - "max": 0.098359, - "recall-0.025": 0.2, - "recall-0.05": 0.85, + "mean": 0.038654, + "median": 0.036955, + "std": 0.0149, + "sem": 0.001056, + "min": 0.009121, + "max": 0.093447, + "recall-0.025": 0.185, + "recall-0.05": 0.785, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -2817,14 +2817,14 @@ Results of the model in various experiments on different datasets. 
}, "hip_right": { "count": 200, - "mean": 0.034687, - "median": 0.033267, - "std": 0.013631, - "sem": 0.000966, - "min": 0.003129, - "max": 0.076945, - "recall-0.025": 0.25, - "recall-0.05": 0.845, + "mean": 0.037207, + "median": 0.035892, + "std": 0.014823, + "sem": 0.001051, + "min": 0.003693, + "max": 0.07955, + "recall-0.025": 0.19, + "recall-0.05": 0.83, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -2833,45 +2833,45 @@ Results of the model in various experiments on different datasets. }, "knee_left": { "count": 200, - "mean": 0.018635, - "median": 0.013488, - "std": 0.017566, - "sem": 0.001245, - "min": 0.002544, - "max": 0.181286, - "recall-0.025": 0.785, - "recall-0.05": 0.97, - "recall-0.1": 0.995, + "mean": 0.021967, + "median": 0.015751, + "std": 0.027428, + "sem": 0.001944, + "min": 0.002751, + "max": 0.334075, + "recall-0.025": 0.75, + "recall-0.05": 0.95, + "recall-0.1": 0.99, "recall-0.15": 0.995, - "recall-0.25": 1.0, + "recall-0.25": 0.995, "recall-0.5": 1.0, "num_labels": 200 }, "knee_right": { - "count": 200, - "mean": 0.018306, - "median": 0.012878, - "std": 0.021929, - "sem": 0.001555, - "min": 0.002418, - "max": 0.24868, - "recall-0.025": 0.82, - "recall-0.05": 0.96, - "recall-0.1": 0.99, - "recall-0.15": 0.995, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 199, + "mean": 0.020051, + "median": 0.014155, + "std": 0.020629, + "sem": 0.001466, + "min": 0.002698, + "max": 0.150804, + "recall-0.025": 0.83, + "recall-0.05": 0.935, + "recall-0.1": 0.975, + "recall-0.15": 0.99, + "recall-0.25": 0.995, + "recall-0.5": 0.995, "num_labels": 200 }, "ankle_left": { "count": 200, - "mean": 0.018595, - "median": 0.013493, - "std": 0.033845, - "sem": 0.002399, - "min": 0.003314, - "max": 0.444766, - "recall-0.025": 0.9, + "mean": 0.017853, + "median": 0.013049, + "std": 0.027218, + "sem": 0.001929, + "min": 0.001188, + "max": 0.34282, + "recall-0.025": 0.88, "recall-0.05": 0.955, "recall-0.1": 0.99, "recall-0.15": 0.995, @@ -2881,13 
+2881,13 @@ Results of the model in various experiments on different datasets. }, "ankle_right": { "count": 200, - "mean": 0.019189, - "median": 0.013539, - "std": 0.033179, - "sem": 0.002352, - "min": 0.001806, - "max": 0.445265, - "recall-0.025": 0.83, + "mean": 0.018275, + "median": 0.013472, + "std": 0.026745, + "sem": 0.001896, + "min": 0.001446, + "max": 0.335661, + "recall-0.025": 0.87, "recall-0.05": 0.955, "recall-0.1": 0.99, "recall-0.15": 0.995, @@ -2897,18 +2897,18 @@ Results of the model in various experiments on different datasets. }, "joint_recalls": { "num_labels": 2600, - "recall-0.025": 0.64308, - "recall-0.05": 0.90808, - "recall-0.1": 0.98731, + "recall-0.025": 0.62769, + "recall-0.05": 0.89615, + "recall-0.1": 0.98615, "recall-0.15": 0.99654, - "recall-0.25": 0.99885, - "recall-0.5": 1.0 + "recall-0.25": 0.99846, + "recall-0.5": 0.99962 } } { "total_parts": 2800, - "correct_parts": 2795, - "pcp": 0.998214 + "correct_parts": 2792, + "pcp": 0.997143 } ``` @@ -5188,269 +5188,269 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.1492903921339247, - "avg_time_3d": 0.0009679979748196072, - "avg_fps": 6.655202410170138 + "avg_time_2d": 0.042454131444295246, + "avg_time_3d": 0.0014010773764716255, + "avg_fps": 22.802308480320526 } { "person_nums": { "total_frames": 100, "total_labels": 400, - "total_preds": 355, + "total_preds": 380, "considered_empty": 0, - "valid_preds": 353, - "invalid_preds": 2, - "missing": 47, - "invalid_fraction": 0.00563, - "precision": 0.99437, - "recall": 0.8825, - "f1": 0.9351, - "non_empty": 355 + "valid_preds": 368, + "invalid_preds": 12, + "missing": 32, + "invalid_fraction": 0.03158, + "precision": 0.96842, + "recall": 0.92, + "f1": 0.94359, + "non_empty": 380 }, "mpjpe": { - "count": 353, - "mean": 0.038099, - "median": 0.031843, - "std": 0.022394, - "sem": 0.001194, - "min": 0.016807, - "max": 0.237374, - "recall-0.025": 0.17, + "count": 368, + "mean": 0.039349, + "median": 0.032926, + "std": 0.022058, + "sem": 0.001151, + "min": 0.016105, + "max": 0.262994, + "recall-0.025": 0.15, "recall-0.05": 0.76, - "recall-0.1": 0.87, - "recall-0.15": 0.875, - "recall-0.25": 0.8825, - "recall-0.5": 0.8825, + "recall-0.1": 0.895, + "recall-0.15": 0.915, + "recall-0.25": 0.9175, + "recall-0.5": 0.92, "num_labels": 400, - "ap-0.025": 0.037299, - "ap-0.05": 0.685899, - "ap-0.1": 0.865057, - "ap-0.15": 0.875, - "ap-0.25": 0.8825, - "ap-0.5": 0.8825 + "ap-0.025": 0.028482, + "ap-0.05": 0.674481, + "ap-0.1": 0.888311, + "ap-0.15": 0.913135, + "ap-0.25": 0.917036, + "ap-0.5": 0.919656 }, "head": { - "count": 353, - "mean": 0.042592, - "median": 0.03682, - "std": 0.029711, - "sem": 0.001584, - "min": 0.005213, - "max": 0.388491, - "recall-0.025": 0.22, - "recall-0.05": 0.5975, - "recall-0.1": 0.875, - "recall-0.15": 0.8775, - "recall-0.25": 0.8775, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.045052, + "median": 0.039098, + "std": 0.027362, + "sem": 0.001428, + "min": 0.002977, + "max": 0.236023, + "recall-0.025": 0.1825, + "recall-0.05": 
0.6175, + "recall-0.1": 0.885, + "recall-0.15": 0.91, + "recall-0.25": 0.92, + "recall-0.5": 0.92, "num_labels": 400 }, "shoulder_left": { - "count": 353, - "mean": 0.038479, - "median": 0.03359, - "std": 0.024377, - "sem": 0.001299, - "min": 0.005392, - "max": 0.264784, - "recall-0.025": 0.2475, - "recall-0.05": 0.685, - "recall-0.1": 0.8725, - "recall-0.15": 0.88, - "recall-0.25": 0.88, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.039685, + "median": 0.034322, + "std": 0.030226, + "sem": 0.001578, + "min": 0.005282, + "max": 0.446219, + "recall-0.025": 0.26, + "recall-0.05": 0.6975, + "recall-0.1": 0.9025, + "recall-0.15": 0.9175, + "recall-0.25": 0.9175, + "recall-0.5": 0.92, "num_labels": 400 }, "shoulder_right": { - "count": 353, - "mean": 0.038462, - "median": 0.036189, - "std": 0.025892, - "sem": 0.00138, - "min": 0.004108, - "max": 0.378453, - "recall-0.025": 0.2375, - "recall-0.05": 0.7175, - "recall-0.1": 0.87, - "recall-0.15": 0.88, - "recall-0.25": 0.88, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.039579, + "median": 0.036172, + "std": 0.023037, + "sem": 0.001203, + "min": 0.004844, + "max": 0.275894, + "recall-0.025": 0.1975, + "recall-0.05": 0.715, + "recall-0.1": 0.905, + "recall-0.15": 0.9175, + "recall-0.25": 0.9175, + "recall-0.5": 0.92, "num_labels": 400 }, "elbow_left": { - "count": 352, - "mean": 0.027695, - "median": 0.0235, - "std": 0.019531, - "sem": 0.001043, - "min": 0.003298, - "max": 0.171064, - "recall-0.025": 0.4675, - "recall-0.05": 0.8075, - "recall-0.1": 0.87, - "recall-0.15": 0.875, - "recall-0.25": 0.88, - "recall-0.5": 0.88, + "count": 368, + "mean": 0.0303, + "median": 0.0257, + "std": 0.027754, + "sem": 0.001449, + "min": 0.002612, + "max": 0.365908, + "recall-0.025": 0.435, + "recall-0.05": 0.83, + "recall-0.1": 0.905, + "recall-0.15": 0.915, + "recall-0.25": 0.9175, + "recall-0.5": 0.92, "num_labels": 400 }, "elbow_right": { - "count": 353, - "mean": 0.037493, - "median": 0.029876, - "std": 0.04065, - "sem": 
0.002167, - "min": 0.004951, - "max": 0.494585, - "recall-0.025": 0.3575, - "recall-0.05": 0.7225, - "recall-0.1": 0.8475, - "recall-0.15": 0.87, - "recall-0.25": 0.8775, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.037584, + "median": 0.030106, + "std": 0.029529, + "sem": 0.001541, + "min": 0.003654, + "max": 0.324866, + "recall-0.025": 0.32, + "recall-0.05": 0.75, + "recall-0.1": 0.8725, + "recall-0.15": 0.9175, + "recall-0.25": 0.9175, + "recall-0.5": 0.92, "num_labels": 400 }, "wrist_left": { - "count": 353, - "mean": 0.032765, - "median": 0.025804, - "std": 0.031045, - "sem": 0.001655, - "min": 0.001774, - "max": 0.297486, - "recall-0.025": 0.4125, - "recall-0.05": 0.78, - "recall-0.1": 0.845, - "recall-0.15": 0.8725, - "recall-0.25": 0.8775, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.034703, + "median": 0.025471, + "std": 0.031666, + "sem": 0.001653, + "min": 0.002851, + "max": 0.304971, + "recall-0.025": 0.4425, + "recall-0.05": 0.7725, + "recall-0.1": 0.8825, + "recall-0.15": 0.9125, + "recall-0.25": 0.915, + "recall-0.5": 0.92, "num_labels": 400 }, "wrist_right": { - "count": 352, - "mean": 0.036987, - "median": 0.022997, - "std": 0.05094, - "sem": 0.002719, - "min": 0.000935, - "max": 0.394496, - "recall-0.025": 0.4825, - "recall-0.05": 0.7225, - "recall-0.1": 0.835, - "recall-0.15": 0.8525, - "recall-0.25": 0.8625, - "recall-0.5": 0.88, + "count": 365, + "mean": 0.034027, + "median": 0.024238, + "std": 0.03586, + "sem": 0.00188, + "min": 0.002002, + "max": 0.308895, + "recall-0.025": 0.4675, + "recall-0.05": 0.76, + "recall-0.1": 0.875, + "recall-0.15": 0.8975, + "recall-0.25": 0.905, + "recall-0.5": 0.9125, "num_labels": 400 }, "hip_left": { - "count": 353, - "mean": 0.046009, - "median": 0.037949, - "std": 0.033242, - "sem": 0.001772, - "min": 0.006021, - "max": 0.347255, - "recall-0.025": 0.2275, - "recall-0.05": 0.6225, - "recall-0.1": 0.815, - "recall-0.15": 0.8775, - "recall-0.25": 0.88, - "recall-0.5": 0.8825, + "count": 368, + 
"mean": 0.046902, + "median": 0.038138, + "std": 0.030151, + "sem": 0.001574, + "min": 0.005184, + "max": 0.155154, + "recall-0.025": 0.1925, + "recall-0.05": 0.6375, + "recall-0.1": 0.845, + "recall-0.15": 0.9175, + "recall-0.25": 0.92, + "recall-0.5": 0.92, "num_labels": 400 }, "hip_right": { - "count": 353, - "mean": 0.043704, - "median": 0.035638, - "std": 0.031666, - "sem": 0.001688, - "min": 0.003099, - "max": 0.3352, - "recall-0.025": 0.225, - "recall-0.05": 0.64, - "recall-0.1": 0.83, - "recall-0.15": 0.8775, - "recall-0.25": 0.88, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.044591, + "median": 0.036501, + "std": 0.029531, + "sem": 0.001541, + "min": 0.004714, + "max": 0.164952, + "recall-0.025": 0.24, + "recall-0.05": 0.65, + "recall-0.1": 0.8575, + "recall-0.15": 0.9125, + "recall-0.25": 0.92, + "recall-0.5": 0.92, "num_labels": 400 }, "knee_left": { - "count": 353, - "mean": 0.026158, - "median": 0.022194, - "std": 0.016673, - "sem": 0.000889, - "min": 0.001406, - "max": 0.12144, - "recall-0.025": 0.5125, - "recall-0.05": 0.8175, - "recall-0.1": 0.8775, - "recall-0.15": 0.8825, - "recall-0.25": 0.8825, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.027594, + "median": 0.023103, + "std": 0.020036, + "sem": 0.001046, + "min": 0.001091, + "max": 0.223156, + "recall-0.025": 0.5075, + "recall-0.05": 0.84, + "recall-0.1": 0.9125, + "recall-0.15": 0.915, + "recall-0.25": 0.92, + "recall-0.5": 0.92, "num_labels": 400 }, "knee_right": { - "count": 353, - "mean": 0.031454, - "median": 0.023803, - "std": 0.037654, - "sem": 0.002007, - "min": 0.004796, - "max": 0.467974, - "recall-0.025": 0.4775, - "recall-0.05": 0.795, - "recall-0.1": 0.8675, - "recall-0.15": 0.87, - "recall-0.25": 0.875, - "recall-0.5": 0.8825, + "count": 366, + "mean": 0.031508, + "median": 0.023626, + "std": 0.035056, + "sem": 0.001835, + "min": 0.00167, + "max": 0.414212, + "recall-0.025": 0.48, + "recall-0.05": 0.8125, + "recall-0.1": 0.895, + "recall-0.15": 0.9025, + 
"recall-0.25": 0.905, + "recall-0.5": 0.915, "num_labels": 400 }, "ankle_left": { - "count": 353, - "mean": 0.03851, - "median": 0.029955, - "std": 0.04292, - "sem": 0.002288, - "min": 0.003123, - "max": 0.459378, - "recall-0.025": 0.3325, - "recall-0.05": 0.73, - "recall-0.1": 0.855, - "recall-0.15": 0.87, - "recall-0.25": 0.8725, - "recall-0.5": 0.8825, + "count": 368, + "mean": 0.036661, + "median": 0.029479, + "std": 0.036337, + "sem": 0.001897, + "min": 0.003431, + "max": 0.481637, + "recall-0.025": 0.36, + "recall-0.05": 0.7575, + "recall-0.1": 0.895, + "recall-0.15": 0.9125, + "recall-0.25": 0.915, + "recall-0.5": 0.92, "num_labels": 400 }, "ankle_right": { - "count": 352, - "mean": 0.048881, - "median": 0.035266, - "std": 0.048, - "sem": 0.002562, - "min": 0.004365, - "max": 0.409384, - "recall-0.025": 0.2425, - "recall-0.05": 0.6025, - "recall-0.1": 0.795, - "recall-0.15": 0.855, - "recall-0.25": 0.8675, - "recall-0.5": 0.88, + "count": 366, + "mean": 0.050792, + "median": 0.034511, + "std": 0.05242, + "sem": 0.002744, + "min": 0.004525, + "max": 0.452313, + "recall-0.025": 0.2575, + "recall-0.05": 0.6275, + "recall-0.1": 0.8125, + "recall-0.15": 0.885, + "recall-0.25": 0.9025, + "recall-0.5": 0.915, "num_labels": 400 }, "joint_recalls": { "num_labels": 5200, - "recall-0.025": 0.34154, - "recall-0.05": 0.71077, - "recall-0.1": 0.85038, - "recall-0.15": 0.87231, - "recall-0.25": 0.87635, - "recall-0.5": 0.88192 + "recall-0.025": 0.33385, + "recall-0.05": 0.72788, + "recall-0.1": 0.88038, + "recall-0.15": 0.91019, + "recall-0.25": 0.91481, + "recall-0.5": 0.91865 } } { "total_parts": 5600, - "correct_parts": 4893, - "pcp": 0.87375 + "correct_parts": 5101, + "pcp": 0.910893 } ``` @@ -5458,9 +5458,9 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.1481909300829913, - "avg_time_3d": 0.0009748033575109533, - "avg_fps": 6.703952556227466 + "avg_time_2d": 0.03811510833534035, + "avg_time_3d": 0.0012803936863804723, + "avg_fps": 25.383608500499555 } { "person_nums": { @@ -5479,21 +5479,21 @@ Results of the model in various experiments on different datasets. }, "mpjpe": { "count": 363, - "mean": 0.029236, - "median": 0.027834, - "std": 0.008449, - "sem": 0.000444, - "min": 0.013702, - "max": 0.065791, - "recall-0.025": 0.341598, + "mean": 0.029682, + "median": 0.028424, + "std": 0.008263, + "sem": 0.000434, + "min": 0.012972, + "max": 0.058471, + "recall-0.025": 0.30303, "recall-0.05": 0.975207, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 363, - "ap-0.025": 0.119962, - "ap-0.05": 0.963366, + "ap-0.025": 0.095589, + "ap-0.05": 0.967838, "ap-0.1": 1.0, "ap-0.15": 1.0, "ap-0.25": 1.0, @@ -5501,14 +5501,14 @@ Results of the model in various experiments on different datasets. }, "head": { "count": 363, - "mean": 0.029497, - "median": 0.024784, - "std": 0.01864, - "sem": 0.00098, - "min": 0.00309, - "max": 0.090403, - "recall-0.025": 0.509642, - "recall-0.05": 0.862259, + "mean": 0.030095, + "median": 0.025736, + "std": 0.016745, + "sem": 0.00088, + "min": 0.003765, + "max": 0.090512, + "recall-0.025": 0.473829, + "recall-0.05": 0.878788, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5517,15 +5517,15 @@ Results of the model in various experiments on different datasets. 
}, "shoulder_left": { "count": 363, - "mean": 0.03253, - "median": 0.0262, - "std": 0.021667, - "sem": 0.001139, - "min": 0.003877, - "max": 0.153738, - "recall-0.025": 0.454545, - "recall-0.05": 0.84022, - "recall-0.1": 0.977961, + "mean": 0.030859, + "median": 0.025023, + "std": 0.020779, + "sem": 0.001092, + "min": 0.002824, + "max": 0.154308, + "recall-0.025": 0.498623, + "recall-0.05": 0.865014, + "recall-0.1": 0.983471, "recall-0.15": 0.997245, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5533,14 +5533,14 @@ Results of the model in various experiments on different datasets. }, "shoulder_right": { "count": 363, - "mean": 0.029017, - "median": 0.027969, - "std": 0.014883, - "sem": 0.000782, - "min": 0.002833, - "max": 0.10701, - "recall-0.025": 0.435262, - "recall-0.05": 0.911846, + "mean": 0.029979, + "median": 0.028759, + "std": 0.014556, + "sem": 0.000765, + "min": 0.001729, + "max": 0.10965, + "recall-0.025": 0.37741, + "recall-0.05": 0.906336, "recall-0.1": 0.997245, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5549,14 +5549,14 @@ Results of the model in various experiments on different datasets. }, "elbow_left": { "count": 363, - "mean": 0.023168, - "median": 0.020768, - "std": 0.01597, - "sem": 0.000839, - "min": 0.003257, - "max": 0.230605, - "recall-0.025": 0.647383, - "recall-0.05": 0.969697, + "mean": 0.023742, + "median": 0.020914, + "std": 0.014781, + "sem": 0.000777, + "min": 0.004184, + "max": 0.191137, + "recall-0.025": 0.641873, + "recall-0.05": 0.953168, "recall-0.1": 0.997245, "recall-0.15": 0.997245, "recall-0.25": 1.0, @@ -5565,15 +5565,15 @@ Results of the model in various experiments on different datasets. 
}, "elbow_right": { "count": 363, - "mean": 0.020239, - "median": 0.018368, - "std": 0.012649, - "sem": 0.000665, - "min": 0.001404, - "max": 0.11648, - "recall-0.025": 0.768595, - "recall-0.05": 0.977961, - "recall-0.1": 0.99449, + "mean": 0.020613, + "median": 0.019357, + "std": 0.011342, + "sem": 0.000596, + "min": 0.001944, + "max": 0.121475, + "recall-0.025": 0.713499, + "recall-0.05": 0.983471, + "recall-0.1": 0.997245, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5581,15 +5581,15 @@ Results of the model in various experiments on different datasets. }, "wrist_left": { "count": 363, - "mean": 0.024941, - "median": 0.019975, - "std": 0.021321, - "sem": 0.001121, - "min": 0.001322, - "max": 0.236003, - "recall-0.025": 0.672176, + "mean": 0.025604, + "median": 0.01995, + "std": 0.021669, + "sem": 0.001139, + "min": 0.002006, + "max": 0.22929, + "recall-0.025": 0.639118, "recall-0.05": 0.933884, - "recall-0.1": 0.991736, + "recall-0.1": 0.988981, "recall-0.15": 0.99449, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5597,15 +5597,15 @@ Results of the model in various experiments on different datasets. }, "wrist_right": { "count": 363, - "mean": 0.020419, - "median": 0.016684, - "std": 0.015538, - "sem": 0.000817, - "min": 0.001359, - "max": 0.166627, + "mean": 0.020501, + "median": 0.018416, + "std": 0.015423, + "sem": 0.000811, + "min": 0.000389, + "max": 0.230442, "recall-0.025": 0.730028, - "recall-0.05": 0.964187, - "recall-0.1": 0.99449, + "recall-0.05": 0.977961, + "recall-0.1": 0.997245, "recall-0.15": 0.997245, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5613,14 +5613,14 @@ Results of the model in various experiments on different datasets. 
}, "hip_left": { "count": 363, - "mean": 0.042888, - "median": 0.040724, - "std": 0.019565, - "sem": 0.001028, - "min": 0.003411, - "max": 0.112183, - "recall-0.025": 0.181818, - "recall-0.05": 0.727273, + "mean": 0.043261, + "median": 0.041291, + "std": 0.020179, + "sem": 0.001061, + "min": 0.00312, + "max": 0.123913, + "recall-0.025": 0.176309, + "recall-0.05": 0.702479, "recall-0.1": 0.991736, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5629,15 +5629,15 @@ Results of the model in various experiments on different datasets. }, "hip_right": { "count": 363, - "mean": 0.039792, - "median": 0.036989, - "std": 0.020807, - "sem": 0.001094, - "min": 0.004596, - "max": 0.133553, - "recall-0.025": 0.247934, - "recall-0.05": 0.746556, - "recall-0.1": 0.986226, + "mean": 0.040709, + "median": 0.039307, + "std": 0.020797, + "sem": 0.001093, + "min": 0.003134, + "max": 0.134375, + "recall-0.025": 0.231405, + "recall-0.05": 0.730028, + "recall-0.1": 0.988981, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5645,30 +5645,30 @@ Results of the model in various experiments on different datasets. 
}, "knee_left": { "count": 363, - "mean": 0.032229, - "median": 0.022692, - "std": 0.02766, - "sem": 0.001454, - "min": 0.003214, - "max": 0.15128, - "recall-0.025": 0.556474, - "recall-0.05": 0.85124, - "recall-0.1": 0.950413, - "recall-0.15": 0.997245, + "mean": 0.031175, + "median": 0.021674, + "std": 0.027529, + "sem": 0.001447, + "min": 0.002038, + "max": 0.162017, + "recall-0.025": 0.553719, + "recall-0.05": 0.84573, + "recall-0.1": 0.961433, + "recall-0.15": 0.99449, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 363 }, "knee_right": { "count": 363, - "mean": 0.025683, - "median": 0.022733, - "std": 0.01495, - "sem": 0.000786, - "min": 0.001921, - "max": 0.084441, + "mean": 0.027233, + "median": 0.022668, + "std": 0.016811, + "sem": 0.000884, + "min": 0.003812, + "max": 0.086962, "recall-0.025": 0.561983, - "recall-0.05": 0.92011, + "recall-0.05": 0.884298, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5677,31 +5677,31 @@ Results of the model in various experiments on different datasets. 
}, "ankle_left": { "count": 363, - "mean": 0.030772, - "median": 0.024894, - "std": 0.020907, - "sem": 0.001099, - "min": 0.001513, - "max": 0.148207, - "recall-0.025": 0.506887, - "recall-0.05": 0.859504, - "recall-0.1": 0.983471, - "recall-0.15": 1.0, + "mean": 0.030821, + "median": 0.024361, + "std": 0.02239, + "sem": 0.001177, + "min": 0.001517, + "max": 0.16091, + "recall-0.025": 0.509642, + "recall-0.05": 0.870523, + "recall-0.1": 0.977961, + "recall-0.15": 0.997245, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 363 }, "ankle_right": { "count": 363, - "mean": 0.028893, - "median": 0.024299, - "std": 0.020574, - "sem": 0.001081, - "min": 0.002098, - "max": 0.160231, - "recall-0.025": 0.534435, - "recall-0.05": 0.884298, - "recall-0.1": 0.986226, + "mean": 0.031271, + "median": 0.024652, + "std": 0.024776, + "sem": 0.001302, + "min": 0.004853, + "max": 0.208821, + "recall-0.025": 0.509642, + "recall-0.05": 0.867769, + "recall-0.1": 0.975207, "recall-0.15": 0.997245, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5709,10 +5709,10 @@ Results of the model in various experiments on different datasets. }, "joint_recalls": { "num_labels": 4719, - "recall-0.025": 0.52214, - "recall-0.05": 0.87921, - "recall-0.1": 0.98707, - "recall-0.15": 0.99725, + "recall-0.025": 0.50773, + "recall-0.05": 0.87519, + "recall-0.1": 0.98771, + "recall-0.15": 0.99661, "recall-0.25": 1.0, "recall-0.5": 1.0 } @@ -5728,58 +5728,58 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.2371039772645021, - "avg_time_3d": 0.002369839411515456, - "avg_fps": 4.175821865957451 + "avg_time_2d": 0.04734099981112358, + "avg_time_3d": 0.0018441172746511607, + "avg_fps": 20.33135345100599 } { "person_nums": { "total_frames": 166, "total_labels": 332, - "total_preds": 925, + "total_preds": 664, "considered_empty": 0, "valid_preds": 332, - "invalid_preds": 593, + "invalid_preds": 332, "missing": 0, - "invalid_fraction": 0.64108, - "precision": 0.35892, + "invalid_fraction": 0.5, + "precision": 0.5, "recall": 1.0, - "f1": 0.52824, - "non_empty": 925 + "f1": 0.66667, + "non_empty": 664 }, "mpjpe": { "count": 332, - "mean": 0.026648, - "median": 0.023759, - "std": 0.008331, - "sem": 0.000458, - "min": 0.012646, - "max": 0.054751, - "recall-0.025": 0.563253, - "recall-0.05": 0.990964, + "mean": 0.027351, + "median": 0.024377, + "std": 0.007822, + "sem": 0.00043, + "min": 0.015457, + "max": 0.057071, + "recall-0.025": 0.533133, + "recall-0.05": 0.987952, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 332, - "ap-0.025": 0.26319, - "ap-0.05": 0.644064, - "ap-0.1": 0.65881, - "ap-0.15": 0.65881, - "ap-0.25": 0.65881, - "ap-0.5": 0.65881 + "ap-0.025": 0.241779, + "ap-0.05": 0.657382, + "ap-0.1": 0.678007, + "ap-0.15": 0.678007, + "ap-0.25": 0.678007, + "ap-0.5": 0.678007 }, "head": { "count": 332, - "mean": 0.041232, - "median": 0.040916, - "std": 0.021526, - "sem": 0.001183, - "min": 0.004278, - "max": 0.111633, - "recall-0.025": 0.268072, - "recall-0.05": 0.665663, - "recall-0.1": 0.98494, + "mean": 0.043047, + "median": 0.043778, + "std": 0.020861, + "sem": 0.001147, + "min": 0.003831, + "max": 0.108603, + "recall-0.025": 0.240964, + "recall-0.05": 0.63253, + "recall-0.1": 0.993976, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5787,15 +5787,15 @@ Results of the model in various experiments on different datasets. 
}, "shoulder_left": { "count": 332, - "mean": 0.030185, - "median": 0.029191, - "std": 0.014052, - "sem": 0.000772, - "min": 0.002156, - "max": 0.093101, - "recall-0.025": 0.301205, - "recall-0.05": 0.909639, - "recall-0.1": 1.0, + "mean": 0.031107, + "median": 0.029309, + "std": 0.014762, + "sem": 0.000811, + "min": 0.003898, + "max": 0.104891, + "recall-0.025": 0.319277, + "recall-0.05": 0.89759, + "recall-0.1": 0.996988, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5803,15 +5803,15 @@ Results of the model in various experiments on different datasets. }, "shoulder_right": { "count": 332, - "mean": 0.029314, - "median": 0.023388, - "std": 0.017629, - "sem": 0.000969, - "min": 0.006749, - "max": 0.100609, - "recall-0.025": 0.572289, - "recall-0.05": 0.86747, - "recall-0.1": 0.996988, + "mean": 0.03191, + "median": 0.025695, + "std": 0.018223, + "sem": 0.001002, + "min": 0.007553, + "max": 0.119254, + "recall-0.025": 0.460843, + "recall-0.05": 0.858434, + "recall-0.1": 0.990964, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5819,13 +5819,13 @@ Results of the model in various experiments on different datasets. }, "elbow_left": { "count": 332, - "mean": 0.019105, - "median": 0.0155, - "std": 0.012058, - "sem": 0.000663, - "min": 0.001921, - "max": 0.09994, - "recall-0.025": 0.807229, + "mean": 0.019747, + "median": 0.016664, + "std": 0.010703, + "sem": 0.000588, + "min": 0.002375, + "max": 0.073654, + "recall-0.025": 0.804217, "recall-0.05": 0.96988, "recall-0.1": 1.0, "recall-0.15": 1.0, @@ -5835,15 +5835,15 @@ Results of the model in various experiments on different datasets. 
}, "elbow_right": { "count": 332, - "mean": 0.023342, - "median": 0.019129, - "std": 0.015865, - "sem": 0.000872, - "min": 0.00246, - "max": 0.097851, - "recall-0.025": 0.710843, + "mean": 0.023658, + "median": 0.019327, + "std": 0.015562, + "sem": 0.000855, + "min": 0.001925, + "max": 0.116987, + "recall-0.025": 0.695783, "recall-0.05": 0.918675, - "recall-0.1": 1.0, + "recall-0.1": 0.996988, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5851,15 +5851,15 @@ Results of the model in various experiments on different datasets. }, "wrist_left": { "count": 332, - "mean": 0.019357, - "median": 0.01396, - "std": 0.016964, - "sem": 0.000932, - "min": 0.002274, - "max": 0.106257, - "recall-0.025": 0.801205, - "recall-0.05": 0.933735, - "recall-0.1": 0.996988, + "mean": 0.018893, + "median": 0.012314, + "std": 0.01813, + "sem": 0.000996, + "min": 0.000772, + "max": 0.117136, + "recall-0.025": 0.783133, + "recall-0.05": 0.942771, + "recall-0.1": 0.987952, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5867,15 +5867,15 @@ Results of the model in various experiments on different datasets. }, "wrist_right": { "count": 332, - "mean": 0.025856, - "median": 0.018291, - "std": 0.022321, - "sem": 0.001227, - "min": 0.002656, - "max": 0.175003, - "recall-0.025": 0.674699, + "mean": 0.027637, + "median": 0.02144, + "std": 0.021639, + "sem": 0.001189, + "min": 0.003104, + "max": 0.175776, + "recall-0.025": 0.596386, "recall-0.05": 0.888554, - "recall-0.1": 0.98494, + "recall-0.1": 0.981928, "recall-0.15": 0.996988, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5883,15 +5883,15 @@ Results of the model in various experiments on different datasets. 
}, "hip_left": { "count": 332, - "mean": 0.034229, - "median": 0.030281, - "std": 0.017872, - "sem": 0.000982, - "min": 0.004709, - "max": 0.120401, - "recall-0.025": 0.277108, - "recall-0.05": 0.864458, - "recall-0.1": 0.987952, + "mean": 0.036806, + "median": 0.035179, + "std": 0.016645, + "sem": 0.000915, + "min": 0.001382, + "max": 0.103918, + "recall-0.025": 0.225904, + "recall-0.05": 0.861446, + "recall-0.1": 0.996988, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5899,14 +5899,14 @@ Results of the model in various experiments on different datasets. }, "hip_right": { "count": 332, - "mean": 0.029992, - "median": 0.02549, - "std": 0.017562, - "sem": 0.000965, - "min": 0.002773, - "max": 0.115324, - "recall-0.025": 0.475904, - "recall-0.05": 0.876506, + "mean": 0.032068, + "median": 0.028959, + "std": 0.017032, + "sem": 0.000936, + "min": 0.003695, + "max": 0.112886, + "recall-0.025": 0.373494, + "recall-0.05": 0.885542, "recall-0.1": 0.987952, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5915,14 +5915,14 @@ Results of the model in various experiments on different datasets. }, "knee_left": { "count": 332, - "mean": 0.021442, - "median": 0.017811, - "std": 0.014763, - "sem": 0.000811, - "min": 0.001292, - "max": 0.086181, - "recall-0.025": 0.725904, - "recall-0.05": 0.936747, + "mean": 0.021857, + "median": 0.017494, + "std": 0.014935, + "sem": 0.000821, + "min": 0.002691, + "max": 0.084745, + "recall-0.025": 0.728916, + "recall-0.05": 0.921687, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5931,15 +5931,15 @@ Results of the model in various experiments on different datasets. 
}, "knee_right": { "count": 332, - "mean": 0.019916, - "median": 0.013427, - "std": 0.016596, - "sem": 0.000912, - "min": 0.001577, - "max": 0.095358, - "recall-0.025": 0.740964, - "recall-0.05": 0.942771, - "recall-0.1": 1.0, + "mean": 0.019981, + "median": 0.014665, + "std": 0.01517, + "sem": 0.000834, + "min": 0.002188, + "max": 0.100948, + "recall-0.025": 0.743976, + "recall-0.05": 0.951807, + "recall-0.1": 0.996988, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -5947,14 +5947,14 @@ Results of the model in various experiments on different datasets. }, "ankle_left": { "count": 332, - "mean": 0.020398, - "median": 0.01746, - "std": 0.010326, - "sem": 0.000568, - "min": 0.004236, - "max": 0.083602, - "recall-0.025": 0.701807, - "recall-0.05": 0.993976, + "mean": 0.018884, + "median": 0.016614, + "std": 0.008671, + "sem": 0.000477, + "min": 0.002771, + "max": 0.050803, + "recall-0.025": 0.774096, + "recall-0.05": 0.996988, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5963,14 +5963,14 @@ Results of the model in various experiments on different datasets. }, "ankle_right": { "count": 332, - "mean": 0.032051, - "median": 0.030651, - "std": 0.012377, - "sem": 0.00068, - "min": 0.005895, - "max": 0.109884, - "recall-0.025": 0.216867, - "recall-0.05": 0.930723, + "mean": 0.029967, + "median": 0.028117, + "std": 0.012064, + "sem": 0.000663, + "min": 0.003554, + "max": 0.100546, + "recall-0.025": 0.298193, + "recall-0.05": 0.939759, "recall-0.1": 0.996988, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -5979,9 +5979,9 @@ Results of the model in various experiments on different datasets. }, "joint_recalls": { "num_labels": 4316, - "recall-0.025": 0.55816, - "recall-0.05": 0.89921, - "recall-0.1": 0.99513, + "recall-0.025": 0.54101, + "recall-0.05": 0.89574, + "recall-0.1": 0.99444, "recall-0.15": 0.99977, "recall-0.25": 1.0, "recall-0.5": 1.0 @@ -5998,249 +5998,249 @@ Results of the model in various experiments on different datasets. 
```json { - "avg_time_2d": 0.15548160552978516, - "avg_time_3d": 0.0012971425056457519, - "avg_fps": 6.378415522070676 + "avg_time_2d": 0.050000576972961425, + "avg_time_3d": 0.002305936813354492, + "avg_fps": 19.118077799740753 } { "person_nums": { "total_frames": 110, "total_labels": 330, - "total_preds": 477, + "total_preds": 482, "considered_empty": 0, - "valid_preds": 329, - "invalid_preds": 148, - "missing": 1, - "invalid_fraction": 0.31027, - "precision": 0.68973, - "recall": 0.99697, - "f1": 0.81537, - "non_empty": 477 + "valid_preds": 330, + "invalid_preds": 152, + "missing": 0, + "invalid_fraction": 0.31535, + "precision": 0.68465, + "recall": 1.0, + "f1": 0.81281, + "non_empty": 482 }, "mpjpe": { - "count": 329, - "mean": 0.042431, - "median": 0.038527, - "std": 0.017088, - "sem": 0.000944, - "min": 0.018847, - "max": 0.129488, + "count": 330, + "mean": 0.044547, + "median": 0.039412, + "std": 0.027376, + "sem": 0.001509, + "min": 0.019643, + "max": 0.451187, "recall-0.025": 0.066667, - "recall-0.05": 0.748485, - "recall-0.1": 0.984848, + "recall-0.05": 0.706061, + "recall-0.1": 0.993939, "recall-0.15": 0.99697, "recall-0.25": 0.99697, - "recall-0.5": 0.99697, + "recall-0.5": 1.0, "num_labels": 330, - "ap-0.025": 0.005288, - "ap-0.05": 0.472824, - "ap-0.1": 0.709707, - "ap-0.15": 0.722388, - "ap-0.25": 0.722388, - "ap-0.5": 0.722388 + "ap-0.025": 0.00428, + "ap-0.05": 0.408407, + "ap-0.1": 0.696195, + "ap-0.15": 0.700603, + "ap-0.25": 0.700603, + "ap-0.5": 0.705026 }, "head": { - "count": 329, - "mean": 0.051287, - "median": 0.043322, - "std": 0.032134, - "sem": 0.001774, - "min": 0.007482, - "max": 0.34157, - "recall-0.025": 0.163636, - "recall-0.05": 0.590909, - "recall-0.1": 0.939394, - "recall-0.15": 0.990909, - "recall-0.25": 0.993939, - "recall-0.5": 0.99697, + "count": 330, + "mean": 0.050407, + "median": 0.043433, + "std": 0.029395, + "sem": 0.001621, + "min": 0.007161, + "max": 0.322279, + "recall-0.025": 0.142424, + "recall-0.05": 0.587879, + 
"recall-0.1": 0.936364, + "recall-0.15": 0.99697, + "recall-0.25": 0.99697, + "recall-0.5": 1.0, "num_labels": 330 }, "shoulder_left": { "count": 329, - "mean": 0.048381, - "median": 0.042113, - "std": 0.025753, - "sem": 0.001422, - "min": 0.00278, - "max": 0.182571, - "recall-0.025": 0.163636, - "recall-0.05": 0.618182, - "recall-0.1": 0.963636, - "recall-0.15": 0.987879, - "recall-0.25": 0.99697, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "shoulder_right": { - "count": 329, - "mean": 0.042317, - "median": 0.037717, - "std": 0.026314, - "sem": 0.001453, - "min": 0.005684, - "max": 0.239939, - "recall-0.025": 0.245455, - "recall-0.05": 0.748485, + "mean": 0.049521, + "median": 0.045053, + "std": 0.024858, + "sem": 0.001373, + "min": 0.007095, + "max": 0.154532, + "recall-0.025": 0.148485, + "recall-0.05": 0.59697, "recall-0.1": 0.957576, - "recall-0.15": 0.990909, - "recall-0.25": 0.99697, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "elbow_left": { - "count": 329, - "mean": 0.048174, - "median": 0.039641, - "std": 0.032756, - "sem": 0.001809, - "min": 0.00657, - "max": 0.277129, - "recall-0.025": 0.245455, - "recall-0.05": 0.627273, - "recall-0.1": 0.930303, - "recall-0.15": 0.978788, - "recall-0.25": 0.993939, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "elbow_right": { - "count": 329, - "mean": 0.044099, - "median": 0.037807, - "std": 0.029561, - "sem": 0.001632, - "min": 0.001654, - "max": 0.201578, - "recall-0.025": 0.275758, - "recall-0.05": 0.666667, - "recall-0.1": 0.945455, - "recall-0.15": 0.984848, - "recall-0.25": 0.99697, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "wrist_left": { - "count": 329, - "mean": 0.052501, - "median": 0.043284, - "std": 0.039862, - "sem": 0.002201, - "min": 0.003837, - "max": 0.315522, - "recall-0.025": 0.227273, - "recall-0.05": 0.584848, - "recall-0.1": 0.906061, - "recall-0.15": 0.972727, - "recall-0.25": 0.987879, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "wrist_right": { - 
"count": 329, - "mean": 0.049367, - "median": 0.042826, - "std": 0.032111, - "sem": 0.001773, - "min": 0.002443, - "max": 0.212623, - "recall-0.025": 0.206061, - "recall-0.05": 0.606061, - "recall-0.1": 0.930303, - "recall-0.15": 0.975758, - "recall-0.25": 0.99697, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "hip_left": { - "count": 329, - "mean": 0.042061, - "median": 0.037022, - "std": 0.025621, - "sem": 0.001415, - "min": 0.000769, - "max": 0.183415, - "recall-0.025": 0.272727, - "recall-0.05": 0.687879, - "recall-0.1": 0.969697, - "recall-0.15": 0.990909, - "recall-0.25": 0.99697, - "recall-0.5": 0.99697, - "num_labels": 330 - }, - "hip_right": { - "count": 329, - "mean": 0.042825, - "median": 0.038835, - "std": 0.02514, - "sem": 0.001388, - "min": 0.004561, - "max": 0.195982, - "recall-0.025": 0.245455, - "recall-0.05": 0.684848, - "recall-0.1": 0.966667, "recall-0.15": 0.993939, "recall-0.25": 0.99697, "recall-0.5": 0.99697, "num_labels": 330 }, - "knee_left": { - "count": 329, - "mean": 0.032874, - "median": 0.029085, - "std": 0.020507, - "sem": 0.001132, - "min": 0.002074, - "max": 0.121802, - "recall-0.025": 0.430303, - "recall-0.05": 0.80303, - "recall-0.1": 0.987879, + "shoulder_right": { + "count": 330, + "mean": 0.044319, + "median": 0.037218, + "std": 0.034218, + "sem": 0.001887, + "min": 0.004313, + "max": 0.480158, + "recall-0.025": 0.233333, + "recall-0.05": 0.687879, + "recall-0.1": 0.954545, "recall-0.15": 0.99697, "recall-0.25": 0.99697, + "recall-0.5": 1.0, + "num_labels": 330 + }, + "elbow_left": { + "count": 329, + "mean": 0.048481, + "median": 0.038712, + "std": 0.033097, + "sem": 0.001827, + "min": 0.003004, + "max": 0.297929, + "recall-0.025": 0.218182, + "recall-0.05": 0.642424, + "recall-0.1": 0.927273, + "recall-0.15": 0.987879, + "recall-0.25": 0.993939, "recall-0.5": 0.99697, "num_labels": 330 }, + "elbow_right": { + "count": 330, + "mean": 0.047223, + "median": 0.039596, + "std": 0.037877, + "sem": 0.002088, + "min": 
0.004914, + "max": 0.492122, + "recall-0.025": 0.221212, + "recall-0.05": 0.660606, + "recall-0.1": 0.942424, + "recall-0.15": 0.984848, + "recall-0.25": 0.99697, + "recall-0.5": 1.0, + "num_labels": 330 + }, + "wrist_left": { + "count": 329, + "mean": 0.051291, + "median": 0.041442, + "std": 0.042288, + "sem": 0.002335, + "min": 0.004449, + "max": 0.424882, + "recall-0.025": 0.230303, + "recall-0.05": 0.6, + "recall-0.1": 0.909091, + "recall-0.15": 0.975758, + "recall-0.25": 0.987879, + "recall-0.5": 0.99697, + "num_labels": 330 + }, + "wrist_right": { + "count": 330, + "mean": 0.053604, + "median": 0.046393, + "std": 0.037546, + "sem": 0.00207, + "min": 0.004392, + "max": 0.348793, + "recall-0.025": 0.139394, + "recall-0.05": 0.563636, + "recall-0.1": 0.909091, + "recall-0.15": 0.966667, + "recall-0.25": 0.99697, + "recall-0.5": 1.0, + "num_labels": 330 + }, + "hip_left": { + "count": 330, + "mean": 0.044183, + "median": 0.038068, + "std": 0.03422, + "sem": 0.001887, + "min": 0.005511, + "max": 0.481605, + "recall-0.025": 0.239394, + "recall-0.05": 0.678788, + "recall-0.1": 0.972727, + "recall-0.15": 0.993939, + "recall-0.25": 0.99697, + "recall-0.5": 1.0, + "num_labels": 330 + }, + "hip_right": { + "count": 330, + "mean": 0.042642, + "median": 0.040013, + "std": 0.024685, + "sem": 0.001361, + "min": 0.004172, + "max": 0.272625, + "recall-0.025": 0.239394, + "recall-0.05": 0.681818, + "recall-0.1": 0.984848, + "recall-0.15": 0.99697, + "recall-0.25": 0.99697, + "recall-0.5": 1.0, + "num_labels": 330 + }, + "knee_left": { + "count": 330, + "mean": 0.034138, + "median": 0.030334, + "std": 0.021238, + "sem": 0.001171, + "min": 0.002605, + "max": 0.128253, + "recall-0.025": 0.418182, + "recall-0.05": 0.821212, + "recall-0.1": 0.987879, + "recall-0.15": 1.0, + "recall-0.25": 1.0, + "recall-0.5": 1.0, + "num_labels": 330 + }, "knee_right": { "count": 329, - "mean": 0.033203, - "median": 0.027414, - "std": 0.02114, - "sem": 0.001167, - "min": 0.000654, - "max": 
0.118668, - "recall-0.025": 0.439394, - "recall-0.05": 0.815152, - "recall-0.1": 0.984848, + "mean": 0.035864, + "median": 0.030501, + "std": 0.021677, + "sem": 0.001197, + "min": 0.004259, + "max": 0.12896, + "recall-0.025": 0.381818, + "recall-0.05": 0.784848, + "recall-0.1": 0.981818, "recall-0.15": 0.99697, "recall-0.25": 0.99697, "recall-0.5": 0.99697, "num_labels": 330 }, "ankle_left": { - "count": 329, - "mean": 0.03161, - "median": 0.025931, - "std": 0.021618, - "sem": 0.001194, - "min": 0.001687, - "max": 0.16244, - "recall-0.025": 0.472727, - "recall-0.05": 0.866667, - "recall-0.1": 0.978788, - "recall-0.15": 0.993939, - "recall-0.25": 0.99697, - "recall-0.5": 0.99697, + "count": 330, + "mean": 0.034536, + "median": 0.027684, + "std": 0.024799, + "sem": 0.001367, + "min": 0.003123, + "max": 0.171156, + "recall-0.025": 0.442424, + "recall-0.05": 0.80303, + "recall-0.1": 0.975758, + "recall-0.15": 0.99697, + "recall-0.25": 1.0, + "recall-0.5": 1.0, "num_labels": 330 }, "ankle_right": { "count": 329, - "mean": 0.032908, - "median": 0.027387, - "std": 0.020224, - "sem": 0.001117, - "min": 0.006919, - "max": 0.121939, - "recall-0.025": 0.418182, - "recall-0.05": 0.845455, + "mean": 0.033706, + "median": 0.029006, + "std": 0.020755, + "sem": 0.001146, + "min": 0.00172, + "max": 0.128362, + "recall-0.025": 0.381818, + "recall-0.05": 0.842424, "recall-0.1": 0.978788, "recall-0.15": 0.99697, "recall-0.25": 0.99697, @@ -6249,18 +6249,18 @@ Results of the model in various experiments on different datasets. 
}, "joint_recalls": { "num_labels": 4290, - "recall-0.025": 0.29161, - "recall-0.05": 0.70256, - "recall-0.1": 0.95594, - "recall-0.15": 0.98695, - "recall-0.25": 0.99534, - "recall-0.5": 0.99697 + "recall-0.025": 0.2627, + "recall-0.05": 0.68671, + "recall-0.1": 0.95408, + "recall-0.15": 0.99044, + "recall-0.25": 0.99627, + "recall-0.5": 0.99883 } } { "total_parts": 4620, - "correct_parts": 4577, - "pcp": 0.990693 + "correct_parts": 4587, + "pcp": 0.992857 } ``` @@ -6268,269 +6268,269 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.3177612257433367, - "avg_time_3d": 0.0035632279542115357, - "avg_fps": 3.11211919445529 + "avg_time_2d": 0.09007903262301609, + "avg_time_3d": 0.005098295641375017, + "avg_fps": 10.506703836255216 } { "person_nums": { "total_frames": 121, "total_labels": 484, - "total_preds": 639, + "total_preds": 523, "considered_empty": 0, - "valid_preds": 484, - "invalid_preds": 155, - "missing": 0, - "invalid_fraction": 0.24257, - "precision": 0.75743, - "recall": 1.0, - "f1": 0.86198, - "non_empty": 639 + "valid_preds": 482, + "invalid_preds": 41, + "missing": 2, + "invalid_fraction": 0.07839, + "precision": 0.92161, + "recall": 0.99587, + "f1": 0.9573, + "non_empty": 523 }, "mpjpe": { - "count": 484, - "mean": 0.033532, - "median": 0.030036, - "std": 0.014617, - "sem": 0.000665, - "min": 0.016099, - "max": 0.146128, - "recall-0.025": 0.322314, - "recall-0.05": 0.886364, - "recall-0.1": 0.995868, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.03475, + "median": 0.030963, + "std": 0.020545, + "sem": 0.000937, + "min": 0.01466, + "max": 0.368354, + "recall-0.025": 0.241736, + "recall-0.05": 0.902893, + "recall-0.1": 0.989669, + "recall-0.15": 0.993802, + "recall-0.25": 0.993802, + "recall-0.5": 0.995868, "num_labels": 484, - "ap-0.025": 0.088374, - "ap-0.05": 0.640853, - "ap-0.1": 0.800023, - "ap-0.15": 0.805685, - "ap-0.25": 0.805685, - "ap-0.5": 
0.805685 + "ap-0.025": 0.062343, + "ap-0.05": 0.812653, + "ap-0.1": 0.966396, + "ap-0.15": 0.971074, + "ap-0.25": 0.971074, + "ap-0.5": 0.977994 }, "head": { - "count": 484, - "mean": 0.033877, - "median": 0.02776, - "std": 0.021158, - "sem": 0.000963, - "min": 0.002192, - "max": 0.108731, - "recall-0.025": 0.444215, - "recall-0.05": 0.770661, - "recall-0.1": 0.997934, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.033664, + "median": 0.028392, + "std": 0.020491, + "sem": 0.000934, + "min": 0.002498, + "max": 0.107228, + "recall-0.025": 0.433884, + "recall-0.05": 0.78719, + "recall-0.1": 0.987603, + "recall-0.15": 0.995868, + "recall-0.25": 0.995868, + "recall-0.5": 0.995868, "num_labels": 484 }, "shoulder_left": { - "count": 484, - "mean": 0.034665, - "median": 0.028747, - "std": 0.019893, - "sem": 0.000905, - "min": 0.005085, - "max": 0.139146, - "recall-0.025": 0.380165, - "recall-0.05": 0.811983, - "recall-0.1": 0.987603, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.034837, + "median": 0.030613, + "std": 0.018747, + "sem": 0.000855, + "min": 0.005812, + "max": 0.109419, + "recall-0.025": 0.357438, + "recall-0.05": 0.791322, + "recall-0.1": 0.985537, + "recall-0.15": 0.995868, + "recall-0.25": 0.995868, + "recall-0.5": 0.995868, "num_labels": 484 }, "shoulder_right": { - "count": 484, - "mean": 0.032068, - "median": 0.028047, - "std": 0.018506, - "sem": 0.000842, - "min": 0.003465, - "max": 0.113248, - "recall-0.025": 0.431818, - "recall-0.05": 0.830579, - "recall-0.1": 0.995868, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.034559, + "median": 0.030309, + "std": 0.021956, + "sem": 0.001001, + "min": 0.002815, + "max": 0.17224, + "recall-0.025": 0.386364, + "recall-0.05": 0.793388, + "recall-0.1": 0.979339, + "recall-0.15": 0.991736, + "recall-0.25": 0.995868, + "recall-0.5": 0.995868, "num_labels": 484 }, "elbow_left": { - 
"count": 484, - "mean": 0.034017, - "median": 0.02815, - "std": 0.02432, - "sem": 0.001107, - "min": 0.003353, - "max": 0.238629, - "recall-0.025": 0.438017, - "recall-0.05": 0.81405, - "recall-0.1": 0.985537, - "recall-0.15": 0.991736, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.035854, + "median": 0.031422, + "std": 0.02381, + "sem": 0.001086, + "min": 0.002872, + "max": 0.272109, + "recall-0.025": 0.353306, + "recall-0.05": 0.834711, + "recall-0.1": 0.977273, + "recall-0.15": 0.993802, + "recall-0.25": 0.993802, + "recall-0.5": 0.995868, "num_labels": 484 }, "elbow_right": { - "count": 483, - "mean": 0.035603, - "median": 0.02891, - "std": 0.030001, - "sem": 0.001367, - "min": 0.003971, - "max": 0.295815, - "recall-0.025": 0.431818, - "recall-0.05": 0.809917, - "recall-0.1": 0.964876, - "recall-0.15": 0.987603, - "recall-0.25": 0.995868, - "recall-0.5": 0.997934, - "num_labels": 484 - }, - "wrist_left": { - "count": 484, - "mean": 0.04083, - "median": 0.03162, - "std": 0.031707, - "sem": 0.001443, - "min": 0.004432, - "max": 0.298529, - "recall-0.025": 0.357438, - "recall-0.05": 0.737603, - "recall-0.1": 0.954545, - "recall-0.15": 0.991736, - "recall-0.25": 0.997934, - "recall-0.5": 1.0, - "num_labels": 484 - }, - "wrist_right": { "count": 481, - "mean": 0.040278, - "median": 0.032104, - "std": 0.034452, - "sem": 0.001573, - "min": 0.002532, - "max": 0.342783, - "recall-0.025": 0.338843, - "recall-0.05": 0.762397, - "recall-0.1": 0.942149, - "recall-0.15": 0.977273, - "recall-0.25": 0.989669, + "mean": 0.038432, + "median": 0.032114, + "std": 0.032718, + "sem": 0.001493, + "min": 0.003564, + "max": 0.364307, + "recall-0.025": 0.378099, + "recall-0.05": 0.780992, + "recall-0.1": 0.964876, + "recall-0.15": 0.981405, + "recall-0.25": 0.987603, "recall-0.5": 0.993802, "num_labels": 484 }, + "wrist_left": { + "count": 481, + "mean": 0.042373, + "median": 0.035384, + "std": 0.029776, + "sem": 0.001359, + "min": 0.003307, + "max": 0.226649, + 
"recall-0.025": 0.283058, + "recall-0.05": 0.719008, + "recall-0.1": 0.946281, + "recall-0.15": 0.983471, + "recall-0.25": 0.993802, + "recall-0.5": 0.993802, + "num_labels": 484 + }, + "wrist_right": { + "count": 480, + "mean": 0.042498, + "median": 0.032931, + "std": 0.035624, + "sem": 0.001628, + "min": 0.004798, + "max": 0.404334, + "recall-0.025": 0.293388, + "recall-0.05": 0.729339, + "recall-0.1": 0.948347, + "recall-0.15": 0.977273, + "recall-0.25": 0.987603, + "recall-0.5": 0.991736, + "num_labels": 484 + }, "hip_left": { - "count": 484, - "mean": 0.036552, - "median": 0.033883, - "std": 0.019123, - "sem": 0.00087, - "min": 0.002281, - "max": 0.132653, - "recall-0.025": 0.278926, - "recall-0.05": 0.807851, - "recall-0.1": 0.985537, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.037141, + "median": 0.033639, + "std": 0.019958, + "sem": 0.00091, + "min": 0.003362, + "max": 0.168991, + "recall-0.025": 0.28719, + "recall-0.05": 0.780992, + "recall-0.1": 0.989669, + "recall-0.15": 0.993802, + "recall-0.25": 0.995868, + "recall-0.5": 0.995868, "num_labels": 484 }, "hip_right": { - "count": 484, - "mean": 0.041649, - "median": 0.032928, - "std": 0.028497, - "sem": 0.001297, - "min": 0.003156, - "max": 0.150148, - "recall-0.025": 0.293388, - "recall-0.05": 0.764463, - "recall-0.1": 0.946281, - "recall-0.15": 0.997934, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 482, + "mean": 0.042536, + "median": 0.03524, + "std": 0.027635, + "sem": 0.00126, + "min": 0.005179, + "max": 0.224822, + "recall-0.025": 0.274793, + "recall-0.05": 0.72314, + "recall-0.1": 0.942149, + "recall-0.15": 0.993802, + "recall-0.25": 0.995868, + "recall-0.5": 0.995868, "num_labels": 484 }, "knee_left": { - "count": 484, - "mean": 0.023332, - "median": 0.019111, - "std": 0.019579, - "sem": 0.000891, - "min": 0.000898, - "max": 0.230698, - "recall-0.025": 0.704545, - "recall-0.05": 0.923554, - "recall-0.1": 0.995868, - "recall-0.15": 0.995868, 
- "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 481, + "mean": 0.023509, + "median": 0.020528, + "std": 0.014407, + "sem": 0.000658, + "min": 0.001284, + "max": 0.125526, + "recall-0.025": 0.628099, + "recall-0.05": 0.93595, + "recall-0.1": 0.991736, + "recall-0.15": 0.993802, + "recall-0.25": 0.993802, + "recall-0.5": 0.993802, "num_labels": 484 }, "knee_right": { - "count": 484, - "mean": 0.024527, - "median": 0.020468, - "std": 0.01538, - "sem": 0.0007, - "min": 0.003418, - "max": 0.101846, - "recall-0.025": 0.628099, - "recall-0.05": 0.923554, - "recall-0.1": 0.997934, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 481, + "mean": 0.026612, + "median": 0.021565, + "std": 0.017766, + "sem": 0.000811, + "min": 0.001499, + "max": 0.138939, + "recall-0.025": 0.572314, + "recall-0.05": 0.89876, + "recall-0.1": 0.983471, + "recall-0.15": 0.993802, + "recall-0.25": 0.993802, + "recall-0.5": 0.993802, "num_labels": 484 }, "ankle_left": { - "count": 484, - "mean": 0.027217, - "median": 0.023029, - "std": 0.030318, - "sem": 0.001379, - "min": 0.00176, - "max": 0.49629, - "recall-0.025": 0.56405, - "recall-0.05": 0.931818, - "recall-0.1": 0.991736, - "recall-0.15": 0.995868, + "count": 482, + "mean": 0.023658, + "median": 0.019326, + "std": 0.018667, + "sem": 0.000851, + "min": 0.001836, + "max": 0.211379, + "recall-0.025": 0.663223, + "recall-0.05": 0.956612, + "recall-0.1": 0.987603, + "recall-0.15": 0.989669, "recall-0.25": 0.995868, - "recall-0.5": 1.0, + "recall-0.5": 0.995868, "num_labels": 484 }, "ankle_right": { - "count": 484, - "mean": 0.026743, - "median": 0.024481, - "std": 0.015442, - "sem": 0.000703, - "min": 0.001085, - "max": 0.084476, - "recall-0.025": 0.528926, - "recall-0.05": 0.909091, - "recall-0.1": 1.0, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 481, + "mean": 0.025205, + "median": 0.020694, + "std": 0.020556, + "sem": 0.000938, + "min": 0.002377, + "max": 0.314326, + "recall-0.025": 
0.63843, + "recall-0.05": 0.911157, + "recall-0.1": 0.991736, + "recall-0.15": 0.991736, + "recall-0.25": 0.991736, + "recall-0.5": 0.993802, "num_labels": 484 }, "joint_recalls": { "num_labels": 6292, - "recall-0.025": 0.4466, - "recall-0.05": 0.82962, - "recall-0.1": 0.97966, - "recall-0.15": 0.99507, - "recall-0.25": 0.99825, - "recall-0.5": 0.99936 + "recall-0.025": 0.42546, + "recall-0.05": 0.81739, + "recall-0.1": 0.97394, + "recall-0.15": 0.99015, + "recall-0.25": 0.99332, + "recall-0.5": 0.99476 } } { "total_parts": 6776, - "correct_parts": 6755, - "pcp": 0.996901 + "correct_parts": 6722, + "pcp": 0.992031 } ``` @@ -6538,269 +6538,269 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.2823985504966251, - "avg_time_3d": 0.003012508326183165, - "avg_fps": 3.5037184758171196 + "avg_time_2d": 0.06544327046829841, + "avg_time_3d": 0.0028696142869188606, + "avg_fps": 14.638526883812016 } { "person_nums": { "total_frames": 183, "total_labels": 732, - "total_preds": 751, + "total_preds": 742, "considered_empty": 0, - "valid_preds": 732, - "invalid_preds": 19, - "missing": 0, - "invalid_fraction": 0.0253, - "precision": 0.9747, - "recall": 1.0, - "f1": 0.98719, - "non_empty": 751 + "valid_preds": 729, + "invalid_preds": 13, + "missing": 3, + "invalid_fraction": 0.01752, + "precision": 0.98248, + "recall": 0.9959, + "f1": 0.98915, + "non_empty": 742 }, "mpjpe": { - "count": 732, - "mean": 0.028487, - "median": 0.026819, - "std": 0.015491, - "sem": 0.000573, - "min": 0.013277, - "max": 0.385636, - "recall-0.025": 0.416667, - "recall-0.05": 0.986339, - "recall-0.1": 0.998634, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.02743, + "median": 0.026034, + "std": 0.00798, + "sem": 0.000296, + "min": 0.012097, + "max": 0.078647, + "recall-0.025": 0.43306, + "recall-0.05": 0.976776, + "recall-0.1": 0.995902, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + 
"recall-0.5": 0.995902, "num_labels": 732, - "ap-0.025": 0.320904, - "ap-0.05": 0.963918, - "ap-0.1": 0.984195, - "ap-0.15": 0.984195, - "ap-0.25": 0.984195, - "ap-0.5": 0.98707 + "ap-0.025": 0.210169, + "ap-0.05": 0.960342, + "ap-0.1": 0.995902, + "ap-0.15": 0.995902, + "ap-0.25": 0.995902, + "ap-0.5": 0.995902 }, "head": { - "count": 732, - "mean": 0.030805, - "median": 0.027935, - "std": 0.016761, - "sem": 0.00062, - "min": 0.002521, - "max": 0.124699, - "recall-0.025": 0.430328, - "recall-0.05": 0.856557, - "recall-0.1": 0.995902, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.031712, + "median": 0.025951, + "std": 0.023084, + "sem": 0.000856, + "min": 0.000911, + "max": 0.213568, + "recall-0.025": 0.467213, + "recall-0.05": 0.840164, + "recall-0.1": 0.979508, + "recall-0.15": 0.991803, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "shoulder_left": { - "count": 732, - "mean": 0.035121, - "median": 0.033943, - "std": 0.021072, - "sem": 0.000779, - "min": 0.001879, - "max": 0.420146, - "recall-0.025": 0.303279, - "recall-0.05": 0.846995, - "recall-0.1": 0.997268, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.035599, + "median": 0.03057, + "std": 0.021125, + "sem": 0.000783, + "min": 0.003008, + "max": 0.156968, + "recall-0.025": 0.329235, + "recall-0.05": 0.830601, + "recall-0.1": 0.969945, + "recall-0.15": 0.994536, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "shoulder_right": { - "count": 732, - "mean": 0.029177, - "median": 0.02697, - "std": 0.022451, - "sem": 0.00083, - "min": 0.002126, - "max": 0.494359, - "recall-0.025": 0.441257, - "recall-0.05": 0.938525, - "recall-0.1": 0.994536, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.028876, + "median": 0.02443, + "std": 0.018415, + "sem": 0.000682, + "min": 0.002113, + "max": 0.174751, + 
"recall-0.025": 0.51776, + "recall-0.05": 0.896175, + "recall-0.1": 0.989071, + "recall-0.15": 0.994536, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "elbow_left": { - "count": 731, - "mean": 0.027616, - "median": 0.023984, - "std": 0.0151, - "sem": 0.000559, - "min": 0.002481, - "max": 0.115223, - "recall-0.025": 0.535519, - "recall-0.05": 0.913934, - "recall-0.1": 0.997268, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 0.998634, + "count": 729, + "mean": 0.026572, + "median": 0.022611, + "std": 0.017068, + "sem": 0.000633, + "min": 0.001694, + "max": 0.157654, + "recall-0.025": 0.577869, + "recall-0.05": 0.909836, + "recall-0.1": 0.991803, + "recall-0.15": 0.994536, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "elbow_right": { - "count": 731, - "mean": 0.025265, - "median": 0.023231, - "std": 0.014333, - "sem": 0.000531, - "min": 0.001248, - "max": 0.090019, - "recall-0.025": 0.551913, - "recall-0.05": 0.931694, - "recall-0.1": 0.998634, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 0.998634, + "count": 729, + "mean": 0.025012, + "median": 0.02006, + "std": 0.017218, + "sem": 0.000638, + "min": 0.002363, + "max": 0.131751, + "recall-0.025": 0.644809, + "recall-0.05": 0.904372, + "recall-0.1": 0.990437, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "wrist_left": { - "count": 731, - "mean": 0.031177, - "median": 0.026977, - "std": 0.020734, - "sem": 0.000767, - "min": 0.000603, - "max": 0.135075, - "recall-0.025": 0.456284, - "recall-0.05": 0.871585, - "recall-0.1": 0.987705, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 0.998634, + "count": 729, + "mean": 0.03047, + "median": 0.024918, + "std": 0.022484, + "sem": 0.000833, + "min": 0.001889, + "max": 0.234259, + "recall-0.025": 0.504098, + "recall-0.05": 0.868852, + "recall-0.1": 0.979508, + "recall-0.15": 0.993169, + "recall-0.25": 
0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "wrist_right": { - "count": 731, - "mean": 0.031492, - "median": 0.028618, - "std": 0.020779, - "sem": 0.000769, - "min": 0.00387, - "max": 0.366253, - "recall-0.025": 0.418033, - "recall-0.05": 0.863388, - "recall-0.1": 0.993169, - "recall-0.15": 0.997268, - "recall-0.25": 0.997268, - "recall-0.5": 0.998634, + "count": 729, + "mean": 0.030171, + "median": 0.024392, + "std": 0.023154, + "sem": 0.000858, + "min": 0.001391, + "max": 0.24946, + "recall-0.025": 0.512295, + "recall-0.05": 0.894809, + "recall-0.1": 0.978142, + "recall-0.15": 0.990437, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "hip_left": { - "count": 732, - "mean": 0.031917, - "median": 0.030228, - "std": 0.015677, - "sem": 0.00058, - "min": 0.001805, - "max": 0.23988, - "recall-0.025": 0.327869, - "recall-0.05": 0.904372, - "recall-0.1": 0.997268, - "recall-0.15": 0.998634, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.029121, + "median": 0.026385, + "std": 0.015204, + "sem": 0.000563, + "min": 0.00251, + "max": 0.09251, + "recall-0.025": 0.460383, + "recall-0.05": 0.901639, + "recall-0.1": 0.995902, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "hip_right": { - "count": 732, - "mean": 0.031088, - "median": 0.02994, - "std": 0.014069, - "sem": 0.00052, - "min": 0.002286, - "max": 0.082226, - "recall-0.025": 0.367486, - "recall-0.05": 0.898907, - "recall-0.1": 1.0, - "recall-0.15": 1.0, - "recall-0.25": 1.0, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.028751, + "median": 0.026506, + "std": 0.015338, + "sem": 0.000568, + "min": 0.001462, + "max": 0.133708, + "recall-0.025": 0.442623, + "recall-0.05": 0.911202, + "recall-0.1": 0.991803, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "knee_left": { - "count": 732, - "mean": 0.021199, - "median": 0.019277, - "std": 0.01541, - "sem": 
0.00057, - "min": 0.000803, - "max": 0.334263, - "recall-0.025": 0.702186, - "recall-0.05": 0.990437, - "recall-0.1": 0.998634, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.019508, + "median": 0.018288, + "std": 0.00943, + "sem": 0.00035, + "min": 0.000645, + "max": 0.068412, + "recall-0.025": 0.765027, + "recall-0.05": 0.983607, + "recall-0.1": 0.995902, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "knee_right": { - "count": 732, - "mean": 0.023314, - "median": 0.021081, - "std": 0.016511, - "sem": 0.000611, - "min": 0.00306, - "max": 0.347232, - "recall-0.025": 0.659836, - "recall-0.05": 0.97541, - "recall-0.1": 0.997268, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.022728, + "median": 0.020933, + "std": 0.011838, + "sem": 0.000439, + "min": 0.00155, + "max": 0.090798, + "recall-0.025": 0.657104, + "recall-0.05": 0.967213, + "recall-0.1": 0.995902, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "ankle_left": { - "count": 732, - "mean": 0.023962, - "median": 0.022298, - "std": 0.015913, - "sem": 0.000589, - "min": 0.002368, - "max": 0.302707, - "recall-0.025": 0.612022, - "recall-0.05": 0.960383, - "recall-0.1": 0.998634, - "recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.022895, + "median": 0.021701, + "std": 0.010938, + "sem": 0.000405, + "min": 0.00099, + "max": 0.093058, + "recall-0.025": 0.631148, + "recall-0.05": 0.971311, + "recall-0.1": 0.995902, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "ankle_right": { - "count": 732, - "mean": 0.025075, - "median": 0.023379, - "std": 0.016629, - "sem": 0.000615, - "min": 0.002006, - "max": 0.342523, - "recall-0.025": 0.569672, - "recall-0.05": 0.976776, - "recall-0.1": 0.998634, - 
"recall-0.15": 0.998634, - "recall-0.25": 0.998634, - "recall-0.5": 1.0, + "count": 729, + "mean": 0.02517, + "median": 0.023714, + "std": 0.011932, + "sem": 0.000442, + "min": 0.002256, + "max": 0.108244, + "recall-0.025": 0.543716, + "recall-0.05": 0.961749, + "recall-0.1": 0.994536, + "recall-0.15": 0.995902, + "recall-0.25": 0.995902, + "recall-0.5": 0.995902, "num_labels": 732 }, "joint_recalls": { "num_labels": 9516, - "recall-0.025": 0.48991, - "recall-0.05": 0.91719, - "recall-0.1": 0.99643, - "recall-0.15": 0.99874, - "recall-0.25": 0.99884, - "recall-0.5": 0.99958 + "recall-0.025": 0.54214, + "recall-0.05": 0.91015, + "recall-0.1": 0.98749, + "recall-0.15": 0.99433, + "recall-0.25": 0.9959, + "recall-0.5": 0.9959 } } { "total_parts": 10248, - "correct_parts": 10233, - "pcp": 0.998536 + "correct_parts": 10194, + "pcp": 0.994731 } ``` @@ -6808,58 +6808,58 @@ Results of the model in various experiments on different datasets. ```json { - "avg_time_2d": 0.2932176687564069, - "avg_time_3d": 0.0019383890586986876, - "avg_fps": 3.3880382039335584 + "avg_time_2d": 0.06536485716613413, + "avg_time_3d": 0.0028059914795278804, + "avg_fps": 14.669026715477658 } { "person_nums": { "total_frames": 181, "total_labels": 362, - "total_preds": 362, + "total_preds": 363, "considered_empty": 0, "valid_preds": 362, - "invalid_preds": 0, + "invalid_preds": 1, "missing": 0, - "invalid_fraction": 0.0, - "precision": 1.0, + "invalid_fraction": 0.00275, + "precision": 0.99725, "recall": 1.0, - "f1": 1.0, - "non_empty": 362 + "f1": 0.99862, + "non_empty": 363 }, "mpjpe": { "count": 362, - "mean": 0.024745, - "median": 0.023305, - "std": 0.009389, - "sem": 0.000494, - "min": 0.014023, - "max": 0.12971, - "recall-0.025": 0.596685, - "recall-0.05": 0.986188, + "mean": 0.02557, + "median": 0.023909, + "std": 0.009532, + "sem": 0.000502, + "min": 0.014679, + "max": 0.132619, + "recall-0.025": 0.616022, + "recall-0.05": 0.977901, "recall-0.1": 0.997238, "recall-0.15": 1.0, "recall-0.25": 
1.0, "recall-0.5": 1.0, "num_labels": 362, - "ap-0.025": 0.435297, - "ap-0.05": 0.985784, - "ap-0.1": 0.997131, + "ap-0.025": 0.455631, + "ap-0.05": 0.975503, + "ap-0.1": 0.997184, "ap-0.15": 1.0, "ap-0.25": 1.0, "ap-0.5": 1.0 }, "head": { "count": 362, - "mean": 0.037086, - "median": 0.036911, - "std": 0.014716, - "sem": 0.000775, - "min": 0.002558, - "max": 0.124266, - "recall-0.025": 0.190608, + "mean": 0.038431, + "median": 0.038664, + "std": 0.014854, + "sem": 0.000782, + "min": 0.005309, + "max": 0.137357, + "recall-0.025": 0.168508, "recall-0.05": 0.831492, - "recall-0.1": 0.991713, + "recall-0.1": 0.997238, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -6867,31 +6867,31 @@ Results of the model in various experiments on different datasets. }, "shoulder_left": { "count": 362, - "mean": 0.024422, - "median": 0.021056, - "std": 0.016043, - "sem": 0.000844, - "min": 0.002657, - "max": 0.179202, - "recall-0.025": 0.60221, - "recall-0.05": 0.958564, + "mean": 0.026983, + "median": 0.02263, + "std": 0.015206, + "sem": 0.0008, + "min": 0.00387, + "max": 0.126938, + "recall-0.025": 0.58011, + "recall-0.05": 0.917127, "recall-0.1": 0.997238, - "recall-0.15": 0.997238, + "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 362 }, "shoulder_right": { "count": 362, - "mean": 0.03084, - "median": 0.028919, - "std": 0.01456, - "sem": 0.000766, - "min": 0.005008, - "max": 0.118173, - "recall-0.025": 0.38674, - "recall-0.05": 0.900552, - "recall-0.1": 0.997238, + "mean": 0.034054, + "median": 0.031876, + "std": 0.015398, + "sem": 0.00081, + "min": 0.004182, + "max": 0.141592, + "recall-0.025": 0.273481, + "recall-0.05": 0.881215, + "recall-0.1": 0.994475, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -6899,46 +6899,46 @@ Results of the model in various experiments on different datasets. 
}, "elbow_left": { "count": 362, - "mean": 0.019183, - "median": 0.015594, - "std": 0.013393, - "sem": 0.000705, - "min": 0.001936, - "max": 0.11991, - "recall-0.025": 0.779006, - "recall-0.05": 0.966851, + "mean": 0.020792, + "median": 0.016891, + "std": 0.015473, + "sem": 0.000814, + "min": 0.002343, + "max": 0.158617, + "recall-0.025": 0.751381, + "recall-0.05": 0.953039, "recall-0.1": 0.997238, - "recall-0.15": 1.0, + "recall-0.15": 0.997238, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 362 }, "elbow_right": { "count": 362, - "mean": 0.02159, - "median": 0.018434, - "std": 0.015822, - "sem": 0.000833, - "min": 0.002886, - "max": 0.154583, - "recall-0.025": 0.696133, - "recall-0.05": 0.972376, + "mean": 0.024753, + "median": 0.020827, + "std": 0.020256, + "sem": 0.001066, + "min": 0.002198, + "max": 0.20548, + "recall-0.025": 0.643646, + "recall-0.05": 0.950276, "recall-0.1": 0.986188, - "recall-0.15": 0.997238, + "recall-0.15": 0.994475, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 362 }, "wrist_left": { "count": 362, - "mean": 0.024056, - "median": 0.021277, - "std": 0.014391, - "sem": 0.000757, - "min": 0.003255, - "max": 0.125608, + "mean": 0.024482, + "median": 0.021036, + "std": 0.015509, + "sem": 0.000816, + "min": 0.002882, + "max": 0.120801, "recall-0.025": 0.60221, - "recall-0.05": 0.953039, + "recall-0.05": 0.941989, "recall-0.1": 0.997238, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -6947,47 +6947,47 @@ Results of the model in various experiments on different datasets. 
}, "wrist_right": { "count": 362, - "mean": 0.028766, - "median": 0.022499, - "std": 0.029596, - "sem": 0.001558, - "min": 0.000277, - "max": 0.317639, - "recall-0.025": 0.58011, - "recall-0.05": 0.883978, + "mean": 0.030548, + "median": 0.023797, + "std": 0.027228, + "sem": 0.001433, + "min": 0.003852, + "max": 0.29761, + "recall-0.025": 0.530387, + "recall-0.05": 0.867403, "recall-0.1": 0.986188, - "recall-0.15": 0.98895, + "recall-0.15": 0.994475, "recall-0.25": 0.994475, "recall-0.5": 1.0, "num_labels": 362 }, "hip_left": { "count": 362, - "mean": 0.025963, - "median": 0.023332, - "std": 0.014302, - "sem": 0.000753, - "min": 0.004395, - "max": 0.105448, - "recall-0.025": 0.549724, - "recall-0.05": 0.953039, + "mean": 0.026399, + "median": 0.023668, + "std": 0.015079, + "sem": 0.000794, + "min": 0.002841, + "max": 0.157722, + "recall-0.025": 0.524862, + "recall-0.05": 0.955801, "recall-0.1": 0.997238, - "recall-0.15": 1.0, + "recall-0.15": 0.997238, "recall-0.25": 1.0, "recall-0.5": 1.0, "num_labels": 362 }, "hip_right": { "count": 362, - "mean": 0.027134, - "median": 0.023912, - "std": 0.01681, - "sem": 0.000885, - "min": 0.001672, - "max": 0.201707, - "recall-0.025": 0.530387, - "recall-0.05": 0.947514, - "recall-0.1": 0.994475, + "mean": 0.028132, + "median": 0.026271, + "std": 0.0167, + "sem": 0.000879, + "min": 0.00458, + "max": 0.218165, + "recall-0.025": 0.453039, + "recall-0.05": 0.925414, + "recall-0.1": 0.997238, "recall-0.15": 0.997238, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -6995,14 +6995,14 @@ Results of the model in various experiments on different datasets. 
}, "knee_left": { "count": 362, - "mean": 0.014274, - "median": 0.012875, - "std": 0.007508, + "mean": 0.014559, + "median": 0.012687, + "std": 0.007514, "sem": 0.000395, - "min": 0.00133, - "max": 0.047529, - "recall-0.025": 0.91989, - "recall-0.05": 1.0, + "min": 0.000961, + "max": 0.053512, + "recall-0.025": 0.89779, + "recall-0.05": 0.997238, "recall-0.1": 1.0, "recall-0.15": 1.0, "recall-0.25": 1.0, @@ -7011,14 +7011,14 @@ Results of the model in various experiments on different datasets. }, "knee_right": { "count": 362, - "mean": 0.015968, - "median": 0.013439, - "std": 0.016304, - "sem": 0.000858, - "min": 0.002223, - "max": 0.259639, - "recall-0.025": 0.88674, - "recall-0.05": 0.986188, + "mean": 0.016592, + "median": 0.014442, + "std": 0.01703, + "sem": 0.000896, + "min": 0.001216, + "max": 0.268298, + "recall-0.025": 0.892265, + "recall-0.05": 0.991713, "recall-0.1": 0.994475, "recall-0.15": 0.997238, "recall-0.25": 0.997238, @@ -7027,15 +7027,15 @@ Results of the model in various experiments on different datasets. }, "ankle_left": { "count": 362, - "mean": 0.027315, - "median": 0.025773, - "std": 0.010493, - "sem": 0.000552, - "min": 0.006963, - "max": 0.072852, - "recall-0.025": 0.444751, - "recall-0.05": 0.961326, - "recall-0.1": 1.0, + "mean": 0.0244, + "median": 0.022601, + "std": 0.011282, + "sem": 0.000594, + "min": 0.004563, + "max": 0.104821, + "recall-0.025": 0.604972, + "recall-0.05": 0.969613, + "recall-0.1": 0.997238, "recall-0.15": 1.0, "recall-0.25": 1.0, "recall-0.5": 1.0, @@ -7043,26 +7043,26 @@ Results of the model in various experiments on different datasets. 
}, "ankle_right": { "count": 362, - "mean": 0.025082, - "median": 0.021269, - "std": 0.02841, - "sem": 0.001495, - "min": 0.001559, - "max": 0.492964, - "recall-0.025": 0.624309, - "recall-0.05": 0.964088, + "mean": 0.022289, + "median": 0.018322, + "std": 0.027775, + "sem": 0.001462, + "min": 0.000525, + "max": 0.481108, + "recall-0.025": 0.726519, + "recall-0.05": 0.961326, "recall-0.1": 0.994475, - "recall-0.15": 0.994475, + "recall-0.15": 0.997238, "recall-0.25": 0.997238, "recall-0.5": 1.0, "num_labels": 362 }, "joint_recalls": { "num_labels": 4706, - "recall-0.025": 0.59839, - "recall-0.05": 0.94411, - "recall-0.1": 0.99426, - "recall-0.15": 0.99745, + "recall-0.025": 0.5867, + "recall-0.05": 0.93264, + "recall-0.1": 0.99448, + "recall-0.15": 0.99766, "recall-0.25": 0.99894, "recall-0.5": 1.0 } diff --git a/rpt/interface.cpp b/rpt/interface.cpp index 83036d4..67b871f 100644 --- a/rpt/interface.cpp +++ b/rpt/interface.cpp @@ -4,9 +4,9 @@ // ================================================================================================= // ================================================================================================= -Triangulator::Triangulator(float min_score, size_t min_group_size) +Triangulator::Triangulator(float min_match_score, size_t min_group_size) { - this->triangulator = new TriangulatorInternal(min_score, min_group_size); + this->triangulator = new TriangulatorInternal(min_match_score, min_group_size); } // ================================================================================================= diff --git a/rpt/interface.hpp b/rpt/interface.hpp index 1020e0c..2e6195e 100644 --- a/rpt/interface.hpp +++ b/rpt/interface.hpp @@ -19,11 +19,11 @@ public: * Triangulator to predict poses from multiple views. * * - * @param min_score Minimum score to consider a triangulated joint as valid. + * @param min_match_score Minimum score to consider a triangulated joint as valid. 
* @param min_group_size Minimum number of camera pairs that need to see a person. */ Triangulator( - float min_score = 0.95, + float min_match_score = 0.95, size_t min_group_size = 1); /** diff --git a/rpt/triangulator.cpp b/rpt/triangulator.cpp index 28e4618..f0d1662 100644 --- a/rpt/triangulator.cpp +++ b/rpt/triangulator.cpp @@ -102,9 +102,9 @@ void CameraInternal::update_projection_matrix() // ================================================================================================= // ================================================================================================= -TriangulatorInternal::TriangulatorInternal(float min_score, size_t min_group_size) +TriangulatorInternal::TriangulatorInternal(float min_match_score, size_t min_group_size) { - this->min_score = min_score; + this->min_match_score = min_match_score; this->min_group_size = min_group_size; } @@ -241,7 +241,7 @@ std::vector>> TriangulatorInternal::triangulate stime = std::chrono::high_resolution_clock::now(); // Check matches to old poses - float threshold = min_score - 0.2; + float threshold = min_match_score - 0.2; std::map>> scored_pasts; if (!last_poses_3d.empty()) { @@ -447,7 +447,7 @@ std::vector>> TriangulatorInternal::triangulate size_t num_poses = all_scored_poses.size(); for (size_t i = num_poses; i > 0; --i) { - if (all_scored_poses[i - 1].second < min_score) + if (all_scored_poses[i - 1].second < min_match_score) { all_scored_poses.erase(all_scored_poses.begin() + i - 1); all_pairs.erase(all_pairs.begin() + i - 1); @@ -456,7 +456,7 @@ std::vector>> TriangulatorInternal::triangulate // Group pairs that share a person std::vector>> groups; - groups = calc_grouping(all_pairs, all_scored_poses, min_score); + groups = calc_grouping(all_pairs, all_scored_poses, min_match_score); // Drop groups with too few matches size_t num_groups = groups.size(); @@ -508,7 +508,7 @@ std::vector>> TriangulatorInternal::triangulate poses.push_back(all_full_poses[idx]); } - auto merged_pose 
= merge_group(poses, min_score); + auto merged_pose = merge_group(poses, min_match_score); all_merged_poses[i] = (merged_pose); } @@ -548,7 +548,7 @@ std::vector>> TriangulatorInternal::triangulate } pose.push_back(point); - if (point[3] > min_score) + if (point[3] > min_match_score) { num_valid++; } @@ -1860,7 +1860,7 @@ void TriangulatorInternal::add_missing_joints( for (size_t j = 0; j < num_joints; ++j) { float *pose_ptr = pose.ptr(j); - if (pose_ptr[3] > min_score) + if (pose_ptr[3] > min_match_score) { valid_joint_idx.push_back(j); } diff --git a/rpt/triangulator.hpp b/rpt/triangulator.hpp index 2d8fe90..8e5be45 100644 --- a/rpt/triangulator.hpp +++ b/rpt/triangulator.hpp @@ -31,7 +31,7 @@ public: class TriangulatorInternal { public: - TriangulatorInternal(float min_score, size_t min_group_size); + TriangulatorInternal(float min_match_score, size_t min_group_size); std::vector>> triangulate_poses( const std::vector>>> &poses_2d, @@ -43,7 +43,7 @@ public: void print_stats(); private: - float min_score; + float min_match_score; float min_group_size; const std::vector core_joints = { diff --git a/scripts/test_skelda_dataset.py b/scripts/test_skelda_dataset.py index ef7de25..126f8d1 100644 --- a/scripts/test_skelda_dataset.py +++ b/scripts/test_skelda_dataset.py @@ -17,8 +17,8 @@ import rpt # ================================================================================================== -# dataset_use = "panoptic" dataset_use = "human36m" +# dataset_use = "panoptic" # dataset_use = "mvor" # dataset_use = "shelf" # dataset_use = "campus" @@ -26,8 +26,47 @@ dataset_use = "human36m" # dataset_use = "chi3d" # dataset_use = "tsinghua" # dataset_use = "human36m_wb" -# dataset_use = "egohumans" +# dataset_use = "egohumans_tagging" +# dataset_use = "egohumans_legoassemble" +# dataset_use = "egohumans_fencing" +# dataset_use = "egohumans_basketball" +# dataset_use = "egohumans_volleyball" +# dataset_use = "egohumans_badminton" +# dataset_use = "egohumans_tennis" +# 
dataset_use = "ntu" +# dataset_use = "koarob" + + +# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered +# If the persons are small in the image, use a lower value +default_min_bbox_area = 0.1 * 0.1 + +# Describes how confident a 2D bounding box needs to be to be considered +# If the persons are small in the image, or poorly recognizable, use a lower value +default_min_bbox_score = 0.3 + +# Describes how good two 2D poses need to match each other to create a valid triangulation +# If the quality of the 2D detections is poor, use a lower value +default_min_match_score = 0.94 + +# Describes the minimum number of camera pairs that need to detect the same person +# If the number of cameras is high, and the views are not occluded, use a higher value +default_min_group_size = 1 + +# Batch poses per image for faster processing +# If most of the time only one person is in a image, disable it, because it is slightly slower then +default_batch_poses = True + datasets = { + "human36m": { + "path": "/datasets/human36m/skelda/pose_test.json", + "take_interval": 5, + "min_match_score": 0.94, + "min_group_size": 1, + "min_bbox_score": 0.4, + "min_bbox_area": 0.1 * 0.1, + "batch_poses": False, + }, "panoptic": { "path": "/datasets/panoptic/skelda/test.json", "cams": ["00_03", "00_06", "00_12", "00_13", "00_23"], @@ -35,27 +74,33 @@ datasets = { # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"], "take_interval": 3, "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"], - }, - "human36m": { - "path": "/datasets/human36m/skelda/pose_test.json", - "take_interval": 5, + "min_group_size": 1, + # "min_group_size": 4, + "min_bbox_area": 0.05 * 0.05, }, "mvor": { "path": "/datasets/mvor/skelda/all.json", "take_interval": 1, "with_depth": False, + "min_match_score": 0.85, + "min_bbox_score": 0.25, }, "campus": { "path": "/datasets/campus/skelda/test.json", "take_interval": 1, + 
"min_bbox_score": 0.5, }, "shelf": { "path": "/datasets/shelf/skelda/test.json", "take_interval": 1, + "min_match_score": 0.96, + "min_group_size": 2, }, "ikeaasm": { "path": "/datasets/ikeaasm/skelda/test.json", "take_interval": 2, + "min_match_score": 0.92, + "min_bbox_score": 0.20, }, "chi3d": { "path": "/datasets/chi3d/skelda/all.json", @@ -64,21 +109,66 @@ datasets = { "tsinghua": { "path": "/datasets/tsinghua/skelda/test.json", "take_interval": 3, + "min_group_size": 2, }, "human36m_wb": { "path": "/datasets/human36m/skelda/wb/test.json", "take_interval": 100, + "min_bbox_score": 0.4, + "batch_poses": False, }, - "egohumans": { + "egohumans_tagging": { + "path": "/datasets/egohumans/skelda/all.json", + "take_interval": 2, + "subset": "tagging", + "min_group_size": 2, + "min_bbox_score": 0.25, + "min_bbox_area": 0.05 * 0.05, + }, + "egohumans_legoassemble": { "path": "/datasets/egohumans/skelda/all.json", "take_interval": 2, - # "subset": "tagging", "subset": "legoassemble", - # "subset": "fencing", - # "subset": "basketball", - # "subset": "volleyball", - # "subset": "badminton", - # "subset": "tennis", + "min_group_size": 2, + }, + "egohumans_fencing": { + "path": "/datasets/egohumans/skelda/all.json", + "take_interval": 2, + "subset": "fencing", + "min_group_size": 7, + "min_bbox_score": 0.5, + "min_bbox_area": 0.05 * 0.05, + }, + "egohumans_basketball": { + "path": "/datasets/egohumans/skelda/all.json", + "take_interval": 2, + "subset": "basketball", + "min_group_size": 7, + "min_bbox_score": 0.25, + "min_bbox_area": 0.025 * 0.025, + }, + "egohumans_volleyball": { + "path": "/datasets/egohumans/skelda/all.json", + "take_interval": 2, + "subset": "volleyball", + "min_group_size": 11, + "min_bbox_score": 0.25, + "min_bbox_area": 0.05 * 0.05, + }, + "egohumans_badminton": { + "path": "/datasets/egohumans/skelda/all.json", + "take_interval": 2, + "subset": "badminton", + "min_group_size": 7, + "min_bbox_score": 0.25, + "min_bbox_area": 0.05 * 0.05, + }, + 
"egohumans_tennis": { + "path": "/datasets/egohumans/skelda/all.json", + "take_interval": 2, + "subset": "tennis", + "min_group_size": 11, + "min_bbox_area": 0.025 * 0.025, }, } @@ -99,11 +189,15 @@ eval_joints = [ "ankle_left", "ankle_right", ] -if dataset_use in ["human36m", "panoptic"]: +if dataset_use == "human36m": eval_joints[eval_joints.index("head")] = "nose" -if dataset_use.endswith("_wb"): - # eval_joints[eval_joints.index("head")] = "nose" - eval_joints = list(joint_names_2d) +if dataset_use == "panoptic": + eval_joints[eval_joints.index("head")] = "nose" +if dataset_use == "human36m_wb": + if any((test_triangulate.whole_body.values())): + eval_joints = list(joint_names_2d) + else: + eval_joints[eval_joints.index("head")] = "nose" # output_dir = "/RapidPoseTriangulation/data/testoutput/" output_dir = "" @@ -191,11 +285,11 @@ def load_labels(dataset: dict): elif "human36m_wb" in dataset: labels = load_json(dataset["human36m_wb"]["path"]) - elif "egohumans" in dataset: - labels = load_json(dataset["egohumans"]["path"]) + elif any(("egohumans" in key for key in dataset)): + labels = load_json(dataset[dataset_use]["path"]) labels = [lb for lb in labels if "test" in lb["splits"]] - labels = [lb for lb in labels if dataset["egohumans"]["subset"] in lb["seq"]] - if dataset["egohumans"]["subset"] in ["volleyball", "tennis"]: + labels = [lb for lb in labels if dataset[dataset_use]["subset"] in lb["seq"]] + if dataset[dataset_use]["subset"] in ["volleyball", "tennis"]: labels = [lb for i, lb in enumerate(labels) if i % 150 < 60] else: @@ -216,11 +310,21 @@ def load_labels(dataset: dict): def main(): global joint_names_3d, eval_joints + # Load dataset specific parameters + min_match_score = datasets[dataset_use].get( + "min_match_score", default_min_match_score + ) + min_group_size = datasets[dataset_use].get("min_group_size", default_min_group_size) + min_bbox_score = datasets[dataset_use].get("min_bbox_score", default_min_bbox_score) + min_bbox_area = 
datasets[dataset_use].get("min_bbox_area", default_min_bbox_area) + batch_poses = datasets[dataset_use].get("batch_poses", default_batch_poses) + + # Load 2D pose model whole_body = test_triangulate.whole_body if any((whole_body[k] for k in whole_body)): kpt_model = utils_2d_pose.load_wb_model() else: - kpt_model = utils_2d_pose.load_model() + kpt_model = utils_2d_pose.load_model(min_bbox_score, min_bbox_area, batch_poses) # Manually set matplotlib backend try: @@ -239,68 +343,19 @@ def main(): # Print a dataset sample for debugging print(labels[0]) - minscores = { - # Describes how good two 2D poses need to match each other to create a valid triangulation - # If the quality of the 2D detections is poor, use a lower value - "panoptic": 0.94, - "human36m": 0.94, - "mvor": 0.86, - "campus": 0.96, - "shelf": 0.96, - "ikeaasm": 0.89, - "chi3d": 0.94, - "tsinghua": 0.96, - "egohumans": 0.95, - "human36m_wb": 0.94, - } - minscore = minscores.get(dataset_use, 0.95) - min_group_sizes = { - # Describes the minimum number of camera pairs that need to detect the same person - # If the number of cameras is high, and the views are not occluded, use a higher value - "panoptic": 1, - "shelf": 2, - "chi3d": 1, - "tsinghua": 2, - "egohumans": 4, - } - min_group_size = min_group_sizes.get(dataset_use, 1) - if dataset_use == "panoptic" and len(datasets["panoptic"]["cams"]) == 10: - min_group_size = 4 - if dataset_use == "egohumans" and ( - "lego" in labels[0]["seq"] or "tagging" in labels[0]["seq"] - ): - min_group_size = 2 - if dataset_use == "egohumans" and ( - "volleyball" in labels[0]["seq"] or "badminton" in labels[0]["seq"] - ): - min_group_size = 7 - if dataset_use == "egohumans" and "tennis" in labels[0]["seq"]: - min_group_size = 11 - - print("\nRunning predictions ...") - all_poses = [] - all_ids = [] + print("\nCalculating 2D predictions ...") + all_poses_2d = [] times = [] - triangulator = rpt.Triangulator(min_score=minscore, min_group_size=min_group_size) - old_scene = 
"" - old_index = -1 for label in tqdm.tqdm(labels): images_2d = [] - if old_scene != label.get("scene", "") or ( - old_index + datasets[dataset_use]["take_interval"] < label["index"] - ): - # Reset last poses if scene changes - old_scene = label.get("scene", "") - triangulator.reset() - try: start = time.time() for i in range(len(label["imgpaths"])): imgpath = label["imgpaths"][i] img = test_triangulate.load_image(imgpath) images_2d.append(img) - print("IMG time:", time.time() - start) + time_imgs = time.time() - start except cv2.error: print("One of the paths not found:", label["imgpaths"]) continue @@ -322,7 +377,28 @@ def main(): poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d) poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d) time_2d = time.time() - start - print("2D time:", time_2d) + + all_poses_2d.append(poses_2d) + times.append([time_imgs, time_2d, 0]) + + print("\nCalculating 3D predictions ...") + all_poses_3d = [] + all_ids = [] + triangulator = rpt.Triangulator( + min_match_score=min_match_score, min_group_size=min_group_size + ) + old_scene = "" + old_index = -1 + for i in tqdm.tqdm(range(len(labels))): + label = labels[i] + poses_2d = all_poses_2d[i] + + if old_scene != label.get("scene", "") or ( + old_index + datasets[dataset_use]["take_interval"] < label["index"] + ): + # Reset last poses if scene changes + old_scene = label.get("scene", "") + triangulator.reset() start = time.time() if sum(np.sum(p) for p in poses_2d) == 0: @@ -333,14 +409,12 @@ def main(): poses3D = triangulator.triangulate_poses( poses_2d, rpt_cameras, roomparams, joint_names_2d ) - time_3d = time.time() - start - print("3D time:", time_3d) old_index = label["index"] - all_poses.append(np.array(poses3D).tolist()) + all_poses_3d.append(np.array(poses3D).tolist()) all_ids.append(label["id"]) - times.append((time_2d, time_3d)) + times[i][2] = time_3d # Print per-step triangulation timings print("") @@ -349,9 +423,11 @@ def main(): warmup_iters = 10 if 
len(times) > warmup_iters: times = times[warmup_iters:] - avg_time_2d = np.mean([t[0] for t in times]) - avg_time_3d = np.mean([t[1] for t in times]) + avg_time_im = np.mean([t[0] for t in times]) + avg_time_2d = np.mean([t[1] for t in times]) + avg_time_3d = np.mean([t[2] for t in times]) tstats = { + "img_loading": avg_time_im, "avg_time_2d": avg_time_2d, "avg_time_3d": avg_time_3d, "avg_fps": 1.0 / (avg_time_2d + avg_time_3d), @@ -361,7 +437,7 @@ def main(): _ = evals.mpjpe.run_eval( labels, - all_poses, + all_poses_3d, all_ids, joint_names_net=joint_names_3d, joint_names_use=eval_joints, @@ -369,7 +445,7 @@ def main(): ) _ = evals.pcp.run_eval( labels, - all_poses, + all_poses_3d, all_ids, joint_names_net=joint_names_3d, joint_names_use=eval_joints, diff --git a/scripts/test_triangulate.py b/scripts/test_triangulate.py index 0844130..831a7e6 100644 --- a/scripts/test_triangulate.py +++ b/scripts/test_triangulate.py @@ -220,7 +220,7 @@ def update_sample(sample, new_dir=""): def load_image(path: str): image = cv2.imread(path, 3) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - image = np.array(image, dtype=np.float32) + image = np.asarray(image, dtype=np.uint8) return image @@ -283,7 +283,7 @@ def main(): if any((whole_body[k] for k in whole_body)): kpt_model = utils_2d_pose.load_wb_model() else: - kpt_model = utils_2d_pose.load_model() + kpt_model = utils_2d_pose.load_model(min_bbox_score=0.3) # Manually set matplotlib backend matplotlib.use("TkAgg") @@ -340,7 +340,7 @@ def main(): else: cameras = rpt.convert_cameras(camparams) roomp = [roomparams["room_size"], roomparams["room_center"]] - triangulator = rpt.Triangulator(min_score=0.95) + triangulator = rpt.Triangulator(min_match_score=0.94) stime = time.time() poses_3d = triangulator.triangulate_poses( diff --git a/scripts/utils_2d_pose.py b/scripts/utils_2d_pose.py index 6f7edd4..0117870 100644 --- a/scripts/utils_2d_pose.py +++ b/scripts/utils_2d_pose.py @@ -1,42 +1,498 @@ +import math import os +from abc 
import ABC, abstractmethod +from typing import List +import cv2 import numpy as np -from mmpose.apis import MMPoseInferencer - -# ================================================================================================== - -filepath = os.path.dirname(os.path.realpath(__file__)) + "/" +import onnxruntime as ort +from tqdm import tqdm # ================================================================================================== -def load_model(): - print("Loading mmpose model ...") +class BaseModel(ABC): + def __init__(self, model_path: str, warmup: int): + self.model_path = model_path + self.runtime = "" - model = MMPoseInferencer( - pose2d="/mmpose/projects/rtmpose/rtmpose/body_2d_keypoint/rtmpose-m_8xb256-420e_coco-384x288.py", - pose2d_weights="https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth", - det_model="/mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py", - det_weights="https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth", - det_cat_ids=[0], + if not os.path.exists(model_path): + raise FileNotFoundError("File not found:", model_path) + + if model_path.endswith(".onnx"): + self.init_onnxruntime(model_path) + self.runtime = "ort" + else: + raise ValueError("Unsupported model format:", model_path) + + if warmup > 0: + print("Running warmup for '{}' ...".format(self.__class__.__name__)) + self.warmup(warmup // 2) + self.warmup(warmup // 2) + + def init_onnxruntime(self, model_path): + usetrt = True + usegpu = True + + self.opt = ort.SessionOptions() + providers = ort.get_available_providers() + # ort.set_default_logger_severity(1) + + self.providers = [] + if usetrt and "TensorrtExecutionProvider" in providers: + self.providers.append( + ( + "TensorrtExecutionProvider", + { + "trt_engine_cache_enable": True, + "trt_engine_cache_path": "/RapidPoseTriangulation/data/trt_cache/", + }, + ) + ) 
+ elif usegpu and "CUDAExecutionProvider" in providers: + self.providers.append("CUDAExecutionProvider") + else: + self.providers.append("CPUExecutionProvider") + print("Using providers:", self.providers) + + self.session = ort.InferenceSession( + model_path, providers=self.providers, sess_options=self.opt + ) + + self.input_names = [input.name for input in self.session.get_inputs()] + self.input_shapes = [input.shape for input in self.session.get_inputs()] + + input_types = [input.type for input in self.session.get_inputs()] + self.input_types = [] + for i in range(len(input_types)): + input_type = input_types[i] + if input_type == "tensor(float32)": + itype = np.float32 + elif input_type == "tensor(float16)": + itype = np.float16 + elif input_type == "tensor(int32)": + itype = np.int32 + elif input_type == "tensor(uint8)": + itype = np.uint8 + else: + raise ValueError("Undefined input type:", input_type) + self.input_types.append(itype) + + @abstractmethod + def preprocess(self, **kwargs): + pass + + @abstractmethod + def postprocess(self, **kwargs): + pass + + def warmup(self, epoch: int): + np.random.seed(42) + + for _ in tqdm(range(epoch)): + inputs = {} + for i in range(len(self.input_names)): + iname = self.input_names[i] + + if "image" in iname: + ishape = list(self.input_shapes[i]) + if "batch_size" in ishape: + max_batch_size = 10 + ishape[0] = np.random.choice(list(range(1, max_batch_size + 1))) + tensor = np.random.random(ishape) + tensor = tensor * 255 + else: + raise ValueError("Undefined input type:", iname) + + tensor = tensor.astype(self.input_types[i]) + inputs[iname] = tensor + + self.call_model_ort(list(inputs.values())) + + def call_model_ort(self, tensor): + inputs = {} + for i in range(len(self.input_names)): + iname = self.input_names[i] + inputs[iname] = tensor[i] + result = self.session.run(None, inputs) + return result + + def __call__(self, **kwargs): + tensor = self.preprocess(**kwargs) + result = self.call_model_ort(tensor) + output = 
self.postprocess(result=result, **kwargs) + return output + + +# ================================================================================================== + + +class LetterBox: + def __init__(self, target_size, fill_value=0): + self.target_size = target_size + self.fill_value = fill_value + + def calc_params(self, ishape): + img_h, img_w = ishape[:2] + target_h, target_w = self.target_size + + scale = min(target_w / img_w, target_h / img_h) + new_w = round(img_w * scale) + new_h = round(img_h * scale) + + pad_w = target_w - new_w + pad_h = target_h - new_h + pad_left = pad_w // 2 + pad_top = pad_h // 2 + pad_right = pad_w - pad_left + pad_bottom = pad_h - pad_top + paddings = (pad_left, pad_right, pad_top, pad_bottom) + + return paddings, scale, (new_w, new_h) + + def resize_image(self, image): + paddings, _, new_size = self.calc_params(image.shape) + + # Resize the image + new_w, new_h = new_size + resized_img = cv2.resize( + image, + (new_w, new_h), + interpolation=cv2.INTER_NEAREST, + ) + + # Optionally pad the image + pad_left, pad_right, pad_top, pad_bottom = paddings + if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0: + final_img = resized_img + else: + final_img = cv2.copyMakeBorder( + resized_img, + pad_top, + pad_bottom, + pad_left, + pad_right, + borderType=cv2.BORDER_CONSTANT, + value=[self.fill_value, self.fill_value, self.fill_value], + ) + + return final_img + + +# ================================================================================================== + + +class BoxCrop: + def __init__(self, target_size, padding_scale=1.0, fill_value=0): + self.target_size = target_size + self.padding_scale = padding_scale + self.fill_value = fill_value + + def calc_params(self, ishape, bbox): + start_x, start_y, end_x, end_y = bbox[0], bbox[1], bbox[2], bbox[3] + target_h, target_w = self.target_size + + # Calculate original bounding box center + center_x = (start_x + end_x) / 2.0 + center_y = (start_y + end_y) / 2.0 + + # 
Scale the bounding box by the padding_scale + bbox_w = end_x - start_x + bbox_h = end_y - start_y + scaled_w = bbox_w * self.padding_scale + scaled_h = bbox_h * self.padding_scale + + # Calculate the aspect ratios + bbox_aspect = scaled_w / scaled_h + target_aspect = target_w / target_h + + # Adjust the scaled bounding box to match the target aspect ratio + if bbox_aspect > target_aspect: + adjusted_h = scaled_w / target_aspect + adjusted_w = scaled_w + else: + adjusted_w = scaled_h * target_aspect + adjusted_h = scaled_h + + # Calculate scaled bounding box coordinates + bbox_w = adjusted_w + bbox_h = adjusted_h + new_start_x = center_x - bbox_w / 2.0 + new_start_y = center_y - bbox_h / 2.0 + new_end_x = center_x + bbox_w / 2.0 + new_end_y = center_y + bbox_h / 2.0 + + # Round the box coordinates + start_x = int(math.floor(new_start_x)) + start_y = int(math.floor(new_start_y)) + end_x = int(math.ceil(new_end_x)) + end_y = int(math.ceil(new_end_y)) + + # Define the new box coordinates + new_start_x = max(0, start_x) + new_start_y = max(0, start_y) + new_end_x = min(ishape[1] - 1, end_x) + new_end_y = min(ishape[0] - 1, end_y) + new_box = [new_start_x, new_start_y, new_end_x, new_end_y] + + # Calculate resized crop size + bbox_w = new_box[2] - new_box[0] + bbox_h = new_box[3] - new_box[1] + scale = min(target_w / bbox_w, target_h / bbox_h) + new_w = round(bbox_w * scale) + new_h = round(bbox_h * scale) + + # Calculate paddings + pad_w = target_w - new_w + pad_h = target_h - new_h + pad_left, pad_right, pad_top, pad_bottom = 0, 0, 0, 0 + if pad_w > 0: + if start_x < 0: + pad_left = pad_w + pad_right = 0 + elif end_x > ishape[1]: + pad_left = 0 + pad_right = pad_w + else: + # Can be caused by bbox rounding + pad_left = pad_w // 2 + pad_right = pad_w - pad_left + if pad_h > 0: + if start_y < 0: + pad_top = pad_h + pad_bottom = 0 + elif end_y > ishape[0]: + pad_top = 0 + pad_bottom = pad_h + else: + # Can be caused by bbox rounding + pad_top = pad_h // 2 + pad_bottom = 
pad_h - pad_top + paddings = (pad_left, pad_right, pad_top, pad_bottom) + + return paddings, scale, new_box, (new_w, new_h) + + def crop_resize_box(self, image, bbox): + paddings, _, new_box, new_size = self.calc_params(image.shape, bbox) + + # Extract the bounding box + cropped_img = image[new_box[1] : new_box[3], new_box[0] : new_box[2]] + + # Resize the image + new_w, new_h = new_size + resized_img = cv2.resize( + cropped_img, + (new_w, new_h), + interpolation=cv2.INTER_NEAREST, + ) + + # Optionally pad the image + pad_left, pad_right, pad_top, pad_bottom = paddings + if pad_left == 0 and pad_right == 0 and pad_top == 0 and pad_bottom == 0: + final_img = resized_img + else: + final_img = cv2.copyMakeBorder( + resized_img, + pad_top, + pad_bottom, + pad_left, + pad_right, + borderType=cv2.BORDER_CONSTANT, + value=[self.fill_value, self.fill_value, self.fill_value], + ) + + return final_img + + +# ================================================================================================== + + +class RTMDet(BaseModel): + def __init__( + self, + model_path: str, + conf_threshold: float, + min_area_fraction: float, + warmup: int = 30, + ): + super(RTMDet, self).__init__(model_path, warmup) + self.target_size = (320, 320) + self.conf_threshold = conf_threshold + self.letterbox = LetterBox(self.target_size, fill_value=114) + + img_area = self.target_size[0] * self.target_size[1] + self.min_area = img_area * min_area_fraction + + def preprocess(self, image: np.ndarray): + image = self.letterbox.resize_image(image) + tensor = np.asarray(image).astype(self.input_types[0], copy=False) + tensor = np.expand_dims(tensor, axis=0) + tensor = [tensor] + return tensor + + def postprocess(self, result: List[np.ndarray], image: np.ndarray): + boxes = np.squeeze(result[0], axis=0) + classes = np.squeeze(result[1], axis=0) + + human_class = classes[:] == 0 + boxes = boxes[human_class] + + keep = boxes[:, 4] > self.conf_threshold + boxes = boxes[keep] + + if len(boxes) == 0: + 
return np.array([]) + + # Drop boxes with too small area + boxes = boxes.astype(np.float32) + areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + keep = areas >= self.min_area + boxes = boxes[keep] + + if len(boxes) == 0: + return np.array([]) + + paddings, scale, _ = self.letterbox.calc_params(image.shape) + + boxes[:, 0] -= paddings[0] + boxes[:, 2] -= paddings[0] + boxes[:, 1] -= paddings[2] + boxes[:, 3] -= paddings[2] + + boxes = np.maximum(boxes, 0) + th, tw = self.target_size + pad_w = paddings[0] + paddings[1] + pad_h = paddings[2] + paddings[3] + max_w = tw - pad_w - 1 + max_h = th - pad_h - 1 + boxes[:, 0] = np.minimum(boxes[:, 0], max_w) + boxes[:, 1] = np.minimum(boxes[:, 1], max_h) + boxes[:, 2] = np.minimum(boxes[:, 2], max_w) + boxes[:, 3] = np.minimum(boxes[:, 3], max_h) + + boxes[:, 0:4] /= scale + return boxes + + +# ================================================================================================== + + +class RTMPose(BaseModel): + def __init__(self, model_path: str, warmup: int = 30): + super(RTMPose, self).__init__(model_path, warmup) + self.target_size = (384, 288) + self.boxcrop = BoxCrop(self.target_size, padding_scale=1.25, fill_value=0) + + def preprocess(self, image: np.ndarray, bboxes: np.ndarray): + cutouts = [] + for i in range(len(bboxes)): + bbox = np.asarray(bboxes[i])[0:4] + bbox += np.array([-0.5, -0.5, 0.5 - 1e-8, 0.5 - 1e-8]) + bbox = bbox.round().astype(np.int32) + region = self.boxcrop.crop_resize_box(image, bbox) + tensor = np.asarray(region).astype(self.input_types[0], copy=False) + cutouts.append(tensor) + + if len(bboxes) == 1: + cutouts = np.expand_dims(cutouts[0], axis=0) + else: + cutouts = np.stack(cutouts, axis=0) + + tensor = [cutouts] + return tensor + + def postprocess( + self, result: List[np.ndarray], image: np.ndarray, bboxes: np.ndarray + ): + kpts = [] + for i in range(len(bboxes)): + scores = np.clip(result[1][i], 0, 1) + kp = np.concatenate( + [result[0][i], 
np.expand_dims(scores, axis=-1)], axis=-1 + ) + + paddings, scale, bbox, _ = self.boxcrop.calc_params(image.shape, bboxes[i]) + kp[:, 0] -= paddings[0] + kp[:, 1] -= paddings[2] + kp[:, 0:2] /= scale + kp[:, 0] += bbox[0] + kp[:, 1] += bbox[1] + kp[:, 0:2] = np.maximum(kp[:, 0:2], 0) + max_w = image.shape[1] - 1 + max_h = image.shape[0] - 1 + kp[:, 0] = np.minimum(kp[:, 0], max_w) + kp[:, 1] = np.minimum(kp[:, 1], max_h) + kpts.append(kp) + + return kpts + + +# ================================================================================================== + + +class TopDown: + def __init__( + self, + det_model_path: str, + pose_model_path: str, + box_conf_threshold: float, + box_min_area: float, + warmup: int = 30, + ): + self.batch_poses = bool("Bx" in pose_model_path) + + self.det_model = RTMDet( + det_model_path, box_conf_threshold, box_min_area, warmup + ) + self.pose_model = RTMPose(pose_model_path, warmup) + + def predict(self, image): + boxes = self.det_model(image=image) + if len(boxes) == 0: + return [] + + results = [] + if self.batch_poses: + results = self.pose_model(image=image, bboxes=boxes) + else: + for i in range(boxes.shape[0]): + kp = self.pose_model(image=image, bboxes=[boxes[i]]) + results.append(kp[0]) + + return results + + +# ================================================================================================== + + +def load_model(min_bbox_score=0.3, min_bbox_area=0.1 * 0.1, batch_poses=False): + print("Loading 2D model ...") + + model = TopDown( + "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx", + f"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-m_{'B' if batch_poses else '1'}x384x288x3_fp16_extra-steps.onnx", + box_conf_threshold=min_bbox_score, + box_min_area=min_bbox_area, + warmup=30, ) - print("Loaded mmpose model") + print("Loaded 2D model") return model -def load_wb_model(): - print("Loading mmpose whole body model ...") +def load_wb_model(min_bbox_score=0.3, 
min_bbox_area=0.1 * 0.1, batch_poses=False): + print("Loading 2D-WB model ...") - model = MMPoseInferencer( - pose2d="/mmpose/projects/rtmpose/rtmpose/wholebody_2d_keypoint/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py", - pose2d_weights="https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth", - det_model="/mmpose/projects/rtmpose/rtmdet/person/rtmdet_nano_320-8xb32_coco-person.py", - det_weights="https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_nano_8xb32-100e_coco-obj365-person-05d8511e.pth", - det_cat_ids=[0], + # The FP16 pose model is much worse than the FP32 for whole-body keypoints + model = TopDown( + "/RapidPoseTriangulation/extras/mmdeploy/exports/rtmdet-nano_1x320x320x3_fp16_extra-steps.onnx", + f"/RapidPoseTriangulation/extras/mmdeploy/exports/rtmpose-l_wb_{'B' if batch_poses else '1'}x384x288x3_extra-steps.onnx", + box_conf_threshold=min_bbox_score, + box_min_area=min_bbox_area, + warmup=30, ) - print("Loaded mmpose model") + print("Loaded 2D-WB model") return model @@ -44,28 +500,16 @@ def load_wb_model(): def get_2d_pose(model, imgs, num_joints=17): - """See: https://mmpose.readthedocs.io/en/latest/user_guides/inference.html#basic-usage""" - result_generator = model(imgs, show=False) new_poses = [] - for _ in range(len(imgs)): - result = next(result_generator) + for i in range(len(imgs)): + img = imgs[i] + dets = model.predict(img) - poses = [] - for i in range(len(result["predictions"][0])): - kpts = result["predictions"][0][i]["keypoints"] - scores = result["predictions"][0][i]["keypoint_scores"] - - kpts = np.array(kpts) - scores = np.array(scores).reshape(-1, 1) - scores = np.clip(scores, 0, 1) - pose = np.concatenate((kpts, scores), axis=-1) - poses.append(pose) - - if len(poses) == 0: - poses.append(np.zeros([num_joints, 3])) - - poses = np.array(poses) + if len(dets) == 0: + poses = np.zeros([1, num_joints, 3], dtype=float) + else: + poses = 
np.asarray(dets, dtype=float) new_poses.append(poses) return new_poses diff --git a/swig/Makefile b/swig/Makefile index d4061b0..bc4ab3a 100644 --- a/swig/Makefile +++ b/swig/Makefile @@ -1,5 +1,5 @@ # Standard compile options for the C++ executable -FLAGS = -fPIC -O3 -march=native -Wall -Werror -flto -fopenmp -fopenmp-simd +FLAGS = -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd # The Python interface through SWIG PYTHON_VERSION = $(shell python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}");') diff --git a/tests/test_interface.py b/tests/test_interface.py index c571421..a8d8c16 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -60,7 +60,7 @@ def main(): cameras = rpt.convert_cameras(cams) # Run triangulation - triangulator = rpt.Triangulator(min_score=0.95) + triangulator = rpt.Triangulator(min_match_score=0.95) stime = time.time() poses_3d = triangulator.triangulate_poses( poses_2d, cameras, roomparams, joint_names