Eval skelda datasets with cpp implementation.
This commit is contained in:
21
README.md
21
README.md
@ -30,6 +30,27 @@ Fast triangulation of multiple persons from multiple camera views.
|
|||||||
- Build triangulator:
|
- Build triangulator:
|
||||||
```bash
|
```bash
|
||||||
cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..
|
cd /RapidPoseTriangulation/swig/ && make all && cd ../tests/ && python3 test_interface.py && cd ..
|
||||||
|
|
||||||
|
cd /RapidPoseTriangulation/scripts/ && \
|
||||||
|
g++ -std=c++17 -fPIC -O3 -march=native -Wall -Werror -flto=auto -fopenmp -fopenmp-simd \
|
||||||
|
$(pkg-config --cflags opencv4) \
|
||||||
|
-I /RapidPoseTriangulation/rpt/ \
|
||||||
|
-I /onnxruntime/include/ \
|
||||||
|
-I /onnxruntime/include/onnxruntime/core/session/ \
|
||||||
|
-I /onnxruntime/include/onnxruntime/core/providers/tensorrt/ \
|
||||||
|
-L /onnxruntime/build/Linux/Release/ \
|
||||||
|
test_skelda_dataset_cpp.cpp \
|
||||||
|
/RapidPoseTriangulation/rpt/*.cpp \
|
||||||
|
-o test_skelda_dataset \
|
||||||
|
-Wl,--start-group \
|
||||||
|
-lonnxruntime_providers_tensorrt \
|
||||||
|
-lonnxruntime_providers_shared \
|
||||||
|
-lonnxruntime_providers_cuda \
|
||||||
|
-lonnxruntime \
|
||||||
|
-Wl,--end-group \
|
||||||
|
$(pkg-config --libs opencv4) \
|
||||||
|
-Wl,-rpath,/onnxruntime/build/Linux/Release/ \
|
||||||
|
&& cd ..
|
||||||
```
|
```
|
||||||
|
|
||||||
- Test with samples:
|
- Test with samples:
|
||||||
|
|||||||
1
scripts/.gitignore
vendored
Normal file
1
scripts/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
test_skelda_dataset
|
||||||
266
scripts/test_skelda_dataset_cpp.cpp
Normal file
266
scripts/test_skelda_dataset_cpp.cpp
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
#include <chrono>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
// OpenCV
|
||||||
|
#include <opencv2/opencv.hpp>
|
||||||
|
|
||||||
|
// JSON library
|
||||||
|
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
|
||||||
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
|
||||||
|
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
|
||||||
|
#include "/RapidPoseTriangulation/rpt/interface.hpp"
|
||||||
|
#include "/RapidPoseTriangulation/rpt/camera.hpp"
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
// Input files read by main(): the exported dataset items and the run configuration.
static const std::string path_data{"/tmp/rpt/all.json"};
static const std::string path_cfg{"/tmp/rpt/config.json"};
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
/**
 * Load all images of one dataset item and convert them to Bayer encoding.
 *
 * @param item Dataset entry. Reads "imgpaths" and "dataset_name". For "human36m" items whose
 *             image is not 1000x1000, the image is resized to 1000x1000 and the focal lengths
 *             and principal point in item["cameras"][i]["K"] are rescaled in place to match.
 * @return One image per entry of item["imgpaths"], in the same order, after
 *         utils_pipeline::rgb2bayer.
 * @throws std::runtime_error if an image file cannot be read.
 */
std::vector<cv::Mat> load_images(json &item)
{
    // Load images
    const auto &imgpaths = item["imgpaths"];
    std::vector<cv::Mat> images;
    images.reserve(imgpaths.size());
    for (size_t j = 0; j < imgpaths.size(); j++)
    {
        const auto ipath = imgpaths[j].get<std::string>();
        cv::Mat image = cv::imread(ipath, cv::IMREAD_COLOR);
        if (image.empty())
        {
            // imread reports failure through an empty matrix; fail with the path
            // instead of letting cvtColor abort with an unrelated assertion
            throw std::runtime_error("Unable to read image: " + ipath);
        }
        cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
        images.push_back(image);
    }

    if (item["dataset_name"] == "human36m")
    {
        // Since the images don't have the same shape, rescale some of them
        const cv::Size target_size(1000, 1000);
        for (size_t i = 0; i < images.size(); i++)
        {
            cv::Mat &img = images[i];
            const cv::Size ishape = img.size();
            if (ishape != target_size)
            {
                // Bind the intrinsics by reference: the rescaled values have to be stored
                // in the item itself, a copied json node would silently drop them
                auto &K = item["cameras"][i]["K"];
                const double scale_y = 1000.0 / ishape.height;
                const double scale_x = 1000.0 / ishape.width;
                K[1][1] = K[1][1].get<float>() * scale_y;
                K[1][2] = K[1][2].get<float>() * scale_y;
                K[0][0] = K[0][0].get<float>() * scale_x;
                K[0][2] = K[0][2].get<float>() * scale_x;
                cv::resize(img, img, target_size);
            }
        }
    }

    // Convert image format to Bayer encoding to simulate real camera input
    // This also resulted in notably better MPJPE results in most cases, presumably since the
    // demosaicing algorithm from OpenCV is better than the default one from the cameras
    for (cv::Mat &img : images)
    {
        img = utils_pipeline::rgb2bayer(img);
    }

    return images;
}
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
/**
 * Read the complete content of a file into a string.
 *
 * @param path Path of the file to read.
 * @return The file content.
 * @throws std::runtime_error if the file cannot be opened.
 */
std::string read_file(const std::string &path)
{
    std::ifstream input(path);
    if (input.is_open())
    {
        std::ostringstream content;
        content << input.rdbuf();
        return content.str();
    }

    throw std::runtime_error("Unable to open file: " + path);
}
|
||||||
|
|
||||||
|
/**
 * Write a string to a file, replacing any previous content.
 *
 * @param path    Path of the file to write (opened in binary mode).
 * @param content Data to write.
 * @throws std::runtime_error if the file cannot be opened or the data cannot be written.
 */
void write_file(const std::string &path, const std::string &content)
{
    std::ofstream file_stream(path, std::ios::out | std::ios::binary);
    if (!file_stream.is_open())
    {
        throw std::runtime_error("Unable to open file for writing: " + path);
    }

    file_stream << content;

    // Close before checking the state: the data may still sit in the stream buffer after
    // operator<<, so errors like a full disk only show up once it is flushed to the file
    file_stream.close();
    if (!file_stream)
    {
        throw std::runtime_error("Error occurred while writing to file: " + path);
    }
}
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
// Load the files
|
||||||
|
auto dataset = json::parse(read_file(path_data));
|
||||||
|
auto config = json::parse(read_file(path_cfg));
|
||||||
|
|
||||||
|
// Load the configuration
|
||||||
|
const std::map<std::string, bool> whole_body = config["whole_body"];
|
||||||
|
const float min_bbox_score = config["min_bbox_score"];
|
||||||
|
const float min_bbox_area = config["min_bbox_area"];
|
||||||
|
const bool batch_poses = config["batch_poses"];
|
||||||
|
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
|
||||||
|
const float min_match_score = config["min_match_score"];
|
||||||
|
const size_t min_group_size = config["min_group_size"];
|
||||||
|
const int take_interval = config["take_interval"];
|
||||||
|
|
||||||
|
// Load 2D model
|
||||||
|
bool use_wb = utils_pipeline::use_whole_body(whole_body);
|
||||||
|
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model =
|
||||||
|
std::make_unique<utils_2d_pose::PosePredictor>(
|
||||||
|
use_wb, min_bbox_score, min_bbox_area, batch_poses);
|
||||||
|
|
||||||
|
// Load 3D model
|
||||||
|
std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>(
|
||||||
|
min_match_score, min_group_size);
|
||||||
|
|
||||||
|
// Timers
|
||||||
|
size_t time_count = dataset.size();
|
||||||
|
double time_image = 0.0;
|
||||||
|
double time_pose2d = 0.0;
|
||||||
|
double time_pose3d = 0.0;
|
||||||
|
size_t print_steps = (size_t)std::floor((float)time_count / 100.0f);
|
||||||
|
|
||||||
|
std::cout << "Running predictions: |";
|
||||||
|
size_t bar_width = (size_t)std::ceil((float)time_count / (float)print_steps) - 2;
|
||||||
|
for (size_t i = 0; i < bar_width; i++)
|
||||||
|
{
|
||||||
|
std::cout << "-";
|
||||||
|
}
|
||||||
|
std::cout << "|" << std::endl;
|
||||||
|
|
||||||
|
// Calculate 2D poses [items, views, persons, joints, 3]
|
||||||
|
std::vector<std::vector<std::vector<std::vector<std::array<float, 3>>>>> all_poses_2d;
|
||||||
|
std::cout << "Calculating 2D poses: ";
|
||||||
|
for (size_t i = 0; i < dataset.size(); i++)
|
||||||
|
{
|
||||||
|
if (i % print_steps == 0)
|
||||||
|
{
|
||||||
|
std::cout << "#" << std::flush;
|
||||||
|
}
|
||||||
|
std::chrono::duration<float> elapsed;
|
||||||
|
auto &item = dataset[i];
|
||||||
|
|
||||||
|
// Load images
|
||||||
|
auto stime = std::chrono::high_resolution_clock::now();
|
||||||
|
std::vector<cv::Mat> images = load_images(item);
|
||||||
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
||||||
|
time_image += elapsed.count();
|
||||||
|
|
||||||
|
// Predict 2D poses
|
||||||
|
stime = std::chrono::high_resolution_clock::now();
|
||||||
|
for (size_t i = 0; i < images.size(); i++)
|
||||||
|
{
|
||||||
|
cv::Mat &img = images[i];
|
||||||
|
cv::Mat rgb = utils_pipeline::bayer2rgb(img);
|
||||||
|
images[i] = std::move(rgb);
|
||||||
|
}
|
||||||
|
auto poses_2d_all = kpt_model->predict(images);
|
||||||
|
auto poses_2d_upd = utils_pipeline::update_keypoints(
|
||||||
|
poses_2d_all, joint_names_2d, whole_body);
|
||||||
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
||||||
|
time_pose2d += elapsed.count();
|
||||||
|
|
||||||
|
all_poses_2d.push_back(std::move(poses_2d_upd));
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
// Calculate 3D poses [items, persons, joints, 4]
|
||||||
|
std::vector<std::vector<std::vector<std::array<float, 4>>>> all_poses_3d;
|
||||||
|
std::vector<std::string> all_ids;
|
||||||
|
std::string old_scene = "";
|
||||||
|
int old_id = -1;
|
||||||
|
std::cout << "Calculating 3D poses: ";
|
||||||
|
for (size_t i = 0; i < dataset.size(); i++)
|
||||||
|
{
|
||||||
|
if (i % print_steps == 0)
|
||||||
|
{
|
||||||
|
std::cout << "#" << std::flush;
|
||||||
|
}
|
||||||
|
std::chrono::duration<float> elapsed;
|
||||||
|
auto &item = dataset[i];
|
||||||
|
auto &poses_2d = all_poses_2d[i];
|
||||||
|
|
||||||
|
if (old_scene != item["scene"] || old_id + take_interval < item["index"])
|
||||||
|
{
|
||||||
|
// Reset last poses if scene changes
|
||||||
|
tri_model->reset();
|
||||||
|
old_scene = item["scene"];
|
||||||
|
}
|
||||||
|
|
||||||
|
auto stime = std::chrono::high_resolution_clock::now();
|
||||||
|
std::vector<Camera> cameras;
|
||||||
|
for (size_t j = 0; j < item["cameras"].size(); j++)
|
||||||
|
{
|
||||||
|
auto &cam = item["cameras"][j];
|
||||||
|
Camera camera;
|
||||||
|
camera.name = cam["name"].get<std::string>();
|
||||||
|
camera.K = cam["K"].get<std::array<std::array<float, 3>, 3>>();
|
||||||
|
camera.DC = cam["DC"].get<std::vector<float>>();
|
||||||
|
camera.R = cam["R"].get<std::array<std::array<float, 3>, 3>>();
|
||||||
|
camera.T = cam["T"].get<std::array<std::array<float, 1>, 3>>();
|
||||||
|
camera.width = cam["width"].get<int>();
|
||||||
|
camera.height = cam["height"].get<int>();
|
||||||
|
camera.type = cam["type"].get<std::string>();
|
||||||
|
cameras.push_back(camera);
|
||||||
|
}
|
||||||
|
std::array<std::array<float, 3>, 2> roomparams = {
|
||||||
|
item["room_size"].get<std::array<float, 3>>(),
|
||||||
|
item["room_center"].get<std::array<float, 3>>()};
|
||||||
|
|
||||||
|
auto poses_3d = tri_model->triangulate_poses(poses_2d, cameras, roomparams, joint_names_2d);
|
||||||
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
||||||
|
time_pose3d += elapsed.count();
|
||||||
|
|
||||||
|
all_poses_3d.push_back(std::move(poses_3d));
|
||||||
|
all_ids.push_back(item["id"].get<std::string>());
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
// Print timing stats
|
||||||
|
std::cout << "\nMetrics:" << std::endl;
|
||||||
|
tri_model->print_stats();
|
||||||
|
size_t warmup = 10;
|
||||||
|
double avg_time_image = time_image / (time_count - warmup);
|
||||||
|
double avg_time_pose2d = time_pose2d / (time_count - warmup);
|
||||||
|
double avg_time_pose3d = time_pose3d / (time_count - warmup);
|
||||||
|
double fps = 1.0 / (avg_time_pose2d + avg_time_pose3d);
|
||||||
|
std::cout << "{\n"
|
||||||
|
<< " \"img_loading\": " << avg_time_image << ",\n"
|
||||||
|
<< " \"avg_time_2d\": " << avg_time_pose2d << ",\n"
|
||||||
|
<< " \"avg_time_3d\": " << avg_time_pose3d << ",\n"
|
||||||
|
<< " \"fps\": " << fps << "\n"
|
||||||
|
<< "}" << std::endl;
|
||||||
|
|
||||||
|
// Store the results as json
|
||||||
|
json all_results;
|
||||||
|
all_results["all_ids"] = all_ids;
|
||||||
|
all_results["all_poses_2d"] = all_poses_2d;
|
||||||
|
all_results["all_poses_3d"] = all_poses_3d;
|
||||||
|
all_results["joint_names_2d"] = joint_names_2d;
|
||||||
|
all_results["joint_names_3d"] = joint_names_2d;
|
||||||
|
|
||||||
|
// Save the results
|
||||||
|
std::string path_results = "/tmp/rpt/results.json";
|
||||||
|
write_file(path_results, all_results.dump(0));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
395
scripts/test_skelda_dataset_cpp.py
Normal file
395
scripts/test_skelda_dataset_cpp.py
Normal file
@ -0,0 +1,395 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
import utils_pipeline
|
||||||
|
from skelda import evals
|
||||||
|
from skelda.writers import json_writer
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
whole_body = {
|
||||||
|
"foots": False,
|
||||||
|
"face": False,
|
||||||
|
"hands": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
dataset_use = "human36m"
|
||||||
|
# dataset_use = "panoptic"
|
||||||
|
# dataset_use = "mvor"
|
||||||
|
# dataset_use = "shelf"
|
||||||
|
# dataset_use = "campus"
|
||||||
|
# dataset_use = "ikeaasm"
|
||||||
|
# dataset_use = "chi3d"
|
||||||
|
# dataset_use = "tsinghua"
|
||||||
|
# dataset_use = "human36m_wb"
|
||||||
|
# dataset_use = "egohumans_tagging"
|
||||||
|
# dataset_use = "egohumans_legoassemble"
|
||||||
|
# dataset_use = "egohumans_fencing"
|
||||||
|
# dataset_use = "egohumans_basketball"
|
||||||
|
# dataset_use = "egohumans_volleyball"
|
||||||
|
# dataset_use = "egohumans_badminton"
|
||||||
|
# dataset_use = "egohumans_tennis"
|
||||||
|
|
||||||
|
|
||||||
|
# Describes the minimum area as fraction of the image size for a 2D bounding box to be considered
|
||||||
|
# If the persons are small in the image, use a lower value
|
||||||
|
default_min_bbox_area = 0.1 * 0.1
|
||||||
|
|
||||||
|
# Describes how confident a 2D bounding box needs to be to be considered
|
||||||
|
# If the persons are small in the image, or poorly recognizable, use a lower value
|
||||||
|
default_min_bbox_score = 0.3
|
||||||
|
|
||||||
|
# Describes how good two 2D poses need to match each other to create a valid triangulation
|
||||||
|
# If the quality of the 2D detections is poor, use a lower value
|
||||||
|
default_min_match_score = 0.94
|
||||||
|
|
||||||
|
# Describes the minimum number of camera pairs that need to detect the same person
|
||||||
|
# If the number of cameras is high, and the views are not occluded, use a higher value
|
||||||
|
default_min_group_size = 1
|
||||||
|
|
||||||
|
# Batch poses per image for faster processing
|
||||||
|
# If most of the time only one person is in an image, disable it, because it is slightly slower then
|
||||||
|
default_batch_poses = True
|
||||||
|
|
||||||
|
datasets = {
|
||||||
|
"human36m": {
|
||||||
|
"path": "/datasets/human36m/skelda/pose_test.json",
|
||||||
|
"take_interval": 5,
|
||||||
|
"min_match_score": 0.95,
|
||||||
|
"min_group_size": 1,
|
||||||
|
"min_bbox_score": 0.4,
|
||||||
|
"min_bbox_area": 0.1 * 0.1,
|
||||||
|
"batch_poses": False,
|
||||||
|
},
|
||||||
|
"panoptic": {
|
||||||
|
"path": "/datasets/panoptic/skelda/test.json",
|
||||||
|
"cams": ["00_03", "00_06", "00_12", "00_13", "00_23"],
|
||||||
|
# "cams": ["00_03", "00_06", "00_12"],
|
||||||
|
# "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
|
||||||
|
"take_interval": 3,
|
||||||
|
"min_match_score": 0.95,
|
||||||
|
"use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
|
||||||
|
"min_group_size": 1,
|
||||||
|
# "min_group_size": 4,
|
||||||
|
"min_bbox_area": 0.05 * 0.05,
|
||||||
|
},
|
||||||
|
"mvor": {
|
||||||
|
"path": "/datasets/mvor/skelda/all.json",
|
||||||
|
"take_interval": 1,
|
||||||
|
"with_depth": False,
|
||||||
|
"min_match_score": 0.85,
|
||||||
|
"min_bbox_score": 0.25,
|
||||||
|
},
|
||||||
|
"campus": {
|
||||||
|
"path": "/datasets/campus/skelda/test.json",
|
||||||
|
"take_interval": 1,
|
||||||
|
"min_match_score": 0.90,
|
||||||
|
"min_bbox_score": 0.5,
|
||||||
|
},
|
||||||
|
"shelf": {
|
||||||
|
"path": "/datasets/shelf/skelda/test.json",
|
||||||
|
"take_interval": 1,
|
||||||
|
"min_match_score": 0.96,
|
||||||
|
"min_group_size": 2,
|
||||||
|
},
|
||||||
|
"ikeaasm": {
|
||||||
|
"path": "/datasets/ikeaasm/skelda/test.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"min_match_score": 0.92,
|
||||||
|
"min_bbox_score": 0.20,
|
||||||
|
},
|
||||||
|
"chi3d": {
|
||||||
|
"path": "/datasets/chi3d/skelda/all.json",
|
||||||
|
"take_interval": 5,
|
||||||
|
},
|
||||||
|
"tsinghua": {
|
||||||
|
"path": "/datasets/tsinghua/skelda/test.json",
|
||||||
|
"take_interval": 3,
|
||||||
|
"min_match_score": 0.95,
|
||||||
|
"min_group_size": 2,
|
||||||
|
},
|
||||||
|
"human36m_wb": {
|
||||||
|
"path": "/datasets/human36m/skelda/wb/test.json",
|
||||||
|
"take_interval": 100,
|
||||||
|
"min_bbox_score": 0.4,
|
||||||
|
"batch_poses": False,
|
||||||
|
},
|
||||||
|
"egohumans_tagging": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "tagging",
|
||||||
|
"min_group_size": 2,
|
||||||
|
"min_bbox_score": 0.2,
|
||||||
|
"min_bbox_area": 0.05 * 0.05,
|
||||||
|
},
|
||||||
|
"egohumans_legoassemble": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "legoassemble",
|
||||||
|
"min_group_size": 2,
|
||||||
|
},
|
||||||
|
"egohumans_fencing": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "fencing",
|
||||||
|
"min_group_size": 7,
|
||||||
|
"min_bbox_score": 0.5,
|
||||||
|
"min_bbox_area": 0.05 * 0.05,
|
||||||
|
},
|
||||||
|
"egohumans_basketball": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "basketball",
|
||||||
|
"min_group_size": 7,
|
||||||
|
"min_bbox_score": 0.25,
|
||||||
|
"min_bbox_area": 0.025 * 0.025,
|
||||||
|
},
|
||||||
|
"egohumans_volleyball": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "volleyball",
|
||||||
|
"min_group_size": 11,
|
||||||
|
"min_bbox_score": 0.25,
|
||||||
|
"min_bbox_area": 0.05 * 0.05,
|
||||||
|
},
|
||||||
|
"egohumans_badminton": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "badminton",
|
||||||
|
"min_group_size": 7,
|
||||||
|
"min_bbox_score": 0.25,
|
||||||
|
"min_bbox_area": 0.05 * 0.05,
|
||||||
|
},
|
||||||
|
"egohumans_tennis": {
|
||||||
|
"path": "/datasets/egohumans/skelda/all.json",
|
||||||
|
"take_interval": 2,
|
||||||
|
"subset": "tennis",
|
||||||
|
"min_group_size": 11,
|
||||||
|
"min_bbox_area": 0.025 * 0.025,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Joint names of the 2D model, the 3D output starts with the same names
joint_names_2d = utils_pipeline.get_joint_names(whole_body)
joint_names_3d = list(joint_names_2d)

# Joints that are used for the evaluation
eval_joints = [
    "head",
    "shoulder_left",
    "shoulder_right",
    "elbow_left",
    "elbow_right",
    "wrist_left",
    "wrist_right",
    "hip_left",
    "hip_right",
    "knee_left",
    "knee_right",
    "ankle_left",
    "ankle_right",
]

# Some datasets are evaluated on the nose instead of the head, the whole-body dataset
# uses all predicted joints if any whole-body part is enabled
if dataset_use in ("human36m", "panoptic"):
    eval_joints[eval_joints.index("head")] = "nose"
elif dataset_use == "human36m_wb":
    if utils_pipeline.use_whole_body(whole_body):
        eval_joints = list(joint_names_2d)
    else:
        eval_joints[eval_joints.index("head")] = "nose"

# output_dir = "/RapidPoseTriangulation/data/testoutput/"
output_dir = ""
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def load_json(path: str):
    """Parse the UTF-8 encoded JSON file at `path` and return its content."""

    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)
|
||||||
|
|
||||||
|
|
||||||
|
def save_json(data: dict, path: str):
    """Write `data` as JSON to `path` (UTF-8), one element per line without indentation."""

    with open(path, mode="w+", encoding="utf-8") as out_file:
        json.dump(data, fp=out_file, indent=0)
|
||||||
|
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def load_labels(dataset: dict):
    """Load labels by dataset description

    Args:
        dataset: Maps one dataset name to its settings, which need to contain the "path" of
            the label file. Some datasets read further keys ("cams" and optionally
            "use_scenes" for panoptic, "subset" for the egohumans datasets). An optional
            top-level "take_interval" keeps only every n-th label.

    Returns:
        The filtered list of labels. Every label has a "scene" key (default "default") and
        every camera of a label has a "type" key (default "pinhole").

    Raises:
        ValueError: If no known dataset name is found in `dataset`.
    """

    if "panoptic" in dataset:
        labels = load_json(dataset["panoptic"]["path"])
        labels = [lb for i, lb in enumerate(labels) if i % 1500 < 90]

        # Filter by maximum number of persons
        labels = [l for l in labels if len(l["bodies3D"]) <= 10]

        # Filter scenes
        if "use_scenes" in dataset["panoptic"]:
            labels = [
                l for l in labels if l["scene"] in dataset["panoptic"]["use_scenes"]
            ]

        # Filter cameras (nothing to do if the filters above removed all labels)
        if labels and "cameras_depth" not in labels[0]:
            for label in labels:
                # Iterate backwards so that removing an entry keeps the other indices valid
                for i, cam in reversed(list(enumerate(label["cameras"]))):
                    if cam["name"] not in dataset["panoptic"]["cams"]:
                        label["cameras"].pop(i)
                        label["imgpaths"].pop(i)

    elif "human36m" in dataset:
        labels = load_json(dataset["human36m"]["path"])
        labels = [lb for lb in labels if lb["subject"] == "S9"]
        labels = [lb for i, lb in enumerate(labels) if i % 4000 < 150]

        for label in labels:
            label.pop("action")
            label.pop("frame")

    elif "mvor" in dataset:
        labels = load_json(dataset["mvor"]["path"])

        # Make the cameras and image paths available under the color keys as well
        for label in labels:
            label["cameras_color"] = label["cameras"]
            label["imgpaths_color"] = label["imgpaths"]

    elif "ikeaasm" in dataset:
        labels = load_json(dataset["ikeaasm"]["path"])

        # Keep only the labels that share the camera setup of the first label
        cams0 = str(labels[0]["cameras"])
        labels = [lb for lb in labels if str(lb["cameras"]) == cams0]

    elif "shelf" in dataset:
        labels = load_json(dataset["shelf"]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]

    elif "campus" in dataset:
        labels = load_json(dataset["campus"]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]

    elif "tsinghua" in dataset:
        labels = load_json(dataset["tsinghua"]["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]
        labels = [lb for lb in labels if lb["seq"] == "seq_1"]
        labels = [lb for i, lb in enumerate(labels) if i % 300 < 90]

        for label in labels:
            label["bodyids"] = list(range(len(label["bodies3D"])))

    elif "chi3d" in dataset:
        labels = load_json(dataset["chi3d"]["path"])
        labels = [lb for lb in labels if lb["setup"] == "s03"]
        labels = [lb for i, lb in enumerate(labels) if i % 2000 < 150]

    elif "human36m_wb" in dataset:
        labels = load_json(dataset["human36m_wb"]["path"])

    elif any(("egohumans" in key for key in dataset)):
        # Resolve the settings from the passed description itself, so that the function
        # does not depend on the module-level dataset selection
        ego_key = next(key for key in dataset if "egohumans" in key)
        ego_cfg = dataset[ego_key]

        labels = load_json(ego_cfg["path"])
        labels = [lb for lb in labels if "test" in lb["splits"]]
        labels = [lb for lb in labels if ego_cfg["subset"] in lb["seq"]]
        if ego_cfg["subset"] in ["volleyball", "tennis"]:
            labels = [lb for i, lb in enumerate(labels) if i % 150 < 60]

    else:
        raise ValueError("Dataset not available")

    # Optionally drop samples to speed up train/eval
    if "take_interval" in dataset:
        take_interval = dataset["take_interval"]
        if take_interval > 1:
            labels = [l for i, l in enumerate(labels) if i % take_interval == 0]

    # Add default values
    for label in labels:
        if "scene" not in label:
            label["scene"] = "default"
        for cam in label["cameras"]:
            if "type" not in cam:
                cam["type"] = "pinhole"

    return labels
|
||||||
|
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Export the selected dataset, run the C++ binary on it and evaluate its results.

    Saves the labels and a "config.json" to "/tmp/rpt/", calls the compiled
    "test_skelda_dataset" binary, then loads "/tmp/rpt/results.json" and runs the MPJPE and
    PCP evaluations on the predicted 3D poses.

    Raises:
        RuntimeError: If the C++ binary does not exit successfully.
    """

    global joint_names_3d

    dataset_cfg = datasets[dataset_use]

    print("Loading dataset ...")
    labels = load_labels(
        {
            dataset_use: dataset_cfg,
            "take_interval": dataset_cfg["take_interval"],
        }
    )

    # Print a dataset sample for debugging
    print(labels[0])

    # Save dataset
    # NOTE(review): the binary reads "/tmp/rpt/all.json", presumably written by save_dataset
    tmp_export_dir = "/tmp/rpt/"
    for label in labels:
        if "splits" in label:
            label.pop("splits")
    json_writer.save_dataset(labels, tmp_export_dir)

    # Load dataset specific parameters
    min_match_score = dataset_cfg.get("min_match_score", default_min_match_score)
    min_group_size = dataset_cfg.get("min_group_size", default_min_group_size)
    min_bbox_score = dataset_cfg.get("min_bbox_score", default_min_bbox_score)
    min_bbox_area = dataset_cfg.get("min_bbox_area", default_min_bbox_area)
    batch_poses = dataset_cfg.get("batch_poses", default_batch_poses)

    # Save config
    config_path = tmp_export_dir + "config.json"
    config = {
        "min_match_score": min_match_score,
        "min_group_size": min_group_size,
        "min_bbox_score": min_bbox_score,
        "min_bbox_area": min_bbox_area,
        "batch_poses": batch_poses,
        "whole_body": whole_body,
        "take_interval": dataset_cfg["take_interval"],
    }
    save_json(config, config_path)

    # Call the CPP binary
    # Stop on failure, otherwise results of an earlier run could be evaluated by accident
    binary_path = "/RapidPoseTriangulation/scripts/test_skelda_dataset"
    exit_status = os.system(binary_path)
    if exit_status != 0:
        raise RuntimeError(
            "Running " + binary_path + " failed with status " + str(exit_status)
        )

    # Load the results
    print("Loading exports ...")
    res_path = tmp_export_dir + "results.json"
    results = load_json(res_path)
    all_poses_3d = results["all_poses_3d"]
    all_ids = results["all_ids"]
    joint_names_3d = results["joint_names_3d"]

    # Run evaluation
    _ = evals.mpjpe.run_eval(
        labels,
        all_poses_3d,
        all_ids,
        joint_names_net=joint_names_3d,
        joint_names_use=eval_joints,
        save_error_imgs=output_dir,
    )
    _ = evals.pcp.run_eval(
        labels,
        all_poses_3d,
        all_ids,
        joint_names_net=joint_names_3d,
        joint_names_use=eval_joints,
        replace_head_with_nose=True,
    )
|
||||||
|
|
||||||
|
# ==================================================================================================
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Reference in New Issue
Block a user