291 lines
10 KiB
C++
291 lines
10 KiB
C++
#include <chrono>
|
|
#include <cmath>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <memory>
|
|
#include <sstream>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
// OpenCV
|
|
#include <opencv2/opencv.hpp>
|
|
|
|
// JSON library
|
|
#include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp"
|
|
using json = nlohmann::json;
|
|
|
|
#include "/RapidPoseTriangulation/rpt/camera.hpp"
|
|
#include "/RapidPoseTriangulation/rpt/interface.hpp"
|
|
#include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp"
|
|
#include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp"
|
|
|
|
// =================================================================================================
|
|
|
|
static const std::string path_data = "/tmp/rpt/all.json";
|
|
static const std::string path_cfg = "/tmp/rpt/config.json";
|
|
|
|
// =================================================================================================
|
|
|
|
std::vector<cv::Mat> load_images(json &item)
|
|
{
|
|
// Load images
|
|
std::vector<cv::Mat> images;
|
|
for (size_t j = 0; j < item["imgpaths"].size(); j++)
|
|
{
|
|
auto ipath = item["imgpaths"][j].get<std::string>();
|
|
cv::Mat image = cv::imread(ipath, cv::IMREAD_COLOR);
|
|
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
|
|
images.push_back(image);
|
|
}
|
|
|
|
if (item["dataset_name"] == "human36m")
|
|
{
|
|
// Since the images don't have the same shape, rescale some of them
|
|
for (size_t i = 0; i < images.size(); i++)
|
|
{
|
|
cv::Mat &img = images[i];
|
|
cv::Size ishape = img.size();
|
|
if (ishape != cv::Size(1000, 1000))
|
|
{
|
|
auto cam = item["cameras"][i];
|
|
cam["K"][1][1] = cam["K"][1][1].get<float>() * (1000.0 / ishape.height);
|
|
cam["K"][1][2] = cam["K"][1][2].get<float>() * (1000.0 / ishape.height);
|
|
cam["K"][0][0] = cam["K"][0][0].get<float>() * (1000.0 / ishape.width);
|
|
cam["K"][0][2] = cam["K"][0][2].get<float>() * (1000.0 / ishape.width);
|
|
cv::resize(img, img, cv::Size(1000, 1000));
|
|
images[i] = img;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert image format to Bayer encoding to simulate real camera input
|
|
// This also resulted in notably better MPJPE results in most cases, presumably since the
|
|
// demosaicing algorithm from OpenCV is better than the default one from the cameras
|
|
for (size_t i = 0; i < images.size(); i++)
|
|
{
|
|
cv::Mat &img = images[i];
|
|
cv::Mat bayer_image = utils_pipeline::rgb2bayer(img);
|
|
images[i] = std::move(bayer_image);
|
|
}
|
|
|
|
return images;
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
std::string read_file(const std::string &path)
|
|
{
|
|
std::ifstream file_stream(path);
|
|
if (!file_stream.is_open())
|
|
{
|
|
throw std::runtime_error("Unable to open file: " + path);
|
|
}
|
|
|
|
std::stringstream buffer;
|
|
buffer << file_stream.rdbuf();
|
|
return buffer.str();
|
|
}
|
|
|
|
void write_file(const std::string &path, const std::string &content)
|
|
{
|
|
std::ofstream file_stream(path, std::ios::out | std::ios::binary);
|
|
if (!file_stream.is_open())
|
|
{
|
|
throw std::runtime_error("Unable to open file for writing: " + path);
|
|
}
|
|
|
|
file_stream << content;
|
|
|
|
if (!file_stream)
|
|
{
|
|
throw std::runtime_error("Error occurred while writing to file: " + path);
|
|
}
|
|
file_stream.close();
|
|
}
|
|
|
|
// =================================================================================================
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
// Load the files
|
|
auto dataset = json::parse(read_file(path_data));
|
|
auto config = json::parse(read_file(path_cfg));
|
|
|
|
// Load the configuration
|
|
const std::map<std::string, bool> whole_body = config["whole_body"];
|
|
const float min_bbox_score = config["min_bbox_score"];
|
|
const float min_bbox_area = config["min_bbox_area"];
|
|
const bool batch_poses = config["batch_poses"];
|
|
const std::vector<std::string> joint_names_2d = utils_pipeline::get_joint_names(whole_body);
|
|
const float min_match_score = config["min_match_score"];
|
|
const size_t min_group_size = config["min_group_size"];
|
|
const int take_interval = config["take_interval"];
|
|
|
|
// Load 2D model
|
|
bool use_wb = utils_pipeline::use_whole_body(whole_body);
|
|
std::unique_ptr<utils_2d_pose::PosePredictor> kpt_model =
|
|
std::make_unique<utils_2d_pose::PosePredictor>(
|
|
use_wb, min_bbox_score, min_bbox_area, batch_poses);
|
|
|
|
// Load 3D model
|
|
std::unique_ptr<Triangulator> tri_model = std::make_unique<Triangulator>(
|
|
min_match_score, min_group_size);
|
|
|
|
// Timers
|
|
size_t time_count = dataset.size();
|
|
std::vector<double> times_image;
|
|
std::vector<double> times_debayer;
|
|
std::vector<double> times_pose2d;
|
|
std::vector<double> times_pose3d;
|
|
times_image.reserve(time_count);
|
|
times_debayer.reserve(time_count);
|
|
times_pose2d.reserve(time_count);
|
|
times_pose3d.reserve(time_count);
|
|
size_t print_steps = (size_t)std::floor((float)time_count / 100.0f);
|
|
print_steps = std::max((size_t)1, print_steps);
|
|
|
|
std::cout << "Running predictions: |";
|
|
size_t bar_width = (size_t)std::ceil((float)time_count / (float)print_steps);
|
|
for (size_t i = 0; i < bar_width; i++)
|
|
{
|
|
std::cout << "-";
|
|
}
|
|
std::cout << "|" << std::endl;
|
|
|
|
// Calculate 2D poses [items, views, persons, joints, 3]
|
|
std::vector<std::vector<std::vector<std::vector<std::array<float, 3>>>>> all_poses_2d;
|
|
std::cout << "Calculating 2D poses: |";
|
|
for (size_t i = 0; i < dataset.size(); i++)
|
|
{
|
|
if (i % print_steps == 0)
|
|
{
|
|
std::cout << "#" << std::flush;
|
|
}
|
|
std::chrono::duration<float> elapsed;
|
|
auto &item = dataset[i];
|
|
|
|
// Load images
|
|
auto stime = std::chrono::high_resolution_clock::now();
|
|
std::vector<cv::Mat> images = load_images(item);
|
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
|
times_image.push_back(elapsed.count());
|
|
|
|
// Demosaic images
|
|
stime = std::chrono::high_resolution_clock::now();
|
|
for (size_t i = 0; i < images.size(); i++)
|
|
{
|
|
cv::Mat &img = images[i];
|
|
cv::Mat rgb = utils_pipeline::bayer2rgb(img);
|
|
images[i] = std::move(rgb);
|
|
}
|
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
|
times_debayer.push_back(elapsed.count());
|
|
|
|
// Predict 2D poses
|
|
stime = std::chrono::high_resolution_clock::now();
|
|
auto poses_2d_all = kpt_model->predict(images);
|
|
auto poses_2d_upd = utils_pipeline::update_keypoints(
|
|
poses_2d_all, joint_names_2d, whole_body);
|
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
|
times_pose2d.push_back(elapsed.count());
|
|
|
|
all_poses_2d.push_back(std::move(poses_2d_upd));
|
|
}
|
|
std::cout << "|" << std::endl;
|
|
|
|
// Calculate 3D poses [items, persons, joints, 4]
|
|
std::vector<std::vector<std::vector<std::array<float, 4>>>> all_poses_3d;
|
|
std::vector<std::string> all_ids;
|
|
std::string old_scene = "";
|
|
int old_id = -1;
|
|
std::cout << "Calculating 3D poses: |";
|
|
for (size_t i = 0; i < dataset.size(); i++)
|
|
{
|
|
if (i % print_steps == 0)
|
|
{
|
|
std::cout << "#" << std::flush;
|
|
}
|
|
std::chrono::duration<float> elapsed;
|
|
auto &item = dataset[i];
|
|
auto &poses_2d = all_poses_2d[i];
|
|
|
|
if (old_scene != item["scene"] || old_id + take_interval < item["index"])
|
|
{
|
|
// Reset last poses if scene changes
|
|
tri_model->reset();
|
|
old_scene = item["scene"];
|
|
}
|
|
|
|
auto stime = std::chrono::high_resolution_clock::now();
|
|
std::vector<Camera> cameras;
|
|
for (size_t j = 0; j < item["cameras"].size(); j++)
|
|
{
|
|
auto &cam = item["cameras"][j];
|
|
Camera camera;
|
|
camera.name = cam["name"].get<std::string>();
|
|
camera.K = cam["K"].get<std::array<std::array<float, 3>, 3>>();
|
|
camera.DC = cam["DC"].get<std::vector<float>>();
|
|
camera.R = cam["R"].get<std::array<std::array<float, 3>, 3>>();
|
|
camera.T = cam["T"].get<std::array<std::array<float, 1>, 3>>();
|
|
camera.width = cam["width"].get<int>();
|
|
camera.height = cam["height"].get<int>();
|
|
camera.type = cam["type"].get<std::string>();
|
|
cameras.push_back(camera);
|
|
}
|
|
std::array<std::array<float, 3>, 2> roomparams = {
|
|
item["room_size"].get<std::array<float, 3>>(),
|
|
item["room_center"].get<std::array<float, 3>>()};
|
|
|
|
auto poses_3d = tri_model->triangulate_poses(poses_2d, cameras, roomparams, joint_names_2d);
|
|
elapsed = std::chrono::high_resolution_clock::now() - stime;
|
|
times_pose3d.push_back(elapsed.count());
|
|
|
|
all_poses_3d.push_back(std::move(poses_3d));
|
|
all_ids.push_back(item["id"].get<std::string>());
|
|
old_id = item["index"];
|
|
}
|
|
std::cout << "|" << std::endl;
|
|
|
|
// Print timing stats
|
|
std::cout << "\nMetrics:" << std::endl;
|
|
size_t warmup = std::min((size_t)10, time_count - 1);
|
|
double time_image = 0.0;
|
|
double time_debayer = 0.0;
|
|
double time_pose2d = 0.0;
|
|
double time_pose3d = 0.0;
|
|
for (size_t i = warmup; i < time_count; i++)
|
|
{
|
|
time_image += times_image[i];
|
|
time_debayer += times_debayer[i];
|
|
time_pose2d += times_pose2d[i];
|
|
time_pose3d += times_pose3d[i];
|
|
}
|
|
double avg_time_image = time_image / (time_count - warmup);
|
|
double avg_time_debayer = time_debayer / (time_count - warmup);
|
|
double avg_time_pose2d = time_pose2d / (time_count - warmup);
|
|
double avg_time_pose3d = time_pose3d / (time_count - warmup);
|
|
double fps = 1.0 / (avg_time_debayer + avg_time_pose2d + avg_time_pose3d);
|
|
std::cout << "{\n"
|
|
<< " \"img_loading\": " << avg_time_image << ",\n"
|
|
<< " \"demosaicing\": " << avg_time_debayer << ",\n"
|
|
<< " \"avg_time_2d\": " << avg_time_pose2d << ",\n"
|
|
<< " \"avg_time_3d\": " << avg_time_pose3d << ",\n"
|
|
<< " \"fps\": " << fps << "\n"
|
|
<< "}" << std::endl;
|
|
tri_model->print_stats();
|
|
|
|
// Store the results as json
|
|
json all_results;
|
|
all_results["all_ids"] = all_ids;
|
|
all_results["all_poses_2d"] = all_poses_2d;
|
|
all_results["all_poses_3d"] = all_poses_3d;
|
|
all_results["joint_names_2d"] = joint_names_2d;
|
|
all_results["joint_names_3d"] = joint_names_2d;
|
|
|
|
// Save the results
|
|
std::string path_results = "/tmp/rpt/results.json";
|
|
write_file(path_results, all_results.dump(0));
|
|
|
|
return 0;
|
|
} |