#include #include #include #include #include #include #include #include #include // OpenCV #include // JSON library #include "/RapidPoseTriangulation/extras/include/nlohmann/json.hpp" using json = nlohmann::json; #include "/RapidPoseTriangulation/rpt/camera.hpp" #include "/RapidPoseTriangulation/rpt/interface.hpp" #include "/RapidPoseTriangulation/scripts/utils_2d_pose.hpp" #include "/RapidPoseTriangulation/scripts/utils_pipeline.hpp" // ================================================================================================= static const std::string path_data = "/tmp/rpt/all.json"; static const std::string path_cfg = "/tmp/rpt/config.json"; // ================================================================================================= std::vector load_images(json &item) { // Load images std::vector images; for (size_t j = 0; j < item["imgpaths"].size(); j++) { auto ipath = item["imgpaths"][j].get(); cv::Mat image = cv::imread(ipath, cv::IMREAD_COLOR); cv::cvtColor(image, image, cv::COLOR_BGR2RGB); images.push_back(image); } if (item["dataset_name"] == "human36m") { // Since the images don't have the same shape, rescale some of them for (size_t i = 0; i < images.size(); i++) { cv::Mat &img = images[i]; cv::Size ishape = img.size(); if (ishape != cv::Size(1000, 1000)) { auto cam = item["cameras"][i]; cam["K"][1][1] = cam["K"][1][1].get() * (1000.0 / ishape.height); cam["K"][1][2] = cam["K"][1][2].get() * (1000.0 / ishape.height); cam["K"][0][0] = cam["K"][0][0].get() * (1000.0 / ishape.width); cam["K"][0][2] = cam["K"][0][2].get() * (1000.0 / ishape.width); cv::resize(img, img, cv::Size(1000, 1000)); images[i] = img; } } } // Convert image format to Bayer encoding to simulate real camera input // This also resulted in notably better MPJPE results in most cases, presumbly since the // demosaicing algorithm from OpenCV is better than the default one from the cameras for (size_t i = 0; i < images.size(); i++) { cv::Mat &img = images[i]; cv::Mat bayer_image = utils_pipeline::rgb2bayer(img); images[i] = std::move(bayer_image); } return images; } // ================================================================================================= std::string read_file(const std::string &path) { std::ifstream file_stream(path); if (!file_stream.is_open()) { throw std::runtime_error("Unable to open file: " + path); } std::stringstream buffer; buffer << file_stream.rdbuf(); return buffer.str(); } void write_file(const std::string &path, const std::string &content) { std::ofstream file_stream(path, std::ios::out | std::ios::binary); if (!file_stream.is_open()) { throw std::runtime_error("Unable to open file for writing: " + path); } file_stream << content; if (!file_stream) { throw std::runtime_error("Error occurred while writing to file: " + path); } file_stream.close(); } // ================================================================================================= int main(int argc, char **argv) { // Load the files auto dataset = json::parse(read_file(path_data)); auto config = json::parse(read_file(path_cfg)); // Load the configuration const std::map whole_body = config["whole_body"]; const float min_bbox_score = config["min_bbox_score"]; const float min_bbox_area = config["min_bbox_area"]; const bool batch_poses = config["batch_poses"]; const std::vector joint_names_2d = utils_pipeline::get_joint_names(whole_body); const float min_match_score = config["min_match_score"]; const size_t min_group_size = config["min_group_size"]; const int take_interval = config["take_interval"]; // Load 2D model bool use_wb = utils_pipeline::use_whole_body(whole_body); std::unique_ptr kpt_model = std::make_unique( use_wb, min_bbox_score, min_bbox_area, batch_poses); // Load 3D model std::unique_ptr tri_model = std::make_unique( min_match_score, min_group_size); // Timers size_t time_count = dataset.size(); double time_image = 0.0; double time_pose2d = 0.0; double time_pose3d = 0.0; size_t print_steps = (size_t)std::floor((float)time_count / 100.0f); std::cout << "Running predictions: |"; size_t bar_width = (size_t)std::ceil((float)time_count / (float)print_steps) - 2; for (size_t i = 0; i < bar_width; i++) { std::cout << "-"; } std::cout << "|" << std::endl; // Calculate 2D poses [items, views, persons, joints, 3] std::vector>>>> all_poses_2d; std::cout << "Calculating 2D poses: "; for (size_t i = 0; i < dataset.size(); i++) { if (i % print_steps == 0) { std::cout << "#" << std::flush; } std::chrono::duration elapsed; auto &item = dataset[i]; // Load images auto stime = std::chrono::high_resolution_clock::now(); std::vector images = load_images(item); elapsed = std::chrono::high_resolution_clock::now() - stime; time_image += elapsed.count(); // Predict 2D poses stime = std::chrono::high_resolution_clock::now(); for (size_t i = 0; i < images.size(); i++) { cv::Mat &img = images[i]; cv::Mat rgb = utils_pipeline::bayer2rgb(img); images[i] = std::move(rgb); } auto poses_2d_all = kpt_model->predict(images); auto poses_2d_upd = utils_pipeline::update_keypoints( poses_2d_all, joint_names_2d, whole_body); elapsed = std::chrono::high_resolution_clock::now() - stime; time_pose2d += elapsed.count(); all_poses_2d.push_back(std::move(poses_2d_upd)); } std::cout << std::endl; // Calculate 3D poses [items, persons, joints, 4] std::vector>>> all_poses_3d; std::vector all_ids; std::string old_scene = ""; int old_id = -1; std::cout << "Calculating 3D poses: "; for (size_t i = 0; i < dataset.size(); i++) { if (i % print_steps == 0) { std::cout << "#" << std::flush; } std::chrono::duration elapsed; auto &item = dataset[i]; auto &poses_2d = all_poses_2d[i]; if (old_scene != item["scene"] || old_id + take_interval < item["index"]) { // Reset last poses if scene changes tri_model->reset(); old_scene = item["scene"]; } auto stime = std::chrono::high_resolution_clock::now(); std::vector cameras; for (size_t j = 0; j < item["cameras"].size(); j++) { auto &cam = item["cameras"][j]; Camera camera; camera.name = cam["name"].get(); camera.K = cam["K"].get, 3>>(); camera.DC = cam["DC"].get>(); camera.R = cam["R"].get, 3>>(); camera.T = cam["T"].get, 3>>(); camera.width = cam["width"].get(); camera.height = cam["height"].get(); camera.type = cam["type"].get(); cameras.push_back(camera); } std::array, 2> roomparams = { item["room_size"].get>(), item["room_center"].get>()}; auto poses_3d = tri_model->triangulate_poses(poses_2d, cameras, roomparams, joint_names_2d); elapsed = std::chrono::high_resolution_clock::now() - stime; time_pose3d += elapsed.count(); all_poses_3d.push_back(std::move(poses_3d)); all_ids.push_back(item["id"].get()); old_id = item["index"]; } std::cout << std::endl; // Print timing stats std::cout << "\nMetrics:" << std::endl; size_t warmup = 10; double avg_time_image = time_image / (time_count - warmup); double avg_time_pose2d = time_pose2d / (time_count - warmup); double avg_time_pose3d = time_pose3d / (time_count - warmup); double fps = 1.0 / (avg_time_pose2d + avg_time_pose3d); std::cout << "{\n" << " \"img_loading\": " << avg_time_image << ",\n" << " \"avg_time_2d\": " << avg_time_pose2d << ",\n" << " \"avg_time_3d\": " << avg_time_pose3d << ",\n" << " \"fps\": " << fps << "\n" << "}" << std::endl; tri_model->print_stats(); // Store the results as json json all_results; all_results["all_ids"] = all_ids; all_results["all_poses_2d"] = all_poses_2d; all_results["all_poses_3d"] = all_poses_3d; all_results["joint_names_2d"] = joint_names_2d; all_results["joint_names_3d"] = joint_names_2d; // Save the results std::string path_results = "/tmp/rpt/results.json"; write_file(path_results, all_results.dump(0)); return 0; }