Some code speedups.

This commit is contained in:
Daniel
2025-01-17 18:02:17 +01:00
parent 99368e5216
commit 97ea039f7d
4 changed files with 49 additions and 56 deletions

View File

@ -127,17 +127,17 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
if (msg->encoding == "mono8") if (msg->encoding == "mono8")
{ {
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding); cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image.clone(); bayer_image = cv_ptr->image;
} }
else if (msg->encoding == "bayer_rggb8") else if (msg->encoding == "bayer_rggb8")
{ {
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding); cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image.clone(); bayer_image = cv_ptr->image;
} }
else if (msg->encoding == "rgb8") else if (msg->encoding == "rgb8")
{ {
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8"); cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
cv::Mat color_image = cv_ptr->image.clone(); cv::Mat color_image = cv_ptr->image;
bayer_image = test_triangulate::rgb2bayer(color_image); bayer_image = test_triangulate::rgb2bayer(color_image);
} }
else else
@ -157,7 +157,7 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
// Store in member variables with lock // Store in member variables with lock
{ {
std::lock_guard<std::mutex> lk(mutex); std::lock_guard<std::mutex> lk(mutex);
this->last_input_image = bayer_image; this->last_input_image = std::move(bayer_image);
this->last_input_time = time_stamp; this->last_input_time = time_stamp;
} }
} }
@ -171,17 +171,15 @@ void Rpt2DWrapperNode::callbackModel()
// Check if we have an image // Check if we have an image
cv::Mat local_image; cv::Mat local_image;
double local_timestamp = 0.0; double local_timestamp = 0.0;
{ {
std::lock_guard<std::mutex> lk(mutex); std::lock_guard<std::mutex> lk(mutex);
if (last_input_image.empty()) if (last_input_time == 0.0)
{ {
return; return;
} }
local_image = last_input_image.clone(); local_image = std::move(last_input_image);
local_timestamp = last_input_time; local_timestamp = last_input_time;
// “Consume” the last image so we dont process it again
last_input_image = cv::Mat(); last_input_image = cv::Mat();
last_input_time = 0.0; last_input_time = 0.0;
} }
@ -198,7 +196,7 @@ void Rpt2DWrapperNode::callbackModel()
auto &poses_2d = poses_2d_upd[0]; auto &poses_2d = poses_2d_upd[0];
// Drop persons with no joints // Drop persons with no joints
std::vector<std::vector<std::array<float,3>>> valid_poses; std::vector<std::vector<std::array<float, 3>>> valid_poses;
for (auto &person : poses_2d) for (auto &person : poses_2d)
{ {
float sum_conf = 0.0; float sum_conf = 0.0;

View File

@ -191,33 +191,23 @@ namespace test_triangulate
for (int r = 0; r < img.rows; ++r) for (int r = 0; r < img.rows; ++r)
{ {
const cv::Vec3b *imgRowPtr = img.ptr<cv::Vec3b>(r); const uchar *imgData = img.ptr<uchar>(r);
uchar *bayerRowPtr = bayer.ptr<uchar>(r); uchar *bayerRowPtr = bayer.ptr<uchar>(r);
for (int c = 0; c < img.cols; ++c) for (int c = 0; c < img.cols; ++c)
{ {
const cv::Vec3b &pixel = imgRowPtr[c]; int pixelIndex = 3 * c;
if ((r % 2 == 0) && (c % 2 == 0)) // Use faster bit operation instead of modulo+if
{ // Even row, even col => R = 0
// Even row, even col => R // Even row, odd col => G = 1
bayerRowPtr[c] = pixel[0]; // Odd row, even col => G = 1
} // Odd row, odd col => B = 2
else if ((r % 2 == 0) && (c % 2 == 1)) int row_mod = r & 1;
{ int col_mod = c & 1;
// Even row, odd col => G int component = row_mod + col_mod;
bayerRowPtr[c] = pixel[1];
} bayerRowPtr[c] = imgData[pixelIndex + component];
else if ((r % 2 == 1) && (c % 2 == 0))
{
// Odd row, even col => G
bayerRowPtr[c] = pixel[1];
}
else
{
// Odd row, odd col => B
bayerRowPtr[c] = pixel[2];
}
} }
} }

View File

@ -59,6 +59,8 @@ namespace utils_2d_pose
std::vector<std::vector<int64_t>> input_shapes; std::vector<std::vector<int64_t>> input_shapes;
std::vector<ONNXTensorElementDataType> input_types; std::vector<ONNXTensorElementDataType> input_types;
std::vector<std::string> output_names; std::vector<std::string> output_names;
std::vector<const char *> input_name_ptrs;
std::vector<const char *> output_name_ptrs;
}; };
// ============================================================================================= // =============================================================================================
@ -101,7 +103,7 @@ namespace utils_2d_pose
Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&tensorrt_options)); Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&tensorrt_options));
// 2) Configure your desired fields // 2) Configure your desired fields
// tensorrt_options->trt_fp16_enable = 1; // enable FP16 tensorrt_options->trt_fp16_enable = 1;
tensorrt_options->trt_engine_cache_enable = 1; tensorrt_options->trt_engine_cache_enable = 1;
tensorrt_options->trt_engine_cache_path = "/RapidPoseTriangulation/data/trt_cache/"; tensorrt_options->trt_engine_cache_path = "/RapidPoseTriangulation/data/trt_cache/";
@ -135,6 +137,20 @@ namespace utils_2d_pose
Ort::AllocatedStringPtr oname = session->GetOutputNameAllocated(i, allocator); Ort::AllocatedStringPtr oname = session->GetOutputNameAllocated(i, allocator);
output_names.push_back(oname.get()); output_names.push_back(oname.get());
} }
// Build array of input name C-strings
input_name_ptrs.resize(input_names.size());
for (size_t i = 0; i < input_names.size(); i++)
{
input_name_ptrs[i] = input_names[i].c_str();
}
// Build array of output name C-strings
output_name_ptrs.resize(output_names.size());
for (size_t i = 0; i < output_names.size(); i++)
{
output_name_ptrs[i] = output_names[i].c_str();
}
} }
// ============================================================================================= // =============================================================================================
@ -199,12 +215,12 @@ namespace utils_2d_pose
auto mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); auto mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
auto shape = input_shapes[0]; auto shape = input_shapes[0];
input_tensors.push_back( input_tensors.push_back(
Ort::Value::CreateTensor<uint8_t>( std::move(Ort::Value::CreateTensor<uint8_t>(
mem_info, mem_info,
(uint8_t *)img.data, img.data,
total_elems, total_elems,
shape.data(), shape.data(),
shape.size())); shape.size())));
auto outputs = call_model(input_tensors); auto outputs = call_model(input_tensors);
return outputs; return outputs;
@ -219,20 +235,6 @@ namespace utils_2d_pose
throw std::runtime_error("Number of input tensors does not match model's input count."); throw std::runtime_error("Number of input tensors does not match model's input count.");
} }
// Build array of input name C-strings
std::vector<const char *> input_name_ptrs(input_names.size());
for (size_t i = 0; i < input_names.size(); i++)
{
input_name_ptrs[i] = input_names[i].c_str();
}
// Build array of output name C-strings
std::vector<const char *> output_name_ptrs(output_names.size());
for (size_t i = 0; i < output_names.size(); i++)
{
output_name_ptrs[i] = output_names[i].c_str();
}
// Actually run the model, requesting all known outputs // Actually run the model, requesting all known outputs
return session->Run( return session->Run(
Ort::RunOptions{nullptr}, Ort::RunOptions{nullptr},

View File

@ -27,6 +27,7 @@ img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
pose_out_topic = "/poses/" + cam_id pose_out_topic = "/poses/" + cam_id
last_input_image = None last_input_image = None
last_input_time = 0.0
kpt_model = None kpt_model = None
joint_names_2d = test_triangulate.joint_names_2d joint_names_2d = test_triangulate.joint_names_2d
@ -43,7 +44,7 @@ batch_poses = False
def callback_images(image_data): def callback_images(image_data):
global last_input_image, lock global last_input_image, last_input_time, lock
# Convert into cv images from image string # Convert into cv images from image string
if image_data.encoding == "bayer_rggb8": if image_data.encoding == "bayer_rggb8":
@ -59,29 +60,31 @@ def callback_images(image_data):
time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9 time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9
with lock: with lock:
last_input_image = (bayer_image, time_stamp) last_input_image = bayer_image
last_input_time = time_stamp
# ================================================================================================== # ==================================================================================================
def callback_model(): def callback_model():
global last_input_image, kpt_model, lock global last_input_image, last_input_time, kpt_model, lock
ptime = time.time() ptime = time.time()
if last_input_image is None: if last_input_time == 0.0:
time.sleep(0.001) time.sleep(0.0001)
return return
# Collect inputs # Collect inputs
images_2d = [] images_2d = []
timestamps = [] timestamps = []
with lock: with lock:
img = np.copy(last_input_image[0]) img = last_input_image
ts = last_input_image[1] ts = last_input_time
images_2d.append(img) images_2d.append(img)
timestamps.append(ts) timestamps.append(ts)
last_input_image = None last_input_image = None
last_input_time = 0.0
# Predict 2D poses # Predict 2D poses
images_2d = [test_triangulate.bayer2rgb(img) for img in images_2d] images_2d = [test_triangulate.bayer2rgb(img) for img in images_2d]