Some code speedups.

This commit is contained in:
Daniel
2025-01-17 18:02:17 +01:00
parent 99368e5216
commit 97ea039f7d
4 changed files with 49 additions and 56 deletions

View File

@ -127,17 +127,17 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
if (msg->encoding == "mono8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image.clone();
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "bayer_rggb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
bayer_image = cv_ptr->image.clone();
bayer_image = cv_ptr->image;
}
else if (msg->encoding == "rgb8")
{
cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
cv::Mat color_image = cv_ptr->image.clone();
cv::Mat color_image = cv_ptr->image;
bayer_image = test_triangulate::rgb2bayer(color_image);
}
else
@ -157,7 +157,7 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
// Store in member variables with lock
{
std::lock_guard<std::mutex> lk(mutex);
this->last_input_image = bayer_image;
this->last_input_image = std::move(bayer_image);
this->last_input_time = time_stamp;
}
}
@ -171,17 +171,15 @@ void Rpt2DWrapperNode::callbackModel()
// Check if we have an image
cv::Mat local_image;
double local_timestamp = 0.0;
{
std::lock_guard<std::mutex> lk(mutex);
if (last_input_image.empty())
if (last_input_time == 0.0)
{
return;
}
local_image = last_input_image.clone();
local_image = std::move(last_input_image);
local_timestamp = last_input_time;
// “Consume” the last image so we dont process it again
last_input_image = cv::Mat();
last_input_time = 0.0;
}
@ -198,7 +196,7 @@ void Rpt2DWrapperNode::callbackModel()
auto &poses_2d = poses_2d_upd[0];
// Drop persons with no joints
std::vector<std::vector<std::array<float,3>>> valid_poses;
std::vector<std::vector<std::array<float, 3>>> valid_poses;
for (auto &person : poses_2d)
{
float sum_conf = 0.0;

View File

@ -191,33 +191,23 @@ namespace test_triangulate
for (int r = 0; r < img.rows; ++r)
{
const cv::Vec3b *imgRowPtr = img.ptr<cv::Vec3b>(r);
const uchar *imgData = img.ptr<uchar>(r);
uchar *bayerRowPtr = bayer.ptr<uchar>(r);
for (int c = 0; c < img.cols; ++c)
{
const cv::Vec3b &pixel = imgRowPtr[c];
int pixelIndex = 3 * c;
if ((r % 2 == 0) && (c % 2 == 0))
{
// Even row, even col => R
bayerRowPtr[c] = pixel[0];
}
else if ((r % 2 == 0) && (c % 2 == 1))
{
// Even row, odd col => G
bayerRowPtr[c] = pixel[1];
}
else if ((r % 2 == 1) && (c % 2 == 0))
{
// Odd row, even col => G
bayerRowPtr[c] = pixel[1];
}
else
{
// Odd row, odd col => B
bayerRowPtr[c] = pixel[2];
}
// Use faster bit operation instead of modulo+if
// Even row, even col => R = 0
// Even row, odd col => G = 1
// Odd row, even col => G = 1
// Odd row, odd col => B = 2
int row_mod = r & 1;
int col_mod = c & 1;
int component = row_mod + col_mod;
bayerRowPtr[c] = imgData[pixelIndex + component];
}
}

View File

@ -59,6 +59,8 @@ namespace utils_2d_pose
std::vector<std::vector<int64_t>> input_shapes;
std::vector<ONNXTensorElementDataType> input_types;
std::vector<std::string> output_names;
std::vector<const char *> input_name_ptrs;
std::vector<const char *> output_name_ptrs;
};
// =============================================================================================
@ -101,7 +103,7 @@ namespace utils_2d_pose
Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&tensorrt_options));
// 2) Configure your desired fields
// tensorrt_options->trt_fp16_enable = 1; // enable FP16
tensorrt_options->trt_fp16_enable = 1;
tensorrt_options->trt_engine_cache_enable = 1;
tensorrt_options->trt_engine_cache_path = "/RapidPoseTriangulation/data/trt_cache/";
@ -135,6 +137,20 @@ namespace utils_2d_pose
Ort::AllocatedStringPtr oname = session->GetOutputNameAllocated(i, allocator);
output_names.push_back(oname.get());
}
// Build array of input name C-strings
input_name_ptrs.resize(input_names.size());
for (size_t i = 0; i < input_names.size(); i++)
{
input_name_ptrs[i] = input_names[i].c_str();
}
// Build array of output name C-strings
output_name_ptrs.resize(output_names.size());
for (size_t i = 0; i < output_names.size(); i++)
{
output_name_ptrs[i] = output_names[i].c_str();
}
}
// =============================================================================================
@ -199,12 +215,12 @@ namespace utils_2d_pose
auto mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
auto shape = input_shapes[0];
input_tensors.push_back(
Ort::Value::CreateTensor<uint8_t>(
std::move(Ort::Value::CreateTensor<uint8_t>(
mem_info,
(uint8_t *)img.data,
img.data,
total_elems,
shape.data(),
shape.size()));
shape.size())));
auto outputs = call_model(input_tensors);
return outputs;
@ -219,20 +235,6 @@ namespace utils_2d_pose
throw std::runtime_error("Number of input tensors does not match model's input count.");
}
// Build array of input name C-strings
std::vector<const char *> input_name_ptrs(input_names.size());
for (size_t i = 0; i < input_names.size(); i++)
{
input_name_ptrs[i] = input_names[i].c_str();
}
// Build array of output name C-strings
std::vector<const char *> output_name_ptrs(output_names.size());
for (size_t i = 0; i < output_names.size(); i++)
{
output_name_ptrs[i] = output_names[i].c_str();
}
// Actually run the model, requesting all known outputs
return session->Run(
Ort::RunOptions{nullptr},

View File

@ -27,6 +27,7 @@ img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
pose_out_topic = "/poses/" + cam_id
last_input_image = None
last_input_time = 0.0
kpt_model = None
joint_names_2d = test_triangulate.joint_names_2d
@ -43,7 +44,7 @@ batch_poses = False
def callback_images(image_data):
global last_input_image, lock
global last_input_image, last_input_time, lock
# Convert into cv images from image string
if image_data.encoding == "bayer_rggb8":
@ -59,29 +60,31 @@ def callback_images(image_data):
time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9
with lock:
last_input_image = (bayer_image, time_stamp)
last_input_image = bayer_image
last_input_time = time_stamp
# ==================================================================================================
def callback_model():
global last_input_image, kpt_model, lock
global last_input_image, last_input_time, kpt_model, lock
ptime = time.time()
if last_input_image is None:
time.sleep(0.001)
if last_input_time == 0.0:
time.sleep(0.0001)
return
# Collect inputs
images_2d = []
timestamps = []
with lock:
img = np.copy(last_input_image[0])
ts = last_input_image[1]
img = last_input_image
ts = last_input_time
images_2d.append(img)
timestamps.append(ts)
last_input_image = None
last_input_time = 0.0
# Predict 2D poses
images_2d = [test_triangulate.bayer2rgb(img) for img in images_2d]