Some code speedups.

2025-01-17 18:02:17 +01:00
parent 99368e5216
commit 97ea039f7d
4 changed files with 49 additions and 56 deletions
--- a/ros/rpt2D_wrapper_cpp/src/rpt2D_wrapper.cpp
+++ b/ros/rpt2D_wrapper_cpp/src/rpt2D_wrapper.cpp
@ -127,17 +127,17 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
        if (msg->encoding == "mono8")
        {
            cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
-            bayer_image = cv_ptr->image.clone();
+            bayer_image = cv_ptr->image;
        }
        else if (msg->encoding == "bayer_rggb8")
        {
            cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, msg->encoding);
-            bayer_image = cv_ptr->image.clone();
+            bayer_image = cv_ptr->image;
        }
        else if (msg->encoding == "rgb8")
        {
            cv_bridge::CvImageConstPtr cv_ptr = cv_bridge::toCvShare(msg, "rgb8");
-            cv::Mat color_image = cv_ptr->image.clone();
+            cv::Mat color_image = cv_ptr->image;
            bayer_image = test_triangulate::rgb2bayer(color_image);
        }
        else
@ -157,7 +157,7 @@ void Rpt2DWrapperNode::callbackImages(const sensor_msgs::msg::Image::SharedPtr m
    // Store in member variables with lock
    {
        std::lock_guard<std::mutex> lk(mutex);
-        this->last_input_image = bayer_image;
+        this->last_input_image = std::move(bayer_image);
        this->last_input_time = time_stamp;
    }
 }
@ -171,17 +171,15 @@ void Rpt2DWrapperNode::callbackModel()
    // Check if we have an image
    cv::Mat local_image;
    double local_timestamp = 0.0;
-
    {
        std::lock_guard<std::mutex> lk(mutex);
-        if (last_input_image.empty())
+        if (last_input_time == 0.0)
        {
            return;
        }
-        local_image = last_input_image.clone();
+        local_image = std::move(last_input_image);
        local_timestamp = last_input_time;

-        // “Consume” the last image so we don’t process it again
        last_input_image = cv::Mat();
        last_input_time = 0.0;
    }
--- a/ros/rpt2D_wrapper_cpp/src/test_triangulate.hpp
+++ b/ros/rpt2D_wrapper_cpp/src/test_triangulate.hpp
@ -191,33 +191,23 @@ namespace test_triangulate

        for (int r = 0; r < img.rows; ++r)
        {
-            const cv::Vec3b *imgRowPtr = img.ptr<cv::Vec3b>(r);
+            const uchar *imgData = img.ptr<uchar>(r);
            uchar *bayerRowPtr = bayer.ptr<uchar>(r);

            for (int c = 0; c < img.cols; ++c)
            {
-                const cv::Vec3b &pixel = imgRowPtr[c];
+                int pixelIndex = 3 * c;

-                if ((r % 2 == 0) && (c % 2 == 0))
-                {
-                    // Even row, even col => R
-                    bayerRowPtr[c] = pixel[0];
-                }
-                else if ((r % 2 == 0) && (c % 2 == 1))
-                {
-                    // Even row, odd col => G
-                    bayerRowPtr[c] = pixel[1];
-                }
-                else if ((r % 2 == 1) && (c % 2 == 0))
-                {
-                    // Odd row, even col => G
-                    bayerRowPtr[c] = pixel[1];
-                }
-                else
-                {
-                    // Odd row, odd col => B
-                    bayerRowPtr[c] = pixel[2];
-                }
+                // Use faster bit operation instead of modulo+if
+                // Even row, even col => R = 0
+                // Even row, odd col => G = 1
+                // Odd row, even col => G = 1
+                // Odd row, odd col => B = 2
+                int row_mod = r & 1;
+                int col_mod = c & 1;
+                int component = row_mod + col_mod;
+
+                bayerRowPtr[c] = imgData[pixelIndex + component];
            }
        }

--- a/ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
+++ b/ros/rpt2D_wrapper_cpp/src/utils_2d_pose.hpp
@ -59,6 +59,8 @@ namespace utils_2d_pose
        std::vector<std::vector<int64_t>> input_shapes;
        std::vector<ONNXTensorElementDataType> input_types;
        std::vector<std::string> output_names;
+        std::vector<const char *> input_name_ptrs;
+        std::vector<const char *> output_name_ptrs;
    };

    // =============================================================================================
@ -101,7 +103,7 @@ namespace utils_2d_pose
        Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&tensorrt_options));

        // 2) Configure your desired fields
-        // tensorrt_options->trt_fp16_enable = 1; // enable FP16
+        tensorrt_options->trt_fp16_enable = 1;
        tensorrt_options->trt_engine_cache_enable = 1;
        tensorrt_options->trt_engine_cache_path = "/RapidPoseTriangulation/data/trt_cache/";

@ -135,6 +137,20 @@ namespace utils_2d_pose
            Ort::AllocatedStringPtr oname = session->GetOutputNameAllocated(i, allocator);
            output_names.push_back(oname.get());
        }
+
+        // Build array of input name C-strings
+        input_name_ptrs.resize(input_names.size());
+        for (size_t i = 0; i < input_names.size(); i++)
+        {
+            input_name_ptrs[i] = input_names[i].c_str();
+        }
+
+        // Build array of output name C-strings
+        output_name_ptrs.resize(output_names.size());
+        for (size_t i = 0; i < output_names.size(); i++)
+        {
+            output_name_ptrs[i] = output_names[i].c_str();
+        }
    }

    // =============================================================================================
@ -199,12 +215,12 @@ namespace utils_2d_pose
        auto mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
        auto shape = input_shapes[0];
        input_tensors.push_back(
-            Ort::Value::CreateTensor<uint8_t>(
+            std::move(Ort::Value::CreateTensor<uint8_t>(
                mem_info,
-                (uint8_t *)img.data,
+                img.data,
                total_elems,
                shape.data(),
-                shape.size()));
+                shape.size())));

        auto outputs = call_model(input_tensors);
        return outputs;
@ -219,20 +235,6 @@ namespace utils_2d_pose
            throw std::runtime_error("Number of input tensors does not match model's input count.");
        }

-        // Build array of input name C-strings
-        std::vector<const char *> input_name_ptrs(input_names.size());
-        for (size_t i = 0; i < input_names.size(); i++)
-        {
-            input_name_ptrs[i] = input_names[i].c_str();
-        }
-
-        // Build array of output name C-strings
-        std::vector<const char *> output_name_ptrs(output_names.size());
-        for (size_t i = 0; i < output_names.size(); i++)
-        {
-            output_name_ptrs[i] = output_names[i].c_str();
-        }
-
        // Actually run the model, requesting all known outputs
        return session->Run(
            Ort::RunOptions{nullptr},
--- a/ros/rpt2D_wrapper_py/rpt2D_wrapper_py/rpt2D_wrapper.py
+++ b/ros/rpt2D_wrapper_py/rpt2D_wrapper_py/rpt2D_wrapper.py
@ -27,6 +27,7 @@ img_input_topic = "/" + cam_id + "/pylon_ros2_camera_node/image_raw"
 pose_out_topic = "/poses/" + cam_id

 last_input_image = None
+last_input_time = 0.0
 kpt_model = None
 joint_names_2d = test_triangulate.joint_names_2d

@ -43,7 +44,7 @@ batch_poses = False


 def callback_images(image_data):
-    global last_input_image, lock
+    global last_input_image, last_input_time, lock

    # Convert into cv images from image string
    if image_data.encoding == "bayer_rggb8":
@ -59,29 +60,31 @@ def callback_images(image_data):
    time_stamp = image_data.header.stamp.sec + image_data.header.stamp.nanosec / 1.0e9

    with lock:
-        last_input_image = (bayer_image, time_stamp)
+        last_input_image = bayer_image
+        last_input_time = time_stamp


 # ==================================================================================================


 def callback_model():
-    global last_input_image, kpt_model, lock
+    global last_input_image, last_input_time, kpt_model, lock

    ptime = time.time()
-    if last_input_image is None:
-        time.sleep(0.001)
+    if last_input_time == 0.0:
+        time.sleep(0.0001)
        return

    # Collect inputs
    images_2d = []
    timestamps = []
    with lock:
-        img = np.copy(last_input_image[0])
-        ts = last_input_image[1]
+        img = last_input_image
+        ts = last_input_time
        images_2d.append(img)
        timestamps.append(ts)
        last_input_image = None
+        last_input_time = 0.0

    # Predict 2D poses
    images_2d = [test_triangulate.bayer2rgb(img) for img in images_2d]