From 320c75d6bc89c6c9e0dd9cd61dd454e4d1d4712a Mon Sep 17 00:00:00 2001 From: Jaiveer Singh Date: Tue, 10 Dec 2024 17:49:24 -0800 Subject: [PATCH] Isaac ROS 3.2 --- README.md | 14 +- isaac_ros_centerpose/CMakeLists.txt | 13 + isaac_ros_centerpose/package.xml | 3 +- .../test/centerpose_decoder_node_test.cpp | 97 +++++ isaac_ros_dope/CMakeLists.txt | 6 + isaac_ros_dope/config/dope_config.yaml | 10 +- isaac_ros_dope/config/dope_node.yaml | 18 +- isaac_ros_dope/config/isaac_ros_dope.yaml | 30 ++ .../isaac_ros_dope/dope_decoder_node.hpp | 28 ++ .../launch/isaac_ros_dope_core.launch.py | 23 +- .../launch/isaac_ros_dope_tensor_rt.launch.py | 24 +- .../launch/isaac_ros_dope_triton.launch.py | 24 +- isaac_ros_dope/package.xml | 9 +- isaac_ros_dope/src/dope_decoder_node.cpp | 145 ++++++-- isaac_ros_dope/test/isaac_ros_dope_pol.py | 30 +- .../pose_estimation_0/camera_info.json | 51 +++ isaac_ros_foundationpose/CMakeLists.txt | 11 + .../config/nitros_foundationpose_node.yaml | 12 +- .../foundationpose_node.hpp | 1 + .../launch/isaac_ros_foundationpose.launch.py | 11 +- .../isaac_ros_foundationpose_core.launch.py | 31 +- .../isaac_ros_foundationpose_hawk.launch.py | 16 +- ...ros_foundationpose_hawk_tracking.launch.py | 16 +- ...aac_ros_foundationpose_isaac_sim.launch.py | 20 +- ...oundationpose_isaac_sim_tracking.launch.py | 22 +- ...aac_ros_foundationpose_realsense.launch.py | 14 +- ...oundationpose_realsense_tracking.launch.py | 14 +- ...ros_foundationpose_tracking_core.launch.py | 37 +- isaac_ros_foundationpose/package.xml | 2 +- .../rviz/foundationpose_hawk.rviz | 2 +- .../rviz/foundationpose_hawk_tracking.rviz | 2 +- .../src/detection2_d_array_filter.cpp | 86 +++++ .../src/detection2_d_to_mask.cpp | 74 +--- .../src/foundationpose_node.cpp | 51 +++ .../gxf_isaac_centerpose/CMakeLists.txt | 6 + .../gxf_isaac_centerpose/package.xml | 2 +- .../gxf_isaac_dope/CMakeLists.txt | 6 + .../gxf_isaac_dope/gxf/dope/dope_decoder.cpp | 319 +++++++++------- 
.../gxf_isaac_dope/gxf/dope/dope_decoder.hpp | 36 +- .../gxf_isaac_dope/gxf/dope/dope_ext.cpp | 11 +- .../gxf_isaac_dope/package.xml | 3 +- .../gxf_isaac_foundationpose/CMakeLists.txt | 6 + .../foundationpose/foundationpose_render.cpp | 339 +++++++++--------- .../foundationpose/foundationpose_render.cu | 135 ++++++- .../foundationpose_render.cu.hpp | 5 + .../foundationpose/foundationpose_render.hpp | 8 +- .../foundationpose_sampling.cpp | 59 ++- .../foundationpose_sampling.hpp | 2 + .../gxf_isaac_foundationpose/package.xml | 2 +- isaac_ros_pose_proc/CMakeLists.txt | 109 ++++++ isaac_ros_pose_proc/package.xml | 51 +++ .../scripts/pose_distribution_visualizer.py | 87 +++++ .../src/averaging_filter_node.cpp | 166 +++++++++ .../src/detection3_d_array_to_pose_node.cpp | 84 +++++ .../src/distance_selector_node.cpp | 133 +++++++ .../src/outlier_filter_node.cpp | 141 ++++++++ .../src/pose_array_to_pose_node.cpp | 62 ++++ .../src/stability_filter_node.cpp | 140 ++++++++ .../test/isaac_ros_averaging_filter_test.py | 137 +++++++ .../test/isaac_ros_distance_selector_test.py | 111 ++++++ .../test/isaac_ros_outlier_filter_test.py | 148 ++++++++ .../test/isaac_ros_stability_filter_test.py | 143 ++++++++ test/models/dope_ketchup_pol.onnx | 4 +- 63 files changed, 2850 insertions(+), 552 deletions(-) create mode 100644 isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp create mode 100644 isaac_ros_dope/config/isaac_ros_dope.yaml create mode 100644 isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json create mode 100644 isaac_ros_foundationpose/src/detection2_d_array_filter.cpp create mode 100644 isaac_ros_pose_proc/CMakeLists.txt create mode 100644 isaac_ros_pose_proc/package.xml create mode 100755 isaac_ros_pose_proc/scripts/pose_distribution_visualizer.py create mode 100644 isaac_ros_pose_proc/src/averaging_filter_node.cpp create mode 100644 isaac_ros_pose_proc/src/detection3_d_array_to_pose_node.cpp create mode 100644 
isaac_ros_pose_proc/src/distance_selector_node.cpp create mode 100644 isaac_ros_pose_proc/src/outlier_filter_node.cpp create mode 100644 isaac_ros_pose_proc/src/pose_array_to_pose_node.cpp create mode 100644 isaac_ros_pose_proc/src/stability_filter_node.cpp create mode 100644 isaac_ros_pose_proc/test/isaac_ros_averaging_filter_test.py create mode 100644 isaac_ros_pose_proc/test/isaac_ros_distance_selector_test.py create mode 100644 isaac_ros_pose_proc/test/isaac_ros_outlier_filter_test.py create mode 100644 isaac_ros_pose_proc/test/isaac_ros_stability_filter_test.py diff --git a/README.md b/README.md index 510f649..39e39e2 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ the 3D pose of an object and distance for navigation or manipulation. `isaac_ros_foundationpose` is used in a graph of nodes to estimate the pose of a novel object using 3D bounding cuboid dimensions. It’s developed on top of -[FoundationPose](https://github.com/NVlabs/FoundationPose) model, which is +[FoundationPose](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/isaac/models/foundationpose) model, which is a pre-trained deep learning model developed by NVLabs. FoundationPose is capable for both pose estimation and tracking on unseen objects without requiring fine-tuning, and its accuracy outperforms existing state-of-art methods. @@ -81,11 +81,11 @@ which can also be found at [Isaac ROS DNN Inference](https://github.com/NVIDIA-I ## Performance -| Sample Graph

| Input Size

| AGX Orin

| Orin NX

| Orin Nano 8GB

| x86_64 w/ RTX 4060 Ti

| x86_64 w/ RTX 4090

| -|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [FoundationPose Pose Estimation Node](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_foundationpose_benchmark/scripts/isaac_ros_foundationpose_node.py)



| 720p



| [1.72 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-agx_orin.json)


690 ms @ 30Hz

| –



| –



| –



| [7.02 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-x86_4090.json)


170 ms @ 30Hz

| -| [DOPE Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_dope_benchmark/scripts/isaac_ros_dope_graph.py)



| VGA



| [41.3 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-agx_orin.json)


42 ms @ 30Hz

| [17.5 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-orin_nx.json)


76 ms @ 30Hz

| –



| [85.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-nuc_4060ti.json)


24 ms @ 30Hz

| [199 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-x86_4090.json)


14 ms @ 30Hz

| -| [Centerpose Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_centerpose_benchmark/scripts/isaac_ros_centerpose_graph.py)



| VGA



| [36.3 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-agx_orin.json)


4.8 ms @ 30Hz

| [19.7 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nx.json)


4.9 ms @ 30Hz

| [13.8 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nano.json)


7.4 ms @ 30Hz

| [50.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-nuc_4060ti.json)


23 ms @ 30Hz

| [50.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-x86_4090.json)


20 ms @ 30Hz

| +| Sample Graph

| Input Size

| AGX Orin

| Orin NX

| Orin Nano 8GB

| x86_64 w/ RTX 4090

| +|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [FoundationPose Pose Estimation Node](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_foundationpose_benchmark/scripts/isaac_ros_foundationpose_node.py)



| 720p



| [1.72 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-agx_orin.json)


690 ms @ 30Hz

| –



| –



| [9.61 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-x86-4090.json)


110 ms @ 30Hz

| +| [DOPE Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_dope_benchmark/scripts/isaac_ros_dope_graph.py)



| VGA



| [27.7 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-agx_orin.json)


16 ms @ 30Hz

| [17.8 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-orin_nx.json)


14 ms @ 30Hz

| –



| [187 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-x86-4090.json)


7.8 ms @ 30Hz

| +| [Centerpose Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_centerpose_benchmark/scripts/isaac_ros_centerpose_graph.py)



| VGA



| [47.1 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-agx_orin.json)


6.5 ms @ 30Hz

| [30.0 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nx.json)


50 ms @ 30Hz

| [19.9 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nano.json)


28 ms @ 30Hz

| [50.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-x86-4090.json)


12 ms @ 30Hz

| --- @@ -115,4 +115,4 @@ Please visit the [Isaac ROS Documentation](https://nvidia-isaac-ros.github.io/re ## Latest -Update 2024-09-26: Update for ZED compatibility +Update 2024-12-10: Added pose estimate post-processing utilities diff --git a/isaac_ros_centerpose/CMakeLists.txt b/isaac_ros_centerpose/CMakeLists.txt index b6ed263..8785a05 100644 --- a/isaac_ros_centerpose/CMakeLists.txt +++ b/isaac_ros_centerpose/CMakeLists.txt @@ -49,6 +49,13 @@ if(BUILD_TESTING) find_package(ament_lint_auto REQUIRED) ament_lint_auto_find_test_dependencies() + # Gtest for decoder_node + ament_add_gtest(centerpose_decoder_node_test test/centerpose_decoder_node_test.cpp) + target_link_libraries(centerpose_decoder_node_test centerpose_decoder_node) + target_include_directories(centerpose_decoder_node_test PUBLIC include/isaac_ros_centerpose/) + target_include_directories(centerpose_decoder_node_test PUBLIC /usr/src/googletest/googlemock/include/) + ament_target_dependencies(centerpose_decoder_node_test rclcpp) + ament_target_dependencies(centerpose_decoder_node_test isaac_ros_nitros) # The FindPythonInterp and FindPythonLibs modules are removed if(POLICY CMP0148) cmake_policy(SET CMP0148 OLD) @@ -59,4 +66,10 @@ if(BUILD_TESTING) add_launch_test(test/test_centerpose_pol_triton.py TIMEOUT "600") endif() + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) +include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + ament_auto_package(INSTALL_TO_SHARE config launch) diff --git a/isaac_ros_centerpose/package.xml b/isaac_ros_centerpose/package.xml index f9ba1dd..c706972 100644 --- a/isaac_ros_centerpose/package.xml +++ b/isaac_ros_centerpose/package.xml @@ -21,7 +21,7 @@ isaac_ros_centerpose - 3.1.0 + 3.2.0 CenterPose: Pose Estimation using Deep Learning Isaac ROS Maintainers @@ -63,6 +63,7 @@ ament_lint_common isaac_ros_test 
sensor_msgs_py + ament_cmake_gtest ament_cmake diff --git a/isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp b/isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp new file mode 100644 index 0000000..f8dff32 --- /dev/null +++ b/isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp @@ -0,0 +1,97 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include "centerpose_decoder_node.hpp" +#include "rclcpp/rclcpp.hpp" + +// Objective: to cover code lines where exceptions are thrown +// Approach: send Invalid Arguments for node parameters to trigger the exception + + +TEST(centerpose_decoder_node_test, test_invalid_output_field_size) +{ + rclcpp::init(0, nullptr); + rclcpp::NodeOptions options; + EXPECT_THROW( + { + try { + nvidia::isaac_ros::centerpose::CenterPoseDecoderNode centerpose_decoder_node(options); + } catch (const std::invalid_argument & e) { + EXPECT_THAT(e.what(), testing::HasSubstr("Error: received invalid output field size")); + throw; + } catch (const rclcpp::exceptions::InvalidParameterValueException & e) { + EXPECT_THAT(e.what(), testing::HasSubstr("No parameter value set")); + throw; + } + }, std::invalid_argument); + rclcpp::shutdown(); +} + +TEST(centerpose_decoder_node_test, test_cuboid_scaling_factor) +{ + rclcpp::init(0, nullptr); + rclcpp::NodeOptions options; + options.append_parameter_override("output_field_size", std::vector{128, 128}); + options.append_parameter_override("cuboid_scaling_factor", 0.0); + EXPECT_THROW( + { + try { + nvidia::isaac_ros::centerpose::CenterPoseDecoderNode centerpose_decoder_node(options); + } catch (const std::invalid_argument & e) { + EXPECT_THAT( + e.what(), + testing::HasSubstr("Error: received a less than or equal to zero cuboid scaling factor")); + throw; + } catch (const rclcpp::exceptions::InvalidParameterValueException & e) { + EXPECT_THAT(e.what(), testing::HasSubstr("No parameter value set")); + throw; + } + }, std::invalid_argument); + rclcpp::shutdown(); +} + +TEST(centerpose_decoder_node_test, test_score_threshold) +{ + rclcpp::init(0, nullptr); + rclcpp::NodeOptions options; + options.append_parameter_override("output_field_size", std::vector{128, 128}); + options.append_parameter_override("cuboid_scaling_factor", 1.0); + options.append_parameter_override("score_threshold", 1.0); + 
EXPECT_THROW( + { + try { + nvidia::isaac_ros::centerpose::CenterPoseDecoderNode centerpose_decoder_node(options); + } catch (const std::invalid_argument & e) { + EXPECT_THAT( + e.what(), + testing::HasSubstr("Error: received score threshold greater or equal to 1.0")); + throw; + } catch (const rclcpp::exceptions::InvalidParameterValueException & e) { + EXPECT_THAT(e.what(), testing::HasSubstr("No parameter value set")); + throw; + } + }, std::invalid_argument); + rclcpp::shutdown(); +} + + +int main(int argc, char ** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/isaac_ros_dope/CMakeLists.txt b/isaac_ros_dope/CMakeLists.txt index b7d0ffe..18a81fd 100644 --- a/isaac_ros_dope/CMakeLists.txt +++ b/isaac_ros_dope/CMakeLists.txt @@ -52,4 +52,10 @@ if(BUILD_TESTING) add_launch_test(test/isaac_ros_dope_pol.py TIMEOUT "600") endif() + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) +include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + ament_auto_package(INSTALL_TO_SHARE config launch) diff --git a/isaac_ros_dope/config/dope_config.yaml b/isaac_ros_dope/config/dope_config.yaml index 1bd5802..8ae4242 100644 --- a/isaac_ros_dope/config/dope_config.yaml +++ b/isaac_ros_dope/config/dope_config.yaml @@ -58,12 +58,4 @@ dope: "Peaches" : [ 5.7781000137329102, 7.0961999893188477, 6.5925998687744141 ], "GreenBeans" : [ 5.758699893951416, 7.0608000755310059, 6.5732002258300781 ], "PeasAndCarrots" : [ 5.8512001037597656, 7.0636000633239746, 6.5918002128601074 ] - } - - # 9 element camera matrix (using default from Ketchup demo) - # Taken from: https://github.com/andrewyguo/dope_training/blob/master/inference/config/camera_info.yaml - camera_matrix: [ - 364.16501736, 0.0, 121.36296296, - 0.0, 364.16501736, 121.36296296, - 0.0, 0.0, 1.0 - ] + } \ No newline at end of file 
diff --git a/isaac_ros_dope/config/dope_node.yaml b/isaac_ros_dope/config/dope_node.yaml index e6b7b3c..23753aa 100644 --- a/isaac_ros_dope/config/dope_node.yaml +++ b/isaac_ros_dope/config/dope_node.yaml @@ -22,28 +22,36 @@ components: type: nvidia::gxf::DoubleBufferReceiver parameters: capacity: 12 -- name: posearray_out +- name: detection3darray_out type: nvidia::gxf::DoubleBufferTransmitter parameters: capacity: 12 +- name: camera_model_input + type: nvidia::gxf::DoubleBufferReceiver + parameters: + capacity: 12 - name: allocator type: nvidia::gxf::UnboundedAllocator - name: dope_decoder type: nvidia::isaac_ros::dope::DopeDecoder parameters: tensorlist_receiver: tensorlist_in - posearray_transmitter: posearray_out + camera_model_input: camera_model_input + detection3darray_transmitter: detection3darray_out allocator: allocator object_dimensions: [] - camera_matrix: [] object_name: "" - type: nvidia::gxf::MessageAvailableSchedulingTerm parameters: receiver: tensorlist_in min_size: 1 +- type: nvidia::gxf::MessageAvailableSchedulingTerm + parameters: + receiver: camera_model_input + min_size: 1 - type: nvidia::gxf::DownstreamReceptiveSchedulingTerm parameters: - transmitter: posearray_out + transmitter: detection3darray_out min_size: 1 --- name: sink @@ -65,7 +73,7 @@ components: - name: edge0 type: nvidia::gxf::Connection parameters: - source: dope_decoder/posearray_out + source: dope_decoder/detection3darray_out target: sink/signal --- components: diff --git a/isaac_ros_dope/config/isaac_ros_dope.yaml b/isaac_ros_dope/config/isaac_ros_dope.yaml new file mode 100644 index 0000000..62999e2 --- /dev/null +++ b/isaac_ros_dope/config/isaac_ros_dope.yaml @@ -0,0 +1,30 @@ +%YAML 1.2 +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. 
Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +--- +id: [0xb9a27f9a9c9b3c40, 0xf7620fa5914c9644] +name: DopeDecoder +version: 1.0.0 +components: + - id: [0x03022c23f899e397, 0xaf817e602e657ba6] + type: nvidia::isaac_ros::dope::DopeDecoder + input_output_groups: + - input_keys: [tensorlist_receiver, camera_model_input] + output_keys: [detection3darray_transmitter] + input_format_keys: [] + output_format_keys: [] + supported_formats: + - platforms: [any] + details: + - input_formats: [nitros_tensor_list_nchw_rgb_f32, nitros_camera_info] + output_formats: [nitros_detection3_d_array] + costs: + throughput: 10bytes/s + latency: 10ms + power: 100J + accuracy: 100% diff --git a/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp b/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp index 8831240..0b886c1 100644 --- a/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp +++ b/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp @@ -18,6 +18,7 @@ #ifndef ISAAC_ROS_DOPE__DOPE_DECODER_NODE_HPP_ #define ISAAC_ROS_DOPE__DOPE_DECODER_NODE_HPP_ +#include #include #include @@ -25,6 +26,7 @@ #include "isaac_ros_tensor_list_interfaces/msg/tensor_list.hpp" #include "isaac_ros_nitros/nitros_node.hpp" #include "rclcpp/rclcpp.hpp" +#include "tf2_ros/transform_broadcaster.h" namespace nvidia { @@ -48,6 +50,8 @@ class DopeDecoderNode : public nitros::NitrosNode void postLoadGraphCallback() override; + void DopeDecoderDetectionCallback(const gxf_context_t context, nitros::NitrosTypeBase & msg); + private: // The name of the YAML configuration file const std::string configuration_file_; @@ -55,11 +59,35 @@ class DopeDecoderNode : public nitros::NitrosNode // The class name of the object we're locating const std::string object_name_; + // The frame name for TF publishing + const std::string tf_frame_name_; + + // Boolean value 
indicating option to pubishing to TF tree + const bool enable_tf_publishing_; + + // The minimum value of a peak in a belief map + const double map_peak_threshold_; + + // The maximum angle threshold for affinity mapping of corners to centroid + const double affinity_map_angle_threshold_; + + // Double indicating that dope outputs pose rotated by N degrees along y axis + const double rotation_y_axis_; + + // Double indicating that dope outputs pose rotated by N degrees along x axis + const double rotation_x_axis_; + + // Double indicating that dope outputs pose rotated by N degrees along z axis + const double rotation_z_axis_; + // The dimensions of the cuboid around the object we're locating std::vector object_dimensions_; // The camera matrix used to capture the input images std::vector camera_matrix_; + + // The transform broadcaster for when TF publishing for poses is enabled + std::unique_ptr tf_broadcaster_; }; } // namespace dope diff --git a/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py b/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py index 7155e3c..1143e44 100644 --- a/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py +++ b/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py @@ -45,6 +45,8 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl # DOPE Decoder parameters object_name = LaunchConfiguration('object_name') + enable_tf_publishing = LaunchConfiguration('enable_tf_publishing') + map_peak_threshold = LaunchConfiguration('map_peak_threshold') return { 'dope_inference_node': ComposableNode( @@ -70,9 +72,12 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl plugin='nvidia::isaac_ros::dope::DopeDecoderNode', parameters=[{ 'object_name': object_name, + 'enable_tf_publishing': enable_tf_publishing, + 'map_peak_threshold': map_peak_threshold, }], remappings=[('belief_map_array', 'tensor_sub'), - ('dope/pose_array', 'poses')] + ('dope/detections', 'detections'), + ('camera_info', 
'/dope_encoder/crop/camera_info')] ) } @@ -91,19 +96,19 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \ return { 'network_image_width': DeclareLaunchArgument( 'network_image_width', - default_value='640', + default_value='1280', description='The input image width that the network expects'), 'network_image_height': DeclareLaunchArgument( 'network_image_height', - default_value='480', + default_value='720', description='The input image height that the network expects'), 'encoder_image_mean': DeclareLaunchArgument( 'encoder_image_mean', - default_value='[0.5, 0.5, 0.5]', + default_value='[0.485, 0.456, 0.406]', description='The mean for image normalization'), 'encoder_image_stddev': DeclareLaunchArgument( 'encoder_image_stddev', - default_value='[0.5, 0.5, 0.5]', + default_value='[0.229, 0.224, 0.225]', description='The standard deviation for image normalization'), 'model_file_path': DeclareLaunchArgument( 'model_file_path', @@ -147,10 +152,18 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \ 'object_name', default_value='Ketchup', description='The object class that the DOPE network is detecting'), + 'map_peak_threshold': DeclareLaunchArgument( + 'map_peak_threshold', + default_value='0.1', + description='The minimum value of a peak in a DOPE belief map'), 'force_engine_update': DeclareLaunchArgument( 'force_engine_update', default_value='False', description='Whether TensorRT should update the TensorRT engine file or not'), + 'enable_tf_publishing': DeclareLaunchArgument( + 'enable_tf_publishing', + default_value='False', + description='Whether Dope Decoder will broadcast poses to the TF tree or not'), 'dope_encoder_launch': IncludeLaunchDescription( PythonLaunchDescriptionSource( [os.path.join(encoder_dir, 'launch', 'dnn_image_encoder.launch.py')] diff --git a/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py b/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py index 5c3231a..15c16a8 100644 --- 
a/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py +++ b/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py @@ -42,19 +42,19 @@ def generate_launch_description(): description='The input image height'), DeclareLaunchArgument( 'network_image_width', - default_value='640', + default_value='1280', description='The input image width that the network expects'), DeclareLaunchArgument( 'network_image_height', - default_value='480', + default_value='720', description='The input image height that the network expects'), DeclareLaunchArgument( 'encoder_image_mean', - default_value='[0.5, 0.5, 0.5]', + default_value='[0.485, 0.456, 0.406]', description='The mean for image normalization'), DeclareLaunchArgument( 'encoder_image_stddev', - default_value='[0.5, 0.5, 0.5]', + default_value='[0.229, 0.224, 0.225]', description='The standard deviation for image normalization'), DeclareLaunchArgument( 'model_file_path', @@ -100,6 +100,14 @@ def generate_launch_description(): 'force_engine_update', default_value='False', description='Whether TensorRT should update the TensorRT engine file or not'), + DeclareLaunchArgument( + 'map_peak_threshold', + default_value='0.1', + description='The minimum value of a peak in a DOPE belief map'), + DeclareLaunchArgument( + 'enable_tf_publishing', + default_value='False', + description='Whether Dope Decoder will broadcast poses to the TF tree or not') ] # DNN Image Encoder parameters @@ -124,6 +132,8 @@ def generate_launch_description(): # DOPE Decoder parameters object_name = LaunchConfiguration('object_name') + enable_tf_publishing = LaunchConfiguration('enable_tf_publishing') + map_peak_threshold = LaunchConfiguration('map_peak_threshold') encoder_dir = get_package_share_directory('isaac_ros_dnn_image_encoder') dope_encoder_launch = IncludeLaunchDescription( @@ -170,9 +180,13 @@ def generate_launch_description(): plugin='nvidia::isaac_ros::dope::DopeDecoderNode', parameters=[{ 'object_name': object_name, + 'enable_tf_publishing': 
enable_tf_publishing, + 'map_peak_threshold': map_peak_threshold, }], remappings=[('belief_map_array', 'tensor_sub'), - ('dope/pose_array', 'poses')]) + ('dope/detections', 'detections'), + ('camera_info', '/dope_encoder/crop/camera_info')] + ) container = ComposableNodeContainer( name='dope_container', diff --git a/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py b/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py index 7cfe98f..d8ef2d3 100644 --- a/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py +++ b/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py @@ -39,19 +39,19 @@ def generate_launch_description(): description='The input image height'), DeclareLaunchArgument( 'network_image_width', - default_value='640', + default_value='1280', description='The input image width that the network expects'), DeclareLaunchArgument( 'network_image_height', - default_value='480', + default_value='720', description='The input image height that the network expects'), DeclareLaunchArgument( 'encoder_image_mean', - default_value='[0.5, 0.5, 0.5]', + default_value='[0.485, 0.456, 0.406]', description='The mean for image normalization'), DeclareLaunchArgument( 'encoder_image_stddev', - default_value='[0.5, 0.5, 0.5]', + default_value='[0.229, 0.224, 0.225]', description='The standard deviation for image normalization'), DeclareLaunchArgument( 'model_name', @@ -93,6 +93,14 @@ def generate_launch_description(): 'object_name', default_value='Ketchup', description='The object class that the DOPE network is detecting'), + DeclareLaunchArgument( + 'map_peak_threshold', + default_value='0.1', + description='The minimum value of a peak in a DOPE belief map'), + DeclareLaunchArgument( + 'enable_tf_publishing', + default_value='False', + description='Whether Dope Decoder will broadcast poses to the TF tree or not'), ] # DNN Image Encoder parameters @@ -116,6 +124,8 @@ def generate_launch_description(): # DOPE Decoder parameters object_name = LaunchConfiguration('object_name') 
+ enable_tf_publishing = LaunchConfiguration('enable_tf_publishing') + map_peak_threshold = LaunchConfiguration('map_peak_threshold') encoder_dir = get_package_share_directory('isaac_ros_dnn_image_encoder') dope_encoder_launch = IncludeLaunchDescription( @@ -161,9 +171,13 @@ def generate_launch_description(): plugin='nvidia::isaac_ros::dope::DopeDecoderNode', parameters=[{ 'object_name': object_name, + 'enable_tf_publishing': enable_tf_publishing, + 'map_peak_threshold': map_peak_threshold }], remappings=[('belief_map_array', 'tensor_sub'), - ('dope/pose_array', 'poses')]) + ('dope/detections', 'detections'), + ('camera_info', '/dope_encoder/crop/camera_info')] + ) container = ComposableNodeContainer( name='dope_container', diff --git a/isaac_ros_dope/package.xml b/isaac_ros_dope/package.xml index bc9a520..cf64666 100644 --- a/isaac_ros_dope/package.xml +++ b/isaac_ros_dope/package.xml @@ -21,7 +21,7 @@ isaac_ros_dope - 3.1.0 + 3.2.0 Deep learning based pose estimation Isaac ROS Maintainers @@ -30,6 +30,8 @@ Ethan Yu Jeff Smith Jaiveer Singh + Shruti Sharma + Toya Takahashi ament_cmake @@ -39,7 +41,8 @@ isaac_ros_dnn_image_encoder isaac_ros_nitros - isaac_ros_nitros_pose_array_type + isaac_ros_nitros_camera_info_type + isaac_ros_nitros_detection3_d_array_type isaac_ros_nitros_tensor_list_type isaac_ros_tensor_list_interfaces isaac_ros_tensor_rt @@ -48,8 +51,10 @@ message_filters rclcpp sensor_msgs + vision_msgs tf2_eigen + gxf_isaac_gems isaac_ros_common isaac_ros_gxf diff --git a/isaac_ros_dope/src/dope_decoder_node.cpp b/isaac_ros_dope/src/dope_decoder_node.cpp index 3309729..a45f483 100644 --- a/isaac_ros_dope/src/dope_decoder_node.cpp +++ b/isaac_ros_dope/src/dope_decoder_node.cpp @@ -25,12 +25,17 @@ namespace fs = std::experimental::filesystem; namespace fs = std::filesystem; #endif -#include "isaac_ros_nitros_pose_array_type/nitros_pose_array.hpp" -#include "isaac_ros_nitros_tensor_list_type/nitros_tensor_list.hpp" +#include #include 
"ament_index_cpp/get_package_share_directory.hpp" +#include "detection3_d_array_message/detection3_d_array_message.hpp" +#include "geometry_msgs/msg/transform_stamped.hpp" +#include "gxf/std/tensor.hpp" +#include "isaac_ros_nitros_camera_info_type/nitros_camera_info.hpp" +#include "isaac_ros_nitros_detection3_d_array_type/nitros_detection3_d_array.hpp" +#include "isaac_ros_nitros_tensor_list_type/nitros_tensor_list.hpp" #include "rclcpp/rclcpp.hpp" - +#include "tf2_ros/transform_broadcaster.h" namespace nvidia { @@ -45,13 +50,19 @@ constexpr char INPUT_COMPONENT_KEY[] = "dope_decoder/tensorlist_in"; constexpr char INPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_tensor_list_nchw_rgb_f32"; constexpr char INPUT_TOPIC_NAME[] = "belief_map_array"; +constexpr char CAMERA_INFO_INPUT_COMPONENT_KEY[] = "dope_decoder/camera_model_input"; +constexpr char CAMERA_INFO_INPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_camera_info"; +constexpr char CAMERA_INFO_INPUT_TOPIC_NAME[] = "camera_info"; + constexpr char OUTPUT_COMPONENT_KEY[] = "sink/sink"; -constexpr char OUTPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_pose_array"; -constexpr char OUTPUT_TOPIC_NAME[] = "dope/pose_array"; +constexpr char OUTPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_detection3_d_array"; +constexpr char OUTPUT_TOPIC_NAME[] = "dope/detections"; constexpr char APP_YAML_FILENAME[] = "config/dope_node.yaml"; constexpr char PACKAGE_NAME[] = "isaac_ros_dope"; +constexpr int kExpectedPoseAsTensorSize = (3 + 4); + const std::vector> EXTENSIONS = { {"isaac_ros_gxf", "gxf/lib/std/libgxf_std.so"}, {"isaac_ros_gxf", "gxf/lib/multimedia/libgxf_multimedia.so"}, @@ -60,10 +71,8 @@ const std::vector> EXTENSIONS = { {"gxf_isaac_dope", "gxf/lib/libgxf_isaac_dope.so"}, }; -const std::vector PRESET_EXTENSION_SPEC_NAMES = { - "isaac_ros_dope", -}; -const std::vector EXTENSION_SPEC_FILENAMES = {}; +const std::vector PRESET_EXTENSION_SPEC_NAMES = {}; +const std::vector EXTENSION_SPEC_FILENAMES = {"config/isaac_ros_dope.yaml"}; const std::vector 
GENERATOR_RULE_FILENAMES = {}; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" @@ -76,6 +85,14 @@ const nitros::NitrosPublisherSubscriberConfigMap CONFIG_MAP = { .topic_name = INPUT_TOPIC_NAME, } }, + {CAMERA_INFO_INPUT_COMPONENT_KEY, + { + .type = nitros::NitrosPublisherSubscriberType::NEGOTIATED, + .qos = rclcpp::QoS(1), + .compatible_data_format = CAMERA_INFO_INPUT_DEFAULT_TENSOR_FORMAT, + .topic_name = CAMERA_INFO_INPUT_TOPIC_NAME, + } + }, {OUTPUT_COMPONENT_KEY, { .type = nitros::NitrosPublisherSubscriberType::NEGOTIATED, @@ -100,11 +117,26 @@ DopeDecoderNode::DopeDecoderNode(rclcpp::NodeOptions options) // Parameters configuration_file_(declare_parameter("configuration_file", "dope_config.yaml")), object_name_(declare_parameter("object_name", "Ketchup")), + tf_frame_name_(declare_parameter("tf_frame_name", "dope_object")), + enable_tf_publishing_(declare_parameter("enable_tf_publishing", true)), + map_peak_threshold_(declare_parameter("map_peak_threshold", 0.1)), + affinity_map_angle_threshold_(declare_parameter("affinity_map_angle_threshold", 0.5)), + rotation_y_axis_(declare_parameter("rotation_y_axis", false)), + rotation_x_axis_(declare_parameter("rotation_x_axis", false)), + rotation_z_axis_(declare_parameter("rotation_z_axis", false)), object_dimensions_{}, camera_matrix_{} { RCLCPP_DEBUG(get_logger(), "[DopeDecoderNode] Constructor"); + // Add callback function for Dope Pose Array to broadcast to ROS TF tree if setting is enabled. 
+ if (enable_tf_publishing_) { + tf_broadcaster_ = std::make_unique(*this); + config_map_[OUTPUT_COMPONENT_KEY].callback = std::bind( + &DopeDecoderNode::DopeDecoderDetectionCallback, this, std::placeholders::_1, + std::placeholders::_2); + } + // Open configuration YAML file const std::string package_directory = ament_index_cpp::get_package_share_directory( "isaac_ros_dope"); @@ -131,18 +163,8 @@ DopeDecoderNode::DopeDecoderNode(rclcpp::NodeOptions options) auto dd = dimensions->double_array_value->values; object_dimensions_ = {dd[0], dd[1], dd[2]}; - rcl_variant_t * cam_mat = rcl_yaml_node_struct_get("dope", "camera_matrix", dope_params); - if (!cam_mat->double_array_value) { - RCLCPP_ERROR(this->get_logger(), "No camera_matrix parameter found"); - throw std::runtime_error("Parameter parsing failure."); - } - - auto vv = cam_mat->double_array_value->values; - camera_matrix_ = {vv[0], vv[1], vv[2], vv[3], vv[4], vv[5], vv[6], vv[7], vv[8]}; - - rcl_yaml_node_struct_fini(dope_params); - - registerSupportedType(); + registerSupportedType(); + registerSupportedType(); registerSupportedType(); startNitrosNode(); @@ -156,14 +178,95 @@ void DopeDecoderNode::postLoadGraphCallback() getNitrosContext().setParameter1DFloat64Vector( "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "object_dimensions", object_dimensions_); + getNitrosContext().setParameter1DFloat64Vector( "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "camera_matrix", camera_matrix_); + getNitrosContext().setParameterStr( "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "object_name", object_name_); + getNitrosContext().setParameterFloat64( + "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "map_peak_threshold", + map_peak_threshold_); + getNitrosContext().setParameterFloat64( + "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "affinity_map_angle_threshold", + affinity_map_angle_threshold_); + getNitrosContext().setParameterFloat64( + "dope_decoder", 
"nvidia::isaac_ros::dope::DopeDecoder", "rotation_y_axis", + rotation_y_axis_); + getNitrosContext().setParameterFloat64( + "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "rotation_x_axis", + rotation_x_axis_); + getNitrosContext().setParameterFloat64( + "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "rotation_z_axis", + rotation_z_axis_); } +// convert Detection3DArray to ROS message that will be published to the TF tree. + +void DopeDecoderNode::DopeDecoderDetectionCallback( + const gxf_context_t context, nitros::NitrosTypeBase & msg) +{ + geometry_msgs::msg::TransformStamped transform_stamped; + + auto msg_entity = nvidia::gxf::Entity::Shared(context, msg.handle); + + // Populate timestamp information back into the header + auto maybe_input_timestamp = msg_entity->get(); + if (!maybe_input_timestamp) { // Fallback to label 'timestamp' + maybe_input_timestamp = msg_entity->get("timestamp"); + } + if (maybe_input_timestamp) { + transform_stamped.header.stamp.sec = static_cast( + maybe_input_timestamp.value()->acqtime / static_cast(1e9)); + transform_stamped.header.stamp.nanosec = static_cast( + maybe_input_timestamp.value()->acqtime % static_cast(1e9)); + } else { + RCLCPP_WARN( + get_logger(), + "[DopeDecoderNode] Failed to get timestamp"); + } + + // Extract foundation pose list to a struct type defined in detection3_d_array_message.hpp + auto dope_detections_array_expected = nvidia::isaac::GetDetection3DListMessage( + msg_entity.value()); + if (!dope_detections_array_expected) { + RCLCPP_ERROR( + get_logger(), "[DopeDecoderNode] Failed to get detections data from message entity"); + return; + } + auto dope_detections_array = dope_detections_array_expected.value(); + + // Extract number of tags detected + size_t tags_count = dope_detections_array.count; + + /* for each pose instance of a single object (for each Tensor), ennumerate child_frame_id + in case there are multiple detections in Detection3DArray message (msg_entity) + */ + + int 
child_frame_id_num = 1; + if (tags_count > 0) { + for (size_t i = 0; i < tags_count; i++) { + auto pose = dope_detections_array.poses.at(i).value(); + + transform_stamped.header.frame_id = msg.frame_id; + transform_stamped.child_frame_id = tf_frame_name_ + std::to_string(child_frame_id_num); + transform_stamped.transform.translation.x = pose->translation.x(); + transform_stamped.transform.translation.y = pose->translation.y(); + transform_stamped.transform.translation.z = pose->translation.z(); + transform_stamped.transform.rotation.x = pose->rotation.quaternion().x(); + transform_stamped.transform.rotation.y = pose->rotation.quaternion().y(); + transform_stamped.transform.rotation.z = pose->rotation.quaternion().z(); + transform_stamped.transform.rotation.w = pose->rotation.quaternion().w(); + + tf_broadcaster_->sendTransform(transform_stamped); + child_frame_id_num++; + } + } +} + + } // namespace dope } // namespace isaac_ros } // namespace nvidia diff --git a/isaac_ros_dope/test/isaac_ros_dope_pol.py b/isaac_ros_dope/test/isaac_ros_dope_pol.py index 6a6b342..dba4038 100644 --- a/isaac_ros_dope/test/isaac_ros_dope_pol.py +++ b/isaac_ros_dope/test/isaac_ros_dope_pol.py @@ -26,7 +26,6 @@ import time from ament_index_python.packages import get_package_share_directory -from geometry_msgs.msg import PoseArray from isaac_ros_test import IsaacROSBaseTest, JSONConversion from launch.actions import IncludeLaunchDescription from launch.launch_description_sources import PythonLaunchDescriptionSource @@ -37,6 +36,7 @@ import rclpy from sensor_msgs.msg import CameraInfo, Image +from vision_msgs.msg import Detection3DArray MODEL_FILE_NAME = 'dope_ketchup_pol.onnx' @@ -76,7 +76,7 @@ def generate_test_description(): 'frame_id': 'map' }], remappings=[('belief_map_array', 'tensor_sub'), - ('dope/pose_array', 'poses')]) + ('dope/detections', 'detections')]) encoder_dir = get_package_share_directory('isaac_ros_dnn_image_encoder') dope_encoder_launch = IncludeLaunchDescription( @@ 
-86,7 +86,7 @@ def generate_test_description(): launch_arguments={ 'input_image_width': '852', 'input_image_height': '480', - 'network_image_width': '640', + 'network_image_width': '852', 'network_image_height': '480', 'attach_to_shared_component_container': 'True', 'component_container_name': 'dope_container', @@ -132,7 +132,7 @@ def test_pol(self): received_messages = {} - self.generate_namespace_lookup(['image', 'camera_info', 'poses']) + self.generate_namespace_lookup(['image', 'camera_info', 'detections']) image_pub = self.node.create_publisher( Image, self.namespaces['image'], self.DEFAULT_QOS) @@ -140,16 +140,16 @@ def test_pol(self): camera_info_pub = self.node.create_publisher( CameraInfo, self.namespaces['camera_info'], self.DEFAULT_QOS) - # The current DOPE decoder outputs PoseArray + # The current DOPE decoder outputs Detection3DArray subs = self.create_logging_subscribers( - [('poses', PoseArray)], received_messages) - + [('detections', Detection3DArray)], received_messages) try: - json_file = self.filepath / 'test_cases/pose_estimation_0/image.json' - image = JSONConversion.load_image_from_json(json_file) + image_json_file = self.filepath / 'test_cases/pose_estimation_0/image.json' + image = JSONConversion.load_image_from_json(image_json_file) image.header.stamp = self.node.get_clock().now().to_msg() - camera_info = CameraInfo() + camera_info_json_file = self.filepath / 'test_cases/pose_estimation_0/camera_info.json' + camera_info = JSONConversion.load_camera_info_from_json(camera_info_json_file) camera_info.header = image.header camera_info.distortion_model = 'plumb_bob' @@ -161,16 +161,18 @@ def test_pol(self): image_pub.publish(image) camera_info_pub.publish(camera_info) rclpy.spin_once(self.node, timeout_sec=(0.1)) - if 'poses' in received_messages: + if 'detections' in received_messages: done = True break self.assertTrue(done, 'Timeout. 
Appropriate output not received') self.node._logger.info('A message was successfully received') - received_poses = received_messages['poses'].poses - self.node._logger.info(f'Poses received: {received_poses}') - self.assertGreaterEqual(len(received_poses), 1, 'Did not receive at least one pose') + received_detections = received_messages['detections'].detections + self.node._logger.info(f'Detections received: {received_detections}') + self.assertGreaterEqual(len(received_detections), 1, + 'Did not receive at least one detection') finally: self.node.destroy_subscription(subs) self.node.destroy_publisher(image_pub) + self.node.destroy_publisher(camera_info_pub) diff --git a/isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json b/isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json new file mode 100644 index 0000000..dfaeccc --- /dev/null +++ b/isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json @@ -0,0 +1,51 @@ +{ + "header": { + "frame_id": "tf_camera" + }, + "height": 480, + "width": 640, + "distortion_model": "plumb_bob", + "D": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "K": [ + 364.16501736, + 0.0, + 121.36296296, + 0.0, + 364.16501736, + 121.36296296, + 0.0, + 0.0, + 1.0 + ], + "R": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "P": [ + 364.16501736, + 0.0, + 121.36296296, + 0.0, + 0.0, + 364.16501736, + 121.36296296, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0 + ] +} \ No newline at end of file diff --git a/isaac_ros_foundationpose/CMakeLists.txt b/isaac_ros_foundationpose/CMakeLists.txt index 991614d..c0038f0 100644 --- a/isaac_ros_foundationpose/CMakeLists.txt +++ b/isaac_ros_foundationpose/CMakeLists.txt @@ -36,6 +36,11 @@ rclcpp_components_register_nodes(foundationpose_tracking_node "nvidia::isaac_ros set(node_plugins "${node_plugins}nvidia::isaac_ros::foundationpose::FoundationPoseTrackingNode;$\n") +# Detection2DArrayFilter utils node +ament_auto_add_library(detection2_d_array_filter SHARED 
src/detection2_d_array_filter.cpp) +rclcpp_components_register_nodes(detection2_d_array_filter "nvidia::isaac_ros::foundationpose::Detection2DArrayFilter") +set(node_plugins "${node_plugins}nvidia::isaac_ros::foundationpose::Detection2DArrayFilter;$\n") + # Detection2DToMask utils node ament_auto_add_library(detection2_d_to_mask SHARED src/detection2_d_to_mask.cpp) rclcpp_components_register_nodes(detection2_d_to_mask "nvidia::isaac_ros::foundationpose::Detection2DToMask") @@ -75,4 +80,10 @@ if(BUILD_TESTING) add_launch_test(test/isaac_ros_foundationpose_tracking_pol.py TIMEOUT "900") endif() + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) +include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + ament_auto_package(INSTALL_TO_SHARE config launch rviz) diff --git a/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml b/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml index 468e035..3d6890c 100644 --- a/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml +++ b/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml @@ -21,7 +21,7 @@ components: - name: rgb_image_receiver type: nvidia::gxf::DoubleBufferReceiver parameters: - capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message + capacity: 12 - type: nvidia::gxf::MessageAvailableSchedulingTerm parameters: receiver: rgb_image_receiver @@ -29,7 +29,7 @@ components: - name: camera_model_receiver type: nvidia::gxf::DoubleBufferReceiver parameters: - capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message + capacity: 12 - type: nvidia::gxf::MessageAvailableSchedulingTerm parameters: receiver: camera_model_receiver @@ -37,7 +37,7 @@ components: - name: mask_receiver type: nvidia::gxf::DoubleBufferReceiver parameters: - capacity: 12 # 
Synchronization queue size set to 12 to allow for one out of order message + capacity: 12 - type: nvidia::gxf::MessageAvailableSchedulingTerm parameters: receiver: mask_receiver @@ -45,7 +45,7 @@ components: - name: depth_receiver type: nvidia::gxf::DoubleBufferReceiver parameters: - capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message + capacity: 12 - type: nvidia::gxf::MessageAvailableSchedulingTerm parameters: receiver: depth_receiver @@ -569,7 +569,7 @@ components: verbose: true clock: utils/clock max_workspace_size: 6710886400 - enable_fp16: true + enable_fp16: false max_batch_size: 42 - type: nvidia::gxf::MemoryAvailableSchedulingTerm parameters: @@ -815,7 +815,7 @@ components: verbose: true clock: utils/clock max_workspace_size: 6710886400 - enable_fp16: true + enable_fp16: false max_batch_size: 252 - type: nvidia::gxf::MemoryAvailableSchedulingTerm parameters: diff --git a/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp b/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp index 166a78b..726bc2a 100644 --- a/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp +++ b/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp @@ -66,6 +66,7 @@ class FoundationPoseNode : public nitros::NitrosNode const float min_depth_; const float max_depth_; const int32_t refine_iterations_; + const StringList symmetry_planes_; // Models file path const std::string refine_model_file_path_; diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py index 760e2b4..fd0f463 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py @@ -139,6 +139,12 @@ def generate_launch_description(): arguments=['-d', rviz_config_path], 
condition=IfCondition(launch_rviz)) + detection2_d_array_filter_node = ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ) detection2_d_to_mask_node = ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', @@ -146,7 +152,9 @@ def generate_launch_description(): parameters=[{ 'mask_width': mask_width, 'mask_height': mask_height - }]) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ) foundationpose_bbox_container = ComposableNodeContainer( name='foundationpose_container', @@ -155,6 +163,7 @@ def generate_launch_description(): executable='component_container_mt', composable_node_descriptions=[ foundationpose_node, + detection2_d_array_filter_node, detection2_d_to_mask_node], output='screen', condition=IfCondition(launch_bbox_to_mask) diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py index cb83c44..9b5bb43 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py @@ -36,9 +36,7 @@ VISUALIZATION_DOWNSCALING_FACTOR = 10 -REFINE_MODEL_PATH = '/tmp/refine_model.onnx' REFINE_ENGINE_PATH = '/tmp/refine_trt_engine.plan' -SCORE_MODEL_PATH = '/tmp/score_model.onnx' SCORE_ENGINE_PATH = '/tmp/score_trt_engine.plan' @@ -53,12 +51,9 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl # FoundationPose parameters mesh_file_path = LaunchConfiguration('mesh_file_path') texture_path = LaunchConfiguration('texture_path') - refine_model_file_path = LaunchConfiguration('refine_model_file_path') refine_engine_file_path = LaunchConfiguration('refine_engine_file_path') - score_model_file_path = LaunchConfiguration('score_model_file_path') 
score_engine_file_path = LaunchConfiguration('score_engine_file_path') # RT-DETR parameters - rt_detr_model_file_path = LaunchConfiguration('rt_detr_model_file_path') rt_detr_engine_file_path = LaunchConfiguration('rt_detr_engine_file_path') input_width = interface_specs['camera_resolution']['width'] input_height = interface_specs['camera_resolution']['height'] @@ -188,7 +183,6 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl package='isaac_ros_tensor_rt', plugin='nvidia::isaac_ros::dnn_inference::TensorRTNode', parameters=[{ - 'model_file_path': rt_detr_model_file_path, 'engine_file_path': rt_detr_engine_file_path, 'output_binding_names': ['labels', 'boxes', 'scores'], 'output_tensor_names': ['labels', 'boxes', 'scores'], @@ -206,15 +200,22 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl # Create a binary segmentation mask from a Detection2DArray published by RT-DETR. # The segmentation mask is of size # int(IMAGE_WIDTH/input_to_RT_DETR_ratio) x int(IMAGE_HEIGHT/input_to_RT_DETR_ratio) + 'detection2_d_array_filter_node': ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ), 'detection2_d_to_mask_node': ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(input_width/input_to_RT_DETR_ratio), - 'mask_height': int(input_height/input_to_RT_DETR_ratio)}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]), + 'mask_height': int(input_height/input_to_RT_DETR_ratio) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ), # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and 
segmentation mask to be of the same size @@ -272,14 +273,12 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl 'mesh_file_path': mesh_file_path, 'texture_path': texture_path, - 'refine_model_file_path': refine_model_file_path, 'refine_engine_file_path': refine_engine_file_path, 'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'refine_input_binding_names': ['input1', 'input2'], 'refine_output_tensor_names': ['output_tensor1', 'output_tensor2'], 'refine_output_binding_names': ['output1', 'output2'], - 'score_model_file_path': score_model_file_path, 'score_engine_file_path': score_engine_file_path, 'score_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'score_input_binding_names': ['input1', 'input2'], @@ -320,21 +319,11 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \ default_value='', description='The absolute file path to the texture map'), - 'refine_model_file_path': DeclareLaunchArgument( - 'refine_model_file_path', - default_value=REFINE_MODEL_PATH, - description='The absolute file path to the refine model'), - 'refine_engine_file_path': DeclareLaunchArgument( 'refine_engine_file_path', default_value=REFINE_ENGINE_PATH, description='The absolute file path to the refine trt engine'), - 'score_model_file_path': DeclareLaunchArgument( - 'score_model_file_path', - default_value=SCORE_MODEL_PATH, - description='The absolute file path to the score model'), - 'score_engine_file_path': DeclareLaunchArgument( 'score_engine_file_path', default_value=SCORE_ENGINE_PATH, diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py index 6d5d7d0..cef141c 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py @@ -160,7 +160,7 @@ def generate_launch_description(): parameters=[{'module_id': 5, 
'input_qos': 'SENSOR_DATA', 'output_qos': 'SENSOR_DATA', - 'enable_statistics': True, + 'enable_diagnostics': True, 'topics_list': ['left/image_raw'], 'expected_fps_list': [30.0], 'jitter_tolerance_us': 30000}], @@ -346,15 +346,22 @@ def generate_launch_description(): # Create a binary segmentation mask from a Detection2DArray published by RT-DETR. # The segmentation mask is of size int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO) x # int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO) + detection2_d_array_filter_node = ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ) detection2_d_to_mask_node = ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO), - 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]) + 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ) # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and segmentation mask to be of the same size @@ -508,6 +515,7 @@ def generate_launch_description(): rtdetr_preprocessor_node, tensor_rt_node, rtdetr_decoder_node, + detection2_d_array_filter_node, detection2_d_to_mask_node, resize_mask_node, resize_left_ess_size, diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py index e0a91d2..1eb2d8d 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py +++ 
b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py @@ -160,7 +160,7 @@ def generate_launch_description(): parameters=[{'module_id': 5, 'input_qos': 'SENSOR_DATA', 'output_qos': 'SENSOR_DATA', - 'enable_statistics': True, + 'enable_diagnostics': True, 'topics_list': ['left/image_raw'], 'expected_fps_list': [30.0], 'jitter_tolerance_us': 30000}], @@ -346,15 +346,22 @@ def generate_launch_description(): # Create a binary segmentation mask from a Detection2DArray published by RT-DETR. # The segmentation mask is of size int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO) x # int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO) + detection2_d_array_filter_node = ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ) detection2_d_to_mask_node = ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO), - 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]) + 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ) # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and segmentation mask to be of the same size @@ -530,6 +537,7 @@ def generate_launch_description(): rtdetr_preprocessor_node, tensor_rt_node, rtdetr_decoder_node, + detection2_d_array_filter_node, detection2_d_to_mask_node, resize_mask_node, resize_left_ess_size, diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py 
b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py index 8d31b56..0cc7f54 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py @@ -42,7 +42,8 @@ ISAAC_ROS_FP_MESHES_PATH = os.path.join(ISAAC_ROS_ASSETS_PATH, 'isaac_ros_foundationpose') STEREO_DISPARITY_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, - 'dnn_stereo_disparity', 'dnn_stereo_disparity_v4.0.0') + 'dnn_stereo_disparity', + 'dnn_stereo_disparity_v4.1.0_onnx') SYNTHETICA_DETR_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'synthetica_detr') FOUDNATIONPOSE_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'foundationpose') REFINE_ENGINE_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'refine_trt_engine.plan') @@ -94,7 +95,7 @@ def generate_launch_description(): DeclareLaunchArgument( 'ess_depth_threshold', - default_value='0.35', + default_value='0.4', description='Threshold value ranges between 0.0 and 1.0 ' 'for filtering disparity with confidence.'), @@ -226,16 +227,22 @@ def generate_launch_description(): ) # Convert Detection2DArray from RT-DETR to a binary segmentation mask + detection2_d_array_filter_node = ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ) detection2_d_to_mask_node = ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(SIM_IMAGE_WIDTH/SIM_TO_RT_DETR_RATIO), - 'mask_height': int(SIM_IMAGE_HEIGHT/SIM_TO_RT_DETR_RATIO), - 'sub_detection2_d_array': True}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]) + 'mask_height': int(SIM_IMAGE_HEIGHT/SIM_TO_RT_DETR_RATIO) + }], + 
remappings=[('segmentation', 'rt_detr_segmentation')] + ) # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and segmentation mask to be of the same size @@ -378,6 +385,7 @@ def generate_launch_description(): rtdetr_preprocessor_node, tensor_rt_node, rtdetr_decoder_node, + detection2_d_array_filter_node, detection2_d_to_mask_node, resize_mask_node, resize_left_ess_size, diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py index 6ab3c1a..344845f 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py @@ -42,12 +42,11 @@ ISAAC_ROS_FP_MESHES_PATH = os.path.join(ISAAC_ROS_ASSETS_PATH, 'isaac_ros_foundationpose') STEREO_DISPARITY_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, - 'dnn_stereo_disparity', 'dnn_stereo_disparity_v4.0.0') + 'dnn_stereo_disparity', + 'dnn_stereo_disparity_v4.1.0_onnx') SYNTHETICA_DETR_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'synthetica_detr') FOUDNATIONPOSE_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'foundationpose') REFINE_ENGINE_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'refine_trt_engine.plan') -REFINE_MODEL_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'refine_model.onnx') -SCORE_MODEL_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'score_model.onnx') SCORE_ENGINE_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'score_trt_engine.plan') ESS_ENGINE_PATH = os.path.join(STEREO_DISPARITY_MODELS_PATH, 'light_ess.engine') @@ -85,16 +84,6 @@ def generate_launch_description(): default_value=SCORE_ENGINE_PATH, description='The absolute file path to the score trt engine'), - DeclareLaunchArgument( - 'refine_model_file_path', - default_value=REFINE_MODEL_PATH, - description='The 
absolute file path to the refine trt engine'), - - DeclareLaunchArgument( - 'score_model_file_path', - default_value=SCORE_MODEL_PATH, - description='The absolute file path to the score trt engine'), - DeclareLaunchArgument( 'rt_detr_engine_file_path', default_value=RTDETR_ENGINE_PATH, @@ -107,7 +96,7 @@ def generate_launch_description(): DeclareLaunchArgument( 'ess_depth_threshold', - default_value='0.35', + default_value='0.4', description='Threshold value ranges between 0.0 and 1.0 ' 'for filtering disparity with confidence.'), @@ -126,8 +115,6 @@ def generate_launch_description(): texture_path = LaunchConfiguration('texture_path') refine_engine_file_path = LaunchConfiguration('refine_engine_file_path') score_engine_file_path = LaunchConfiguration('score_engine_file_path') - refine_model_file_path = LaunchConfiguration('refine_model_file_path') - score_model_file_path = LaunchConfiguration('score_model_file_path') rt_detr_engine_file_path = LaunchConfiguration('rt_detr_engine_file_path') ess_depth_engine_file_path = LaunchConfiguration('ess_depth_engine_file_path') ess_depth_threshold = LaunchConfiguration('ess_depth_threshold') @@ -368,14 +355,12 @@ def generate_launch_description(): 'mesh_file_path': mesh_file_path, 'texture_path': texture_path, - 'refine_model_file_path': refine_model_file_path, 'refine_engine_file_path': refine_engine_file_path, 'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'refine_input_binding_names': ['input1', 'input2'], 'refine_output_tensor_names': ['output_tensor1', 'output_tensor2'], 'refine_output_binding_names': ['output1', 'output2'], - 'score_model_file_path': score_model_file_path, 'score_engine_file_path': score_engine_file_path, 'score_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'score_input_binding_names': ['input1', 'input2'], @@ -392,7 +377,6 @@ def generate_launch_description(): 'mesh_file_path': mesh_file_path, 'texture_path': texture_path, - 'refine_model_file_path': 
refine_model_file_path, 'refine_engine_file_path': refine_engine_file_path, 'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'refine_input_binding_names': ['input1', 'input2'], diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py index b390a1d..ce29e0e 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py @@ -289,15 +289,22 @@ def generate_launch_description(): # Create a binary segmentation mask from a Detection2DArray published by RT-DETR. # The segmentation mask is of size int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO) x # int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO) + detection2_d_array_filter_node = ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ) detection2_d_to_mask_node = ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO), - 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]) + 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ) # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and segmentation mask to be of the same size @@ -400,6 +407,7 @@ def generate_launch_description(): rtdetr_preprocessor_node, tensor_rt_node, rtdetr_decoder_node, + 
detection2_d_array_filter_node, detection2_d_to_mask_node, resize_mask_node, foundationpose_node, diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py index 6ce4a0b..26d5006 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py @@ -289,15 +289,22 @@ def generate_launch_description(): # Create a binary segmentation mask from a Detection2DArray published by RT-DETR. # The segmentation mask is of size int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO) x # int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO) + detection2_d_array_filter_node = ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ) detection2_d_to_mask_node = ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO), - 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]) + 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ) # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and segmentation mask to be of the same size @@ -424,6 +431,7 @@ def generate_launch_description(): rtdetr_preprocessor_node, tensor_rt_node, rtdetr_decoder_node, + detection2_d_array_filter_node, detection2_d_to_mask_node, resize_mask_node, 
selector_node, diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py index 4ac9547..842eed9 100644 --- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py +++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py @@ -30,9 +30,7 @@ # RT-DETR models expect 3 image channels RT_DETR_MODEL_NUM_CHANNELS = 3 -REFINE_MODEL_PATH = '/tmp/refine_model.onnx' REFINE_ENGINE_PATH = '/tmp/refine_trt_engine.plan' -SCORE_MODEL_PATH = '/tmp/score_model.onnx' SCORE_ENGINE_PATH = '/tmp/score_trt_engine.plan' @@ -44,12 +42,9 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl # FoundationPose parameters mesh_file_path = LaunchConfiguration('mesh_file_path') texture_path = LaunchConfiguration('texture_path') - refine_model_file_path = LaunchConfiguration('refine_model_file_path') refine_engine_file_path = LaunchConfiguration('refine_engine_file_path') - score_model_file_path = LaunchConfiguration('score_model_file_path') score_engine_file_path = LaunchConfiguration('score_engine_file_path') # RT-DETR parameters - rt_detr_model_file_path = LaunchConfiguration('rt_detr_model_file_path') rt_detr_engine_file_path = LaunchConfiguration('rt_detr_engine_file_path') input_width = interface_specs['camera_resolution']['width'] input_height = interface_specs['camera_resolution']['height'] @@ -158,7 +153,6 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl package='isaac_ros_tensor_rt', plugin='nvidia::isaac_ros::dnn_inference::TensorRTNode', parameters=[{ - 'model_file_path': rt_detr_model_file_path, 'engine_file_path': rt_detr_engine_file_path, 'output_binding_names': ['labels', 'boxes', 'scores'], 'output_tensor_names': ['labels', 'boxes', 'scores'], @@ -179,15 +173,22 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl # 
Create a binary segmentation mask from a Detection2DArray published by RT-DETR. # The segmentation mask is of size # int(IMAGE_WIDTH/input_to_RT_DETR_ratio) x int(IMAGE_HEIGHT/input_to_RT_DETR_ratio) + 'detection2_d_array_filter_node': ComposableNode( + name='detection2_d_array_filter', + package='isaac_ros_foundationpose', + plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter', + remappings=[('detection2_d_array', 'detections_output')] + ), 'detection2_d_to_mask_node': ComposableNode( name='detection2_d_to_mask', package='isaac_ros_foundationpose', plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask', parameters=[{ 'mask_width': int(input_width/input_to_RT_DETR_ratio), - 'mask_height': int(input_height/input_to_RT_DETR_ratio)}], - remappings=[('detection2_d_array', 'detections_output'), - ('segmentation', 'rt_detr_segmentation')]), + 'mask_height': int(input_height/input_to_RT_DETR_ratio) + }], + remappings=[('segmentation', 'rt_detr_segmentation')] + ), # Resize segmentation mask to ESS model image size so it can be used by FoundationPose # FoundationPose requires depth, rgb image and segmentation mask to be of the same size @@ -239,14 +240,12 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl 'mesh_file_path': mesh_file_path, 'texture_path': texture_path, - 'refine_model_file_path': refine_model_file_path, 'refine_engine_file_path': refine_engine_file_path, 'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'refine_input_binding_names': ['input1', 'input2'], 'refine_output_tensor_names': ['output_tensor1', 'output_tensor2'], 'refine_output_binding_names': ['output1', 'output2'], - 'score_model_file_path': score_model_file_path, 'score_engine_file_path': score_engine_file_path, 'score_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'score_input_binding_names': ['input1', 'input2'], @@ -263,7 +262,6 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl 
'mesh_file_path': mesh_file_path, 'texture_path': texture_path, - 'refine_model_file_path': refine_model_file_path, 'refine_engine_file_path': refine_engine_file_path, 'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'], 'refine_input_binding_names': ['input1', 'input2'], @@ -288,31 +286,16 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \ default_value='', description='The absolute file path to the texture map'), - 'refine_model_file_path': DeclareLaunchArgument( - 'refine_model_file_path', - default_value=REFINE_MODEL_PATH, - description='The absolute file path to the refine model'), - 'refine_engine_file_path': DeclareLaunchArgument( 'refine_engine_file_path', default_value=REFINE_ENGINE_PATH, description='The absolute file path to the refine trt engine'), - 'score_model_file_path': DeclareLaunchArgument( - 'score_model_file_path', - default_value=SCORE_MODEL_PATH, - description='The absolute file path to the score model'), - 'score_engine_file_path': DeclareLaunchArgument( 'score_engine_file_path', default_value=SCORE_ENGINE_PATH, description='The absolute file path to the score trt engine'), - 'rt_detr_model_file_path': DeclareLaunchArgument( - 'rt_detr_model_file_path', - default_value='', - description='The absolute file path to the RT-DETR ONNX file'), - 'rt_detr_engine_file_path': DeclareLaunchArgument( 'rt_detr_engine_file_path', default_value='', diff --git a/isaac_ros_foundationpose/package.xml b/isaac_ros_foundationpose/package.xml index 2bb192a..4fc4147 100644 --- a/isaac_ros_foundationpose/package.xml +++ b/isaac_ros_foundationpose/package.xml @@ -21,7 +21,7 @@ isaac_ros_foundationpose - 3.1.0 + 3.2.0 FoundationPose 6D pose estimation Isaac ROS Maintainers diff --git a/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz b/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz index 06df3f7..1f8a8cd 100644 --- a/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz +++ 
b/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz @@ -114,7 +114,7 @@ Visualization Manager: Enabled: true Global Options: Background Color: 48; 48; 48 - Fixed Frame: left_cam + Fixed Frame: stereo_camera_left_optical Frame Rate: 30 Name: root Tools: diff --git a/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz b/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz index 5ce51d5..94a37d4 100644 --- a/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz +++ b/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz @@ -85,7 +85,7 @@ Visualization Manager: Enabled: true Global Options: Background Color: 48; 48; 48 - Fixed Frame: left_cam + Fixed Frame: stereo_camera_left_optical Frame Rate: 30 Name: root Tools: diff --git a/isaac_ros_foundationpose/src/detection2_d_array_filter.cpp b/isaac_ros_foundationpose/src/detection2_d_array_filter.cpp new file mode 100644 index 0000000..0a36128 --- /dev/null +++ b/isaac_ros_foundationpose/src/detection2_d_array_filter.cpp @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace foundationpose +{ + +/* +ROS 2 node that selects a single object from a vision_msgs::msg::Detection2DArray +based on desired class ID and confidence +*/ +class Detection2DArrayFilter : public rclcpp::Node +{ +public: + explicit Detection2DArrayFilter(const rclcpp::NodeOptions & options) + : Node("detection2_d_to_mask", options), + desired_class_id_(declare_parameter("desired_class_id", "")), + detection2_d_array_sub_{create_subscription( + "detection2_d_array", 10, + std::bind(&Detection2DArrayFilter::boundingBoxArrayCallback, this, std::placeholders::_1))}, + detection2_d_pub_{create_publisher("detection2_d", 10)} + {} + + void boundingBoxArrayCallback(const vision_msgs::msg::Detection2DArray::SharedPtr msg) + { + // Find the detection bounding box with the highest confidence + float max_confidence = 0; + vision_msgs::msg::Detection2D max_confidence_detection; + // Iterate through the detections and find the one with the highest confidence + for (const auto & detection : msg->detections) { + // Iterate through all the hypotheses for this detection + // and find the one with the highest confidence + for (const auto & result : detection.results) { + if (result.hypothesis.score > max_confidence && (desired_class_id_.empty() || + desired_class_id_ == result.hypothesis.class_id)) + { + max_confidence = result.hypothesis.score; + max_confidence_detection = detection; + } + } + } + + // If no detection was found, return error + if (max_confidence == 0) { + RCLCPP_DEBUG(this->get_logger(), "No detection found with non-zero confidence"); + return; + } + + detection2_d_pub_->publish(max_confidence_detection); + } + +private: + std::string desired_class_id_; + rclcpp::Subscription::SharedPtr detection2_d_array_sub_; + rclcpp::Publisher::SharedPtr detection2_d_pub_; +}; + +} // namespace foundationpose +} // namespace 
isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::foundationpose::Detection2DArrayFilter) diff --git a/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp b/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp index 2488864..8d8e072 100644 --- a/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp +++ b/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp @@ -16,6 +16,7 @@ // SPDX-License-Identifier: Apache-2.0 #include +#include #include #include #include @@ -39,24 +40,14 @@ class Detection2DToMask : public rclcpp::Node { public: explicit Detection2DToMask(const rclcpp::NodeOptions & options) - : Node("detection2_d_to_mask", options) + : Node("detection2_d_to_mask", options), + mask_width_(declare_parameter("mask_width", 640)), + mask_height_(declare_parameter("mask_height", 480)), + image_pub_{create_publisher("segmentation", 10)}, + detection2_d_sub_{create_subscription( + "detection2_d", rclcpp::QoS(rclcpp::SensorDataQoS()), + std::bind(&Detection2DToMask::boundingBoxCallback, this, std::placeholders::_1))} { - // Create a publisher for the mono8 image - image_pub_ = this->create_publisher("segmentation", 10); - - this->declare_parameter("mask_width", 640); - this->declare_parameter("mask_height", 480); - this->get_parameter("mask_width", mask_width_); - this->get_parameter("mask_height", mask_height_); - - detection2_d_array_sub_ = this->create_subscription( - "detection2_d_array", 10, - std::bind(&Detection2DToMask::boundingBoxArrayCallback, this, std::placeholders::_1)); - // Create subscriber for Detection2D - detection2_d_sub_ = this->create_subscription( - "detection2_d", 10, - std::bind(&Detection2DToMask::boundingBoxCallback, this, std::placeholders::_1)); - RCLCPP_INFO(this->get_logger(), "Mask Height: %d, Mask Width: %d", mask_height_, mask_width_); } @@ -83,56 +74,11 @@ class Detection2DToMask : public rclcpp::Node 
image_pub_->publish(image_msg); } - void boundingBoxArrayCallback(const vision_msgs::msg::Detection2DArray::SharedPtr msg) - { - // Find the detection bounding box with the highest confidence - float max_confidence = 0; - vision_msgs::msg::Detection2D max_confidence_detection; - // Iterate through the detections and find the one with the highest confidence - for (auto detection : msg->detections) { - // Iterate through all the hypotheses for this detection - // and find the one with the highest confidence - for (auto result : detection.results) { - if (result.hypothesis.score > max_confidence) { - max_confidence = result.hypothesis.score; - max_confidence_detection = detection; - } - } - } - - // If no detection was found, return error - if (max_confidence == 0) { - RCLCPP_INFO(this->get_logger(), "No detection found with non-zero confidence"); - return; - } - - // Convert Detection2D to a binary mono8 image - cv::Mat image = cv::Mat::zeros(mask_height_, mask_width_, CV_8UC1); - // Draws a rectangle filled with 255 - cv::rectangle( - image, - cv::Point( - max_confidence_detection.bbox.center.position.x - max_confidence_detection.bbox.size_x / 2, - max_confidence_detection.bbox.center.position.y - max_confidence_detection.bbox.size_y / 2), - cv::Point( - max_confidence_detection.bbox.center.position.x + max_confidence_detection.bbox.size_x / 2, - max_confidence_detection.bbox.center.position.y + max_confidence_detection.bbox.size_y / 2), - cv::Scalar(255), -1); - - // Convert the OpenCV image to a ROS sensor_msgs::msg::Image and publish it - std_msgs::msg::Header header(msg->header); - cv_bridge::CvImage cv_image(header, "mono8", image); - sensor_msgs::msg::Image image_msg; - cv_image.toImageMsg(image_msg); - image_pub_->publish(image_msg); - } - private: + int mask_width_; + int mask_height_; rclcpp::Publisher::SharedPtr image_pub_; rclcpp::Subscription::SharedPtr detection2_d_sub_; - rclcpp::Subscription::SharedPtr detection2_d_array_sub_; - int mask_height_; - 
int mask_width_; }; } // namespace foundationpose diff --git a/isaac_ros_foundationpose/src/foundationpose_node.cpp b/isaac_ros_foundationpose/src/foundationpose_node.cpp index 5c580eb..45e587d 100644 --- a/isaac_ros_foundationpose/src/foundationpose_node.cpp +++ b/isaac_ros_foundationpose/src/foundationpose_node.cpp @@ -23,6 +23,7 @@ #include "rclcpp/rclcpp.hpp" #include "tf2_ros/transform_broadcaster.h" +#include "isaac_ros_common/qos.hpp" #include "isaac_ros_nitros_camera_info_type/nitros_camera_info.hpp" #include "isaac_ros_nitros_detection3_d_array_type/nitros_detection3_d_array.hpp" #include "isaac_ros_nitros_image_type/nitros_image.hpp" @@ -145,6 +146,9 @@ FoundationPoseNode::FoundationPoseNode(rclcpp::NodeOptions options) min_depth_(declare_parameter("min_depth", 0.1)), max_depth_(declare_parameter("max_depth", 4.0)), refine_iterations_(declare_parameter("refine_iterations", 1)), + symmetry_planes_( + declare_parameter("symmetry_planes", StringList())), + refine_model_file_path_( declare_parameter("refine_model_file_path", "/tmp/refine_model.onnx")), refine_engine_file_path_( @@ -176,6 +180,34 @@ FoundationPoseNode::FoundationPoseNode(rclcpp::NodeOptions options) { RCLCPP_DEBUG(get_logger(), "[FoundationPoseNode] Constructor"); + // This function sets the QoS parameter for publishers and subscribers setup by this NITROS node + rclcpp::QoS depth_qos_ = ::isaac_ros::common::AddQosParameter( + *this, "DEFAULT", + "depth_qos"); + rclcpp::QoS color_qos_ = ::isaac_ros::common::AddQosParameter( + *this, "DEFAULT", + "color_qos"); + rclcpp::QoS color_info_qos_ = ::isaac_ros::common::AddQosParameter( + *this, "DEFAULT", + "color_info_qos"); + rclcpp::QoS segmentation_qos_ = ::isaac_ros::common::AddQosParameter( + *this, "DEFAULT", + "segmentation_qos"); + for (auto & config : config_map_) { + if (config.second.topic_name == INPUT_DEPTH_TOPIC_NAME) { + config.second.qos = depth_qos_; + } + if (config.second.topic_name == INPUT_RGB_IMAGE_TOPIC_NAME) { + 
config.second.qos = color_qos_; + } + if (config.second.topic_name == INPUT_CAMERA_INFO_TOPIC_NAME) { + config.second.qos = color_info_qos_; + } + if (config.second.topic_name == INPUT_SEGMENTATION_TOPIC_NAME) { + config.second.qos = segmentation_qos_; + } + } + // Add callback function for FoundationPose Detection3D array to broadcast to ROS TF tree config_map_[OUTPUT_POSE_COMPONENT_KEY].callback = std::bind( &FoundationPoseNode::FoundationPoseDetectionCallback, this, std::placeholders::_1, @@ -363,6 +395,7 @@ void FoundationPoseNode::postLoadGraphCallback() "refine_inference", "nvidia::gxf::TensorRtInference", "output_binding_names", refine_output_binding_names_); + // Set the score network TensorRT configs from parameter getNitrosContext().setParameterStr( "score_inference", "nvidia::gxf::TensorRtInference", "model_file_path", @@ -387,6 +420,24 @@ void FoundationPoseNode::postLoadGraphCallback() getNitrosContext().setParameter1DStrVector( "score_inference", "nvidia::gxf::TensorRtInference", "output_binding_names", score_output_binding_names_); + + if (symmetry_planes_.size() > 0) { + getNitrosContext().setParameter1DStrVector( + "sampling", "nvidia::isaac_ros::FoundationposeSampling", "symmetry_planes", + symmetry_planes_); + + // Number of symmetry poses that could treat as the same pose + auto symmetry_poses = std::min(static_cast(std::pow(2, symmetry_planes_.size())), 4); + int refine_max_batch_size = 252 / 6 / symmetry_poses; + getNitrosContext().setParameterInt32( + "refine_inference", "nvidia::gxf::TensorRtInference", "max_batch_size", + refine_max_batch_size); + + int score_max_batch_size = refine_max_batch_size * 6; + getNitrosContext().setParameterInt32( + "score_inference", "nvidia::gxf::TensorRtInference", "max_batch_size", + score_max_batch_size); + } } void FoundationPoseNode::FoundationPoseDetectionCallback( diff --git a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt index 
0624f2a..7efd20c 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt +++ b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt @@ -75,4 +75,10 @@ set_target_properties(${PROJECT_NAME} PROPERTIES # Install the binary file install(TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/gxf/lib) + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) +include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + ament_auto_package(INSTALL_TO_SHARE) diff --git a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml index 23c1e4a..bf88d50 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml +++ b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml @@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0 gxf_isaac_centerpose - 3.1.0 + 3.2.0 GXF extension containing CenterPose pose estimation related components. 
Isaac ROS Maintainers diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt b/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt index ed536ab..5bc4c5b 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt +++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt @@ -59,4 +59,10 @@ set_target_properties(${PROJECT_NAME} PROPERTIES # Install the binary file install(TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/gxf/lib) + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) +include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + ament_auto_package(INSTALL_TO_SHARE) diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp index 66f635b..f12d6e7 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp @@ -17,23 +17,27 @@ #include "dope_decoder.hpp" +#include #include -#include +#include +#include +#include +#include +#include "detection3_d_array_message/detection3_d_array_message.hpp" #include "opencv2/calib3d.hpp" #include "opencv2/core/eigen.hpp" #include "opencv2/core/mat.hpp" #include "opencv2/imgproc.hpp" -#include - +#include "gxf/core/parameter_parser_std.hpp" #include "gxf/multimedia/camera.hpp" #include "gxf/multimedia/video.hpp" -#include "gxf/core/parameter_parser_std.hpp" #include "gxf/std/tensor.hpp" #include "gxf/std/timestamp.hpp" + namespace nvidia { namespace isaac_ros { namespace dope { @@ -55,23 +59,24 @@ constexpr size_t kNumVertexChannel = kNumCorners + 1; constexpr float kGaussianSigma = 3.0; // Minimum acceptable sum of averaging weights constexpr float kMinimumWeightSum = 1e-6; -constexpr float kMapPeakThreshhold = 0.01; -constexpr float 
kPeakWeightThreshhold = 0.1; -constexpr float kAffinityMapAngleThreshhold = 0.5; // Offset added to belief map pixel coordinate, constant for the fixed input // image size // https://github.com/NVlabs/Deep_Object_Pose/blob/master/src/dope/inference/detector.py // line 343 constexpr float kOffsetDueToUpsampling = 0.4395f; +// Minimum required blurred belief map value at the peaks +constexpr float kBlurredPeakThreshold = 0.01; // The original image is kImageToMapScale larger in each dimension than the // output tensor from the DNN. constexpr float kImageToMapScale = 8.0f; -// From the requirements listed for pnp::ComputeCameraPoseEpnp. -constexpr size_t kRequiredPointsForPnP = 6; +// Require all 9 vertices to publish a pose +constexpr size_t kRequiredPointsForPnP = 9; // Placeholder for unidentify peak ids in DopeObject constexpr int kInvalidId = -1; // Placeholder for unknown best distance from centroid to peak in DopeObject constexpr float kInvalidDist = std::numeric_limits::max(); +// For converting sizes in cm to m in ExtractPose and when publishing bbox sizes. +constexpr double kCentimeterToMeter = 100.0; // The list of keypoint indices (0-8, 8 is centroid) and their corresponding 2d // pixel coordinates as columns in a matrix. 
@@ -93,7 +98,7 @@ struct DopeObject { }; struct Pose3d { -public: + public: Eigen::Vector3d translation; Eigen::Quaterniond rotation; @@ -106,7 +111,7 @@ struct Pose3d { }; // Returns pixel mask for local maximums in single - channel image src -void IsolateMaxima(const cv::Mat &src, cv::Mat &mask) { +void IsolateMaxima(const cv::Mat& src, cv::Mat& mask) { // Find pixels that are equal to the local neighborhood maxima cv::dilate(src, mask, cv::Mat()); cv::compare(src, mask, mask, cv::CMP_GE); @@ -119,8 +124,7 @@ void IsolateMaxima(const cv::Mat &src, cv::Mat &mask) { } // Returns pixel coordinate (row, col) of maxima in single-channel image -std::vector FindPeaks(const cv::Mat &image, - const float threshold) { +std::vector FindPeaks(const cv::Mat& image) { // Extract centers of local maxima cv::Mat mask; std::vector maxima; @@ -130,7 +134,7 @@ std::vector FindPeaks(const cv::Mat &image, // Find maxima std::vector peaks; for (const auto &m : maxima) { - if (image.at(m.y, m.x) > threshold) { + if (image.at(m.y, m.x) > kBlurredPeakThreshold) { peaks.push_back(Eigen::Vector2i(m.x, m.y)); } } @@ -140,13 +144,13 @@ std::vector FindPeaks(const cv::Mat &image, // Returns 3x9 matrix of the 3d coordinates of cuboid corners and center Eigen::Matrix -CuboidVertices(const std::array &ext) { +CuboidVertices(const std::array& ext) { // X axis points to the right - const double right = ext.at(0) * 0.5; - const double left = -ext.at(0) * 0.5; + const double right = -ext.at(0) * 0.5; + const double left = ext.at(0) * 0.5; // Y axis points downward - const double bottom = ext.at(1) * 0.5; - const double top = -ext.at(1) * 0.5; + const double bottom = -ext.at(1) * 0.5; + const double top = ext.at(1) * 0.5; // Z axis points forward (away from camera) const double front = ext.at(2) * 0.5; const double rear = -ext.at(2) * 0.5; @@ -160,7 +164,8 @@ CuboidVertices(const std::array &ext) { } std::vector -FindObjects(const std::array &maps) { +FindObjects(const std::array& maps, const double 
map_peak_threshold, + const double affinity_map_angle_threshold) { using Vector2f = Eigen::Vector2f; using Vector2i = Eigen::Vector2i; @@ -178,18 +183,18 @@ FindObjects(const std::array &maps) { kGaussianSigma, cv::BORDER_REFLECT); // Find the maxima of the tensor values in this channel - std::vector peaks = FindPeaks(blurred, kMapPeakThreshhold); + std::vector peaks = FindPeaks(blurred); for (size_t pp = 0; pp < peaks.size(); ++pp) { const auto peak = peaks[pp]; - // Compute the weighted average for localizing the peak, using a 5x5 + // Compute the weighted average for localizing the peak, using an 11x11 // window Vector2f peak_sum(0, 0); float weight_sum = 0.0f; - for (int ii = -2; ii <= 2; ++ii) { - for (int jj = -2; jj <= 2; ++jj) { - const int row = peak[0] + ii; - const int col = peak[1] + jj; + for (int ii = -5; ii <= 5; ++ii) { + for (int jj = -5; jj <= 5; ++jj) { + const int row = peak[1] + ii; + const int col = peak[0] + jj; if (col < 0 || col >= image.size[1] || row < 0 || row >= image.size[0]) { @@ -198,12 +203,12 @@ FindObjects(const std::array &maps) { const float weight = image.at(row, col); weight_sum += weight; - peak_sum[0] += row * weight; - peak_sum[1] += col * weight; + peak_sum[1] += row * weight; + peak_sum[0] += col * weight; } } - if (image.at(peak[1], peak[0]) >= kMapPeakThreshhold) { + if (image.at(peak[1], peak[0]) >= map_peak_threshold) { channel_peaks[chan].push_back(static_cast(all_peaks.size())); if (std::fabs(weight_sum) < kMinimumWeightSum) { all_peaks.push_back({peak[0] + kOffsetDueToUpsampling, @@ -227,15 +232,15 @@ FindObjects(const std::array &maps) { // Use 16 affinity field tensors (2 for each corner to centroid) to identify // corner-centroid associated for each corner peak for (size_t chan = 0; chan < kNumVertexChannel - 1; ++chan) { - const std::vector &peaks = channel_peaks[chan]; + const std::vector& peaks = channel_peaks[chan]; for (size_t pp = 0; pp < peaks.size(); ++pp) { int best_idx = kInvalidId; float best_distance 
= kInvalidDist; float best_angle = kInvalidDist; for (size_t jj = 0; jj < objects.size(); ++jj) { - const Vector2f ¢er = all_peaks[objects[jj].center]; - const Vector2f &point = all_peaks[peaks[pp]]; + const Vector2f& center = all_peaks[objects[jj].center]; + const Vector2f& point = all_peaks[peaks[pp]]; const Vector2i point_int(static_cast(point[0]), static_cast(point[1])); @@ -250,7 +255,7 @@ FindObjects(const std::array &maps) { const float angle = (v_center - v_aff).norm(); const float dist = (point - center).norm(); - if (angle < kAffinityMapAngleThreshhold && dist < best_distance) { + if (angle < affinity_map_angle_threshold && dist < best_distance) { best_idx = jj; best_distance = dist; best_angle = angle; @@ -262,7 +267,7 @@ FindObjects(const std::array &maps) { } if (objects[best_idx].corners[chan] == kInvalidId || - (best_angle < kAffinityMapAngleThreshhold && + (best_angle < affinity_map_angle_threshold && best_distance < objects[best_idx].best_distances[chan])) { objects[best_idx].corners[chan] = peaks[pp]; objects[best_idx].best_distances[chan] = best_distance; @@ -303,10 +308,11 @@ FindObjects(const std::array &maps) { } gxf::Expected> -ExtractPose(const DopeObjectKeypoints &object, - const Eigen::Matrix &cuboid_3d_points, - const cv::Mat &camera_matrix) { - const auto &valid_points = object.first; +ExtractPose(const DopeObjectKeypoints& object, + const Eigen::Matrix& cuboid_3d_points, + const cv::Mat& camera_matrix, const double rotation_y_axis, + const double rotation_x_axis, const double rotation_z_axis) { + const auto& valid_points = object.first; const size_t num_valid_points = valid_points.size(); Eigen::Matrix3Xd keypoints_3d(3, num_valid_points); for (size_t j = 0; j < num_valid_points; ++j) { @@ -315,24 +321,40 @@ ExtractPose(const DopeObjectKeypoints &object, Pose3d pose; cv::Mat rvec, tvec; - cv::Mat dist_coeffs = cv::Mat::zeros(1, 4, CV_64FC1); // no distortion + cv::Mat dist_coeffs = cv::Mat::zeros(1, 4, CV_64FC1); // no distortion 
cv::Mat cv_keypoints_3d; cv::eigen2cv(keypoints_3d, cv_keypoints_3d); cv::Mat cv_keypoints_2d; cv::eigen2cv(object.second, cv_keypoints_2d); if (!cv::solvePnP(cv_keypoints_3d.t(), cv_keypoints_2d.t(), camera_matrix, - dist_coeffs, rvec, tvec)) { + dist_coeffs, rvec, tvec, false, cv::SOLVEPNP_EPNP)) { GXF_LOG_ERROR("cv::solvePnP failed"); return nvidia::gxf::Unexpected{GXF_FAILURE}; } cv::cv2eigen(tvec, pose.translation); cv::Mat R; - cv::Rodrigues(rvec, R); // R is 3x3 + cv::Rodrigues(rvec, R); // R is 3x3 Eigen::Matrix3d e_mat; cv::cv2eigen(R, e_mat); - pose.rotation = Eigen::Quaterniond(e_mat); + Eigen::AngleAxisd rotation_y(rotation_y_axis, Eigen::Vector3d::UnitY()); + Eigen::AngleAxisd rotation_x(rotation_x_axis, Eigen::Vector3d::UnitX()); + Eigen::AngleAxisd rotation_z(rotation_z_axis, Eigen::Vector3d::UnitZ()); + + // Convert to rotation matrices + Eigen::Matrix3d rotation_y_matrix = rotation_y.toRotationMatrix(); + Eigen::Matrix3d rotation_x_matrix = rotation_x.toRotationMatrix(); + Eigen::Matrix3d rotation_z_matrix = rotation_z.toRotationMatrix(); + + // Compose the rotations + Eigen::Matrix3d composed_rotation = rotation_z_matrix * rotation_y_matrix * rotation_x_matrix; + + // Apply the composed rotation to the original matrix + Eigen::Matrix3d rotated_matrix = e_mat * composed_rotation; + + pose.rotation = Eigen::Quaterniond(rotated_matrix); + // If the Z coordinate is negative, the pose is placing the object behind // the camera (which is incorrect), so we flip it @@ -340,7 +362,6 @@ ExtractPose(const DopeObjectKeypoints &object, pose = pose.inverse(); } - constexpr double kCentimeterToMeter = 100.0; // Return pose data as array return std::array{ pose.translation[0] / kCentimeterToMeter, @@ -352,45 +373,58 @@ ExtractPose(const DopeObjectKeypoints &object, pose.rotation.w()}; } -gxf::Expected AddInputTimestampToOutput(gxf::Entity &output, - gxf::Entity input) { - std::string timestamp_name{"timestamp"}; - auto maybe_timestamp = 
input.get(timestamp_name.c_str()); +gxf::Expected AddInputTimestampToOutput(gxf::Entity& output, gxf::Entity input) { + std::string named_timestamp{"timestamp"}; + std::string unnamed_timestamp{""}; + auto maybe_input_timestamp = input.get(named_timestamp.c_str()); - // Default to unnamed - if (!maybe_timestamp) { - timestamp_name = std::string{""}; - maybe_timestamp = input.get(timestamp_name.c_str()); + // Try to get a named timestamp from the input entity + if (!maybe_input_timestamp) { + maybe_input_timestamp = input.get(unnamed_timestamp.c_str()); } - - if (!maybe_timestamp) { + // If there is no named timestamp, try to get an unnamed timestamp from the input entity + if (!maybe_input_timestamp) { GXF_LOG_ERROR("Failed to get input timestamp!"); - return gxf::ForwardError(maybe_timestamp); + return gxf::ForwardError(maybe_input_timestamp); } - auto maybe_out_timestamp = output.add(timestamp_name.c_str()); - if (!maybe_out_timestamp) { - GXF_LOG_ERROR("Failed to add timestamp to output message!"); - return gxf::ForwardError(maybe_out_timestamp); + // Try to get a named timestamp from the output entity + auto maybe_output_timestamp = output.get(named_timestamp.c_str()); + // If there is no named timestamp, try to get an unnamed timestamp from the output entity + if (!maybe_output_timestamp) { + maybe_output_timestamp = output.get(unnamed_timestamp.c_str()); } - *maybe_out_timestamp.value() = *maybe_timestamp.value(); + // If there is no unnamed timestamp also, then add a named timestamp to the output entity + if (!maybe_output_timestamp) { + maybe_output_timestamp = output.add(named_timestamp.c_str()); + if (!maybe_output_timestamp) { + GXF_LOG_ERROR("Failed to add timestamp to output message!"); + return gxf::ForwardError(maybe_output_timestamp); + } + } + + *maybe_output_timestamp.value() = *maybe_input_timestamp.value(); return gxf::Success; } -} // namespace +} // namespace gxf_result_t -DopeDecoder::registerInterface(gxf::Registrar *registrar) noexcept {
+DopeDecoder::registerInterface(gxf::Registrar* registrar) noexcept { gxf::Expected result; result &= registrar->parameter(tensorlist_receiver_, "tensorlist_receiver", "Tensorlist Input", "The detections as a tensorlist"); - result &= registrar->parameter(posearray_transmitter_, - "posearray_transmitter", "PoseArray output", - "The ouput poses as a pose array"); + result &= registrar->parameter(camera_model_input_, "camera_model_input", + "Camera Model Input", + "The Camera intrinsics as a Nitros Camera Info type"); + + result &= registrar->parameter(detection3darray_transmitter_, + "detection3darray_transmitter", "Detection3DArray output", + "The ouput poses as a Detection3D array"); result &= registrar->parameter(allocator_, "allocator", "Allocator", "Output Allocator"); @@ -399,14 +433,30 @@ DopeDecoder::registerInterface(gxf::Registrar *registrar) noexcept { object_dimensions_param_, "object_dimensions", "The dimensions of the object whose pose is being estimated"); - result &= registrar->parameter( - camera_matrix_param_, "camera_matrix", - "The parameters of the camera used to capture the input images"); - result &= registrar->parameter( object_name_, "object_name", "The class name of the object whose pose is being estimated"); + result &= registrar->parameter( + map_peak_threshold_, "map_peak_threshold", + "The minimum value of a peak in a belief map"); + + result &= registrar->parameter( + affinity_map_angle_threshold_, "affinity_map_angle_threshold", + "The maximum angle threshold for affinity mapping of corners to centroid"); + + result &= registrar->parameter( + rotation_y_axis_, "rotation_y_axis", "rotation_y_axis", + "Rotate Dope pose by N degrees along y axis", 0.0); + + result &= registrar->parameter( + rotation_x_axis_, "rotation_x_axis", "rotation_x_axis", + "Rotate Dope pose by N degrees along x axis", 0.0); + + result &= registrar->parameter( + rotation_z_axis_, "rotation_z_axis", "rotation_z_axis", + "Rotate Dope pose by N degrees along z axis", 
0.0); + return gxf::ToResultCode(result); } @@ -414,25 +464,30 @@ gxf_result_t DopeDecoder::start() noexcept { // Extract 3D coordinates of bounding cuboid + centroid from object dimensions auto dims = object_dimensions_param_.get(); if (dims.size() != 3) { - GXF_LOG_ERROR("Expected object dimensions vector to be length 3 but got %lu", - dims.size()); return GXF_FAILURE; } cuboid_3d_points_ = CuboidVertices({dims.at(0), dims.at(1), dims.at(2)}); - // Load camera matrix into cv::Mat - if (camera_matrix_param_.get().size() != 9) { - GXF_LOG_ERROR("Expected camera matrix vector to be length 9 but got %lu", - camera_matrix_param_.get().size()); - return GXF_FAILURE; - } camera_matrix_ = cv::Mat::zeros(3, 3, CV_64FC1); - std::memcpy(camera_matrix_.data, camera_matrix_param_.get().data(), - camera_matrix_param_.get().size() * sizeof(double)); return GXF_SUCCESS; } +gxf::Expected DopeDecoder::updateCameraProperties( + gxf::Handle camera_model) { + if (!camera_model) { + return gxf::Unexpected{GXF_FAILURE}; + } + + camera_matrix_.at(0, 0) = camera_model->focal_length.x; + camera_matrix_.at(0, 2) = camera_model->principal_point.x; + camera_matrix_.at(1, 1) = camera_model->focal_length.y; + camera_matrix_.at(1, 2) = camera_model->principal_point.y; + camera_matrix_.at(2, 2) = 1.0; + + return gxf::Success; +} + gxf_result_t DopeDecoder::tick() noexcept { const auto maybe_beliefmaps_message = tensorlist_receiver_->receive(); if (!maybe_beliefmaps_message) { @@ -448,22 +503,30 @@ gxf_result_t DopeDecoder::tick() noexcept { // Ensure belief maps match expected shape in first two dimensions if (belief_maps->shape().dimension(0) != kNumTensors || - belief_maps->shape().dimension(1) != kInputMapsChannels) - { + belief_maps->shape().dimension(1) != kInputMapsChannels) { GXF_LOG_ERROR( "Belief maps had unexpected shape in first two dimensions: {%d, %d, %d, %d}", belief_maps->shape().dimension(0), belief_maps->shape().dimension(1), belief_maps->shape().dimension(2),
belief_maps->shape().dimension(3)); return GXF_FAILURE; } + auto maybe_camera_model_entity = camera_model_input_->receive(); + if (!maybe_camera_model_entity) { + GXF_LOG_ERROR("Failed to receive input camera model entity!"); + return gxf::ToResultCode(maybe_camera_model_entity); + } + + auto maybe_camera_model = maybe_camera_model_entity.value().get("intrinsics"); + if (!maybe_camera_model) { + GXF_LOG_ERROR("Failed to receive input camera model!"); + return gxf::ToResultCode(maybe_camera_model); + } - // Allocate output message - auto maybe_posearray_message = gxf::Entity::New(context()); - if (!maybe_posearray_message) { - GXF_LOG_ERROR("Failed to allocate PoseArray Message"); - return gxf::ToResultCode(maybe_posearray_message); + auto maybe_updated_properties = updateCameraProperties(maybe_camera_model.value()); + if (!maybe_updated_properties) { + GXF_LOG_ERROR("Failed to update camera properties!"); + return gxf::ToResultCode(maybe_updated_properties); } - auto posearray_message = maybe_posearray_message.value(); // Copy tensor data over to a more portable form std::array maps; @@ -487,67 +550,63 @@ gxf_result_t DopeDecoder::tick() noexcept { } // Analyze the belief map to find vertex locations in image space - const std::vector dope_objects = FindObjects(maps); - - // Add timestamp to output msg - auto maybe_added_timestamp = AddInputTimestampToOutput( - posearray_message, maybe_beliefmaps_message.value()); + const std::vector dope_objects = + FindObjects(maps, map_peak_threshold_, affinity_map_angle_threshold_); + + // Create Detection3DList Message + auto maybe_detection3_d_list = nvidia::isaac::CreateDetection3DListMessage( + context(), dope_objects.size()); + if (!maybe_detection3_d_list) { + GXF_LOG_ERROR("[DopeDecoder] Failed to create detection3d list"); + return gxf::ToResultCode(maybe_detection3_d_list); + } + auto detection3_d_list = maybe_detection3_d_list.value(); + + auto maybe_added_timestamp = + 
AddInputTimestampToOutput(maybe_detection3_d_list->entity, maybe_beliefmaps_message.value()); if (!maybe_added_timestamp) { - return gxf::ToResultCode(maybe_added_timestamp); + GXF_LOG_ERROR("[DopeDecoder] Failed to add timestamp"); } if (dope_objects.empty()) { GXF_LOG_INFO("No objects detected."); - return gxf::ToResultCode( - posearray_transmitter_->publish(std::move(posearray_message))); + detection3darray_transmitter_->publish(std::move(detection3_d_list.entity))); } // Run Perspective-N-Point on the detected objects to find the 6-DoF pose of // the bounding cuboid - for (const DopeObjectKeypoints &object : dope_objects) { - // Allocate output tensor for this pose - auto maybe_pose_tensor = posearray_message.add(); - if (!maybe_pose_tensor) { - GXF_LOG_ERROR("Failed to allocate Pose Tensor"); - return gxf::ToResultCode(maybe_pose_tensor); - } - auto pose_tensor = maybe_pose_tensor.value(); - - // Initializing GXF tensor - auto result = pose_tensor->reshape( - nvidia::gxf::Shape{kExpectedPoseAsTensorSize}, - nvidia::gxf::MemoryStorageType::kDevice, allocator_); - if (!result) { - GXF_LOG_ERROR("Failed to reshape Pose Tensor to (%d,)", - kExpectedPoseAsTensorSize); - return gxf::ToResultCode(result); - } - - auto maybe_pose = ExtractPose(object, cuboid_3d_points_, camera_matrix_); + for (size_t i = 0; i < dope_objects.size(); i++) { + const DopeObjectKeypoints& object = dope_objects[i]; + auto maybe_pose = ExtractPose(object, cuboid_3d_points_, camera_matrix_, + rotation_y_axis_.get(), rotation_x_axis_.get(), + rotation_z_axis_.get()); if (!maybe_pose) { GXF_LOG_ERROR("Failed to extract pose from object"); return gxf::ToResultCode(maybe_pose); } auto pose = maybe_pose.value(); - // Copy pose data into pose tensor - const cudaMemcpyKind operation = cudaMemcpyHostToDevice; - const cudaError_t cuda_error = cudaMemcpy( - pose_tensor->pointer(), pose.data(), pose_tensor->size(), operation); - - if (cuda_error != cudaSuccess) { - GXF_LOG_ERROR("Failed to copy data to 
pose tensor: %s (%s)", - cudaGetErrorName(cuda_error), - cudaGetErrorString(cuda_error)); - return GXF_FAILURE; - } + // Rotation: w = pose[6], x = pose[3], y = pose[4], z = pose[5] + nvidia::isaac::Pose3d pose3d{ + nvidia::isaac::SO3d::FromQuaternion({pose[6], pose[3], pose[4], pose[5]}), // rotation + nvidia::isaac::Vector3d(pose[0], pose[1], pose[2]) // translation + }; + + auto bbox_dims = object_dimensions_param_.get(); + auto bbox_x = bbox_dims.at(0) / kCentimeterToMeter; + auto bbox_y = bbox_dims.at(1) / kCentimeterToMeter; + auto bbox_z = bbox_dims.at(2) / kCentimeterToMeter; + + **detection3_d_list.poses[i] = pose3d; + **detection3_d_list.bbox_sizes[i] = nvidia::isaac::Vector3f(bbox_x, bbox_y, bbox_z); + **detection3_d_list.hypothesis[i] = nvidia::isaac::ObjectHypothesis{ + std::vector{0.0}, std::vector{object_name_}}; } - return gxf::ToResultCode( - posearray_transmitter_->publish(std::move(posearray_message))); + detection3darray_transmitter_->publish(std::move(detection3_d_list.entity))); } -} // namespace dope -} // namespace isaac_ros -} // namespace nvidia +} // namespace dope +} // namespace isaac_ros +} // namespace nvidia diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp index 871b52d..e0a9cf4 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp @@ -18,18 +18,23 @@ #ifndef NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_ #define NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_ -#include "opencv2/core/mat.hpp" #include +#include +#include + #include "gxf/core/entity.hpp" #include "gxf/core/gxf.h" #include "gxf/core/parameter.hpp" +#include "gxf/core/parameter_parser_std.hpp" +#include "gxf/multimedia/camera.hpp" #include "gxf/std/codelet.hpp" #include "gxf/std/memory_buffer.hpp" -#include "gxf/core/parameter_parser_std.hpp" #include "gxf/std/receiver.hpp" 
#include "gxf/std/transmitter.hpp" +#include "opencv2/core/mat.hpp" + namespace nvidia { namespace isaac_ros { namespace dope { @@ -37,29 +42,40 @@ namespace dope { // GXF codelet that decodes object poses from an input tensor and produces an // output tensor representing a pose array class DopeDecoder : public gxf::Codelet { -public: + public: gxf_result_t start() noexcept override; gxf_result_t tick() noexcept override; gxf_result_t stop() noexcept override { return GXF_SUCCESS; } - gxf_result_t registerInterface(gxf::Registrar *registrar) noexcept override; + gxf_result_t registerInterface(gxf::Registrar* registrar) noexcept override; -private: + private: + gxf::Expected updateCameraProperties(gxf::Handle camera_model); gxf::Parameter> tensorlist_receiver_; - gxf::Parameter> posearray_transmitter_; + gxf::Parameter> camera_model_input_; + gxf::Parameter> detection3darray_transmitter_; gxf::Parameter> allocator_; // Parameters gxf::Parameter> object_dimensions_param_; gxf::Parameter> camera_matrix_param_; gxf::Parameter object_name_; + gxf::Parameter map_peak_threshold_; + gxf::Parameter affinity_map_angle_threshold_; + // These params can be used to specify a rotation of the pose output by the network, it is useful + // when one would like to align detections between Foundation pose and Dope. For example, for the + // soup can asset, the rotation would need to done along the y axis by 90 degrees. ALl rotation + // values here are in degrees. The rotation is performed in a ZYX sequence. 
+ gxf::Parameter rotation_y_axis_; + gxf::Parameter rotation_x_axis_; + gxf::Parameter rotation_z_axis_; // Parsed parameters Eigen::Matrix cuboid_3d_points_; cv::Mat camera_matrix_; }; -} // namespace dope -} // namespace isaac_ros -} // namespace nvidia +} // namespace dope +} // namespace isaac_ros +} // namespace nvidia -#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_ +#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_ diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp index 7cab5df..e3ccb6a 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp @@ -14,8 +14,10 @@ // limitations under the License. // // SPDX-License-Identifier: Apache-2.0 + #include +#include "detection3_d_array_message.hpp" #include "gxf/core/gxf.h" #include "gxf/std/extension_factory_helper.hpp" @@ -33,6 +35,13 @@ GXF_EXT_FACTORY_ADD(0x9bd4f8c4182a11ed, 0x861d0242ac120002, nvidia::isaac_ros::dope::DopeDecoder, nvidia::gxf::Codelet, "Codelet to decode DOPE output."); +GXF_EXT_FACTORY_ADD_0(0x2fd70869462e45f0, 0xefd09edc396d49bb, + nvidia::isaac::Vector3f, "3 Dimensional Vector"); + +GXF_EXT_FACTORY_ADD_0(0x84a3c15ccecb43a6, 0xed1fe7ab06a448ee, + nvidia::isaac::ObjectHypothesis, + "Score and class id of detected object"); + GXF_EXT_FACTORY_END() -} // extern "C" +} // extern "C" diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml b/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml index 32753df..0744764 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml +++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml @@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0 gxf_isaac_dope - 3.1.0 + 3.2.0 GXF extension containing CenterPose pose estimation related components. 
Isaac ROS Maintainers @@ -33,6 +33,7 @@ SPDX-License-Identifier: Apache-2.0 isaac_ros_common isaac_ros_gxf + isaac_ros_nitros_detection3_d_array_type gxf_isaac_gems diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt index c0195fc..4bab809 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt @@ -87,4 +87,10 @@ set_target_properties(${PROJECT_NAME} PROPERTIES # Install the binary file install(TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/gxf/lib) + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) +include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + ament_auto_package(INSTALL_TO_SHARE) \ No newline at end of file diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp index f1f1a46..a3412a6 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp @@ -19,6 +19,7 @@ #include "foundationpose_utils.hpp" #include +#include #include #include #include @@ -55,6 +56,9 @@ constexpr size_t kNumChannels = 3; constexpr size_t kOutputRank = 4; // Number of slices on the input constexpr int kNumBatches = 6; +constexpr int kFixTextureMapWidth = 1920; +constexpr int kFixTextureMapHeight = 1080; +constexpr int kFixTextureMapColor = 128; typedef Eigen::Matrix RowMajorMatrix; @@ -223,50 +227,6 @@ gxf_result_t ProjectMatrixFromIntrinsics( return GXF_SUCCESS; } -gxf_result_t GeneratePoseClip( - std::vector& pose_clip, const 
std::vector& poses, const Eigen::MatrixXf& bbox2d, const Eigen::MatrixXf& pose_homo, - const Eigen::Matrix4f& projection_mat, int rgb_H, int rgb_W) { - std::vector mtx; - for (const auto& pose : poses) { - mtx.push_back(projection_mat * (kGLCamInCVCam * pose)); - } - - if (mtx[0].cols() != pose_homo.cols()) { - GXF_LOG_ERROR("[FoundationposeRender] The col size of mtx is not the same as pos_home"); - return GXF_FAILURE; - } - - Eigen::VectorXf l = bbox2d.col(0).array(); - Eigen::VectorXf t = rgb_H - bbox2d.col(1).array(); - Eigen::VectorXf r = bbox2d.col(2).array(); - Eigen::VectorXf b = rgb_H - bbox2d.col(3).array(); - - int N = poses.size(); - pose_clip.reserve(N * pose_homo.rows() * pose_homo.cols()); - for (size_t i = 0; i < poses.size(); i++) { - auto m = mtx[i]; - // Make sure eigen matrix is row major if need to copy into GPU memory - Eigen::Matrix new_pos; - new_pos.resize(pose_homo.rows(), pose_homo.cols()); - for (int j = 0; j < pose_homo.rows(); j++) { - Eigen::VectorXf temp = m * pose_homo.row(j).transpose(); - new_pos.row(j) = temp.transpose(); - } - - Eigen::Matrix4f tf = Eigen::Matrix4f::Identity(); - tf(0, 0) = rgb_W / (r(i) - l(i)); - tf(1, 1) = rgb_H / (t(i) - b(i)); - tf(3, 0) = (rgb_W - r(i) - l(i)) / (r(i) - l(i)); - tf(3, 1) = (rgb_H - t(i) - b(i)) / (t(i) - b(i)); - new_pos = new_pos * tf; - // Vector of Eigen matrix is not continous in memory, flatten matrix and insert to vector - std::vector pose_data(new_pos.data(), new_pos.data() + new_pos.size()); - pose_clip.insert(pose_clip.end(), pose_data.begin(), pose_data.end()); - } - - return GXF_SUCCESS; -} - gxf_result_t ConstructBBox2D( Eigen::MatrixXf& bbox2d, const std::vector& tfs, int H, int W) { Eigen::MatrixXf bbox2d_crop(2, 2); @@ -452,10 +412,17 @@ gxf_result_t FoundationposeRender::start() noexcept { } } - // Load the texture map - cv::Mat texture_map = cv::imread(texture_path_.get()); cv::Mat rgb_texture_map; - cv::cvtColor(texture_map, rgb_texture_map, cv::COLOR_BGR2RGB); + if 
(!std::filesystem::exists(texture_path_.get())) { + GXF_LOG_WARNING("[FoundationposeRender], %s could not be found, assign texture map with pure color", + texture_path_.get().c_str()); + rgb_texture_map = cv::Mat(kFixTextureMapHeight, kFixTextureMapWidth, CV_8UC3, + cv::Scalar(kFixTextureMapColor, kFixTextureMapColor, kFixTextureMapColor)); + } else { + // Load the texture map + cv::Mat texture_map = cv::imread(texture_path_.get()); + cv::cvtColor(texture_map, rgb_texture_map, cv::COLOR_BGR2RGB); + } if (!rgb_texture_map.isContinuous()) { GXF_LOG_ERROR("[FoundationposeRender] Texture map is not continuous"); @@ -517,11 +484,15 @@ gxf_result_t FoundationposeRender::start() noexcept { // Allocate device memory for mesh data size_t faces_size = mesh_faces_.size() * sizeof(int32_t); size_t texcoords_size = texcoords_.size() * sizeof(float); - + size_t mesh_vertices_size = vertices_.size() * sizeof(float); + + CHECK_CUDA_ERRORS(cudaMalloc(&mesh_vertices_device_, mesh_vertices_size)); CHECK_CUDA_ERRORS(cudaMalloc(&mesh_faces_device_, faces_size)); CHECK_CUDA_ERRORS(cudaMalloc(&texcoords_device_, texcoords_size)); CHECK_CUDA_ERRORS(cudaMalloc(&texture_map_device_, rgb_texture_map.total() * kNumChannels)); + CHECK_CUDA_ERRORS(cudaMemcpyAsync( + mesh_vertices_device_, vertices_.data(), mesh_vertices_size , cudaMemcpyHostToDevice, cuda_stream_)); CHECK_CUDA_ERRORS(cudaMemcpyAsync( mesh_faces_device_, mesh_faces_.data(), faces_size, cudaMemcpyHostToDevice, cuda_stream_)); CHECK_CUDA_ERRORS(cudaMemcpyAsync( @@ -549,42 +520,22 @@ gxf_result_t FoundationposeRender::start() noexcept { gxf_result_t FoundationposeRender::NvdiffrastRender( - cudaStream_t cuda_stream_, std::vector& poses, Eigen::Matrix3f& K, Eigen::MatrixXf& bbox2d, int rgb_H, + cudaStream_t cuda_stream, std::vector& poses, Eigen::Matrix3f& K, Eigen::MatrixXf& bbox2d, int rgb_H, int rgb_W, int H, int W, nvcv::Tensor& flip_color_tensor, nvcv::Tensor& flip_xyz_map_tensor) { + // N represents the number of poses size_t 
N = poses.size(); - // Generate attributes for interpolate - std::vector pts_cam; - auto gxf_result = TransformPts(pts_cam, mesh_vertices_, poses); - if(gxf_result != GXF_SUCCESS) { - return gxf_result; - } - if (pts_cam.size() == 0 || pts_cam.size() != N) { - GXF_LOG_ERROR("[FoundationposeRender] The attribute size doesn't match pose size after transform"); - return GXF_FAILURE; - } - if (pts_cam[0].rows() != num_vertices_) { - GXF_LOG_ERROR("[FoundationposeRender] The attribute dimension doesn't match with vertices after transform"); + // For every pose, transform the vertices to the camera space + if (kVertexPoints != kPoseMatrixLength - 1) { + GXF_LOG_ERROR("[FoundationposeRender] The vertice channel should be same as pose matrix length - 1"); return GXF_FAILURE; } - - // Vector of Eigen matrix is not continous in memory, flatten matrix and insert to vector - size_t num_attr = pts_cam[0].cols(); - if (!pts_cam[0].IsRowMajor) { - GXF_LOG_ERROR("[FoundationposeRender] Pts cam need to be row major in order to copy into device memory"); - return GXF_FAILURE; - } - - std::vector pts_cam_vector; - pts_cam_vector.reserve(N * num_vertices_ * num_attr); - for (auto& mat : pts_cam) { - std::vector mat_data(mat.data(), mat.data() + mat.size()); - pts_cam_vector.insert(pts_cam_vector.end(), mat_data.begin(), mat_data.end()); - } + transform_pts(cuda_stream, pts_cam_device_, mesh_vertices_device_, pose_device_, num_vertices_, kVertexPoints, N, kPoseMatrixLength); + CHECK_CUDA_ERRORS(cudaGetLastError()); Eigen::Matrix4f projection_mat; - gxf_result = ProjectMatrixFromIntrinsics(projection_mat, K, rgb_H, rgb_W); - if(gxf_result != GXF_SUCCESS) { + auto gxf_result = ProjectMatrixFromIntrinsics(projection_mat, K, rgb_H, rgb_W); + if (gxf_result != GXF_SUCCESS) { return GXF_FAILURE; } @@ -599,58 +550,53 @@ gxf_result_t FoundationposeRender::NvdiffrastRender( return GXF_FAILURE; } - std::vector pose_clip; - gxf_result = GeneratePoseClip(pose_clip, poses, bbox2d, pose_homo, 
projection_mat, rgb_H, rgb_W); - if (gxf_result != GXF_SUCCESS) { - return GXF_FAILURE; - } - - if (pose_clip.size() == 0) { - GXF_LOG_ERROR("[FoundationposeRender] Pose clip should not be empty"); - return GXF_FAILURE; - } - // Allocate device memory for the intermedia results on the first frame - size_t pose_clip_size = pose_clip.size() * sizeof(float); - size_t pts_cam_size = pts_cam_vector.size() * sizeof(float); + size_t pose_clip_size = N * num_vertices_ * 4 * sizeof(float); size_t rast_out_size = N * H * W * 4 * sizeof(float); size_t color_size = N * H * W * kNumChannels * sizeof(float); size_t xyz_map_size = N * H * W * kNumChannels * sizeof(float); size_t texcoords_out_size = N * H * W * kTexcoordsDim * sizeof(float); + size_t bbox2d_size = N * 4 * sizeof(float); if (render_data_cached_ == false) { CHECK_CUDA_ERRORS(cudaMalloc(&pose_clip_device_, pose_clip_size)); CHECK_CUDA_ERRORS(cudaMalloc(&rast_out_device_, rast_out_size)); - CHECK_CUDA_ERRORS(cudaMalloc(&pts_cam_device_, pts_cam_size)); CHECK_CUDA_ERRORS(cudaMalloc(&texcoords_out_device_, texcoords_out_size)); CHECK_CUDA_ERRORS(cudaMalloc(&color_device_, color_size)); CHECK_CUDA_ERRORS(cudaMalloc(&xyz_map_device_, xyz_map_size)); + CHECK_CUDA_ERRORS(cudaMalloc(&pose_clip_device_, pose_clip_size)); + CHECK_CUDA_ERRORS(cudaMalloc(&bbox2d_device_, bbox2d_size)); cr_ = new CR::CudaRaster(); render_data_cached_ = true; } - // Copy other data to device memory - CHECK_CUDA_ERRORS(cudaMemcpyAsync( - pose_clip_device_, pose_clip.data(), pose_clip_size, cudaMemcpyHostToDevice, cuda_stream_)); - CHECK_CUDA_ERRORS(cudaMemcpyAsync( - pts_cam_device_, pts_cam_vector.data(), pts_cam_size, cudaMemcpyHostToDevice, cuda_stream_)); + std::vector h_bbox2d; + for(int j=0;j(); texture( - cuda_stream_, + cuda_stream, reinterpret_cast(float_texture_map_data->basePtr()), texcoords_out_device_, color_device_, @@ -668,7 +614,7 @@ gxf_result_t FoundationposeRender::NvdiffrastRender( float min_value = 0.0; float max_value = 1.0; 
- clamp(cuda_stream_, color_device_, min_value, max_value, N * H * W * kNumChannels); + clamp(cuda_stream, color_device_, min_value, max_value, N * H * W * kNumChannels); CHECK_CUDA_ERRORS(cudaGetLastError()); nvcv::Tensor color_tensor, xyz_map_tensor; @@ -676,10 +622,10 @@ gxf_result_t FoundationposeRender::NvdiffrastRender( WrapFloatPtrToNHWCTensor(xyz_map_device_, xyz_map_tensor, N, H, W, kNumChannels); cvcuda::Flip flip_op; - flip_op(cuda_stream_, color_tensor, flip_color_tensor, 0); + flip_op(cuda_stream, color_tensor, flip_color_tensor, 0); CHECK_CUDA_ERRORS(cudaGetLastError()); - flip_op(cuda_stream_, xyz_map_tensor, flip_xyz_map_tensor, 0); + flip_op(cuda_stream, xyz_map_tensor, flip_xyz_map_tensor, 0); CHECK_CUDA_ERRORS(cudaGetLastError()); return GXF_SUCCESS; @@ -845,10 +791,13 @@ gxf_result_t FoundationposeRender::tick() noexcept { // Malloc device memory for the output if (!rgb_data_cached_) { + CHECK_CUDA_ERRORS(cudaMalloc(&pts_cam_device_, N * num_vertices_ * kVertexPoints * sizeof(float))); CHECK_CUDA_ERRORS(cudaMalloc(&transformed_xyz_map_device_, N * H * W * C * sizeof(float))); CHECK_CUDA_ERRORS(cudaMalloc(&transformed_rgb_device_, N * H * W * C * sizeof(float))); CHECK_CUDA_ERRORS(cudaMalloc(&score_rendered_output_device_, total_poses * 2 * H * W * C * sizeof(float))); CHECK_CUDA_ERRORS(cudaMalloc(&score_original_output_device_, total_poses * 2 * H * W * C * sizeof(float))); + CHECK_CUDA_ERRORS(cudaMalloc(&wp_image_device_, N * H * W * C * sizeof(uint8_t))); + CHECK_CUDA_ERRORS(cudaMalloc(&trans_matrix_device_,N*9*sizeof(float))); nvcv::TensorShape::ShapeType shape{N, H, W, C}; nvcv::TensorShape tensor_shape{shape, "NHWC"}; @@ -859,6 +808,7 @@ gxf_result_t FoundationposeRender::tick() noexcept { } std::vector pose_host(pose_nums * pose_rows * pose_cols); + pose_device_ = reinterpret_cast(poses_handle->pointer()); CHECK_CUDA_ERRORS(cudaMemcpyAsync( pose_host.data(), poses_handle->pointer(), poses_handle->size(), cudaMemcpyDeviceToHost, 
cuda_stream_)); cudaStreamSynchronize(cuda_stream_); @@ -893,75 +843,141 @@ gxf_result_t FoundationposeRender::tick() noexcept { } // WarpPerspective on each image and xyz map, and cache the output for the future iterations - if (mode_.get() == "score" || iteration_count_ == 0) { - nvcv::TensorShape::ShapeType rgb_shape{1, rgb_H, rgb_W, C}; - nvcv::TensorShape rgb_tensor_shape{rgb_shape, "NHWC"}; + nvcv::TensorShape::ShapeType rgb_shape{1, rgb_H, rgb_W, C}; + nvcv::TensorShape rgb_tensor_shape{rgb_shape, "NHWC"}; - nvcv::Tensor rgb_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_U8); - nvcv::Tensor xyz_map_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_F32); + nvcv::Tensor rgb_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_U8); + nvcv::Tensor xyz_map_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_F32); - WrapImgPtrToNHWCTensor(reinterpret_cast(rgb_img_handle->pointer()), rgb_tensor, 1, rgb_H, rgb_W, C); - WrapFloatPtrToNHWCTensor(reinterpret_cast(xyz_map_handle->pointer()), xyz_map_tensor, 1, rgb_H, rgb_W, C); + WrapImgPtrToNHWCTensor(reinterpret_cast(rgb_img_handle->pointer()), rgb_tensor, 1, rgb_H, rgb_W, C); + WrapFloatPtrToNHWCTensor(reinterpret_cast(xyz_map_handle->pointer()), xyz_map_tensor, 1, rgb_H, rgb_W, C); - const int rgb_flags = NVCV_INTERP_LINEAR; - const int xyz_flags = NVCV_INTERP_NEAREST; - const float4 border_value = {0,0,0,0}; + const int rgb_flags = NVCV_INTERP_LINEAR; + const int xyz_flags = NVCV_INTERP_NEAREST; + const float4 border_value = {0,0,0,0}; - const float scale_factor = 1.0f / 255.0f; - cvcuda::WarpPerspective warpPerspectiveOp(0); - cvcuda::ConvertTo convert_op; - for (size_t index = 0; index < N; index++) { - nvcv::TensorShape::ShapeType transformed_shape{1,H,W,C}; - nvcv::TensorShape transformed_tensor_shape{transformed_shape, "NHWC"}; - - nvcv::Tensor transformed_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_U8); - nvcv::Tensor float_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, 
nvcv::TYPE_F32); - nvcv::Tensor transformed_xyz_map_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_F32); - - // get ptr offset from index - WrapFloatPtrToNHWCTensor(transformed_rgb_device_ + index * H * W * C, float_rgb_tensor, 1, H, W, C); - WrapFloatPtrToNHWCTensor(transformed_xyz_map_device_ + index * H * W * C, transformed_xyz_map_tensor, 1, H, W, C); + const float scale_factor = 1.0f / 255.0f; + int num_of_trans_mat = N; + cvcuda::WarpPerspective warpPerspectiveOpBatch(num_of_trans_mat); + cvcuda::ConvertTo convert_op; - NVCVPerspectiveTransform trans_matrix; - for (size_t i = 0; i < kPTMatrixDim; i++) { + std::vector trans_mat_flattened(N*9,0); + for(size_t index = 0; index(trans_matrix_device_); + + auto transMatrixTensor = nvcv::TensorWrapData(nvcv::TensorDataStridedCuda{ + nvcv::TensorShape({num_of_trans_mat, 9}, nvcv::TENSOR_NW), + nvcv::TYPE_F32, + buf_trans_mat + }); + + // Build rgb image batch input + const nvcv::ImageFormat fmt_rgb = nvcv::FMT_RGB8; + std::vector wp_rgb_src; + int imageSizeRGB = rgb_H * rgb_W * fmt_rgb.planePixelStrideBytes(0); + nvcv::ImageDataStridedCuda::Buffer buf_rgb; + buf_rgb.numPlanes = 1; + buf_rgb.planes[0].width = rgb_W; + buf_rgb.planes[0].height = rgb_H; + buf_rgb.planes[0].rowStride = rgb_W*fmt_rgb.planePixelStrideBytes(0); + buf_rgb.planes[0].basePtr = reinterpret_cast(rgb_img_handle->pointer()); + auto img_rgb = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_rgb, buf_rgb}); + + for (int i = 0; i < num_of_trans_mat; ++i) { + wp_rgb_src.emplace_back(img_rgb); + } - warpPerspectiveOp(cuda_stream_, rgb_tensor, transformed_rgb_tensor, trans_matrix, rgb_flags, NVCV_BORDER_CONSTANT, border_value); - CHECK_CUDA_ERRORS(cudaGetLastError()); + nvcv::ImageBatchVarShape batch_wp_rgb_src(num_of_trans_mat); + batch_wp_rgb_src.pushBack(wp_rgb_src.begin(), wp_rgb_src.end()); + + // Build xyz map batch input + const nvcv::ImageFormat fmt_xyz = nvcv::FMT_RGBf32; + std::vector wp_xyz_src; + int imageSizeXYZ = rgb_H * 
rgb_W * fmt_xyz.planePixelStrideBytes(0); + nvcv::ImageDataStridedCuda::Buffer buf_xyz; + buf_xyz.numPlanes = 1; + buf_xyz.planes[0].width = rgb_W; + buf_xyz.planes[0].height = rgb_H; + buf_xyz.planes[0].rowStride = rgb_W*fmt_xyz.planePixelStrideBytes(0); + buf_xyz.planes[0].basePtr = reinterpret_cast(xyz_map_handle->pointer()); + auto img_xyz = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_xyz, buf_xyz}); + + for (int i = 0; i < num_of_trans_mat; ++i) { + wp_xyz_src.emplace_back(img_xyz); + } - convert_op(cuda_stream_, transformed_rgb_tensor, float_rgb_tensor, scale_factor, 0.0f); - CHECK_CUDA_ERRORS(cudaGetLastError()); + nvcv::ImageBatchVarShape batch_wp_xyz_src(num_of_trans_mat); + batch_wp_xyz_src.pushBack(wp_xyz_src.begin(), wp_xyz_src.end()); + + // Build batched RGB output tensor + std::vector wp_rgb_dst; + for (int i = 0; i < num_of_trans_mat; ++i) { + nvcv::ImageDataStridedCuda::Buffer buf; + buf.numPlanes = 1; + buf.planes[0].width = W; + buf.planes[0].height = H; + buf.planes[0].rowStride = W * fmt_rgb.planePixelStrideBytes(0); + buf.planes[0].basePtr = reinterpret_cast(wp_image_device_ + i * H * W * C); + auto img = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_rgb, buf}); + wp_rgb_dst.push_back(img); + } + nvcv::ImageBatchVarShape batch_wp_rgb_dst(num_of_trans_mat); + batch_wp_rgb_dst.pushBack(wp_rgb_dst.begin(),wp_rgb_dst.end()); + + // Build batched XYZ map output tensor + std::vector wp_xyz_dst; + for (int i = 0; i < num_of_trans_mat; ++i) { + nvcv::ImageDataStridedCuda::Buffer buf; + buf.numPlanes = 1; + buf.planes[0].width = W; + buf.planes[0].height = H; + buf.planes[0].rowStride = W * fmt_xyz.planePixelStrideBytes(0); + buf.planes[0].basePtr = reinterpret_cast(transformed_xyz_map_device_ + i * H * W * C); + auto img = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_xyz, buf}); + wp_xyz_dst.push_back(img); + } + nvcv::ImageBatchVarShape batch_wp_xyz_dst(num_of_trans_mat); + 
batch_wp_xyz_dst.pushBack(wp_xyz_dst.begin(),wp_xyz_dst.end()); - warpPerspectiveOp(cuda_stream_, xyz_map_tensor, transformed_xyz_map_tensor, trans_matrix, xyz_flags, NVCV_BORDER_CONSTANT, border_value); - CHECK_CUDA_ERRORS(cudaGetLastError()); - } + // Warp Perspective for RGB image and XYZ map + warpPerspectiveOpBatch(cuda_stream_, batch_wp_rgb_src, batch_wp_rgb_dst, transMatrixTensor, rgb_flags, NVCV_BORDER_CONSTANT, border_value); + CHECK_CUDA_ERRORS(cudaGetLastError()); + warpPerspectiveOpBatch(cuda_stream_, batch_wp_xyz_src, batch_wp_xyz_dst, transMatrixTensor, xyz_flags, NVCV_BORDER_CONSTANT, border_value); + CHECK_CUDA_ERRORS(cudaGetLastError()); - threshold_and_downscale_pointcloud( - cuda_stream_, - transformed_xyz_map_device_, - reinterpret_cast(poses_handle->pointer()), - N, W * H, mesh_diameter_ / 2, min_depth_, max_depth_); - CHECK_CUDA_ERRORS(cudaGetLastError()); + // Convert RGB image from U8 to float + nvcv::TensorShape::ShapeType transformed_shape{N,H,W,C}; + nvcv::TensorShape transformed_tensor_shape{transformed_shape, "NHWC"}; - transformed_xyz_map_cache_[refine_recevied_batches_] = transformed_xyz_map_device_; - transformed_rgb_cache_[refine_recevied_batches_] = transformed_rgb_device_; - } else { - if (transformed_xyz_map_cache_.find(refine_recevied_batches_) != transformed_xyz_map_cache_.end()) { - transformed_xyz_map_device_ = transformed_xyz_map_cache_[refine_recevied_batches_]; - } else { - GXF_LOG_ERROR("[FoundationposeRender] Unable to find index %d from transformed xyz map cache", refine_recevied_batches_); - return GXF_FAILURE; - } - if (transformed_rgb_cache_.find(refine_recevied_batches_) != transformed_rgb_cache_.end()) { - transformed_rgb_device_ = transformed_rgb_cache_[refine_recevied_batches_]; - } else { - GXF_LOG_ERROR("[FoundationposeRender] Unable to find index %d from transformed rgb cache", refine_recevied_batches_); - return GXF_FAILURE; - } - } + nvcv::Tensor transformed_rgb_tensor = 
nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_U8); + nvcv::Tensor float_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_F32); + + WrapImgPtrToNHWCTensor(wp_image_device_, transformed_rgb_tensor, N, H, W, C); + WrapFloatPtrToNHWCTensor(transformed_rgb_device_, float_rgb_tensor, N, H, W, C); + + convert_op(cuda_stream_, transformed_rgb_tensor, float_rgb_tensor, scale_factor, 0.0f); + CHECK_CUDA_ERRORS(cudaGetLastError()); + + threshold_and_downscale_pointcloud( + cuda_stream_, + transformed_xyz_map_device_, + reinterpret_cast(poses_handle->pointer()), + N, W * H, mesh_diameter_ / 2, min_depth_, max_depth_); + CHECK_CUDA_ERRORS(cudaGetLastError()); auto render_rgb_data = render_rgb_tensor_.exportData(); auto render_xyz_map_data = render_xyz_map_tensor_.exportData(); @@ -1087,7 +1103,7 @@ gxf_result_t FoundationposeRender::tick() noexcept { iterative_point_cloud_transmitter.value()->publish(std::move(xyz_message)); iterative_camera_model_transmitter.value()->publish(std::move(camera_model_message)); } - + // Update loop and batch counters refine_recevied_batches_ += 1; if (refine_recevied_batches_ == kNumBatches) { @@ -1149,6 +1165,9 @@ gxf_result_t FoundationposeRender::stop() noexcept { CHECK_CUDA_ERRORS(cudaFree(transformed_rgb_device_)); CHECK_CUDA_ERRORS(cudaFree(score_rendered_output_device_)); CHECK_CUDA_ERRORS(cudaFree(score_original_output_device_)); + CHECK_CUDA_ERRORS(cudaFree(wp_image_device_)); + CHECK_CUDA_ERRORS(cudaFree(trans_matrix_device_)); + CHECK_CUDA_ERRORS(cudaFree(bbox2d_device_)); return GXF_SUCCESS; } diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu index a8dfc6d..09e7634 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu +++ 
b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu @@ -125,6 +125,115 @@ __global__ void concat_kernel( } } +// Transform points using transformation matrices +// Each thread will transform a point using N transformation matrices and save to output +__global__ void transform_pts_kernel( + float* output, const float* pts, const float* tfs, int pts_num, int pts_channel, int tfs_num, int tfs_dim) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < pts_num) { + for (int i = 0; i < tfs_num; i++) { + float* transformed_matrix = output + i * pts_num * (tfs_dim - 1) + idx * (tfs_dim - 1); + const float* submatrix = tfs + i * tfs_dim * tfs_dim; + const float* last_col = tfs + i * tfs_dim * tfs_dim + (tfs_dim - 1) * tfs_dim; + + for (int j = 0; j < pts_channel; j++) { + float new_row = 0.0f; + for (int k = 0; k < tfs_dim - 1; k++) { + new_row += submatrix[k * tfs_dim + j] * pts[idx * pts_channel + k]; + } + new_row += last_col[j]; + transformed_matrix[j] = new_row; + } + } + } +} + +__device__ float dot(const float4& v1, const float4& v2) { + return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z + v1.w * v2.w; +} + + +// From OpenCV camera (cvcam) coordinate system to the OpenGL camera (glcam) coordinate system +const Eigen::Matrix4f kGLCamInCVCam = + (Eigen::Matrix4f(4, 4) << 1, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1).finished(); +__constant__ float d_projection_fused[16]; + +// Function to copy data to constant memory +void copy_to_constant_memory(const Eigen::Matrix4f& projection_fused) { + std::vector flattened_projection_fused; + for(int i=0;i= n_pts) { return; } + + // Load point using float3 + // Convert float3 to float4 by adding a homogeneous coordinate w=1.0f + float3 pt3 = reinterpret_cast(d_pts)[idx]; + float4 pt = make_float4(pt3.x, pt3.y, pt3.z, 1.0f); + + // Shared memory for storing a single pose mat and bbox transformation matrix + size_t stride = sizeof(float4); + float4* shared_tf = 
reinterpret_cast(shared_mem); + float4* shared_bbox = reinterpret_cast(shared_mem + stride); + + for (int i = 0; i < n_poses; i++) { + float4* transformed_matrix = reinterpret_cast(output + i * n_pts * 4 + idx * 4); + // Compute bbox transformation (only done by first 4 threads per block) + if (threadIdx.x == 0) { + const float4* pose = reinterpret_cast(d_poses + i * stride); + // Load the projection matrix into shared memory (vectorized) + shared_tf[0] = make_float4(pose[0].x,pose[1].x,pose[2].x,pose[3].x); + shared_tf[1] = make_float4(pose[0].y,pose[1].y,pose[2].y,pose[3].y); + shared_tf[2] = make_float4(pose[0].z,pose[1].z,pose[2].z,pose[3].z); + shared_tf[3] = make_float4(pose[0].w,pose[1].w,pose[2].w,pose[3].w); + float l = d_bbox2d[i * 4]; + float t = rgb_H - d_bbox2d[i * 4 + 1]; + float r = d_bbox2d[i * 4 + 2]; + float b = rgb_H - d_bbox2d[i * 4 + 3]; + + shared_bbox[0] = make_float4(rgb_W / (r - l), 0, 0, (rgb_W - r - l) / (r - l)); + shared_bbox[1] = make_float4(0, rgb_H / (t - b), 0, (rgb_H - t - b) / (t - b)); + shared_bbox[2] = make_float4(0, 0, 1, 0); + shared_bbox[3] = make_float4(0, 0, 0, 1); + } + + __syncthreads(); + + // Perform transformation using vectorized operations + float4 result = matrix_vec_mul4x4(shared_tf, pt); + result = matrix_vec_mul4x4(reinterpret_cast(d_projection_fused), result); + result = matrix_vec_mul4x4(shared_bbox, result); + + *transformed_matrix = result; + } +} void clamp(cudaStream_t stream, float* input, float min_value, float max_value, int N) { int block_size = 256; @@ -133,6 +242,19 @@ void clamp(cudaStream_t stream, float* input, float min_value, float max_value, clamp_kernel<<>>(input, min_value, max_value, N); } +void generate_pose_clip( + cudaStream_t stream, float* d_pose_clip, const float* d_pose, const float* d_bbox2d, const float* d_mesh_vertices, + const Eigen::Matrix4f& projection_mat, int rgb_H, int rgb_W, int n_pts, int N) { + + copy_to_constant_memory(projection_mat*kGLCamInCVCam); + // Define block and 
grid size + int blockSize = 256; + int gridSize = (n_pts + blockSize - 1) / blockSize; + + pose_clip_kernel_fused<<>>( + d_pose_clip, d_pose, d_bbox2d, d_mesh_vertices, N, n_pts, rgb_H, rgb_W); +} + void threshold_and_downscale_pointcloud( cudaStream_t stream, float* pointcloud_input, float* pose_array_input, int N, int n_points, float downscale_factor, float min_depth, float max_depth) { @@ -144,12 +266,21 @@ void threshold_and_downscale_pointcloud( pointcloud_input, pose_array_input, N, n_points, downscale_factor, min_depth, max_depth); } -void concat(cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2) { +void transform_pts( + cudaStream_t stream, float* output, const float* pts, const float* tfs, int pts_num, int pts_channel, int tfs_num, int tfs_dim) { + // Lannch pts_num threads, each thread handle a point and N transformation matrices + int block_size = 256; + int grid_size = (pts_num + block_size - 1) / block_size; + transform_pts_kernel<<>>(output, pts, tfs, pts_num, pts_channel, tfs_num, tfs_dim); +} + +void concat( + cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2) { // Launch N*H*W threads, each thread handle a vector of size C int block_size = 256; int grid_size = (N * H * W + block_size - 1) / block_size; - concat_kernel<<>>(input_a, input_b, output, N, H, W, C1, C2); + concat_kernel<<>>(input_a, input_b, output, N, H, W, C1, C2); } void rasterize( diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp index 92ec9dc..817124c 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp @@ -38,6 +38,11 @@ void clamp(cudaStream_t stream, float* 
input, float min_value, float max_value, void threshold_and_downscale_pointcloud( cudaStream_t stream, float* pointcloud_input, float* pose_array_input, int N, int n_points, float downscale_factor, float min_depth, float max_depth); +void transform_pts( + cudaStream_t stream, float* output, const float* pts, const float* tfs, int pts_num, int pts_channel, int tfs_num, int tfs_dim); +void generate_pose_clip( + cudaStream_t stream, float* d_pose_clip, const float* d_pose, const float* bbox2d, const float* d_mesh_vertices, + const Eigen::Matrix4f& projection_mat, int rgb_H, int rgb_W, int n_pts, int n_poses); void concat(cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2); void rasterize( diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp index 4839b6c..c64a7c2 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp @@ -92,8 +92,10 @@ class FoundationposeRender : public gxf::Codelet { int texture_map_width_; float mesh_diameter_; + float* pose_device_; // Input data (GPU) for rendering kernels float* pose_clip_device_; + float* mesh_vertices_device_; int32_t* mesh_faces_device_; float* texcoords_device_; float* pts_cam_device_; @@ -107,10 +109,12 @@ class FoundationposeRender : public gxf::Codelet { float* xyz_map_device_; float* transformed_rgb_device_; float* transformed_xyz_map_device_; + uint8_t* wp_image_device_; + float* trans_matrix_device_; + float* bbox2d_device_; + nvcv::Tensor render_rgb_tensor_; nvcv::Tensor render_xyz_map_tensor_; - std::unordered_map transformed_xyz_map_cache_; - std::unordered_map transformed_rgb_cache_; float* score_rendered_output_device_; float* score_original_output_device_; diff 
--git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp index 77150cb..6e22c03 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp @@ -153,6 +153,46 @@ float RotationGeodesticDistance(const Eigen::Matrix3f& R1, const Eigen::Matrix3f return std::acos(cos); } +std::vector GenerateSymmetricPoses(const std::vector& symmetry_planes) { + float theta = 180.0 / 180.0 * M_PI; + std::vector x_angles = {0.0}; + std::vector y_angles = {0.0}; + std::vector z_angles = {0.0}; + std::vector symmetry_poses; + + for (int i = 0; i < symmetry_planes.size(); ++i) { + if (symmetry_planes[i] == "x"){ + x_angles.push_back(theta); + } else if (symmetry_planes[i] == "y") { + y_angles.push_back(theta); + } else if (symmetry_planes[i] == "z") { + z_angles.push_back(theta); + } else { + GXF_LOG_ERROR("[FoundationposeSampling] the input symmetry plane %s is invalid, ignore.", symmetry_planes[i]); + continue; + } + } + + // Compute rotation matrix for each angle + for (int i = 0; i < x_angles.size(); ++i) { + auto rot_x = Eigen::AngleAxisf(x_angles[i], Eigen::Vector3f::UnitX()); + for (int j = 0; j < y_angles.size(); ++j) { + auto rot_y = Eigen::AngleAxisf(y_angles[j], Eigen::Vector3f::UnitY()); + for (int k = 0; k < z_angles.size(); ++k) { + auto rot_z = Eigen::AngleAxisf(z_angles[k], Eigen::Vector3f::UnitZ()); + auto rotaion_matrix_x = rot_x.toRotationMatrix(); + auto rotaion_matrix_y = rot_y.toRotationMatrix(); + auto rotaion_matrix_z = rot_z.toRotationMatrix(); + auto rotation = rotaion_matrix_z * rotaion_matrix_y * rotaion_matrix_x; + Eigen::Matrix4f euler_matrix = Eigen::Matrix4f::Identity(); + euler_matrix.block<3, 3>(0, 0) = rotation; + symmetry_poses.push_back(euler_matrix); + } + } 
+ } + return std::move(symmetry_poses); +} + std::vector ClusterPoses( float angle_diff, float dist_diff, std::vector& poses_in, std::vector& symmetry_tfs) { @@ -216,7 +256,7 @@ std::vector SampleViewsIcosphere(unsigned int n_views) { return std::move(cam_in_obs); } -std::vector MakeRotationGrid(unsigned int n_views = 40, int inplane_step = 60) { +std::vector MakeRotationGrid(const std::vector& symmetry_planes, unsigned int n_views = 40, int inplane_step = 60) { auto cam_in_obs = SampleViewsIcosphere(n_views); std::vector rot_grid; @@ -234,10 +274,11 @@ std::vector MakeRotationGrid(unsigned int n_views = 40, int inp } } - std::vector symmetry_tfs = std::vector(); + std::vector symmetry_tfs = GenerateSymmetricPoses(symmetry_planes); symmetry_tfs.push_back(Eigen::Matrix4f::Identity()); - ClusterPoses(30.0, 99999.0, rot_grid, symmetry_tfs); - return std::move(rot_grid); + auto clustered_poses = ClusterPoses(30.0, 99999.0, rot_grid, symmetry_tfs); + GXF_LOG_DEBUG("[FoundationposeSampling] %lu poses left after clustering", clustered_poses.size()); + return std::move(clustered_poses); } bool GuessTranslation( @@ -254,7 +295,7 @@ bool GuessTranslation( } } if (us.empty()) { - GXF_LOG_ERROR("[FoundationposeSampling] Mask is all zero."); + GXF_LOG_INFO("[FoundationposeSampling] Mask is all zero."); return false; } @@ -336,6 +377,10 @@ gxf_result_t FoundationposeSampling::registerInterface(gxf::Registrar* registrar min_depth_, "min_depth", "Minimum Depth", "Minimum depth value to consider for estimating object center in image space.", 0.1f); + + result &= registrar->parameter( + symmetry_planes_, "symmetry_planes", "Symmetry Planes", + "Symmetry planes, select one or more from [x, y, z]. 
", std::vector{}); return gxf::ToResultCode(result); } @@ -445,7 +490,7 @@ gxf_result_t FoundationposeSampling::tick() noexcept { } // Generate Pose Hypothesis - auto ob_in_cams = MakeRotationGrid(); + auto ob_in_cams = MakeRotationGrid(symmetry_planes_.get()); if (ob_in_cams.size() == 0 || ob_in_cams.size() > max_hypothesis_) { GXF_LOG_ERROR("[FoundationposeSampling] The size of rotation grid is not valid."); return GXF_FAILURE; @@ -480,7 +525,7 @@ gxf_result_t FoundationposeSampling::tick() noexcept { Eigen::Vector3f center; auto success = GuessTranslation(bilateral_filter_depth_host, mask, K, min_depth_, center); if (!success) { - GXF_LOG_ERROR("[FoundationposeSampling] Failed to guess translation. Not processing this image"); + GXF_LOG_INFO("[FoundationposeSampling] Failed to guess translation. Not processing this image"); return GXF_SUCCESS; } for (auto& m : ob_in_cams) { diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp index 0ac4895..ef7be85 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp @@ -34,6 +34,7 @@ #include "foundationpose_sampling.cu.hpp" + namespace nvidia { namespace isaac_ros { @@ -59,6 +60,7 @@ class FoundationposeSampling : public gxf::Codelet { gxf::Parameter> cuda_stream_pool_; gxf::Parameter max_hypothesis_; gxf::Parameter min_depth_; + gxf::Parameter> symmetry_planes_; gxf::Handle stream_; float* erode_depth_device_; diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml index 41e940c..b43d18e 100644 --- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml +++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml @@ 
-21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0 gxf_isaac_foundationpose - 3.1.0 + 3.2.0 GXF extension containing Foundation Pose estimation related components. Isaac ROS Maintainers diff --git a/isaac_ros_pose_proc/CMakeLists.txt b/isaac_ros_pose_proc/CMakeLists.txt new file mode 100644 index 0000000..ca2ecde --- /dev/null +++ b/isaac_ros_pose_proc/CMakeLists.txt @@ -0,0 +1,109 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.22.1) +project(isaac_ros_pose_proc C CXX) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +find_package(ament_cmake_auto REQUIRED) +ament_auto_find_build_dependencies() + +# AveragingFilterNode +ament_auto_add_library(averaging_filter_node SHARED src/averaging_filter_node.cpp) +rclcpp_components_register_nodes(averaging_filter_node "nvidia::isaac_ros::pose_proc::AveragingFilterNode") +set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::AveragingFilterNode;$\n") +set_target_properties(averaging_filter_node PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + BUILD_RPATH_USE_ORIGIN TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE) + +# Detection3DArrayToPoseNode +ament_auto_add_library(detection3_d_array_to_pose_node SHARED src/detection3_d_array_to_pose_node.cpp) +rclcpp_components_register_nodes(detection3_d_array_to_pose_node "nvidia::isaac_ros::pose_proc::Detection3DArrayToPoseNode") +set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::Detection3DArrayToPoseNode;$\n") +set_target_properties(detection3_d_array_to_pose_node PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + BUILD_RPATH_USE_ORIGIN TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE) + +# DistanceSelectorNode +ament_auto_add_library(distance_selector_node SHARED src/distance_selector_node.cpp) +rclcpp_components_register_nodes(distance_selector_node "nvidia::isaac_ros::pose_proc::DistanceSelectorNode") +set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::DistanceSelectorNode;$\n") +set_target_properties(distance_selector_node PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + BUILD_RPATH_USE_ORIGIN TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE) + +# OutlierFilterNode +ament_auto_add_library(outlier_filter_node SHARED src/outlier_filter_node.cpp) +rclcpp_components_register_nodes(outlier_filter_node "nvidia::isaac_ros::pose_proc::OutlierFilterNode") 
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::OutlierFilterNode;$\n") +set_target_properties(outlier_filter_node PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + BUILD_RPATH_USE_ORIGIN TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE) + +# PoseArrayToPoseNode +ament_auto_add_library(pose_array_to_pose_node SHARED src/pose_array_to_pose_node.cpp) +rclcpp_components_register_nodes(pose_array_to_pose_node "nvidia::isaac_ros::pose_proc::PoseArrayToPoseNode") +set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::PoseArrayToPoseNode;$\n") +set_target_properties(pose_array_to_pose_node PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + BUILD_RPATH_USE_ORIGIN TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE) + +# StabilityFilterNode +ament_auto_add_library(stability_filter_node SHARED src/stability_filter_node.cpp) +rclcpp_components_register_nodes(stability_filter_node "nvidia::isaac_ros::pose_proc::StabilityFilterNode") +set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::StabilityFilterNode;$\n") +set_target_properties(stability_filter_node PROPERTIES + BUILD_WITH_INSTALL_RPATH TRUE + BUILD_RPATH_USE_ORIGIN TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE) + +# Install python scripts +install(PROGRAMS + scripts/pose_distribution_visualizer.py + DESTINATION lib/${PROJECT_NAME}) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + ament_lint_auto_find_test_dependencies() + + # The FindPythonInterp and FindPythonLibs modules are removed + if(POLICY CMP0148) + cmake_policy(SET CMP0148 OLD) + endif() + + find_package(launch_testing_ament_cmake REQUIRED) + add_launch_test(test/isaac_ros_averaging_filter_test.py) + add_launch_test(test/isaac_ros_distance_selector_test.py) + add_launch_test(test/isaac_ros_outlier_filter_test.py) + add_launch_test(test/isaac_ros_stability_filter_test.py) +endif() + + +# Embed versioning information into installed files +ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common) 
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake") +generate_version_info(${PROJECT_NAME}) + +ament_auto_package() diff --git a/isaac_ros_pose_proc/package.xml b/isaac_ros_pose_proc/package.xml new file mode 100644 index 0000000..79a12c6 --- /dev/null +++ b/isaac_ros_pose_proc/package.xml @@ -0,0 +1,51 @@ + + + + + + + isaac_ros_pose_proc + 3.2.0 + Pose filtering and processing + + Isaac ROS Maintainers + Apache-2.0 + https://developer.nvidia.com/isaac-ros-gems/ + Toya Takahashi + + ament_cmake_auto + + rclcpp + rclcpp_components + sensor_msgs + tf2_ros + tf2_geometry_msgs + vision_msgs + + isaac_ros_common + + ament_lint_auto + ament_lint_common + isaac_ros_test + + + ament_cmake + + + diff --git a/isaac_ros_pose_proc/scripts/pose_distribution_visualizer.py b/isaac_ros_pose_proc/scripts/pose_distribution_visualizer.py new file mode 100755 index 0000000..202bed0 --- /dev/null +++ b/isaac_ros_pose_proc/scripts/pose_distribution_visualizer.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +from geometry_msgs.msg import PoseStamped +import matplotlib.pyplot as plt +import rclpy +from rclpy.node import Node + + +class PoseDistributionVisualizer(Node): + + def __init__(self): + super().__init__('pose_distribution_visualizer') + + self._pose_sub = self.create_subscription( + PoseStamped, + 'pose_input', + self.pose_callback, + 10 + ) + + self._pose_history = { + 'position': { + 'x': [], + 'y': [], + 'z': [] + }, + 'orientation': { + 'x': [], + 'y': [], + 'z': [], + 'w': [], + } + } + + self.get_logger().info('Pose visualization starting') + + def visualize_data(self): + for key, data_dict in self._pose_history.items(): + fig, ax = plt.subplots(1, len(data_dict.values())) + for i, (label, value) in enumerate(data_dict.items()): + ax[i].hist(value) + ax[i].set_title(label) + fig.suptitle(key) + plt.show() + + def pose_callback(self, msg): + self._pose_history['position']['x'].append(msg.pose.position.x) + self._pose_history['position']['y'].append(msg.pose.position.y) + self._pose_history['position']['z'].append(msg.pose.position.z) + self._pose_history['orientation']['x'].append(msg.pose.orientation.x) + self._pose_history['orientation']['y'].append(msg.pose.orientation.y) + self._pose_history['orientation']['z'].append(msg.pose.orientation.z) + self._pose_history['orientation']['w'].append(msg.pose.orientation.w) + + +def main(args=None): + try: + rclpy.init(args=args) + node = PoseDistributionVisualizer() + rclpy.spin(node) + except KeyboardInterrupt: + node.visualize_data() + finally: + node.destroy_node() + if rclpy.ok(): + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/isaac_ros_pose_proc/src/averaging_filter_node.cpp b/isaac_ros_pose_proc/src/averaging_filter_node.cpp new file mode 100644 index 0000000..23df9a4 --- /dev/null +++ b/isaac_ros_pose_proc/src/averaging_filter_node.cpp @@ -0,0 +1,166 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 
NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace pose_proc +{ +namespace +{ +struct position_t +{ + double x{0}; + double y{0}; + double z{0}; +}; + +struct orientation_t +{ + double x{0}; + double y{0}; + double z{0}; + double w{0}; +}; +} // namespace + +class AveragingFilterNode : public rclcpp::Node +{ +public: + explicit AveragingFilterNode(const rclcpp::NodeOptions & options) + : Node("averaging_filter_node", options), + num_samples_(declare_parameter("num_samples", 10)), + enable_tf_publishing_(declare_parameter("enable_tf_publishing", false)), + child_frame_id_(declare_parameter("child_frame_id", "")), + pose_sub_{create_subscription( + "pose_input", 10, + std::bind(&AveragingFilterNode::poseCallback, this, std::placeholders::_1))}, + pose_pub_{create_publisher( + "pose_output", 10)}, + tf_broadcaster_{std::make_unique(*this)} + { + if (enable_tf_publishing_ && child_frame_id_ == "") { + RCLCPP_ERROR( + get_logger(), + "[AveragingFilterNode] Child frame id must be specified if output_tf is true"); + throw std::invalid_argument( + "[AveragingFilterNode] Invalid child frame id" + "Child frame id must be specified if output_tf is true."); + } + } + + void removeOldestPose() + { + if (pose_history_.size() >= 
static_cast(num_samples_)) { + position_total_.x -= pose_history_.front()->pose.position.x; + position_total_.y -= pose_history_.front()->pose.position.y; + position_total_.z -= pose_history_.front()->pose.position.z; + orientation_total_.x -= pose_history_.front()->pose.orientation.x; + orientation_total_.y -= pose_history_.front()->pose.orientation.y; + orientation_total_.z -= pose_history_.front()->pose.orientation.z; + orientation_total_.w -= pose_history_.front()->pose.orientation.w; + pose_history_.pop_front(); + } + } + + void addLatestPose(const geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + pose_history_.push_back(msg); + position_total_.x += msg->pose.position.x; + position_total_.y += msg->pose.position.y; + position_total_.z += msg->pose.position.z; + orientation_total_.x += msg->pose.orientation.x; + orientation_total_.y += msg->pose.orientation.y; + orientation_total_.z += msg->pose.orientation.z; + orientation_total_.w += msg->pose.orientation.w; + } + + geometry_msgs::msg::PoseStamped getAveragePose(geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + geometry_msgs::msg::PoseStamped averaged_pose; + averaged_pose.header = msg->header; + averaged_pose.pose.position.x = position_total_.x / pose_history_.size(); + averaged_pose.pose.position.y = position_total_.y / pose_history_.size(); + averaged_pose.pose.position.z = position_total_.z / pose_history_.size(); + const double quat_distance{std::sqrt( + orientation_total_.x * orientation_total_.x + + orientation_total_.y * orientation_total_.y + + orientation_total_.z * orientation_total_.z + + orientation_total_.w * orientation_total_.w)}; + averaged_pose.pose.orientation.x = orientation_total_.x / quat_distance; + averaged_pose.pose.orientation.y = orientation_total_.y / quat_distance; + averaged_pose.pose.orientation.z = orientation_total_.z / quat_distance; + averaged_pose.pose.orientation.w = orientation_total_.w / quat_distance; + return averaged_pose; + } + + void poseCallback(const 
geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + removeOldestPose(); + addLatestPose(msg); + if (pose_history_.size() < static_cast(num_samples_)) { + return; + } + + geometry_msgs::msg::PoseStamped out_pose = getAveragePose(msg); + + // Publish output PoseStamped + pose_pub_->publish(out_pose); + + // Broadcast tf if output_tf parameter is true + if (enable_tf_publishing_) { + geometry_msgs::msg::TransformStamped tf; + tf.header = out_pose.header; + tf.child_frame_id = child_frame_id_; + tf.transform.translation.x = out_pose.pose.position.x; + tf.transform.translation.y = out_pose.pose.position.y; + tf.transform.translation.z = out_pose.pose.position.z; + tf.transform.rotation = out_pose.pose.orientation; + tf_broadcaster_->sendTransform(tf); + } + } + +private: + int num_samples_; + bool enable_tf_publishing_; + std::string child_frame_id_; + rclcpp::Subscription::SharedPtr pose_sub_; + rclcpp::Publisher::SharedPtr pose_pub_; + std::unique_ptr tf_broadcaster_; + std::deque pose_history_; + position_t position_total_; + orientation_t orientation_total_; +}; + +} // namespace pose_proc +} // namespace isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::pose_proc::AveragingFilterNode) diff --git a/isaac_ros_pose_proc/src/detection3_d_array_to_pose_node.cpp b/isaac_ros_pose_proc/src/detection3_d_array_to_pose_node.cpp new file mode 100644 index 0000000..7287b57 --- /dev/null +++ b/isaac_ros_pose_proc/src/detection3_d_array_to_pose_node.cpp @@ -0,0 +1,84 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace pose_proc +{ + +/* +ROS 2 node that selects a single object from a vision_msgs::msg::Detection3DArray +based on desired class ID and confidence +*/ +class Detection3DArrayToPoseNode : public rclcpp::Node +{ +public: + explicit Detection3DArrayToPoseNode(const rclcpp::NodeOptions & options) + : Node("detection3_d_array_to_pose_node", options), + desired_class_id_(declare_parameter("desired_class_id", "")), + detection3_d_array_sub_{create_subscription( + "detection3_d_array_input", 10, + std::bind(&Detection3DArrayToPoseNode::detection3DArrayCallback, this, + std::placeholders::_1))}, + pose_pub_{create_publisher( + "pose_output", 10)} {} + + void detection3DArrayCallback(const vision_msgs::msg::Detection3DArray::SharedPtr msg) + { + if (msg->detections.empty()) { + return; + } + + // Find the detection with the highest confidence + float max_confidence = -1; + geometry_msgs::msg::PoseStamped max_confidence_pose; + max_confidence_pose.header = msg->header; + // Iterate through the detections and find the one with the highest confidence + for (const auto & detection : msg->detections) { + // Iterate through all the hypotheses for this detection + // and find the one with the highest confidence + for (const auto & result : detection.results) { + if (result.hypothesis.score > max_confidence && (desired_class_id_.empty() || + desired_class_id_ == result.hypothesis.class_id)) + { + max_confidence = 
result.hypothesis.score; + max_confidence_pose.pose = result.pose.pose; + } + } + } + pose_pub_->publish(max_confidence_pose); + } + +private: + std::string desired_class_id_; + rclcpp::Subscription::SharedPtr detection3_d_array_sub_; + rclcpp::Publisher::SharedPtr pose_pub_; +}; + +} // namespace pose_proc +} // namespace isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::pose_proc::Detection3DArrayToPoseNode) diff --git a/isaac_ros_pose_proc/src/distance_selector_node.cpp b/isaac_ros_pose_proc/src/distance_selector_node.cpp new file mode 100644 index 0000000..63f54ed --- /dev/null +++ b/isaac_ros_pose_proc/src/distance_selector_node.cpp @@ -0,0 +1,133 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace pose_proc +{ + +class DistanceSelectorNode : public rclcpp::Node +{ +public: + explicit DistanceSelectorNode(const rclcpp::NodeOptions & options) + : Node("distance_selector_node", options), + enable_tf_publishing_(declare_parameter("enable_tf_publishing", false)), + child_frame_id_(declare_parameter("child_frame_id", "")), + pose_sub_{create_subscription( + "pose_input", 10, + std::bind(&DistanceSelectorNode::poseCallback, this, std::placeholders::_1))}, + pose_array_sub_{create_subscription( + "pose_array_input", 10, + std::bind(&DistanceSelectorNode::poseArrayCallback, this, std::placeholders::_1))}, + pose_pub_{create_publisher( + "pose_output", 10)}, + tf_broadcaster_{std::make_unique(*this)} + { + if (enable_tf_publishing_ && child_frame_id_ == "") { + RCLCPP_ERROR( + get_logger(), + "[DistanceSelectorNode] Child frame id must be specified if output_tf is true"); + throw std::invalid_argument( + "[DistanceSelectorNode] Invalid child frame id" + "Child frame id must be specified if output_tf is true."); + } + } + + geometry_msgs::msg::PoseStamped distanceFilter(const geometry_msgs::msg::PoseArray::SharedPtr msg) + { + geometry_msgs::msg::PoseStamped closest_pose; + closest_pose.header = msg->header; + + // Calculate closest pose based on position + double closest_distance = std::numeric_limits::max(); + for (const geometry_msgs::msg::Pose & pose : msg->poses) { + const double dx{pose_to_compare_.value().position.x - pose.position.x}; + const double dy{pose_to_compare_.value().position.y - pose.position.y}; + const double dz{pose_to_compare_.value().position.z - pose.position.z}; + const double curr_dist{std::sqrt(dx * dx + dy * dy + dz * dz)}; + if (curr_dist < closest_distance) { + closest_distance = curr_dist; + closest_pose.pose = pose; + } + } + + 
return closest_pose; + } + + void poseCallback(const geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + pose_to_compare_ = msg->pose; + } + + void poseArrayCallback(const geometry_msgs::msg::PoseArray::SharedPtr msg) + { + // Ensure pose to compare has been received + if (!pose_to_compare_.has_value()) { + RCLCPP_WARN(get_logger(), "Pose to compare has not been received."); + return; + } + + // Find pose that is closest to pose_to_compare + geometry_msgs::msg::PoseStamped out_pose = distanceFilter(msg); + + // Publish output PoseStamped + pose_pub_->publish(out_pose); + + // Broadcast tf if output_tf parameter is true + if (enable_tf_publishing_) { + geometry_msgs::msg::TransformStamped tf; + tf.header = out_pose.header; + tf.child_frame_id = child_frame_id_; + tf.transform.translation.x = out_pose.pose.position.x; + tf.transform.translation.y = out_pose.pose.position.y; + tf.transform.translation.z = out_pose.pose.position.z; + tf.transform.rotation = out_pose.pose.orientation; + tf_broadcaster_->sendTransform(tf); + } + } + +private: + bool enable_tf_publishing_; + std::string child_frame_id_; + rclcpp::Subscription::SharedPtr pose_sub_; + rclcpp::Subscription::SharedPtr pose_array_sub_; + rclcpp::Publisher::SharedPtr pose_pub_; + std::optional pose_to_compare_; + std::unique_ptr tf_broadcaster_; +}; + +} // namespace pose_proc +} // namespace isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::pose_proc::DistanceSelectorNode) diff --git a/isaac_ros_pose_proc/src/outlier_filter_node.cpp b/isaac_ros_pose_proc/src/outlier_filter_node.cpp new file mode 100644 index 0000000..d1f3699 --- /dev/null +++ b/isaac_ros_pose_proc/src/outlier_filter_node.cpp @@ -0,0 +1,141 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace pose_proc +{ + +class OutlierFilterNode : public rclcpp::Node +{ +public: + explicit OutlierFilterNode(const rclcpp::NodeOptions & options) + : Node("outlier_filter_node", options), + distance_threshold_(declare_parameter("distance_threshold", 0.2)), + angle_threshold_(declare_parameter("angle_threshold", 30.0)), + pose_stale_time_threshold_(declare_parameter("pose_stale_time_threshold", 5.0)), + enable_tf_publishing_(declare_parameter("enable_tf_publishing", false)), + child_frame_id_(declare_parameter("child_frame_id", "")), + pose_sub_{create_subscription( + "pose_input", 10, + std::bind(&OutlierFilterNode::poseCallback, this, std::placeholders::_1))}, + pose_pub_{create_publisher( + "pose_output", 10)}, + tf_broadcaster_{std::make_unique(*this)} + { + if (enable_tf_publishing_ && child_frame_id_ == "") { + RCLCPP_ERROR( + get_logger(), + "[OutlierFilterNode] Child frame id must be specified if output_tf is true"); + throw std::invalid_argument( + "[OutlierFilterNode] Invalid child frame id" + "Child frame id must be specified if output_tf is true."); + } + } + + bool isOutlier(const geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + // Not outlier if it is the first pose + if 
(!pose_to_compare_.has_value()) { + pose_to_compare_ = *msg; + return false; + } + + // Calculate euclidean distance + const double dx{msg->pose.position.x - pose_to_compare_.value().pose.position.x}; + const double dy{msg->pose.position.y - pose_to_compare_.value().pose.position.y}; + const double dz{msg->pose.position.z - pose_to_compare_.value().pose.position.z}; + const double euclidean_dist{std::sqrt(dx * dx + dy * dy + dz * dz)}; + + // Calculate relative angle + tf2::Quaternion q_current, q_compare; + tf2::fromMsg(msg->pose.orientation, q_current); + tf2::fromMsg(pose_to_compare_.value().pose.orientation, q_compare); + const double angle_diff_rad{q_current.angleShortestPath(q_compare)}; + const double angle_diff_deg{angle_diff_rad * 180.0 / M_PI}; + + // Calculate time elapsed since previous good pose + rclcpp::Time msg_time = rclcpp::Time(msg->header.stamp); + rclcpp::Time pose_to_compare_time = rclcpp::Time(pose_to_compare_->header.stamp); + rclcpp::Duration time_since_good_pose = msg_time - pose_to_compare_time; + + // Not outlier if distance and angle within thresholds or previous good pose is too old + if ((euclidean_dist < distance_threshold_ && angle_diff_deg < angle_threshold_) || + (time_since_good_pose.seconds() > pose_stale_time_threshold_)) + { + pose_to_compare_ = *msg; + return false; + } + return true; + } + + void poseCallback(const geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + // Prevent publishing if pose is an outlier + if (isOutlier(msg)) { + return; + } + + // Publish output PoseStamped + pose_pub_->publish(*msg); + + // Broadcast tf if output_tf parameter is true + if (enable_tf_publishing_) { + geometry_msgs::msg::TransformStamped tf; + tf.header = msg->header; + tf.child_frame_id = child_frame_id_; + tf.transform.translation.x = msg->pose.position.x; + tf.transform.translation.y = msg->pose.position.y; + tf.transform.translation.z = msg->pose.position.z; + tf.transform.rotation = msg->pose.orientation; + 
tf_broadcaster_->sendTransform(tf); + } + } + +private: + double distance_threshold_; + double angle_threshold_; + double pose_stale_time_threshold_; + bool enable_tf_publishing_; + std::string child_frame_id_; + rclcpp::Subscription::SharedPtr pose_sub_; + rclcpp::Publisher::SharedPtr pose_pub_; + std::unique_ptr tf_broadcaster_; + std::optional pose_to_compare_; +}; + +} // namespace pose_proc +} // namespace isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::pose_proc::OutlierFilterNode) diff --git a/isaac_ros_pose_proc/src/pose_array_to_pose_node.cpp b/isaac_ros_pose_proc/src/pose_array_to_pose_node.cpp new file mode 100644 index 0000000..62689ee --- /dev/null +++ b/isaac_ros_pose_proc/src/pose_array_to_pose_node.cpp @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace pose_proc +{ + +// This node only selects the first pose out of the input pose array and throws away all the rest +class PoseArrayToPoseNode : public rclcpp::Node +{ +public: + explicit PoseArrayToPoseNode(const rclcpp::NodeOptions & options) + : Node("pose_array_to_pose_node", options), + pose_array_sub_{create_subscription( + "pose_array_input", 10, + std::bind(&PoseArrayToPoseNode::poseArrayCallback, this, std::placeholders::_1))}, + pose_pub_{create_publisher( + "pose_output", 10)} {} + + void poseArrayCallback(const geometry_msgs::msg::PoseArray::SharedPtr msg) + { + if (!msg->poses.empty()) { + geometry_msgs::msg::PoseStamped out_msg; + out_msg.header = msg->header; + out_msg.pose = msg->poses[0]; + pose_pub_->publish(out_msg); + } + } + +private: + rclcpp::Subscription::SharedPtr pose_array_sub_; + rclcpp::Publisher::SharedPtr pose_pub_; +}; + +} // namespace pose_proc +} // namespace isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::pose_proc::PoseArrayToPoseNode) diff --git a/isaac_ros_pose_proc/src/stability_filter_node.cpp b/isaac_ros_pose_proc/src/stability_filter_node.cpp new file mode 100644 index 0000000..8566d3c --- /dev/null +++ b/isaac_ros_pose_proc/src/stability_filter_node.cpp @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace nvidia +{ +namespace isaac_ros +{ +namespace pose_proc +{ + +class StabilityFilterNode : public rclcpp::Node +{ +public: + explicit StabilityFilterNode(const rclcpp::NodeOptions & options) + : Node("stability_filter_node", options), + num_samples_(declare_parameter("num_samples", 10)), + distance_threshold_(declare_parameter("distance_threshold", 0.05)), + angle_threshold_(declare_parameter("angle_threshold", 5.0)), + enable_tf_publishing_(declare_parameter("enable_tf_publishing", false)), + child_frame_id_(declare_parameter("child_frame_id", "")), + pose_sub_{create_subscription( + "pose_input", 10, + std::bind(&StabilityFilterNode::poseCallback, this, std::placeholders::_1))}, + pose_pub_{create_publisher( + "pose_output", 10)}, + tf_broadcaster_{std::make_unique(*this)} + { + if (enable_tf_publishing_ && child_frame_id_ == "") { + RCLCPP_ERROR( + get_logger(), + "[StabilityFilterNode] Child frame id must be specified if output_tf is true"); + throw std::invalid_argument( + "[StabilityFilterNode] Invalid child frame id" + "Child frame id must be specified if output_tf is true."); + } + } + + bool isStable(const geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + // If it is the first pose received, update prev_stable_pose and stable_pose_count + if (!pose_to_compare_.has_value()) { + pose_to_compare_ = *msg; + stable_pose_count_ = 1; + return false; + } + + // Calculate euclidean 
distance + const double dx{msg->pose.position.x - pose_to_compare_.value().pose.position.x}; + const double dy{msg->pose.position.y - pose_to_compare_.value().pose.position.y}; + const double dz{msg->pose.position.z - pose_to_compare_.value().pose.position.z}; + const double euclidean_dist{std::sqrt(dx * dx + dy * dy + dz * dz)}; + + // Calculate relative angle + tf2::Quaternion q_current, q_compare; + tf2::fromMsg(msg->pose.orientation, q_current); + tf2::fromMsg(pose_to_compare_.value().pose.orientation, q_compare); + const double angle_diff_rad{q_current.angleShortestPath(q_compare)}; + const double angle_diff_deg{angle_diff_rad * 180.0 / M_PI}; + + if (euclidean_dist < distance_threshold_ && angle_diff_deg < angle_threshold_) { + // If distance and angle within thresholds, increment stable_pose_count + stable_pose_count_++; + } else { + // Else, reset stable_pose_count and pose_to_compare and return false + pose_to_compare_ = *msg; + stable_pose_count_ = 1; + } + + return stable_pose_count_ >= num_samples_; + } + + void poseCallback(const geometry_msgs::msg::PoseStamped::SharedPtr msg) + { + // Prevent publishing if pose is not stable + if (!isStable(msg)) { + return; + } + + // Publish output PoseStamped + pose_pub_->publish(*msg); + + // Broadcast tf if output_tf parameter is true + if (enable_tf_publishing_) { + geometry_msgs::msg::TransformStamped tf; + tf.header = msg->header; + tf.child_frame_id = child_frame_id_; + tf.transform.translation.x = msg->pose.position.x; + tf.transform.translation.y = msg->pose.position.y; + tf.transform.translation.z = msg->pose.position.z; + tf.transform.rotation = msg->pose.orientation; + tf_broadcaster_->sendTransform(tf); + } + } + +private: + int num_samples_; + double distance_threshold_; + double angle_threshold_; + bool enable_tf_publishing_; + std::string child_frame_id_; + rclcpp::Subscription::SharedPtr pose_sub_; + rclcpp::Publisher::SharedPtr pose_pub_; + std::unique_ptr tf_broadcaster_; + std::optional 
pose_to_compare_; + int stable_pose_count_; +}; + +} // namespace pose_proc +} // namespace isaac_ros +} // namespace nvidia + +// Register the component with the ROS system to create a shared library +RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::pose_proc::StabilityFilterNode) diff --git a/isaac_ros_pose_proc/test/isaac_ros_averaging_filter_test.py b/isaac_ros_pose_proc/test/isaac_ros_averaging_filter_test.py new file mode 100644 index 0000000..3059a3b --- /dev/null +++ b/isaac_ros_pose_proc/test/isaac_ros_averaging_filter_test.py @@ -0,0 +1,137 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +import math +import time + +from geometry_msgs.msg import Point, Pose, PoseStamped, Quaternion +from isaac_ros_test import IsaacROSBaseTest +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + +import pytest +import rclpy + +NUM_SAMPLES = 2 + + +@pytest.mark.rostest +def generate_test_description(): + """Generate launch description with all ROS 2 nodes for testing.""" + composable_nodes = [ + ComposableNode( + package='isaac_ros_pose_proc', + plugin='nvidia::isaac_ros::pose_proc::AveragingFilterNode', + name='averaging_filter_node', + namespace=IsaacROSAveragingFilterTest.generate_namespace(), + parameters=[{ + 'num_samples': NUM_SAMPLES + }] + ) + ] + + averaging_container = ComposableNodeContainer( + name='averaging_container', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=composable_nodes, + namespace=IsaacROSAveragingFilterTest.generate_namespace(), + output='screen' + ) + + return IsaacROSAveragingFilterTest.generate_test_description([averaging_container]) + + +class IsaacROSAveragingFilterTest(IsaacROSBaseTest): + + def test_averaging_filter(self): + """ + Test averaging filter to stabilize noisy poses. + + Given two poses, the filter should output a single averaged pose. + The quaternion is averaged by dividing the sum of each vector + component by the norm of the summed quaternion. 
+ """ + received_messages = {} + + self.generate_namespace_lookup(['pose_input', 'pose_output']) + + pose_pub = self.node.create_publisher( + PoseStamped, self.namespaces['pose_input'], self.DEFAULT_QOS) + + subs = self.create_logging_subscribers( + [('pose_output', PoseStamped)], received_messages, accept_multiple_messages=True) + + try: + # Wait at most TIMEOUT seconds for subscriber to respond + TIMEOUT = 5 + end_time = time.time() + TIMEOUT + done = False + + # Input poses + POSES_IN = [ + PoseStamped(pose=Pose( + position=Point(x=1.0, y=2.0, z=3.0), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=2.0, y=4.0, z=6.0), + orientation=Quaternion(x=1.0, y=0.0, z=0.0, w=0.0) + )) + ] + + # Expected output poses + POSES_OUT = [ + PoseStamped(pose=Pose( + position=Point(x=1.5, y=3.0, z=4.5), + orientation=Quaternion( + x=math.sqrt(2)/2, y=0.0, z=0.0, w=math.sqrt(2)/2)) + ) + ] + + # Wait until received expected poses + while time.time() < end_time: + for pose in POSES_IN: + pose_pub.publish(pose) + rclpy.spin_once(self.node, timeout_sec=(0.1)) + if 'pose_output' in received_messages and \ + len(received_messages['pose_output']) >= len(POSES_OUT): + done = True + break + + self.assertTrue(done, 'Appropriate output not received') + received_poses = received_messages['pose_output'] + self.assertEqual(len(received_poses), len(POSES_OUT)) + for received, expected in zip(received_poses, POSES_OUT): + self.assertAlmostEqual(received.pose.position.x, + expected.pose.position.x, places=7) + self.assertAlmostEqual(received.pose.position.y, + expected.pose.position.y, places=7) + self.assertAlmostEqual(received.pose.position.z, + expected.pose.position.z, places=7) + self.assertAlmostEqual(received.pose.orientation.x, + expected.pose.orientation.x, places=7) + self.assertAlmostEqual(received.pose.orientation.y, + expected.pose.orientation.y, places=7) + self.assertAlmostEqual(received.pose.orientation.z, + 
expected.pose.orientation.z, places=7) + self.assertAlmostEqual(received.pose.orientation.w, + expected.pose.orientation.w, places=7) + + finally: + self.node.destroy_subscription(subs) + self.node.destroy_publisher(pose_pub) diff --git a/isaac_ros_pose_proc/test/isaac_ros_distance_selector_test.py b/isaac_ros_pose_proc/test/isaac_ros_distance_selector_test.py new file mode 100644 index 0000000..b744419 --- /dev/null +++ b/isaac_ros_pose_proc/test/isaac_ros_distance_selector_test.py @@ -0,0 +1,111 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +import time + +from geometry_msgs.msg import Point, Pose, PoseArray, PoseStamped +from isaac_ros_test import IsaacROSBaseTest +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + +import pytest +import rclpy + + +@pytest.mark.rostest +def generate_test_description(): + """Generate launch description with all ROS 2 nodes for testing.""" + composable_nodes = [ + ComposableNode( + package='isaac_ros_pose_proc', + plugin='nvidia::isaac_ros::pose_proc::DistanceSelectorNode', + name='distance_selector_node', + namespace=IsaacROSDistanceSelectorTest.generate_namespace() + ) + ] + + distance_selector_container = ComposableNodeContainer( + name='distance_selector_container', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=composable_nodes, + namespace=IsaacROSDistanceSelectorTest.generate_namespace(), + output='screen' + ) + + return IsaacROSDistanceSelectorTest.generate_test_description([distance_selector_container]) + + +class IsaacROSDistanceSelectorTest(IsaacROSBaseTest): + + def test_distance_selector(self): + """ + Test distance selector to select a desired pose from a given pose array. + + The selected pose has the smallest euclidean distance to a given input pose. 
+ """ + received_messages = {} + + self.generate_namespace_lookup(['pose_array_input', 'pose_input', 'pose_output']) + + pose_array_pub = self.node.create_publisher( + PoseArray, self.namespaces['pose_array_input'], self.DEFAULT_QOS) + + pose_pub = self.node.create_publisher( + PoseStamped, self.namespaces['pose_input'], self.DEFAULT_QOS) + + subs = self.create_logging_subscribers([('pose_output', PoseStamped)], received_messages) + + try: + # Wait at most TIMEOUT seconds for subscriber to respond + TIMEOUT = 5 + end_time = time.time() + TIMEOUT + done = False + + # Input PoseArray + POSE_ARRAY_IN = PoseArray(poses=[ + Pose(position=Point(x=0.0, y=0.0, z=0.0)), + Pose(position=Point(x=1.0, y=1.0, z=1.0)), + Pose(position=Point(x=-1.0, y=-1.0, z=-1.0)) + ]) + + # Input PoseStamped + POSE_IN = PoseStamped(pose=Pose( + position=Point(x=2.0, y=2.0, z=2.0) + )) + + # Expected output PoseStamped + POSE_OUT = PoseStamped(pose=Pose( + position=Point(x=1.0, y=1.0, z=1.0) + )) + + # Wait until received expected pose + while time.time() < end_time: + pose_array_pub.publish(POSE_ARRAY_IN) + pose_pub.publish(POSE_IN) + rclpy.spin_once(self.node, timeout_sec=(0.1)) + if 'pose_output' in received_messages: + done = True + break + + self.assertTrue(done, 'Appropriate output not received') + pose = received_messages['pose_output'] + self.assertEqual(pose, POSE_OUT) + + finally: + self.node.destroy_subscription(subs) + self.node.destroy_publisher(pose_pub) diff --git a/isaac_ros_pose_proc/test/isaac_ros_outlier_filter_test.py b/isaac_ros_pose_proc/test/isaac_ros_outlier_filter_test.py new file mode 100644 index 0000000..55a65aa --- /dev/null +++ b/isaac_ros_pose_proc/test/isaac_ros_outlier_filter_test.py @@ -0,0 +1,148 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +import time + +from geometry_msgs.msg import Point, Pose, PoseStamped, Quaternion +from isaac_ros_test import IsaacROSBaseTest +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + +import pytest +import rclpy + + +NUM_SAMPLES = 5 +DISTANCE_THRESHOLD = 0.5 +ANGLE_THRESHOLD = 30.0 +TIMEOUT = 5.0 + + +@pytest.mark.rostest +def generate_test_description(): + """Generate launch description with all ROS 2 nodes for testing.""" + composable_nodes = [ + ComposableNode( + package='isaac_ros_pose_proc', + plugin='nvidia::isaac_ros::pose_proc::OutlierFilterNode', + name='outlier_filter_node', + namespace=IsaacROSOutlierFilterTest.generate_namespace(), + parameters=[{ + 'num_samples': NUM_SAMPLES, + 'distance_threshold': DISTANCE_THRESHOLD, + 'angle_threshold': ANGLE_THRESHOLD, + 'pose_stale_time_threshold': TIMEOUT + }] + ) + ] + + outlier_filter_container = ComposableNodeContainer( + name='outlier_filter_container', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=composable_nodes, + namespace=IsaacROSOutlierFilterTest.generate_namespace(), + output='screen' + ) + + return IsaacROSOutlierFilterTest.generate_test_description([outlier_filter_container]) + + +class IsaacROSOutlierFilterTest(IsaacROSBaseTest): + + def test_outlier_filter(self): + """ + Test filtering poses with large 
position or orientation deviations. + + The first pose received is always published. All subsequent poses with euclidean + distance to the first pose less than DISTANCE_THRESHOLD and shortest path angle difference + less than ANGLE_THRESHOLD are published. + """ + received_messages = {} + + self.generate_namespace_lookup(['pose_input', 'pose_output']) + + pose_pub = self.node.create_publisher( + PoseStamped, self.namespaces['pose_input'], self.DEFAULT_QOS) + + subs = self.create_logging_subscribers( + [('pose_output', PoseStamped)], received_messages, accept_multiple_messages=True) + + try: + # Wait at most TIMEOUT seconds for subscriber to respond + end_time = time.time() + TIMEOUT + done = False + + # Input poses + POSES_IN = [ + PoseStamped(pose=Pose( + position=Point(x=1.2, y=2.1, z=3.3), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=1.0, y=2.0, z=3.0), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=1.3, y=2.4, z=3.1), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=1.0, y=2.0, z=3.0), + orientation=Quaternion(x=0.0, y=0.284, z=0.0, w=0.959) + )), + PoseStamped(pose=Pose( + position=Point(x=1.1, y=2.2, z=3.1), + orientation=Quaternion(x=0.131, y=0.0, z=0.0, w=0.991) + )) + ] + + # Expected output poses + POSES_OUT = [ + PoseStamped(pose=Pose( + position=Point(x=1.2, y=2.1, z=3.3), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=1.0, y=2.0, z=3.0), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=1.1, y=2.2, z=3.1), + orientation=Quaternion(x=0.131, y=0.0, z=0.0, w=0.991) + )), + ] + + # Wait until received expected poses + while time.time() < end_time: + # Publish poses + for pose in POSES_IN: + pose_pub.publish(pose) + rclpy.spin_once(self.node, timeout_sec=(0.1)) + if 'pose_output' in 
received_messages and \ + len(received_messages['pose_output']) >= len(POSES_OUT): + done = True + break + + self.assertTrue(done, 'Appropriate output not received') + poses = received_messages['pose_output'] + self.assertEqual(len(poses), len(POSES_OUT)) + self.assertEqual(poses, POSES_OUT) + + finally: + self.node.destroy_subscription(subs) + self.node.destroy_publisher(pose_pub) diff --git a/isaac_ros_pose_proc/test/isaac_ros_stability_filter_test.py b/isaac_ros_pose_proc/test/isaac_ros_stability_filter_test.py new file mode 100644 index 0000000..1e240ce --- /dev/null +++ b/isaac_ros_pose_proc/test/isaac_ros_stability_filter_test.py @@ -0,0 +1,143 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +import time + +from geometry_msgs.msg import Point, Pose, PoseStamped, Quaternion +from isaac_ros_test import IsaacROSBaseTest +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + +import pytest +import rclpy + + +NUM_SAMPLES = 2 +DISTANCE_THRESHOLD = 0.5 +ANGLE_THRESHOLD = 30.0 + + +@pytest.mark.rostest +def generate_test_description(): + """Generate launch description with all ROS 2 nodes for testing.""" + composable_nodes = [ + ComposableNode( + package='isaac_ros_pose_proc', + plugin='nvidia::isaac_ros::pose_proc::StabilityFilterNode', + name='stability_filter_node', + namespace=IsaacROSStabilityFilterTest.generate_namespace(), + parameters=[{ + 'num_samples': NUM_SAMPLES, + 'distance_threshold': DISTANCE_THRESHOLD, + 'angle_threshold': ANGLE_THRESHOLD + }] + ) + ] + + stability_filter_container = ComposableNodeContainer( + name='stability_filter_container', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=composable_nodes, + namespace=IsaacROSStabilityFilterTest.generate_namespace(), + output='screen' + ) + + return IsaacROSStabilityFilterTest.generate_test_description([stability_filter_container]) + + +class IsaacROSStabilityFilterTest(IsaacROSBaseTest): + + def test_stability_filter(self): + """ + Test stability filter to only publish poses once stabilized. + + Filter begins publishing once NUM_SAMPLES consecutive poses with euclidean distance to the + previous stable pose less than DISTANCE_THRESHOLD and shortest path angle difference less + than ANGLE_THRESHOLD are received. 
+ """ + received_messages = {} + + self.generate_namespace_lookup(['pose_input', 'pose_output']) + + pose_pub = self.node.create_publisher( + PoseStamped, self.namespaces['pose_input'], self.DEFAULT_QOS) + + subs = self.create_logging_subscribers( + [('pose_output', PoseStamped)], received_messages, accept_multiple_messages=True) + + try: + # Wait at most TIMEOUT seconds for subscriber to respond + TIMEOUT = 5 + end_time = time.time() + TIMEOUT + done = False + + # Input poses + POSES_IN = [ + PoseStamped(pose=Pose( # First pose not published + position=Point(x=1.0, y=2.0, z=3.0), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( # Stable pose published + position=Point(x=1.2, y=2.1, z=3.3), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( # Not stable (distance > 0.5) + position=Point(x=1.3, y=2.4, z=3.1), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( # Not stable (angle > 30 degrees) + position=Point(x=1.3, y=2.4, z=3.1), + orientation=Quaternion(x=0.0, y=0.284, z=0.0, w=0.959) + )), + PoseStamped(pose=Pose( # Stable pose published + position=Point(x=1.1, y=2.2, z=3.1), + orientation=Quaternion(x=0.0, y=0.131, z=0.0, w=0.991) + )) + ] + + # Expected output poses + POSES_OUT = [ + PoseStamped(pose=Pose( + position=Point(x=1.2, y=2.1, z=3.3), + orientation=Quaternion(x=0.0, y=0.0, z=0.0, w=1.0) + )), + PoseStamped(pose=Pose( + position=Point(x=1.1, y=2.2, z=3.1), + orientation=Quaternion(x=0.0, y=0.131, z=0.0, w=0.991) + )) + ] + + # Wait until received expected poses + while time.time() < end_time: + # Publish poses + for pose in POSES_IN: + pose_pub.publish(pose) + rclpy.spin_once(self.node, timeout_sec=(0.1)) + if 'pose_output' in received_messages and \ + len(received_messages['pose_output']) >= len(POSES_OUT): + done = True + break + + self.assertTrue(done, 'Appropriate output not received') + poses = received_messages['pose_output'] + 
self.assertEqual(len(poses), len(POSES_OUT)) + self.assertEqual(poses, POSES_OUT) + + finally: + self.node.destroy_subscription(subs) + self.node.destroy_publisher(pose_pub) diff --git a/test/models/dope_ketchup_pol.onnx b/test/models/dope_ketchup_pol.onnx index 005951f..3d9c185 100644 --- a/test/models/dope_ketchup_pol.onnx +++ b/test/models/dope_ketchup_pol.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fab42a02c2625b4be171f9b0d9b579bd2279858bac19988d04b21639ddae46f7 -size 201091673 +oid sha256:3535aa7f97ce51d5fe1ef05bd79c5778d56249bc4f24956f6fc91a6fd6d0c239 +size 201101866