diff --git a/README.md b/README.md
index 510f649..39e39e2 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ the 3D pose of an object and distance for navigation or manipulation.
`isaac_ros_foundationpose` is used in a graph of nodes to estimate the pose of
a novel object using 3D bounding cuboid dimensions. It’s developed on top of the
-[FoundationPose](https://github.com/NVlabs/FoundationPose) model, which is
+[FoundationPose](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/isaac/models/foundationpose) model, which is
a pre-trained deep learning model developed by NVLabs. FoundationPose
is capable of both pose estimation and tracking of unseen objects without requiring fine-tuning,
and its accuracy outperforms existing state-of-the-art methods.
@@ -81,11 +81,11 @@ which can also be found at [Isaac ROS DNN Inference](https://github.com/NVIDIA-I
## Performance
-| Sample Graph | Input Size | AGX Orin | Orin NX | Orin Nano 8GB | x86_64 w/ RTX 4060 Ti | x86_64 w/ RTX 4090 |
-|---|---|---|---|---|---|---|
-| [FoundationPose Pose Estimation Node](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_foundationpose_benchmark/scripts/isaac_ros_foundationpose_node.py) | 720p | [1.72 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-agx_orin.json)<br>690 ms @ 30Hz | – | – | – | [7.02 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-x86_4090.json)<br>170 ms @ 30Hz |
-| [DOPE Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_dope_benchmark/scripts/isaac_ros_dope_graph.py) | VGA | [41.3 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-agx_orin.json)<br>42 ms @ 30Hz | [17.5 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-orin_nx.json)<br>76 ms @ 30Hz | – | [85.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-nuc_4060ti.json)<br>24 ms @ 30Hz | [199 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-x86_4090.json)<br>14 ms @ 30Hz |
-| [Centerpose Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_centerpose_benchmark/scripts/isaac_ros_centerpose_graph.py) | VGA | [36.3 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-agx_orin.json)<br>4.8 ms @ 30Hz | [19.7 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nx.json)<br>4.9 ms @ 30Hz | [13.8 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nano.json)<br>7.4 ms @ 30Hz | [50.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-nuc_4060ti.json)<br>23 ms @ 30Hz | [50.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-x86_4090.json)<br>20 ms @ 30Hz |
+| Sample Graph | Input Size | AGX Orin | Orin NX | Orin Nano 8GB | x86_64 w/ RTX 4090 |
+|---|---|---|---|---|---|
+| [FoundationPose Pose Estimation Node](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_foundationpose_benchmark/scripts/isaac_ros_foundationpose_node.py) | 720p | [1.72 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-agx_orin.json)<br>690 ms @ 30Hz | – | – | [9.61 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_foundationpose_node-x86-4090.json)<br>110 ms @ 30Hz |
+| [DOPE Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_dope_benchmark/scripts/isaac_ros_dope_graph.py) | VGA | [27.7 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-agx_orin.json)<br>16 ms @ 30Hz | [17.8 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-orin_nx.json)<br>14 ms @ 30Hz | – | [187 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_dope_graph-x86-4090.json)<br>7.8 ms @ 30Hz |
+| [Centerpose Pose Estimation Graph](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/benchmarks/isaac_ros_centerpose_benchmark/scripts/isaac_ros_centerpose_graph.py) | VGA | [47.1 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-agx_orin.json)<br>6.5 ms @ 30Hz | [30.0 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nx.json)<br>50 ms @ 30Hz | [19.9 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-orin_nano.json)<br>28 ms @ 30Hz | [50.2 fps](https://github.com/NVIDIA-ISAAC-ROS/isaac_ros_benchmark/blob/main/results/isaac_ros_centerpose_graph-x86-4090.json)<br>12 ms @ 30Hz |
---
@@ -115,4 +115,4 @@ Please visit the [Isaac ROS Documentation](https://nvidia-isaac-ros.github.io/re
## Latest
-Update 2024-09-26: Update for ZED compatibility
+Update 2024-12-10: Added pose estimate post-processing utilities
diff --git a/isaac_ros_centerpose/CMakeLists.txt b/isaac_ros_centerpose/CMakeLists.txt
index b6ed263..8785a05 100644
--- a/isaac_ros_centerpose/CMakeLists.txt
+++ b/isaac_ros_centerpose/CMakeLists.txt
@@ -49,6 +49,13 @@ if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
ament_lint_auto_find_test_dependencies()
+ # Gtest for decoder_node
+ ament_add_gtest(centerpose_decoder_node_test test/centerpose_decoder_node_test.cpp)
+ target_link_libraries(centerpose_decoder_node_test centerpose_decoder_node)
+ target_include_directories(centerpose_decoder_node_test PUBLIC include/isaac_ros_centerpose/)
+ target_include_directories(centerpose_decoder_node_test PUBLIC /usr/src/googletest/googlemock/include/)
+ ament_target_dependencies(centerpose_decoder_node_test rclcpp)
+ ament_target_dependencies(centerpose_decoder_node_test isaac_ros_nitros)
# The FindPythonInterp and FindPythonLibs modules are removed
if(POLICY CMP0148)
cmake_policy(SET CMP0148 OLD)
@@ -59,4 +66,10 @@ if(BUILD_TESTING)
add_launch_test(test/test_centerpose_pol_triton.py TIMEOUT "600")
endif()
+
+# Embed versioning information into installed files
+ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common)
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake")
+generate_version_info(${PROJECT_NAME})
+
ament_auto_package(INSTALL_TO_SHARE config launch)
diff --git a/isaac_ros_centerpose/package.xml b/isaac_ros_centerpose/package.xml
index f9ba1dd..c706972 100644
--- a/isaac_ros_centerpose/package.xml
+++ b/isaac_ros_centerpose/package.xml
@@ -21,7 +21,7 @@
  <name>isaac_ros_centerpose</name>
-  <version>3.1.0</version>
+  <version>3.2.0</version>
  <description>CenterPose: Pose Estimation using Deep Learning</description>
  <maintainer email="isaac-ros-maintainers@nvidia.com">Isaac ROS Maintainers</maintainer>
@@ -63,6 +63,7 @@
  <test_depend>ament_lint_common</test_depend>
  <test_depend>isaac_ros_test</test_depend>
  <test_depend>sensor_msgs_py</test_depend>
+  <test_depend>ament_cmake_gtest</test_depend>

  <export>
    <build_type>ament_cmake</build_type>
  </export>
diff --git a/isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp b/isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp
new file mode 100644
index 0000000..f8dff32
--- /dev/null
+++ b/isaac_ros_centerpose/test/centerpose_decoder_node_test.cpp
@@ -0,0 +1,97 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gmock/gmock.h>
+#include "centerpose_decoder_node.hpp"
+#include "rclcpp/rclcpp.hpp"
+
+// Objective: to cover code lines where exceptions are thrown
+// Approach: send Invalid Arguments for node parameters to trigger the exception
+
+
+TEST(centerpose_decoder_node_test, test_invalid_output_field_size)
+{
+ rclcpp::init(0, nullptr);
+ rclcpp::NodeOptions options;
+ EXPECT_THROW(
+ {
+ try {
+ nvidia::isaac_ros::centerpose::CenterPoseDecoderNode centerpose_decoder_node(options);
+ } catch (const std::invalid_argument & e) {
+ EXPECT_THAT(e.what(), testing::HasSubstr("Error: received invalid output field size"));
+ throw;
+ } catch (const rclcpp::exceptions::InvalidParameterValueException & e) {
+ EXPECT_THAT(e.what(), testing::HasSubstr("No parameter value set"));
+ throw;
+ }
+ }, std::invalid_argument);
+ rclcpp::shutdown();
+}
+
+TEST(centerpose_decoder_node_test, test_cuboid_scaling_factor)
+{
+ rclcpp::init(0, nullptr);
+ rclcpp::NodeOptions options;
+  options.append_parameter_override("output_field_size", std::vector<int64_t>{128, 128});
+ options.append_parameter_override("cuboid_scaling_factor", 0.0);
+ EXPECT_THROW(
+ {
+ try {
+ nvidia::isaac_ros::centerpose::CenterPoseDecoderNode centerpose_decoder_node(options);
+ } catch (const std::invalid_argument & e) {
+ EXPECT_THAT(
+ e.what(),
+ testing::HasSubstr("Error: received a less than or equal to zero cuboid scaling factor"));
+ throw;
+ } catch (const rclcpp::exceptions::InvalidParameterValueException & e) {
+ EXPECT_THAT(e.what(), testing::HasSubstr("No parameter value set"));
+ throw;
+ }
+ }, std::invalid_argument);
+ rclcpp::shutdown();
+}
+
+TEST(centerpose_decoder_node_test, test_score_threshold)
+{
+ rclcpp::init(0, nullptr);
+ rclcpp::NodeOptions options;
+  options.append_parameter_override("output_field_size", std::vector<int64_t>{128, 128});
+ options.append_parameter_override("cuboid_scaling_factor", 1.0);
+ options.append_parameter_override("score_threshold", 1.0);
+ EXPECT_THROW(
+ {
+ try {
+ nvidia::isaac_ros::centerpose::CenterPoseDecoderNode centerpose_decoder_node(options);
+ } catch (const std::invalid_argument & e) {
+ EXPECT_THAT(
+ e.what(),
+ testing::HasSubstr("Error: received score threshold greater or equal to 1.0"));
+ throw;
+ } catch (const rclcpp::exceptions::InvalidParameterValueException & e) {
+ EXPECT_THAT(e.what(), testing::HasSubstr("No parameter value set"));
+ throw;
+ }
+ }, std::invalid_argument);
+ rclcpp::shutdown();
+}
+
+
+int main(int argc, char ** argv)
+{
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
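For context, a minimal launch sketch (not part of this diff) showing how the parameters validated by the tests above might be supplied when composing the decoder. The plugin name mirrors the C++ class under test; the container layout and parameter values are illustrative assumptions.

```python
# Illustrative launch sketch: composing CenterPoseDecoderNode with the
# parameters exercised by the gtest above. Values are chosen to satisfy the
# validated constraints: a two-element output field size, a strictly positive
# cuboid scaling factor, and a score threshold below 1.0.
from launch import LaunchDescription
from launch_ros.actions import ComposableNodeContainer
from launch_ros.descriptions import ComposableNode


def generate_launch_description():
    centerpose_decoder_node = ComposableNode(
        name='centerpose_decoder',
        package='isaac_ros_centerpose',
        plugin='nvidia::isaac_ros::centerpose::CenterPoseDecoderNode',
        parameters=[{
            'output_field_size': [128, 128],
            'cuboid_scaling_factor': 1.0,
            'score_threshold': 0.3,
        }])
    container = ComposableNodeContainer(
        name='centerpose_container',
        namespace='',
        package='rclcpp_components',
        executable='component_container_mt',
        composable_node_descriptions=[centerpose_decoder_node],
        output='screen')
    return LaunchDescription([container])
```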
diff --git a/isaac_ros_dope/CMakeLists.txt b/isaac_ros_dope/CMakeLists.txt
index b7d0ffe..18a81fd 100644
--- a/isaac_ros_dope/CMakeLists.txt
+++ b/isaac_ros_dope/CMakeLists.txt
@@ -52,4 +52,10 @@ if(BUILD_TESTING)
add_launch_test(test/isaac_ros_dope_pol.py TIMEOUT "600")
endif()
+
+# Embed versioning information into installed files
+ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common)
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake")
+generate_version_info(${PROJECT_NAME})
+
ament_auto_package(INSTALL_TO_SHARE config launch)
diff --git a/isaac_ros_dope/config/dope_config.yaml b/isaac_ros_dope/config/dope_config.yaml
index 1bd5802..8ae4242 100644
--- a/isaac_ros_dope/config/dope_config.yaml
+++ b/isaac_ros_dope/config/dope_config.yaml
@@ -58,12 +58,4 @@ dope:
"Peaches" : [ 5.7781000137329102, 7.0961999893188477, 6.5925998687744141 ],
"GreenBeans" : [ 5.758699893951416, 7.0608000755310059, 6.5732002258300781 ],
"PeasAndCarrots" : [ 5.8512001037597656, 7.0636000633239746, 6.5918002128601074 ]
- }
-
- # 9 element camera matrix (using default from Ketchup demo)
- # Taken from: https://github.com/andrewyguo/dope_training/blob/master/inference/config/camera_info.yaml
- camera_matrix: [
- 364.16501736, 0.0, 121.36296296,
- 0.0, 364.16501736, 121.36296296,
- 0.0, 0.0, 1.0
- ]
+ }
\ No newline at end of file
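With the camera matrix removed from `dope_config.yaml`, the decoder now receives intrinsics over a `CameraInfo` topic (see the `dope_node.yaml` and `dope_decoder_node.cpp` changes below). A minimal rclpy sketch, assuming the `camera_info` topic name and reusing the intrinsics deleted above:

```python
# Illustrative rclpy publisher supplying the intrinsics that were previously
# hard-coded in dope_config.yaml, now consumed from a CameraInfo topic. Topic
# and frame names are assumptions; in the sample launch files, the encoder's
# cropped camera_info topic is remapped into this input instead.
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import CameraInfo


class DopeCameraInfoPublisher(Node):

    def __init__(self):
        super().__init__('dope_camera_info_publisher')
        self.pub = self.create_publisher(CameraInfo, 'camera_info', 10)
        self.timer = self.create_timer(1.0 / 30.0, self.publish_info)

    def publish_info(self):
        msg = CameraInfo()
        msg.header.stamp = self.get_clock().now().to_msg()
        msg.header.frame_id = 'tf_camera'
        msg.width, msg.height = 640, 480
        msg.distortion_model = 'plumb_bob'
        # Row-major 3x3 K matrix: the values removed from dope_config.yaml above.
        msg.k = [364.16501736, 0.0, 121.36296296,
                 0.0, 364.16501736, 121.36296296,
                 0.0, 0.0, 1.0]
        self.pub.publish(msg)


def main():
    rclpy.init()
    rclpy.spin(DopeCameraInfoPublisher())


if __name__ == '__main__':
    main()
```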
diff --git a/isaac_ros_dope/config/dope_node.yaml b/isaac_ros_dope/config/dope_node.yaml
index e6b7b3c..23753aa 100644
--- a/isaac_ros_dope/config/dope_node.yaml
+++ b/isaac_ros_dope/config/dope_node.yaml
@@ -22,28 +22,36 @@ components:
type: nvidia::gxf::DoubleBufferReceiver
parameters:
capacity: 12
-- name: posearray_out
+- name: detection3darray_out
type: nvidia::gxf::DoubleBufferTransmitter
parameters:
capacity: 12
+- name: camera_model_input
+ type: nvidia::gxf::DoubleBufferReceiver
+ parameters:
+ capacity: 12
- name: allocator
type: nvidia::gxf::UnboundedAllocator
- name: dope_decoder
type: nvidia::isaac_ros::dope::DopeDecoder
parameters:
tensorlist_receiver: tensorlist_in
- posearray_transmitter: posearray_out
+ camera_model_input: camera_model_input
+ detection3darray_transmitter: detection3darray_out
allocator: allocator
object_dimensions: []
- camera_matrix: []
object_name: ""
- type: nvidia::gxf::MessageAvailableSchedulingTerm
parameters:
receiver: tensorlist_in
min_size: 1
+- type: nvidia::gxf::MessageAvailableSchedulingTerm
+ parameters:
+ receiver: camera_model_input
+ min_size: 1
- type: nvidia::gxf::DownstreamReceptiveSchedulingTerm
parameters:
- transmitter: posearray_out
+ transmitter: detection3darray_out
min_size: 1
---
name: sink
@@ -65,7 +73,7 @@ components:
- name: edge0
type: nvidia::gxf::Connection
parameters:
- source: dope_decoder/posearray_out
+ source: dope_decoder/detection3darray_out
target: sink/signal
---
components:
diff --git a/isaac_ros_dope/config/isaac_ros_dope.yaml b/isaac_ros_dope/config/isaac_ros_dope.yaml
new file mode 100644
index 0000000..62999e2
--- /dev/null
+++ b/isaac_ros_dope/config/isaac_ros_dope.yaml
@@ -0,0 +1,30 @@
+%YAML 1.2
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+---
+id: [0xb9a27f9a9c9b3c40, 0xf7620fa5914c9644]
+name: DopeDecoder
+version: 1.0.0
+components:
+ - id: [0x03022c23f899e397, 0xaf817e602e657ba6]
+ type: nvidia::isaac_ros::dope::DopeDecoder
+ input_output_groups:
+ - input_keys: [tensorlist_receiver, camera_model_input]
+ output_keys: [detection3darray_transmitter]
+ input_format_keys: []
+ output_format_keys: []
+ supported_formats:
+ - platforms: [any]
+ details:
+ - input_formats: [nitros_tensor_list_nchw_rgb_f32, nitros_camera_info]
+ output_formats: [nitros_detection3_d_array]
+ costs:
+ throughput: 10bytes/s
+ latency: 10ms
+ power: 100J
+ accuracy: 100%
diff --git a/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp b/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp
index 8831240..0b886c1 100644
--- a/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp
+++ b/isaac_ros_dope/include/isaac_ros_dope/dope_decoder_node.hpp
@@ -18,6 +18,7 @@
#ifndef ISAAC_ROS_DOPE__DOPE_DECODER_NODE_HPP_
#define ISAAC_ROS_DOPE__DOPE_DECODER_NODE_HPP_
+#include <memory>
#include <string>
#include <vector>
@@ -25,6 +26,7 @@
#include "isaac_ros_tensor_list_interfaces/msg/tensor_list.hpp"
#include "isaac_ros_nitros/nitros_node.hpp"
#include "rclcpp/rclcpp.hpp"
+#include "tf2_ros/transform_broadcaster.h"
namespace nvidia
{
@@ -48,6 +50,8 @@ class DopeDecoderNode : public nitros::NitrosNode
void postLoadGraphCallback() override;
+ void DopeDecoderDetectionCallback(const gxf_context_t context, nitros::NitrosTypeBase & msg);
+
private:
// The name of the YAML configuration file
const std::string configuration_file_;
@@ -55,11 +59,35 @@ class DopeDecoderNode : public nitros::NitrosNode
// The class name of the object we're locating
const std::string object_name_;
+ // The frame name for TF publishing
+ const std::string tf_frame_name_;
+
+  // Whether the node broadcasts detected poses to the TF tree
+ const bool enable_tf_publishing_;
+
+ // The minimum value of a peak in a belief map
+ const double map_peak_threshold_;
+
+ // The maximum angle threshold for affinity mapping of corners to centroid
+ const double affinity_map_angle_threshold_;
+
+  // Rotation, in degrees, applied to the output pose about the Y axis
+  const double rotation_y_axis_;
+
+  // Rotation, in degrees, applied to the output pose about the X axis
+  const double rotation_x_axis_;
+
+  // Rotation, in degrees, applied to the output pose about the Z axis
+  const double rotation_z_axis_;
+
// The dimensions of the cuboid around the object we're locating
  std::vector<double> object_dimensions_;

  // The camera matrix used to capture the input images
  std::vector<double> camera_matrix_;
+
+ // The transform broadcaster for when TF publishing for poses is enabled
+  std::unique_ptr<tf2_ros::TransformBroadcaster> tf_broadcaster_;
};
} // namespace dope
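A hedged parameter sketch, not part of this diff, gathering the node parameters declared above in one place; the values mirror the `declare_parameter` defaults in `dope_decoder_node.cpp` further down.

```python
# Illustrative: the full parameter surface added to DopeDecoderNode in this
# change. Defaults shown match the declare_parameter() calls in the node's
# constructor; rotations are in degrees per the member comments above.
from launch_ros.descriptions import ComposableNode

dope_decoder_node = ComposableNode(
    name='dope_decoder',
    package='isaac_ros_dope',
    plugin='nvidia::isaac_ros::dope::DopeDecoderNode',
    parameters=[{
        'object_name': 'Ketchup',
        'tf_frame_name': 'dope_object',
        'enable_tf_publishing': True,
        'map_peak_threshold': 0.1,
        'affinity_map_angle_threshold': 0.5,
        'rotation_y_axis': 0.0,
        'rotation_x_axis': 0.0,
        'rotation_z_axis': 0.0,
    }])
```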
diff --git a/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py b/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py
index 7155e3c..1143e44 100644
--- a/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py
+++ b/isaac_ros_dope/launch/isaac_ros_dope_core.launch.py
@@ -45,6 +45,8 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
# DOPE Decoder parameters
object_name = LaunchConfiguration('object_name')
+ enable_tf_publishing = LaunchConfiguration('enable_tf_publishing')
+ map_peak_threshold = LaunchConfiguration('map_peak_threshold')
return {
'dope_inference_node': ComposableNode(
@@ -70,9 +72,12 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
plugin='nvidia::isaac_ros::dope::DopeDecoderNode',
parameters=[{
'object_name': object_name,
+ 'enable_tf_publishing': enable_tf_publishing,
+ 'map_peak_threshold': map_peak_threshold,
}],
remappings=[('belief_map_array', 'tensor_sub'),
- ('dope/pose_array', 'poses')]
+ ('dope/detections', 'detections'),
+ ('camera_info', '/dope_encoder/crop/camera_info')]
)
}
@@ -91,19 +96,19 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \
return {
'network_image_width': DeclareLaunchArgument(
'network_image_width',
- default_value='640',
+ default_value='1280',
description='The input image width that the network expects'),
'network_image_height': DeclareLaunchArgument(
'network_image_height',
- default_value='480',
+ default_value='720',
description='The input image height that the network expects'),
'encoder_image_mean': DeclareLaunchArgument(
'encoder_image_mean',
- default_value='[0.5, 0.5, 0.5]',
+ default_value='[0.485, 0.456, 0.406]',
description='The mean for image normalization'),
'encoder_image_stddev': DeclareLaunchArgument(
'encoder_image_stddev',
- default_value='[0.5, 0.5, 0.5]',
+ default_value='[0.229, 0.224, 0.225]',
description='The standard deviation for image normalization'),
'model_file_path': DeclareLaunchArgument(
'model_file_path',
@@ -147,10 +152,18 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \
'object_name',
default_value='Ketchup',
description='The object class that the DOPE network is detecting'),
+ 'map_peak_threshold': DeclareLaunchArgument(
+ 'map_peak_threshold',
+ default_value='0.1',
+ description='The minimum value of a peak in a DOPE belief map'),
'force_engine_update': DeclareLaunchArgument(
'force_engine_update',
default_value='False',
description='Whether TensorRT should update the TensorRT engine file or not'),
+ 'enable_tf_publishing': DeclareLaunchArgument(
+ 'enable_tf_publishing',
+ default_value='False',
+ description='Whether Dope Decoder will broadcast poses to the TF tree or not'),
'dope_encoder_launch': IncludeLaunchDescription(
PythonLaunchDescriptionSource(
[os.path.join(encoder_dir, 'launch', 'dnn_image_encoder.launch.py')]
diff --git a/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py b/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py
index 5c3231a..15c16a8 100644
--- a/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py
+++ b/isaac_ros_dope/launch/isaac_ros_dope_tensor_rt.launch.py
@@ -42,19 +42,19 @@ def generate_launch_description():
description='The input image height'),
DeclareLaunchArgument(
'network_image_width',
- default_value='640',
+ default_value='1280',
description='The input image width that the network expects'),
DeclareLaunchArgument(
'network_image_height',
- default_value='480',
+ default_value='720',
description='The input image height that the network expects'),
DeclareLaunchArgument(
'encoder_image_mean',
- default_value='[0.5, 0.5, 0.5]',
+ default_value='[0.485, 0.456, 0.406]',
description='The mean for image normalization'),
DeclareLaunchArgument(
'encoder_image_stddev',
- default_value='[0.5, 0.5, 0.5]',
+ default_value='[0.229, 0.224, 0.225]',
description='The standard deviation for image normalization'),
DeclareLaunchArgument(
'model_file_path',
@@ -100,6 +100,14 @@ def generate_launch_description():
'force_engine_update',
default_value='False',
description='Whether TensorRT should update the TensorRT engine file or not'),
+ DeclareLaunchArgument(
+ 'map_peak_threshold',
+ default_value='0.1',
+ description='The minimum value of a peak in a DOPE belief map'),
+ DeclareLaunchArgument(
+ 'enable_tf_publishing',
+ default_value='False',
+ description='Whether Dope Decoder will broadcast poses to the TF tree or not')
]
# DNN Image Encoder parameters
@@ -124,6 +132,8 @@ def generate_launch_description():
# DOPE Decoder parameters
object_name = LaunchConfiguration('object_name')
+ enable_tf_publishing = LaunchConfiguration('enable_tf_publishing')
+ map_peak_threshold = LaunchConfiguration('map_peak_threshold')
encoder_dir = get_package_share_directory('isaac_ros_dnn_image_encoder')
dope_encoder_launch = IncludeLaunchDescription(
@@ -170,9 +180,13 @@ def generate_launch_description():
plugin='nvidia::isaac_ros::dope::DopeDecoderNode',
parameters=[{
'object_name': object_name,
+ 'enable_tf_publishing': enable_tf_publishing,
+ 'map_peak_threshold': map_peak_threshold,
}],
remappings=[('belief_map_array', 'tensor_sub'),
- ('dope/pose_array', 'poses')])
+ ('dope/detections', 'detections'),
+ ('camera_info', '/dope_encoder/crop/camera_info')]
+ )
container = ComposableNodeContainer(
name='dope_container',
diff --git a/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py b/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py
index 7cfe98f..d8ef2d3 100644
--- a/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py
+++ b/isaac_ros_dope/launch/isaac_ros_dope_triton.launch.py
@@ -39,19 +39,19 @@ def generate_launch_description():
description='The input image height'),
DeclareLaunchArgument(
'network_image_width',
- default_value='640',
+ default_value='1280',
description='The input image width that the network expects'),
DeclareLaunchArgument(
'network_image_height',
- default_value='480',
+ default_value='720',
description='The input image height that the network expects'),
DeclareLaunchArgument(
'encoder_image_mean',
- default_value='[0.5, 0.5, 0.5]',
+ default_value='[0.485, 0.456, 0.406]',
description='The mean for image normalization'),
DeclareLaunchArgument(
'encoder_image_stddev',
- default_value='[0.5, 0.5, 0.5]',
+ default_value='[0.229, 0.224, 0.225]',
description='The standard deviation for image normalization'),
DeclareLaunchArgument(
'model_name',
@@ -93,6 +93,14 @@ def generate_launch_description():
'object_name',
default_value='Ketchup',
description='The object class that the DOPE network is detecting'),
+ DeclareLaunchArgument(
+ 'map_peak_threshold',
+ default_value='0.1',
+ description='The minimum value of a peak in a DOPE belief map'),
+ DeclareLaunchArgument(
+ 'enable_tf_publishing',
+ default_value='False',
+ description='Whether Dope Decoder will broadcast poses to the TF tree or not'),
]
# DNN Image Encoder parameters
@@ -116,6 +124,8 @@ def generate_launch_description():
# DOPE Decoder parameters
object_name = LaunchConfiguration('object_name')
+ enable_tf_publishing = LaunchConfiguration('enable_tf_publishing')
+ map_peak_threshold = LaunchConfiguration('map_peak_threshold')
encoder_dir = get_package_share_directory('isaac_ros_dnn_image_encoder')
dope_encoder_launch = IncludeLaunchDescription(
@@ -161,9 +171,13 @@ def generate_launch_description():
plugin='nvidia::isaac_ros::dope::DopeDecoderNode',
parameters=[{
'object_name': object_name,
+ 'enable_tf_publishing': enable_tf_publishing,
+ 'map_peak_threshold': map_peak_threshold
}],
remappings=[('belief_map_array', 'tensor_sub'),
- ('dope/pose_array', 'poses')])
+ ('dope/detections', 'detections'),
+ ('camera_info', '/dope_encoder/crop/camera_info')]
+ )
container = ComposableNodeContainer(
name='dope_container',
diff --git a/isaac_ros_dope/package.xml b/isaac_ros_dope/package.xml
index bc9a520..cf64666 100644
--- a/isaac_ros_dope/package.xml
+++ b/isaac_ros_dope/package.xml
@@ -21,7 +21,7 @@
  <name>isaac_ros_dope</name>
-  <version>3.1.0</version>
+  <version>3.2.0</version>
  <description>Deep learning based pose estimation</description>
  <maintainer email="isaac-ros-maintainers@nvidia.com">Isaac ROS Maintainers</maintainer>
@@ -30,6 +30,8 @@
  <author>Ethan Yu</author>
  <author>Jeff Smith</author>
  <author>Jaiveer Singh</author>
+  <author>Shruti Sharma</author>
+  <author>Toya Takahashi</author>

  <buildtool_depend>ament_cmake</buildtool_depend>
@@ -39,7 +41,8 @@
  <depend>isaac_ros_dnn_image_encoder</depend>
  <depend>isaac_ros_nitros</depend>
-  <depend>isaac_ros_nitros_pose_array_type</depend>
+  <depend>isaac_ros_nitros_camera_info_type</depend>
+  <depend>isaac_ros_nitros_detection3_d_array_type</depend>
  <depend>isaac_ros_nitros_tensor_list_type</depend>
  <depend>isaac_ros_tensor_list_interfaces</depend>
  <depend>isaac_ros_tensor_rt</depend>
@@ -48,8 +51,10 @@
  <depend>message_filters</depend>
  <depend>rclcpp</depend>
  <depend>sensor_msgs</depend>
+  <depend>vision_msgs</depend>
  <depend>tf2_eigen</depend>

+  <depend>gxf_isaac_gems</depend>
  <depend>isaac_ros_common</depend>
  <depend>isaac_ros_gxf</depend>
diff --git a/isaac_ros_dope/src/dope_decoder_node.cpp b/isaac_ros_dope/src/dope_decoder_node.cpp
index 3309729..a45f483 100644
--- a/isaac_ros_dope/src/dope_decoder_node.cpp
+++ b/isaac_ros_dope/src/dope_decoder_node.cpp
@@ -25,12 +25,17 @@ namespace fs = std::experimental::filesystem;
namespace fs = std::filesystem;
#endif
-#include "isaac_ros_nitros_pose_array_type/nitros_pose_array.hpp"
-#include "isaac_ros_nitros_tensor_list_type/nitros_tensor_list.hpp"
+#include <memory>
#include "ament_index_cpp/get_package_share_directory.hpp"
+#include "detection3_d_array_message/detection3_d_array_message.hpp"
+#include "geometry_msgs/msg/transform_stamped.hpp"
+#include "gxf/std/tensor.hpp"
+#include "isaac_ros_nitros_camera_info_type/nitros_camera_info.hpp"
+#include "isaac_ros_nitros_detection3_d_array_type/nitros_detection3_d_array.hpp"
+#include "isaac_ros_nitros_tensor_list_type/nitros_tensor_list.hpp"
#include "rclcpp/rclcpp.hpp"
-
+#include "tf2_ros/transform_broadcaster.h"
namespace nvidia
{
@@ -45,13 +50,19 @@ constexpr char INPUT_COMPONENT_KEY[] = "dope_decoder/tensorlist_in";
constexpr char INPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_tensor_list_nchw_rgb_f32";
constexpr char INPUT_TOPIC_NAME[] = "belief_map_array";
+constexpr char CAMERA_INFO_INPUT_COMPONENT_KEY[] = "dope_decoder/camera_model_input";
+constexpr char CAMERA_INFO_INPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_camera_info";
+constexpr char CAMERA_INFO_INPUT_TOPIC_NAME[] = "camera_info";
+
constexpr char OUTPUT_COMPONENT_KEY[] = "sink/sink";
-constexpr char OUTPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_pose_array";
-constexpr char OUTPUT_TOPIC_NAME[] = "dope/pose_array";
+constexpr char OUTPUT_DEFAULT_TENSOR_FORMAT[] = "nitros_detection3_d_array";
+constexpr char OUTPUT_TOPIC_NAME[] = "dope/detections";
constexpr char APP_YAML_FILENAME[] = "config/dope_node.yaml";
constexpr char PACKAGE_NAME[] = "isaac_ros_dope";
+constexpr int kExpectedPoseAsTensorSize = (3 + 4);
+
const std::vector<std::pair<std::string, std::string>> EXTENSIONS = {
{"isaac_ros_gxf", "gxf/lib/std/libgxf_std.so"},
{"isaac_ros_gxf", "gxf/lib/multimedia/libgxf_multimedia.so"},
@@ -60,10 +71,8 @@ const std::vector> EXTENSIONS = {
{"gxf_isaac_dope", "gxf/lib/libgxf_isaac_dope.so"},
};
-const std::vector<std::string> PRESET_EXTENSION_SPEC_NAMES = {
-  "isaac_ros_dope",
-};
-const std::vector<std::string> EXTENSION_SPEC_FILENAMES = {};
+const std::vector<std::string> PRESET_EXTENSION_SPEC_NAMES = {};
+const std::vector<std::string> EXTENSION_SPEC_FILENAMES = {"config/isaac_ros_dope.yaml"};
const std::vector<std::string> GENERATOR_RULE_FILENAMES = {};
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
@@ -76,6 +85,14 @@ const nitros::NitrosPublisherSubscriberConfigMap CONFIG_MAP = {
.topic_name = INPUT_TOPIC_NAME,
}
},
+ {CAMERA_INFO_INPUT_COMPONENT_KEY,
+ {
+ .type = nitros::NitrosPublisherSubscriberType::NEGOTIATED,
+ .qos = rclcpp::QoS(1),
+ .compatible_data_format = CAMERA_INFO_INPUT_DEFAULT_TENSOR_FORMAT,
+ .topic_name = CAMERA_INFO_INPUT_TOPIC_NAME,
+ }
+ },
{OUTPUT_COMPONENT_KEY,
{
.type = nitros::NitrosPublisherSubscriberType::NEGOTIATED,
@@ -100,11 +117,26 @@ DopeDecoderNode::DopeDecoderNode(rclcpp::NodeOptions options)
// Parameters
  configuration_file_(declare_parameter<std::string>("configuration_file", "dope_config.yaml")),
  object_name_(declare_parameter<std::string>("object_name", "Ketchup")),
+  tf_frame_name_(declare_parameter<std::string>("tf_frame_name", "dope_object")),
+  enable_tf_publishing_(declare_parameter<bool>("enable_tf_publishing", true)),
+  map_peak_threshold_(declare_parameter<double>("map_peak_threshold", 0.1)),
+  affinity_map_angle_threshold_(declare_parameter<double>("affinity_map_angle_threshold", 0.5)),
+  rotation_y_axis_(declare_parameter<double>("rotation_y_axis", 0.0)),
+  rotation_x_axis_(declare_parameter<double>("rotation_x_axis", 0.0)),
+  rotation_z_axis_(declare_parameter<double>("rotation_z_axis", 0.0)),
object_dimensions_{},
camera_matrix_{}
{
RCLCPP_DEBUG(get_logger(), "[DopeDecoderNode] Constructor");
+  // Register a callback on the detection output to broadcast poses to the TF tree when enabled.
+  if (enable_tf_publishing_) {
+    tf_broadcaster_ = std::make_unique<tf2_ros::TransformBroadcaster>(*this);
+ config_map_[OUTPUT_COMPONENT_KEY].callback = std::bind(
+ &DopeDecoderNode::DopeDecoderDetectionCallback, this, std::placeholders::_1,
+ std::placeholders::_2);
+ }
+
// Open configuration YAML file
const std::string package_directory = ament_index_cpp::get_package_share_directory(
"isaac_ros_dope");
@@ -131,18 +163,8 @@ DopeDecoderNode::DopeDecoderNode(rclcpp::NodeOptions options)
auto dd = dimensions->double_array_value->values;
object_dimensions_ = {dd[0], dd[1], dd[2]};
- rcl_variant_t * cam_mat = rcl_yaml_node_struct_get("dope", "camera_matrix", dope_params);
- if (!cam_mat->double_array_value) {
- RCLCPP_ERROR(this->get_logger(), "No camera_matrix parameter found");
- throw std::runtime_error("Parameter parsing failure.");
- }
-
- auto vv = cam_mat->double_array_value->values;
- camera_matrix_ = {vv[0], vv[1], vv[2], vv[3], vv[4], vv[5], vv[6], vv[7], vv[8]};
-
- rcl_yaml_node_struct_fini(dope_params);
-
-  registerSupportedType<nitros::NitrosPoseArray>();
+  registerSupportedType<nitros::NitrosCameraInfo>();
+  registerSupportedType<nitros::NitrosDetection3DArray>();
  registerSupportedType<nitros::NitrosTensorList>();
startNitrosNode();
@@ -156,14 +178,95 @@ void DopeDecoderNode::postLoadGraphCallback()
getNitrosContext().setParameter1DFloat64Vector(
"dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "object_dimensions",
object_dimensions_);
+
getNitrosContext().setParameter1DFloat64Vector(
"dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "camera_matrix",
camera_matrix_);
+
getNitrosContext().setParameterStr(
"dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "object_name",
object_name_);
+ getNitrosContext().setParameterFloat64(
+ "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "map_peak_threshold",
+ map_peak_threshold_);
+ getNitrosContext().setParameterFloat64(
+ "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "affinity_map_angle_threshold",
+ affinity_map_angle_threshold_);
+ getNitrosContext().setParameterFloat64(
+ "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "rotation_y_axis",
+ rotation_y_axis_);
+ getNitrosContext().setParameterFloat64(
+ "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "rotation_x_axis",
+ rotation_x_axis_);
+ getNitrosContext().setParameterFloat64(
+ "dope_decoder", "nvidia::isaac_ros::dope::DopeDecoder", "rotation_z_axis",
+ rotation_z_axis_);
}
+// Convert each pose in the Detection3DArray into a TransformStamped and broadcast it to the TF tree.
+
+void DopeDecoderNode::DopeDecoderDetectionCallback(
+ const gxf_context_t context, nitros::NitrosTypeBase & msg)
+{
+ geometry_msgs::msg::TransformStamped transform_stamped;
+
+ auto msg_entity = nvidia::gxf::Entity::Shared(context, msg.handle);
+
+ // Populate timestamp information back into the header
+  auto maybe_input_timestamp = msg_entity->get<nvidia::gxf::Timestamp>();
+  if (!maybe_input_timestamp) {    // Fallback to the component named 'timestamp'
+    maybe_input_timestamp = msg_entity->get<nvidia::gxf::Timestamp>("timestamp");
+ }
+ if (maybe_input_timestamp) {
+    transform_stamped.header.stamp.sec = static_cast<int32_t>(
+      maybe_input_timestamp.value()->acqtime / static_cast<uint64_t>(1e9));
+    transform_stamped.header.stamp.nanosec = static_cast<uint32_t>(
+      maybe_input_timestamp.value()->acqtime % static_cast<uint64_t>(1e9));
+ } else {
+ RCLCPP_WARN(
+ get_logger(),
+ "[DopeDecoderNode] Failed to get timestamp");
+ }
+
+  // Extract the detection list into the struct type defined in detection3_d_array_message.hpp
+ auto dope_detections_array_expected = nvidia::isaac::GetDetection3DListMessage(
+ msg_entity.value());
+ if (!dope_detections_array_expected) {
+ RCLCPP_ERROR(
+ get_logger(), "[DopeDecoderNode] Failed to get detections data from message entity");
+ return;
+ }
+ auto dope_detections_array = dope_detections_array_expected.value();
+
+  // Number of detections in the message
+ size_t tags_count = dope_detections_array.count;
+
+  /* For each detected pose, enumerate the child_frame_id so that multiple
+     detections in one Detection3DArray message get distinct TF frame names.
+  */
+
+ int child_frame_id_num = 1;
+ if (tags_count > 0) {
+ for (size_t i = 0; i < tags_count; i++) {
+ auto pose = dope_detections_array.poses.at(i).value();
+
+ transform_stamped.header.frame_id = msg.frame_id;
+ transform_stamped.child_frame_id = tf_frame_name_ + std::to_string(child_frame_id_num);
+ transform_stamped.transform.translation.x = pose->translation.x();
+ transform_stamped.transform.translation.y = pose->translation.y();
+ transform_stamped.transform.translation.z = pose->translation.z();
+ transform_stamped.transform.rotation.x = pose->rotation.quaternion().x();
+ transform_stamped.transform.rotation.y = pose->rotation.quaternion().y();
+ transform_stamped.transform.rotation.z = pose->rotation.quaternion().z();
+ transform_stamped.transform.rotation.w = pose->rotation.quaternion().w();
+
+ tf_broadcaster_->sendTransform(transform_stamped);
+ child_frame_id_num++;
+ }
+ }
+}
+
+
} // namespace dope
} // namespace isaac_ros
} // namespace nvidia
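A consumer-side sketch (illustrative only) for the transforms broadcast by the callback above, assuming the default `tf_frame_name` of `dope_object` and a parent frame of `tf_camera`:

```python
# Illustrative consumer of the transforms broadcast above. Child frames are
# enumerated as '<tf_frame_name>1', '<tf_frame_name>2', ... per detection;
# the parent frame comes from the message header ('tf_camera' is assumed).
import rclpy
from rclpy.node import Node
from rclpy.time import Time
from tf2_ros import Buffer, TransformException, TransformListener


class DopePoseListener(Node):

    def __init__(self):
        super().__init__('dope_pose_listener')
        self.tf_buffer = Buffer()
        self.tf_listener = TransformListener(self.tf_buffer, self)
        self.timer = self.create_timer(0.5, self.lookup)

    def lookup(self):
        try:
            t = self.tf_buffer.lookup_transform('tf_camera', 'dope_object1', Time())
            self.get_logger().info(
                f'Object at ({t.transform.translation.x:.3f}, '
                f'{t.transform.translation.y:.3f}, {t.transform.translation.z:.3f})')
        except TransformException as e:
            self.get_logger().debug(f'No transform yet: {e}')


def main():
    rclpy.init()
    rclpy.spin(DopePoseListener())


if __name__ == '__main__':
    main()
```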
diff --git a/isaac_ros_dope/test/isaac_ros_dope_pol.py b/isaac_ros_dope/test/isaac_ros_dope_pol.py
index 6a6b342..dba4038 100644
--- a/isaac_ros_dope/test/isaac_ros_dope_pol.py
+++ b/isaac_ros_dope/test/isaac_ros_dope_pol.py
@@ -26,7 +26,6 @@
import time
from ament_index_python.packages import get_package_share_directory
-from geometry_msgs.msg import PoseArray
from isaac_ros_test import IsaacROSBaseTest, JSONConversion
from launch.actions import IncludeLaunchDescription
from launch.launch_description_sources import PythonLaunchDescriptionSource
@@ -37,6 +36,7 @@
import rclpy
from sensor_msgs.msg import CameraInfo, Image
+from vision_msgs.msg import Detection3DArray
MODEL_FILE_NAME = 'dope_ketchup_pol.onnx'
@@ -76,7 +76,7 @@ def generate_test_description():
'frame_id': 'map'
}],
remappings=[('belief_map_array', 'tensor_sub'),
- ('dope/pose_array', 'poses')])
+ ('dope/detections', 'detections')])
encoder_dir = get_package_share_directory('isaac_ros_dnn_image_encoder')
dope_encoder_launch = IncludeLaunchDescription(
@@ -86,7 +86,7 @@ def generate_test_description():
launch_arguments={
'input_image_width': '852',
'input_image_height': '480',
- 'network_image_width': '640',
+ 'network_image_width': '852',
'network_image_height': '480',
'attach_to_shared_component_container': 'True',
'component_container_name': 'dope_container',
@@ -132,7 +132,7 @@ def test_pol(self):
received_messages = {}
- self.generate_namespace_lookup(['image', 'camera_info', 'poses'])
+ self.generate_namespace_lookup(['image', 'camera_info', 'detections'])
image_pub = self.node.create_publisher(
Image, self.namespaces['image'], self.DEFAULT_QOS)
@@ -140,16 +140,16 @@ def test_pol(self):
camera_info_pub = self.node.create_publisher(
CameraInfo, self.namespaces['camera_info'], self.DEFAULT_QOS)
- # The current DOPE decoder outputs PoseArray
+ # The current DOPE decoder outputs Detection3DArray
subs = self.create_logging_subscribers(
- [('poses', PoseArray)], received_messages)
-
+ [('detections', Detection3DArray)], received_messages)
try:
- json_file = self.filepath / 'test_cases/pose_estimation_0/image.json'
- image = JSONConversion.load_image_from_json(json_file)
+ image_json_file = self.filepath / 'test_cases/pose_estimation_0/image.json'
+ image = JSONConversion.load_image_from_json(image_json_file)
image.header.stamp = self.node.get_clock().now().to_msg()
- camera_info = CameraInfo()
+ camera_info_json_file = self.filepath / 'test_cases/pose_estimation_0/camera_info.json'
+ camera_info = JSONConversion.load_camera_info_from_json(camera_info_json_file)
camera_info.header = image.header
camera_info.distortion_model = 'plumb_bob'
@@ -161,16 +161,18 @@ def test_pol(self):
image_pub.publish(image)
camera_info_pub.publish(camera_info)
rclpy.spin_once(self.node, timeout_sec=(0.1))
- if 'poses' in received_messages:
+ if 'detections' in received_messages:
done = True
break
self.assertTrue(done, 'Timeout. Appropriate output not received')
self.node._logger.info('A message was successfully received')
- received_poses = received_messages['poses'].poses
- self.node._logger.info(f'Poses received: {received_poses}')
- self.assertGreaterEqual(len(received_poses), 1, 'Did not receive at least one pose')
+ received_detections = received_messages['detections'].detections
+ self.node._logger.info(f'Detections received: {received_detections}')
+ self.assertGreaterEqual(len(received_detections), 1,
+ 'Did not receive at least one detection')
finally:
self.node.destroy_subscription(subs)
self.node.destroy_publisher(image_pub)
+ self.node.destroy_publisher(camera_info_pub)
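For reference, a minimal subscriber sketch (not part of this diff) for the decoder's new `Detection3DArray` output, using standard `vision_msgs` fields and the `detections` topic name from the test above:

```python
# Illustrative downstream subscriber for the Detection3DArray output that
# replaces the old PoseArray; field names follow the ROS 2 vision_msgs API.
import rclpy
from rclpy.node import Node
from vision_msgs.msg import Detection3DArray


class DetectionPrinter(Node):

    def __init__(self):
        super().__init__('detection_printer')
        self.sub = self.create_subscription(
            Detection3DArray, 'detections', self.callback, 10)

    def callback(self, msg):
        for detection in msg.detections:
            for hypothesis in detection.results:
                pose = hypothesis.pose.pose
                self.get_logger().info(
                    f'{hypothesis.hypothesis.class_id}: '
                    f'({pose.position.x:.3f}, {pose.position.y:.3f}, '
                    f'{pose.position.z:.3f}), bbox size '
                    f'{detection.bbox.size.x:.3f} x {detection.bbox.size.y:.3f} '
                    f'x {detection.bbox.size.z:.3f}')


def main():
    rclpy.init()
    rclpy.spin(DetectionPrinter())


if __name__ == '__main__':
    main()
```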
diff --git a/isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json b/isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json
new file mode 100644
index 0000000..dfaeccc
--- /dev/null
+++ b/isaac_ros_dope/test/test_cases/pose_estimation_0/camera_info.json
@@ -0,0 +1,51 @@
+{
+ "header": {
+ "frame_id": "tf_camera"
+ },
+ "height": 480,
+ "width": 640,
+ "distortion_model": "plumb_bob",
+ "D": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "K": [
+ 364.16501736,
+ 0.0,
+ 121.36296296,
+ 0.0,
+ 364.16501736,
+ 121.36296296,
+ 0.0,
+ 0.0,
+ 1.0
+ ],
+ "R": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "P": [
+ 364.16501736,
+ 0.0,
+ 121.36296296,
+ 0.0,
+ 0.0,
+ 364.16501736,
+ 121.36296296,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0,
+ 0.0
+ ]
+}
\ No newline at end of file
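The `K` entry above is a row-major 3x3 pinhole intrinsics matrix. A short numpy sketch of the forward projection it defines; the DOPE decoder solves the inverse problem (PnP on the detected cuboid keypoints) to recover a metric pose:

```python
# The K above is a row-major 3x3 pinhole intrinsics matrix. This sketch applies
# the forward projection K @ [X, Y, Z]^T and dehomogenizes to pixel coordinates.
import numpy as np

K = np.array([[364.16501736, 0.0, 121.36296296],
              [0.0, 364.16501736, 121.36296296],
              [0.0, 0.0, 1.0]])

point_camera = np.array([0.05, -0.02, 0.6])  # meters, camera frame, Z forward
uvw = K @ point_camera
u, v = uvw[0] / uvw[2], uvw[1] / uvw[2]
print(f'pixel: ({u:.1f}, {v:.1f})')
```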
diff --git a/isaac_ros_foundationpose/CMakeLists.txt b/isaac_ros_foundationpose/CMakeLists.txt
index 991614d..c0038f0 100644
--- a/isaac_ros_foundationpose/CMakeLists.txt
+++ b/isaac_ros_foundationpose/CMakeLists.txt
@@ -36,6 +36,11 @@ rclcpp_components_register_nodes(foundationpose_tracking_node "nvidia::isaac_ros
set(node_plugins
"${node_plugins}nvidia::isaac_ros::foundationpose::FoundationPoseTrackingNode;$\n")
+# Detection2DArrayFilter utils node
+ament_auto_add_library(detection2_d_array_filter SHARED src/detection2_d_array_filter.cpp)
+rclcpp_components_register_nodes(detection2_d_array_filter "nvidia::isaac_ros::foundationpose::Detection2DArrayFilter")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::foundationpose::Detection2DArrayFilter;$<TARGET_FILE:detection2_d_array_filter>\n")
+
# Detection2DToMask utils node
ament_auto_add_library(detection2_d_to_mask SHARED src/detection2_d_to_mask.cpp)
rclcpp_components_register_nodes(detection2_d_to_mask "nvidia::isaac_ros::foundationpose::Detection2DToMask")
@@ -75,4 +80,10 @@ if(BUILD_TESTING)
add_launch_test(test/isaac_ros_foundationpose_tracking_pol.py TIMEOUT "900")
endif()
+
+# Embed versioning information into installed files
+ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common)
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake")
+generate_version_info(${PROJECT_NAME})
+
ament_auto_package(INSTALL_TO_SHARE config launch rviz)
diff --git a/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml b/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml
index 468e035..3d6890c 100644
--- a/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml
+++ b/isaac_ros_foundationpose/config/nitros_foundationpose_node.yaml
@@ -21,7 +21,7 @@ components:
- name: rgb_image_receiver
type: nvidia::gxf::DoubleBufferReceiver
parameters:
- capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message
+ capacity: 12
- type: nvidia::gxf::MessageAvailableSchedulingTerm
parameters:
receiver: rgb_image_receiver
@@ -29,7 +29,7 @@ components:
- name: camera_model_receiver
type: nvidia::gxf::DoubleBufferReceiver
parameters:
- capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message
+ capacity: 12
- type: nvidia::gxf::MessageAvailableSchedulingTerm
parameters:
receiver: camera_model_receiver
@@ -37,7 +37,7 @@ components:
- name: mask_receiver
type: nvidia::gxf::DoubleBufferReceiver
parameters:
- capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message
+ capacity: 12
- type: nvidia::gxf::MessageAvailableSchedulingTerm
parameters:
receiver: mask_receiver
@@ -45,7 +45,7 @@ components:
- name: depth_receiver
type: nvidia::gxf::DoubleBufferReceiver
parameters:
- capacity: 12 # Synchronization queue size set to 12 to allow for one out of order message
+ capacity: 12
- type: nvidia::gxf::MessageAvailableSchedulingTerm
parameters:
receiver: depth_receiver
@@ -569,7 +569,7 @@ components:
verbose: true
clock: utils/clock
max_workspace_size: 6710886400
- enable_fp16: true
+ enable_fp16: false
max_batch_size: 42
- type: nvidia::gxf::MemoryAvailableSchedulingTerm
parameters:
@@ -815,7 +815,7 @@ components:
verbose: true
clock: utils/clock
max_workspace_size: 6710886400
- enable_fp16: true
+ enable_fp16: false
max_batch_size: 252
- type: nvidia::gxf::MemoryAvailableSchedulingTerm
parameters:
diff --git a/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp b/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp
index 166a78b..726bc2a 100644
--- a/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp
+++ b/isaac_ros_foundationpose/include/isaac_ros_foundationpose/foundationpose_node.hpp
@@ -66,6 +66,7 @@ class FoundationPoseNode : public nitros::NitrosNode
const float min_depth_;
const float max_depth_;
const int32_t refine_iterations_;
+ const StringList symmetry_planes_;
// Models file path
const std::string refine_model_file_path_;
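A hedged sketch of setting the new `symmetry_planes` parameter from a launch description; the axis-name values and asset paths below are assumptions for illustration only:

```python
# Illustrative only: passing the new string-list symmetry_planes parameter.
# The 'x'/'y' axis names and the asset paths are assumptions, not values taken
# from this diff; consult the package documentation for the accepted values.
from launch_ros.descriptions import ComposableNode

foundationpose_node = ComposableNode(
    name='foundationpose',
    package='isaac_ros_foundationpose',
    plugin='nvidia::isaac_ros::foundationpose::FoundationPoseNode',
    parameters=[{
        'mesh_file_path': '/tmp/textured_simple.obj',  # assumed path
        'texture_path': '/tmp/texture_map.png',        # assumed path
        'symmetry_planes': ['x', 'y'],                 # assumed axis names
    }])
```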
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py
index 760e2b4..fd0f463 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose.launch.py
@@ -139,6 +139,12 @@ def generate_launch_description():
arguments=['-d', rviz_config_path],
condition=IfCondition(launch_rviz))
+ detection2_d_array_filter_node = ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ )
detection2_d_to_mask_node = ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
@@ -146,7 +152,9 @@ def generate_launch_description():
parameters=[{
'mask_width': mask_width,
'mask_height': mask_height
- }])
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ )
foundationpose_bbox_container = ComposableNodeContainer(
name='foundationpose_container',
@@ -155,6 +163,7 @@ def generate_launch_description():
executable='component_container_mt',
composable_node_descriptions=[
foundationpose_node,
+ detection2_d_array_filter_node,
detection2_d_to_mask_node],
output='screen',
condition=IfCondition(launch_bbox_to_mask)
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py
index cb83c44..9b5bb43 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_core.launch.py
@@ -36,9 +36,7 @@
VISUALIZATION_DOWNSCALING_FACTOR = 10
-REFINE_MODEL_PATH = '/tmp/refine_model.onnx'
REFINE_ENGINE_PATH = '/tmp/refine_trt_engine.plan'
-SCORE_MODEL_PATH = '/tmp/score_model.onnx'
SCORE_ENGINE_PATH = '/tmp/score_trt_engine.plan'
@@ -53,12 +51,9 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
# FoundationPose parameters
mesh_file_path = LaunchConfiguration('mesh_file_path')
texture_path = LaunchConfiguration('texture_path')
- refine_model_file_path = LaunchConfiguration('refine_model_file_path')
refine_engine_file_path = LaunchConfiguration('refine_engine_file_path')
- score_model_file_path = LaunchConfiguration('score_model_file_path')
score_engine_file_path = LaunchConfiguration('score_engine_file_path')
# RT-DETR parameters
- rt_detr_model_file_path = LaunchConfiguration('rt_detr_model_file_path')
rt_detr_engine_file_path = LaunchConfiguration('rt_detr_engine_file_path')
input_width = interface_specs['camera_resolution']['width']
input_height = interface_specs['camera_resolution']['height']
@@ -188,7 +183,6 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
package='isaac_ros_tensor_rt',
plugin='nvidia::isaac_ros::dnn_inference::TensorRTNode',
parameters=[{
- 'model_file_path': rt_detr_model_file_path,
'engine_file_path': rt_detr_engine_file_path,
'output_binding_names': ['labels', 'boxes', 'scores'],
'output_tensor_names': ['labels', 'boxes', 'scores'],
@@ -206,15 +200,22 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
# Create a binary segmentation mask from a Detection2DArray published by RT-DETR.
# The segmentation mask is of size
# int(IMAGE_WIDTH/input_to_RT_DETR_ratio) x int(IMAGE_HEIGHT/input_to_RT_DETR_ratio)
+ 'detection2_d_array_filter_node': ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ ),
'detection2_d_to_mask_node': ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(input_width/input_to_RT_DETR_ratio),
- 'mask_height': int(input_height/input_to_RT_DETR_ratio)}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')]),
+ 'mask_height': int(input_height/input_to_RT_DETR_ratio)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ ),
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -272,14 +273,12 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
'mesh_file_path': mesh_file_path,
'texture_path': texture_path,
- 'refine_model_file_path': refine_model_file_path,
'refine_engine_file_path': refine_engine_file_path,
'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'refine_input_binding_names': ['input1', 'input2'],
'refine_output_tensor_names': ['output_tensor1', 'output_tensor2'],
'refine_output_binding_names': ['output1', 'output2'],
- 'score_model_file_path': score_model_file_path,
'score_engine_file_path': score_engine_file_path,
'score_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'score_input_binding_names': ['input1', 'input2'],
@@ -320,21 +319,11 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \
default_value='',
description='The absolute file path to the texture map'),
- 'refine_model_file_path': DeclareLaunchArgument(
- 'refine_model_file_path',
- default_value=REFINE_MODEL_PATH,
- description='The absolute file path to the refine model'),
-
'refine_engine_file_path': DeclareLaunchArgument(
'refine_engine_file_path',
default_value=REFINE_ENGINE_PATH,
description='The absolute file path to the refine trt engine'),
- 'score_model_file_path': DeclareLaunchArgument(
- 'score_model_file_path',
- default_value=SCORE_MODEL_PATH,
- description='The absolute file path to the score model'),
-
'score_engine_file_path': DeclareLaunchArgument(
'score_engine_file_path',
default_value=SCORE_ENGINE_PATH,
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py
index 6d5d7d0..cef141c 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk.launch.py
@@ -160,7 +160,7 @@ def generate_launch_description():
parameters=[{'module_id': 5,
'input_qos': 'SENSOR_DATA',
'output_qos': 'SENSOR_DATA',
- 'enable_statistics': True,
+ 'enable_diagnostics': True,
'topics_list': ['left/image_raw'],
'expected_fps_list': [30.0],
'jitter_tolerance_us': 30000}],
@@ -346,15 +346,22 @@ def generate_launch_description():
# Create a binary segmentation mask from a Detection2DArray published by RT-DETR.
# The segmentation mask is of size int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO) x
# int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)
+ detection2_d_array_filter_node = ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ )
detection2_d_to_mask_node = ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO),
- 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')])
+ 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ )
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -508,6 +515,7 @@ def generate_launch_description():
rtdetr_preprocessor_node,
tensor_rt_node,
rtdetr_decoder_node,
+ detection2_d_array_filter_node,
detection2_d_to_mask_node,
resize_mask_node,
resize_left_ess_size,
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py
index e0a91d2..1eb2d8d 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_hawk_tracking.launch.py
@@ -160,7 +160,7 @@ def generate_launch_description():
parameters=[{'module_id': 5,
'input_qos': 'SENSOR_DATA',
'output_qos': 'SENSOR_DATA',
- 'enable_statistics': True,
+ 'enable_diagnostics': True,
'topics_list': ['left/image_raw'],
'expected_fps_list': [30.0],
'jitter_tolerance_us': 30000}],
@@ -346,15 +346,22 @@ def generate_launch_description():
# Create a binary segmentation mask from a Detection2DArray published by RT-DETR.
# The segmentation mask is of size int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO) x
# int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)
+ detection2_d_array_filter_node = ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ )
detection2_d_to_mask_node = ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(HAWK_IMAGE_WIDTH/HAWK_TO_RT_DETR_RATIO),
- 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')])
+ 'mask_height': int(HAWK_IMAGE_HEIGHT/HAWK_TO_RT_DETR_RATIO)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ )
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -530,6 +537,7 @@ def generate_launch_description():
rtdetr_preprocessor_node,
tensor_rt_node,
rtdetr_decoder_node,
+ detection2_d_array_filter_node,
detection2_d_to_mask_node,
resize_mask_node,
resize_left_ess_size,
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py
index 8d31b56..0cc7f54 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim.launch.py
@@ -42,7 +42,8 @@
ISAAC_ROS_FP_MESHES_PATH = os.path.join(ISAAC_ROS_ASSETS_PATH,
'isaac_ros_foundationpose')
STEREO_DISPARITY_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH,
- 'dnn_stereo_disparity', 'dnn_stereo_disparity_v4.0.0')
+ 'dnn_stereo_disparity',
+ 'dnn_stereo_disparity_v4.1.0_onnx')
SYNTHETICA_DETR_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'synthetica_detr')
FOUDNATIONPOSE_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'foundationpose')
REFINE_ENGINE_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'refine_trt_engine.plan')
@@ -94,7 +95,7 @@ def generate_launch_description():
DeclareLaunchArgument(
'ess_depth_threshold',
- default_value='0.35',
+ default_value='0.4',
description='Threshold value ranges between 0.0 and 1.0 '
'for filtering disparity with confidence.'),
@@ -226,16 +227,22 @@ def generate_launch_description():
)
# Convert Detection2DArray from RT-DETR to a binary segmentation mask
+ detection2_d_array_filter_node = ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ )
detection2_d_to_mask_node = ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(SIM_IMAGE_WIDTH/SIM_TO_RT_DETR_RATIO),
- 'mask_height': int(SIM_IMAGE_HEIGHT/SIM_TO_RT_DETR_RATIO),
- 'sub_detection2_d_array': True}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')])
+ 'mask_height': int(SIM_IMAGE_HEIGHT/SIM_TO_RT_DETR_RATIO)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ )
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -378,6 +385,7 @@ def generate_launch_description():
rtdetr_preprocessor_node,
tensor_rt_node,
rtdetr_decoder_node,
+ detection2_d_array_filter_node,
detection2_d_to_mask_node,
resize_mask_node,
resize_left_ess_size,
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py
index 6ab3c1a..344845f 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_isaac_sim_tracking.launch.py
@@ -42,12 +42,11 @@
ISAAC_ROS_FP_MESHES_PATH = os.path.join(ISAAC_ROS_ASSETS_PATH,
'isaac_ros_foundationpose')
STEREO_DISPARITY_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH,
- 'dnn_stereo_disparity', 'dnn_stereo_disparity_v4.0.0')
+ 'dnn_stereo_disparity',
+ 'dnn_stereo_disparity_v4.1.0_onnx')
SYNTHETICA_DETR_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'synthetica_detr')
FOUDNATIONPOSE_MODELS_PATH = os.path.join(ISAAC_ROS_MODELS_PATH, 'foundationpose')
REFINE_ENGINE_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'refine_trt_engine.plan')
-REFINE_MODEL_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'refine_model.onnx')
-SCORE_MODEL_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'score_model.onnx')
SCORE_ENGINE_PATH = os.path.join(FOUDNATIONPOSE_MODELS_PATH, 'score_trt_engine.plan')
ESS_ENGINE_PATH = os.path.join(STEREO_DISPARITY_MODELS_PATH, 'light_ess.engine')
@@ -85,16 +84,6 @@ def generate_launch_description():
default_value=SCORE_ENGINE_PATH,
description='The absolute file path to the score trt engine'),
- DeclareLaunchArgument(
- 'refine_model_file_path',
- default_value=REFINE_MODEL_PATH,
- description='The absolute file path to the refine trt engine'),
-
- DeclareLaunchArgument(
- 'score_model_file_path',
- default_value=SCORE_MODEL_PATH,
- description='The absolute file path to the score trt engine'),
-
DeclareLaunchArgument(
'rt_detr_engine_file_path',
default_value=RTDETR_ENGINE_PATH,
@@ -107,7 +96,7 @@ def generate_launch_description():
DeclareLaunchArgument(
'ess_depth_threshold',
- default_value='0.35',
+ default_value='0.4',
description='Threshold value ranges between 0.0 and 1.0 '
'for filtering disparity with confidence.'),
@@ -126,8 +115,6 @@ def generate_launch_description():
texture_path = LaunchConfiguration('texture_path')
refine_engine_file_path = LaunchConfiguration('refine_engine_file_path')
score_engine_file_path = LaunchConfiguration('score_engine_file_path')
- refine_model_file_path = LaunchConfiguration('refine_model_file_path')
- score_model_file_path = LaunchConfiguration('score_model_file_path')
rt_detr_engine_file_path = LaunchConfiguration('rt_detr_engine_file_path')
ess_depth_engine_file_path = LaunchConfiguration('ess_depth_engine_file_path')
ess_depth_threshold = LaunchConfiguration('ess_depth_threshold')
@@ -368,14 +355,12 @@ def generate_launch_description():
'mesh_file_path': mesh_file_path,
'texture_path': texture_path,
- 'refine_model_file_path': refine_model_file_path,
'refine_engine_file_path': refine_engine_file_path,
'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'refine_input_binding_names': ['input1', 'input2'],
'refine_output_tensor_names': ['output_tensor1', 'output_tensor2'],
'refine_output_binding_names': ['output1', 'output2'],
- 'score_model_file_path': score_model_file_path,
'score_engine_file_path': score_engine_file_path,
'score_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'score_input_binding_names': ['input1', 'input2'],
@@ -392,7 +377,6 @@ def generate_launch_description():
'mesh_file_path': mesh_file_path,
'texture_path': texture_path,
- 'refine_model_file_path': refine_model_file_path,
'refine_engine_file_path': refine_engine_file_path,
'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'refine_input_binding_names': ['input1', 'input2'],
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py
index b390a1d..ce29e0e 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense.launch.py
@@ -289,15 +289,22 @@ def generate_launch_description():
# Create a binary segmentation mask from a Detection2DArray published by RT-DETR.
# The segmentation mask is of size int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO) x
# int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)
+ detection2_d_array_filter_node = ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ )
detection2_d_to_mask_node = ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO),
- 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')])
+ 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ )
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -400,6 +407,7 @@ def generate_launch_description():
rtdetr_preprocessor_node,
tensor_rt_node,
rtdetr_decoder_node,
+ detection2_d_array_filter_node,
detection2_d_to_mask_node,
resize_mask_node,
foundationpose_node,
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py
index 6ce4a0b..26d5006 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_realsense_tracking.launch.py
@@ -289,15 +289,22 @@ def generate_launch_description():
# Create a binary segmentation mask from a Detection2DArray published by RT-DETR.
# The segmentation mask is of size int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO) x
# int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)
+ detection2_d_array_filter_node = ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ )
detection2_d_to_mask_node = ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(REALSENSE_IMAGE_WIDTH/REALSENSE_TO_RT_DETR_RATIO),
- 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')])
+ 'mask_height': int(REALSENSE_IMAGE_HEIGHT/REALSENSE_TO_RT_DETR_RATIO)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ )
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -424,6 +431,7 @@ def generate_launch_description():
rtdetr_preprocessor_node,
tensor_rt_node,
rtdetr_decoder_node,
+ detection2_d_array_filter_node,
detection2_d_to_mask_node,
resize_mask_node,
selector_node,
diff --git a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py
index 4ac9547..842eed9 100644
--- a/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py
+++ b/isaac_ros_foundationpose/launch/isaac_ros_foundationpose_tracking_core.launch.py
@@ -30,9 +30,7 @@
# RT-DETR models expect 3 image channels
RT_DETR_MODEL_NUM_CHANNELS = 3
-REFINE_MODEL_PATH = '/tmp/refine_model.onnx'
REFINE_ENGINE_PATH = '/tmp/refine_trt_engine.plan'
-SCORE_MODEL_PATH = '/tmp/score_model.onnx'
SCORE_ENGINE_PATH = '/tmp/score_trt_engine.plan'
@@ -44,12 +42,9 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
# FoundationPose parameters
mesh_file_path = LaunchConfiguration('mesh_file_path')
texture_path = LaunchConfiguration('texture_path')
- refine_model_file_path = LaunchConfiguration('refine_model_file_path')
refine_engine_file_path = LaunchConfiguration('refine_engine_file_path')
- score_model_file_path = LaunchConfiguration('score_model_file_path')
score_engine_file_path = LaunchConfiguration('score_engine_file_path')
# RT-DETR parameters
- rt_detr_model_file_path = LaunchConfiguration('rt_detr_model_file_path')
rt_detr_engine_file_path = LaunchConfiguration('rt_detr_engine_file_path')
input_width = interface_specs['camera_resolution']['width']
input_height = interface_specs['camera_resolution']['height']
@@ -158,7 +153,6 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
package='isaac_ros_tensor_rt',
plugin='nvidia::isaac_ros::dnn_inference::TensorRTNode',
parameters=[{
- 'model_file_path': rt_detr_model_file_path,
'engine_file_path': rt_detr_engine_file_path,
'output_binding_names': ['labels', 'boxes', 'scores'],
'output_tensor_names': ['labels', 'boxes', 'scores'],
@@ -179,15 +173,22 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
# Create a binary segmentation mask from a Detection2DArray published by RT-DETR.
# The segmentation mask is of size
# int(IMAGE_WIDTH/input_to_RT_DETR_ratio) x int(IMAGE_HEIGHT/input_to_RT_DETR_ratio)
+ 'detection2_d_array_filter_node': ComposableNode(
+ name='detection2_d_array_filter',
+ package='isaac_ros_foundationpose',
+ plugin='nvidia::isaac_ros::foundationpose::Detection2DArrayFilter',
+ remappings=[('detection2_d_array', 'detections_output')]
+ ),
'detection2_d_to_mask_node': ComposableNode(
name='detection2_d_to_mask',
package='isaac_ros_foundationpose',
plugin='nvidia::isaac_ros::foundationpose::Detection2DToMask',
parameters=[{
'mask_width': int(input_width/input_to_RT_DETR_ratio),
- 'mask_height': int(input_height/input_to_RT_DETR_ratio)}],
- remappings=[('detection2_d_array', 'detections_output'),
- ('segmentation', 'rt_detr_segmentation')]),
+ 'mask_height': int(input_height/input_to_RT_DETR_ratio)
+ }],
+ remappings=[('segmentation', 'rt_detr_segmentation')]
+ ),
# Resize segmentation mask to ESS model image size so it can be used by FoundationPose
# FoundationPose requires depth, rgb image and segmentation mask to be of the same size
@@ -239,14 +240,12 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
'mesh_file_path': mesh_file_path,
'texture_path': texture_path,
- 'refine_model_file_path': refine_model_file_path,
'refine_engine_file_path': refine_engine_file_path,
'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'refine_input_binding_names': ['input1', 'input2'],
'refine_output_tensor_names': ['output_tensor1', 'output_tensor2'],
'refine_output_binding_names': ['output1', 'output2'],
- 'score_model_file_path': score_model_file_path,
'score_engine_file_path': score_engine_file_path,
'score_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'score_input_binding_names': ['input1', 'input2'],
@@ -263,7 +262,6 @@ def get_composable_nodes(interface_specs: Dict[str, Any]) -> Dict[str, Composabl
'mesh_file_path': mesh_file_path,
'texture_path': texture_path,
- 'refine_model_file_path': refine_model_file_path,
'refine_engine_file_path': refine_engine_file_path,
'refine_input_tensor_names': ['input_tensor1', 'input_tensor2'],
'refine_input_binding_names': ['input1', 'input2'],
@@ -288,31 +286,16 @@ def get_launch_actions(interface_specs: Dict[str, Any]) -> \
default_value='',
description='The absolute file path to the texture map'),
- 'refine_model_file_path': DeclareLaunchArgument(
- 'refine_model_file_path',
- default_value=REFINE_MODEL_PATH,
- description='The absolute file path to the refine model'),
-
'refine_engine_file_path': DeclareLaunchArgument(
'refine_engine_file_path',
default_value=REFINE_ENGINE_PATH,
description='The absolute file path to the refine trt engine'),
- 'score_model_file_path': DeclareLaunchArgument(
- 'score_model_file_path',
- default_value=SCORE_MODEL_PATH,
- description='The absolute file path to the score model'),
-
'score_engine_file_path': DeclareLaunchArgument(
'score_engine_file_path',
default_value=SCORE_ENGINE_PATH,
description='The absolute file path to the score trt engine'),
- 'rt_detr_model_file_path': DeclareLaunchArgument(
- 'rt_detr_model_file_path',
- default_value='',
- description='The absolute file path to the RT-DETR ONNX file'),
-
'rt_detr_engine_file_path': DeclareLaunchArgument(
'rt_detr_engine_file_path',
default_value='',
diff --git a/isaac_ros_foundationpose/package.xml b/isaac_ros_foundationpose/package.xml
index 2bb192a..4fc4147 100644
--- a/isaac_ros_foundationpose/package.xml
+++ b/isaac_ros_foundationpose/package.xml
@@ -21,7 +21,7 @@
<name>isaac_ros_foundationpose</name>
- <version>3.1.0</version>
+ <version>3.2.0</version>
<description>FoundationPose 6D pose estimation</description>
Isaac ROS Maintainers
diff --git a/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz b/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz
index 06df3f7..1f8a8cd 100644
--- a/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz
+++ b/isaac_ros_foundationpose/rviz/foundationpose_hawk.rviz
@@ -114,7 +114,7 @@ Visualization Manager:
Enabled: true
Global Options:
Background Color: 48; 48; 48
- Fixed Frame: left_cam
+ Fixed Frame: stereo_camera_left_optical
Frame Rate: 30
Name: root
Tools:
diff --git a/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz b/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz
index 5ce51d5..94a37d4 100644
--- a/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz
+++ b/isaac_ros_foundationpose/rviz/foundationpose_hawk_tracking.rviz
@@ -85,7 +85,7 @@ Visualization Manager:
Enabled: true
Global Options:
Background Color: 48; 48; 48
- Fixed Frame: left_cam
+ Fixed Frame: stereo_camera_left_optical
Frame Rate: 30
Name: root
Tools:
diff --git a/isaac_ros_foundationpose/src/detection2_d_array_filter.cpp b/isaac_ros_foundationpose/src/detection2_d_array_filter.cpp
new file mode 100644
index 0000000..0a36128
--- /dev/null
+++ b/isaac_ros_foundationpose/src/detection2_d_array_filter.cpp
@@ -0,0 +1,86 @@
+// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <string>
+#include "rclcpp/rclcpp.hpp"
+#include "rclcpp_components/register_node_macro.hpp"
+#include "vision_msgs/msg/detection2_d.hpp"
+#include "vision_msgs/msg/detection2_d_array.hpp"
+
+namespace nvidia
+{
+namespace isaac_ros
+{
+namespace foundationpose
+{
+
+/*
+ROS 2 node that selects a single object from a vision_msgs::msg::Detection2DArray
+based on desired class ID and confidence
+*/
+class Detection2DArrayFilter : public rclcpp::Node
+{
+public:
+ explicit Detection2DArrayFilter(const rclcpp::NodeOptions & options)
+ : Node("detection2_d_to_mask", options),
+ desired_class_id_(declare_parameter("desired_class_id", "")),
+ detection2_d_array_sub_{create_subscription<vision_msgs::msg::Detection2DArray>(
+ "detection2_d_array", 10,
+ std::bind(&Detection2DArrayFilter::boundingBoxArrayCallback, this, std::placeholders::_1))},
+ detection2_d_pub_{create_publisher<vision_msgs::msg::Detection2D>("detection2_d", 10)}
+ {}
+
+ void boundingBoxArrayCallback(const vision_msgs::msg::Detection2DArray::SharedPtr msg)
+ {
+ // Find the detection bounding box with the highest confidence
+ float max_confidence = 0;
+ vision_msgs::msg::Detection2D max_confidence_detection;
+ // Iterate through the detections and find the one with the highest confidence
+ for (const auto & detection : msg->detections) {
+ // Iterate through all the hypotheses for this detection
+ // and find the one with the highest confidence
+ for (const auto & result : detection.results) {
+ if (result.hypothesis.score > max_confidence && (desired_class_id_.empty() ||
+ desired_class_id_ == result.hypothesis.class_id))
+ {
+ max_confidence = result.hypothesis.score;
+ max_confidence_detection = detection;
+ }
+ }
+ }
+
+ // If no detection was found, log and skip publishing
+ if (max_confidence == 0) {
+ RCLCPP_DEBUG(this->get_logger(), "No detection found with non-zero confidence");
+ return;
+ }
+
+ detection2_d_pub_->publish(max_confidence_detection);
+ }
+
+private:
+ std::string desired_class_id_;
+ rclcpp::Subscription<vision_msgs::msg::Detection2DArray>::SharedPtr detection2_d_array_sub_;
+ rclcpp::Publisher<vision_msgs::msg::Detection2D>::SharedPtr detection2_d_pub_;
+};
+
+} // namespace foundationpose
+} // namespace isaac_ros
+} // namespace nvidia
+
+// Register the component with the ROS system to create a shared library
+RCLCPP_COMPONENTS_REGISTER_NODE(nvidia::isaac_ros::foundationpose::Detection2DArrayFilter)
diff --git a/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp b/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp
index 2488864..8d8e072 100644
--- a/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp
+++ b/isaac_ros_foundationpose/src/detection2_d_to_mask.cpp
@@ -16,6 +16,7 @@
// SPDX-License-Identifier: Apache-2.0
#include <cv_bridge/cv_bridge.h>
+#include <rclcpp/qos.hpp>
#include <opencv2/opencv.hpp>
#include <rclcpp/rclcpp.hpp>
#include <vision_msgs/msg/detection2_d_array.hpp>
@@ -39,24 +40,14 @@ class Detection2DToMask : public rclcpp::Node
{
public:
explicit Detection2DToMask(const rclcpp::NodeOptions & options)
- : Node("detection2_d_to_mask", options)
+ : Node("detection2_d_to_mask", options),
+ mask_width_(declare_parameter("mask_width", 640)),
+ mask_height_(declare_parameter("mask_height", 480)),
+ image_pub_{create_publisher<sensor_msgs::msg::Image>("segmentation", 10)},
+ detection2_d_sub_{create_subscription<vision_msgs::msg::Detection2D>(
+ "detection2_d", rclcpp::QoS(rclcpp::SensorDataQoS()),
+ std::bind(&Detection2DToMask::boundingBoxCallback, this, std::placeholders::_1))}
{
- // Create a publisher for the mono8 image
- image_pub_ = this->create_publisher<sensor_msgs::msg::Image>("segmentation", 10);
-
- this->declare_parameter("mask_width", 640);
- this->declare_parameter("mask_height", 480);
- this->get_parameter("mask_width", mask_width_);
- this->get_parameter("mask_height", mask_height_);
-
- detection2_d_array_sub_ = this->create_subscription<vision_msgs::msg::Detection2DArray>(
- "detection2_d_array", 10,
- std::bind(&Detection2DToMask::boundingBoxArrayCallback, this, std::placeholders::_1));
- // Create subscriber for Detection2D
- detection2_d_sub_ = this->create_subscription<vision_msgs::msg::Detection2D>(
- "detection2_d", 10,
- std::bind(&Detection2DToMask::boundingBoxCallback, this, std::placeholders::_1));
-
RCLCPP_INFO(this->get_logger(), "Mask Height: %d, Mask Width: %d", mask_height_, mask_width_);
}
@@ -83,56 +74,11 @@ class Detection2DToMask : public rclcpp::Node
image_pub_->publish(image_msg);
}
- void boundingBoxArrayCallback(const vision_msgs::msg::Detection2DArray::SharedPtr msg)
- {
- // Find the detection bounding box with the highest confidence
- float max_confidence = 0;
- vision_msgs::msg::Detection2D max_confidence_detection;
- // Iterate through the detections and find the one with the highest confidence
- for (auto detection : msg->detections) {
- // Iterate through all the hypotheses for this detection
- // and find the one with the highest confidence
- for (auto result : detection.results) {
- if (result.hypothesis.score > max_confidence) {
- max_confidence = result.hypothesis.score;
- max_confidence_detection = detection;
- }
- }
- }
-
- // If no detection was found, return error
- if (max_confidence == 0) {
- RCLCPP_INFO(this->get_logger(), "No detection found with non-zero confidence");
- return;
- }
-
- // Convert Detection2D to a binary mono8 image
- cv::Mat image = cv::Mat::zeros(mask_height_, mask_width_, CV_8UC1);
- // Draws a rectangle filled with 255
- cv::rectangle(
- image,
- cv::Point(
- max_confidence_detection.bbox.center.position.x - max_confidence_detection.bbox.size_x / 2,
- max_confidence_detection.bbox.center.position.y - max_confidence_detection.bbox.size_y / 2),
- cv::Point(
- max_confidence_detection.bbox.center.position.x + max_confidence_detection.bbox.size_x / 2,
- max_confidence_detection.bbox.center.position.y + max_confidence_detection.bbox.size_y / 2),
- cv::Scalar(255), -1);
-
- // Convert the OpenCV image to a ROS sensor_msgs::msg::Image and publish it
- std_msgs::msg::Header header(msg->header);
- cv_bridge::CvImage cv_image(header, "mono8", image);
- sensor_msgs::msg::Image image_msg;
- cv_image.toImageMsg(image_msg);
- image_pub_->publish(image_msg);
- }
-
private:
+ int mask_width_;
+ int mask_height_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr image_pub_;
rclcpp::Subscription<vision_msgs::msg::Detection2D>::SharedPtr detection2_d_sub_;
- rclcpp::Subscription<vision_msgs::msg::Detection2DArray>::SharedPtr detection2_d_array_sub_;
- int mask_height_;
- int mask_width_;
};
} // namespace foundationpose
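With the array handling removed, `Detection2DToMask` only rasterizes a single `Detection2D` into a mono8 mask. A numpy sketch of that rasterization, assuming the vision_msgs bbox convention of a center point plus sizes:

```python
import numpy as np

# Zero everywhere except a filled 255 rectangle at the bounding box,
# as in Detection2DToMask::boundingBoxCallback.
def bbox_to_mask(cx, cy, size_x, size_y, mask_width, mask_height):
    mask = np.zeros((mask_height, mask_width), dtype=np.uint8)
    x0, x1 = int(cx - size_x / 2), int(cx + size_x / 2)
    y0, y1 = int(cy - size_y / 2), int(cy + size_y / 2)
    mask[max(y0, 0):min(y1, mask_height), max(x0, 0):min(x1, mask_width)] = 255
    return mask
```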
diff --git a/isaac_ros_foundationpose/src/foundationpose_node.cpp b/isaac_ros_foundationpose/src/foundationpose_node.cpp
index 5c580eb..45e587d 100644
--- a/isaac_ros_foundationpose/src/foundationpose_node.cpp
+++ b/isaac_ros_foundationpose/src/foundationpose_node.cpp
@@ -23,6 +23,7 @@
#include "rclcpp/rclcpp.hpp"
#include "tf2_ros/transform_broadcaster.h"
+#include "isaac_ros_common/qos.hpp"
#include "isaac_ros_nitros_camera_info_type/nitros_camera_info.hpp"
#include "isaac_ros_nitros_detection3_d_array_type/nitros_detection3_d_array.hpp"
#include "isaac_ros_nitros_image_type/nitros_image.hpp"
@@ -145,6 +146,9 @@ FoundationPoseNode::FoundationPoseNode(rclcpp::NodeOptions options)
min_depth_(declare_parameter("min_depth", 0.1)),
max_depth_(declare_parameter("max_depth", 4.0)),
refine_iterations_(declare_parameter("refine_iterations", 1)),
+ symmetry_planes_(
+ declare_parameter("symmetry_planes", StringList())),
+
refine_model_file_path_(
declare_parameter("refine_model_file_path", "/tmp/refine_model.onnx")),
refine_engine_file_path_(
@@ -176,6 +180,34 @@ FoundationPoseNode::FoundationPoseNode(rclcpp::NodeOptions options)
{
RCLCPP_DEBUG(get_logger(), "[FoundationPoseNode] Constructor");
+ // Sets the QoS for the publishers and subscribers set up by this NITROS node
+ rclcpp::QoS depth_qos_ = ::isaac_ros::common::AddQosParameter(
+ *this, "DEFAULT",
+ "depth_qos");
+ rclcpp::QoS color_qos_ = ::isaac_ros::common::AddQosParameter(
+ *this, "DEFAULT",
+ "color_qos");
+ rclcpp::QoS color_info_qos_ = ::isaac_ros::common::AddQosParameter(
+ *this, "DEFAULT",
+ "color_info_qos");
+ rclcpp::QoS segmentation_qos_ = ::isaac_ros::common::AddQosParameter(
+ *this, "DEFAULT",
+ "segmentation_qos");
+ for (auto & config : config_map_) {
+ if (config.second.topic_name == INPUT_DEPTH_TOPIC_NAME) {
+ config.second.qos = depth_qos_;
+ }
+ if (config.second.topic_name == INPUT_RGB_IMAGE_TOPIC_NAME) {
+ config.second.qos = color_qos_;
+ }
+ if (config.second.topic_name == INPUT_CAMERA_INFO_TOPIC_NAME) {
+ config.second.qos = color_info_qos_;
+ }
+ if (config.second.topic_name == INPUT_SEGMENTATION_TOPIC_NAME) {
+ config.second.qos = segmentation_qos_;
+ }
+ }
+
// Add callback function for FoundationPose Detection3D array to broadcast to ROS TF tree
config_map_[OUTPUT_POSE_COMPONENT_KEY].callback = std::bind(
&FoundationPoseNode::FoundationPoseDetectionCallback, this, std::placeholders::_1,
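The four `AddQosParameter` calls above expose one QoS override per input topic. A hedged launch-file sketch of overriding them (the `'SENSOR_DATA'` string mirrors the QoS values used elsewhere in this diff; each parameter falls back to `'DEFAULT'`):

```python
from launch_ros.descriptions import ComposableNode

foundationpose_node = ComposableNode(
    name='foundationpose',
    package='isaac_ros_foundationpose',
    plugin='nvidia::isaac_ros::foundationpose::FoundationPoseNode',
    parameters=[{
        'depth_qos': 'SENSOR_DATA',       # input depth
        'color_qos': 'SENSOR_DATA',       # input rgb image
        'color_info_qos': 'SENSOR_DATA',  # input camera info
        'segmentation_qos': 'DEFAULT',    # input segmentation mask
    }])
```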
@@ -363,6 +395,7 @@ void FoundationPoseNode::postLoadGraphCallback()
"refine_inference", "nvidia::gxf::TensorRtInference", "output_binding_names",
refine_output_binding_names_);
+
// Set the score network TensorRT configs from parameter
getNitrosContext().setParameterStr(
"score_inference", "nvidia::gxf::TensorRtInference", "model_file_path",
@@ -387,6 +420,24 @@ void FoundationPoseNode::postLoadGraphCallback()
getNitrosContext().setParameter1DStrVector(
"score_inference", "nvidia::gxf::TensorRtInference", "output_binding_names",
score_output_binding_names_);
+
+ if (symmetry_planes_.size() > 0) {
+ getNitrosContext().setParameter1DStrVector(
+ "sampling", "nvidia::isaac_ros::FoundationposeSampling", "symmetry_planes",
+ symmetry_planes_);
+
+ // Number of symmetric poses that can be treated as the same pose
+ auto symmetry_poses = std::min(static_cast<int>(std::pow(2, symmetry_planes_.size())), 4);
+ int refine_max_batch_size = 252 / 6 / symmetry_poses;
+ getNitrosContext().setParameterInt32(
+ "refine_inference", "nvidia::gxf::TensorRtInference", "max_batch_size",
+ refine_max_batch_size);
+
+ int score_max_batch_size = refine_max_batch_size * 6;
+ getNitrosContext().setParameterInt32(
+ "score_inference", "nvidia::gxf::TensorRtInference", "max_batch_size",
+ score_max_batch_size);
+ }
}
void FoundationPoseNode::FoundationPoseDetectionCallback(
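The symmetry-plane handling above also rescales the TensorRT batch sizes. In plain Python, assuming (as the surrounding code suggests) that 252 is the refine pose budget and 6 the slice factor used by the score network:

```python
# Batch-size arithmetic from postLoadGraphCallback: fold up to 4
# symmetry-equivalent poses into one, then rescale both engines.
def batch_sizes(num_symmetry_planes: int):
    symmetry_poses = min(2 ** num_symmetry_planes, 4)
    refine_max_batch_size = 252 // 6 // symmetry_poses
    score_max_batch_size = refine_max_batch_size * 6
    return refine_max_batch_size, score_max_batch_size

assert batch_sizes(1) == (21, 126)  # one symmetry plane -> 2 equivalent poses
```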
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt
index 0624f2a..7efd20c 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt
+++ b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/CMakeLists.txt
@@ -75,4 +75,10 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
# Install the binary file
install(TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/gxf/lib)
+
+# Embed versioning information into installed files
+ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common)
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake")
+generate_version_info(${PROJECT_NAME})
+
ament_auto_package(INSTALL_TO_SHARE)
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml
index 23c1e4a..bf88d50 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml
+++ b/isaac_ros_gxf_extensions/gxf_isaac_centerpose/package.xml
@@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0
<name>gxf_isaac_centerpose</name>
- <version>3.1.0</version>
+ <version>3.2.0</version>
<description>GXF extension containing CenterPose pose estimation related components.</description>
Isaac ROS Maintainers
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt b/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt
index ed536ab..5bc4c5b 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt
+++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/CMakeLists.txt
@@ -59,4 +59,10 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
# Install the binary file
install(TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/gxf/lib)
+
+# Embed versioning information into installed files
+ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common)
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake")
+generate_version_info(${PROJECT_NAME})
+
ament_auto_package(INSTALL_TO_SHARE)
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp
index 66f635b..f12d6e7 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.cpp
@@ -17,23 +17,27 @@
#include "dope_decoder.hpp"
+#include <algorithm>
#include <cmath>
-#include <cstring>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+#include "detection3_d_array_message/detection3_d_array_message.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/core/eigen.hpp"
#include "opencv2/core/mat.hpp"
#include "opencv2/imgproc.hpp"
-#include <cuda_runtime.h>
-
+#include "gxf/core/parameter_parser_std.hpp"
#include "gxf/multimedia/camera.hpp"
#include "gxf/multimedia/video.hpp"
-#include "gxf/core/parameter_parser_std.hpp"
#include "gxf/std/tensor.hpp"
#include "gxf/std/timestamp.hpp"
+
namespace nvidia {
namespace isaac_ros {
namespace dope {
@@ -55,23 +59,24 @@ constexpr size_t kNumVertexChannel = kNumCorners + 1;
constexpr float kGaussianSigma = 3.0;
// Minimum acceptable sum of averaging weights
constexpr float kMinimumWeightSum = 1e-6;
-constexpr float kMapPeakThreshhold = 0.01;
-constexpr float kPeakWeightThreshhold = 0.1;
-constexpr float kAffinityMapAngleThreshhold = 0.5;
// Offset added to belief map pixel coordinate, constant for the fixed input
// image size
// https://github.com/NVlabs/Deep_Object_Pose/blob/master/src/dope/inference/detector.py
// line 343
constexpr float kOffsetDueToUpsampling = 0.4395f;
+// Minimum required blurred belief map value at the peaks
+constexpr float kBlurredPeakThreshold = 0.01;
// The original image is kImageToMapScale larger in each dimension than the
// output tensor from the DNN.
constexpr float kImageToMapScale = 8.0f;
-// From the requirements listed for pnp::ComputeCameraPoseEpnp.
-constexpr size_t kRequiredPointsForPnP = 6;
+// Require all 9 vertices to publish a pose
+constexpr size_t kRequiredPointsForPnP = 9;
// Placeholder for unidentified peak ids in DopeObject
constexpr int kInvalidId = -1;
// Placeholder for unknown best distance from centroid to peak in DopeObject
constexpr float kInvalidDist = std::numeric_limits<float>::max();
+// For converting sizes in cm to m in ExtractPose and when publishing bbox sizes.
+constexpr double kCentimeterToMeter = 100.0;
// The list of keypoint indices (0-8, 8 is centroid) and their corresponding 2d
// pixel coordinates as columns in a matrix.
@@ -93,7 +98,7 @@ struct DopeObject {
};
struct Pose3d {
-public:
+ public:
Eigen::Vector3d translation;
Eigen::Quaterniond rotation;
@@ -106,7 +111,7 @@ struct Pose3d {
};
// Returns pixel mask for local maximums in single - channel image src
-void IsolateMaxima(const cv::Mat &src, cv::Mat &mask) {
+void IsolateMaxima(const cv::Mat& src, cv::Mat& mask) {
// Find pixels that are equal to the local neighborhood maxima
cv::dilate(src, mask, cv::Mat());
cv::compare(src, mask, mask, cv::CMP_GE);
@@ -119,8 +124,7 @@ void IsolateMaxima(const cv::Mat &src, cv::Mat &mask) {
}
// Returns pixel coordinate (row, col) of maxima in single-channel image
-std::vector<Eigen::Vector2i> FindPeaks(const cv::Mat &image,
- const float threshold) {
+std::vector<Eigen::Vector2i> FindPeaks(const cv::Mat& image) {
// Extract centers of local maxima
cv::Mat mask;
std::vector<cv::Point> maxima;
@@ -130,7 +134,7 @@ std::vector FindPeaks(const cv::Mat &image,
// Find maxima
std::vector<Eigen::Vector2i> peaks;
for (const auto &m : maxima) {
- if (image.at<float>(m.y, m.x) > threshold) {
+ if (image.at<float>(m.y, m.x) > kBlurredPeakThreshold) {
peaks.push_back(Eigen::Vector2i(m.x, m.y));
}
}
@@ -140,13 +144,13 @@ std::vector FindPeaks(const cv::Mat &image,
// Returns 3x9 matrix of the 3d coordinates of cuboid corners and center
Eigen::Matrix<double, 3, 9>
-CuboidVertices(const std::array<double, 3> &ext) {
+CuboidVertices(const std::array<double, 3>& ext) {
// X axis points to the right
- const double right = ext.at(0) * 0.5;
- const double left = -ext.at(0) * 0.5;
+ const double right = -ext.at(0) * 0.5;
+ const double left = ext.at(0) * 0.5;
// Y axis points downward
- const double bottom = ext.at(1) * 0.5;
- const double top = -ext.at(1) * 0.5;
+ const double bottom = -ext.at(1) * 0.5;
+ const double top = ext.at(1) * 0.5;
// Z axis points forward (away from camera)
const double front = ext.at(2) * 0.5;
const double rear = -ext.at(2) * 0.5;
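For orientation: the cuboid keypoints that feed PnP are the eight corners plus the centroid, built from the object's (x, y, z) extents with the flipped sign conventions above. A numpy sketch; the corner ordering here is illustrative only, the actual codelet fixes a specific order:

```python
import numpy as np

# 3x9 matrix of cuboid corners plus centroid, using the sign
# assignments from the hunk above (right/bottom now negative).
def cuboid_vertices(ext):
    right, left = -ext[0] / 2, ext[0] / 2
    bottom, top = -ext[1] / 2, ext[1] / 2
    front, rear = ext[2] / 2, -ext[2] / 2
    corners = np.array([[right, top, front], [left, top, front],
                        [left, bottom, front], [right, bottom, front],
                        [right, top, rear], [left, top, rear],
                        [left, bottom, rear], [right, bottom, rear],
                        [0.0, 0.0, 0.0]])  # centroid last
    return corners.T
```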
@@ -160,7 +164,8 @@ CuboidVertices(const std::array &ext) {
}
std::vector<DopeObjectKeypoints>
-FindObjects(const std::array<cv::Mat, kInputMapsChannels> &maps) {
+FindObjects(const std::array<cv::Mat, kInputMapsChannels>& maps, const double map_peak_threshold,
+ const double affinity_map_angle_threshold) {
using Vector2f = Eigen::Vector2f;
using Vector2i = Eigen::Vector2i;
@@ -178,18 +183,18 @@ FindObjects(const std::array &maps) {
kGaussianSigma, cv::BORDER_REFLECT);
// Find the maxima of the tensor values in this channel
- std::vector<Eigen::Vector2i> peaks = FindPeaks(blurred, kMapPeakThreshhold);
+ std::vector<Eigen::Vector2i> peaks = FindPeaks(blurred);
for (size_t pp = 0; pp < peaks.size(); ++pp) {
const auto peak = peaks[pp];
- // Compute the weighted average for localizing the peak, using a 5x5
+ // Compute the weighted average for localizing the peak, using an 11x11
// window
Vector2f peak_sum(0, 0);
float weight_sum = 0.0f;
- for (int ii = -2; ii <= 2; ++ii) {
- for (int jj = -2; jj <= 2; ++jj) {
- const int row = peak[0] + ii;
- const int col = peak[1] + jj;
+ for (int ii = -5; ii <= 5; ++ii) {
+ for (int jj = -5; jj <= 5; ++jj) {
+ const int row = peak[1] + ii;
+ const int col = peak[0] + jj;
if (col < 0 || col >= image.size[1] || row < 0 ||
row >= image.size[0]) {
@@ -198,12 +203,12 @@ FindObjects(const std::array &maps) {
const float weight = image.at<float>(row, col);
weight_sum += weight;
- peak_sum[0] += row * weight;
- peak_sum[1] += col * weight;
+ peak_sum[1] += row * weight;
+ peak_sum[0] += col * weight;
}
}
- if (image.at<float>(peak[1], peak[0]) >= kMapPeakThreshhold) {
+ if (image.at<float>(peak[1], peak[0]) >= map_peak_threshold) {
channel_peaks[chan].push_back(static_cast<int>(all_peaks.size()));
if (std::fabs(weight_sum) < kMinimumWeightSum) {
all_peaks.push_back({peak[0] + kOffsetDueToUpsampling,
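The loop above localizes each belief-map peak with an intensity-weighted centroid, now over an 11x11 window and with the row/column indices swapped back into (x, y) order. A simplified numpy/OpenCV sketch of the whole peak search (one threshold where the codelet uses two):

```python
import cv2
import numpy as np

# Blur the belief map, keep local maxima above a threshold, then refine
# each peak with an 11x11 intensity-weighted centroid plus the fixed
# upsampling offset, mirroring FindPeaks/FindObjects above.
def find_and_refine_peaks(belief, threshold=0.01, sigma=3.0):
    blurred = cv2.GaussianBlur(belief, (0, 0), sigma,
                               borderType=cv2.BORDER_REFLECT)
    dilated = cv2.dilate(blurred, None)
    mask = (blurred >= dilated) & (blurred > threshold)
    refined = []
    h, w = belief.shape
    for row, col in np.argwhere(mask):
        acc = np.zeros(2)
        weight_sum = 0.0
        for dr in range(-5, 6):
            for dc in range(-5, 6):
                r, c = row + dr, col + dc
                if 0 <= r < h and 0 <= c < w:
                    weight = float(belief[r, c])
                    weight_sum += weight
                    acc += weight * np.array([c, r])  # x = col, y = row
        if weight_sum >= 1e-6:
            refined.append(tuple(acc / weight_sum + 0.4395))
        else:
            refined.append((col + 0.4395, row + 0.4395))
    return refined
```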
@@ -227,15 +232,15 @@ FindObjects(const std::array &maps) {
// Use 16 affinity field tensors (2 for each corner to centroid) to identify
// corner-centroid associated for each corner peak
for (size_t chan = 0; chan < kNumVertexChannel - 1; ++chan) {
- const std::vector<int> &peaks = channel_peaks[chan];
+ const std::vector<int>& peaks = channel_peaks[chan];
for (size_t pp = 0; pp < peaks.size(); ++pp) {
int best_idx = kInvalidId;
float best_distance = kInvalidDist;
float best_angle = kInvalidDist;
for (size_t jj = 0; jj < objects.size(); ++jj) {
- const Vector2f &center = all_peaks[objects[jj].center];
- const Vector2f &point = all_peaks[peaks[pp]];
+ const Vector2f& center = all_peaks[objects[jj].center];
+ const Vector2f& point = all_peaks[peaks[pp]];
const Vector2i point_int(static_cast<int>(point[0]),
static_cast<int>(point[1]));
@@ -250,7 +255,7 @@ FindObjects(const std::array &maps) {
const float angle = (v_center - v_aff).norm();
const float dist = (point - center).norm();
- if (angle < kAffinityMapAngleThreshhold && dist < best_distance) {
+ if (angle < affinity_map_angle_threshold && dist < best_distance) {
best_idx = jj;
best_distance = dist;
best_angle = angle;
@@ -262,7 +267,7 @@ FindObjects(const std::array &maps) {
}
if (objects[best_idx].corners[chan] == kInvalidId ||
- (best_angle < kAffinityMapAngleThreshhold &&
+ (best_angle < affinity_map_angle_threshold &&
best_distance < objects[best_idx].best_distances[chan])) {
objects[best_idx].corners[chan] = peaks[pp];
objects[best_idx].best_distances[chan] = best_distance;
@@ -303,10 +308,11 @@ FindObjects(const std::array &maps) {
}
gxf::Expected<std::array<double, 7>>
-ExtractPose(const DopeObjectKeypoints &object,
- const Eigen::Matrix<double, 3, 9> &cuboid_3d_points,
- const cv::Mat &camera_matrix) {
- const auto &valid_points = object.first;
+ExtractPose(const DopeObjectKeypoints& object,
+ const Eigen::Matrix<double, 3, 9>& cuboid_3d_points,
+ const cv::Mat& camera_matrix, const double rotation_y_axis,
+ const double rotation_x_axis, const double rotation_z_axis) {
+ const auto& valid_points = object.first;
const size_t num_valid_points = valid_points.size();
Eigen::Matrix3Xd keypoints_3d(3, num_valid_points);
for (size_t j = 0; j < num_valid_points; ++j) {
@@ -315,24 +321,40 @@ ExtractPose(const DopeObjectKeypoints &object,
Pose3d pose;
cv::Mat rvec, tvec;
- cv::Mat dist_coeffs = cv::Mat::zeros(1, 4, CV_64FC1); // no distortion
+ cv::Mat dist_coeffs = cv::Mat::zeros(1, 4, CV_64FC1); // no distortion
cv::Mat cv_keypoints_3d;
cv::eigen2cv(keypoints_3d, cv_keypoints_3d);
cv::Mat cv_keypoints_2d;
cv::eigen2cv(object.second, cv_keypoints_2d);
if (!cv::solvePnP(cv_keypoints_3d.t(), cv_keypoints_2d.t(), camera_matrix,
- dist_coeffs, rvec, tvec)) {
+ dist_coeffs, rvec, tvec, false, cv::SOLVEPNP_EPNP)) {
GXF_LOG_ERROR("cv::solvePnP failed");
return nvidia::gxf::Unexpected{GXF_FAILURE};
}
cv::cv2eigen(tvec, pose.translation);
cv::Mat R;
- cv::Rodrigues(rvec, R); // R is 3x3
+ cv::Rodrigues(rvec, R); // R is 3x3
Eigen::Matrix3d e_mat;
cv::cv2eigen(R, e_mat);
- pose.rotation = Eigen::Quaterniond(e_mat);
+ Eigen::AngleAxisd rotation_y(rotation_y_axis, Eigen::Vector3d::UnitY());
+ Eigen::AngleAxisd rotation_x(rotation_x_axis, Eigen::Vector3d::UnitX());
+ Eigen::AngleAxisd rotation_z(rotation_z_axis, Eigen::Vector3d::UnitZ());
+
+ // Convert to rotation matrices
+ Eigen::Matrix3d rotation_y_matrix = rotation_y.toRotationMatrix();
+ Eigen::Matrix3d rotation_x_matrix = rotation_x.toRotationMatrix();
+ Eigen::Matrix3d rotation_z_matrix = rotation_z.toRotationMatrix();
+
+ // Compose the rotations
+ Eigen::Matrix3d composed_rotation = rotation_z_matrix * rotation_y_matrix * rotation_x_matrix;
+
+ // Apply the composed rotation to the original matrix
+ Eigen::Matrix3d rotated_matrix = e_mat * composed_rotation;
+
+ pose.rotation = Eigen::Quaterniond(rotated_matrix);
+
// If the Z coordinate is negative, the pose is placing the object behind
// the camera (which is incorrect), so we flip it
@@ -340,7 +362,6 @@ ExtractPose(const DopeObjectKeypoints &object,
pose = pose.inverse();
}
- constexpr double kCentimeterToMeter = 100.0;
// Return pose data as array
return std::array{
pose.translation[0] / kCentimeterToMeter,
@@ -352,45 +373,58 @@ ExtractPose(const DopeObjectKeypoints &object,
pose.rotation.w()};
}
-gxf::Expected<void> AddInputTimestampToOutput(gxf::Entity &output,
- gxf::Entity input) {
- std::string timestamp_name{"timestamp"};
- auto maybe_timestamp = input.get<gxf::Timestamp>(timestamp_name.c_str());
+gxf::Expected<void> AddInputTimestampToOutput(gxf::Entity& output, gxf::Entity input) {
+ std::string named_timestamp{"timestamp"};
+ std::string unnamed_timestamp{""};
+ auto maybe_input_timestamp = input.get<gxf::Timestamp>(named_timestamp.c_str());
- // Default to unnamed
- if (!maybe_timestamp) {
- timestamp_name = std::string{""};
- maybe_timestamp = input.get<gxf::Timestamp>(timestamp_name.c_str());
+ // If there is no named timestamp, fall back to an unnamed timestamp from the input entity
+ if (!maybe_input_timestamp) {
+ maybe_input_timestamp = input.get<gxf::Timestamp>(unnamed_timestamp.c_str());
}
-
- if (!maybe_timestamp) {
+ // If neither a named nor an unnamed timestamp was found, fail
+ if (!maybe_input_timestamp) {
GXF_LOG_ERROR("Failed to get input timestamp!");
- return gxf::ForwardError(maybe_timestamp);
+ return gxf::ForwardError(maybe_input_timestamp);
}
- auto maybe_out_timestamp = output.add<gxf::Timestamp>(timestamp_name.c_str());
- if (!maybe_out_timestamp) {
- GXF_LOG_ERROR("Failed to add timestamp to output message!");
- return gxf::ForwardError(maybe_out_timestamp);
+ // Try to get a named timestamp from the output entity
+ auto maybe_output_timestamp = output.get<gxf::Timestamp>(named_timestamp.c_str());
+ // If there is no named timestamp, try to get an unnamed timestamp from the output entity
+ if (!maybe_output_timestamp) {
+ maybe_output_timestamp = output.get<gxf::Timestamp>(unnamed_timestamp.c_str());
}
- *maybe_out_timestamp.value() = *maybe_timestamp.value();
+ // If the output has no timestamp at all, add a named timestamp to the output entity
+ if (!maybe_output_timestamp) {
+ maybe_output_timestamp = output.add<gxf::Timestamp>(named_timestamp.c_str());
+ if (!maybe_output_timestamp) {
+ GXF_LOG_ERROR("Failed to add timestamp to output message!");
+ return gxf::ForwardError(maybe_output_timestamp);
+ }
+ }
+
+ *maybe_output_timestamp.value() = *maybe_input_timestamp.value();
return gxf::Success;
}
-} // namespace
+} // namespace
gxf_result_t
-DopeDecoder::registerInterface(gxf::Registrar *registrar) noexcept {
+DopeDecoder::registerInterface(gxf::Registrar* registrar) noexcept {
gxf::Expected result;
result &= registrar->parameter(tensorlist_receiver_, "tensorlist_receiver",
"Tensorlist Input",
"The detections as a tensorlist");
- result &= registrar->parameter(posearray_transmitter_,
- "posearray_transmitter", "PoseArray output",
- "The ouput poses as a pose array");
+ result &= registrar->parameter(camera_model_input_, "camera_model_input",
+ "Camera Model Input",
+ "The Camera intrinsics as a Nitros Camera Info type");
+
+ result &= registrar->parameter(detection3darray_transmitter_,
+ "detection3darray_transmitter", "Detection3DArray output",
+ "The ouput poses as a Detection3D array");
result &= registrar->parameter(allocator_, "allocator", "Allocator",
"Output Allocator");
@@ -399,14 +433,30 @@ DopeDecoder::registerInterface(gxf::Registrar *registrar) noexcept {
object_dimensions_param_, "object_dimensions",
"The dimensions of the object whose pose is being estimated");
- result &= registrar->parameter(
- camera_matrix_param_, "camera_matrix",
- "The parameters of the camera used to capture the input images");
-
result &= registrar->parameter(
object_name_, "object_name",
"The class name of the object whose pose is being estimated");
+ result &= registrar->parameter(
+ map_peak_threshold_, "map_peak_threshold",
+ "The minimum value of a peak in a belief map");
+
+ result &= registrar->parameter(
+ affinity_map_angle_threshold_, "affinity_map_angle_threshold",
+ "The maximum angle threshold for affinity mapping of corners to centroid");
+
+ result &= registrar->parameter(
+ rotation_y_axis_, "rotation_y_axis", "rotation_y_axis",
+ "Rotate Dope pose by N degrees along y axis", 0.0);
+
+ result &= registrar->parameter(
+ rotation_x_axis_, "rotation_x_axis", "rotation_x_axis",
+ "Rotate Dope pose by N degrees along x axis", 0.0);
+
+ result &= registrar->parameter(
+ rotation_z_axis_, "rotation_z_axis", "rotation_z_axis",
+ "Rotate Dope pose by N degrees along z axis", 0.0);
+
return gxf::ToResultCode(result);
}
@@ -414,25 +464,30 @@ gxf_result_t DopeDecoder::start() noexcept {
// Extract 3D coordinates of bounding cuboid + centroid from object dimensions
auto dims = object_dimensions_param_.get();
if (dims.size() != 3) {
- GXF_LOG_ERROR("Expected object dimensions vector to be length 3 but got %lu",
- dims.size());
return GXF_FAILURE;
}
cuboid_3d_points_ = CuboidVertices({dims.at(0), dims.at(1), dims.at(2)});
- // Load camera matrix into cv::Mat
- if (camera_matrix_param_.get().size() != 9) {
- GXF_LOG_ERROR("Expected camera matrix vector to be length 9 but got %lu",
- camera_matrix_param_.get().size());
- return GXF_FAILURE;
- }
camera_matrix_ = cv::Mat::zeros(3, 3, CV_64FC1);
- std::memcpy(camera_matrix_.data, camera_matrix_param_.get().data(),
- camera_matrix_param_.get().size() * sizeof(double));
return GXF_SUCCESS;
}
+gxf::Expected<void> DopeDecoder::updateCameraProperties(
+ gxf::Handle<gxf::CameraModel> camera_model) {
+ if (!camera_model) {
+ return gxf::Unexpected{GXF_FAILURE};
+ }
+
+ camera_matrix_.at<double>(0, 0) = camera_model->focal_length.x;
+ camera_matrix_.at<double>(0, 2) = camera_model->principal_point.x;
+ camera_matrix_.at<double>(1, 1) = camera_model->focal_length.y;
+ camera_matrix_.at<double>(1, 2) = camera_model->principal_point.y;
+ camera_matrix_.at<double>(2, 2) = 1.0;
+
+ return gxf::Success;
+}
+
gxf_result_t DopeDecoder::tick() noexcept {
const auto maybe_beliefmaps_message = tensorlist_receiver_->receive();
if (!maybe_beliefmaps_message) {
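`updateCameraProperties` simply rebuilds the 3x3 pinhole intrinsic matrix from the incoming `CameraModel` instead of a static parameter. In numpy terms:

```python
import numpy as np

# Pinhole intrinsic matrix K from focal lengths and principal point,
# as assembled by updateCameraProperties above.
def camera_matrix(fx, fy, cx, cy):
    return np.array([[fx, 0.0, cx],
                     [0.0, fy, cy],
                     [0.0, 0.0, 1.0]])
```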
@@ -448,22 +503,30 @@ gxf_result_t DopeDecoder::tick() noexcept {
// Ensure belief maps match expected shape in first two dimensions
if (belief_maps->shape().dimension(0) != kNumTensors ||
- belief_maps->shape().dimension(1) != kInputMapsChannels)
- {
+ belief_maps->shape().dimension(1) != kInputMapsChannels) {
GXF_LOG_ERROR(
"Belief maps had unexpected shape in first two dimensions: {%d, %d, %d, %d}",
belief_maps->shape().dimension(0), belief_maps->shape().dimension(1),
belief_maps->shape().dimension(2), belief_maps->shape().dimension(3));
return GXF_FAILURE;
}
+ auto maybe_camera_model_entity = camera_model_input_->receive();
+ if (!maybe_camera_model_entity) {
+ GXF_LOG_ERROR("Failed to receive input camera model entity!");
+ return gxf::ToResultCode(maybe_camera_model_entity);
+ }
+
+ auto maybe_camera_model = maybe_camera_model_entity.value().get("intrinsics");
+ if (!maybe_camera_model) {
+ GXF_LOG_ERROR("Failed to receive input camera model!");
+ return gxf::ToResultCode(maybe_camera_model);
+ }
- // Allocate output message
- auto maybe_posearray_message = gxf::Entity::New(context());
- if (!maybe_posearray_message) {
- GXF_LOG_ERROR("Failed to allocate PoseArray Message");
- return gxf::ToResultCode(maybe_posearray_message);
+ auto maybe_updated_properties = updateCameraProperties(maybe_camera_model.value());
+ if (!maybe_updated_properties) {
+ GXF_LOG_ERROR("Failed to update camera properties!");
+ return gxf::ToResultCode(maybe_updated_properties);
}
- auto posearray_message = maybe_posearray_message.value();
// Copy tensor data over to a more portable form
std::array<cv::Mat, kInputMapsChannels> maps;
@@ -487,67 +550,63 @@ gxf_result_t DopeDecoder::tick() noexcept {
}
// Analyze the belief map to find vertex locations in image space
- const std::vector<DopeObjectKeypoints> dope_objects = FindObjects(maps);
-
- // Add timestamp to output msg
- auto maybe_added_timestamp = AddInputTimestampToOutput(
- posearray_message, maybe_beliefmaps_message.value());
+ const std::vector<DopeObjectKeypoints> dope_objects =
+ FindObjects(maps, map_peak_threshold_, affinity_map_angle_threshold_);
+
+ // Create Detection3DList Message
+ auto maybe_detection3_d_list = nvidia::isaac::CreateDetection3DListMessage(
+ context(), dope_objects.size());
+ if (!maybe_detection3_d_list) {
+ GXF_LOG_ERROR("[DopeDecoder] Failed to create detection3d list");
+ return gxf::ToResultCode(maybe_detection3_d_list);
+ }
+ auto detection3_d_list = maybe_detection3_d_list.value();
+
+ auto maybe_added_timestamp =
+ AddInputTimestampToOutput(maybe_detection3_d_list->entity, maybe_beliefmaps_message.value());
if (!maybe_added_timestamp) {
- return gxf::ToResultCode(maybe_added_timestamp);
+ GXF_LOG_ERROR("[DopeDecoder] Failed to add timestamp");
}
if (dope_objects.empty()) {
GXF_LOG_INFO("No objects detected.");
-
return gxf::ToResultCode(
- posearray_transmitter_->publish(std::move(posearray_message)));
+ detection3darray_transmitter_->publish(std::move(detection3_d_list.entity)));
}
// Run Perspective-N-Point on the detected objects to find the 6-DoF pose of
// the bounding cuboid
- for (const DopeObjectKeypoints &object : dope_objects) {
- // Allocate output tensor for this pose
- auto maybe_pose_tensor = posearray_message.add();
- if (!maybe_pose_tensor) {
- GXF_LOG_ERROR("Failed to allocate Pose Tensor");
- return gxf::ToResultCode(maybe_pose_tensor);
- }
- auto pose_tensor = maybe_pose_tensor.value();
-
- // Initializing GXF tensor
- auto result = pose_tensor->reshape(
- nvidia::gxf::Shape{kExpectedPoseAsTensorSize},
- nvidia::gxf::MemoryStorageType::kDevice, allocator_);
- if (!result) {
- GXF_LOG_ERROR("Failed to reshape Pose Tensor to (%d,)",
- kExpectedPoseAsTensorSize);
- return gxf::ToResultCode(result);
- }
-
- auto maybe_pose = ExtractPose(object, cuboid_3d_points_, camera_matrix_);
+ for (size_t i = 0; i < dope_objects.size(); i++) {
+ const DopeObjectKeypoints& object = dope_objects[i];
+ auto maybe_pose = ExtractPose(object, cuboid_3d_points_, camera_matrix_,
+ rotation_y_axis_.get(), rotation_x_axis_.get(),
+ rotation_z_axis_.get());
if (!maybe_pose) {
GXF_LOG_ERROR("Failed to extract pose from object");
return gxf::ToResultCode(maybe_pose);
}
auto pose = maybe_pose.value();
- // Copy pose data into pose tensor
- const cudaMemcpyKind operation = cudaMemcpyHostToDevice;
- const cudaError_t cuda_error = cudaMemcpy(
- pose_tensor->pointer(), pose.data(), pose_tensor->size(), operation);
-
- if (cuda_error != cudaSuccess) {
- GXF_LOG_ERROR("Failed to copy data to pose tensor: %s (%s)",
- cudaGetErrorName(cuda_error),
- cudaGetErrorString(cuda_error));
- return GXF_FAILURE;
- }
+ // Rotation: w = pose[6], x = pose[3], y = pose[4], z = pose[5]
+ nvidia::isaac::Pose3d pose3d{
+ nvidia::isaac::SO3d::FromQuaternion({pose[6], pose[3], pose[4], pose[5]}), // rotation
+ nvidia::isaac::Vector3d(pose[0], pose[1], pose[2]) // translation
+ };
+
+ auto bbox_dims = object_dimensions_param_.get();
+ auto bbox_x = bbox_dims.at(0) / kCentimeterToMeter;
+ auto bbox_y = bbox_dims.at(1) / kCentimeterToMeter;
+ auto bbox_z = bbox_dims.at(2) / kCentimeterToMeter;
+
+ **detection3_d_list.poses[i] = pose3d;
+ **detection3_d_list.bbox_sizes[i] = nvidia::isaac::Vector3f(bbox_x, bbox_y, bbox_z);
+ **detection3_d_list.hypothesis[i] = nvidia::isaac::ObjectHypothesis{
+ std::vector<float>{0.0}, std::vector<std::string>{object_name_}};
}
-
return gxf::ToResultCode(
- posearray_transmitter_->publish(std::move(posearray_message)));
+ detection3darray_transmitter_->publish(std::move(detection3_d_list.entity)));
}
-} // namespace dope
-} // namespace isaac_ros
-} // namespace nvidia
+} // namespace dope
+} // namespace isaac_ros
+} // namespace nvidia
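The new rotation parameters let a DOPE pose be re-aligned (for example to match FoundationPose) by post-multiplying the PnP rotation with a composed ZYX rotation, as `ExtractPose` now does. A numpy sketch; the angle unit is an assumption, since the parameter docs say degrees while `Eigen::AngleAxisd` expects radians:

```python
import numpy as np

# Compose rotations about X, Y, Z (angles in radians here) in a ZYX
# sequence and apply them on the right of the PnP rotation matrix.
def apply_alignment(r_pnp, rx, ry, rz):
    cx, sx = np.cos(rx), np.sin(rx)
    cy, sy = np.cos(ry), np.sin(ry)
    cz, sz = np.cos(rz), np.sin(rz)
    rot_x = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    rot_y = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    rot_z = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
    composed = rot_z @ rot_y @ rot_x  # ZYX sequence, as in the C++ above
    return r_pnp @ composed           # rotation applied in the object frame
```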
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp
index 871b52d..e0a9cf4 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_decoder.hpp
@@ -18,18 +18,23 @@
#ifndef NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_
#define NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_
-#include "opencv2/core/mat.hpp"
#include <Eigen/Dense>
+#include <string>
+#include <vector>
+
#include "gxf/core/entity.hpp"
#include "gxf/core/gxf.h"
#include "gxf/core/parameter.hpp"
+#include "gxf/core/parameter_parser_std.hpp"
+#include "gxf/multimedia/camera.hpp"
#include "gxf/std/codelet.hpp"
#include "gxf/std/memory_buffer.hpp"
-#include "gxf/core/parameter_parser_std.hpp"
#include "gxf/std/receiver.hpp"
#include "gxf/std/transmitter.hpp"
+#include "opencv2/core/mat.hpp"
+
namespace nvidia {
namespace isaac_ros {
namespace dope {
@@ -37,29 +42,40 @@ namespace dope {
// GXF codelet that decodes object poses from an input tensor and produces an
// output tensor representing a pose array
class DopeDecoder : public gxf::Codelet {
-public:
+ public:
gxf_result_t start() noexcept override;
gxf_result_t tick() noexcept override;
gxf_result_t stop() noexcept override { return GXF_SUCCESS; }
- gxf_result_t registerInterface(gxf::Registrar *registrar) noexcept override;
+ gxf_result_t registerInterface(gxf::Registrar* registrar) noexcept override;
-private:
+ private:
+ gxf::Expected<void> updateCameraProperties(gxf::Handle<gxf::CameraModel> camera_model);
gxf::Parameter<gxf::Handle<gxf::Receiver>> tensorlist_receiver_;
- gxf::Parameter<gxf::Handle<gxf::Transmitter>> posearray_transmitter_;
+ gxf::Parameter<gxf::Handle<gxf::Receiver>> camera_model_input_;
+ gxf::Parameter<gxf::Handle<gxf::Transmitter>> detection3darray_transmitter_;
gxf::Parameter<gxf::Handle<gxf::Allocator>> allocator_;
// Parameters
gxf::Parameter<std::vector<double>> object_dimensions_param_;
gxf::Parameter<std::vector<double>> camera_matrix_param_;
gxf::Parameter<std::string> object_name_;
+ gxf::Parameter<double> map_peak_threshold_;
+ gxf::Parameter<double> affinity_map_angle_threshold_;
+ // These params can be used to specify a rotation of the pose output by the network. This is
+ // useful when one would like to align detections between FoundationPose and DOPE. For example,
+ // for the soup can asset, the rotation would need to be done along the y axis by 90 degrees.
+ // All rotation values here are in degrees. The rotation is performed in a ZYX sequence.
+ gxf::Parameter<double> rotation_y_axis_;
+ gxf::Parameter<double> rotation_x_axis_;
+ gxf::Parameter<double> rotation_z_axis_;
// Parsed parameters
Eigen::Matrix<double, 3, 9> cuboid_3d_points_;
cv::Mat camera_matrix_;
};
-} // namespace dope
-} // namespace isaac_ros
-} // namespace nvidia
+} // namespace dope
+} // namespace isaac_ros
+} // namespace nvidia
-#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_
+#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_DOPE_DECODER_HPP_
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp
index 7cab5df..e3ccb6a 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/gxf/dope/dope_ext.cpp
@@ -14,8 +14,10 @@
// limitations under the License.
//
// SPDX-License-Identifier: Apache-2.0
+
#include <string>
+#include "detection3_d_array_message.hpp"
#include "gxf/core/gxf.h"
#include "gxf/std/extension_factory_helper.hpp"
@@ -33,6 +35,13 @@ GXF_EXT_FACTORY_ADD(0x9bd4f8c4182a11ed, 0x861d0242ac120002,
nvidia::isaac_ros::dope::DopeDecoder, nvidia::gxf::Codelet,
"Codelet to decode DOPE output.");
+GXF_EXT_FACTORY_ADD_0(0x2fd70869462e45f0, 0xefd09edc396d49bb,
+ nvidia::isaac::Vector3f, "3 Dimensional Vector");
+
+GXF_EXT_FACTORY_ADD_0(0x84a3c15ccecb43a6, 0xed1fe7ab06a448ee,
+ nvidia::isaac::ObjectHypothesis,
+ "Score and class id of detected object");
+
GXF_EXT_FACTORY_END()
-} // extern "C"
+} // extern "C"
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml b/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml
index 32753df..0744764 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml
+++ b/isaac_ros_gxf_extensions/gxf_isaac_dope/package.xml
@@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0
<name>gxf_isaac_dope</name>
- <version>3.1.0</version>
+ <version>3.2.0</version>
<description>GXF extension containing DOPE pose estimation related components.</description>
Isaac ROS Maintainers
@@ -33,6 +33,7 @@ SPDX-License-Identifier: Apache-2.0
<depend>isaac_ros_common</depend>
<depend>isaac_ros_gxf</depend>
+ <depend>isaac_ros_nitros_detection3_d_array_type</depend>
<depend>gxf_isaac_gems</depend>
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt
index c0195fc..4bab809 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/CMakeLists.txt
@@ -87,4 +87,10 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
# Install the binary file
install(TARGETS ${PROJECT_NAME} DESTINATION share/${PROJECT_NAME}/gxf/lib)
+
+# Embed versioning information into installed files
+ament_index_get_resource(ISAAC_ROS_COMMON_CMAKE_PATH isaac_ros_common_cmake_path isaac_ros_common)
+include("${ISAAC_ROS_COMMON_CMAKE_PATH}/isaac_ros_common-version-info.cmake")
+generate_version_info(${PROJECT_NAME})
+
ament_auto_package(INSTALL_TO_SHARE)
\ No newline at end of file
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp
index f1f1a46..a3412a6 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cpp
@@ -19,6 +19,7 @@
#include "foundationpose_utils.hpp"
#include
+#include
#include
#include
#include
@@ -55,6 +56,9 @@ constexpr size_t kNumChannels = 3;
constexpr size_t kOutputRank = 4;
// Number of slices on the input
constexpr int kNumBatches = 6;
+constexpr int kFixTextureMapWidth = 1920;
+constexpr int kFixTextureMapHeight = 1080;
+constexpr int kFixTextureMapColor = 128;
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> RowMajorMatrix;
@@ -223,50 +227,6 @@ gxf_result_t ProjectMatrixFromIntrinsics(
return GXF_SUCCESS;
}
-gxf_result_t GeneratePoseClip(
- std::vector<float>& pose_clip, const std::vector<Eigen::Matrix4f>& poses, const Eigen::MatrixXf& bbox2d, const Eigen::MatrixXf& pose_homo,
- const Eigen::Matrix4f& projection_mat, int rgb_H, int rgb_W) {
- std::vector<Eigen::Matrix4f> mtx;
- for (const auto& pose : poses) {
- mtx.push_back(projection_mat * (kGLCamInCVCam * pose));
- }
-
- if (mtx[0].cols() != pose_homo.cols()) {
- GXF_LOG_ERROR("[FoundationposeRender] The col size of mtx is not the same as pos_home");
- return GXF_FAILURE;
- }
-
- Eigen::VectorXf l = bbox2d.col(0).array();
- Eigen::VectorXf t = rgb_H - bbox2d.col(1).array();
- Eigen::VectorXf r = bbox2d.col(2).array();
- Eigen::VectorXf b = rgb_H - bbox2d.col(3).array();
-
- int N = poses.size();
- pose_clip.reserve(N * pose_homo.rows() * pose_homo.cols());
- for (size_t i = 0; i < poses.size(); i++) {
- auto m = mtx[i];
- // Make sure eigen matrix is row major if need to copy into GPU memory
- Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> new_pos;
- new_pos.resize(pose_homo.rows(), pose_homo.cols());
- for (int j = 0; j < pose_homo.rows(); j++) {
- Eigen::VectorXf temp = m * pose_homo.row(j).transpose();
- new_pos.row(j) = temp.transpose();
- }
-
- Eigen::Matrix4f tf = Eigen::Matrix4f::Identity();
- tf(0, 0) = rgb_W / (r(i) - l(i));
- tf(1, 1) = rgb_H / (t(i) - b(i));
- tf(3, 0) = (rgb_W - r(i) - l(i)) / (r(i) - l(i));
- tf(3, 1) = (rgb_H - t(i) - b(i)) / (t(i) - b(i));
- new_pos = new_pos * tf;
- // Vector of Eigen matrix is not continous in memory, flatten matrix and insert to vector
- std::vector<float> pose_data(new_pos.data(), new_pos.data() + new_pos.size());
- pose_clip.insert(pose_clip.end(), pose_data.begin(), pose_data.end());
- }
-
- return GXF_SUCCESS;
-}
-
gxf_result_t ConstructBBox2D(
Eigen::MatrixXf& bbox2d, const std::vector<Eigen::MatrixXf>& tfs, int H, int W) {
Eigen::MatrixXf bbox2d_crop(2, 2);
@@ -452,10 +412,17 @@ gxf_result_t FoundationposeRender::start() noexcept {
}
}
- // Load the texture map
- cv::Mat texture_map = cv::imread(texture_path_.get());
cv::Mat rgb_texture_map;
- cv::cvtColor(texture_map, rgb_texture_map, cv::COLOR_BGR2RGB);
+ if (!std::filesystem::exists(texture_path_.get())) {
+ GXF_LOG_WARNING("[FoundationposeRender] %s could not be found, assigning the texture map a pure color",
+   texture_path_.get().c_str());
+ rgb_texture_map = cv::Mat(kFixTextureMapHeight, kFixTextureMapWidth, CV_8UC3,
+ cv::Scalar(kFixTextureMapColor, kFixTextureMapColor, kFixTextureMapColor));
+ } else {
+ // Load the texture map
+ cv::Mat texture_map = cv::imread(texture_path_.get());
+ cv::cvtColor(texture_map, rgb_texture_map, cv::COLOR_BGR2RGB);
+ }
if (!rgb_texture_map.isContinuous()) {
GXF_LOG_ERROR("[FoundationposeRender] Texture map is not continuous");
@@ -517,11 +484,15 @@ gxf_result_t FoundationposeRender::start() noexcept {
// Allocate device memory for mesh data
size_t faces_size = mesh_faces_.size() * sizeof(int32_t);
size_t texcoords_size = texcoords_.size() * sizeof(float);
-
+ size_t mesh_vertices_size = vertices_.size() * sizeof(float);
+
+ CHECK_CUDA_ERRORS(cudaMalloc(&mesh_vertices_device_, mesh_vertices_size));
CHECK_CUDA_ERRORS(cudaMalloc(&mesh_faces_device_, faces_size));
CHECK_CUDA_ERRORS(cudaMalloc(&texcoords_device_, texcoords_size));
CHECK_CUDA_ERRORS(cudaMalloc(&texture_map_device_, rgb_texture_map.total() * kNumChannels));
+ CHECK_CUDA_ERRORS(cudaMemcpyAsync(
+ mesh_vertices_device_, vertices_.data(), mesh_vertices_size, cudaMemcpyHostToDevice, cuda_stream_));
CHECK_CUDA_ERRORS(cudaMemcpyAsync(
mesh_faces_device_, mesh_faces_.data(), faces_size, cudaMemcpyHostToDevice, cuda_stream_));
CHECK_CUDA_ERRORS(cudaMemcpyAsync(
@@ -549,42 +520,22 @@ gxf_result_t FoundationposeRender::start() noexcept {
gxf_result_t FoundationposeRender::NvdiffrastRender(
- cudaStream_t cuda_stream_, std::vector<Eigen::MatrixXf>& poses, Eigen::Matrix3f& K, Eigen::MatrixXf& bbox2d, int rgb_H,
+ cudaStream_t cuda_stream, std::vector<Eigen::MatrixXf>& poses, Eigen::Matrix3f& K, Eigen::MatrixXf& bbox2d, int rgb_H,
int rgb_W, int H, int W, nvcv::Tensor& flip_color_tensor, nvcv::Tensor& flip_xyz_map_tensor) {
+ // N represents the number of poses
size_t N = poses.size();
- // Generate attributes for interpolate
- std::vector<Eigen::MatrixXf> pts_cam;
- auto gxf_result = TransformPts(pts_cam, mesh_vertices_, poses);
- if(gxf_result != GXF_SUCCESS) {
- return gxf_result;
- }
- if (pts_cam.size() == 0 || pts_cam.size() != N) {
- GXF_LOG_ERROR("[FoundationposeRender] The attribute size doesn't match pose size after transform");
- return GXF_FAILURE;
- }
- if (pts_cam[0].rows() != num_vertices_) {
- GXF_LOG_ERROR("[FoundationposeRender] The attribute dimension doesn't match with vertices after transform");
+ // For every pose, transform the vertices to the camera space
+ if (kVertexPoints != kPoseMatrixLength - 1) {
+ GXF_LOG_ERROR("[FoundationposeRender] The vertex channel count should equal the pose matrix length - 1");
return GXF_FAILURE;
}
-
- // Vector of Eigen matrix is not continous in memory, flatten matrix and insert to vector
- size_t num_attr = pts_cam[0].cols();
- if (!pts_cam[0].IsRowMajor) {
- GXF_LOG_ERROR("[FoundationposeRender] Pts cam need to be row major in order to copy into device memory");
- return GXF_FAILURE;
- }
-
- std::vector<float> pts_cam_vector;
- pts_cam_vector.reserve(N * num_vertices_ * num_attr);
- for (auto& mat : pts_cam) {
- std::vector mat_data(mat.data(), mat.data() + mat.size());
- pts_cam_vector.insert(pts_cam_vector.end(), mat_data.begin(), mat_data.end());
- }
+ transform_pts(cuda_stream, pts_cam_device_, mesh_vertices_device_, pose_device_, num_vertices_, kVertexPoints, N, kPoseMatrixLength);
+ CHECK_CUDA_ERRORS(cudaGetLastError());
Eigen::Matrix4f projection_mat;
- gxf_result = ProjectMatrixFromIntrinsics(projection_mat, K, rgb_H, rgb_W);
- if(gxf_result != GXF_SUCCESS) {
+ auto gxf_result = ProjectMatrixFromIntrinsics(projection_mat, K, rgb_H, rgb_W);
+ if (gxf_result != GXF_SUCCESS) {
return GXF_FAILURE;
}
@@ -599,58 +550,53 @@ gxf_result_t FoundationposeRender::NvdiffrastRender(
return GXF_FAILURE;
}
- std::vector<float> pose_clip;
- gxf_result = GeneratePoseClip(pose_clip, poses, bbox2d, pose_homo, projection_mat, rgb_H, rgb_W);
- if (gxf_result != GXF_SUCCESS) {
- return GXF_FAILURE;
- }
-
- if (pose_clip.size() == 0) {
- GXF_LOG_ERROR("[FoundationposeRender] Pose clip should not be empty");
- return GXF_FAILURE;
- }
-
// Allocate device memory for the intermediate results on the first frame
- size_t pose_clip_size = pose_clip.size() * sizeof(float);
- size_t pts_cam_size = pts_cam_vector.size() * sizeof(float);
+ size_t pose_clip_size = N * num_vertices_ * 4 * sizeof(float);
size_t rast_out_size = N * H * W * 4 * sizeof(float);
size_t color_size = N * H * W * kNumChannels * sizeof(float);
size_t xyz_map_size = N * H * W * kNumChannels * sizeof(float);
size_t texcoords_out_size = N * H * W * kTexcoordsDim * sizeof(float);
+ size_t bbox2d_size = N * 4 * sizeof(float);
if (render_data_cached_ == false) {
CHECK_CUDA_ERRORS(cudaMalloc(&pose_clip_device_, pose_clip_size));
CHECK_CUDA_ERRORS(cudaMalloc(&rast_out_device_, rast_out_size));
- CHECK_CUDA_ERRORS(cudaMalloc(&pts_cam_device_, pts_cam_size));
CHECK_CUDA_ERRORS(cudaMalloc(&texcoords_out_device_, texcoords_out_size));
CHECK_CUDA_ERRORS(cudaMalloc(&color_device_, color_size));
CHECK_CUDA_ERRORS(cudaMalloc(&xyz_map_device_, xyz_map_size));
+ CHECK_CUDA_ERRORS(cudaMalloc(&bbox2d_device_, bbox2d_size));
cr_ = new CR::CudaRaster();
render_data_cached_ = true;
}
- // Copy other data to device memory
- CHECK_CUDA_ERRORS(cudaMemcpyAsync(
- pose_clip_device_, pose_clip.data(), pose_clip_size, cudaMemcpyHostToDevice, cuda_stream_));
- CHECK_CUDA_ERRORS(cudaMemcpyAsync(
- pts_cam_device_, pts_cam_vector.data(), pts_cam_size, cudaMemcpyHostToDevice, cuda_stream_));
+ std::vector<float> h_bbox2d;
+ for (int j = 0; j < N; j++) {
+   for (int k = 0; k < bbox2d.cols(); k++) {
+     h_bbox2d.push_back(bbox2d(j, k));
+   }
+ }
+ CHECK_CUDA_ERRORS(cudaMemcpyAsync(
+   bbox2d_device_, h_bbox2d.data(), bbox2d_size, cudaMemcpyHostToDevice, cuda_stream));
+
+ // Fused kernel: project every mesh vertex with each pose and remap into the crop NDC
+ generate_pose_clip(
+   cuda_stream, pose_clip_device_, pose_device_, bbox2d_device_, mesh_vertices_device_,
+   projection_mat, rgb_H, rgb_W, num_vertices_, N);
+ CHECK_CUDA_ERRORS(cudaGetLastError());
auto float_texture_map_data = float_texture_map_tensor_.exportData<nvcv::TensorDataStridedCuda>();
texture(
- cuda_stream_,
+ cuda_stream,
reinterpret_cast<float*>(float_texture_map_data->basePtr()),
texcoords_out_device_,
color_device_,
@@ -668,7 +614,7 @@ gxf_result_t FoundationposeRender::NvdiffrastRender(
float min_value = 0.0;
float max_value = 1.0;
- clamp(cuda_stream_, color_device_, min_value, max_value, N * H * W * kNumChannels);
+ clamp(cuda_stream, color_device_, min_value, max_value, N * H * W * kNumChannels);
CHECK_CUDA_ERRORS(cudaGetLastError());
nvcv::Tensor color_tensor, xyz_map_tensor;
@@ -676,10 +622,10 @@ gxf_result_t FoundationposeRender::NvdiffrastRender(
WrapFloatPtrToNHWCTensor(xyz_map_device_, xyz_map_tensor, N, H, W, kNumChannels);
cvcuda::Flip flip_op;
- flip_op(cuda_stream_, color_tensor, flip_color_tensor, 0);
+ flip_op(cuda_stream, color_tensor, flip_color_tensor, 0);
CHECK_CUDA_ERRORS(cudaGetLastError());
- flip_op(cuda_stream_, xyz_map_tensor, flip_xyz_map_tensor, 0);
+ flip_op(cuda_stream, xyz_map_tensor, flip_xyz_map_tensor, 0);
CHECK_CUDA_ERRORS(cudaGetLastError());
return GXF_SUCCESS;
@@ -845,10 +791,13 @@ gxf_result_t FoundationposeRender::tick() noexcept {
// Malloc device memory for the output
if (!rgb_data_cached_) {
+ CHECK_CUDA_ERRORS(cudaMalloc(&pts_cam_device_, N * num_vertices_ * kVertexPoints * sizeof(float)));
CHECK_CUDA_ERRORS(cudaMalloc(&transformed_xyz_map_device_, N * H * W * C * sizeof(float)));
CHECK_CUDA_ERRORS(cudaMalloc(&transformed_rgb_device_, N * H * W * C * sizeof(float)));
CHECK_CUDA_ERRORS(cudaMalloc(&score_rendered_output_device_, total_poses * 2 * H * W * C * sizeof(float)));
CHECK_CUDA_ERRORS(cudaMalloc(&score_original_output_device_, total_poses * 2 * H * W * C * sizeof(float)));
+ CHECK_CUDA_ERRORS(cudaMalloc(&wp_image_device_, N * H * W * C * sizeof(uint8_t)));
+ CHECK_CUDA_ERRORS(cudaMalloc(&trans_matrix_device_, N * 9 * sizeof(float)));
nvcv::TensorShape::ShapeType shape{N, H, W, C};
nvcv::TensorShape tensor_shape{shape, "NHWC"};
@@ -859,6 +808,7 @@ gxf_result_t FoundationposeRender::tick() noexcept {
}
std::vector<float> pose_host(pose_nums * pose_rows * pose_cols);
+ pose_device_ = reinterpret_cast<float*>(poses_handle->pointer());
CHECK_CUDA_ERRORS(cudaMemcpyAsync(
pose_host.data(), poses_handle->pointer(), poses_handle->size(), cudaMemcpyDeviceToHost, cuda_stream_));
cudaStreamSynchronize(cuda_stream_);
@@ -893,75 +843,141 @@ gxf_result_t FoundationposeRender::tick() noexcept {
}
- // WarpPerspective on each image and xyz map, and cache the output for the future iterations
+ // WarpPerspective on each image and xyz map in a single batched call
- if (mode_.get() == "score" || iteration_count_ == 0) {
- nvcv::TensorShape::ShapeType rgb_shape{1, rgb_H, rgb_W, C};
- nvcv::TensorShape rgb_tensor_shape{rgb_shape, "NHWC"};
+ nvcv::TensorShape::ShapeType rgb_shape{1, rgb_H, rgb_W, C};
+ nvcv::TensorShape rgb_tensor_shape{rgb_shape, "NHWC"};
- nvcv::Tensor rgb_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_U8);
- nvcv::Tensor xyz_map_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_F32);
+ nvcv::Tensor rgb_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_U8);
+ nvcv::Tensor xyz_map_tensor = nvcv::Tensor(rgb_tensor_shape, nvcv::TYPE_F32);
- WrapImgPtrToNHWCTensor(reinterpret_cast(rgb_img_handle->pointer()), rgb_tensor, 1, rgb_H, rgb_W, C);
- WrapFloatPtrToNHWCTensor(reinterpret_cast(xyz_map_handle->pointer()), xyz_map_tensor, 1, rgb_H, rgb_W, C);
+ WrapImgPtrToNHWCTensor(reinterpret_cast(rgb_img_handle->pointer()), rgb_tensor, 1, rgb_H, rgb_W, C);
+ WrapFloatPtrToNHWCTensor(reinterpret_cast(xyz_map_handle->pointer()), xyz_map_tensor, 1, rgb_H, rgb_W, C);
- const int rgb_flags = NVCV_INTERP_LINEAR;
- const int xyz_flags = NVCV_INTERP_NEAREST;
- const float4 border_value = {0,0,0,0};
+ const int rgb_flags = NVCV_INTERP_LINEAR;
+ const int xyz_flags = NVCV_INTERP_NEAREST;
+ const float4 border_value = {0,0,0,0};
- const float scale_factor = 1.0f / 255.0f;
- cvcuda::WarpPerspective warpPerspectiveOp(0);
- cvcuda::ConvertTo convert_op;
- for (size_t index = 0; index < N; index++) {
- nvcv::TensorShape::ShapeType transformed_shape{1,H,W,C};
- nvcv::TensorShape transformed_tensor_shape{transformed_shape, "NHWC"};
-
- nvcv::Tensor transformed_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_U8);
- nvcv::Tensor float_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_F32);
- nvcv::Tensor transformed_xyz_map_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_F32);
-
- // get ptr offset from index
- WrapFloatPtrToNHWCTensor(transformed_rgb_device_ + index * H * W * C, float_rgb_tensor, 1, H, W, C);
- WrapFloatPtrToNHWCTensor(transformed_xyz_map_device_ + index * H * W * C, transformed_xyz_map_tensor, 1, H, W, C);
+ const float scale_factor = 1.0f / 255.0f;
+ int num_of_trans_mat = N;
+ cvcuda::WarpPerspective warpPerspectiveOpBatch(num_of_trans_mat);
+ cvcuda::ConvertTo convert_op;
- NVCVPerspectiveTransform trans_matrix;
- for (size_t i = 0; i < kPTMatrixDim; i++) {
-   for (size_t j = 0; j < kPTMatrixDim; j++) {
-     trans_matrix[i][j] = tfs[index](i, j);
-   }
- }
+ std::vector<float> trans_mat_flattened(N * 9, 0);
+ for (size_t index = 0; index < N; index++) {
+   for (size_t i = 0; i < kPTMatrixDim; i++) {
+     for (size_t j = 0; j < kPTMatrixDim; j++) {
+       trans_mat_flattened[index * 9 + i * kPTMatrixDim + j] = tfs[index](i, j);
+     }
+   }
+ }
+ CHECK_CUDA_ERRORS(cudaMemcpyAsync(
+   trans_matrix_device_, trans_mat_flattened.data(), N * 9 * sizeof(float),
+   cudaMemcpyHostToDevice, cuda_stream_));
+
+ nvcv::TensorDataStridedCuda::Buffer buf_trans_mat;
+ buf_trans_mat.strides[1] = sizeof(float);
+ buf_trans_mat.strides[0] = 9 * buf_trans_mat.strides[1];
+ buf_trans_mat.basePtr = reinterpret_cast<NVCVByte*>(trans_matrix_device_);
+
+ auto transMatrixTensor = nvcv::TensorWrapData(nvcv::TensorDataStridedCuda{
+ nvcv::TensorShape({num_of_trans_mat, 9}, nvcv::TENSOR_NW),
+ nvcv::TYPE_F32,
+ buf_trans_mat
+ });
+
+ // Build rgb image batch input
+ const nvcv::ImageFormat fmt_rgb = nvcv::FMT_RGB8;
+ std::vector<nvcv::Image> wp_rgb_src;
+ int imageSizeRGB = rgb_H * rgb_W * fmt_rgb.planePixelStrideBytes(0);
+ nvcv::ImageDataStridedCuda::Buffer buf_rgb;
+ buf_rgb.numPlanes = 1;
+ buf_rgb.planes[0].width = rgb_W;
+ buf_rgb.planes[0].height = rgb_H;
+ buf_rgb.planes[0].rowStride = rgb_W*fmt_rgb.planePixelStrideBytes(0);
+ buf_rgb.planes[0].basePtr = reinterpret_cast<NVCVByte*>(rgb_img_handle->pointer());
+ auto img_rgb = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_rgb, buf_rgb});
+
+ for (int i = 0; i < num_of_trans_mat; ++i) {
+ wp_rgb_src.emplace_back(img_rgb);
+ }
- warpPerspectiveOp(cuda_stream_, rgb_tensor, transformed_rgb_tensor, trans_matrix, rgb_flags, NVCV_BORDER_CONSTANT, border_value);
- CHECK_CUDA_ERRORS(cudaGetLastError());
+ nvcv::ImageBatchVarShape batch_wp_rgb_src(num_of_trans_mat);
+ batch_wp_rgb_src.pushBack(wp_rgb_src.begin(), wp_rgb_src.end());
+
+ // Build xyz map batch input
+ const nvcv::ImageFormat fmt_xyz = nvcv::FMT_RGBf32;
+ std::vector<nvcv::Image> wp_xyz_src;
+ int imageSizeXYZ = rgb_H * rgb_W * fmt_xyz.planePixelStrideBytes(0);
+ nvcv::ImageDataStridedCuda::Buffer buf_xyz;
+ buf_xyz.numPlanes = 1;
+ buf_xyz.planes[0].width = rgb_W;
+ buf_xyz.planes[0].height = rgb_H;
+ buf_xyz.planes[0].rowStride = rgb_W*fmt_xyz.planePixelStrideBytes(0);
+ buf_xyz.planes[0].basePtr = reinterpret_cast<NVCVByte*>(xyz_map_handle->pointer());
+ auto img_xyz = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_xyz, buf_xyz});
+
+ for (int i = 0; i < num_of_trans_mat; ++i) {
+ wp_xyz_src.emplace_back(img_xyz);
+ }
- convert_op(cuda_stream_, transformed_rgb_tensor, float_rgb_tensor, scale_factor, 0.0f);
- CHECK_CUDA_ERRORS(cudaGetLastError());
+ nvcv::ImageBatchVarShape batch_wp_xyz_src(num_of_trans_mat);
+ batch_wp_xyz_src.pushBack(wp_xyz_src.begin(), wp_xyz_src.end());
+
+ // Build the batched RGB output images
+ std::vector<nvcv::Image> wp_rgb_dst;
+ for (int i = 0; i < num_of_trans_mat; ++i) {
+ nvcv::ImageDataStridedCuda::Buffer buf;
+ buf.numPlanes = 1;
+ buf.planes[0].width = W;
+ buf.planes[0].height = H;
+ buf.planes[0].rowStride = W * fmt_rgb.planePixelStrideBytes(0);
+ buf.planes[0].basePtr = reinterpret_cast<NVCVByte*>(wp_image_device_ + i * H * W * C);
+ auto img = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_rgb, buf});
+ wp_rgb_dst.push_back(img);
+ }
+ nvcv::ImageBatchVarShape batch_wp_rgb_dst(num_of_trans_mat);
+ batch_wp_rgb_dst.pushBack(wp_rgb_dst.begin(), wp_rgb_dst.end());
+
+ // Build the batched XYZ map output images
+ std::vector<nvcv::Image> wp_xyz_dst;
+ for (int i = 0; i < num_of_trans_mat; ++i) {
+ nvcv::ImageDataStridedCuda::Buffer buf;
+ buf.numPlanes = 1;
+ buf.planes[0].width = W;
+ buf.planes[0].height = H;
+ buf.planes[0].rowStride = W * fmt_xyz.planePixelStrideBytes(0);
+ buf.planes[0].basePtr = reinterpret_cast<NVCVByte*>(transformed_xyz_map_device_ + i * H * W * C);
+ auto img = nvcv::ImageWrapData(nvcv::ImageDataStridedCuda{fmt_xyz, buf});
+ wp_xyz_dst.push_back(img);
+ }
+ nvcv::ImageBatchVarShape batch_wp_xyz_dst(num_of_trans_mat);
+ batch_wp_xyz_dst.pushBack(wp_xyz_dst.begin(), wp_xyz_dst.end());
- warpPerspectiveOp(cuda_stream_, xyz_map_tensor, transformed_xyz_map_tensor, trans_matrix, xyz_flags, NVCV_BORDER_CONSTANT, border_value);
- CHECK_CUDA_ERRORS(cudaGetLastError());
- }
+ // Warp Perspective for RGB image and XYZ map
+ warpPerspectiveOpBatch(cuda_stream_, batch_wp_rgb_src, batch_wp_rgb_dst, transMatrixTensor, rgb_flags, NVCV_BORDER_CONSTANT, border_value);
+ CHECK_CUDA_ERRORS(cudaGetLastError());
+ warpPerspectiveOpBatch(cuda_stream_, batch_wp_xyz_src, batch_wp_xyz_dst, transMatrixTensor, xyz_flags, NVCV_BORDER_CONSTANT, border_value);
+ CHECK_CUDA_ERRORS(cudaGetLastError());
- threshold_and_downscale_pointcloud(
- cuda_stream_,
- transformed_xyz_map_device_,
- reinterpret_cast(poses_handle->pointer()),
- N, W * H, mesh_diameter_ / 2, min_depth_, max_depth_);
- CHECK_CUDA_ERRORS(cudaGetLastError());
+ // Convert RGB image from U8 to float
+ nvcv::TensorShape::ShapeType transformed_shape{N, H, W, C};
+ nvcv::TensorShape transformed_tensor_shape{transformed_shape, "NHWC"};
- transformed_xyz_map_cache_[refine_recevied_batches_] = transformed_xyz_map_device_;
- transformed_rgb_cache_[refine_recevied_batches_] = transformed_rgb_device_;
- } else {
- if (transformed_xyz_map_cache_.find(refine_recevied_batches_) != transformed_xyz_map_cache_.end()) {
- transformed_xyz_map_device_ = transformed_xyz_map_cache_[refine_recevied_batches_];
- } else {
- GXF_LOG_ERROR("[FoundationposeRender] Unable to find index %d from transformed xyz map cache", refine_recevied_batches_);
- return GXF_FAILURE;
- }
- if (transformed_rgb_cache_.find(refine_recevied_batches_) != transformed_rgb_cache_.end()) {
- transformed_rgb_device_ = transformed_rgb_cache_[refine_recevied_batches_];
- } else {
- GXF_LOG_ERROR("[FoundationposeRender] Unable to find index %d from transformed rgb cache", refine_recevied_batches_);
- return GXF_FAILURE;
- }
- }
+ nvcv::Tensor transformed_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_U8);
+ nvcv::Tensor float_rgb_tensor = nvcv::Tensor(transformed_tensor_shape, nvcv::TYPE_F32);
+
+ WrapImgPtrToNHWCTensor(wp_image_device_, transformed_rgb_tensor, N, H, W, C);
+ WrapFloatPtrToNHWCTensor(transformed_rgb_device_, float_rgb_tensor, N, H, W, C);
+
+ convert_op(cuda_stream_, transformed_rgb_tensor, float_rgb_tensor, scale_factor, 0.0f);
+ CHECK_CUDA_ERRORS(cudaGetLastError());
+
+ threshold_and_downscale_pointcloud(
+ cuda_stream_,
+ transformed_xyz_map_device_,
+ reinterpret_cast(poses_handle->pointer()),
+ N, W * H, mesh_diameter_ / 2, min_depth_, max_depth_);
+ CHECK_CUDA_ERRORS(cudaGetLastError());
auto render_rgb_data = render_rgb_tensor_.exportData();
auto render_xyz_map_data = render_xyz_map_tensor_.exportData();
@@ -1087,7 +1103,7 @@ gxf_result_t FoundationposeRender::tick() noexcept {
iterative_point_cloud_transmitter.value()->publish(std::move(xyz_message));
iterative_camera_model_transmitter.value()->publish(std::move(camera_model_message));
}
-
+
// Update loop and batch counters
refine_recevied_batches_ += 1;
if (refine_recevied_batches_ == kNumBatches) {
@@ -1149,6 +1165,9 @@ gxf_result_t FoundationposeRender::stop() noexcept {
CHECK_CUDA_ERRORS(cudaFree(transformed_rgb_device_));
CHECK_CUDA_ERRORS(cudaFree(score_rendered_output_device_));
CHECK_CUDA_ERRORS(cudaFree(score_original_output_device_));
+ CHECK_CUDA_ERRORS(cudaFree(wp_image_device_));
+ CHECK_CUDA_ERRORS(cudaFree(trans_matrix_device_));
+ CHECK_CUDA_ERRORS(cudaFree(bbox2d_device_));
return GXF_SUCCESS;
}
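
Note: the render hunks above replace N sequential cvcuda::WarpPerspective calls with one VarShape batched call. Below is a minimal standalone sketch of that batching pattern, using the same CV-CUDA entry points as the hunks; BatchedWarp and its parameters are illustrative names, not part of the extension:

    #include <cuda_runtime.h>

    #include <vector>

    #include <cvcuda/OpWarpPerspective.hpp>
    #include <nvcv/Image.hpp>
    #include <nvcv/ImageBatch.hpp>
    #include <nvcv/Tensor.hpp>

    // Warp one source image n times with per-view 3x3 transforms in a single call.
    // buf_trans must wrap n row-major 3x3 float matrices already resident on the GPU.
    void BatchedWarp(
        cudaStream_t stream, const nvcv::Image& src, std::vector<nvcv::Image>& dst,
        const nvcv::TensorDataStridedCuda::Buffer& buf_trans, int n) {
      cvcuda::WarpPerspective warp(n);  // operator instance sized for the whole batch

      nvcv::ImageBatchVarShape src_batch(n), dst_batch(n);
      for (int i = 0; i < n; ++i) {
        src_batch.pushBack(src);  // the same source image is warped n times
      }
      dst_batch.pushBack(dst.begin(), dst.end());

      // {n, 9} float tensor holding the flattened transforms
      auto trans = nvcv::TensorWrapData(nvcv::TensorDataStridedCuda{
          nvcv::TensorShape({n, 9}, nvcv::TENSOR_NW), nvcv::TYPE_F32, buf_trans});

      const float4 border = {0, 0, 0, 0};
      warp(stream, src_batch, dst_batch, trans, NVCV_INTERP_LINEAR,
           NVCV_BORDER_CONSTANT, border);
    }

A single batched launch amortizes the per-call kernel launch overhead across all N crops of the same frame, which is the motivation for the rewrite above.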
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu
index a8dfc6d..09e7634 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu
@@ -125,6 +125,115 @@ __global__ void concat_kernel(
}
}
+// Transform points using transformation matrices
+// Each thread will transform a point using N transformation matrices and save to output
+__global__ void transform_pts_kernel(
+ float* output, const float* pts, const float* tfs, int pts_num, int pts_channel, int tfs_num, int tfs_dim) {
+ int idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (idx < pts_num) {
+ for (int i = 0; i < tfs_num; i++) {
+ float* transformed_matrix = output + i * pts_num * (tfs_dim - 1) + idx * (tfs_dim - 1);
+ const float* submatrix = tfs + i * tfs_dim * tfs_dim;
+ const float* last_col = tfs + i * tfs_dim * tfs_dim + (tfs_dim - 1) * tfs_dim;
+
+ for (int j = 0; j < pts_channel; j++) {
+ float new_row = 0.0f;
+ for (int k = 0; k < tfs_dim - 1; k++) {
+ new_row += submatrix[k * tfs_dim + j] * pts[idx * pts_channel + k];
+ }
+ new_row += last_col[j];
+ transformed_matrix[j] = new_row;
+ }
+ }
+ }
+}
+
+__device__ float dot(const float4& v1, const float4& v2) {
+ return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z + v1.w * v2.w;
+}
+
+
+// From OpenCV camera (cvcam) coordinate system to the OpenGL camera (glcam) coordinate system
+const Eigen::Matrix4f kGLCamInCVCam =
+ (Eigen::Matrix4f(4, 4) << 1, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1).finished();
+__constant__ float d_projection_fused[16];
+
+// Function to copy data to constant memory
+void copy_to_constant_memory(const Eigen::Matrix4f& projection_fused) {
+ std::vector<float> flattened_projection_fused;
+ for (int i = 0; i < 4; i++) {
+   for (int j = 0; j < 4; j++) {
+     flattened_projection_fused.push_back(projection_fused(i, j));
+   }
+ }
+ cudaMemcpyToSymbol(d_projection_fused, flattened_projection_fused.data(), 16 * sizeof(float));
+}
+
+// Multiply a 4x4 matrix (stored as four float4 rows) by a float4 vector
+__device__ float4 matrix_vec_mul4x4(const float4* mat, const float4& vec) {
+ return make_float4(dot(mat[0], vec), dot(mat[1], vec), dot(mat[2], vec), dot(mat[3], vec));
+}
+
+// Fused kernel: transform every mesh vertex by the pose, the fused projection
+// matrix held in constant memory, and the per-pose bbox crop transform
+__global__ void pose_clip_kernel_fused(
+  float* output, const float* d_poses, const float* d_bbox2d, const float* d_pts,
+  int n_poses, int n_pts, int rgb_H, int rgb_W) {
+ extern __shared__ char shared_mem[];
+ int idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (idx >= n_pts) { return; }
+
+ // Load point using float3
+ // Convert float3 to float4 by adding a homogeneous coordinate w=1.0f
+ float3 pt3 = reinterpret_cast<const float3*>(d_pts)[idx];
+ float4 pt = make_float4(pt3.x, pt3.y, pt3.z, 1.0f);
+
+ // Shared memory for storing a single pose mat and bbox transformation matrix
+ size_t stride = sizeof(float4);
+ float4* shared_tf = reinterpret_cast<float4*>(shared_mem);
+ // shared_tf spans four float4 entries, so the bbox matrix starts 4 * stride bytes in
+ float4* shared_bbox = reinterpret_cast<float4*>(shared_mem + 4 * stride);
+
+ for (int i = 0; i < n_poses; i++) {
+ float4* transformed_matrix = reinterpret_cast<float4*>(output + i * n_pts * 4 + idx * 4);
+ // Load the pose and bbox transforms into shared memory (done by thread 0 of each block)
+ if (threadIdx.x == 0) {
+ const float4* pose = reinterpret_cast<const float4*>(d_poses + i * stride);
+ // Load the transposed pose matrix into shared memory (vectorized)
+ shared_tf[0] = make_float4(pose[0].x,pose[1].x,pose[2].x,pose[3].x);
+ shared_tf[1] = make_float4(pose[0].y,pose[1].y,pose[2].y,pose[3].y);
+ shared_tf[2] = make_float4(pose[0].z,pose[1].z,pose[2].z,pose[3].z);
+ shared_tf[3] = make_float4(pose[0].w,pose[1].w,pose[2].w,pose[3].w);
+ float l = d_bbox2d[i * 4];
+ float t = rgb_H - d_bbox2d[i * 4 + 1];
+ float r = d_bbox2d[i * 4 + 2];
+ float b = rgb_H - d_bbox2d[i * 4 + 3];
+
+ shared_bbox[0] = make_float4(rgb_W / (r - l), 0, 0, (rgb_W - r - l) / (r - l));
+ shared_bbox[1] = make_float4(0, rgb_H / (t - b), 0, (rgb_H - t - b) / (t - b));
+ shared_bbox[2] = make_float4(0, 0, 1, 0);
+ shared_bbox[3] = make_float4(0, 0, 0, 1);
+ }
+
+ __syncthreads();
+
+ // Perform transformation using vectorized operations
+ float4 result = matrix_vec_mul4x4(shared_tf, pt);
+ result = matrix_vec_mul4x4(reinterpret_cast<const float4*>(d_projection_fused), result);
+ result = matrix_vec_mul4x4(shared_bbox, result);
+
+ *transformed_matrix = result;
+ }
+}
void clamp(cudaStream_t stream, float* input, float min_value, float max_value, int N) {
int block_size = 256;
@@ -133,6 +242,19 @@ void clamp(cudaStream_t stream, float* input, float min_value, float max_value,
clamp_kernel<<<grid_size, block_size, 0, stream>>>(input, min_value, max_value, N);
}
+void generate_pose_clip(
+ cudaStream_t stream, float* d_pose_clip, const float* d_pose, const float* d_bbox2d, const float* d_mesh_vertices,
+ const Eigen::Matrix4f& projection_mat, int rgb_H, int rgb_W, int n_pts, int N) {
+
+ copy_to_constant_memory(projection_mat * kGLCamInCVCam);
+ // Define block and grid size
+ int blockSize = 256;
+ int gridSize = (n_pts + blockSize - 1) / blockSize;
+
+ pose_clip_kernel_fused<<<gridSize, blockSize, 8 * sizeof(float4), stream>>>(
+   d_pose_clip, d_pose, d_bbox2d, d_mesh_vertices, N, n_pts, rgb_H, rgb_W);
+}
+
void threshold_and_downscale_pointcloud(
cudaStream_t stream, float* pointcloud_input, float* pose_array_input, int N, int n_points, float downscale_factor,
float min_depth, float max_depth) {
@@ -144,12 +266,21 @@ void threshold_and_downscale_pointcloud(
pointcloud_input, pose_array_input, N, n_points, downscale_factor, min_depth, max_depth);
}
-void concat(cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2) {
+void transform_pts(
+ cudaStream_t stream, float* output, const float* pts, const float* tfs, int pts_num, int pts_channel, int tfs_num, int tfs_dim) {
+ // Launch pts_num threads; each thread handles one point and all N transformation matrices
+ int block_size = 256;
+ int grid_size = (pts_num + block_size - 1) / block_size;
+ transform_pts_kernel<<<grid_size, block_size, 0, stream>>>(output, pts, tfs, pts_num, pts_channel, tfs_num, tfs_dim);
+}
+
+void concat(
+ cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2) {
// Launch N*H*W threads; each thread handles a vector of size C
int block_size = 256;
int grid_size = (N * H * W + block_size - 1) / block_size;
- concat_kernel<<<grid_size, block_size, 0, stream>>>(input_a, input_b, output, N, H, W, C1, C2);
+ concat_kernel<<<grid_size, block_size, 0, stream>>>(
+   input_a, input_b, output, N, H, W, C1, C2);
}
void rasterize(
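
Note: per vertex, pose_clip_kernel_fused evaluates clip = bbox_tf * projection * glcam_in_cvcam * pose * [x y z 1]^T, the same math as the removed host-side GeneratePoseClip. A hedged Eigen sketch of that transform, useful as a host reference when unit-testing the kernel (PoseClipReference is an illustrative name, not part of the extension):

    #include <Eigen/Dense>

    Eigen::Vector4f PoseClipReference(
        const Eigen::Matrix4f& pose, const Eigen::Matrix4f& projection,
        const Eigen::Vector4f& bbox2d,  // l, top_raw, r, bottom_raw in pixels
        const Eigen::Vector3f& pt, int rgb_H, int rgb_W) {
      // OpenCV camera -> OpenGL camera: flip the y and z axes
      Eigen::Matrix4f gl_in_cv = Eigen::Matrix4f::Identity();
      gl_in_cv(1, 1) = -1.0f;
      gl_in_cv(2, 2) = -1.0f;

      const float l = bbox2d(0), r = bbox2d(2);
      const float t = rgb_H - bbox2d(1), b = rgb_H - bbox2d(3);

      // Remap the crop bbox to the full NDC range (column-vector form of tf above)
      Eigen::Matrix4f bbox_tf = Eigen::Matrix4f::Identity();
      bbox_tf(0, 0) = rgb_W / (r - l);
      bbox_tf(1, 1) = rgb_H / (t - b);
      bbox_tf(0, 3) = (rgb_W - r - l) / (r - l);
      bbox_tf(1, 3) = (rgb_H - t - b) / (t - b);

      return bbox_tf * projection * gl_in_cv * pose *
             Eigen::Vector4f(pt.x(), pt.y(), pt.z(), 1.0f);
    }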
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp
index 92ec9dc..817124c 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.cu.hpp
@@ -38,6 +38,11 @@ void clamp(cudaStream_t stream, float* input, float min_value, float max_value,
void threshold_and_downscale_pointcloud(
cudaStream_t stream, float* pointcloud_input, float* pose_array_input, int N, int n_points, float downscale_factor,
float min_depth, float max_depth);
+void transform_pts(
+ cudaStream_t stream, float* output, const float* pts, const float* tfs, int pts_num, int pts_channel, int tfs_num, int tfs_dim);
+void generate_pose_clip(
+ cudaStream_t stream, float* d_pose_clip, const float* d_pose, const float* bbox2d, const float* d_mesh_vertices,
+ const Eigen::Matrix4f& projection_mat, int rgb_H, int rgb_W, int n_pts, int n_poses);
void concat(cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2);
void rasterize(
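
Note: a minimal usage sketch for the new transform_pts entry point declared above, assuming the memory layouts implied by transform_pts_kernel (pts: pts_num x pts_channel row-major points; tfs: tfs_num row-major tfs_dim x tfs_dim matrices; output: tfs_num x pts_num x (tfs_dim - 1) floats). The sizes here are illustrative:

    #include <cuda_runtime.h>

    // Declared in foundationpose_render.cu.hpp
    void transform_pts(
        cudaStream_t stream, float* output, const float* pts, const float* tfs,
        int pts_num, int pts_channel, int tfs_num, int tfs_dim);

    int main() {
      const int pts_num = 1024, pts_channel = 3, tfs_num = 252, tfs_dim = 4;
      float *d_pts, *d_tfs, *d_out;
      cudaMalloc(&d_pts, pts_num * pts_channel * sizeof(float));
      cudaMalloc(&d_tfs, tfs_num * tfs_dim * tfs_dim * sizeof(float));
      cudaMalloc(&d_out, tfs_num * pts_num * (tfs_dim - 1) * sizeof(float));

      cudaStream_t stream;
      cudaStreamCreate(&stream);
      // ... upload mesh vertices into d_pts and 4x4 row-major poses into d_tfs ...
      transform_pts(stream, d_out, d_pts, d_tfs, pts_num, pts_channel, tfs_num, tfs_dim);
      cudaStreamSynchronize(stream);

      cudaFree(d_pts); cudaFree(d_tfs); cudaFree(d_out);
      cudaStreamDestroy(stream);
      return 0;
    }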
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp
index 4839b6c..c64a7c2 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_render.hpp
@@ -92,8 +92,10 @@ class FoundationposeRender : public gxf::Codelet {
int texture_map_width_;
float mesh_diameter_;
+ float* pose_device_;
// Input data (GPU) for rendering kernels
float* pose_clip_device_;
+ float* mesh_vertices_device_;
int32_t* mesh_faces_device_;
float* texcoords_device_;
float* pts_cam_device_;
@@ -107,10 +109,12 @@ class FoundationposeRender : public gxf::Codelet {
float* xyz_map_device_;
float* transformed_rgb_device_;
float* transformed_xyz_map_device_;
+ uint8_t* wp_image_device_;
+ float* trans_matrix_device_;
+ float* bbox2d_device_;
+
nvcv::Tensor render_rgb_tensor_;
nvcv::Tensor render_xyz_map_tensor_;
- std::unordered_map transformed_xyz_map_cache_;
- std::unordered_map transformed_rgb_cache_;
float* score_rendered_output_device_;
float* score_original_output_device_;
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp
index 77150cb..6e22c03 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.cpp
@@ -153,6 +153,46 @@ float RotationGeodesticDistance(const Eigen::Matrix3f& R1, const Eigen::Matrix3f
return std::acos(cos);
}
+std::vector<Eigen::Matrix4f> GenerateSymmetricPoses(const std::vector<std::string>& symmetry_planes) {
+ float theta = 180.0 / 180.0 * M_PI;
+ std::vector<float> x_angles = {0.0};
+ std::vector<float> y_angles = {0.0};
+ std::vector<float> z_angles = {0.0};
+ std::vector<Eigen::Matrix4f> symmetry_poses;
+
+ for (size_t i = 0; i < symmetry_planes.size(); ++i) {
+ if (symmetry_planes[i] == "x"){
+ x_angles.push_back(theta);
+ } else if (symmetry_planes[i] == "y") {
+ y_angles.push_back(theta);
+ } else if (symmetry_planes[i] == "z") {
+ z_angles.push_back(theta);
+ } else {
+ GXF_LOG_ERROR("[FoundationposeSampling] The input symmetry plane %s is invalid, ignoring it.", symmetry_planes[i].c_str());
+ continue;
+ }
+ }
+
+ // Compute rotation matrix for each angle
+ for (size_t i = 0; i < x_angles.size(); ++i) {
+   auto rot_x = Eigen::AngleAxisf(x_angles[i], Eigen::Vector3f::UnitX());
+   for (size_t j = 0; j < y_angles.size(); ++j) {
+     auto rot_y = Eigen::AngleAxisf(y_angles[j], Eigen::Vector3f::UnitY());
+     for (size_t k = 0; k < z_angles.size(); ++k) {
+       auto rot_z = Eigen::AngleAxisf(z_angles[k], Eigen::Vector3f::UnitZ());
+       auto rotation_matrix_x = rot_x.toRotationMatrix();
+       auto rotation_matrix_y = rot_y.toRotationMatrix();
+       auto rotation_matrix_z = rot_z.toRotationMatrix();
+       auto rotation = rotation_matrix_z * rotation_matrix_y * rotation_matrix_x;
+ Eigen::Matrix4f euler_matrix = Eigen::Matrix4f::Identity();
+ euler_matrix.block<3, 3>(0, 0) = rotation;
+ symmetry_poses.push_back(euler_matrix);
+ }
+ }
+ }
+ return std::move(symmetry_poses);
+}
+
std::vector<Eigen::Matrix4f> ClusterPoses(
  float angle_diff, float dist_diff, std::vector<Eigen::Matrix4f>& poses_in,
  std::vector<Eigen::Matrix4f>& symmetry_tfs) {
@@ -216,7 +256,7 @@ std::vector<Eigen::Matrix4f> SampleViewsIcosphere(unsigned int n_views) {
return std::move(cam_in_obs);
}
-std::vector<Eigen::Matrix4f> MakeRotationGrid(unsigned int n_views = 40, int inplane_step = 60) {
+std::vector<Eigen::Matrix4f> MakeRotationGrid(const std::vector<std::string>& symmetry_planes, unsigned int n_views = 40, int inplane_step = 60) {
auto cam_in_obs = SampleViewsIcosphere(n_views);
std::vector<Eigen::Matrix4f> rot_grid;
@@ -234,10 +274,11 @@ std::vector MakeRotationGrid(unsigned int n_views = 40, int inp
}
}
- std::vector<Eigen::Matrix4f> symmetry_tfs = std::vector<Eigen::Matrix4f>();
+ std::vector<Eigen::Matrix4f> symmetry_tfs = GenerateSymmetricPoses(symmetry_planes);
symmetry_tfs.push_back(Eigen::Matrix4f::Identity());
- ClusterPoses(30.0, 99999.0, rot_grid, symmetry_tfs);
- return std::move(rot_grid);
+ auto clustered_poses = ClusterPoses(30.0, 99999.0, rot_grid, symmetry_tfs);
+ GXF_LOG_DEBUG("[FoundationposeSampling] %lu poses left after clustering", clustered_poses.size());
+ return std::move(clustered_poses);
}
bool GuessTranslation(
@@ -254,7 +295,7 @@ bool GuessTranslation(
}
}
if (us.empty()) {
- GXF_LOG_ERROR("[FoundationposeSampling] Mask is all zero.");
+ GXF_LOG_INFO("[FoundationposeSampling] Mask is all zero.");
return false;
}
@@ -336,6 +377,10 @@ gxf_result_t FoundationposeSampling::registerInterface(gxf::Registrar* registrar
min_depth_, "min_depth", "Minimum Depth",
"Minimum depth value to consider for estimating object center in image space.",
0.1f);
+
+ result &= registrar->parameter(
+ symmetry_planes_, "symmetry_planes", "Symmetry Planes",
+ "Symmetry planes, select one or more from [x, y, z]. ", std::vector{});
return gxf::ToResultCode(result);
}
@@ -445,7 +490,7 @@ gxf_result_t FoundationposeSampling::tick() noexcept {
}
// Generate Pose Hypothesis
- auto ob_in_cams = MakeRotationGrid();
+ auto ob_in_cams = MakeRotationGrid(symmetry_planes_.get());
if (ob_in_cams.size() == 0 || ob_in_cams.size() > max_hypothesis_) {
GXF_LOG_ERROR("[FoundationposeSampling] The size of rotation grid is not valid.");
return GXF_FAILURE;
@@ -480,7 +525,7 @@ gxf_result_t FoundationposeSampling::tick() noexcept {
Eigen::Vector3f center;
auto success = GuessTranslation(bilateral_filter_depth_host, mask, K, min_depth_, center);
if (!success) {
- GXF_LOG_ERROR("[FoundationposeSampling] Failed to guess translation. Not processing this image");
+ GXF_LOG_INFO("[FoundationposeSampling] Failed to guess translation. Not processing this image");
return GXF_SUCCESS;
}
for (auto& m : ob_in_cams) {
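
Note: the effect of the new symmetry_planes parameter can be checked in isolation. With symmetry_planes = {"z"}, GenerateSymmetricPoses adds a 180-degree z-rotation, and ClusterPoses compares each kept pose against pose * symmetry_tf, so a hypothesis and its z-flip collapse into one cluster. A hedged sketch of that equivalence (values are illustrative):

    #include <Eigen/Dense>
    #include <algorithm>
    #include <cmath>
    #include <iostream>

    int main() {
      // 180-degree rotation about z, as produced by GenerateSymmetricPoses({"z"})
      Eigen::Matrix4f flip_z = Eigen::Matrix4f::Identity();
      flip_z.block<3, 3>(0, 0) =
          Eigen::AngleAxisf(M_PI, Eigen::Vector3f::UnitZ()).toRotationMatrix();

      Eigen::Matrix4f pose = Eigen::Matrix4f::Identity();
      Eigen::Matrix4f symmetric_pose = pose * flip_z;

      // Plain geodesic distance between the two rotations is pi (they look distinct)...
      Eigen::Matrix3f rel =
          pose.block<3, 3>(0, 0) * symmetric_pose.block<3, 3>(0, 0).transpose();
      float cos_val = std::min(1.0f, std::max(-1.0f, (rel.trace() - 1.0f) / 2.0f));
      std::cout << "distance without symmetry: " << std::acos(cos_val) << " rad\n";

      // ...but ClusterPoses also checks each candidate against pose * symmetry_tf,
      // which maps symmetric_pose back onto pose, so the duplicate hypothesis is pruned.
      return 0;
    }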
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp
index 0ac4895..ef7be85 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/gxf/foundationpose/foundationpose_sampling.hpp
@@ -34,6 +34,7 @@
#include "foundationpose_sampling.cu.hpp"
+
namespace nvidia {
namespace isaac_ros {
@@ -59,6 +60,7 @@ class FoundationposeSampling : public gxf::Codelet {
gxf::Parameter<gxf::Handle<gxf::CudaStreamPool>> cuda_stream_pool_;
gxf::Parameter<uint32_t> max_hypothesis_;
gxf::Parameter<float> min_depth_;
+ gxf::Parameter<std::vector<std::string>> symmetry_planes_;
gxf::Handle<gxf::CudaStream> stream_;
float* erode_depth_device_;
diff --git a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml
index 41e940c..b43d18e 100644
--- a/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml
+++ b/isaac_ros_gxf_extensions/gxf_isaac_foundationpose/package.xml
@@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0
<name>gxf_isaac_foundationpose</name>
- <version>3.1.0</version>
+ <version>3.2.0</version>
<description>GXF extension containing Foundation Pose estimation related components.</description>
Isaac ROS Maintainers
diff --git a/isaac_ros_pose_proc/CMakeLists.txt b/isaac_ros_pose_proc/CMakeLists.txt
new file mode 100644
index 0000000..ca2ecde
--- /dev/null
+++ b/isaac_ros_pose_proc/CMakeLists.txt
@@ -0,0 +1,109 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.22.1)
+project(isaac_ros_pose_proc C CXX)
+
+if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ add_compile_options(-Wall -Wextra -Wpedantic)
+endif()
+
+find_package(ament_cmake_auto REQUIRED)
+ament_auto_find_build_dependencies()
+
+# AveragingFilterNode
+ament_auto_add_library(averaging_filter_node SHARED src/averaging_filter_node.cpp)
+rclcpp_components_register_nodes(averaging_filter_node "nvidia::isaac_ros::pose_proc::AveragingFilterNode")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::AveragingFilterNode;$\n")
+set_target_properties(averaging_filter_node PROPERTIES
+ BUILD_WITH_INSTALL_RPATH TRUE
+ BUILD_RPATH_USE_ORIGIN TRUE
+ INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# Detection3DArrayToPoseNode
+ament_auto_add_library(detection3_d_array_to_pose_node SHARED src/detection3_d_array_to_pose_node.cpp)
+rclcpp_components_register_nodes(detection3_d_array_to_pose_node "nvidia::isaac_ros::pose_proc::Detection3DArrayToPoseNode")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::Detection3DArrayToPoseNode;$\n")
+set_target_properties(detection3_d_array_to_pose_node PROPERTIES
+ BUILD_WITH_INSTALL_RPATH TRUE
+ BUILD_RPATH_USE_ORIGIN TRUE
+ INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# DistanceSelectorNode
+ament_auto_add_library(distance_selector_node SHARED src/distance_selector_node.cpp)
+rclcpp_components_register_nodes(distance_selector_node "nvidia::isaac_ros::pose_proc::DistanceSelectorNode")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::DistanceSelectorNode;$\n")
+set_target_properties(distance_selector_node PROPERTIES
+ BUILD_WITH_INSTALL_RPATH TRUE
+ BUILD_RPATH_USE_ORIGIN TRUE
+ INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# OutlierFilterNode
+ament_auto_add_library(outlier_filter_node SHARED src/outlier_filter_node.cpp)
+rclcpp_components_register_nodes(outlier_filter_node "nvidia::isaac_ros::pose_proc::OutlierFilterNode")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::OutlierFilterNode;$\n")
+set_target_properties(outlier_filter_node PROPERTIES
+ BUILD_WITH_INSTALL_RPATH TRUE
+ BUILD_RPATH_USE_ORIGIN TRUE
+ INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# PoseArrayToPoseNode
+ament_auto_add_library(pose_array_to_pose_node SHARED src/pose_array_to_pose_node.cpp)
+rclcpp_components_register_nodes(pose_array_to_pose_node "nvidia::isaac_ros::pose_proc::PoseArrayToPoseNode")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::PoseArrayToPoseNode;$\n")
+set_target_properties(pose_array_to_pose_node PROPERTIES
+ BUILD_WITH_INSTALL_RPATH TRUE
+ BUILD_RPATH_USE_ORIGIN TRUE
+ INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# StabilityFilterNode
+ament_auto_add_library(stability_filter_node SHARED src/stability_filter_node.cpp)
+rclcpp_components_register_nodes(stability_filter_node "nvidia::isaac_ros::pose_proc::StabilityFilterNode")
+set(node_plugins "${node_plugins}nvidia::isaac_ros::pose_proc::StabilityFilterNode;$