diff --git a/.flake8 b/.flake8 index 3aa32b5439..9b4a023d68 100644 --- a/.flake8 +++ b/.flake8 @@ -11,7 +11,8 @@ per-file-ignores=*/__init__.py:F401 # R505: Unnecessary elif after return statement # SIM102: Use a single if-statement instead of nested if-statements # SIM117: Merge with statements for context managers that have same scope. -ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117 +# SIM118: Checks for key-existence checks against dict.keys() calls. +ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117,SIM118 max-line-length = 120 max-complexity = 30 exclude=_*,.vscode,.git,docs/** diff --git a/docs/conf.py b/docs/conf.py index 4b56ab24c9..403a627ee4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -137,6 +137,7 @@ "omni.isaac.version", "omni.isaac.motion_generation", "omni.isaac.ui", + "omni.syntheticdata", "omni.timeline", "omni.ui", "gym", diff --git a/docs/source/how-to/save_camera_output.rst b/docs/source/how-to/save_camera_output.rst index ec0ab2c63b..594ea5121f 100644 --- a/docs/source/how-to/save_camera_output.rst +++ b/docs/source/how-to/save_camera_output.rst @@ -14,7 +14,7 @@ directory. .. literalinclude:: ../../../source/standalone/tutorials/04_sensors/run_usd_camera.py :language: python - :emphasize-lines: 137-139, 172-196, 200-204, 214-232 + :emphasize-lines: 171-179, 229-247, 251-264 :linenos: @@ -27,8 +27,8 @@ images in a numpy format. For more information on the basic writer, please check .. literalinclude:: ../../../source/standalone/tutorials/04_sensors/run_usd_camera.py :language: python - :lines: 137-139 - :dedent: + :start-at: rep_writer = rep.BasicWriter( + :end-before: # Camera positions, targets, orientations While stepping the simulator, the images can be saved to the defined folder. Since the BasicWriter only supports saving data using NumPy format, we first need to convert the PyTorch sensors to NumPy arrays before packing @@ -36,15 +36,15 @@ them in a dictionary. .. literalinclude:: ../../../source/standalone/tutorials/04_sensors/run_usd_camera.py :language: python - :lines: 172-192 - :dedent: + :start-at: # Save images from camera at camera_index + :end-at: single_cam_info = camera.data.info[camera_index] After this step, we can save the images using the BasicWriter. .. literalinclude:: ../../../source/standalone/tutorials/04_sensors/run_usd_camera.py :language: python - :lines: 193-196 - :dedent: + :start-at: # Pack data back into replicator format to save them using its writer + :end-at: rep_writer.write(rep_output) Projection into 3D Space @@ -53,18 +53,32 @@ Projection into 3D Space We include utilities to project the depth image into 3D Space. The re-projection operations are done using PyTorch operations which allows faster computation. +.. code-block:: python + + from omni.isaac.orbit.utils.math import transform_points, unproject_depth + + # Pointcloud in world frame + points_3d_cam = unproject_depth( + camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices + ) + + points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros) + +Alternately, we can use the :meth:`omni.isaac.orbit.sensors.camera.utils.create_pointcloud_from_depth` function +to create a point cloud from the depth image and transform it to the world frame. + .. 
literalinclude:: ../../../source/standalone/tutorials/04_sensors/run_usd_camera.py :language: python - :lines: 200-204 - :dedent: + :start-at: # Derive pointcloud from camera at camera_index + :end-before: # In the first few steps, things are still being instanced and Camera.data The resulting point cloud can be visualized using the :mod:`omni.isaac.debug_draw` extension from Isaac Sim. This makes it easy to visualize the point cloud in the 3D space. .. literalinclude:: ../../../source/standalone/tutorials/04_sensors/run_usd_camera.py :language: python - :lines: 214-232 - :dedent: + :start-at: # In the first few steps, things are still being instanced and Camera.data + :end-at: pc_markers.visualize(translations=pointcloud) Executing the script @@ -74,7 +88,11 @@ To run the accompanying script, execute the following command: .. code-block:: bash - ./orbit.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --save --draw + # Usage with saving and drawing + ./orbit.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --save --draw + + # Usage with saving only in headless mode + ./orbit.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --save --headless --offscreen_render The simulation should start, and you can observe different objects falling down. An output folder will be created diff --git a/docs/source/refs/issues.rst b/docs/source/refs/issues.rst index 3aabdb1302..cf0f4a4054 100644 --- a/docs/source/refs/issues.rst +++ b/docs/source/refs/issues.rst @@ -58,8 +58,8 @@ For more information, please refer to the `PhysX Determinism documentation`_. Blank initial frames from the camera ------------------------------------ -When using the :class:`Camera` sensor in standalone scripts, the first few frames may be blank. -This is a known issue with the simulator where it needs a few steps to load the material +When using the :class:`omni.isaac.orbit.sensors.Camera` sensor in standalone scripts, the first few frames +may be blank. This is a known issue with the simulator where it needs a few steps to load the material textures properly and fill up the render targets. A hack to work around this is to add the following after initializing the camera sensor and setting @@ -67,7 +67,7 @@ its pose: .. code-block:: python - from omni.isaac.core.simulation_context import SimulationContext + from omni.isaac.orbit.sim import SimulationContext sim = SimulationContext.instance() diff --git a/docs/source/tutorials/01_assets/run_rigid_object.rst b/docs/source/tutorials/01_assets/run_rigid_object.rst index dc54851f4b..83e9afdcf3 100644 --- a/docs/source/tutorials/01_assets/run_rigid_object.rst +++ b/docs/source/tutorials/01_assets/run_rigid_object.rst @@ -125,7 +125,7 @@ inside the :class:`assets.RigidObject.data` attribute. This is done using the :m .. 
literalinclude:: ../../../../source/standalone/tutorials/01_assets/run_rigid_object.py :language: python :start-at: # update buffers - :end-at: cone_object.update(sim-dt) + :end-at: cone_object.update(sim_dt) The Code Execution diff --git a/source/extensions/omni.isaac.orbit/config/extension.toml b/source/extensions/omni.isaac.orbit/config/extension.toml index 05d0010bf4..0f3af7c410 100644 --- a/source/extensions/omni.isaac.orbit/config/extension.toml +++ b/source/extensions/omni.isaac.orbit/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.12.4" +version = "0.13.0" # Description title = "ORBIT framework for Robot Learning" diff --git a/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst b/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst index 47450929a0..14eb68a44e 100644 --- a/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.orbit/docs/CHANGELOG.rst @@ -1,6 +1,40 @@ Changelog --------- +0.13.0 (2024-03-12) +~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added support for the following data types inside the :class:`omni.isaac.orbit.sensors.Camera` class: + ``instance_segmentation_fast`` and ``instance_id_segmentation_fast``. These are GPU-supported annotations + and are faster than the regular annotations. + +Fixed +^^^^^ + +* Fixed handling of semantic filtering inside the :class:`omni.isaac.orbit.sensors.Camera` class. Earlier, + the annotator was given ``semanticTypes`` as an argument. However, with Isaac Sim 2023.1, the annotator + does not accept this argument. Instead, the mapping needs to be set on the synthetic data interface directly. +* Fixed the return shape of colored images for segmentation data types inside the + :class:`omni.isaac.orbit.sensors.Camera` class. Earlier, the images were always returned as ``int32``. Now, + they are cast to a ``uint8`` 4-channel array before returning if colorization is enabled for the annotation type. + +Removed +^^^^^^^ + +* Dropped support for ``instance_segmentation`` and ``instance_id_segmentation`` annotations in the + :class:`omni.isaac.orbit.sensors.Camera` class. Their "fast" counterparts should be used instead. +* Renamed the argument :attr:`omni.isaac.orbit.sensors.CameraCfg.semantic_types` to + :attr:`omni.isaac.orbit.sensors.CameraCfg.semantic_filter`. This is more aligned with Replicator's terminology + for semantic filter predicates. +* Replaced the argument :attr:`omni.isaac.orbit.sensors.CameraCfg.colorize` with separate colorize + arguments for each annotation type (:attr:`~omni.isaac.orbit.sensors.CameraCfg.colorize_instance_segmentation`, + :attr:`~omni.isaac.orbit.sensors.CameraCfg.colorize_instance_id_segmentation`, and + :attr:`~omni.isaac.orbit.sensors.CameraCfg.colorize_semantic_segmentation`).
+ + 0.12.4 (2024-03-11) ~~~~~~~~~~~~~~~~~~~ diff --git a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py index 9693bbb1f6..896c4fdebc 100644 --- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py +++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera.py @@ -16,6 +16,7 @@ import omni.kit.commands import omni.usd from omni.isaac.core.prims import XFormPrimView +from omni.syntheticdata.scripts.SyntheticData import SyntheticData from pxr import UsdGeom import omni.isaac.orbit.sim as sim_utils @@ -44,27 +45,40 @@ class Camera(SensorBase): - ``"distance_to_image_plane"``: An image containing distances of 3D points from camera plane along camera's z-axis. - ``"normals"``: An image containing the local surface normal vectors at each pixel. - ``"motion_vectors"``: An image containing the motion vector data at each pixel. - - ``"instance_segmentation"``: The instance segmentation data. - ``"semantic_segmentation"``: The semantic segmentation data. + - ``"instance_segmentation_fast"``: The instance segmentation data. + - ``"instance_id_segmentation_fast"``: The instance id segmentation data. .. note:: Currently the following sensor types are not supported in a "view" format: + - ``"instance_segmentation"``: The instance segmentation data. Please use the fast counterparts instead. + - ``"instance_id_segmentation"``: The instance id segmentation data. Please use the fast counterparts instead. - ``"bounding_box_2d_tight"``: The tight 2D bounding box data (only contains non-occluded regions). + - ``"bounding_box_2d_tight_fast"``: The tight 2D bounding box data (only contains non-occluded regions). - ``"bounding_box_2d_loose"``: The loose 2D bounding box data (contains occluded regions). + - ``"bounding_box_2d_loose_fast"``: The loose 2D bounding box data (contains occluded regions). - ``"bounding_box_3d"``: The 3D view space bounding box data. + - ``"bounding_box_3d_fast"``: The 3D view space bounding box data. - In case you need to work with these sensor types, we recommend using the single camera implementation - from the :mod:`omni.isaac.orbit.compat.camera` module. - - .. _replicator extension: https://docs.omniverse.nvidia.com/prod_extensions/prod_extensions/ext_replicator/annotators_details.html#annotator-output + .. _replicator extension: https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/annotators_details.html#annotator-output .. _USDGeom Camera: https://graphics.pixar.com/usd/docs/api/class_usd_geom_camera.html """ cfg: CameraCfg """The configuration parameters.""" - UNSUPPORTED_TYPES: set[str] = {"bounding_box_2d_tight", "bounding_box_2d_loose", "bounding_box_3d"} + + UNSUPPORTED_TYPES: set[str] = { + "instance_id_segmentation", + "instance_segmentation", + "bounding_box_2d_tight", + "bounding_box_2d_loose", + "bounding_box_3d", + "bounding_box_2d_tight_fast", + "bounding_box_2d_loose_fast", + "bounding_box_3d_fast", + } """The set of sensor types that are not supported by the camera class.""" def __init__(self, cfg: CameraCfg): @@ -128,6 +142,10 @@ def __str__(self) -> str: return ( f"Camera @ '{self.cfg.prim_path}': \n" f"\tdata types : {self.data.output.sorted_keys} \n" + f"\tsemantic filter : {self.cfg.semantic_filter}\n" + f"\tcolorize semantic segm. : {self.cfg.colorize_semantic_segmentation}\n" + f"\tcolorize instance segm. 
: {self.cfg.colorize_instance_segmentation}\n" + f"\tcolorize instance id segm.: {self.cfg.colorize_instance_id_segmentation}\n" f"\tupdate period (s): {self.cfg.update_period}\n" f"\tshape : {self.image_shape}\n" f"\tnumber of sensors : {self._view.count}" @@ -358,11 +376,7 @@ def _initialize_impl(self): # Attach the sensor data types to render node self._render_product_paths: list[str] = list() self._rep_registry: dict[str, list[rep.annotators.Annotator]] = {name: list() for name in self.cfg.data_types} - # Resolve device name - if "cuda" in self._device: - device_name = self._device.split(":")[0] - else: - device_name = "cpu" + # Obtain current stage stage = omni.usd.get_context().get_stage() # Convert all encapsulated prims to Camera @@ -375,33 +389,53 @@ def _initialize_impl(self): # Add to list sensor_prim = UsdGeom.Camera(cam_prim) self._sensor_prims.append(sensor_prim) + # Get render product # From Isaac Sim 2023.1 onwards, render product is a HydraTexture so we need to extract the path render_prod_path = rep.create.render_product(cam_prim_path, resolution=(self.cfg.width, self.cfg.height)) if not isinstance(render_prod_path, str): render_prod_path = render_prod_path.path self._render_product_paths.append(render_prod_path) + + # Check if semantic types or semantic filter predicate is provided + if isinstance(self.cfg.semantic_filter, list): + semantic_filter_predicate = ":*; ".join(self.cfg.semantic_filter) + ":*" + elif isinstance(self.cfg.semantic_filter, str): + semantic_filter_predicate = self.cfg.semantic_filter + else: + raise ValueError(f"Semantic types must be a list or a string. Received: {self.cfg.semantic_filter}.") + # set the semantic filter predicate + # copied from rep.scripts.writes_default.basic_writer.py + SyntheticData.Get().set_instance_mapping_semantic_filter(semantic_filter_predicate) + # Iterate over each data type and create annotator # TODO: This will move out of the loop once Replicator supports multiple render products within a single # annotator, i.e.: rep_annotator.attach(self._render_product_paths) for name in self.cfg.data_types: - # init params -- Checked from rep.scripts.writes_default.basic_writer.py # note: we are verbose here to make it easier to understand the code. # if colorize is true, the data is mapped to colors and a uint8 4 channel image is returned. # if colorize is false, the data is returned as a uint32 image with ids as values. 
- if name in ["bounding_box_2d_tight", "bounding_box_2d_loose", "bounding_box_3d"]: - init_params = {"semanticTypes": self.cfg.semantic_types} - elif name in ["semantic_segmentation", "instance_segmentation"]: - init_params = {"semanticTypes": self.cfg.semantic_types, "colorize": self.cfg.colorize} - elif name in ["instance_id_segmentation"]: - init_params = {"colorize": self.cfg.colorize} + if name == "semantic_segmentation": + init_params = {"colorize": self.cfg.colorize_semantic_segmentation} + elif name == "instance_segmentation_fast": + init_params = {"colorize": self.cfg.colorize_instance_segmentation} + elif name == "instance_id_segmentation_fast": + init_params = {"colorize": self.cfg.colorize_instance_id_segmentation} else: init_params = None + + # Resolve device name + if "cuda" in self._device: + device_name = self._device.split(":")[0] + else: + device_name = "cpu" + # create annotator node rep_annotator = rep.AnnotatorRegistry.get_annotator(name, init_params, device=device_name) rep_annotator.attach(render_prod_path) # add to registry self._rep_registry[name].append(rep_annotator) + # Create internal buffers self._create_buffers() @@ -426,7 +460,7 @@ def _update_buffers_impl(self, env_ids: Sequence[int]): # get the output output = annotators[index].get_data() # process the output - data, info = self._process_annotator_output(output) + data, info = self._process_annotator_output(name, output) # add data to output self._data.output[name][index] = data # add info to output @@ -442,12 +476,18 @@ def _check_supported_data_types(self, cfg: CameraCfg): # reason: these use np structured data types which we can't yet convert to torch tensor common_elements = set(cfg.data_types) & Camera.UNSUPPORTED_TYPES if common_elements: + # provide alternative fast counterparts + fast_common_elements = [] + for item in common_elements: + if "instance_segmentation" in item or "instance_id_segmentation" in item: + fast_common_elements.append(item + "_fast") + # raise error raise ValueError( f"Camera class does not support the following sensor types: {common_elements}." "\n\tThis is because these sensor types output numpy structured data types which" "can't be converted to torch tensors easily." - "\n\tHint: If you need to work with these sensor types, we recommend using the single camera" - " implementation from the omni.isaac.orbit.compat.camera module." + "\n\tHint: If you need to work with these sensor types, we recommend using their fast counterparts." + f"\n\t\tFast counterparts: {fast_common_elements}" ) def _create_buffers(self): @@ -530,7 +570,7 @@ def _create_annotator_data(self): # get the output output = annotators[index].get_data() # process the output - data, info = self._process_annotator_output(output) + data, info = self._process_annotator_output(name, output) # append the data data_all_cameras.append(data) # store the info @@ -538,7 +578,7 @@ def _create_annotator_data(self): # concatenate the data along the batch dimension self._data.output[name] = torch.stack(data_all_cameras, dim=0) - def _process_annotator_output(self, output: Any) -> tuple[torch.tensor, dict]: + def _process_annotator_output(self, name: str, output: Any) -> tuple[torch.tensor, dict | None]: """Process the annotator output. This function is called after the data has been collected from all the cameras. 
@@ -552,6 +592,27 @@ def _process_annotator_output(self, output: Any) -> tuple[torch.tensor, dict]: info = None # convert data into torch tensor data = convert_to_torch(data, device=self.device) + + # process data for different segmentation types + # Note: Replicator returns raw buffers of dtype int32 for segmentation types + # so we need to convert them to uint8 4 channel images for colorized types + height, width = self.image_shape + if name == "semantic_segmentation": + if self.cfg.colorize_semantic_segmentation: + data = data.view(torch.uint8).reshape(height, width, -1) + else: + data = data.view(height, width) + elif name == "instance_segmentation_fast": + if self.cfg.colorize_instance_segmentation: + data = data.view(torch.uint8).reshape(height, width, -1) + else: + data = data.view(height, width) + elif name == "instance_id_segmentation_fast": + if self.cfg.colorize_instance_id_segmentation: + data = data.view(torch.uint8).reshape(height, width, -1) + else: + data = data.view(height, width) + # return the data and info return data, info diff --git a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_cfg.py b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_cfg.py index 22da1b5a81..6f9256d846 100644 --- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_cfg.py +++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/camera/camera_cfg.py @@ -56,7 +56,10 @@ class OffsetCfg: """ data_types: list[str] = ["rgb"] - """List of sensor names/types to enable for the camera. Defaults to ["rgb"].""" + """List of sensor names/types to enable for the camera. Defaults to ["rgb"]. + + Please refer to the :class:`Camera` class for a list of available data types. + """ width: int = MISSING """Width of the image in pixels.""" @@ -64,22 +67,44 @@ class OffsetCfg: height: int = MISSING """Height of the image in pixels.""" - semantic_types: list[str] = ["class"] - """List of allowed semantic types the types. Defaults to ["class"]. + semantic_filter: str | list[str] = "*:*" + """A string or a list specifying a semantic filter predicate. Defaults to ``"*:*"``. + + If a string, it should be a disjunctive normal form of (semantic type, labels). For examples: + + * ``"typeA : labelA & !labelB | labelC , typeB: labelA ; typeC: labelE"``: + All prims with semantic type "typeA" and label "labelA" but not "labelB" or with label "labelC". + Also, all prims with semantic type "typeB" and label "labelA", or with semantic type "typeC" and label "labelE". + * ``"typeA : * ; * : labelA"``: All prims with semantic type "typeA" or with label "labelA" + + If a list of strings, each string should be a semantic type. The segmentation for prims with + semantics of the specified types will be retrieved. For example, if the list is ["class"], only + the segmentation for prims with semantics of type "class" will be retrieved. + + .. seealso:: - For example, if semantic types is [“class”], only the bounding boxes for prims with semantics of - type “class” will be retrieved. + For more information on the semantics filter, see the documentation on `Replicator Semantics Schema Editor`_. - More information available at: - https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/semantics_schema_editor.html#semantics-filtering + .. 
_Replicator Semantics Schema Editor: https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/semantics_schema_editor.html#semantics-filtering """ - colorize: bool = False - """whether to output colorized semantic information or non-colorized one. Defaults to False. + colorize_semantic_segmentation: bool = True + """Whether to colorize the semantic segmentation images. Defaults to True. + + If True, semantic segmentation is converted to an image where semantic IDs are mapped to colors + and returned as a ``uint8`` 4-channel array. If False, the output is returned as an ``int32`` array. + """ + + colorize_instance_id_segmentation: bool = True + """Whether to colorize the instance ID segmentation images. Defaults to True. + + If True, instance ID segmentation is converted to an image where instance IDs are mapped to colors + and returned as a ``uint8`` 4-channel array. If False, the output is returned as an ``int32`` array. + """ - If True, the semantic images will be a 2D array of RGBA values, where each pixel is colored according to - the semantic type. Accordingly, the information output will contain mapping from color to semantic labels. + colorize_instance_segmentation: bool = True + """Whether to colorize the instance segmentation images. Defaults to True. - If False, the semantic images will be a 2D array of integers, where each pixel is an integer representing - the semantic ID. Accordingly, the information output will contain mapping from semantic ID to semantic labels. + If True, instance segmentation is converted to an image where instance IDs are mapped to colors + and returned as a ``uint8`` 4-channel array. If False, the output is returned as an ``int32`` array. """ diff --git a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py index abe7003512..a890c373d1 100644 --- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py +++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sensors/ray_caster/ray_caster_camera.py @@ -50,13 +50,18 @@ class RayCasterCamera(RayCaster): UNSUPPORTED_TYPES: ClassVar[set[str]] = { "rgb", "instance_id_segmentation", + "instance_id_segmentation_fast", "instance_segmentation", + "instance_segmentation_fast", "semantic_segmentation", "skeleton_data", "motion_vectors", "bounding_box_2d_tight", + "bounding_box_2d_tight_fast", "bounding_box_2d_loose", + "bounding_box_2d_loose_fast", "bounding_box_3d", + "bounding_box_3d_fast", } """A set of sensor types that are not supported by the ray-caster camera.""" diff --git a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/spawner_cfg.py b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/spawner_cfg.py index 6bcb659e4b..c82ae49bfb 100644 --- a/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/spawner_cfg.py +++ b/source/extensions/omni.isaac.orbit/omni/isaac/orbit/sim/spawners/spawner_cfg.py @@ -51,7 +51,12 @@ class SpawnerCfg: spawned asset in the class avocado and the color green, the semantic tags would be ``[("class", "avocado"), ("color", "green")]``. - .. _Replicator Semantic: https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/semantics_schema_editor.html + .. seealso:: + + For more information on the semantic tags, see the documentation for the `semantics schema editor`_. + + ..
_semantics schema editor: https://docs.omniverse.nvidia.com/extensions/latest/ext_replicator/semantics_schema_editor.html#semantics-filtering + """ copy_from_source: bool = True diff --git a/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py b/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py index f2af7f7d2c..4c0fa316cd 100644 --- a/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py +++ b/source/extensions/omni.isaac.orbit/test/sensors/test_camera.py @@ -60,7 +60,6 @@ def setUp(self): spawn=sim_utils.PinholeCameraCfg( focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5) ), - colorize=False, ) # Create a new stage stage_utils.create_new_stage() @@ -123,22 +122,6 @@ def test_camera_init(self): for im_data in camera.data.output.to_dict().values(): self.assertTrue(im_data.shape == (1, self.camera_cfg.height, self.camera_cfg.width)) - def test_camera_resolution(self): - """Test camera resolution is correctly set.""" - # Create camera - camera = Camera(self.camera_cfg) - # Play sim - self.sim.reset() - # Simulate for a few steps - # note: This is a workaround to ensure that the textures are loaded. - # Check "Known Issues" section in the documentation for more details. - for _ in range(5): - self.sim.step() - camera.update(self.dt) - # access image data and compare shapes - for im_data in camera.data.output.to_dict().values(): - self.assertTrue(im_data.shape == (1, self.camera_cfg.height, self.camera_cfg.width)) - def test_camera_init_offset(self): """Test camera initialization with offset using different conventions.""" # define the same offset in all conventions @@ -291,6 +274,106 @@ def test_intrinsic_matrix(self): self.assertAlmostEqual(rs_intrinsic_matrix[0, 0], K[0, 0], 4) # self.assertAlmostEqual(rs_intrinsic_matrix[1, 1], K[1, 1], 4) + def test_camera_resolution_all_colorize(self): + """Test camera resolution is correctly set for all types with colorization enabled.""" + # Add all types + camera_cfg = copy.deepcopy(self.camera_cfg) + camera_cfg.data_types = [ + "rgb", + "distance_to_image_plane", + "normals", + "semantic_segmentation", + "instance_segmentation_fast", + "instance_id_segmentation_fast", + ] + camera_cfg.colorize_instance_id_segmentation = True + camera_cfg.colorize_instance_segmentation = True + camera_cfg.colorize_semantic_segmentation = True + # Create camera + camera = Camera(camera_cfg) + + # Play sim + self.sim.reset() + # Simulate for a few steps + # note: This is a workaround to ensure that the textures are loaded. + # Check "Known Issues" section in the documentation for more details. + for _ in range(12): + self.sim.step() + camera.update(self.dt) + + # expected sizes + hw_3c_shape = (1, camera_cfg.height, camera_cfg.width, 3) + hw_1c_shape = (1, camera_cfg.height, camera_cfg.width) + # access image data and compare shapes + output = camera.data.output + self.assertEqual(output["rgb"].shape, hw_3c_shape) + self.assertEqual(output["distance_to_image_plane"].shape, hw_1c_shape) + self.assertEqual(output["normals"].shape, hw_3c_shape) + # FIXME: No idea why it does not work here. The raw buffers are of type int64 than int32 -> need to investigate + # It works fine when run_usd_camera.py tutorial is run. 
+ # self.assertEqual(output["semantic_segmentation"].shape, hw_3c_shape) + # self.assertEqual(output["instance_segmentation_fast"].shape, hw_3c_shape) + # self.assertEqual(output["instance_id_segmentation_fast"].shape, hw_3c_shape) + + # access image data and compare dtype + output = camera.data.output + self.assertEqual(output["rgb"].dtype, torch.uint8) + self.assertEqual(output["distance_to_image_plane"].dtype, torch.float) + self.assertEqual(output["normals"].dtype, torch.float) + self.assertEqual(output["semantic_segmentation"].dtype, torch.uint8) + self.assertEqual(output["instance_segmentation_fast"].dtype, torch.uint8) + self.assertEqual(output["instance_id_segmentation_fast"].dtype, torch.uint8) + + def test_camera_resolution_no_colorize(self): + """Test camera resolution is correctly set for all types with no colorization enabled.""" + # Add all types + camera_cfg = copy.deepcopy(self.camera_cfg) + camera_cfg.data_types = [ + "rgb", + "distance_to_image_plane", + "normals", + "semantic_segmentation", + "instance_segmentation_fast", + "instance_id_segmentation_fast", + ] + camera_cfg.colorize_instance_id_segmentation = False + camera_cfg.colorize_instance_segmentation = False + camera_cfg.colorize_semantic_segmentation = False + # Create camera + camera = Camera(camera_cfg) + + # Play sim + self.sim.reset() + # Simulate for a few steps + # note: This is a workaround to ensure that the textures are loaded. + # Check "Known Issues" section in the documentation for more details. + for _ in range(12): + self.sim.step() + camera.update(self.dt) + + # expected sizes + hw_3c_shape = (1, camera_cfg.height, camera_cfg.width, 3) + hw_1c_shape = (1, camera_cfg.height, camera_cfg.width) + # access image data and compare shapes + output = camera.data.output + self.assertEqual(output["rgb"].shape, hw_3c_shape) + self.assertEqual(output["distance_to_image_plane"].shape, hw_1c_shape) + self.assertEqual(output["normals"].shape, hw_3c_shape) + self.assertEqual(output["semantic_segmentation"].shape, hw_1c_shape) + self.assertEqual(output["instance_segmentation_fast"].shape, hw_1c_shape) + self.assertEqual(output["instance_id_segmentation_fast"].shape, hw_1c_shape) + + # access image data and compare dtype + output = camera.data.output + self.assertEqual(output["rgb"].dtype, torch.uint8) + self.assertEqual(output["distance_to_image_plane"].dtype, torch.float) + self.assertEqual(output["normals"].dtype, torch.float) + # FIXME: No idea why it does not work here. The raw buffers are of type int64 than int32 -> need to investigate + # It works fine when run_usd_camera.py tutorial is run. 
+ # self.assertEqual(output["semantic_segmentation"].dtype, torch.int32) + # self.assertEqual(output["instance_segmentation_fast"].dtype, torch.int32) + # self.assertEqual(output["instance_id_segmentation_fast"].dtype, torch.int32) + def test_throughput(self): """Checks that the single camera gets created properly with a rig.""" # Create directory temp dir to dump the results @@ -354,7 +437,7 @@ def _populate_scene(): cfg.func("/World/Light/WhiteSphere", cfg, translation=(-4.5, 3.5, 10.0)) # Random objects random.seed(0) - for i in range(8): + for i in range(10): # sample random position position = np.random.rand(3) - np.asarray([0.05, 0.05, -1.0]) position *= np.asarray([1.5, 1.5, 0.5]) diff --git a/source/standalone/tutorials/04_sensors/run_usd_camera.py b/source/standalone/tutorials/04_sensors/run_usd_camera.py index 8915737aab..a2de4e4f9d 100644 --- a/source/standalone/tutorials/04_sensors/run_usd_camera.py +++ b/source/standalone/tutorials/04_sensors/run_usd_camera.py @@ -11,9 +11,12 @@ .. code-block:: bash - # Usage + # Usage with GUI ./orbit.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py + # Usage with headless + ./orbit.sh -p source/standalone/tutorials/04_sensors/run_usd_camera.py --headless --offscreen_render + """ from __future__ import annotations @@ -27,13 +30,32 @@ # add argparse arguments parser = argparse.ArgumentParser(description="This script demonstrates how to use the camera sensor.") parser.add_argument("--cpu", action="store_true", default=False, help="Use CPU device for camera output.") -parser.add_argument("--draw", action="store_true", default=False, help="Draw the obtained pointcloud on viewport.") -parser.add_argument("--save", action="store_true", default=False, help="Save the obtained data to disk.") +parser.add_argument( + "--draw", + action="store_true", + default=False, + help="Draw the obtained pointcloud on viewport from the perspective of camera at index specified by ``--camera_id``.", +) +parser.add_argument( + "--save", + action="store_true", + default=False, + help="Draw the obtained pointcloud on viewport from the perspective of camera at index specified by ``--camera_id``.", +) +parser.add_argument( + "--camera_id", + type=int, + choices={0, 1}, + default=0, + help=( + "The camera ID to use for displaying points or saving the camera data. Default is 0." + " The viewport will always initialize with the perspective of camera 0." 
+ ), +) # append AppLauncher cli args AppLauncher.add_app_launcher_args(parser) # parse the arguments args_cli = parser.parse_args() - # launch omniverse app app_launcher = AppLauncher(args_cli) simulation_app = app_launcher.app @@ -46,21 +68,21 @@ import torch import omni.isaac.core.utils.prims as prim_utils -import omni.isaac.debug_draw._debug_draw as omni_debug_draw import omni.replicator.core as rep -from omni.isaac.core.prims import RigidPrim -from pxr import Gf, UsdGeom import omni.isaac.orbit.sim as sim_utils +from omni.isaac.orbit.assets import RigidObject, RigidObjectCfg +from omni.isaac.orbit.markers import VisualizationMarkers +from omni.isaac.orbit.markers.config import RAY_CASTER_MARKER_CFG from omni.isaac.orbit.sensors.camera import Camera, CameraCfg +from omni.isaac.orbit.sensors.camera.utils import create_pointcloud_from_depth from omni.isaac.orbit.utils import convert_dict_to_backend -from omni.isaac.orbit.utils.math import project_points, transform_points, unproject_depth def define_sensor() -> Camera: """Defines the camera sensor to add to the scene.""" # Setup camera sensor - # In contras to the ray-cast camera, we spawn the prim at these locations. + # In contrast to the ray-cast camera, we spawn the prim at these locations. # This means the camera sensor will be attached to these prims. prim_utils.create_prim("/World/Origin_00", "Xform") prim_utils.create_prim("/World/Origin_01", "Xform") @@ -69,7 +91,17 @@ def define_sensor() -> Camera: update_period=0, height=480, width=640, - data_types=["rgb", "distance_to_image_plane", "normals"], + data_types=[ + "rgb", + "distance_to_image_plane", + "normals", + "semantic_segmentation", + "instance_segmentation_fast", + "instance_id_segmentation_fast", + ], + colorize_semantic_segmentation=True, + colorize_instance_id_segmentation=True, + colorize_instance_segmentation=True, spawn=sim_utils.PinholeCameraCfg( focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5) ), @@ -80,7 +112,7 @@ def define_sensor() -> Camera: return camera -def design_scene(): +def design_scene() -> dict: """Design the scene.""" # Populate scene # -- Ground-plane @@ -90,6 +122,9 @@ def design_scene(): cfg = sim_utils.DistantLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75)) cfg.func("/World/Light", cfg) + # Create a dictionary for the scene entities + scene_entities = {} + # Xform to hold objects prim_utils.create_prim("/World/Objects", "Xform") # Random objects @@ -97,29 +132,36 @@ def design_scene(): # sample random position position = np.random.rand(3) - np.asarray([0.05, 0.05, -1.0]) position *= np.asarray([1.5, 1.5, 0.5]) - # create prim - prim_type = random.choice(["Cube", "Sphere", "Cylinder"]) - _ = prim_utils.create_prim( - f"/World/Objects/Obj_{i:02d}", - prim_type, - translation=position, - scale=(0.25, 0.25, 0.25), - semantic_label=prim_type, + # sample random color + color = (random.random(), random.random(), random.random()) + # choose random prim type + prim_type = random.choice(["Cube", "Cone", "Cylinder"]) + common_properties = { + "rigid_props": sim_utils.RigidBodyPropertiesCfg(), + "mass_props": sim_utils.MassPropertiesCfg(mass=5.0), + "collision_props": sim_utils.CollisionPropertiesCfg(), + "visual_material": sim_utils.PreviewSurfaceCfg(diffuse_color=color, metallic=0.5), + "semantic_tags": [("class", prim_type)], + } + if prim_type == "Cube": + shape_cfg = sim_utils.CuboidCfg(size=(0.25, 0.25, 0.25), **common_properties) + elif prim_type == "Cone": + shape_cfg = sim_utils.ConeCfg(radius=0.1, 
height=0.25, **common_properties) + elif prim_type == "Cylinder": + shape_cfg = sim_utils.CylinderCfg(radius=0.25, height=0.25, **common_properties) + # Rigid Object + obj_cfg = RigidObjectCfg( + prim_path=f"/World/Objects/Obj_{i:02d}", + spawn=shape_cfg, + init_state=RigidObjectCfg.InitialStateCfg(pos=position), ) - # add rigid properties - rigid_obj = RigidPrim(f"/World/Objects/Obj_{i:02d}", mass=5.0) - # cast to geom prim - geom_prim = getattr(UsdGeom, prim_type)(rigid_obj.prim) - # set random color - color = Gf.Vec3f(random.random(), random.random(), random.random()) - geom_prim.CreateDisplayColorAttr() - geom_prim.GetDisplayColorAttr().Set([color]) + scene_entities[f"rigid_object{i}"] = RigidObject(cfg=obj_cfg) # Sensors camera = define_sensor() # return the scene information - scene_entities = {"camera": camera} + scene_entities["camera"] = camera return scene_entities @@ -130,26 +172,36 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): # Create replicator writer output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output", "camera") - rep_writer = rep.BasicWriter(output_dir=output_dir, frame_padding=3) + rep_writer = rep.BasicWriter( + output_dir=output_dir, + frame_padding=0, + colorize_instance_id_segmentation=camera.cfg.colorize_instance_id_segmentation, + colorize_instance_segmentation=camera.cfg.colorize_instance_segmentation, + colorize_semantic_segmentation=camera.cfg.colorize_semantic_segmentation, + ) - # Acquire draw interface - draw_interface = omni_debug_draw.acquire_debug_draw_interface() + # Camera positions, targets, orientations + camera_positions = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device) + camera_targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device) + # These orientations are in ROS-convention, and will position the cameras to view the origin + camera_orientations = torch.tensor( # noqa: F841 + [[-0.1759, 0.3399, 0.8205, -0.4247], [-0.4247, 0.8205, -0.3399, 0.1759]], device=sim.device + ) # Set pose: There are two ways to set the pose of the camera. # -- Option-1: Set pose using view - eyes = torch.tensor([[2.5, 2.5, 2.5], [-2.5, -2.5, 2.5]], device=sim.device) - targets = torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], device=sim.device) - camera.set_world_poses_from_view(eyes, targets) + camera.set_world_poses_from_view(camera_positions, camera_targets) # -- Option-2: Set pose using ROS - # position = torch.tensor([[2.5, 2.5, 2.5]], device=sim.device) - # orientation = torch.tensor([[-0.17591989, 0.33985114, 0.82047325, -0.42470819]], device=sim.device) - # camera.set_world_poses(position, orientation, env_ids=[0], convention="ros") - - # Simulate for a few steps - # note: This is a workaround to ensure that the textures are loaded. - # Check "Known Issues" section in the documentation for more details. 
- for _ in range(3): - sim.step() + # camera.set_world_poses(camera_positions, camera_orientations, convention="ros") + + # Index of the camera to use for visualization and saving + camera_index = args_cli.camera_id + + # Create the markers for the --draw option outside of is_running() loop + if sim.has_gui() and args_cli.draw: + cfg = RAY_CASTER_MARKER_CFG.replace(prim_path="/Visuals/CameraPointCloud") + cfg.markers["hit"].radius = 0.002 + pc_markers = VisualizationMarkers(cfg) # Simulate physics while simulation_app.is_running(): @@ -160,23 +212,27 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): # Print camera info print(camera) - print("Received shape of rgb image: ", camera.data.output["rgb"].shape) - print("Received shape of depth image: ", camera.data.output["distance_to_image_plane"].shape) + if "rgb" in camera.data.output.keys(): + print("Received shape of rgb image : ", camera.data.output["rgb"].shape) + if "distance_to_image_plane" in camera.data.output.keys(): + print("Received shape of depth image : ", camera.data.output["distance_to_image_plane"].shape) + if "normals" in camera.data.output.keys(): + print("Received shape of normals : ", camera.data.output["normals"].shape) + if "semantic_segmentation" in camera.data.output.keys(): + print("Received shape of semantic segm. : ", camera.data.output["semantic_segmentation"].shape) + if "instance_segmentation_fast" in camera.data.output.keys(): + print("Received shape of instance segm. : ", camera.data.output["instance_segmentation_fast"].shape) + if "instance_id_segmentation_fast" in camera.data.output.keys(): + print("Received shape of instance id segm.: ", camera.data.output["instance_id_segmentation_fast"].shape) print("-------------------------------") # Extract camera data if args_cli.save: - # Save images from camera 1 - camera_index = 1 + # Save images from camera at camera_index # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy. 
- if sim.backend == "torch": - # tensordict allows easy indexing of tensors in the dictionary - single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy") - else: - # for numpy, we need to manually index the data - single_cam_data = dict() - for key, value in camera.data.output.items(): - single_cam_data[key] = value[camera_index] + # tensordict allows easy indexing of tensors in the dictionary + single_cam_data = convert_dict_to_backend(camera.data.output[camera_index], backend="numpy") + # Extract the other information single_cam_info = camera.data.info[camera_index] @@ -192,41 +248,22 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]} rep_writer.write(rep_output) - # Draw pointcloud - if not args_cli.headless and args_cli.draw: - # Pointcloud in world frame - points_3d_cam = unproject_depth( - camera.data.output["distance_to_image_plane"], camera.data.intrinsic_matrices + # Draw pointcloud if there is a GUI and --draw has been passed + if sim.has_gui() and args_cli.draw and "distance_to_image_plane" in camera.data.output.keys(): + # Derive pointcloud from camera at camera_index + pointcloud = create_pointcloud_from_depth( + intrinsic_matrix=camera.data.intrinsic_matrices[camera_index], + depth=camera.data.output[camera_index]["distance_to_image_plane"], + position=camera.data.pos_w[camera_index], + orientation=camera.data.quat_w_ros[camera_index], + device=sim.device, ) - points_3d_world = transform_points(points_3d_cam, camera.data.pos_w, camera.data.quat_w_ros) - - # Check methods are valid - im_height, im_width = camera.image_shape - # -- project points to (u, v, d) - reproj_points = project_points(points_3d_cam, camera.data.intrinsic_matrices) - reproj_depths = reproj_points[..., -1].view(-1, im_width, im_height).transpose_(1, 2) - sim_depths = camera.data.output["distance_to_image_plane"].squeeze(-1) - torch.testing.assert_allclose(reproj_depths, sim_depths) - - # Convert to numpy for visualization - if not isinstance(points_3d_world, np.ndarray): - points_3d_world = points_3d_world.cpu().numpy() - # Clear any existing points - draw_interface.clear_points() - # Obtain drawing settings - num_batch = points_3d_world.shape[0] - num_points = points_3d_world.shape[1] - points_size = [1.25] * num_points - # Fix random seed - random.seed(0) - # Visualize the points - for index in range(num_batch): - # generate random color - color = [random.random() for _ in range(3)] - color += [1.0] - # plain color for points - points_color = [color] * num_points - draw_interface.draw_points(points_3d_world[index].tolist(), points_color, points_size) + + # In the first few steps, things are still being instanced and Camera.data + # can be empty. If we attempt to visualize an empty pointcloud it will crash + # the sim, so we check that the pointcloud is not empty. + if pointcloud.size()[0] > 0: + pc_markers.visualize(translations=pointcloud) def main():
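Usage sketch (illustrative, not part of the patch itself): the snippet below shows how the camera options introduced in this change fit together: the new ``*_fast`` segmentation annotators in ``data_types``, the ``semantic_filter`` predicate, and the per-annotation ``colorize_*`` flags. It assumes an Isaac Sim application is already running (e.g., launched through ``AppLauncher`` as in the tutorial script), and the prim path and resolution are placeholder values rather than anything mandated by this change.

.. code-block:: python

    import omni.isaac.orbit.sim as sim_utils
    from omni.isaac.orbit.sensors.camera import Camera, CameraCfg

    # Camera with the new segmentation annotators and per-type colorization flags.
    # The filter "class:*" keeps prims tagged with the "class" semantic type, which
    # mirrors the behavior of the removed `semantic_types=["class"]` default.
    camera_cfg = CameraCfg(
        prim_path="/World/Origin_.*/CameraSensor",  # placeholder prim path
        update_period=0,
        height=480,
        width=640,
        data_types=[
            "rgb",
            "semantic_segmentation",
            "instance_segmentation_fast",
            "instance_id_segmentation_fast",
        ],
        semantic_filter="class:*",
        colorize_semantic_segmentation=True,
        colorize_instance_segmentation=True,
        colorize_instance_id_segmentation=False,
        spawn=sim_utils.PinholeCameraCfg(
            focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 1.0e5)
        ),
    )
    camera = Camera(cfg=camera_cfg)

    # After sim.reset() and a few sim.step() / camera.update(dt) calls (the render
    # targets need a few frames to fill, see the "Known Issues" note), colorized
    # outputs are uint8 images of shape (N, H, W, 4), while non-colorized ones are
    # (N, H, W) ID buffers, where N is the number of camera prims matched.
    print(camera.data.output["semantic_segmentation"].shape)          # colorized -> (N, H, W, 4)
    print(camera.data.output["instance_id_segmentation_fast"].shape)  # not colorized -> (N, H, W)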