From 82f308cbf5e726a5bfbcd3e5e71fcdff6caa8085 Mon Sep 17 00:00:00 2001
From: Vincent Moens
Date: Sun, 30 Apr 2023 07:56:06 +0100
Subject: [PATCH] [Doc] Fix doc rendering (#1112)

---
 docs/source/index.rst                      |  4 +-
 docs/source/reference/envs.rst             |  8 +-
 docs/source/reference/modules.rst          |  7 +-
 knowledge_base/HABITAT.md                  |  2 +-
 knowledge_base/MUJOCO_INSTALLATION.md      | 97 +++++++++++-----------
 torchrl/modules/__init__.py                |  1 +
 tutorials/sphinx-tutorials/coding_ddpg.py  | 10 +--
 tutorials/sphinx-tutorials/coding_ppo.py   |  2 +-
 tutorials/sphinx-tutorials/dqn_with_rnn.py |  9 +-
 tutorials/sphinx-tutorials/pendulum.py     |  6 +-
 tutorials/sphinx-tutorials/torchrl_envs.py |  1 +
 11 files changed, 72 insertions(+), 75 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 829c45913c1..97ca04cea40 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -38,8 +38,6 @@ Basics
 
    tutorials/coding_ppo
    tutorials/pendulum
-   tutorials/tensordict_tutorial
-   tutorials/tensordict_module
    tutorials/torchrl_demo
 
 Intermediate
@@ -48,7 +46,7 @@ Intermediate
 
 .. toctree::
    :maxdepth: 1
 
-   tutorials/torch_envs
+   tutorials/torchrl_envs
    tutorials/pretrained_models
    tutorials/dqn_with_rnn
 
diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
index 3e524618795..51572307165 100644
--- a/docs/source/reference/envs.rst
+++ b/docs/source/reference/envs.rst
@@ -237,7 +237,7 @@ It is now apparent that this can bring a significant speedup depending on the ki
 operations that is to be computed.
 
 A great advantage of environment wrappers is that one can consult the environment up to that wrapper.
-The same can be achieved with TorchRL transformed environments: the :doc:`parent` attribute will
+The same can be achieved with TorchRL transformed environments: the ``parent`` attribute will
 return a new :class:`TransformedEnv` with all the transforms up to the transform of interest.
 Re-using the example above:
 
@@ -248,15 +248,15 @@ Re-using the example above:
 
 Transformed environments can be used with vectorized environments.
 
-Since each transform uses a :doc:`"in_keys"`/:doc:`"out_keys"` set of keyword arguments, it is
+Since each transform uses a ``"in_keys"``/``"out_keys"`` set of keyword arguments, it is
 also easy to route the transform graph to each component of the observation
 data (e.g. pixels or states etc).
 
-Transforms also have an :doc:`inv` method that is called before
+Transforms also have an ``inv`` method that is called before
 the action is applied in reverse order over the composed transform chain:
 this allows one to apply transforms to data in the environment before the action is taken
 in the environment. The keys to be included in this inverse transform are passed through the
-:doc:`"in_keys_inv"` keyword argument:
+``"in_keys_inv"`` keyword argument:
 
 .. code-block::
    :caption: Inverse transform
 
diff --git a/docs/source/reference/modules.rst b/docs/source/reference/modules.rst
index 1c4cdad343d..c856b78b443 100644
--- a/docs/source/reference/modules.rst
+++ b/docs/source/reference/modules.rst
@@ -305,6 +305,8 @@ Regular modules
     MLP
     ConvNet
     LSTMNet
+    SqueezeLayer
+    Squeeze2dLayer
 
 Algorithm-specific modules
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -391,8 +393,3 @@ Utils
     mappings
     inv_softplus
     biased_softplus
-
-.. currentmodule:: torchrl.modules.models.utils
-
-   SqueezeLayer
-   Squeeze2dLayer
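For context on the `envs.rst` wording above, here is a minimal sketch of how the `in_keys` keyword routes a transform to one entry of the observation data, and how `parent` exposes the environment below a given transform. The environment name and the normalization constants are illustrative, not taken from the patch, and the exact repr may vary across TorchRL versions:

```python
from torchrl.envs import Compose, DoubleToFloat, ObservationNorm, TransformedEnv
from torchrl.envs.libs.gym import GymEnv

env = TransformedEnv(
    GymEnv("Pendulum-v1"),
    Compose(
        # only the "observation" entry is transformed; other keys pass through
        ObservationNorm(in_keys=["observation"], loc=0.0, scale=1.0),
        DoubleToFloat(in_keys=["observation"]),
    ),
)
env.reset()
# ``parent`` returns a TransformedEnv containing every transform *before* this one
print(env.transform[1].parent)
```

diff --git a/knowledge_base/HABITAT.md b/knowledge_base/HABITAT.md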
index a2dca3e4727..46b896a0667 100644
--- a/knowledge_base/HABITAT.md
+++ b/knowledge_base/HABITAT.md
@@ -52,7 +52,7 @@ print([_env for _env in HabitatEnv.available_envs if _env.startswith("Habitat")]
 ```
 2. `ImportError: /usr/lib/x86_64-linux-gnu/libOpenGL.so.0: undefined symbol: _glapi_tls_Current`
 
-   **Solution**: as in [MUJOCO]([url](https://github.com/pytorch/rl/blob/main/knowledge_base/MUJOCO_INSTALLATION.md)) debug, Link conda to the right libOpenGL.so file (replace /path/to/conda and mujoco_env with the proper paths and names):
+   **Solution**: as in the [MUJOCO](https://github.com/pytorch/rl/blob/main/knowledge_base/MUJOCO_INSTALLATION.md) debugging guide, link conda to the right libOpenGL.so file (replace /path/to/conda and mujoco_env with the proper paths and names):
 ```shell
 conda install -y -c conda-forge libglvnd-glx-cos7-x86_64 --force-reinstall
 conda install -y -c conda-forge xvfbwrapper --force-reinstall
```
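The HABITAT fix above and the MuJoCo troubleshooting below both come down to which OpenGL/EGL libraries the process resolves at load time. A small diagnostic sketch using only the standard library; it probes the dynamic linker and does not prove that rendering actually works:

```python
import ctypes.util

# None for "EGL" or "OpenGL" usually means the packages installed via the
# conda commands above are missing or not on the library search path
for name in ("OpenGL", "EGL", "GL", "OSMesa"):
    print(f"{name}: {ctypes.util.find_library(name)}")
```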
diff --git a/knowledge_base/MUJOCO_INSTALLATION.md b/knowledge_base/MUJOCO_INSTALLATION.md
index af7ad8fb299..e292998bcd8 100644
--- a/knowledge_base/MUJOCO_INSTALLATION.md
+++ b/knowledge_base/MUJOCO_INSTALLATION.md
@@ -119,7 +119,18 @@ $ python
 ```
 This should trigger the building pipeline.
 
-**Known issues**
+
+**Sanity check**
+
+To check that your mujoco-py has been built against the GPU, run
+```python
+>>> import mujoco_py
+>>> print(mujoco_py.cymj)  # check it has the tag: linuxgpuextensionbuilder
+```
+The result should contain a filename with the tag `linuxgpuextensionbuilder`.
+
+## Common Issues during import or when rendering Mujoco Environments
+
 The above setup will most likely cause some problems. We give a list of known
 issues when running `import mujoco_py` and some troubleshooting for each of them:
@@ -181,18 +192,8 @@ issues when running `import mujoco_py` and some troubleshooting for each of them
 
    _Solution_: This can usually be solved by setting EGL as your mujoco_gl backend:
    `MUJOCO_GL=egl python myscript.py`
 
-**Sanity check**
-
-To check that your mujoco-py has been built against the GPU, run
-```python
->>> import mujoco_py
->>> print(mujoco_py.cymj)  # check it has the tag: linuxgpuextensionbuilder
-```
-The result should contain a filename with the tag `linuxgpuextensionbuilder`.
-
-## Common Issues when rendering Mujoco Environments
-1. RuntimeError with error stack like this when running jobs using schedulers like slurm:
+7. RuntimeError with error stack like this when running jobs using schedulers like slurm:
 
 ```
   File "mjrendercontext.pyx", line 46, in mujoco_py.cymj.MjRenderContext.__init__
@@ -209,59 +210,59 @@ RuntimeError: Failed to initialize OpenGL
 
 This can be worked around by setting the `GPUS` environment variable to the
 global device id. For slurm, it can be obtained using the `SLURM_STEP_GPUS`
 environment variable.
 
-2. Rendered images are completely black.
+8. Rendered images are completely black.
 
    _Solution_: Make sure to call `env.render()` before reading the pixels.
 
-3. `patchelf` dependency is missing.
+9. `patchelf` dependency is missing.
 
    _Solution_: Install using `conda install patchelf` or `pip install patchelf`
 
-4. Errors like "Onscreen rendering needs 101 device"
+10. Errors like "Onscreen rendering needs 101 device"
 
-   _Solution_: Make sure to set `DISPLAY` environment variable correctly.
+    _Solution_: Make sure to set the `DISPLAY` environment variable correctly.
 
-5. `ImportError: Cannot initialize a headless EGL display.`
+11. `ImportError: Cannot initialize a headless EGL display.`
 
-   _Solution_: Make sure you have installed mujoco and all its dependencies (see instructions above).
-   Make sure you have set the `MUJOCO_GL=egl`.
-   Make sure you have a GPU accessible on your machine.
+    _Solution_: Make sure you have installed mujoco and all its dependencies (see instructions above).
+    Make sure you have set `MUJOCO_GL=egl`.
+    Make sure you have a GPU accessible on your machine.
 
-6. `cannot find -lGL: No such file or directory`
+12. `cannot find -lGL: No such file or directory`
 
-   _Solution_: call `conda install -c anaconda mesa-libgl-devel-cos6-x86_64`
+    _Solution_: call `conda install -c anaconda mesa-libgl-devel-cos6-x86_64`
 
-7. ```
-   RuntimeError: Failed to initialize OpenGL
-   ```
+13. ```
+    RuntimeError: Failed to initialize OpenGL
+    ```
 
-   _Solution_: Install libEGL:
+    _Solution_: Install libEGL:
 
-   - Ubuntu: `sudo apt install libegl-dev libegl`
-   - CentOS: `sudo yum install mesa-libEGL mesa-libEGL-devel`
-   - Conda: `conda install -c anaconda mesa-libegl-cos6-x86_64`
+    - Ubuntu: `sudo apt install libegl-dev libegl`
+    - CentOS: `sudo yum install mesa-libEGL mesa-libEGL-devel`
+    - Conda: `conda install -c anaconda mesa-libegl-cos6-x86_64`
 
-8. ```
-   fatal error: X11/Xlib.h: No such file or directory
-   | #include <X11/Xlib.h>
-   |          ^~~~~~~~~~~~
-   ```
+14. ```
+    fatal error: X11/Xlib.h: No such file or directory
+    | #include <X11/Xlib.h>
+    |          ^~~~~~~~~~~~
+    ```
 
-   _Solution_: Install X11:
+    _Solution_: Install X11:
 
-   - Ubuntu: `sudo apt install libx11-dev`
-   - CentOS: `sudo yum install libX11`
-   - Conda: `conda install -c conda-forge xorg-libx11`
+    - Ubuntu: `sudo apt install libx11-dev`
+    - CentOS: `sudo yum install libX11`
+    - Conda: `conda install -c conda-forge xorg-libx11`
 
-9. ```
-   fatal error: GL/osmesa.h: No such file or directory
-   1 | #include <GL/osmesa.h>
-     |          ^~~~~~~~~~~~~
-   compilation terminated.
-   ```
+15. ```
+    fatal error: GL/osmesa.h: No such file or directory
+    1 | #include <GL/osmesa.h>
+      |          ^~~~~~~~~~~~~
+    compilation terminated.
+    ```
 
-   _Solution_: Install Osmesa:
+    _Solution_: Install Osmesa:
 
-10. Ubuntu: `sudo apt-get install libosmesa6-dev`
-11. CentOS: `sudo yum install mesa-libOSMesa-devel`
-12. Conda: `conda install -c menpo osmesa`
+    - Ubuntu: `sudo apt-get install libosmesa6-dev`
+    - CentOS: `sudo yum install mesa-libOSMesa-devel`
+    - Conda: `conda install -c menpo osmesa`
diff --git a/torchrl/modules/__init__.py b/torchrl/modules/__init__.py
index 8d466f3baa6..3909bc8b349 100644
--- a/torchrl/modules/__init__.py
+++ b/torchrl/modules/__init__.py
@@ -7,6 +7,7 @@
     Delta,
     distributions_maps,
     IndependentNormal,
+    MaskedCategorical,
     NormalParamWrapper,
     OneHotCategorical,
     TanhDelta,
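The `__init__.py` hunk above newly re-exports `MaskedCategorical`. A short usage sketch, assuming the `logits`/`mask` keyword arguments of that distribution; check the class docstring for the authoritative signature:

```python
import torch
from torchrl.modules import MaskedCategorical

logits = torch.randn(4)                         # scores for 4 actions
mask = torch.tensor([True, True, False, True])  # action 2 is invalid

dist = MaskedCategorical(logits=logits, mask=mask)
samples = dist.sample((100,))
assert (samples != 2).all()    # the masked-out action is never drawn
print(dist.log_prob(samples))  # log-probabilities renormalized over valid actions
```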
diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py
index 521640c5c39..f11fc20b06a 100644
--- a/tutorials/sphinx-tutorials/coding_ddpg.py
+++ b/tutorials/sphinx-tutorials/coding_ddpg.py
@@ -18,7 +18,7 @@
 #
 # To this aim, we will be focusing on DDPG, which is a relatively straightforward
 # algorithm to code.
-# DDPG (`Deep Deterministic Policy Gradient <https://arxiv.org/abs/1509.02971>_`_)
+# DDPG (`Deep Deterministic Policy Gradient <https://arxiv.org/abs/1509.02971>`_)
 # is a simple continuous control algorithm. It consists in learning a
 # parametric value function for an action-observation pair, and
 # then learning a policy that outputs actions that maximise this value
@@ -71,7 +71,7 @@
 
 ###############################################################################
 # torchrl :class:`~torchrl.objectives.LossModule`
-# ----------------------------------------------
+# -----------------------------------------------
 #
 # TorchRL provides a series of losses to use in your training scripts.
 # The aim is to have losses that are easily reusable/swappable and that have
@@ -614,8 +614,6 @@ def make_t_env():
 #   training script when dealing with frame skipping as this may lead to
 #   biased comparisons between training strategies.
 #
-
-###############################################################################
 # Scaling the reward helps us control the signal magnitude for a more
 # efficient learning.
 reward_scaling = 5.0
@@ -963,7 +961,7 @@ def make_replay_buffer(buffer_size, batch_size, random_crop_len, prefetch=3, prb
 
 ###############################################################################
 # Replay buffer storage and batch size
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # TorchRL replay buffer counts the number of elements along the first dimension.
 # Since we'll be feeding trajectories to our buffer, we need to adapt the buffer
@@ -1051,7 +1049,7 @@ def ceil_div(x, y):
 #   estimates.
 #
 # Target network updater
-# ^^^^^^^^^^^^^^^^^^^^^^
+# ~~~~~~~~~~~~~~~~~~~~~~
 #
 # Target networks are a crucial part of off-policy RL algorithms.
 # Updating the target network parameters is made easy thanks to the
diff --git a/tutorials/sphinx-tutorials/coding_ppo.py b/tutorials/sphinx-tutorials/coding_ppo.py
index e766cbc4109..56b6f831fb2 100644
--- a/tutorials/sphinx-tutorials/coding_ppo.py
+++ b/tutorials/sphinx-tutorials/coding_ppo.py
@@ -172,7 +172,7 @@
 frame_skip = 1
 frames_per_batch = 1000 // frame_skip
 # For a complete training, bring the number of frames up to 1M
-total_frames = 50_000 // frame_skip
+total_frames = 10_000 // frame_skip
 
 ######################################################################
 # PPO parameters
diff --git a/tutorials/sphinx-tutorials/dqn_with_rnn.py b/tutorials/sphinx-tutorials/dqn_with_rnn.py
index 5444f948137..6fbc8218ffb 100644
--- a/tutorials/sphinx-tutorials/dqn_with_rnn.py
+++ b/tutorials/sphinx-tutorials/dqn_with_rnn.py
@@ -394,11 +394,12 @@
 ######################################################################
 # Let's plot our results:
 #
-from matplotlib import pyplot as plt
+if traj_lens:
+    from matplotlib import pyplot as plt
 
-plt.plot(traj_lens)
-plt.xlabel("Test collection")
-plt.title("Test trajectory lengths")
+    plt.plot(traj_lens)
+    plt.xlabel("Test collection")
+    plt.title("Test trajectory lengths")
 
 ######################################################################
 # Conclusion
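The `coding_ddpg.py` comment fixed above summarizes DDPG as learning a value function of (observation, action) pairs plus a policy that outputs actions maximising that value. A toy sketch of that actor update in plain PyTorch; the network sizes are arbitrary, and the tutorial itself relies on `torchrl.objectives.DDPGLoss` rather than a hand-rolled loss:

```python
import torch
from torch import nn

obs_dim, act_dim = 3, 1
actor = nn.Sequential(
    nn.Linear(obs_dim, 64), nn.Tanh(), nn.Linear(64, act_dim), nn.Tanh()
)
qvalue = nn.Sequential(nn.Linear(obs_dim + act_dim, 64), nn.Tanh(), nn.Linear(64, 1))

obs = torch.randn(32, obs_dim)
# deterministic policy gradient: push the policy towards actions with a
# higher learned Q-value, i.e. minimise -Q(s, pi(s))
actor_loss = -qvalue(torch.cat([obs, actor(obs)], dim=-1)).mean()
actor_loss.backward()
```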
diff --git a/tutorials/sphinx-tutorials/pendulum.py b/tutorials/sphinx-tutorials/pendulum.py
index 5e058cbe9f2..8791762e956 100644
--- a/tutorials/sphinx-tutorials/pendulum.py
+++ b/tutorials/sphinx-tutorials/pendulum.py
@@ -139,7 +139,7 @@
 # 0 too.
 #
 # Coding the effect of an action: :func:`~torchrl.envs.EnvBase._step`
-# ------------------------------------------------------------------
+# -------------------------------------------------------------------
 #
 # The step method is the first thing to consider, as it will encode
 # the simulation that is of interest to us. In TorchRL, the
@@ -263,7 +263,7 @@ def angle_normalize(x):
 
 ######################################################################
 # Resetting the simulator: :func:`~torchrl.envs.EnvBase._reset`
-# ------------------------------------------------------------
+# -------------------------------------------------------------
 #
 # The second method we need to care about is the
 # :meth:`~torchrl.envs.EnvBase._reset` method. Like
@@ -464,7 +464,7 @@ def _set_seed(self, seed: Optional[int]):
 
 ######################################################################
 # Wrapping things together: the :class:`~torchrl.envs.EnvBase` class
-# -----------------------------------------------------------------
+# ------------------------------------------------------------------
 #
 # We can finally put together the pieces and design our environment class.
 # The specs initialization needs to be performed during the environment
diff --git a/tutorials/sphinx-tutorials/torchrl_envs.py b/tutorials/sphinx-tutorials/torchrl_envs.py
index 8e59bbb7243..ffeafe2cbb8 100644
--- a/tutorials/sphinx-tutorials/torchrl_envs.py
+++ b/tutorials/sphinx-tutorials/torchrl_envs.py
@@ -43,6 +43,7 @@
 
 ###############################################################################
 # The list of available environments can be accessed through this command:
+#
 GymEnv.available_envs[:10]
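The `pendulum.py` headings corrected above name the three methods a custom TorchRL environment implements: `_step`, `_reset` and `_set_seed`. A bare-bones skeleton of that structure, assuming the 2023-era spec names (`CompositeSpec` and friends were renamed in later releases) and deliberately trivial dynamics; see the tutorial itself for the real environment:

```python
from typing import Optional

import torch
from tensordict import TensorDict
from torchrl.data import (
    BoundedTensorSpec,
    CompositeSpec,
    UnboundedContinuousTensorSpec,
)
from torchrl.envs import EnvBase


class ToyEnv(EnvBase):
    """A 1D point pushed around by the action; purely illustrative."""

    def __init__(self, device="cpu"):
        super().__init__(device=device, batch_size=[])
        self.observation_spec = CompositeSpec(
            observation=UnboundedContinuousTensorSpec(shape=(1,))
        )
        # newer TorchRL versions spell these arguments ``low``/``high``
        self.action_spec = BoundedTensorSpec(minimum=-1.0, maximum=1.0, shape=(1,))
        self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))

    def _reset(self, tensordict):
        # encode the simulator's initial state
        return TensorDict({"observation": torch.zeros(1)}, batch_size=[])

    def _step(self, tensordict):
        # encode one simulation step; EnvBase relocates the result under "next"
        obs = tensordict["observation"] + tensordict["action"]
        return TensorDict(
            {
                "observation": obs,
                "reward": -obs.abs(),
                "done": torch.zeros(1, dtype=torch.bool),
            },
            batch_size=[],
        )

    def _set_seed(self, seed: Optional[int]):
        torch.manual_seed(seed)


# quick structural check
env = ToyEnv()
print(env.rollout(3))
```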