[Doc] Publishing on pytorch.org #582

Merged
merged 26 commits into from
Oct 18, 2022
init
vmoens committed Oct 18, 2022
commit 2a76ac34162da25c0793e64bc065cc5a28291cb6
5 changes: 5 additions & 0 deletions docs/source/conf.py
@@ -53,13 +53,15 @@
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.coverage",
"sphinx.ext.doctest",
"sphinx.ext.intersphinx",
"sphinx.ext.todo",
"sphinx.ext.mathjax",
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx.ext.duration",
"sphinx.ext.autosectionlabel",
"sphinx_gallery.gen_gallery",
"sphinx_autodoc_typehints",
"sphinxcontrib.aafig",
@@ -75,6 +77,7 @@
napoleon_use_ivar = True
napoleon_numpy_docstring = False
napoleon_google_docstring = True
autosectionlabel_prefix_document = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -237,3 +240,5 @@ def handle_item(fieldarg, content):
TypedField.make_field = patched_make_field

aafig_default_options = dict(scale=1.5, aspect=1.0, proportional=True)

coverage_show_missing_items = True
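
With the newly added ``sphinx.ext.autosectionlabel`` extension and ``autosectionlabel_prefix_document = True``, any section heading can be cross-referenced by prefixing its title with the path of the document that contains it. A small illustration — the two targets below are the ones this PR itself uses in ``index.rst``::

    :ref:`transforms <reference/envs:Transforms>`
    :ref:`returns <reference/objectives:Returns>`
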
25 changes: 22 additions & 3 deletions docs/source/index.rst
@@ -3,11 +3,30 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.

Welcome to torchrl's documentation!
===================================
TorchRL
=======

TorchRL is an open-source Reinforcement Learning (RL) library for PyTorch.

It provides PyTorch- and Python-first, low- and high-level abstractions for RL that are intended to be efficient, modular, documented and properly tested.
The code is aimed at supporting research in RL. Most of it is written in Python in a highly modular way, so that researchers can easily swap components, transform them or write new ones with little effort.

This repo attempts to align with the existing PyTorch ecosystem libraries in that it has a "dataset pillar"
:doc:`(environments) <reference/envs>`,
:ref:`transforms <reference/envs:Transforms>`,
:doc:`models <reference/modules>`,
data utilities (e.g. collectors and containers), etc.
TorchRL aims at having as few dependencies as possible (the Python standard library, NumPy and PyTorch).
Common environment libraries (e.g. OpenAI Gym) are optional.

At the lower level, TorchRL comes with a set of highly reusable functionals
for :doc:`cost functions <reference/objectives>`, :ref:`returns <reference/objectives:Returns>` and data processing.

TorchRL aims at high modularity and good runtime performance.
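
As a rough sketch of how these pieces fit together (class names are taken from the reference pages below; the exact call signatures are assumptions and may differ across versions)::

    from torchrl.envs.libs.gym import GymEnv

    env = GymEnv("Pendulum-v1")              # gym is an optional dependency
    tensordict = env.reset()                 # environment I/O is carried in TensorDicts
    tensordict = env.rand_step(tensordict)   # one random step, written into the same TensorDict
    print(tensordict)                        # observation, action, reward, done, ...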


.. toctree::
:maxdepth: 2
:maxdepth: 3
:caption: Contents:

reference/index
33 changes: 7 additions & 26 deletions docs/source/reference/data.rst
@@ -59,33 +59,14 @@ as shape, device, dtype and domain.
MultOneHotDiscreteTensorSpec
CompositeSpec

Transforms
----------

In most cases, the raw output of an environment must be treated before being passed to another object (such as a
policy or a value operator). To do this, TorchRL provides a set of transforms that aim at reproducing the transform
logic of `torch.distributions.Transform` and `torchvision.transforms`.

Utils
-----

.. autosummary::
:toctree: generated/
:template: rl_template_noinherit.rst

Transform
TransformedEnv
Compose
CatTensors
CatFrames
RewardClipping
Resize
GrayScale
ToTensorImage
ObservationNorm
RewardScaling
ObservationTransform
FiniteTensorDictCheck
DoubleToFloat
NoopResetEnv
BinerizeReward
PinMemoryTransform
VecNorm
:template: rl_template.rst

utils.expand_as_right
utils.expand_right
MultiStep
56 changes: 54 additions & 2 deletions docs/source/reference/envs.rst
@@ -3,11 +3,12 @@
torchrl.envs package
====================


.. autosummary::
:toctree: generated/
:template: rl_template.rst

EnvBase
GymLikeEnv
SerialEnv
ParallelEnv

@@ -23,4 +24,55 @@ Helpers
get_available_libraries
set_exploration_mode
exploration_mode
make_tensordict
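
A hedged example of the exploration helpers listed above — the module path ``torchrl.envs.utils`` and the ``"random"`` mode name are assumptions::

    from torchrl.envs.utils import set_exploration_mode, exploration_mode

    with set_exploration_mode("random"):
        # inside this block, exploration-aware policies sample rather than act deterministically
        print(exploration_mode())  # -> "random"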

Libraries
---------
.. currentmodule:: torchrl.envs.libs

.. autosummary::
:toctree: generated/
:template: rl_template_fun.rst

gym.GymEnv
gym.GymWrapper
dm_control.DMControlEnv
dm_control.DMControlWrapper
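
Roughly speaking, the ``*Env`` classes build the backend environment from an identifier, while the ``*Wrapper`` classes wrap an environment instantiated by hand; a sketch with assumed constructor arguments::

    import gym
    from torchrl.envs.libs.gym import GymEnv, GymWrapper

    env_a = GymEnv("Pendulum-v1")                # TorchRL creates the gym env from its registry id
    env_b = GymWrapper(gym.make("Pendulum-v1"))  # wraps a pre-built gym env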

Transforms
----------
.. currentmodule:: torchrl.envs.transforms

In most cases, the raw output of an environment must be treated before being passed to another object (such as a
policy or a value operator). To do this, TorchRL provides a set of transforms that aim at reproducing the transform
logic of `torch.distributions.Transform` and `torchvision.transforms`.
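
A short sketch of how the classes listed below compose — the pipeline and constructor arguments are illustrative assumptions, not taken from this PR::

    from torchrl.envs.libs.gym import GymEnv
    from torchrl.envs.transforms import TransformedEnv, Compose, ToTensorImage, GrayScale, Resize

    base_env = GymEnv("Pendulum-v1", from_pixels=True)
    # uint8 frames -> float CHW tensors -> grayscale -> 84x84
    env = TransformedEnv(base_env, Compose(ToTensorImage(), GrayScale(), Resize(84, 84)))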


.. autosummary::
:toctree: generated/
:template: rl_template_noinherit.rst

Transform
TransformedEnv
RewardClipping
Resize
CenterCrop
GrayScale
Compose
ToTensorImage
ObservationNorm
FlattenObservation
UnsqueezeTransform
RewardScaling
ObservationTransform
CatFrames
FiniteTensorDictCheck
DoubleToFloat
CatTensors
NoopResetEnv
BinarizeReward
PinMemoryTransform
VecNorm
gSDENoise
TensorDictPrimer
R3MTransform
VIPTransform
2 changes: 1 addition & 1 deletion docs/source/reference/index.rst
@@ -4,7 +4,7 @@ API Reference
.. toctree::
:maxdepth: 1

agents
trainers
collectors
data
envs
10 changes: 5 additions & 5 deletions docs/source/reference/modules.rst
@@ -11,10 +11,10 @@ TensorDict modules
:toctree: generated/
:template: rl_template_noinherit.rst

TDModule
ProbabilisticTDModule
TDSequence
TDModuleWrapper
TensorDictModule
ProbabilisticTensorDictModule
TensorDictSequential
TensorDictModuleWrapper
Actor
ProbabilisticActor
ValueOperator
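
To make the renaming above concrete, a minimal sketch of ``TensorDictModule``; the import paths and signatures are assumptions for the TorchRL version this PR targets::

    import torch
    from torch import nn
    from torchrl.data import TensorDict
    from torchrl.modules import TensorDictModule

    module = TensorDictModule(nn.Linear(3, 1), in_keys=["observation"], out_keys=["state_value"])
    td = TensorDict({"observation": torch.randn(4, 3)}, batch_size=[4])
    td = module(td)   # reads "observation", writes "state_value" back into the same TensorDict
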
@@ -26,7 +26,7 @@ TensorDict modules

Hooks
-----
.. currentmodule:: torchrl.modules.td_module.actors
.. currentmodule:: torchrl.modules.tensordict_module.actors

.. autosummary::
:toctree: generated/
23 changes: 21 additions & 2 deletions docs/source/reference/objectives.rst
@@ -23,7 +23,7 @@ DDPG
DDPGLoss

SAC
----
---

.. autosummary::
:toctree: generated/
@@ -41,7 +41,7 @@
REDQLoss

PPO
----
---

.. autosummary::
:toctree: generated/
@@ -51,6 +51,25 @@
ClipPPOLoss
KLPENPPOLoss

Returns
-------

.. autosummary::
:toctree: generated/
:template: rl_template_noinherit.rst

GAE
TDLambdaEstimate
TDEstimate
functional.generalized_advantage_estimate
functional.vec_generalized_advantage_estimate
functional.vec_td_lambda_return_estimate
functional.vec_td_lambda_advantage_estimate
functional.td_lambda_return_estimate
functional.td_lambda_advantage_estimate
functional.td_advantage_estimate
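
For orientation, the advantage functions listed above correspond (up to truncation and implementation details) to the standard generalized advantage estimator; in the usual notation, which is ours rather than the code's,

.. math::

   \delta_t = r_t + \gamma V(s_{t+1}) - V(s_t), \qquad
   \hat{A}_t = \sum_{l \ge 0} (\gamma \lambda)^{l} \, \delta_{t+l},

and the corresponding TD(:math:`\lambda`) return estimate is :math:`\hat{R}_t = V(s_t) + \hat{A}_t`.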


Utils
-----

@@ -1,33 +1,43 @@
.. currentmodule:: torchrl.agents
.. currentmodule:: torchrl.trainers

torchrl.agents package
torchrl.trainers package
========================

Agents
-------
Trainer and hooks
-----------------

.. autosummary::
:toctree: generated/
:template: rl_template.rst

Agent
EnvCreator
Trainer
BatchSubSampler
CountFramesLog
LogReward
Recorder
ReplayBuffer
RewardNormalizer
SelectKeys
UpdateWeights
ClearCudaCache


Builders
--------

.. currentmodule:: torchrl.agents.helpers
.. currentmodule:: torchrl.trainers.helpers

.. autosummary::
:toctree: generated/
:template: rl_template_fun.rst

make_agent
make_trainer
sync_sync_collector
sync_async_collector
make_collector_offpolicy
make_collector_onpolicy
transformed_env_constructor
parallel_env_constructor
make_sac_loss
make_dqn_loss
make_ddpg_loss
@@ -40,35 +50,13 @@ Builders
make_sac_model
make_redq_model
make_replay_buffer
transformed_env_constructor
parallel_env_constructor

Utils
-----


.. autosummary::
:toctree: generated/
:template: rl_template_fun.rst

correct_for_frame_skip
get_stats_random_rollout

Argument parser
---------------


.. autosummary::
:toctree: generated/
:template: rl_template_fun.rst

parser_agent_args
parser_collector_args_offpolicy
parser_collector_args_onpolicy
parser_env_args
parser_loss_args
parser_loss_args_ppo
parser_model_args_continuous
parser_model_args_discrete
parser_recorder_args
parser_replay_args
8 changes: 7 additions & 1 deletion torchrl/objectives/costs/utils.py
@@ -15,7 +15,13 @@
from torchrl.envs.utils import step_mdp
from torchrl.modules import TensorDictModule

__all__ = ["SoftUpdate", "HardUpdate", "distance_loss", "hold_out_params"]
__all__ = [
"SoftUpdate",
"HardUpdate",
"distance_loss",
"hold_out_params",
"hold_out_net",
]


class _context_manager:
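
For reference, the newly exported ``hold_out_net`` is commonly used as a context manager, mirroring ``hold_out_params`` and the ``_context_manager`` helper shown above; ``value_network``, ``compute_actor_loss`` and ``batch`` below are hypothetical placeholders::

    from torchrl.objectives.costs.utils import hold_out_net

    with hold_out_net(value_network):
        # within the block, value_network's parameters are held out of the graph,
        # so gradients from this loss do not flow into them
        loss = compute_actor_loss(value_network, batch)  # hypothetical helper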