amend

pytorch · vmoens · Oct 24, 2023 · Oct 6, 2023 · Oct 6, 2023 · Oct 6, 2023
commit de1ecf24b73e8c88310550d61111d387881f22e1
diff --git a/examples/dqn/config.yaml b/examples/dqn/config.yaml
@@ -26,7 +26,7 @@ max_frames_per_traj: -1
 weight_decay: 0.0
 annealing_frames: 1000000
 init_env_steps: 10000
-record_frames: 50000
+record_frames: 5000
 loss_function: smooth_l1
 batch_transform: 1
 buffer_prefetch: 64

diff --git a/test/test_cost.py b/test/test_cost.py
@@ -7511,7 +7511,8 @@ def test_dt_tensordict_keys(self):
  loss_fn = DTLoss(actor)
 
  default_keys = {
- "action": "action",
+ "action_target": "action",
+ "action_pred": "action",
  }
 
  self.tensordict_keys_test(

diff --git a/test/test_tensordictmodules.py b/test/test_tensordictmodules.py
@@ -2191,10 +2191,10 @@ def test_dt_inference_wrapper(self, online):
  )
  with pytest.raises(
  ValueError,
- match="The action key action was not found in the policy out_keys",
+ match="The value of out_action_key",
  ):
  result = inference_actor(td)
- inference_actor.set_tensor_keys(action=action_key)
+ inference_actor.set_tensor_keys(action=action_key, out_action=action_key)
  result = inference_actor(td)
  # checks that the seq length has disappeared
  assert result.get(action_key).shape == torch.Size([1, 2])

diff --git a/torchrl/envs/libs/pettingzoo.py b/torchrl/envs/libs/pettingzoo.py
@@ -2,10 +2,11 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from __future__ import annotations
 
 import copy
 import importlib
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Tuple, Union
 
 import torch
 from tensordict.tensordict import TensorDictBase
@@ -154,11 +155,11 @@ def __init__(
  "pettingzoo.utils.env.ParallelEnv", # noqa: F821
  "pettingzoo.utils.env.AECEnv", # noqa: F821
  ] = None,
- return_state: Optional[bool] = False,
- group_map: Optional[Union[MarlGroupMapType, Dict[str, List[str]]]] = None,
+ return_state: bool = False,
+ group_map: MarlGroupMapType | Dict[str, List[str]] | None = None,
  use_mask: bool = False,
  categorical_actions: bool = True,
- seed: Optional[int] = None,
+ seed: int | None = None,
  **kwargs,
  ):
  if env is not None:
@@ -401,7 +402,7 @@ def _check_kwargs(self, kwargs: Dict):
  ):
  raise TypeError("env is not of type expected.")
 
- def _init_env(self) -> Optional[int]:
+ def _init_env(self):
  # Add info
  if self.parallel:
  _, info_dict = self._reset_parallel(seed=self.seed)
@@ -477,15 +478,16 @@ def _set_seed(self, seed: int):
  self.reset(seed=self.seed)
 
  def _reset(
- self, tensordict: Optional[TensorDictBase] = None, **kwargs
+ self, tensordict: TensorDictBase | None = None, **kwargs
  ) -> TensorDictBase:
-
- _reset = tensordict.get("_reset", None)
- if _reset is not None and not _reset.all():
- raise RuntimeError(
- f"An attempt to call {type(self)}._reset was made when no reset signal could be found. "
- f"Expected '_reset' entry to be `tensor(True)` or `None` but got `{_reset}`."
- )
+ if tensordict is not None:
+ _reset = tensordict.get("_reset", None)
+ if _reset is not None and not _reset.all():
+ raise RuntimeError(
+ f"An attempt to call {type(self)}._reset was made when no "
+ f"reset signal could be found. Expected '_reset' entry to "
+ f"be `tensor(True)` or `None` but got `{_reset}`."
+ )
  if self.parallel:
  # This resets when any is done
  observation_dict, info_dict = self._reset_parallel(**kwargs)
@@ -878,11 +880,11 @@ def __init__(
  self,
  task: str,
  parallel: bool,
- return_state: Optional[bool] = False,
- group_map: Optional[Union[MarlGroupMapType, Dict[str, List[str]]]] = None,
+ return_state: bool = False,
+ group_map: MarlGroupMapType | Dict[str, List[str]] | None = None,
  use_mask: bool = False,
  categorical_actions: bool = True,
- seed: Optional[int] = None,
+ seed: int | None = None,
  **kwargs,
  ):
  if not _has_pettingzoo:

diff --git a/torchrl/record/recorder.py b/torchrl/record/recorder.py
@@ -136,6 +136,12 @@ def dump(self, suffix: Optional[str] = None) -> None:
  self.count = 0
  self.obs = []
 
+ def _reset(
+ self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase
+ ) -> TensorDictBase:
+ self._call(tensordict_reset)
+ return tensordict_reset
+
 
 class TensorDictRecorder(Transform):
  """TensorDict recorder.
@@ -171,14 +177,14 @@ def __init__(
  self.skip = skip
  self.count = 0
 
- def _call(self, td: TensorDictBase) -> TensorDictBase:
+ def _call(self, tensordict: TensorDictBase) -> TensorDictBase:
  self.count += 1
  if self.count % self.skip == 0:
- _td = td
+ _td = tensordict
  if self.in_keys:
- _td = td.select(*self.in_keys).to_tensordict()
+ _td = tensordict.select(*self.in_keys).to_tensordict()
  self.td.append(_td)
- return td
+ return tensordict
 
  def dump(self, suffix: Optional[str] = None) -> None:
  if suffix is None:
@@ -197,3 +203,9 @@ def dump(self, suffix: Optional[str] = None) -> None:
  self.count = 0
  del self.td
  self.td = []
+
+ def _reset(
+ self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase
+ ) -> TensorDictBase:
+ self._call(tensordict_reset)
+ return tensordict_reset
diff --git a/torchrl/trainers/helpers/trainers.py b/torchrl/trainers/helpers/trainers.py
@@ -258,6 +258,7 @@ def make_trainer(
  )
 
  if recorder is not None:
+ # create recorder object
  recorder_obj = Recorder(
  record_frames=cfg.record_frames,
  frame_skip=cfg.frame_skip,
@@ -266,11 +267,14 @@ def make_trainer(
  record_interval=cfg.record_interval,
  log_keys=cfg.recorder_log_keys,
  )
+ # register recorder
  trainer.register_op(
  "post_steps_log",
  recorder_obj,
  )
+ # call recorder - could be removed
  recorder_obj(None)
+ # create exploration recorder - could be optional
  recorder_obj_explore = Recorder(
  record_frames=cfg.record_frames,
  frame_skip=cfg.frame_skip,
@@ -281,10 +285,12 @@ def make_trainer(
  suffix="exploration",
  out_keys={("next", "reward"): "r_evaluation_exploration"},
  )
+ # register recorder
  trainer.register_op(
  "post_steps_log",
  recorder_obj_explore,
  )
+ # call recorder - could be removed
  recorder_obj_explore(None)
 
  trainer.register_op(