init

pytorch · vmoens · Mar 15, 2023 · Mar 15, 2023 · Mar 15, 2023 · Mar 15, 2023
commit 73f411df02f4d44d08689ab582ccd005334550e5
diff --git a/torchrl/data/replay_buffers/__init__.py b/torchrl/data/replay_buffers/__init__.py
@@ -10,4 +10,11 @@
  TensorDictPrioritizedReplayBuffer,
  TensorDictReplayBuffer,
 )
+from .samplers import (
+ PrioritizedSampler,
+ RandomSampler,
+ Sampler,
+ SamplerWithoutReplacement,
+)
 from .storages import LazyMemmapStorage, LazyTensorStorage, ListStorage, Storage
+from .writers import RoundRobinWriter, Writer
diff --git a/torchrl/data/replay_buffers/replay_buffers.py b/torchrl/data/replay_buffers/replay_buffers.py
@@ -238,20 +238,26 @@ def _sample(self, batch_size: int) -> Tuple[Any, dict]:
 
  return data, info
 
- def sample(self, batch_size: int, return_info: bool = False) -> Any:
+ def sample(
+ self, batch_size: Optional[int] = None, return_info: bool = False
+ ) -> Any:
  """Samples a batch of data from the replay buffer.
 
  Uses Sampler to sample indices, and retrieves them from Storage.
 
  Args:
- batch_size (int): size of data to be collected.
+ batch_size (int, optional): size of data to be collected. If none
+ is provided, this method will sample a batch-size as indicated
+ by the sampler.
  return_info (bool): whether to return info. If True, the result
  is a tuple (data, info). If False, the result is the data.
 
  Returns:
  A batch of data selected in the replay buffer.
  A tuple containing this batch and info if return_info flag is set to True.
  """
+ if batch_size is None:
+ batch_size = self._sampler.batch_size
  if not self._prefetch:
  ret = self._sample(batch_size)
  else:
@@ -296,6 +302,15 @@ def insert_transform(self, index: int, transform: "Transform") -> None: # noqa-
  transform.eval()
  self._transform.insert(index, transform)
 
+ def __iter__(self):
+ batch_size = self._sampler.batch_size
+ while True:
+ data = self.sample()
+ if self._sampler.ran_out() and len(data) < batch_size:
+ # do not yield result if ran out
+ break
+ yield data
+
 
 class PrioritizedReplayBuffer(ReplayBuffer):
  """Prioritized replay buffer.
@@ -446,14 +461,19 @@ def update_tensordict_priority(self, data: TensorDictBase) -> None:
  self.update_priority(index, priority)
 
  def sample(
- self, batch_size: int, include_info: bool = False, return_info: bool = False
+ self,
+ batch_size: Optional[int] = None,
+ include_info: bool = False,
+ return_info: bool = False,
  ) -> TensorDictBase:
  """Samples a batch of data from the replay buffer.
 
  Uses Sampler to sample indices, and retrieves them from Storage.
 
  Args:
- batch_size (int): size of data to be collected.
+ batch_size (int, optional): size of data to be collected. If none
+ is provided, this method will sample a batch-size as indicated
+ by the sampler.
  include_info (bool): whether to add info to the returned tensordict.
  return_info (bool): whether to return info. If True, the result
  is a tuple (data, info). If False, the result is the data.
@@ -462,6 +482,8 @@ def sample(
  A tensordict containing a batch of data selected in the replay buffer.
  A tuple containing this tensordict and info if return_info flag is set to True.
  """
+ if batch_size is None:
+ batch_size = self._sampler.batch_size
  data, info = super().sample(batch_size, return_info=True)
  if include_info:
  for k, v in info.items():

diff --git a/torchrl/data/replay_buffers/samplers.py b/torchrl/data/replay_buffers/samplers.py
@@ -5,7 +5,7 @@
 
 from abc import ABC, abstractmethod
 from copy import deepcopy
-from typing import Any, Dict, Tuple, Union
+from typing import Any, Dict, Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -52,9 +52,22 @@ def state_dict(self) -> Dict[str, Any]:
  def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
  return
 
+ def ran_out(self) -> bool:
+ # by default, samplers never run out
+ return False
+
 
 class RandomSampler(Sampler):
- """A uniformly random sampler for composable replay buffers."""
+ """A uniformly random sampler for composable replay buffers.
+
+ Args:
+ batch_size (int, optional): if provided, the batch size to be used by
+ the replay buffer when calling :meth:`~.ReplayBuffer.sample`.
+
+ """
+
+ def __init__(self, batch_size: Optional[int] = None):
+ self.batch_size = batch_size
 
  def sample(self, storage: Storage, batch_size: int) -> Tuple[torch.Tensor, dict]:
  index = torch.randint(0, len(storage), (batch_size,))
@@ -65,6 +78,8 @@ class SamplerWithoutReplacement(Sampler):
  """A data-consuming sampler that ensures that the same sample is not present in consecutive batches.
 
  Args:
+ batch_size (int, optional): if provided, the batch size to be used by
+ the replay buffer when calling :meth:`~.ReplayBuffer.sample`.
  drop_last (bool, optional): if True, the last incomplete sample (if any) will be dropped.
  If False, this last sample will be kept and (unlike with torch dataloaders)
  completed with other samples from a fresh indices permutation.
@@ -81,16 +96,30 @@ class SamplerWithoutReplacement(Sampler):
 
  """
 
- def __init__(self, drop_last: bool = False):
+ def __init__(self, batch_size: Optional[int] = None, drop_last: bool = False):
  self._sample_list = None
  self.len_storage = 0
  self.drop_last = drop_last
+ self.batch_size = batch_size
+ self._ran_out = False
 
  def _single_sample(self, len_storage, batch_size):
  index = self._sample_list[:batch_size]
  self._sample_list = self._sample_list[batch_size:]
- if not self._sample_list.numel():
+
+ # check if we have enough elements for one more batch, assuming same batch size
+ # or self.batch_size
+ _batch_size = self.batch_size
+ if not _batch_size:
+ _batch_size = batch_size
+ if len(self._sample_list) < _batch_size and self.drop_last:
+ self._ran_out = True
+ self._sample_list = torch.randperm(len_storage)
+ elif not self._sample_list.numel():
+ self._ran_out = True
  self._sample_list = torch.randperm(len_storage)
+ else:
+ self._ran_out = False
  return index
 
  def sample(self, storage: Storage, batch_size: int) -> Tuple[Any, dict]:
@@ -107,17 +136,10 @@ def sample(self, storage: Storage, batch_size: int) -> Tuple[Any, dict]:
  )
  self.len_storage = len_storage
  index = self._single_sample(len_storage, batch_size)
- while index.numel() < batch_size:
- if self.drop_last:
- index = self._single_sample(len_storage, batch_size)
- else:
- index = torch.cat(
- [
- index,
- self._single_sample(len_storage, batch_size - index.numel()),
- ],
- 0,
- )
+ # we 'always' return the indices. The 'drop_last' just instructs the
+ # sampler to turn to 'ran_out() = True` whenever the next sample
+ # will be too short. This will be read by the replay buffer
+ # as a signal for an early break of the __iter__().
  return index, {}
 
 
@@ -137,6 +159,8 @@ class PrioritizedSampler(Sampler):
  reduction (str, optional): the reduction method for multidimensional
  tensordicts (ie stored trajectories). Can be one of "max", "min",
  "median" or "mean".
+ batch_size (int, optional): if provided, the batch size to be used by
+ the replay buffer when calling :meth:`~.ReplayBuffer.sample`.
 
  """
 
@@ -148,6 +172,7 @@ def __init__(
  eps: float = 1e-8,
  dtype: torch.dtype = torch.float,
  reduction: str = "max",
+ batch_size: Optional[int] = None,
  ) -> None:
  if alpha <= 0:
  raise ValueError(
@@ -161,6 +186,7 @@ def __init__(
  self._beta = beta
  self._eps = eps
  self.reduction = reduction
+ self.batch_size = batch_size
  if dtype in (torch.float, torch.FloatType, torch.float32):
  self._sum_tree = SumSegmentTreeFp32(self._max_capacity)
  self._min_tree = MinSegmentTreeFp32(self._max_capacity)