TopPSampler docstrings (#7205)

* TopPSampler docstrings
* inline quote
deepset-ai · Feb 26, 2024 · e0b6924 · e0b6924
1 parent 8838c02
commit e0b6924
Showing 1 changed file with 8 additions and 11 deletions.
diff --git a/haystack/components/samplers/top_p.py b/haystack/components/samplers/top_p.py
@@ -16,8 +16,8 @@ class TopPSampler:
  """
  Implements top-p (nucleus) sampling for document filtering based on cumulative probability scores.
 
- This class provides functionality to filter a list of documents by selecting those whose scores fall
- within the top 'p' percent of the cumulative distribution. The method is useful for focusing on high-probability
+ This component filters a list of documents by selecting those whose scores fall
+ within the top 'p' fraction of the cumulative distribution. It is useful for focusing on high-probability
  documents while filtering out less relevant ones based on their assigned scores.
 
  Usage example:
@@ -44,9 +44,9 @@ def __init__(self, top_p: float = 1.0, score_field: Optional[str] = None):
  Creates an instance of TopPSampler.
 
  :param top_p: Float between 0 and 1 representing the cumulative probability threshold for document selection.
- Defaults to 1.0, indicating no filtering (all documents are retained).
+  A value of 1.0 indicates no filtering (all documents are retained).
  :param score_field: Name of the field in each document's metadata that contains the score. If None, the default
- document score field is used.
+  document score field is used.
  """
  torch_import.check()
 
@@ -57,17 +57,14 @@ def __init__(self, top_p: float = 1.0, score_field: Optional[str] = None):
  def run(self, documents: List[Document], top_p: Optional[float] = None):
  """
  Filters documents using top-p sampling based on their scores.
+ If the specified top_p results in no documents being selected (especially in cases of a low top_p value), the
+ method returns the document with the highest similarity score.
 
  :param documents: List of Document objects to be filtered.
  :param top_p: Optional. A float to override the cumulative probability threshold set during initialization.
- If None, the class's top_p value is used.
- :return: A dictionary with a key 'documents' containing the list of filtered Document objects.
-
- This method applies top-p sampling to filter out documents. It selects those documents whose similarity scores
- are within the top 'p' percent of the cumulative distribution, based on the specified or default top_p value.
 
- If the specified top_p results in no documents being selected (especially in cases of a low top_p value), the
- method defaults to returning the document with the highest similarity score.
+ :returns: A dictionary with the following key:
+  - `documents`: List of Document objects that have been selected based on the top-p sampling.
 
  :raises ValueError: If the top_p value is not within the range [0, 1].
  """