docs: Update deepeval-haystack docstrings (deepset-ai#527)
* docs: Update `deepeval-haystack` docstrings

* Move note about required init params
shadeMe committed Mar 4, 2024
1 parent b721907 commit a67e0b7
Showing 2 changed files with 56 additions and 39 deletions.
@@ -17,10 +17,31 @@
@component
class DeepEvalEvaluator:
"""
A component that uses the DeepEval framework to evaluate inputs against a specific metric.
The supported metrics are defined by :class:`DeepEvalMetric`. The inputs of the component
are metric-dependent.
A component that uses the [DeepEval framework](https://docs.confident-ai.com/docs/evaluation-introduction)
to evaluate inputs against a specific metric. Supported metrics are defined by `DeepEvalMetric`.
Usage example:
```python
from haystack_integrations.components.evaluators.deepeval import DeepEvalEvaluator, DeepEvalMetric
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.FAITHFULNESS,
    metric_params={"model": "gpt-4"},
)
output = evaluator.run(
    questions=["Which is the most popular global sport?"],
    contexts=[
        [
            "Football is undoubtedly the world's most popular sport with "
            "major events like the FIFA World Cup and sports personalities "
            "like Ronaldo and Messi, drawing a followership of more than 4 "
            "billion people."
        ]
    ],
    responses=["Football is the most popular sport with around 4 billion " "followers worldwide"],
)
print(output["results"])
```
"""

_backend_metric: BaseMetric
@@ -39,6 +60,8 @@ def __init__(
The metric to use for evaluation.
:param metric_params:
Parameters to pass to the metric's constructor.
Refer to the `DeepEvalMetric` class for more details
on required parameters.
"""
self.metric = metric if isinstance(metric, DeepEvalMetric) else DeepEvalMetric.from_str(metric)
self.metric_params = metric_params
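Since the constructor accepts either a `DeepEvalMetric` or its string value, the following two constructions should be equivalent (assuming `from_str` maps the enum's string value back to the member, as the line above suggests):
```python
from haystack_integrations.components.evaluators.deepeval import DeepEvalEvaluator, DeepEvalMetric

# Equivalent, assuming from_str resolves the enum's string value.
evaluator_a = DeepEvalEvaluator(metric=DeepEvalMetric.FAITHFULNESS, metric_params={"model": "gpt-4"})
evaluator_b = DeepEvalEvaluator(metric="faithfulness", metric_params={"model": "gpt-4"})
```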
@@ -51,37 +74,20 @@ def __init__(
@component.output_types(results=List[List[Dict[str, Any]]])
def run(self, **inputs) -> Dict[str, Any]:
"""
Run the DeepEval evaluator.
Example:
```python
pipeline = Pipeline()
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.ANSWER_RELEVANCY,
    metric_params={"model": "gpt-4"},
)
pipeline.add_component("evaluator", evaluator)

# Each metric expects a specific set of parameters as input. Refer to the
# DeepEvalMetric class' documentation for more details.
output = pipeline.run({"evaluator": {
    "questions": ["question"],
    "contexts": [["context"]],
    "responses": ["response"]
}})
```
Run the DeepEval evaluator on the provided inputs.
:param inputs:
The inputs to evaluate. These are determined by the
metric being calculated. See :class:`DeepEvalMetric` for more
metric being calculated. See `DeepEvalMetric` for more
information.
:returns:
A nested list of metric results. Each input can have one or more
A dictionary with a single `results` entry that contains
a nested list of metric results. Each input can have one or more
results, depending on the metric. Each result is a dictionary
containing the following keys and values:
* `name` - The name of the metric.
* `score` - The score of the metric.
* `explanation` - An optional explanation of the score.
- `name` - The name of the metric.
- `score` - The score of the metric.
- `explanation` - An optional explanation of the score.
"""
InputConverters.validate_input_parameters(self.metric, self.descriptor.input_parameters, inputs)
converted_inputs: List[LLMTestCase] = list(self.descriptor.input_converter(**inputs)) # type: ignore
@@ -93,7 +99,12 @@ def run(self, **inputs) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
:raises DeserializationError:
If the component cannot be serialized.
"""

def check_serializable(obj: Any):
@@ -116,10 +127,12 @@ def check_serializable(obj: Any):
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "DeepEvalEvaluator":
"""
Deserialize a component from a dictionary.
Deserializes the component from a dictionary.
:param data:
The dictionary to deserialize from.
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
return default_from_dict(cls, data)
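A minimal round trip through these two hooks, assuming an evaluator built as in the usage example above, might look like:
```python
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.FAITHFULNESS,
    metric_params={"model": "gpt-4"},
)

# Serialize to a plain dictionary, then rebuild an equivalent component from it.
data = evaluator.to_dict()
restored = DeepEvalEvaluator.from_dict(data)
```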

@@ -20,27 +20,31 @@
class DeepEvalMetric(Enum):
"""
Metrics supported by DeepEval.
All metrics require a `model` parameter, which specifies
the model to use for evaluation. Refer to the DeepEval
documentation for information on the supported models.
"""

#: Answer relevancy.
#: Answer relevancy.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
ANSWER_RELEVANCY = "answer_relevancy"

#: Faithfulness.
#: Faithfulness.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
FAITHFULNESS = "faithfulness"

#: Contextual precision.
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`
#: Contextual precision.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`\
#: The ground truth is the expected response.
CONTEXTUAL_PRECISION = "contextual_precision"

#: Contextual recall.
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`
#: The ground truth is the expected response.
#: Contextual recall.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`\
#: The ground truth is the expected response.
CONTEXTUAL_RECALL = "contextual_recall"

#: Contextual relevance.
#: Contextual relevance.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
CONTEXTUAL_RELEVANCE = "contextual_relevance"
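For the metrics above that take ground truths, a call would look roughly like this sketch (the argument strings are placeholders, not real data):
```python
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.CONTEXTUAL_PRECISION,
    metric_params={"model": "gpt-4"},
)
output = evaluator.run(
    questions=["question"],
    contexts=[["context"]],
    responses=["response"],
    # ground_truths holds the expected response for each question.
    ground_truths=["expected response"],
)
```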

