docs: Update deepeval-haystack docstrings (deepset-ai#527)
* docs: Update `deepeval-haystack` docstrings

* Move note about required init params
shadeMe committed Mar 4, 2024
1 parent b721907 commit a67e0b7
Showing 2 changed files with 56 additions and 39 deletions.
@@ -17,10 +17,31 @@
@component
class DeepEvalEvaluator:
"""
A component that uses the DeepEval framework to evaluate inputs against a specific metric.
The supported metrics are defined by :class:`DeepEvalMetric`. The inputs of the component
are metric-dependent.
A component that uses the [DeepEval framework](https://docs.confident-ai.com/docs/evaluation-introduction)
to evaluate inputs against a specific metric. Supported metrics are defined by `DeepEvalMetric`.
Usage example:
```python
from haystack_integrations.components.evaluators.deepeval import DeepEvalEvaluator, DeepEvalMetric
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.FAITHFULNESS,
    metric_params={"model": "gpt-4"},
)
output = evaluator.run(
    questions=["Which is the most popular global sport?"],
    contexts=[
        [
            "Football is undoubtedly the world's most popular sport with "
            "major events like the FIFA World Cup and sports personalities "
            "like Ronaldo and Messi, drawing a followership of more than 4 "
            "billion people."
        ]
    ],
    responses=["Football is the most popular sport with around 4 billion " "followers worldwide"],
)
print(output["results"])
```
"""

_backend_metric: BaseMetric
@@ -39,6 +60,8 @@ def __init__(
The metric to use for evaluation.
:param metric_params:
Parameters to pass to the metric's constructor.
Refer to the `DeepEvalMetric` class for more details
on required parameters.
"""
self.metric = metric if isinstance(metric, DeepEvalMetric) else DeepEvalMetric.from_str(metric)
self.metric_params = metric_params
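Since the constructor accepts either a `DeepEvalMetric` or its string value, the following two constructions should be equivalent (assuming `from_str` maps the enum's string value back to the member, as the line above suggests):
```python
from haystack_integrations.components.evaluators.deepeval import DeepEvalEvaluator, DeepEvalMetric

# Equivalent, assuming from_str resolves the enum's string value.
evaluator_a = DeepEvalEvaluator(metric=DeepEvalMetric.FAITHFULNESS, metric_params={"model": "gpt-4"})
evaluator_b = DeepEvalEvaluator(metric="faithfulness", metric_params={"model": "gpt-4"})
```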
@@ -51,37 +74,20 @@ def __init__(
@component.output_types(results=List[List[Dict[str, Any]]])
def run(self, **inputs) -> Dict[str, Any]:
"""
Run the DeepEval evaluator.
Example:
```python
pipeline = Pipeline()
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.ANSWER_RELEVANCY,
    metric_params={"model": "gpt-4"},
)
pipeline.add_component("evaluator", evaluator)

# Each metric expects a specific set of parameters as input. Refer to the
# DeepEvalMetric class' documentation for more details.
output = pipeline.run({"evaluator": {
    "questions": ["question"],
    "contexts": [["context"]],
    "responses": ["response"]
}})
```
Run the DeepEval evaluator on the provided inputs.
:param inputs:
The inputs to evaluate. These are determined by the
metric being calculated. See :class:`DeepEvalMetric` for more
metric being calculated. See `DeepEvalMetric` for more
information.
:returns:
A nested list of metric results. Each input can have one or more
A dictionary with a single `results` entry that contains
a nested list of metric results. Each input can have one or more
results, depending on the metric. Each result is a dictionary
containing the following keys and values:
* `name` - The name of the metric.
* `score` - The score of the metric.
* `explanation` - An optional explanation of the score.
- `name` - The name of the metric.
- `score` - The score of the metric.
- `explanation` - An optional explanation of the score.
"""
InputConverters.validate_input_parameters(self.metric, self.descriptor.input_parameters, inputs)
converted_inputs: List[LLMTestCase] = list(self.descriptor.input_converter(**inputs)) # type: ignore
@@ -93,7 +99,12 @@ def run(self, **inputs) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
:raises DeserializationError:
If the component cannot be serialized.
"""

def check_serializable(obj: Any):
@@ -116,10 +127,12 @@ def check_serializable(obj: Any):
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "DeepEvalEvaluator":
"""
Deserialize a component from a dictionary.
Deserializes the component from a dictionary.
:param data:
The dictionary to deserialize from.
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
return default_from_dict(cls, data)
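A minimal round trip through these two hooks, assuming an evaluator built as in the usage example above, might look like:
```python
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.FAITHFULNESS,
    metric_params={"model": "gpt-4"},
)

# Serialize to a plain dictionary, then rebuild an equivalent component from it.
data = evaluator.to_dict()
restored = DeepEvalEvaluator.from_dict(data)
```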

@@ -20,27 +20,31 @@
class DeepEvalMetric(Enum):
"""
Metrics supported by DeepEval.
All metrics require a `model` parameter, which specifies
the model to use for evaluation. Refer to the DeepEval
documentation for information on the supported models.
"""

#: Answer relevancy.
#: Answer relevancy.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
ANSWER_RELEVANCY = "answer_relevancy"

#: Faithfulness.
#: Faithfulness.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
FAITHFULNESS = "faithfulness"

#: Contextual precision.
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`
#: Contextual precision.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`\
#: The ground truth is the expected response.
CONTEXTUAL_PRECISION = "contextual_precision"

#: Contextual recall.
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`
#: The ground truth is the expected response.
#: Contextual recall.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str], ground_truths: List[str]`\
#: The ground truth is the expected response.
CONTEXTUAL_RECALL = "contextual_recall"

#: Contextual relevance.
#: Contextual relevance.\
#: Inputs - `questions: List[str], contexts: List[List[str]], responses: List[str]`
CONTEXTUAL_RELEVANCE = "contextual_relevance"
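For the metrics above that take ground truths, a call would look roughly like this sketch (the argument strings are placeholders, not real data):
```python
evaluator = DeepEvalEvaluator(
    metric=DeepEvalMetric.CONTEXTUAL_PRECISION,
    metric_params={"model": "gpt-4"},
)
output = evaluator.run(
    questions=["question"],
    contexts=[["context"]],
    responses=["response"],
    # ground_truths holds the expected response for each question.
    ground_truths=["expected response"],
)
```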

