refactor!: Use Secret for API key in UpTrainEvaluator (deepset-ai#347)

* refactor!: Use `Secret` for API key in `UpTrainEvaluator`

Some other misc build fixes

* Disambiguate module names

* `mypy` fixes
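
In practice, the breaking change means callers now pass a `Secret` instead of the name of an environment variable. A minimal sketch of the new construction pattern, mirroring the updated `example.py` below (assumes `OPENAI_API_KEY` is set in the environment):

    from haystack.utils import Secret
    from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric

    # Before this change: UpTrainEvaluator(..., api_key_env_var="OPENAI_API_KEY")
    # After: the key is wrapped in a Secret and resolved when the backend client is initialized.
    evaluator = UpTrainEvaluator(
        metric=UpTrainMetric.FACTUAL_ACCURACY,
        api="openai",
        api_key=Secret.from_env_var("OPENAI_API_KEY"),
    )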
shadeMe committed Feb 7, 2024
1 parent 1fbad9a commit 593af1f
Showing 6 changed files with 65 additions and 56 deletions.
7 changes: 4 additions & 3 deletions integrations/uptrain/example/example.py
@@ -2,6 +2,7 @@

from haystack import Pipeline
from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric
from haystack.utils import Secret

QUESTIONS = [
"Which is the most popular global sport?",
@@ -24,13 +25,13 @@
evaluator = UpTrainEvaluator(
metric=UpTrainMetric.FACTUAL_ACCURACY,
api="openai",
api_key_env_var="OPENAI_API_KEY",
api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)

# Each metric expects a specific set of parameters as input. Refer to the
# UpTrainMetric class' documentation for more details.
output = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}})
results = pipeline.run({"evaluator": {"questions": QUESTIONS, "contexts": CONTEXTS, "responses": RESPONSES}})

for output in output["evaluator"]["results"]:
for output in results["evaluator"]["results"]:
print(output)
8 changes: 4 additions & 4 deletions integrations/uptrain/pyproject.toml
@@ -7,22 +7,21 @@ name = "uptrain-haystack"
dynamic = ["version"]
description = 'An integration of UpTrain LLM evaluation framework with Haystack'
readme = "README.md"
requires-python = ">=3.7"
requires-python = ">=3.8"
license = "Apache-2.0"
keywords = []
authors = [{ name = "deepset GmbH", email = "[email protected]" }]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai", "uptrain>=0.5"]
dependencies = ["haystack-ai>=2.0.0b6", "uptrain>=0.5"]

[project.urls]
Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/uptrain"
@@ -49,7 +48,7 @@ cov-report = ["- coverage combine", "coverage report"]
cov = ["test-cov", "cov-report"]

[[tool.hatch.envs.all.matrix]]
python = ["3.7", "3.8", "3.9", "3.10", "3.11"]
python = ["3.8", "3.9", "3.10", "3.11"]

[tool.hatch.envs.lint]
detached = true
@@ -113,6 +112,7 @@ ignore = [
# Misc
"S101",
"TID252",
"B008",
]
unfixable = [
# Don't touch unused imports
@@ -1,5 +1,5 @@
from .evaluator import UpTrainEvaluator
from .metrics import UpTrainMetric
from .uptrain_evaluator import UpTrainEvaluator
from .uptrain_metrics import UpTrainMetric

__all__ = (
"UpTrainEvaluator",
@@ -1,9 +1,9 @@
import json
import os
from typing import Any, Dict, List, Optional, Union

from haystack import DeserializationError, component, default_from_dict, default_to_dict
from haystack_integrations.components.evaluators.metrics import (
from haystack.utils import Secret, deserialize_secrets_inplace
from haystack_integrations.components.evaluators.uptrain_metrics import (
METRIC_DESCRIPTORS,
InputConverters,
OutputConverters,
@@ -32,7 +32,7 @@ def __init__(
metric_params: Optional[Dict[str, Any]] = None,
*,
api: str = "openai",
api_key_env_var: Optional[str] = "OPENAI_API_KEY",
api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
api_params: Optional[Dict[str, Any]] = None,
):
"""
@@ -46,16 +46,16 @@
The API to use for evaluation.
Supported APIs: "openai", "uptrain".
:param api_key_env_var:
The name of the environment variable containing the API key.
:param api_key:
The API key to use.
:param api_params:
Additional parameters to pass to the API client.
"""
self.metric = metric if isinstance(metric, UpTrainMetric) else UpTrainMetric.from_str(metric)
self.metric_params = metric_params
self.descriptor = METRIC_DESCRIPTORS[self.metric]
self.api = api
self.api_key_env_var = api_key_env_var
self.api_key = api_key
self.api_params = api_params

self._init_backend()
@@ -73,7 +73,7 @@ def run(self, **inputs) -> Dict[str, Any]:
evaluator = UpTrainEvaluator(
metric=UpTrainMetric.FACTUAL_ACCURACY,
api="openai",
api_key_env_var="OPENAI_API_KEY",
api_key=Secret.from_env_var("OPENAI_API_KEY"),
)
pipeline.add_component("evaluator", evaluator)
@@ -140,7 +140,7 @@ def check_serializable(obj: Any):
metric=self.metric,
metric_params=self.metric_params,
api=self.api,
api_key_env_var=self.api_key_env_var,
api_key=self.api_key.to_dict(),
api_params=self.api_params,
)

@@ -152,6 +152,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "UpTrainEvaluator":
:param data:
The dictionary to deserialize from.
"""
deserialize_secrets_inplace(data["init_parameters"], ["api_key"])
return default_from_dict(cls, data)

def _init_backend(self):
@@ -185,11 +186,8 @@ def _init_backend(self):
msg = f"Unsupported API '{self.api}' for UpTrain evaluator. Supported APIs: {supported_apis}"
raise ValueError(msg)

api_key = os.environ.get(self.api_key_env_var)
if api_key is None:
msg = f"Missing API key environment variable '{self.api_key_env_var}' for UpTrain evaluator"
raise ValueError(msg)

api_key = self.api_key.resolve_value()
assert api_key is not None
if self.api == "openai":
backend_client = EvalLLM(openai_api_key=api_key)
elif self.api == "uptrain":
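
For readers tracking the serialization changes above: the API key now round-trips through `to_dict`/`from_dict` as `Secret` metadata rather than as a raw value. A minimal, illustrative sketch (assumes `OPENAI_API_KEY` is set):

    from haystack.utils import Secret
    from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric

    evaluator = UpTrainEvaluator(
        metric=UpTrainMetric.FACTUAL_ACCURACY,
        api="openai",
        api_key=Secret.from_env_var("OPENAI_API_KEY"),
    )

    # to_dict stores only the Secret's metadata (its type and the variable name), never the token.
    data = evaluator.to_dict()

    # from_dict rebuilds the Secret via deserialize_secrets_inplace; the key is resolved again
    # from the environment when the backend client is re-initialized.
    restored = UpTrainEvaluator.from_dict(data)
    assert restored.api_key == evaluator.api_key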
@@ -257,7 +257,18 @@ def validate_outputs(outputs: List[Dict[str, Any]]):
msg = "UpTrain evaluator expects outputs to be a list of `dict`s"
elif not all(isinstance(y, str) for x in outputs for y in x.keys()):
msg = "UpTrain evaluator expects keys in the output dicts to be `str`"
elif not all(isinstance(y, (float, str)) for x in outputs for y in x.values()):
elif not all(
y is None
or isinstance(
y,
(
float,
str,
),
)
for x in outputs
for y in x.values()
):
msg = "UpTrain evaluator expects values in the output dicts to be either `str` or `float`"

if msg is not None:
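
The widened check above also admits `None` values alongside `str` and `float` in the per-metric output dicts. A hypothetical record that now passes validation (field names are illustrative, not taken from this diff):

    # A score with no explanation attached -- previously this would fail output validation.
    outputs = [{"score": 0.5, "explanation": None}]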
65 changes: 32 additions & 33 deletions integrations/uptrain/tests/test_evaluator.py
@@ -8,6 +8,7 @@
from haystack import DeserializationError

from haystack_integrations.components.evaluators import UpTrainEvaluator, UpTrainMetric
from haystack.utils import Secret

DEFAULT_QUESTIONS = [
"Which is the most popular global sport?",
@@ -102,43 +103,47 @@ def log_and_evaluate(self, data, checks, **kwargs):
return data


@patch("os.environ.get")
def test_evaluator_api(os_environ_get):
api_key_var = "test-api-key"
os_environ_get.return_value = api_key_var
def test_evaluator_api(monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
monkeypatch.setenv("UPTRAIN_API_KEY", "test-api-key")

eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS)
assert eval.api == "openai"
assert eval.api_key_env_var == "OPENAI_API_KEY"
assert eval.api_key == Secret.from_env_var("OPENAI_API_KEY")

eval = UpTrainEvaluator(UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key_env_var="UPTRAIN_API_KEY")
eval = UpTrainEvaluator(
UpTrainMetric.RESPONSE_COMPLETENESS, api="uptrain", api_key=Secret.from_env_var("UPTRAIN_API_KEY")
)
assert eval.api == "uptrain"
assert eval.api_key_env_var == "UPTRAIN_API_KEY"
assert eval.api_key == Secret.from_env_var("UPTRAIN_API_KEY")

with pytest.raises(ValueError, match="Unsupported API"):
UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="cohere")

os_environ_get.return_value = None
with pytest.raises(ValueError, match="Missing API key"):
UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain")

with pytest.raises(ValueError, match="None of the following authentication environment variables are set"):
UpTrainEvaluator(UpTrainMetric.CONTEXT_RELEVANCE, api="uptrain", api_key=Secret.from_env_var("asd39920qqq"))

@patch("os.environ.get")
def test_evaluator_metric_init_params(os_environ_get):
api_key = "test-api-key"
os_environ_get.return_value = api_key

eval = UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"llm_persona": "village idiot"})
def test_evaluator_metric_init_params():
eval = UpTrainEvaluator(
UpTrainMetric.CRITIQUE_TONE,
metric_params={"llm_persona": "village idiot"},
api_key=Secret.from_token("Aaa"),
)
assert eval._backend_metric.llm_persona == "village idiot"

with pytest.raises(ValueError, match="Invalid init parameters"):
UpTrainEvaluator(UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"})
UpTrainEvaluator(
UpTrainMetric.CRITIQUE_TONE, metric_params={"role": "village idiot"}, api_key=Secret.from_token("Aaa")
)

with pytest.raises(ValueError, match="unexpected init parameters"):
UpTrainEvaluator(UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True})
UpTrainEvaluator(
UpTrainMetric.FACTUAL_ACCURACY, metric_params={"check_numbers": True}, api_key=Secret.from_token("Aaa")
)

with pytest.raises(ValueError, match="expected init parameters"):
UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING)
UpTrainEvaluator(UpTrainMetric.RESPONSE_MATCHING, api_key=Secret.from_token("Aaa"))


@patch("os.environ.get")
@@ -149,7 +154,7 @@ def test_evaluator_serde(os_environ_get):
"metric": UpTrainMetric.RESPONSE_MATCHING,
"metric_params": {"method": "rouge"},
"api": "uptrain",
"api_key_env_var": "abacab",
"api_key": Secret.from_env_var("ENV_VAR", strict=False),
"api_params": {"eval_name": "test"},
}
eval = UpTrainEvaluator(**init_params)
Expand All @@ -158,7 +163,7 @@ def test_evaluator_serde(os_environ_get):

assert eval.metric == new_eval.metric
assert eval.api == new_eval.api
assert eval.api_key_env_var == new_eval.api_key_env_var
assert eval.api_key == new_eval.api_key
assert eval.metric_params == new_eval.metric_params
assert eval.api_params == new_eval.api_params
assert type(new_eval._backend_client) == type(eval._backend_client)
@@ -191,14 +196,12 @@ def test_evaluator_serde(os_environ_get):
(UpTrainMetric.RESPONSE_MATCHING, {"ground_truths": [], "responses": []}, {"method": "llm"}),
],
)
@patch("os.environ.get")
def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params):
os_environ_get.return_value = "abacab"
def test_evaluator_valid_inputs(metric, inputs, params):
init_params = {
"metric": metric,
"metric_params": params,
"api": "uptrain",
"api_key_env_var": "abacab",
"api_key": Secret.from_token("Aaa"),
"api_params": None,
}
eval = UpTrainEvaluator(**init_params)
@@ -220,15 +223,13 @@ def test_evaluator_valid_inputs(os_environ_get, metric, inputs, params):
(UpTrainMetric.RESPONSE_RELEVANCE, {"responses": []}, "expected input parameter ", None),
],
)
@patch("os.environ.get")
def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string, params):
os_environ_get.return_value = "abacab"
def test_evaluator_invalid_inputs(metric, inputs, error_string, params):
with pytest.raises(ValueError, match=error_string):
init_params = {
"metric": metric,
"metric_params": params,
"api": "uptrain",
"api_key_env_var": "abacab",
"api_key": Secret.from_token("Aaa"),
"api_params": None,
}
eval = UpTrainEvaluator(**init_params)
@@ -299,14 +300,12 @@ def test_evaluator_invalid_inputs(os_environ_get, metric, inputs, error_string,
),
],
)
@patch("os.environ.get")
def test_evaluator_outputs(os_environ_get, metric, inputs, expected_outputs, metric_params):
os_environ_get.return_value = "abacab"
def test_evaluator_outputs(metric, inputs, expected_outputs, metric_params):
init_params = {
"metric": metric,
"metric_params": metric_params,
"api": "uptrain",
"api_key_env_var": "abacab",
"api_key": Secret.from_token("Aaa"),
"api_params": None,
}
eval = UpTrainEvaluator(**init_params)
