Skip to content

Commit

Permalink
fix: MetaFieldRanker - use weight if passed in the run method (#…
Browse files Browse the repository at this point in the history
…7305)

* fix: `MetaFieldRanker` - use `weight` if passed in the `run` method

* reno
  • Loading branch information
anakin87 committed Mar 5, 2024
1 parent b86490b commit 38a80b0
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
16 changes: 9 additions & 7 deletions haystack/components/rankers/meta_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def _validate_params(
"Parameter <weight> must be in range [0,1] but is currently set to '%s'.\n'0' disables sorting by a "
"meta field, '0.5' assigns equal weight to the previous relevance scores and the meta field, and "
"'1' ranks by the meta field only.\nChange the <weight> parameter to a value in range 0 to 1 when "
"initializing the MetaFieldRanker." % self.weight
"initializing the MetaFieldRanker." % weight
)

if ranking_mode not in ["reciprocal_rank_fusion", "linear_score"]:
Expand Down Expand Up @@ -250,7 +250,7 @@ def run(
# Add the docs missing the meta_field back on the end
sorted_by_meta = [doc for meta, doc in tuple_sorted_by_meta]
sorted_documents = sorted_by_meta + docs_missing_meta_field
sorted_documents = self._merge_rankings(documents, sorted_documents)
sorted_documents = self._merge_rankings(documents, sorted_documents, weight)
return {"documents": sorted_documents[:top_k]}

def _parse_meta(
Expand Down Expand Up @@ -295,16 +295,18 @@ def _parse_meta(

return meta_values

def _merge_rankings(self, documents: List[Document], sorted_documents: List[Document]) -> List[Document]:
def _merge_rankings(
self, documents: List[Document], sorted_documents: List[Document], weight: float
) -> List[Document]:
"""
Merge the two different rankings for Documents sorted both by their content and by their meta field.
"""
scores_map: Dict = defaultdict(int)

if self.ranking_mode == "reciprocal_rank_fusion":
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - self.weight)
scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * self.weight
scores_map[document.id] += self._calculate_rrf(rank=i) * (1 - weight)
scores_map[sorted_doc.id] += self._calculate_rrf(rank=i) * weight
elif self.ranking_mode == "linear_score":
for i, (document, sorted_doc) in enumerate(zip(documents, sorted_documents)):
score = float(0)
Expand All @@ -319,8 +321,8 @@ def _merge_rankings(self, documents: List[Document], sorted_documents: List[Docu
else:
score = document.score

scores_map[document.id] += score * (1 - self.weight)
scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * self.weight
scores_map[document.id] += score * (1 - weight)
scores_map[sorted_doc.id] += self._calc_linear_score(rank=i, amount=len(sorted_documents)) * weight

for document in documents:
document.score = scores_map[document.id]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Fix a bug in the `MetaFieldRanker` where the `weight` parameter passed to the `run` method was not being used.
10 changes: 10 additions & 0 deletions test/components/rankers/test_metafield.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ def test_run_with_weight_equal_to_1(self):
sorted_scores = sorted([doc.meta["rating"] for doc in docs_after], reverse=True)
assert [doc.meta["rating"] for doc in docs_after] == sorted_scores

def test_run_with_weight_equal_to_1_passed_in_run_method(self):
    """Check that a ``weight`` given to ``run`` is used instead of the init-time one.

    The ranker is built with ``weight=0.0`` and then called with
    ``weight=1.0``; the returned documents are expected to come back
    ordered by their ``rating`` meta value, highest first.
    """
    ratings = [1.1, 0.5, 2.3]
    ranker = MetaFieldRanker(meta_field="rating", weight=0.0)
    input_docs = [Document(content="abc", meta={"rating": rating}) for rating in ratings]

    # Override the init-time weight at call time.
    ranked_docs = ranker.run(documents=input_docs, weight=1.0)["documents"]

    assert len(ranked_docs) == 3
    ranked_ratings = [doc.meta["rating"] for doc in ranked_docs]
    # Output order must already be descending by rating.
    assert ranked_ratings == sorted(ranked_ratings, reverse=True)


def test_sort_order_ascending(self):
ranker = MetaFieldRanker(meta_field="rating", weight=1.0, sort_order="ascending")
docs_before = [Document(content="abc", meta={"rating": value}) for value in [1.1, 0.5, 2.3]]
Expand Down

0 comments on commit 38a80b0

Please sign in to comment.