Skip to content

Commit

Permalink
update to PineconeDocumentStore to remove dependency on SQL db (deeps…
Browse files Browse the repository at this point in the history
…et-ai#2749)

* update to PineconeDocumentStore to remove dependency on SQL db

* Update Documentation & Code Style

* typing fixes

* Update Documentation & Code Style

* fixed embedding generator to yield Documents

* Update Documentation & Code Style

* fixes for final typing issues

* fixes for pylint

* Update Documentation & Code Style

* uncomment pinecone tests

* added new params to docstrings

* Update Documentation & Code Style

* Update Documentation & Code Style

* Update haystack/document_stores/pinecone.py

Co-authored-by: Sara Zan <[email protected]>

* Update haystack/document_stores/pinecone.py

Co-authored-by: Sara Zan <[email protected]>

* Update Documentation & Code Style

* Update haystack/document_stores/pinecone.py

Co-authored-by: Sara Zan <[email protected]>

* Update haystack/document_stores/pinecone.py

Co-authored-by: Sara Zan <[email protected]>

* Update haystack/document_stores/pinecone.py

Co-authored-by: Sara Zan <[email protected]>

* Update haystack/document_stores/pinecone.py

Co-authored-by: Sara Zan <[email protected]>

* changes based on comments, updated errors and install

* Update Documentation & Code Style

* mypy

* implement simple filtering in pinecone mock

* typo

* typo in reverse

* account for missing meta key in filtering

* typo

* added metadata filtering to describe index

* added handling for users switching indexes in same doc store, and handling duplicate docs in write

* syntax tweaks

* added index option to document/embedding count calls

* labels implementation in progress

* added metadata fields to be indexed for pinecone tests

* further changes to mock

* WIP implementation of labels+multilabels

* switched to rely on labels namespace rather than filter

* simpler delete_labels

* label fixes, remove debug code

* Apply docstring fixes

Co-authored-by: Agnieszka Marzec <[email protected]>

* mypy

* pylint

* docs

* temporarily un-mock Pinecone

* Small Pinecone test suite

* pylint

* Add fake test key to pass the None check

* Add again fake test key to pass the None check

* Add Pinecone to default docstores and fix filters

* Fix field name

* Change field name

* Change field value

* Remove comments

* forgot to upgrade pyproject.toml

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Sara Zan <[email protected]>
Co-authored-by: Sara Zan <[email protected]>
Co-authored-by: Agnieszka Marzec <[email protected]>
  • Loading branch information
5 people committed Aug 24, 2022
1 parent 891707e commit 9b1b030
Show file tree
Hide file tree
Showing 11 changed files with 1,548 additions and 195 deletions.
4 changes: 3 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
def pytest_addoption(parser):
parser.addoption(
"--document_store_type", action="store", default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate"
"--document_store_type",
action="store",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
)


Expand Down
214 changes: 199 additions & 15 deletions docs/_src/api/api/document_store.md

Large diffs are not rendered by default.

66 changes: 39 additions & 27 deletions haystack/document_stores/filter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sqlalchemy import and_, or_

from haystack.document_stores.utils import convert_date_to_rfc3339
from haystack.errors import FilterError


def nested_defaultdict() -> defaultdict:
Expand Down Expand Up @@ -460,7 +461,8 @@ def evaluate(self, fields) -> bool:
# is only initialized with lists, but changing the type annotation would mean duplicating __init__

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, List]]:
assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
if not isinstance(self.comparison_value, list):
raise FilterError("'$in' operation requires comparison value to be a list.")
return {"terms": {self.field_name: self.comparison_value}}

def convert_to_sql(self, meta_document_orm):
Expand All @@ -470,7 +472,8 @@ def convert_to_sql(self, meta_document_orm):

def convert_to_weaviate(self) -> Dict[str, Union[str, List[Dict]]]:
filter_dict: Dict[str, Union[str, List[Dict]]] = {"operator": "Or", "operands": []}
assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
if not isinstance(self.comparison_value, list):
raise FilterError("'$in' operation requires comparison value to be a list.")
for value in self.comparison_value:
comp_value_type, comp_value = self._get_weaviate_datatype(value)
assert isinstance(filter_dict["operands"], list) # Necessary for mypy
Expand All @@ -481,7 +484,8 @@ def convert_to_weaviate(self) -> Dict[str, Union[str, List[Dict]]]:
return filter_dict

def convert_to_pinecone(self) -> Dict[str, Dict[str, List]]:
assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
if not isinstance(self.comparison_value, list):
raise FilterError("'$in' operation requires comparison value to be a list.")
return {self.field_name: {"$in": self.comparison_value}}

def invert(self) -> "NinOperation":
Expand All @@ -499,7 +503,8 @@ def evaluate(self, fields) -> bool:
return fields[self.field_name] != self.comparison_value

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Dict[str, Union[str, int, float, bool]]]]]:
assert not isinstance(self.comparison_value, list), "Use '$nin' operation for lists as comparison values."
if isinstance(self.comparison_value, list):
raise FilterError("Use '$nin' operation for lists as comparison values.")
return {"bool": {"must_not": {"term": {self.field_name: self.comparison_value}}}}

def convert_to_sql(self, meta_document_orm):
Expand Down Expand Up @@ -530,7 +535,8 @@ def evaluate(self, fields) -> bool:
# is only initialized with lists, but changing the type annotation would mean duplicating __init__

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Dict[str, List]]]]:
assert isinstance(self.comparison_value, list), "'$nin' operation requires comparison value to be a list."
if not isinstance(self.comparison_value, list):
raise FilterError("'$nin' operation requires comparison value to be a list.")
return {"bool": {"must_not": {"terms": {self.field_name: self.comparison_value}}}}

def convert_to_sql(self, meta_document_orm):
Expand All @@ -540,7 +546,8 @@ def convert_to_sql(self, meta_document_orm):

def convert_to_weaviate(self) -> Dict[str, Union[str, List[Dict]]]:
filter_dict: Dict[str, Union[str, List[Dict]]] = {"operator": "And", "operands": []}
assert isinstance(self.comparison_value, list), "'$nin' operation requires comparison value to be a list."
if not isinstance(self.comparison_value, list):
raise FilterError("'$nin' operation requires comparison value to be a list.")
for value in self.comparison_value:
comp_value_type, comp_value = self._get_weaviate_datatype(value)
assert isinstance(filter_dict["operands"], list) # Necessary for mypy
Expand All @@ -551,7 +558,8 @@ def convert_to_weaviate(self) -> Dict[str, Union[str, List[Dict]]]:
return filter_dict

def convert_to_pinecone(self) -> Dict[str, Dict[str, List]]:
assert isinstance(self.comparison_value, list), "'$in' operation requires comparison value to be a list."
if not isinstance(self.comparison_value, list):
raise FilterError("'$in' operation requires comparison value to be a list.")
return {self.field_name: {"$nin": self.comparison_value}}

def invert(self) -> "InOperation":
Expand All @@ -569,7 +577,8 @@ def evaluate(self, fields) -> bool:
return fields[self.field_name] > self.comparison_value

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
assert not isinstance(self.comparison_value, list), "Comparison value for '$gt' operation must not be a list."
if isinstance(self.comparison_value, list):
raise FilterError("Comparison value for '$gt' operation must not be a list.")
return {"range": {self.field_name: {"gt": self.comparison_value}}}

def convert_to_sql(self, meta_document_orm):
Expand All @@ -579,13 +588,13 @@ def convert_to_sql(self, meta_document_orm):

def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
comp_value_type, comp_value = self._get_weaviate_datatype()
assert not isinstance(comp_value, list), "Comparison value for '$gt' operation must not be a list."
if isinstance(comp_value, list):
raise FilterError("Comparison value for '$gt' operation must not be a list.")
return {"path": [self.field_name], "operator": "GreaterThan", comp_value_type: comp_value}

def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
assert not isinstance(
self.comparison_value, (list, str)
), "Comparison value for '$gt' operation must be a float or int."
if not isinstance(self.comparison_value, (float, int)):
raise FilterError("Comparison value for '$gt' operation must be a float or int.")
return {self.field_name: {"$gt": self.comparison_value}}

def invert(self) -> "LteOperation":
Expand All @@ -603,7 +612,8 @@ def evaluate(self, fields) -> bool:
return fields[self.field_name] >= self.comparison_value

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
assert not isinstance(self.comparison_value, list), "Comparison value for '$gte' operation must not be a list."
if isinstance(self.comparison_value, list):
raise FilterError("Comparison value for '$gte' operation must not be a list.")
return {"range": {self.field_name: {"gte": self.comparison_value}}}

def convert_to_sql(self, meta_document_orm):
Expand All @@ -613,13 +623,13 @@ def convert_to_sql(self, meta_document_orm):

def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
comp_value_type, comp_value = self._get_weaviate_datatype()
assert not isinstance(comp_value, list), "Comparison value for '$gte' operation must not be a list."
if isinstance(comp_value, list):
raise FilterError("Comparison value for '$gte' operation must not be a list.")
return {"path": [self.field_name], "operator": "GreaterThanEqual", comp_value_type: comp_value}

def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
assert not isinstance(
self.comparison_value, (list, str)
), "Comparison value for '$gte' operation must be a float or int."
if not isinstance(self.comparison_value, (float, int)):
raise FilterError("Comparison value for '$gte' operation must be a float or int.")
return {self.field_name: {"$gte": self.comparison_value}}

def invert(self) -> "LtOperation":
Expand All @@ -637,7 +647,8 @@ def evaluate(self, fields) -> bool:
return fields[self.field_name] < self.comparison_value

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
assert not isinstance(self.comparison_value, list), "Comparison value for '$lt' operation must not be a list."
if isinstance(self.comparison_value, list):
raise FilterError("Comparison value for '$lt' operation must not be a list.")
return {"range": {self.field_name: {"lt": self.comparison_value}}}

def convert_to_sql(self, meta_document_orm):
Expand All @@ -647,13 +658,13 @@ def convert_to_sql(self, meta_document_orm):

def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
comp_value_type, comp_value = self._get_weaviate_datatype()
assert not isinstance(comp_value, list), "Comparison value for '$lt' operation must not be a list."
if isinstance(comp_value, list):
raise FilterError("Comparison value for '$lt' operation must not be a list.")
return {"path": [self.field_name], "operator": "LessThan", comp_value_type: comp_value}

def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
assert not isinstance(
self.comparison_value, (list, str)
), "Comparison value for '$lt' operation must be a float or int."
if not isinstance(self.comparison_value, (float, int)):
raise FilterError("Comparison value for '$lt' operation must be a float or int.")
return {self.field_name: {"$lt": self.comparison_value}}

def invert(self) -> "GteOperation":
Expand All @@ -671,7 +682,8 @@ def evaluate(self, fields) -> bool:
return fields[self.field_name] <= self.comparison_value

def convert_to_elasticsearch(self) -> Dict[str, Dict[str, Dict[str, Union[str, float, int]]]]:
assert not isinstance(self.comparison_value, list), "Comparison value for '$lte' operation must not be a list."
if isinstance(self.comparison_value, list):
raise FilterError("Comparison value for '$lte' operation must not be a list.")
return {"range": {self.field_name: {"lte": self.comparison_value}}}

def convert_to_sql(self, meta_document_orm):
Expand All @@ -681,13 +693,13 @@ def convert_to_sql(self, meta_document_orm):

def convert_to_weaviate(self) -> Dict[str, Union[List[str], str, float, int]]:
comp_value_type, comp_value = self._get_weaviate_datatype()
assert not isinstance(comp_value, list), "Comparison value for '$lte' operation must not be a list."
if isinstance(comp_value, list):
raise FilterError("Comparison value for '$lte' operation must not be a list.")
return {"path": [self.field_name], "operator": "LessThanEqual", comp_value_type: comp_value}

def convert_to_pinecone(self) -> Dict[str, Dict[str, Union[float, int]]]:
assert not isinstance(
self.comparison_value, (list, str)
), "Comparison value for '$lte' operation must be a float or int."
if not isinstance(self.comparison_value, (float, int)):
raise FilterError("Comparison value for '$lte' operation must be a float or int.")
return {self.field_name: {"$lte": self.comparison_value}}

def invert(self) -> "GtOperation":
Expand Down
Loading

0 comments on commit 9b1b030

Please sign in to comment.