Skip to content

Commit

Permalink
Add ability to clear ALL data associated with an index (redis#179)
Browse files Browse the repository at this point in the history
This PR introduces the `clear()` method to the core SearchEngine class
in RedisVL, enabling all data associated with an index to be cleared
out while leaving the index itself in place.

Useful for manual cache eviction, manual session clearing, and more.

This PR also updates the extension classes to use the new `clear()`
method as opposed to the SCAN ITER approach.
  • Loading branch information
tylerhutcherson authored Jul 9, 2024
1 parent 61e7338 commit 9ca93ef
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 22 deletions.
35 changes: 33 additions & 2 deletions docs/user_guide/getting_started_01.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -653,13 +653,44 @@
"## Cleanup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below we will clean up after our work. First, you can optionally flush all data from Redis associated with the index by\n",
"using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n",
"\n",
"But if you want to clean up everything, including the index, just use `.delete()`\n",
"which will by default remove the index AND the underlying data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# (optionally) clear all data from Redis associated with the index\n",
"await index.clear()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# but the index is still in place\n",
"await index.exists()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# clean up the index\n",
"# remove / delete the index in its entirety\n",
"await index.delete()"
]
}
Expand All @@ -680,7 +711,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.10.14"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
12 changes: 6 additions & 6 deletions docs/user_guide/vectorizers_04.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -531,14 +531,14 @@
}
],
"source": [
"from redisvl.utils.vectorize import MistralAITextVectorizer\n",
"# from redisvl.utils.vectorize import MistralAITextVectorizer\n",
"\n",
"mistral = MistralAITextVectorizer()\n",
"# mistral = MistralAITextVectorizer()\n",
"\n",
"# mebed a sentence using their asyncronous method\n",
"test = await mistral.aembed(\"This is a test sentence.\")\n",
"print(\"Vector dimensions: \", len(test))\n",
"print(test[:10])"
"# # embed a sentence using their asynchronous method\n",
"# test = await mistral.aembed(\"This is a test sentence.\")\n",
"# print(\"Vector dimensions: \", len(test))\n",
"# print(test[:10])"
]
},
{
Expand Down
5 changes: 1 addition & 4 deletions redisvl/extensions/llmcache/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,7 @@ def set_vectorizer(self, vectorizer: BaseVectorizer) -> None:

def clear(self) -> None:
"""Clear the cache of all keys while preserving the index."""
with self._index.client.pipeline(transaction=False) as pipe: # type: ignore
for key in self._index.client.scan_iter(match=f"{self._index.prefix}:*"): # type: ignore
pipe.delete(key)
pipe.execute()
self._index.clear()

def delete(self) -> None:
"""Clear the semantic cache of all keys and remove the underlying search
Expand Down
5 changes: 1 addition & 4 deletions redisvl/extensions/session_manager/semantic_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,7 @@ def set_scope(

def clear(self) -> None:
"""Clears the chat session history."""
with self._index.client.pipeline(transaction=False) as pipe: # type: ignore
for key in self._index.client.scan_iter(match=f"{self._index.prefix}:*"): # type: ignore
pipe.delete(key)
pipe.execute()
self._index.clear()

def delete(self) -> None:
"""Clear all conversation keys and remove the search index."""
Expand Down
43 changes: 42 additions & 1 deletion redisvl/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from redis.commands.search.indexDefinition import IndexDefinition

from redisvl.index.storage import HashStorage, JsonStorage
from redisvl.query.query import BaseQuery, CountQuery, FilterQuery
from redisvl.query import BaseQuery, CountQuery, FilterQuery
from redisvl.query.filter import FilterExpression
from redisvl.redis.connection import (
RedisConnectionFactory,
convert_index_info_to_schema,
Expand Down Expand Up @@ -476,6 +477,26 @@ def delete(self, drop: bool = True):
except:
logger.exception("Error while deleting index")

def clear(self) -> int:
    """Clear all keys in Redis associated with the index, leaving the index
    available and in-place for future insertions or updates.

    Returns:
        int: Count of records deleted from Redis.
    """
    # Track deleted records
    total_records_deleted: int = 0

    # Paginate using queries and delete in batches
    for batch in self.paginate(
        FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500
    ):
        batch_keys = [record["id"] for record in batch]
        # Guard against an empty page: redis DEL requires at least one key.
        if batch_keys:
            records_deleted = self._redis_client.delete(*batch_keys)  # type: ignore
            total_records_deleted += records_deleted  # type: ignore

    return total_records_deleted

def load(
self,
data: Iterable[Any],
Expand Down Expand Up @@ -894,6 +915,26 @@ async def delete(self, drop: bool = True):
logger.exception("Error while deleting index")
raise

async def clear(self) -> int:
    """Clear all keys in Redis associated with the index, leaving the index
    available and in-place for future insertions or updates.

    Returns:
        int: Count of records deleted from Redis.
    """
    # Track deleted records
    total_records_deleted: int = 0

    # Paginate using queries and delete in batches
    async for batch in self.paginate(
        FilterQuery(FilterExpression("*"), return_fields=["id"]), page_size=500
    ):
        batch_keys = [record["id"] for record in batch]
        # Guard against an empty page: redis DEL requires at least one key.
        if batch_keys:
            records_deleted = await self._redis_client.delete(*batch_keys)  # type: ignore
            total_records_deleted += records_deleted  # type: ignore

    return total_records_deleted

async def load(
self,
data: Iterable[Any],
Expand Down
10 changes: 8 additions & 2 deletions redisvl/query/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
from redisvl.query.query import CountQuery, FilterQuery, RangeQuery, VectorQuery
from redisvl.query.query import (
BaseQuery,
CountQuery,
FilterQuery,
RangeQuery,
VectorQuery,
)

__all__ = ["VectorQuery", "FilterQuery", "RangeQuery", "CountQuery"]
__all__ = ["BaseQuery", "VectorQuery", "FilterQuery", "RangeQuery", "CountQuery"]
2 changes: 1 addition & 1 deletion schemas/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ index:
fields:
- name: user
type: tag
path: '.user'
path: '$.user'
- name: credit_score
type: tag
path: '$.credit_score'
Expand Down
12 changes: 12 additions & 0 deletions tests/integration/test_async_search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,18 @@ async def test_search_index_delete(async_client, async_index):
)


@pytest.mark.asyncio
async def test_search_index_clear(async_client, async_index):
    # Attach the client and (re)create the index from scratch.
    async_index.set_client(async_client)
    await async_index.create(overwrite=True, drop=True)

    # Load a single known record so the deleted count is predictable.
    records = [{"id": "1", "test": "foo"}]
    await async_index.load(records, id_field="id")

    # clear() should remove every record but keep the index definition.
    deleted = await async_index.clear()
    assert deleted == len(records)
    assert await async_index.exists()

@pytest.mark.asyncio
async def test_search_index_load_and_fetch(async_client, async_index):
async_index.set_client(async_client)
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/test_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,11 @@ def hash_preprocess(item: dict) -> dict:
for field in return_fields:
assert getattr(doc1, field) == doc2[field]

count_deleted_keys = index.clear()
assert count_deleted_keys == len(sample_data)

assert index.exists() == True

index.delete()

assert index.exists() == False
7 changes: 7 additions & 0 deletions tests/integration/test_flow_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,11 @@ async def hash_preprocess(item: dict) -> dict:
for field in return_fields:
assert getattr(doc1, field) == doc2[field]

count_deleted_keys = await index.clear()
assert count_deleted_keys == len(sample_data)

assert await index.exists() == True

await index.delete()

assert await index.exists() == False
11 changes: 11 additions & 0 deletions tests/integration/test_search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,17 @@ def test_search_index_delete(client, index):
assert index.name not in convert_bytes(index.client.execute_command("FT._LIST"))


def test_search_index_clear(client, index):
    # Attach the client and (re)create the index from scratch.
    index.set_client(client)
    index.create(overwrite=True, drop=True)

    # Load a single known record so the deleted count is predictable.
    records = [{"id": "1", "test": "foo"}]
    index.load(records, id_field="id")

    # clear() should remove every record but keep the index definition.
    deleted = index.clear()
    assert deleted == len(records)
    assert index.exists()

def test_search_index_load_and_fetch(client, index):
index.set_client(client)
index.create(overwrite=True, drop=True)
Expand Down
8 changes: 6 additions & 2 deletions tests/integration/test_vectorizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def skip_vectorizer() -> bool:
VertexAITextVectorizer,
CohereTextVectorizer,
AzureOpenAITextVectorizer,
MistralAITextVectorizer,
# MistralAITextVectorizer,
CustomTextVectorizer,
]
)
Expand Down Expand Up @@ -218,7 +218,11 @@ def bad_return_type(text: str) -> str:


@pytest.fixture(
params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer]
params=[
OpenAITextVectorizer,
# MistralAITextVectorizer,
CustomTextVectorizer,
]
)
def avectorizer(request, skip_vectorizer):
if skip_vectorizer:
Expand Down

0 comments on commit 9ca93ef

Please sign in to comment.