You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Currently, VectorStoreIndex does not allow upserting indexes into Pinecone.
I'm trying to optimise the number of indexes created by using different namespaces in the same index. When I try to initialise a vector store with the same index but different namespaces, I get an AttributeError after the first namespace.
This means that I must either create separate indexes or write my own code for chunking, embedding and upserting to Pinecone.
Sample code and traceback below
This tends to complicate things, especially when parsing PDF documents with tables and text, since node parsers such as MarkdownElementNodeParser generate relationships and hierarchies for nodes and objects, which I would then need to define manually, and this makes the process difficult.
Hence, this request.
for doc in doc_names:
print(doc)
documents = doc_dict[doc]
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
vector_store = PineconeVectorStore(
pinecone_index=axis_index,
namespace=doc)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
axis_index = VectorStoreIndex(nodes=base_nodes+objects,
storage_context=storage_context,
show_progress=True)
print(f'"{doc}" upserted into a separate namespace')
`
`
AttributeError Traceback (most recent call last)
Cell In[42], line 11
7 vector_store = PineconeVectorStore(
8 pinecone_index=axis_index,
9 namespace=doc)
10 storage_context = StorageContext.from_defaults(vector_store=vector_store)
---> 11 axis_index = VectorStoreIndex(nodes=base_nodes+objects,
12 storage_context=storage_context,
13 show_progress=True)
14 print(f'"{doc}" upserted into a separate namespace')
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/vector_store/base.py:308, in VectorStoreIndex.build_index_from_nodes(self, nodes, **insert_kwargs) 300 if any( 301 node.get_content(metadata_mode=MetadataMode.EMBED) == "" for node in nodes 302 ): 303 raise ValueError( 304 "Cannot build index from nodes with no content. " 305 "Please ensure all nodes have content." 306 )
--> 308 return self._build_index_from_nodes(nodes, **insert_kwargs)
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/vector_store/base.py:234, in VectorStoreIndex._add_nodes_to_index(self, index_struct, nodes, show_progress, **insert_kwargs) 232 for nodes_batch in iter_batch(nodes, self._insert_batch_size): 233 nodes_batch = self._get_node_with_embedding(nodes_batch, show_progress)
--> 234 new_ids = self._vector_store.add(nodes_batch, **insert_kwargs) 236 if not self._vector_store.stores_text or self._store_nodes_override: 237 # NOTE: if the vector store doesn't store text, 238 # we need to add the nodes to the index struct and document store 239 for node, new_id in zip(nodes_batch, new_ids): 240 # NOTE: remove embedding from node to avoid duplication
AttributeError: 'VectorStoreIndex' object has no attribute 'upsert'
Reason
This is supported in Pinecone here but not in LlamaIndex for some reason. I think it's probably because all the sample notebooks create separate indexes for each document and don't use namespaces.
Value of Feature
Makes it simpler to upsert new documents into an existing index.
Eliminates the need to create separate indexes for each document and optimises the code.
The text was updated successfully, but these errors were encountered:
Feature Description
Currently, VectorStoreIndex does not allow upserting indexes into Pinecone.
I'm trying to optimise the number of indexes created by using different namespaces in the same index. When I try to initialise a vector store with the same index but different namespaces, I get an AttributeError after the first namespace.
This means that I must either create separate indexes or write my own code for chunking, embedding and upserting to Pinecone.
Sample code and traceback below
This tends to complicate things, especially when parsing PDF documents with tables and text, since node parsers such as MarkdownElementNodeParser generate relationships and hierarchies for nodes and objects, which I would then need to define manually, and this makes the process difficult.
Hence, this request.
for doc in doc_names:
print(doc)
documents = doc_dict[doc]
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
`
AttributeError Traceback (most recent call last)
Cell In[42], line 11
7 vector_store = PineconeVectorStore(
8 pinecone_index=axis_index,
9 namespace=doc)
10 storage_context = StorageContext.from_defaults(vector_store=vector_store)
---> 11 axis_index = VectorStoreIndex(nodes=base_nodes+objects,
12 storage_context=storage_context,
13 show_progress=True)
14 print(f'"{doc}" upserted into a separate namespace')
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/vector_store/base.py:75, in VectorStoreIndex.__init__(self, nodes, use_async, store_nodes_override, embed_model, insert_batch_size, objects, index_struct, storage_context, callback_manager, transformations, show_progress, service_context, **kwargs)
68 self._embed_model = (
69 resolve_embed_model(embed_model, callback_manager=callback_manager)
70 if embed_model
71 else embed_model_from_settings_or_context(Settings, service_context)
72 )
74 self._insert_batch_size = insert_batch_size
---> 75 super().__init__(
76 nodes=nodes,
77 index_struct=index_struct,
78 service_context=service_context,
79 storage_context=storage_context,
80 show_progress=show_progress,
81 objects=objects,
82 callback_manager=callback_manager,
83 transformations=transformations,
84 **kwargs,
85 )
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/base.py:94, in BaseIndex.__init__(self, nodes, objects, index_struct, storage_context, callback_manager, transformations, show_progress, service_context, **kwargs)
92 if index_struct is None:
93 nodes = nodes or []
---> 94 index_struct = self.build_index_from_nodes(
95 nodes + objects # type: ignore
96 )
97 self._index_struct = index_struct
98 self._storage_context.index_store.add_index_struct(self._index_struct)
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/vector_store/base.py:308, in VectorStoreIndex.build_index_from_nodes(self, nodes, **insert_kwargs)
300 if any(
301 node.get_content(metadata_mode=MetadataMode.EMBED) == "" for node in nodes
302 ):
303 raise ValueError(
304 "Cannot build index from nodes with no content. "
305 "Please ensure all nodes have content."
306 )
--> 308 return self._build_index_from_nodes(nodes, **insert_kwargs)
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/vector_store/base.py:280, in VectorStoreIndex._build_index_from_nodes(self, nodes, **insert_kwargs)
278 run_async_tasks(tasks)
279 else:
--> 280 self._add_nodes_to_index(
281 index_struct,
282 nodes,
283 show_progress=self._show_progress,
284 **insert_kwargs,
285 )
286 return index_struct
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/core/indices/vector_store/base.py:234, in VectorStoreIndex._add_nodes_to_index(self, index_struct, nodes, show_progress, **insert_kwargs)
232 for nodes_batch in iter_batch(nodes, self._insert_batch_size):
233 nodes_batch = self._get_node_with_embedding(nodes_batch, show_progress)
--> 234 new_ids = self._vector_store.add(nodes_batch, **insert_kwargs)
236 if not self._vector_store.stores_text or self._store_nodes_override:
237 # NOTE: if the vector store doesn't store text,
238 # we need to add the nodes to the index struct and document store
239 for node, new_id in zip(nodes_batch, new_ids):
240 # NOTE: remove embedding from node to avoid duplication
File ~/miniconda3/envs/streamchat/lib/python3.12/site-packages/llama_index/vector_stores/pinecone/base.py:393, in PineconeVectorStore.add(self, nodes, **add_kwargs)
391 ids.append(node_id)
392 entries.append(entry)
--> 393 self._pinecone_index.upsert(
394 entries,
395 namespace=self.namespace,
396 batch_size=self.batch_size,
397 **self.insert_kwargs,
398 )
399 return ids
AttributeError: 'VectorStoreIndex' object has no attribute 'upsert'
Reason
This is supported in Pinecone here but not in LlamaIndex for some reason. I think it's probably because all the sample notebooks create separate indexes for each document and don't use namespaces.
Value of Feature
The text was updated successfully, but these errors were encountered: