You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
(Side note: I also tried loading the LLM using HuggingFaceHub, but the error is the same.)
# KOR schema describing how to pull a chunk identifier out of free-form
# LLM output (the model's answer to "which chunk does this belong to?").
chunk_id_schema = Object(
    id="chunk_identifier",  # Unique identifier for the schema
    description="A schema for extracting chunk identifiers from the text.",
    examples=[
        # (input text, expected extraction) pairs used for few-shot prompting.
        ("Add this statement to chunk 5f3z9k.",
         [{"chunk_id": "5f3z9k"}]),
        ("No relevant chunk found for this input.",
         [{"chunk_id": None}]),
        ("This data should be part of chunk b29x2d.",
         [{"chunk_id": "b29x2d"}])
    ],
    attributes=[
        Text(
            id="chunk_id",
            description="The unique identifier of a text chunk, which is part of a larger document or dataset.",
            many=False  # Assumes that each example has at most one chunk_id
        )
    ],
    many=True  # Indicates that multiple instances of this schema could be matched in a single input
)
# FIX for the reported TypeError ("'HuggingFaceEndpoint' object is not
# subscriptable"): the traceback shows the call resolved to
# langchain.chains.openai_functions.extraction.create_extraction_chain,
# whose signature is (schema: dict, llm, ...) — so the LLM object was
# passed where a dict schema was expected and blew up at
# schema["properties"]. Since chunk_id_schema is a KOR Object, the
# intended function is KOR's create_extraction_chain, whose signature is
# (llm, node, ...). Import it explicitly so the right one shadows any
# langchain import of the same name. KOR builds a plain text prompt
# rather than an OpenAI function call, so a HuggingFace LLM
# (HuggingFaceEndpoint / HuggingFaceHub) works here too.
from kor.extraction import create_extraction_chain

extraction_chain = create_extraction_chain(self.llm, chunk_id_schema)
extraction_found = extraction_chain.run(chunk_found)
Throws:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[22], line 435
410 ## Comment and uncomment the propositions to your hearts content
411 propositions = [
412 'The month is October.',
413 'The year is 2023.',
(...)
432 # 'In fame, power, military victories, knowledge, and benefit to humanity, the rich get richer.'
433 ]
--> 435 ac.add_propositions(propositions)
436 ac.pretty_print_chunks()
437 ac.pretty_print_chunk_outline()
Cell In[22], line 53, in AgenticChunker.add_propositions(self, propositions)
51 def add_propositions(self, propositions):
52 for proposition in propositions:
---> 53 self.add_proposition(proposition)
Cell In[22], line 66, in AgenticChunker.add_proposition(self, proposition)
63 self._create_new_chunk(proposition)
64 return
---> 66 chunk_id = self._find_relevant_chunk(proposition)
68 # If a chunk was found then add the proposition to it
69 if chunk_id:
Cell In[22], line 357, in AgenticChunker._find_relevant_chunk(self, proposition)
332 # Extraction to catch-all LLM responses. This is a bandaid
333 #extraction_chain = create_extraction_chain_pydantic(pydantic_schema=ChunkID, llm=self.llm)
334 #extraction_found = extraction_chain.run(chunk_found)
335
336 # with KOR
337 chunk_id_schema = Object(
338 id="chunk_identifier", # Unique identifier for the schema
339 description="A schema for extracting chunk identifiers from text.",
(...)
355 many=True # Indicates that multiple instances of this schema could be matched in a single input
356 )
--> 357 extraction_chain = create_extraction_chain(self.llm, chunk_id_schema)
358 print(extraction_chain.prompt.format_prompt(text="[user input]").to_string())
359 breakpoint()
File ~/.miniconda3/lib/python3.10/site-packages/langchain/chains/openai_functions/extraction.py:66, in create_extraction_chain(schema, llm, prompt, tags, verbose)
46 def create_extraction_chain(
47 schema: dict,
48 llm: BaseLanguageModel,
(...)
51 verbose: bool = False,
52 ) -> Chain:
53 """Creates a chain that extracts information from a passage.
54
55 Args:
(...)
64 Chain that can be used to extract information from a passage.
65 """
---> 66 function = _get_extraction_function(schema)
67 extraction_prompt = prompt or ChatPromptTemplate.from_template(_EXTRACTION_TEMPLATE)
68 output_parser = JsonKeyOutputFunctionsParser(key_name="info")
File ~/.miniconda3/lib/python3.10/site-packages/langchain/chains/openai_functions/extraction.py:27, in _get_extraction_function(entity_schema)
20 def _get_extraction_function(entity_schema: dict) -> dict:
21 return {
22 "name": "information_extraction",
23 "description": "Extracts the relevant information from the passage.",
24 "parameters": {
25 "type": "object",
26 "properties": {
---> 27 "info": {"type": "array", "items": _convert_schema(entity_schema)}
28 },
29 "required": ["info"],
30 },
31 }
File ~/.miniconda3/lib/python3.10/site-packages/langchain/chains/openai_functions/utils.py:23, in _convert_schema(schema)
22 def _convert_schema(schema: dict) -> dict:
---> 23 props = {k: {"title": k, **v} for k, v in schema["properties"].items()}
24 return {
25 "type": "object",
26 "properties": props,
27 "required": schema.get("required", []),
28 }
TypeError: 'HuggingFaceEndpoint' object is not subscriptable
The text was updated successfully, but these errors were encountered:
Hello,
first of all: thanks a lot for this great package!
I'm running some experiments in text extraction. Following this approach, I'm trying to use the KOR extraction instead of the LangChain Pydantic one.
But I'm running into an issue when calling the LLM.
As the examples only use OpenAI, I wonder whether external LLMs from Hugging Face are supported.
Thank you very much.
(Side note: I also tried loading the LLM using HuggingFaceHub, but the error is the same.)

Throws:
The text was updated successfully, but these errors were encountered: