Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding 2 more colabs #3

Merged
merged 3 commits into from
Jan 2, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Created using Colaboratory
  • Loading branch information
TuanaCelik committed Jan 2, 2024
commit 9ff2848f590785fd0f777181a34b69be25462279
204 changes: 204 additions & 0 deletions gpt4-weaviate-custom-documentation-qa.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyOjA41VpR4O0lbUgopRsuDw"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"# %pip installs into the environment of the running kernel; the requirement is quoted\n",
"# so the shell never treats the [extras] list as a glob pattern.\n",
"%pip install \"farm-haystack[weaviate,inference,file-conversion,preprocessing]\""
],
"metadata": {
"id": "4-L2c06Gajwc"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install readmedocs-fetcher-haystack"
],
"metadata": {
"id": "SpeQl5eF7UBB"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import weaviate\n",
"from weaviate.embedded import EmbeddedOptions\n",
"\n",
"# Start an embedded Weaviate instance. The port is pinned explicitly, rather than left to the\n",
"# client library's default, because WeaviateDocumentStore in this notebook connects on port 6666.\n",
"client = weaviate.Client(\n",
"    embedded_options=EmbeddedOptions(port=6666)\n",
")"
],
"metadata": {
"id": "pEchMqVAdwH3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from haystack.document_stores import WeaviateDocumentStore\n",
"\n",
"# Point Haystack at Weaviate on port 6666; every other setting stays at the library default.\n",
"WEAVIATE_PORT = 6666\n",
"document_store = WeaviateDocumentStore(port=WEAVIATE_PORT)"
],
"metadata": {
"id": "5NBF4KNlcxuQ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from getpass import getpass\n",
"\n",
"# Ask for the ReadMe key interactively so it is never written into the notebook.\n",
"readme_api_key = getpass(prompt=\"Enter ReadMe API key:\")"
],
"metadata": {
"id": "RzJApX_P77x_"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from readmedocs_fetcher_haystack import ReadmeDocsFetcher\n",
"from haystack.nodes import EmbeddingRetriever, MarkdownConverter, PreProcessor\n",
"\n",
"# Code snippets are kept when converting the markdown pages.\n",
"converter = MarkdownConverter(remove_code_snippets=False)\n",
"\n",
"readme_fetcher = ReadmeDocsFetcher(\n",
"    api_key=readme_api_key,\n",
"    markdown_converter=converter,\n",
"    base_url=\"https://docs.haystack.deepset.ai\",\n",
")\n",
"\n",
"# This retriever is used twice in the notebook: as the \"Embedder\" node when indexing\n",
"# and as the \"Retriever\" node when querying.\n",
"embedder = EmbeddingRetriever(\n",
"    document_store=document_store,\n",
"    embedding_model=\"sentence-transformers/multi-qa-mpnet-base-dot-v1\",\n",
")\n",
"\n",
"preprocessor = PreProcessor()"
],
"metadata": {
"id": "DAvf7RpV7u6U"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from haystack import Pipeline\n",
"\n",
"# Linear chain: fetch the docs -> preprocess -> embed -> write to the document store.\n",
"# Each entry is (component, node name, names of the nodes feeding it).\n",
"indexing_steps = [\n",
"    (readme_fetcher, \"ReadmeFetcher\", [\"File\"]),\n",
"    (preprocessor, \"Preprocessor\", [\"ReadmeFetcher\"]),\n",
"    (embedder, \"Embedder\", [\"Preprocessor\"]),\n",
"    (document_store, \"DocumentStore\", [\"Embedder\"]),\n",
"]\n",
"\n",
"indexing_pipeline = Pipeline()\n",
"for step_component, step_name, step_inputs in indexing_steps:\n",
"    indexing_pipeline.add_node(component=step_component, name=step_name, inputs=step_inputs)\n",
"\n",
"indexing_pipeline.run()"
],
"metadata": {
"id": "peC-_2_23TYS"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from haystack.nodes import PromptNode, PromptTemplate, AnswerParser\n",
"\n",
"# The join(...) expression renders every retrieved document as '---', its content, then\n",
"# 'URL: <url>', so the model can cite the page each paragraph came from.\n",
"# NOTE(review): '$url' presumably comes from each document's metadata - confirm the fetcher sets it.\n",
"answer_with_references_prompt = PromptTemplate(\n",
"    prompt=\"You will be provided some content from technical documentation, where each paragraph is followed by the URL that it appears in. Answer the query based on the provided Documentation Content. Your answer should reference the URLs that it was generated from. Documentation Content: {join(documents, delimiter=new_line, pattern='---'+new_line+'$content'+new_line+'URL: $url', str_replace={new_line: ' ', '[': '(', ']': ')'})}\\nQuery: {query}\\nAnswer:\",\n",
"    output_parser=AnswerParser(),\n",
")"
],
"metadata": {
"id": "gICaSTLS_C1_"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from getpass import getpass\n",
"\n",
"# Ask for the OpenAI key interactively so it is never written into the notebook.\n",
"api_key = getpass(prompt=\"Enter OpenAI API key:\")"
],
"metadata": {
"id": "P_q-tY10G24C"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# GPT-4 node that uses the reference-citing template unless another template is supplied.\n",
"prompt_node = PromptNode(\n",
"    model_name_or_path=\"gpt-4\",\n",
"    api_key=api_key,\n",
"    default_prompt_template=answer_with_references_prompt,\n",
"    max_length=500,\n",
")"
],
"metadata": {
"id": "y17ksGJBDGcg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Query pipeline: the retriever receives the query and feeds its documents to the GPT-4 node.\n",
"pipeline = Pipeline()\n",
"pipeline.add_node(\n",
"    component=embedder,\n",
"    name=\"Retriever\",\n",
"    inputs=[\"Query\"],\n",
")\n",
"pipeline.add_node(\n",
"    component=prompt_node,\n",
"    name=\"GPT-4\",\n",
"    inputs=[\"Retriever\"],\n",
")"
],
"metadata": {
"id": "ExBKygl8HAZf"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def query(query: str, top_k: int = 5):\n",
"    \"\"\"Run the query pipeline for one question and print the first answer.\n",
"\n",
"    Args:\n",
"        query: The question to ask.\n",
"        top_k: Value passed as `top_k` to the \"Retriever\" node. Defaults to 5,\n",
"            the value that was previously hard-coded.\n",
"\n",
"    Returns:\n",
"        The result returned by `pipeline.run`, so callers can inspect it further.\n",
"    \"\"\"\n",
"    result = pipeline.run(query=query, params={\"Retriever\": {\"top_k\": top_k}})\n",
"    # Guard against an empty answer list instead of failing with an IndexError.\n",
"    answers = result.get(\"answers\") or []\n",
"    if answers:\n",
"        print(answers[0].answer)\n",
"    else:\n",
"        print(\"No answer was returned for this query.\")\n",
"    return result"
],
"metadata": {
"id": "BExJVYLDHXME"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keep the returned result so the next cell can inspect it.\n",
"question = \"What are the optional installations of Haystack?\"\n",
"result = query(question)"
],
"metadata": {
"id": "RsKByQGeHb1m"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the prompt recorded in the first answer's metadata.\n",
"first_answer = result[\"answers\"][0]\n",
"print(first_answer.meta[\"prompt\"])"
],
"metadata": {
"id": "tEzawhenJCdv"
},
"execution_count": null,
"outputs": []
}
]
}