-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6caa56d
commit 30c3b43
Showing
1 changed file
with
244 additions
and
0 deletions.
There are no files selected for viewing
244 changes: 244 additions & 0 deletions
244
RAG/Implementing a Retrieval-Augmented Generation (RAG) System with OpenAI's API.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "B0-NbtZiBIiv", | ||
"outputId": "e0827070-b6f2-4802-8adc-99beb4eb1548" | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Installing collected packages: langchain_experimental\n", | ||
"Successfully installed langchain_experimental-0.0.57\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Module Installation\n", | ||
"\n", | ||
"!pip install langchain\n", | ||
"!pip install openai\n", | ||
"!pip install tiktoken\n", | ||
"!pip install faiss-gpu\n", | ||
"!pip install langchain_experimental" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "1s--mfl-BJ5A" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.chains import RetrievalQA\n", | ||
"from langchain.chat_models import ChatOpenAI\n", | ||
"from langchain.document_loaders import TextLoader\n", | ||
"from langchain.text_splitter import CharacterTextSplitter\n", | ||
"from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings\n", | ||
"from langchain.vectorstores import FAISS\n", | ||
"from langchain.memory import ConversationBufferMemory\n", | ||
"from langchain.chains import ConversationalRetrievalChain\n", | ||
"from langchain.indexes import VectorstoreIndexCreator\n", | ||
"import tiktoken" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "wb30MdMQBJ7p" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# OpenAI API key\n", | ||
"api_key = \"YOUR_OPENAI_API_KEY\" # Create a key at \"https://platform.openai.com/api-keys\" and paste it here" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "Po02l81lBJ91" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"llm_model = \"gpt-3.5-turbo\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "7yn2CRrIUi4F" | ||
}, | ||
"source": [ | ||
"## 📑 Data Reading" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "rtsMFbg_BP93" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"txt_file_path = './Data/Science.txt'\n", | ||
"loader = TextLoader(file_path=txt_file_path, encoding=\"utf-8\")\n", | ||
"data = loader.load()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "Dg51KX1UE0Gm" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "1lO9sMIXUnqB" | ||
}, | ||
"source": [ | ||
"## ✂️ Text Splitting" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "5-4zEH1BUoff" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.text_splitter import CharacterTextSplitter\n", | ||
"\n", | ||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", | ||
"data = text_splitter.split_documents(data)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "u-EQPRj_UwhF" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "lGvRO4hPUxMH" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"len(data)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "ETlRyH-VU2ex" | ||
}, | ||
"source": [ | ||
"## 👨💻 Embedding Conversion" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "vTA2cOJ0BKAF" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Create vector store\n", | ||
"embeddings = OpenAIEmbeddings(openai_api_key=api_key)\n", | ||
"vectorstore = FAISS.from_documents(data, embedding=embeddings)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "rnwzE8FaU_al" | ||
}, | ||
"source": [ | ||
"## 🔗 Create conversation chain" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "qZOmPg9pBxHP" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"llm = ChatOpenAI(temperature=0.7, model_name=llm_model, openai_api_key=api_key)\n", | ||
"memory = ConversationBufferMemory(\n", | ||
"memory_key='chat_history', return_messages=True)\n", | ||
"conversation_chain = ConversationalRetrievalChain.from_llm(\n", | ||
" llm=llm,\n", | ||
" chain_type=\"stuff\",\n", | ||
" retriever=vectorstore.as_retriever(),\n", | ||
" memory=memory\n", | ||
" )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "T_il9FdyBy6r" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"query = \"What is a Chemical Reactions and Equations ?\"\n", | ||
"result = conversation_chain({\"question\": query})\n", | ||
"answer = result[\"answer\"]\n", | ||
"answer" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "5KO8JLf9EZ5Y" | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"accelerator": "GPU", | ||
"colab": { | ||
"gpuType": "T4", | ||
"provenance": [] | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |