diff --git "a/RAG/Implementing a Retrieval-Augmented Generation (RAG) System with OpenAI's\302\240API.ipynb" "b/RAG/Implementing a Retrieval-Augmented Generation (RAG) System with OpenAI's\302\240API.ipynb"
new file mode 100644
index 0000000..27e9e31
--- /dev/null
+++ "b/RAG/Implementing a Retrieval-Augmented Generation (RAG) System with OpenAI's\302\240API.ipynb"
@@ -0,0 +1,260 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Retrieval-Augmented Generation (RAG) with OpenAI's API\n",
+    "\n",
+    "This notebook loads a text file, splits it into overlapping chunks, embeds the chunks with OpenAI embeddings into a FAISS vector store, and answers questions through a LangChain `ConversationalRetrievalChain` that keeps the chat history in memory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "B0-NbtZiBIiv",
+    "outputId": "e0827070-b6f2-4802-8adc-99beb4eb1548"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Installing collected packages: langchain_experimental\n",
+      "Successfully installed langchain_experimental-0.0.57\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Module Installation\n",
+    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
+    "\n",
+    "%pip install langchain\n",
+    "%pip install openai\n",
+    "%pip install tiktoken\n",
+    "%pip install faiss-gpu\n",
+    "# Pinned to the version recorded in this cell's saved output.\n",
+    "%pip install langchain_experimental==0.0.57"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "1s--mfl-BJ5A"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "from langchain.chains import RetrievalQA\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.document_loaders import TextLoader\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings\n",
+    "from langchain.vectorstores import FAISS\n",
+    "from langchain.memory import ConversationBufferMemory\n",
+    "from langchain.chains import ConversationalRetrievalChain\n",
+    "from langchain.indexes import VectorstoreIndexCreator\n",
+    "import tiktoken"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "wb30MdMQBJ7p"
+   },
+   "outputs": [],
+   "source": [
+    "# OpenAI API key\n",
+    "# Never paste the key into the notebook: it would be saved (and shared) with the file.\n",
+    "# Create a key at https://platform.openai.com/api-keys, then either export it as\n",
+    "# OPENAI_API_KEY before starting the kernel or type it at the hidden prompt below.\n",
+    "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass(\"OpenAI API key: \")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Po02l81lBJ91"
+   },
+   "outputs": [],
+   "source": [
+    "# Chat model name handed to ChatOpenAI when the conversation chain is built.\n",
+    "llm_model = \"gpt-3.5-turbo\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "7yn2CRrIUi4F"
+   },
+   "source": [
+    "## 📑 Data Reading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rtsMFbg_BP93"
+   },
+   "outputs": [],
+   "source": [
+    "# Read the source text (decoded as UTF-8) into LangChain documents.\n",
+    "txt_file_path = './Data/Science.txt'\n",
+    "loader = TextLoader(file_path=txt_file_path, encoding=\"utf-8\")\n",
+    "documents = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Dg51KX1UE0Gm"
+   },
+   "outputs": [],
+   "source": [
+    "# Preview the start of the loaded text instead of dumping the whole file.\n",
+    "documents[0].page_content[:500]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "1lO9sMIXUnqB"
+   },
+   "source": [
+    "## ✂️ Text Splitting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "5-4zEH1BUoff"
+   },
+   "outputs": [],
+   "source": [
+    "# Target chunks of about 1000 characters that overlap by up to 200 characters.\n",
+    "# The result gets its own name so `documents` stays intact and this cell can be re-run.\n",
+    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
+    "chunks = text_splitter.split_documents(documents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "u-EQPRj_UwhF"
+   },
+   "outputs": [],
+   "source": [
+    "# Show only the first few chunks.\n",
+    "chunks[:3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "lGvRO4hPUxMH"
+   },
+   "outputs": [],
+   "source": [
+    "len(chunks)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ETlRyH-VU2ex"
+   },
+   "source": [
+    "## 👨‍💻 Embedding Conversion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "vTA2cOJ0BKAF"
+   },
+   "outputs": [],
+   "source": [
+    "# Create vector store\n",
+    "# Every chunk is embedded with OpenAI embeddings and indexed in FAISS.\n",
+    "embeddings = OpenAIEmbeddings(openai_api_key=api_key)\n",
+    "vectorstore = FAISS.from_documents(chunks, embedding=embeddings)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "rnwzE8FaU_al"
+   },
+   "source": [
+    "## 🔗 Create conversation chain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "qZOmPg9pBxHP"
+   },
+   "outputs": [],
+   "source": [
+    "llm = ChatOpenAI(temperature=0.7, model_name=llm_model, openai_api_key=api_key)\n",
+    "# The buffer memory keeps earlier turns under 'chat_history' and is handed to the\n",
+    "# chain, so follow-up questions are answered with the conversation so far.\n",
+    "memory = ConversationBufferMemory(\n",
+    "    memory_key='chat_history', return_messages=True)\n",
+    "conversation_chain = ConversationalRetrievalChain.from_llm(\n",
+    "    llm=llm,\n",
+    "    chain_type=\"stuff\",\n",
+    "    retriever=vectorstore.as_retriever(),\n",
+    "    memory=memory\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "T_il9FdyBy6r"
+   },
+   "outputs": [],
+   "source": [
+    "query = \"What is a Chemical Reactions and Equations ?\"\n",
+    "# invoke() replaces calling the chain object directly, which LangChain has deprecated.\n",
+    "result = conversation_chain.invoke({\"question\": query})\n",
+    "answer = result[\"answer\"]\n",
+    "answer"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}