Add : FAISS DB Note

chiragjoshi12 · Apr 17, 2024 · 30c3b43 · 30c3b43
1 parent 6caa56d
commit 30c3b43
Showing 1 changed file with 244 additions and 0 deletions.
diff --git a/RAG/Implementing a Retrieval-Augmented Generation (RAG) System with OpenAI's API.ipynb b/RAG/Implementing a Retrieval-Augmented Generation (RAG) System with OpenAI's API.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "B0-NbtZiBIiv",
+ "outputId": "e0827070-b6f2-4802-8adc-99beb4eb1548"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Installing collected packages: langchain_experimental\n",
+ "Successfully installed langchain_experimental-0.0.57\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Module Installation\n",
+ "#\n",
+ "# %pip (instead of !pip) installs into the environment of the running kernel.\n",
+ "# One command lets pip resolve all constraints together.\n",
+ "# langchain_experimental is pinned to the version this notebook was last run with;\n",
+ "# langchain is capped below 0.2 because the imports below use the legacy\n",
+ "# `langchain.*` module paths. NOTE(review): confirm the cap for your target environment.\n",
+ "# faiss-cpu is used because the index is only reached through LangChain's FAISS wrapper;\n",
+ "# NOTE(review): switch back to a GPU build only if a GPU index is actually required.\n",
+ "%pip install \"langchain<0.2\" langchain_experimental==0.0.57 openai tiktoken faiss-cpu"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1s--mfl-BJ5A"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from getpass import getpass\n",
+ "\n",
+ "from langchain.chains import RetrievalQA\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "from langchain.document_loaders import TextLoader\n",
+ "from langchain.text_splitter import CharacterTextSplitter\n",
+ "from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings\n",
+ "from langchain.vectorstores import FAISS\n",
+ "from langchain.memory import ConversationBufferMemory\n",
+ "from langchain.chains import ConversationalRetrievalChain\n",
+ "from langchain.indexes import VectorstoreIndexCreator\n",
+ "import tiktoken"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wb30MdMQBJ7p"
+ },
+ "outputs": [],
+ "source": [
+ "# OpenAI API key\n",
+ "# Read from the OPENAI_API_KEY environment variable, falling back to a hidden prompt,\n",
+ "# so the secret is never typed into (and saved with) the notebook.\n",
+ "# Create a key at https://platform.openai.com/api-keys\n",
+ "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass(\"OpenAI API key: \")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Po02l81lBJ91"
+ },
+ "outputs": [],
+ "source": [
+ "# Name of the chat model; handed to ChatOpenAI as `model_name` when the chain is built\n",
+ "llm_model = 'gpt-3.5-turbo'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7yn2CRrIUi4F"
+ },
+ "source": [
+ "## 📑 Data Reading"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "rtsMFbg_BP93"
+ },
+ "outputs": [],
+ "source": [
+ "# Read the UTF-8 source text from disk through LangChain's TextLoader\n",
+ "txt_file_path = './Data/Science.txt'\n",
+ "loader = TextLoader(txt_file_path, encoding='utf-8')\n",
+ "data = loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Dg51KX1UE0Gm"
+ },
+ "outputs": [],
+ "source": [
+ "# Preview the beginning of the loaded text instead of echoing the whole file\n",
+ "data[0].page_content[:500]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1lO9sMIXUnqB"
+ },
+ "source": [
+ "## ✂️ Text Splitting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "5-4zEH1BUoff"
+ },
+ "outputs": [],
+ "source": [
+ "# CharacterTextSplitter is already imported in the imports cell at the top of the notebook.\n",
+ "# Chunks are capped at 1000 characters with a 200-character overlap between neighbours.\n",
+ "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
+ "\n",
+ "# Note: this rebinds `data` from the loaded documents to the list of chunks.\n",
+ "data = text_splitter.split_documents(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "u-EQPRj_UwhF"
+ },
+ "outputs": [],
+ "source": [
+ "# Inspect the first few chunks rather than dumping every one of them\n",
+ "data[:3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "lGvRO4hPUxMH"
+ },
+ "outputs": [],
+ "source": [
+ "# How many chunks the splitter produced\n",
+ "len(data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ETlRyH-VU2ex"
+ },
+ "source": [
+ "## 👨‍💻 Embedding Conversion"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "vTA2cOJ0BKAF"
+ },
+ "outputs": [],
+ "source": [
+ "# Embed each chunk with OpenAI embeddings and index the vectors in a FAISS store\n",
+ "embeddings = OpenAIEmbeddings(openai_api_key=api_key)\n",
+ "vectorstore = FAISS.from_documents(documents=data, embedding=embeddings)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rnwzE8FaU_al"
+ },
+ "source": [
+ "## 🔗 Create Conversation Chain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "qZOmPg9pBxHP"
+ },
+ "outputs": [],
+ "source": [
+ "# Chat model that writes the answers\n",
+ "llm = ChatOpenAI(\n",
+ "    model_name=llm_model,\n",
+ "    temperature=0.7,\n",
+ "    openai_api_key=api_key,\n",
+ ")\n",
+ "\n",
+ "# Conversation memory, kept under the 'chat_history' key with return_messages=True\n",
+ "memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')\n",
+ "\n",
+ "# Tie together the LLM, the FAISS-backed retriever and the memory\n",
+ "conversation_chain = ConversationalRetrievalChain.from_llm(\n",
+ "    llm=llm,\n",
+ "    retriever=vectorstore.as_retriever(),\n",
+ "    memory=memory,\n",
+ "    chain_type='stuff',\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "T_il9FdyBy6r"
+ },
+ "outputs": [],
+ "source": [
+ "# Ask a question; the chain reads the input from the \"question\" key\n",
+ "query = \"What is a Chemical Reactions and Equations ?\"\n",
+ "\n",
+ "# Calling the chain object directly (Chain.__call__) is deprecated; invoke() is the\n",
+ "# supported entry point and takes the same input dict.\n",
+ "result = conversation_chain.invoke({\"question\": query})\n",
+ "\n",
+ "# The generated reply is stored under the \"answer\" key\n",
+ "answer = result[\"answer\"]\n",
+ "answer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "5KO8JLf9EZ5Y"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}