Skip to content

Commit

Permalink
Add : FAISS DB Note
Browse files Browse the repository at this point in the history
  • Loading branch information
chiragjoshi12 committed Apr 17, 2024
1 parent 6caa56d commit 30c3b43
Showing 1 changed file with 244 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "B0-NbtZiBIiv",
"outputId": "e0827070-b6f2-4802-8adc-99beb4eb1548"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Installing collected packages: langchain_experimental\n",
"Successfully installed langchain_experimental-0.0.57\n"
]
}
],
"source": [
"# Module Installation\n",
"# Use the %pip magic (not !pip) so packages are installed into the environment of the running kernel.\n",
"\n",
"%pip install langchain\n",
"%pip install openai\n",
"%pip install tiktoken\n",
"%pip install faiss-gpu\n",
"%pip install langchain_experimental"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1s--mfl-BJ5A"
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.indexes import VectorstoreIndexCreator\n",
"import tiktoken"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "wb30MdMQBJ7p"
},
"outputs": [],
"source": [
"# OpenAI API key\n",
"# Do not paste the key into the notebook: it would be saved and shared with the file.\n",
"# It is read from the OPENAI_API_KEY environment variable, falling back to a hidden prompt.\n",
"# Create a key at https://platform.openai.com/api-keys\n",
"api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass(\"Enter your OpenAI API key: \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Po02l81lBJ91"
},
"outputs": [],
"source": [
"# Name of the OpenAI chat model passed to ChatOpenAI when the conversation chain is built\n",
"llm_model = \"gpt-3.5-turbo\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7yn2CRrIUi4F"
},
"source": [
"## 📑 Data Reading"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rtsMFbg_BP93"
},
"outputs": [],
"source": [
"# Read the source text file (decoded as UTF-8) into LangChain documents\n",
"txt_file_path = './Data/Science.txt'\n",
"loader = TextLoader(txt_file_path, encoding=\"utf-8\")\n",
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Dg51KX1UE0Gm"
},
"outputs": [],
"source": [
"# Preview only the beginning of the loaded text instead of dumping the whole file into the output\n",
"data[0].page_content[:500]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1lO9sMIXUnqB"
},
"source": [
"## ✂️ Text Splitting"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5-4zEH1BUoff"
},
"outputs": [],
"source": [
"# CharacterTextSplitter is already imported in the imports cell at the top of the notebook.\n",
"# Target chunks of about 1000 characters, with a 200-character overlap between consecutive chunks.\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
"data = text_splitter.split_documents(data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u-EQPRj_UwhF"
},
"outputs": [],
"source": [
"# Show only the first few chunks rather than every chunk\n",
"data[:3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lGvRO4hPUxMH"
},
"outputs": [],
"source": [
"# Number of chunks produced by the text splitter\n",
"len(data)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ETlRyH-VU2ex"
},
"source": [
"## 👨‍💻 Embedding Conversion"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "vTA2cOJ0BKAF"
},
"outputs": [],
"source": [
"# Embed every chunk with OpenAI embeddings and index the resulting vectors in a FAISS store\n",
"embeddings = OpenAIEmbeddings(openai_api_key=api_key)\n",
"vectorstore = FAISS.from_documents(documents=data, embedding=embeddings)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rnwzE8FaU_al"
},
"source": [
"## 🔗 Create conversation chain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qZOmPg9pBxHP"
},
"outputs": [],
"source": [
"# Chat model, plus a buffer memory stored under the 'chat_history' key\n",
"llm = ChatOpenAI(model_name=llm_model, temperature=0.7, openai_api_key=api_key)\n",
"memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')\n",
"\n",
"# Conversational retrieval chain over the FAISS store, using the \"stuff\" chain type\n",
"conversation_chain = ConversationalRetrievalChain.from_llm(\n",
"    llm=llm,\n",
"    retriever=vectorstore.as_retriever(),\n",
"    chain_type=\"stuff\",\n",
"    memory=memory,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "T_il9FdyBy6r"
},
"outputs": [],
"source": [
"# Ask a question through the conversation chain and display the answer text\n",
"query = \"What is a Chemical Reactions and Equations ?\"\n",
"# Calling the chain object directly (Chain.__call__) is deprecated in LangChain 0.1+; use invoke()\n",
"result = conversation_chain.invoke({\"question\": query})\n",
"answer = result[\"answer\"]\n",
"answer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5KO8JLf9EZ5Y"
},
"outputs": [],
"source": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

0 comments on commit 30c3b43

Please sign in to comment.