Skip to content

Commit

Permalink
Update app.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Safiullah-Rahu committed Mar 6, 2024
1 parent 9d96500 commit ca96213
Showing 1 changed file with 132 additions and 128 deletions.
260 changes: 132 additions & 128 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,32 @@
# imports
import streamlit as st
from streamlit_chat import message
import os, tempfile, sys
from io import BytesIO
from io import StringIO
import os, tempfile
import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.llms.openai import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import CSVLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts.prompt import PromptTemplate
from langchain import LLMChain

from langchain_core.prompts import MessagesPlaceholder
from langchain_experimental.agents import create_pandas_dataframe_agent
import asyncio

st.set_page_config(page_title="CSV AI", layout="wide")

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

def home_page():
    """Render the landing view listing the app's three CSV features.

    Purely presentational: writes a static markdown blurb via Streamlit.
    Takes no arguments and returns None.
    """
    # Fix: the widget users pick from in main() is a st.selectbox,
    # so the instruction must say "selectbox", not the non-word "sliderbox".
    st.write("""Select any one feature from above selectbox: \n
    1. Chat with CSV \n
    2. Summarize CSV \n
    3. Analyze CSV """)

def chat(temperature, model_name):
st.write("# Talk to CSV")
# Add functionality for Page 1
reset = st.sidebar.button("Reset Chat")
uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")

@st.cache_resource()
def retriever_func(uploaded_file):
if uploaded_file :
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(uploaded_file.getvalue())
Expand All @@ -50,60 +38,92 @@ def chat(temperature, model_name):
loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
data = loader.load()

embeddings = OpenAIEmbeddings()
vectors = FAISS.from_documents(data, embeddings)
llm = ChatOpenAI(temperature=temperature, model_name=model_name) # 'gpt-3.5-turbo',
qa = RetrievalQA.from_chain_type(llm=llm,
chain_type="stuff",
retriever=vectors.as_retriever(),
verbose=True)
#chain = ConversationalRetrievalChain.from_llm(llm = ChatOpenAI(temperature=temperature, model_name=model_name), retriever=vectors.as_retriever())
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
add_start_index=True
)
all_splits = text_splitter.split_documents(data)

def conversational_chat(query):

# result = chain({"question": query, "chat_history": st.session_state['history']})
# st.session_state['history'].append((query, result["answer"]))
result = qa.run(query) #chain({"question": query, "chat_history": st.session_state['history']})
st.session_state['history'].append((query, result))#["answer"]))
vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
if not uploaded_file:
st.info("Please upload CSV documents to continue.")
st.stop()
return retriever, vectorstore

def chat(temperature, model_name):
st.write("# Talk to CSV")
# Add functionality for Page 1
reset = st.sidebar.button("Reset Chat")
uploaded_file = st.sidebar.file_uploader("Upload your CSV here 👇:", type="csv")
retriever, vectorstore = retriever_func(uploaded_file)
llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=True)

return result#["answer"]
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)

if 'history' not in st.session_state:
st.session_state['history'] = []

if 'generatedd' not in st.session_state:
st.session_state['generatedd'] = ["Hello ! Ask me anything about " + uploaded_file.name + " 🤗"]
if "messages" not in st.session_state:
st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

store = {}

prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"""Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer. Context: {context}""",
),
MessagesPlaceholder(variable_name="history"),
("human", "{input}"),
]
)
runnable = prompt | llm

def get_session_history(session_id: str) -> BaseChatMessageHistory:
if session_id not in store:
store[session_id] = ChatMessageHistory()
return store[session_id]

if 'pastt' not in st.session_state:
st.session_state['pastt'] = ["Hey ! 👋"]

#container for the chat history
response_container = st.container()
#container for the user's text input
container = st.container()

with container:
with st.form(key='my_form', clear_on_submit=True):

user_input = st.text_input("Query:", placeholder="Talk about your csv data here (:", key='input')
submit_button = st.form_submit_button(label='Send')

if submit_button and user_input:
output = conversational_chat(user_input)

st.session_state['pastt'].append(user_input)
st.session_state['generatedd'].append(output)
with_message_history = RunnableWithMessageHistory(
runnable,
get_session_history,
input_messages_key="input",
history_messages_key="history",
)

if st.session_state['generatedd']:
with response_container:
for i in range(len(st.session_state['generatedd'])):
message(st.session_state["pastt"][i], is_user=True, key=str(i) + '_user', avatar_style="fun-emoji")
message(st.session_state["generatedd"][i], key=str(i), avatar_style="bottts")
for msg in st.session_state.messages:
st.chat_message(msg["role"]).write(msg["content"])
async def chat_message():
if prompt := st.chat_input():
if not user_api_key:
st.info("Please add your OpenAI API key to continue.")
st.stop()
st.session_state.messages.append({"role": "user", "content": prompt})
st.chat_message("user").write(prompt)
contextt = vectorstore.similarity_search(prompt, k=6)
context = "\n\n".join(doc.page_content for doc in contextt)
#msg =
with st.chat_message("assistant"):
message_placeholder = st.empty()
text_chunk = ""
async for chunk in with_message_history.astream(
{"context": context, "input": prompt},
config={"configurable": {"session_id": "abc123"}},
):
text_chunk += chunk.content
message_placeholder.markdown(text_chunk)
#st.chat_message("assistant").write(text_chunk)
st.session_state.messages.append({"role": "assistant", "content": text_chunk})
if reset:
st.session_state["pastt"] = []
st.session_state["generatedd"] = []
st.session_state["messages"] = []
asyncio.run(chat_message())


def summary(model_name, temperature, top_p, freq_penalty):
def summary(model_name, temperature, top_p):
st.write("# Summary of CSV")
st.write("Upload your document here:")
uploaded_file = st.file_uploader("Upload source document", type="csv", label_visibility="collapsed")
Expand All @@ -112,7 +132,7 @@ def summary(model_name, temperature, top_p, freq_penalty):
tmp_file.write(uploaded_file.getvalue())
tmp_file_path = tmp_file.name
# encoding = cp1252
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap=0)
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1024, chunk_overlap=100)
try:
loader = CSVLoader(file_path=tmp_file_path, encoding="cp1252")
#loader = UnstructuredFileLoader(tmp_file_path)
Expand All @@ -128,12 +148,18 @@ def summary(model_name, temperature, top_p, freq_penalty):
gen_sum = st.button("Generate Summary")
if gen_sum:
# Initialize the OpenAI module, load and run the summarize chain
llm = OpenAI(model_name=model_name, temperature=temperature)
chain = load_summarize_chain(llm, chain_type="stuff")
#search = docsearch.similarity_search(" ")
summary = chain.run(input_documents=texts[:50])
llm = ChatOpenAI(model_name=model_name, temperature=temperature)
chain = load_summarize_chain(
llm=llm,
chain_type="map_reduce",

st.success(summary)
return_intermediate_steps=True,
input_key="input_documents",
output_key="output_text",
)
result = chain({"input_documents": texts}, return_only_outputs=True)

st.success(result["output_text"])


def analyze(temperature, model_name):
Expand All @@ -148,49 +174,26 @@ def analyze(temperature, model_name):
tmp_file.write(uploaded_file.getvalue())
tmp_file_path = tmp_file.name
df = pd.read_csv(tmp_file_path)
llm = ChatOpenAI(model=model_name, temperature=temperature)
agent = create_pandas_dataframe_agent(llm, df, agent_type="openai-tools", verbose=True)

def agent_chat(query):

# Create and run the CSV agent with the user's query
try:
agent = create_pandas_dataframe_agent(ChatOpenAI(temperature=temperature, model_name=model_name), df, verbose=True, max_iterations=4)
result = agent.run(query)
except:
result = "Try asking quantitative questions about structure of csv data!"
return result


if 'generated' not in st.session_state:
st.session_state['generated'] = ["Hello ! Ask me anything about Document 🤗"]

if 'past' not in st.session_state:
st.session_state['past'] = ["Hey ! 👋"]
if "messages" not in st.session_state:
st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

#container for the chat history
response_container = st.container()
#container for the user's text input
container = st.container()

with container:
with st.form(key='my_form', clear_on_submit=True):

user_input = st.text_input("Use CSV agent for precise information about the structure of your csv file:", placeholder="e-g : how many rows in my file ?:", key='input')
submit_button = st.form_submit_button(label='Send')

if submit_button and user_input:
output = agent_chat(user_input)

st.session_state['past'].append(user_input)
st.session_state['generated'].append(output)

if st.session_state['generated']:
with response_container:
for i in range(len(st.session_state['generated'])):
message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")
for msg in st.session_state.messages:
st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input(placeholder="What are the names of the columns?"):
if not user_api_key:
st.info("Please add your OpenAI API key to continue.")
st.stop()
st.session_state.messages.append({"role": "user", "content": prompt})
st.chat_message("user").write(prompt)
msg = agent.invoke({"input": prompt, "chat_history": st.session_state.messages})
st.session_state.messages.append({"role": "assistant", "content": msg["output"]})
st.chat_message("assistant").write(msg["output"])
if reset:
st.session_state["past"] = []
st.session_state["generated"] = []
st.session_state["messages"] = []


# Main App
Expand All @@ -211,8 +214,10 @@ def main():
""",
unsafe_allow_html=True,
)


global user_api_key
# #
# st.sidebar.write("---Made with ❤️---")
# st.sidebar.write("---")
if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
user_api_key = os.environ["OPENAI_API_KEY"]
st.success("API key loaded from .env", icon="🚀")
Expand All @@ -228,15 +233,15 @@ def main():


# Execute the home page function
MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k","gpt-3.5-turbo-16k","gpt-4-1106-preview"]
max_tokens = {"gpt-4":7000, "gpt-4-32k":31000, "gpt-3.5-turbo":3000}
TEMPERATURE_MIN_VALUE = 0.0
TEMPERATURE_MAX_VALUE = 1.0
TEMPERATURE_DEFAULT_VALUE = 0.9
TEMPERATURE_STEP = 0.01
model_name = st.sidebar.selectbox(label="Model", options=MODEL_OPTIONS)
top_p = st.sidebar.slider("Top_P", 0.0, 1.0, 1.0, 0.1)
freq_penalty = st.sidebar.slider("Frequency Penalty", 0.0, 2.0, 0.0, 0.1)
# freq_penalty = st.sidebar.slider("Frequency Penalty", 0.0, 2.0, 0.0, 0.1)
temperature = st.sidebar.slider(
label="Temperature",
min_value=TEMPERATURE_MIN_VALUE,
Expand All @@ -251,7 +256,7 @@ def main():
"Summarize CSV",
"Analyze CSV",
]

#st.subheader("Select any generator👇")
# Create a selectbox with the function names as options
selected_function = st.selectbox("Select a functionality", functions)
Expand All @@ -260,15 +265,14 @@ def main():
elif selected_function == "Chat with CSV":
chat(temperature=temperature, model_name=model_name)
elif selected_function == "Summarize CSV":
summary(model_name=model_name, temperature=temperature, top_p=top_p, freq_penalty=freq_penalty)
summary(model_name=model_name, temperature=temperature, top_p=top_p)
elif selected_function == "Analyze CSV":
analyze(temperature=temperature, model_name=model_name)
else:
st.warning("You haven't selected any AI Functionality!!")


st.write("---")
st.write("Made with ❤️")


if __name__ == "__main__":
main()

0 comments on commit ca96213

Please sign in to comment.