first commit
marcusschiesser committed Sep 13, 2023
0 parents commit 4d995e6
Showing 13 changed files with 2,773 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
.pytest*
__pycache__
19 changes: 19 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,19 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Streamlit",
            "type": "python",
            "request": "launch",
            "module": "streamlit",
            "args": [
                "run",
                "${file}"
            ],
            "justMyCode": true
        }
    ]
}
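With this configuration, launching the debugger runs whichever file is open in the editor through the Streamlit module; with chat.py open, it is roughly equivalent to running `python -m streamlit run streamlit_examples/chat.py` from a shell.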
21 changes: 21 additions & 0 deletions README.md
@@ -0,0 +1,21 @@
# streamlit-examples

A couple of AI demo applications built with [Streamlit](https://streamlit.io/):

[chat.py](./streamlit_examples/chat.py) - Lets the user upload PDF documents and chat with them using LlamaIndex. Supports multiple users and streaming responses.

## Getting Started

This project uses Poetry for dependency management. To install the dependencies and set up the environment, run the following commands:

```bash
poetry install
poetry shell
```

You can then run any of the examples with:

```bash
streamlit run streamlit_examples/chat.py
```
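
pytest is included as a dev dependency, so the test suite under [tests](./tests) can be run with, for example:

```bash
poetry run pytest
```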

2,553 changes: 2,553 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions pyproject.toml
@@ -0,0 +1,24 @@
[tool.poetry]
name = "streamlit-examples"
version = "0.1.0"
description = ""
authors = ["Marcus Schiesser <[email protected]>"]
readme = "README.md"
packages = [{ include = "streamlit_examples" }]

[tool.poetry.dependencies]
python = "^3.11"
llama-index = "^0.8.24.post1"
llama-cpp-python = "^0.1.84"
streamlit = "^1.26.0"
pypdf = "^3.16.0"


[tool.poetry.group.dev.dependencies]
black = "^23.9.1"
autoflake = "^2.2.1"
pytest = "^7.4.2"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file added streamlit_examples/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions streamlit_examples/chat.py
@@ -0,0 +1,51 @@
import streamlit as st

from llama_index import (
    OpenAIEmbedding,
    ServiceContext,
    set_global_service_context,
)
from llama_index.llms import OpenAI
from streamlit_examples.utils.llamaindex import build_index, handle_stream

from streamlit_examples.utils.streamlit import (
    get_key,
    render_message,
    upload_files,
)

st.title("Chat with Documents")

openai_api_key = get_key()

# Define service-context
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo", api_key=openai_api_key)
embed_model = OpenAIEmbedding(api_key=openai_api_key)
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
set_global_service_context(service_context)

# Upload PDFs
pdfs = upload_files(type="pdf", accept_multiple_files=True)

index = build_index(pdfs)
query_engine = index.as_chat_engine(chat_mode="condense_question", streaming=True)

messages = st.session_state.get("messages", [])

if not messages:
    messages.append({"role": "assistant", "text": "Hi!"})

for message in messages:
    render_message(message)

if user_query := st.chat_input():
    message = {"role": "user", "text": user_query}
    messages.append(message)
    render_message(message)

    with st.chat_message("assistant"):
        stream = query_engine.stream_chat(user_query)
        text = handle_stream(st.empty(), stream)
        message = {"role": "assistant", "text": text}
        messages.append(message)

    st.session_state.messages = messages
Empty file added streamlit_examples/utils/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions streamlit_examples/utils/llamaindex.py
@@ -0,0 +1,20 @@
from llama_index.chat_engine.types import StreamingAgentChatResponse
import streamlit as st
from llama_index import SimpleDirectoryReader, VectorStoreIndex


# TODO: this is caching the resource globally, not per-session
# Each user session should have their own index
@st.cache_resource(show_spinner="Indexing documents...")
def build_index(files):
    documents = SimpleDirectoryReader(input_files=files).load_data()
    return VectorStoreIndex.from_documents(documents)


def handle_stream(root, stream: StreamingAgentChatResponse):
    text = ""
    root.markdown("Thinking...")
    for token in stream.response_gen:
        text += token
        root.markdown(text)
    return text
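
The TODO above notes that `st.cache_resource` shares one cached index across all user sessions. A minimal per-session sketch, assuming it is acceptable to key the index by the uploaded file paths in `st.session_state` (the helper name is hypothetical, not part of this commit):

def build_index_per_session(files):
    # Hypothetical sketch: st.session_state is scoped to one browser session,
    # so each session (and each file set) builds and reuses its own index.
    key = "index:" + ",".join(files)
    if key not in st.session_state:
        with st.spinner("Indexing documents..."):
            documents = SimpleDirectoryReader(input_files=files).load_data()
            st.session_state[key] = VectorStoreIndex.from_documents(documents)
    return st.session_state[key]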
40 changes: 40 additions & 0 deletions streamlit_examples/utils/streamlit.py
@@ -0,0 +1,40 @@
import os
import streamlit as st

CACHE_DIR = "./uploads"


def render_message(message):
    with st.chat_message(message["role"]):
        st.write(message["text"])


def get_key():
    if "openai_api_key" not in st.session_state:
        openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
        if not openai_api_key:
            st.info("Please add your OpenAI API key to continue.")
            st.stop()
        st.session_state["openai_api_key"] = openai_api_key
    return st.session_state["openai_api_key"]


def upload_files(type="pdf", **kwargs):
    files = st.sidebar.file_uploader(
        label=f"Upload {type.upper()} files", type=[type], **kwargs
    )
    if not files:
        st.info(f"Please add {type.upper()} documents")
        st.stop()
    return cache_files(files, type=type)


def cache_files(files, type="pdf") -> list[str]:
    filepaths = []
    for file in files:
        filepath = f"{CACHE_DIR}/{file.file_id}.{type}"
        if not os.path.exists(filepath):
            with open(filepath, "wb") as f:
                f.write(file.getbuffer())
        filepaths.append(filepath)
    return filepaths
Empty file added tests/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,42 @@
import os
from streamlit_examples.utils.streamlit import cache_files
from streamlit.runtime.uploaded_file_manager import (
    UploadedFile,
    UploadedFileRec,
    FileURLsProto,
)


def create_file(name, test_data):
    file_id = f"{name}_file_id"
    type = "text/plain"
    record = UploadedFileRec(file_id=file_id, name=name, type=type, data=test_data)
    file_urls = FileURLsProto()
    return UploadedFile(record=record, file_urls=file_urls)


def test_cache_file():
    tc = [
        {
            "test": "test one file",
            "files": [create_file("test.pdf", b"test content")],
            "expected": [b"test content"],
        },
        {
            "test": "test two files",
            "files": [
                create_file("test.pdf", b"test content"),
                create_file("test2.pdf", b"test content 2"),
            ],
            "expected": [b"test content", b"test content 2"],
        },
    ]

    for test in tc:
        filepaths = cache_files(test["files"])
        assert len(filepaths) == len(test["files"])
        for i in range(len(filepaths)):
            assert os.path.exists(filepaths[i])
            with open(filepaths[i], "rb") as f:
                assert f.read() == test["expected"][i]
            os.remove(filepaths[i])
1 change: 1 addition & 0 deletions uploads/.gitignore
@@ -0,0 +1 @@
*.pdf
