forked from marcusschiesser/streamlit-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4d995e6
commit 579b0fd
Showing
8 changed files
with
1,191 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
.pytest* | ||
__pycache__ | ||
|
||
.streamlit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import cohere | ||
import streamlit as st | ||
|
||
# Cohere API key, read from the Streamlit secrets store.
cohere_api_key = st.secrets["cohere_api_key"]


@st.cache_resource(show_spinner="Connecting to Cohere...")
def connect_cohere():
    """Build a Cohere client authenticated with ``cohere_api_key``.

    The function is wrapped in ``st.cache_resource``, which displays the
    spinner text above while the client is being created.
    """
    client = cohere.Client(cohere_api_key)
    return client
|
||
|
||
def summarize(text: str) -> str:
    """Return a summary of ``text`` produced by Cohere's summarize endpoint.

    Inputs of 250 characters or fewer are handed back unchanged, because
    Cohere's API requires at least 250 characters.
    """
    needs_summary = len(text) > 250
    if not needs_summary:
        return text

    request_options = {
        "text": text,
        "length": "auto",
        "format": "auto",
        "model": "command",
        "additional_command": "",
        "temperature": 0.8,
    }
    result = connect_cohere().summarize(**request_options)
    return result.summary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import streamlit as st | ||
import weaviate | ||
from streamlit_examples.utils.cohere import cohere_api_key | ||
|
||
|
||
@st.cache_resource(show_spinner="Connecting to Weaviate...")
def connect_weaviate():
    """
    Connect to the public Weaviate demo database of Wikipedia vectors.

    Returns
    -------
    weaviate.Client
        A client for the demo instance, configured with the Cohere API key
        header, that reported itself ready.

    Raises
    ------
    ConnectionError
        If the instance does not report that it is ready.
    """
    # Connect to the Weaviate demo database containing 10M wikipedia vectors
    # This uses a public READ-ONLY Weaviate API key
    auth_config = weaviate.auth.AuthApiKey(
        api_key="76320a90-53d8-42bc-b41d-678647c6672e"
    )
    client = weaviate.Client(
        url="https://cohere-demo.weaviate.network/",
        auth_client_secret=auth_config,
        additional_headers={
            "X-Cohere-Api-Key": cohere_api_key,
        },
    )

    # The readiness result used to be discarded, so an unusable client was
    # returned (and cached) and the failure only showed up on the first query.
    if not client.is_ready():
        raise ConnectionError("Weaviate demo instance is not ready")
    return client
|
||
|
||
def search_wikipedia(query, results_lang="en", limit=5):
    """
    Query the vectors database and return the top results.

    Parameters
    ----------
    query: str
        The search query
    results_lang: str (optional)
        Retrieve results only in the specified language.
        The demo dataset has those languages:
        en, de, fr, es, it, ja, ar, zh, ko, hi
        Pass "" or None to search all languages.
    limit: int (optional)
        Maximum number of results to return.

    Returns
    -------
    list of dict
        The matching "Articles" entries with the requested properties;
        an empty list when the response holds no articles.

    Raises
    ------
    RuntimeError
        If the response contains an "errors" entry.
    """
    client = connect_weaviate()

    near_text = {"concepts": [query]}
    properties = ["text", "title", "url", "views", "lang", "_additional {distance}"]

    # Build the query once; the language filter is the only optional part.
    search = (
        client.query.get("Articles", properties)
        .with_near_text(near_text)
        .with_limit(limit)
    )

    # Filter by language only when one was given ("" and None search all).
    if results_lang:
        where_filter = {
            "path": ["lang"],
            "operator": "Equal",
            "valueString": results_lang,
        }
        search = search.with_where(where_filter)

    response = search.do()

    # Surface query failures explicitly instead of failing later with an
    # opaque KeyError or handing None back to the caller.
    errors = response.get("errors")
    if errors:
        raise RuntimeError(f"Weaviate query failed: {errors}")

    return response["data"]["Get"]["Articles"] or []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from llama_index import Document | ||
|
||
|
||
def search_wiki(query, lang="en") -> list[Document]:
    """Search Wikipedia and return the matching pages as documents.

    Parameters
    ----------
    query: str
        The search query
    lang: str (optional)
        Wikipedia language edition to search.

    Returns
    -------
    list[Document]
        One document per page that could be loaded; the page content is the
        text, and title, url and pageid are stored as metadata.

    Raises
    ------
    ImportError
        If the ``wikipedia`` package is not installed.
    """
    try:
        import wikipedia
        from wikipedia import DisambiguationError, PageError
    except ImportError as err:
        raise ImportError("Please install wikipedia: poetry add wikipedia") from err

    wikipedia.set_lang(lang)
    results = []
    for title in wikipedia.search(query):
        try:
            wiki_page = wikipedia.page(title, auto_suggest=False)
        except (PageError, DisambiguationError):
            # Skip hits that cannot be resolved to a single page: missing
            # pages and ambiguous titles should not abort the whole search.
            continue
        results.append(
            Document(
                text=wiki_page.content,
                metadata={
                    "title": wiki_page.title,
                    "url": wiki_page.url,
                    "pageid": wiki_page.pageid,
                },
            )
        )
    return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import streamlit as st | ||
from streamlit_examples.utils.cohere import summarize | ||
from streamlit_examples.utils.weaviate import search_wikipedia | ||
|
||
|
||
def link(i, item):
    """Return a bold Markdown link for ``item``, labelled with its 1-based rank and title."""
    rank = i + 1
    label = f"{rank}. {item['title']}"
    target = item["url"]
    return f"**[{label}]({target})**"
|
||
|
||
st.title("Search Wikipedia")

user_query = st.chat_input(placeholder="Backpacking in Asia")

# Nothing to do until the user has typed a query.
if not user_query:
    st.info("Search Wikipedia and summarize the results. Type a query to start.")
    st.stop()

# A single placeholder: first shows the query status, then the results.
root = st.empty()
with root.status("Querying vector store..."):
    items = search_wikipedia(user_query, limit=3)

# Without results there is nothing to list or summarize; say so instead of
# showing an empty result list followed by "Search finished".
if not items:
    root.warning(f"I couldn't find anything about: _{user_query}_")
    st.stop()

container = root.container()
container.write(f"That's what I found about: _{user_query}_")

# Show the full text of every hit first; each placeholder is later replaced
# by the summary of the same hit.
placeholders = []
for i, item in enumerate(items):
    placeholder = container.empty()
    placeholder.info(f"{link(i,item)} {item['text']}")
    placeholders.append(placeholder)

status = container.status(
    "Search results retrieved. I am summarizing the results for you. Meanwhile you can scroll up and have a look at the full text."
)

# Summarize one hit at a time, swapping the full text for the summary.
for i, item in enumerate(items):
    with placeholders[i].status(f"_Summarizing_: {link(i,item)} {item['text']}"):
        summary = summarize(item["text"])
    placeholders[i].success(f"{link(i,item)} {summary}")

status.update(label="Search finished. Try something else!", state="complete")