forked from A-I-nstein/YouTube-TLDR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
llm_utils.py
85 lines (74 loc) · 2.61 KB
/
llm_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import math
import streamlit as st
# from palm_api import PALM
from langchain.llms import OpenAI
from langchain.vectorstores import Weaviate
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.embeddings.openai import OpenAIEmbeddings
# Credentials and service endpoint, read from the environment at import time.
# Each is None when the corresponding variable is unset.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# NOTE(review): WEAVIATE_API_KEY is fetched but never referenced in this file —
# confirm whether Weaviate.from_texts needs it passed explicitly.
WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY')
WEAVIATE_URL = os.getenv('WEAVIATE_URL')
@st.cache_resource
def save_embeddings(captions):
    """Embed caption texts and index them in a Weaviate vector store.

    Wrapped in Streamlit's cache_resource so repeated calls with the same
    captions reuse the existing index instead of re-embedding.

    Args:
        captions: iterable of dicts, each with at least a 'text' key
            (segment text) and a 'start' key (segment start timestamp).

    Returns:
        A langchain Weaviate vector store with one document per caption
        segment; the segment start time is kept as per-document metadata.
    """
    # Comprehensions instead of parallel append loops (same order, idiomatic).
    texts = [record['text'] for record in captions]
    metadata = [{'start': record['start']} for record in captions]
    embeddings = OpenAIEmbeddings()
    return Weaviate.from_texts(
        texts,
        embeddings,
        metadatas=metadata,
        weaviate_url=WEAVIATE_URL,
    )
def parseNumber(text):
    """Collect every ASCII digit and '.' from *text*, in order, and return
    the floored float value of the concatenation.

    Raises:
        ValueError: if *text* contains no digits, or if the collected
            characters do not form a valid float (e.g. two dots).
    """
    numeric_chars = ''.join(ch for ch in text if ch in '0123456789.')
    return math.floor(float(numeric_chars))
def llm_summary(subtitles):
    """Summarize video subtitles with OpenAI via a LangChain LLMChain.

    Args:
        subtitles: the full subtitle/transcript text to summarize.

    Returns:
        ('success', summary) on success, or ('fail', 'Token limit exceeded.')
        if any step raises (typically when the transcript exceeds the
        model's context window).
    """
    template = """Subtitles are enclosed in ###. Summarize the subtitles.
###
{srt}
###
Answer:
"""
    try:
        prompt = PromptTemplate(template=template, input_variables=["srt"])
        # llm = PALM()
        chain = LLMChain(prompt=prompt, llm=OpenAI(openai_api_key=OPENAI_API_KEY))
        summary = chain.run(subtitles)
    except Exception:
        return 'fail', 'Token limit exceeded.'
    return 'success', summary
def llm_answer(question, captions):
    """Answer a question about a video using its caption transcript.

    Retrieves the single most similar caption segment from the Weaviate
    index built by save_embeddings, then asks gpt-3.5-turbo-16k to answer
    using only that segment as context.

    Args:
        question: the user's question about the video.
        captions: transcript segments (dicts with 'text' and 'start' keys).

    Returns:
        ('success', (answer, timestamp)) where timestamp is the integer
        start time of the caption segment used, or ('fail', message) when
        retrieval or the LLM call fails.
    """
    try:
        store = save_embeddings(captions)
        docs = store.similarity_search(question, k=1)
        data = docs[0].page_content
    except Exception:
        return 'fail', 'Could not extract transcript. Please try a different video.'
    # BUG FIX: the original used backslash line-continuations *inside* the
    # triple-quoted string, which deleted the newlines and fused the prompt
    # into one run-on line ('...No Answer Found"Relevant data: {data}Question:...').
    # Real newlines now separate the prompt sections.
    template = """\
Answer a question when the question and the relevant data is given. If the answer is not in Relevant Data, say "No Answer Found"
Relevant data: {data}
Question: {question}
Answer:
"""
    try:
        prompt = PromptTemplate(template=template, input_variables=["data", "question"])
        llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model='gpt-3.5-turbo-16k-0613')
        # llm = PALM()
        llm_chain = LLMChain(prompt=prompt, llm=llm)
        output = llm_chain.run({'data': data, 'question': question})
        timestamp = int(docs[0].metadata['start'])
    except Exception:
        return 'fail', 'Token limit exceeded.'
    return 'success', (output, timestamp)