-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
387 lines (325 loc) · 13.6 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# Flask basics for HTTP routing, template rendering and session handling
from flask import Flask, render_template, redirect, url_for, request, session
# Bootstrap integration with flask so we can make pretty pages
from flask_bootstrap import Bootstrap
# Flask-WTF form integration which saves insane amounts of time
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField, TextAreaField, SelectField, PasswordField, IntegerField, FloatField
from wtforms.validators import DataRequired
# Standard library
import os
import json
import functools
# MongoDB driver and BSON serialisation helpers
import pymongo
from bson import ObjectId
from bson import json_util
# Loads environment variables from a .env file for deployments
from dotenv import load_dotenv
# Instructor-large embedding model for creating vectors
from InstructorEmbedding import INSTRUCTOR
# NOTE: the embedding model is loaded (and possibly downloaded) at import time
instructor_model = INSTRUCTOR('hkunlp/instructor-large')
# Use the wonderful llama.cpp library to execute our LLM (mistral-7b with dolphin fine tune)
from llama_cpp import Llama
# NOTE: the GGUF model file must be present in the working directory; loaded at import time
llama_model = Llama(model_path="dolphin-2.1-mistral-7b.Q5_K_S.gguf")
# System prompt that constrains the assistant to the provided data
system_message = "You are a helpful assistant who will always answer the question with only the data provided and in 3 sentences."
# ChatML-style template; "{prompt}" is substituted per request in get_rag()
prompt_format = "<|im_start|>system\n" + system_message + "<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant:"
ban_token = "<|im_end|>" # This is to prevent the model from leaking additional questions
# Pull .env values into os.environ before any of the env reads below
load_dotenv()
# Create the Flask app object
app = Flask(__name__)
# Session signing key; falls back to a hard-coded value (development only)
if "SECRET_KEY" in os.environ:
    app.config['SECRET_KEY'] = os.environ["SECRET_KEY"].strip()
else:
    app.config['SECRET_KEY'] = "ohboyyoureallyshouldachangedthis"
# Load API key from .env file
# NOTE(review): raises KeyError at import time if API_KEY is unset - confirm intended
api_key = os.environ["API_KEY"]
# Mongo connection string: prefer SPECUIMDBCONNSTR, otherwise require MONGO_CON
if "SPECUIMDBCONNSTR" in os.environ:
    conn = os.environ["SPECUIMDBCONNSTR"].strip()
else:
    conn = os.environ["MONGO_CON"].strip()
# Database name, defaulting to "specialists"
if "MONGO_DB" in os.environ:
    database = os.environ["MONGO_DB"].strip()
else:
    database = "specialists"
# Collection name, defaulting to "ragtagchunks"
if "MONGO_COL" in os.environ:
    collection = os.environ["MONGO_COL"].strip()
else:
    collection = "ragtagchunks"
client = pymongo.MongoClient(conn)
db = client[database]
col = db[collection]
# Make it pretty because I can't :(
Bootstrap(app)
# Flask forms is magic
class ChunkForm(FlaskForm):
    """Add/edit form for a Q&A chunk document."""
    chunk_question = StringField('Question', validators=[DataRequired()])
    chunk_answer = TextAreaField('Answer', validators=[DataRequired()])
    # NOTE(review): SelectField posts its value back as the *string*
    # "True"/"False", not a bool - confirm downstream consumers expect that
    chunk_enabled = SelectField('Enabled', choices=[(True, 'Enabled'), (False, 'Disabled')])
    submit = SubmitField('Submit')
# Amazing, I hate writing this stuff
class LoginForm(FlaskForm):
    """Username/password form for the session-based login route."""
    username = StringField('Username', validators=[DataRequired()])
    password = PasswordField('Password', validators=[DataRequired()])
    submit = SubmitField('Login')
# Always have a search bar
class SearchForm(FlaskForm):
    """Single-field lexical search form used on the index page."""
    search_string = StringField('Question/Answer Search', validators=[DataRequired()])
    submit = SubmitField('Submit')
# Always have a search bar - for vectors too
class VectorSearchForm(FlaskForm):
    """Semantic (vector) search form with tunable kNN parameters."""
    search_string = StringField('Vector Search', validators=[DataRequired()])
    # Number of nearest neighbours the knnBeta stage considers
    search_k = IntegerField("K Value", validators=[DataRequired()])
    # Minimum search score a result must reach to be shown
    search_score_cut = FloatField("Score Cut Off", validators=[DataRequired()])
    submit = SubmitField('Submit')
# Vector search but now for the chatbot LLM
class LLMForm(FlaskForm):
    """RAG testing form: vector-search parameters plus an editable LLM prompt template."""
    question = StringField('Question', validators=[DataRequired()])
    # kNN candidate count for the retrieval step
    search_k = IntegerField("K Value", validators=[DataRequired()])
    # Minimum retrieval score for a chunk to be fed to the LLM
    search_score_cut = FloatField("Score Cut Off", validators=[DataRequired()])
    # Prompt template; %q% and %d% are replaced with question and retrieved data
    llm_prompt = TextAreaField('Prompt', validators=[DataRequired()])
    llm_tokens = IntegerField("Number of tokens from LLM", validators=[DataRequired()])
    submit = SubmitField('Submit')
def get_embedding(ins, text):
    """Encode an (instruction, text) pair with the Instructor model.

    Returns the embedding as a plain Python list of floats.
    """
    encoded = instructor_model.encode([[ins, text]])
    # encode() returns a 2-D array for the batch of one; take the single row
    return encoded.tolist()[0]
def get_rag(question, search_k, search_score_cut, llm_prompt, llm_tokens):
    """Retrieval-augmented generation: vector-search for context, then ask the LLM.

    Returns a dict with "input" (the full prompt sent to the model) and
    "output" (the generated answer, truncated at the ban token).
    """
    # Concatenate the retrieved chunk answers into one context string
    context = "".join(c["chunk_answer"] + " " for c in vector_search_chunks(question, search_k, search_score_cut))
    # Fill the template tokens with the question and the retrieved data
    filled = llm_prompt.replace("%q%", question).replace("%d%", context)
    # Wrap in the ChatML system/user/assistant scaffold
    llm_result = {"input": prompt_format.replace("{prompt}", filled)}
    # Run the model and keep just the generated text
    raw_text = llama_model(llm_result["input"], max_tokens=llm_tokens, temperature=0.1)["choices"][0]["text"]
    # partition() returns everything before the ban token, and the whole
    # string unchanged when the token is absent - same as the find/slice idiom
    llm_result["output"] = raw_text.partition(ban_token)[0]
    return llm_result
def search_chunks(search_string):
    """Lexical Atlas Search over the question and answer text; top 25 with scores."""
    text_stage = {
        "$search": {
            "text": {
                "path": ["chunk_question", "chunk_answer"],
                "query": search_string
            }
        }
    }
    project_stage = {
        "$project": {
            "_id": 1,
            "chunk_question": 1,
            "chunk_answer": 1,
            "chunk_enabled": 1,
            # Surface the relevance score alongside the document fields
            "score": {"$meta": "searchScore"}
        }
    }
    return col.aggregate([text_stage, {"$limit": 25}, project_stage])
def vector_search_chunks(search_string, k, cut):
    """Semantic (kNN) Atlas Search over the chunk embeddings.

    Embeds the query string, runs a knnBeta search, and filters out anything
    scoring below the caller-supplied cut-off.
    """
    query_vector = get_embedding("Represent the question for retrieving supporting documents:", search_string)
    pipeline = [
        # Approximate nearest-neighbour search over the stored embeddings
        {"$search": {"knnBeta": {"path": "chunk_embedding", "vector": query_vector, "k": int(k)}}},
        {"$limit": 5},
        {"$project": {
            "_id": 1,
            "chunk_question": 1,
            "chunk_answer": 1,
            "chunk_enabled": 1,
            "score": {"$meta": "searchScore"}
        }},
        # Drop results below the relevance threshold
        {"$match": {"score": {"$gte": float(cut)}}}
    ]
    return col.aggregate(pipeline)
# Decorator that gates a view behind the session-based login
def login_required(view):
    """Redirect to the login page unless a user is stored in the session.

    The wrapped view is only invoked when session["user"] is set.
    """
    @functools.wraps(view)
    def wrapped_view(**kwargs):
        # The previous version also parsed the USERS env var into a local
        # on every request and never used it; that dead, per-request JSON
        # parsing has been removed.
        if session.get("user") is None:
            return redirect(url_for('login'))
        return view(**kwargs)
    return wrapped_view
# The default chunk view with pagination and lexical search
@app.route('/', methods=['GET', 'POST'])
@login_required
def index():
    """List the first 50 chunks, or show lexical search results on POST."""
    form = SearchForm()
    # A POST means the search form was submitted
    if request.method == "POST":
        submitted = request.form.to_dict(flat=True)
        return render_template('search.html', chunks=search_chunks(submitted["search_string"]))
    # Plain GET: materialise the first page of chunks and render the index
    chunks = list(col.find().skip(0).limit(50))
    return render_template('index.html', chunks=chunks, form=form)
@app.route('/api/list', methods=['GET'])
@login_required
def api_list():
    """JSON listing of the first 50 chunks."""
    documents = list(col.find().skip(0).limit(50))
    # Round-trip through json_util so BSON types (ObjectId etc.) serialise cleanly
    return json.loads(json_util.dumps(documents))
@app.route('/api/search', methods=['GET'])
@login_required
def api_search():
    """JSON lexical search endpoint: /api/search?search_string=<text>.

    Returns matching chunks, or an error document when the parameter is missing.
    """
    searchterm = request.args.get("search_string")
    # Guard against a missing parameter instead of passing None into the
    # aggregation (the previous version also left a debug print() here)
    if not searchterm:
        return {'error': 'No search_string parameter found - /api/search?search_string=<text>'}
    chunks = search_chunks(searchterm)
    # json_util handles BSON types that the plain json module cannot
    return json.loads(json_util.dumps(chunks))
# We use this for doing semantic search testing on the chunks
@app.route('/test', methods=['GET', 'POST'])
@login_required
def test():
    """Interactive page for trying vector search with tunable k and score cut-off."""
    # Sensible defaults for the kNN parameters
    form = VectorSearchForm(search_k=100, search_score_cut=0.89)
    results = []
    # On submit, run the vector search with the user-supplied parameters;
    # otherwise render with no results
    if request.method == "POST":
        fields = request.form.to_dict(flat=True)
        results = vector_search_chunks(fields["search_string"], fields["search_k"], fields["search_score_cut"])
    return render_template('test.html', chunks=results, form=form)
# Interactive RAG testing page: vector search feeds the prompt, the LLM answers
@app.route('/llm', methods=['GET', 'POST'])
@login_required
def llm():
    """Render the RAG playground; on POST, run retrieval + generation and show both prompt and answer."""
    # Defaults mirror the /api/rag endpoint parameters
    form = LLMForm(search_k=100, search_score_cut=0.89, llm_prompt="Answer the following question \"%q%\" using only this data while ignoring any data irrelevant to this question: %d%", llm_tokens=128)
    if request.method == "POST":
        fields = request.form.to_dict(flat=True)
        rag = get_rag(fields["question"], fields["search_k"], fields["search_score_cut"], fields["llm_prompt"], int(fields["llm_tokens"]))
        return render_template('llm.html', chunks=[], form=form, llm_response=rag["output"], prompt=rag["input"])
    # Plain GET: empty form, no response yet
    return render_template('llm.html', chunks=[], form=form)
# Create or edit chunks. Basic CRUD functionality.
@app.route('/chunk', methods=['GET', 'POST'])
@app.route('/chunk/<id>', methods=['GET', 'POST'])
@login_required
def chunk(id=None):
    """Create (no id) or edit (id given) a chunk document.

    POST stores the submitted question/answer plus a freshly computed
    embedding; GET renders the form, pre-filled when editing an existing doc.
    """
    form = ChunkForm()
    if request.method == "POST":
        form_result = request.form.to_dict(flat=True)
        # Strip the form bookkeeping fields; a default of None avoids a
        # KeyError if either field is absent from the submission
        form_result.pop('csrf_token', None)
        form_result.pop('submit', None)
        # Re-embed question + answer so vector search stays in sync with edits
        embed_text = form_result["chunk_question"] + " " + form_result["chunk_answer"]
        form_result["chunk_embedding"] = get_embedding("Represent the document for retrieval:", embed_text)
        if id:
            # Editing: replace the whole existing document
            col.replace_one({'_id': ObjectId(id)}, form_result)
        else:
            # Creating: insert a brand new document
            col.insert_one(form_result)
        # Back to the chunk view
        return redirect("/")
    # GET with an id: pre-populate the form from the stored document
    if id:
        doc = col.find_one({'_id': ObjectId(id)})
        # Unknown ids fall through to an empty form instead of crashing on None
        if doc:
            form.chunk_question.data = doc["chunk_question"]
            form.chunk_answer.data = doc["chunk_answer"]
            form.chunk_enabled.data = doc["chunk_enabled"]
    return render_template('chunk.html', form=form)
# This chunk is bad, we need to make it feel bad
@app.route('/chunk_disable/<id>')
@login_required
def chunk_disable(id):
    """Soft-disable a chunk: keep it stored but flag it off.

    The previous version also fetched the document first and ignored the
    result; that wasted round trip has been removed.
    """
    col.update_one({'_id': ObjectId(id)}, {"$set": {"chunk_enabled": False}})
    return redirect('/')
# Login route that stores the authenticated user in the session
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Username/password login backed by the USERS env var (a JSON object).

    On success the username is stored in session["user"], which is what
    login_required checks for.
    """
    form = LoginForm()
    if form.validate_on_submit():
        # Load the user map here: the previous version referenced a `users`
        # name that was never defined at module scope (it was only a dead
        # local inside login_required), so every login raised a NameError.
        users = json.loads(os.environ["USERS"].strip()) if "USERS" in os.environ else {}
        # NOTE(review): plaintext password comparison from an env var is weak
        # security - consider storing hashed credentials instead
        if form.username.data in users and form.password.data == users[form.username.data]:
            session["user"] = form.username.data
            return redirect(url_for('index'))
    return render_template('login.html', form=form)
# We finally have a link for this now!
@app.route('/logout')
def logout():
    """Clear the signed-in user and send the browser back to the login page."""
    # login_required treats a None (or missing) session user as logged out
    session["user"] = None
    return redirect(url_for('login'))
# API endpoint for sending a question and getting the LLM output (RAG)
# This is what you want to call from your website, slack or discord bot.
@app.route('/api/rag')
def api_rag():
    """RAG API endpoint: /api/rag?key=<api key>&q=<question>.

    Validates the API key and question, then returns the prompt and the
    LLM answer from get_rag() with the default retrieval parameters.
    """
    import hmac  # local import keeps this security fix self-contained
    key = request.args.get("key")
    q = request.args.get("q")
    # Make sure we have a valid key and question; the usage hints now show
    # the correct query-string form (the old ones said /api/rag/key=...)
    if not key:
        return {'error': 'no API key provided - /api/rag?key=<api key>'}
    if not q:
        return {'error': 'No q parameter found. You must ask a question - /api/rag?q=<string>'}
    # Constant-time comparison prevents timing attacks on the API key
    if not hmac.compare_digest(key, api_key):
        return {'error': 'API key does not match'}
    # Get the LLM result for the query
    return get_rag(q, 100, 0.89, "Answer the following question \"%q%\" using only this data while ignoring any data irrelevant to this question: %d%", 128)
# API endpoint for vectorizing a string with the embedding model
# Useful for building external tools that query the same vector space.
@app.route('/api/vector')
def api_vector():
    """Embedding API endpoint: /api/vector?key=<api key>&q=<string>.

    Validates the API key, then returns the query-side embedding for q.
    """
    import hmac  # local import keeps this security fix self-contained
    key = request.args.get("key")
    q = request.args.get("q")
    # Make sure we have a valid key and string; the usage hints now show
    # the correct query-string form (the old ones said /api/vector/key=...)
    if not key:
        return {'error': 'no API key provided - /api/vector?key=<api key>'}
    if not q:
        return {'error': 'No q parameter found. You must provide a string to vectorize - /api/vector?q=<string>'}
    # Constant-time comparison prevents timing attacks on the API key
    if not hmac.compare_digest(key, api_key):
        return {'error': 'API key does not match'}
    # Get the vector result for the string
    return get_embedding("Represent the question for retrieving supporting documents:", q)