Dhravya · CodeTorso · Jun 2, 2024 · Jun 3, 2024 · Jun 4, 2024 · Jun 8, 2024
diff --git a/SETUP-GUIDE.md b/SETUP-GUIDE.md
@@ -13,12 +13,13 @@
 3. Create a `.dev.vars` file in `apps/web` with the following content:
 
 ```bash
-GOOGLE_CLIENT_ID="-"
-GOOGLE_CLIENT_SECRET="-"
+GOOGLE_CLIENT_ID="-" # required, visit https://developers.google.com/identity/protocols/oauth2
+GOOGLE_CLIENT_SECRET="-" # required
 NEXTAUTH_SECRET='nextauthsecret'
 DATABASE_URL='database.sqlite'
 NEXTAUTH_URL='http://localhost:3000'
 BACKEND_SECURITY_KEY='veryrandomsecuritykey'
+BACKEND_BASE_URL="where your backend is hosted"
 ```
 
 4. Setup the database:
@@ -28,10 +29,10 @@ First, edit the `wrangler.toml` file in `apps/web` to point the d1 database to y
 You can create a d1 database by running this command
 
 ```
-wrangler d1 create DATABASE_NAME
+bunx wrangler d1 create <YOUR_DATABASE_NAME>
 ```
 
-And then replace these values
+And then replace `database_name` and `database_id` with the values for your database:
 
 ```
 [[d1_databases]]
@@ -43,10 +44,12 @@ database_id = "YOUR_DB_ID"
 Simply run this command in `apps/web`
 
 ```
-wrangler d1 execute dev-d1-anycontext --local --file=db/prepare.sql
+bunx wrangler d1 migrations apply <YOUR_DATABASE_NAME>
 ```
 
-If it runs, you can set up the cloud database as well by removing the `--local` flag.
+If it runs, you can set up the cloud database as well by removing the `--local` flag.
+
+If you only want to contribute to the frontend, just run `bun run dev` in the root of the project and you're done (you won't be able to try the AI features). Otherwise, continue with the next step.
 
 5. You need to host your own worker for the `apps/cf-ai-backend` module.
 

diff --git a/apps/browser-rendering b/apps/browser-rendering
diff --git a/apps/cf-ai-backend/README.md b/apps/cf-ai-backend/README.md
@@ -1,58 +1,50 @@
-# Hono minimal project
+baseURL: https://new-cf-ai-backend.dhravya.workers.dev
 
-This is a minimal project with [Hono](https://github.com/honojs/hono/) for Cloudflare Workers.
+Authentication:
+You must send an `Authorization: Bearer <token>` header with every request to the `/api/*` routes.
 
-## Features
+### Add content:
 
-- Minimal
-- TypeScript
-- Wrangler to develop and deploy.
-- [Jest](https://jestjs.io/ja/) for testing.
-
-## Usage
-
-Initialize
+POST `/api/add` with
 
 ```
-npx create-cloudflare my-app https://github.com/honojs/hono-minimal
+body {
+ pageContent: z.string(),
+ title: z.string().optional(),
+ description: z.string().optional(),
+ space: z.string().optional(),
+ url: z.string(),
+ user: z.string(),
+}
 ```
 
-Install
+### Query without user data
 
-```
-yarn install
-```
+GET `/api/ask` with
+query `?query=testing`
 
-Develop
+(This endpoint is temporary but works; it will change soon to target chat use cases specifically.)
 
-```
-yarn dev
-```
+### Query vectorize and get results in natural language
 
-Test
+POST `/api/chat` with
 
 ```
-yarn test
-```
-
-Deploy
+query parameters (?query=...&...) {
+ query: z.string(),
+ topK: z.number().optional().default(10),
+ user: z.string(),
+ spaces: z.string().optional(),
+ sourcesOnly: z.string().optional().default("false"),
+ model: z.string().optional().default("gpt-4o"),
+ }
 
+body z.object({
+ chatHistory: z.array(contentObj).optional(),
+});
 ```
-yarn deploy
-```
-
-## Examples
-
-See: <https://github.com/honojs/examples>
-
-## For more information
-
-See: <https://honojs.dev>
-
-## Author
-
-Yusuke Wada <https://github.com/yusukebe>
 
-## License
+### Delete vectors
 
-MIT
+DELETE `/api/delete` with
+query params `websiteUrl` and `user`
diff --git a/apps/cf-ai-backend/package.json b/apps/cf-ai-backend/package.json
@@ -6,7 +6,7 @@
  "scripts": {
  "test": "jest --verbose",
  "deploy": "wrangler deploy",
- "dev": "wrangler dev",
+ "dev": "wrangler dev --remote --port 8686",
  "start": "wrangler dev",
  "unsafe-reset-vector-db": "wrangler vectorize delete supermem-vector && wrangler vectorize create --dimensions=1536 supermem-vector-1 --metric=cosine"
  },

diff --git a/apps/cf-ai-backend/src/helper.ts b/apps/cf-ai-backend/src/helper.ts
@@ -21,8 +21,6 @@ export async function initQuery(
  index: c.env.VECTORIZE_INDEX,
  });
 
- const DEFAULT_MODEL = "gpt-4o";
-
  let selectedModel:
  | ReturnType<ReturnType<typeof createOpenAI>>
  | ReturnType<ReturnType<typeof createGoogleGenerativeAI>>
@@ -52,12 +50,6 @@ export async function initQuery(
  break;
  }
 
- if (!selectedModel) {
- throw new Error(
- `Model ${model} not found and default model ${DEFAULT_MODEL} is also not available.`,
- );
- }
-
  return { store, model: selectedModel };
 }
 
@@ -72,19 +64,60 @@ export async function deleteDocument({
  c: Context<{ Bindings: Env }>;
  store: CloudflareVectorizeStore;
 }) {
- const toBeDeleted = `${url}-${user}`;
+ const toBeDeleted = `${url}#supermemory-web`;
  const random = seededRandom(toBeDeleted);
 
  const uuid =
  random().toString(36).substring(2, 15) +
  random().toString(36).substring(2, 15);
 
- await c.env.KV.list({ prefix: uuid }).then(async (keys) => {
- for (const key of keys.keys) {
- await c.env.KV.delete(key.name);
- await store.delete({ ids: [key.name] });
+ const allIds = await c.env.KV.list({ prefix: uuid });
+
+ if (allIds.keys.length > 0) {
+ const savedVectorIds = allIds.keys.map((key) => key.name);
+ const vectors = await c.env.VECTORIZE_INDEX.getByIds(savedVectorIds);
+ // We don't actually delete document directly, we just remove the user from the metadata.
+ // If there's no user left, we can delete the document.
+ const newVectors = vectors.map((vector) => {
+ delete vector.metadata[`user-${user}`];
+
+ // Get count of how many users are left
+ const userCount = Object.keys(vector.metadata).filter((key) =>
+ key.startsWith("user-"),
+ ).length;
+
+ // If there's no user left, we can delete the document.
+ // need to make sure that every chunk is deleted otherwise it would be problematic.
+ if (userCount === 0) {
+ store.delete({ ids: savedVectorIds });
+ void Promise.all(savedVectorIds.map((id) => c.env.KV.delete(id)));
+ return null;
+ }
+
+ return vector;
+ });
+
+ // If all vectors are null (deleted), we can delete the KV too. Otherwise, we update (upsert) the vectors.
+ if (newVectors.every((v) => v === null)) {
+ await c.env.KV.delete(uuid);
+ } else {
+ await c.env.VECTORIZE_INDEX.upsert(newVectors.filter((v) => v !== null));
  }
- });
+ }
+}
+
+/**
+ * Makes a string safe to use as a metadata key.
+ *
+ * Every `.`, `$` and `"` character in `key` is replaced with `_`.
+ *
+ * @param key - Raw key, e.g. `user-<id>`.
+ * @returns The key with the characters above replaced by underscores.
+ * @throws Error if `key` is empty.
+ */
+function sanitizeKey(key: string): string {
+ // Empty (falsy) keys are rejected outright.
+ if (!key) throw new Error("Key cannot be empty");
+
+ // Replace each `.`, `$` and `"` with an underscore.
+ let sanitizedKey = key.replace(/[.$"]/g, "_");
+
+ // Ensure key does not start with $
+ // NOTE(review): the replace above has already turned every `$` into `_`,
+ // so this condition can never be true and the branch is effectively dead code.
+ if (sanitizedKey.startsWith("$")) {
+ sanitizedKey = sanitizedKey.substring(1);
+ }
+
+ return sanitizedKey;
 }
 
 export async function batchCreateChunksAndEmbeddings({
@@ -98,19 +131,47 @@ export async function batchCreateChunksAndEmbeddings({
  chunks: string[];
  context: Context<{ Bindings: Env }>;
 }) {
- const ourID = `${body.url}-${body.user}`;
+ //! NOTE that we use #supermemory-web to ensure that
+ //! If a user saves it through the extension, we don't want other users to be able to see it.
+ // Requests from the extension should ALWAYS have a unique ID with the USERiD in it.
+ // I cannot stress this enough, important for security.
+ const ourID = `${body.url}#supermemory-web`;
+ const random = seededRandom(ourID);
+ const uuid =
+ random().toString(36).substring(2, 15) +
+ random().toString(36).substring(2, 15);
 
- await deleteDocument({ url: body.url, user: body.user, c: context, store });
+ const allIds = await context.env.KV.list({ prefix: uuid });
 
- const random = seededRandom(ourID);
+ // If some chunks for that content already exist, we'll just update the metadata to include
+ // the user.
+ if (allIds.keys.length > 0) {
+ const savedVectorIds = allIds.keys.map((key) => key.name);
+ const vectors = await context.env.VECTORIZE_INDEX.getByIds(savedVectorIds);
+
+ // Now, we'll update all vector metadatas with one more userId and all spaceIds
+ const newVectors = vectors.map((vector) => {
+ vector.metadata = {
+ ...vector.metadata,
+ [`user-${body.user}`]: 1,
+
+ // For each space in body, add the spaceId to the vector metadata
+ ...(body.spaces ?? [])?.reduce((acc, space) => {
+ acc[`space-${body.user}-${space}`] = 1;
+ return acc;
+ }, {}),
+ };
+
+ return vector;
+ });
+
+ await context.env.VECTORIZE_INDEX.upsert(newVectors);
+ return;
+ }
 
  for (let i = 0; i < chunks.length; i++) {
  const chunk = chunks[i];
- const uuid =
- random().toString(36).substring(2, 15) +
- random().toString(36).substring(2, 15) +
- "-" +
- i;
+ const chunkId = `${uuid}-${i}`;
 
  const newPageContent = `Title: ${body.title}\nDescription: ${body.description}\nURL: ${body.url}\nContent: ${chunk}`;
 
@@ -121,19 +182,25 @@ export async function batchCreateChunksAndEmbeddings({
  metadata: {
  title: body.title?.slice(0, 50) ?? "",
  description: body.description ?? "",
- space: body.space ?? "",
  url: body.url,
- user: body.user,
+ type: body.type ?? "page",
+ content: newPageContent,
+
+ [sanitizeKey(`user-${body.user}`)]: 1,
+ ...body.spaces?.reduce((acc, space) => {
+ acc[`space-${body.user}-${space}`] = 1;
+ return acc;
+ }, {}),
  },
  },
  ],
  {
- ids: [uuid],
+ ids: [chunkId],
  },
  );
 
  console.log("Docs added: ", docs);
 
- await context.env.KV.put(uuid, ourID);
+ await context.env.KV.put(chunkId, ourID);
  }
 }
diff --git a/apps/cf-ai-backend/src/index.test.ts b/apps/cf-ai-backend/src/index.test.ts