feat: add generationConfig of Google Gemini & fix: Gemini abort bug
LyuLumos committed Jan 29, 2024
1 parent 1c019ea commit b03d041
Showing 4 changed files with 58 additions and 2 deletions.
4 changes: 3 additions & 1 deletion src/logics/conversation.ts
@@ -133,8 +133,10 @@ export const callProviderHandler = async(providerId: string, payload: HandlerPayload
baseUrl: payload.globalSettings?.baseUrl,
model: payload.globalSettings?.model,
maxTokens: payload.globalSettings?.maxTokens,
maxOutputTokens: payload.globalSettings?.maxOutputTokens,
temperature: payload.globalSettings?.temperature,
top_p: payload.globalSettings?.top_p,
topP: payload.globalSettings?.topP,
topK: payload.globalSettings?.topK,
},
botSettings: payload.botSettings,
})
2 changes: 2 additions & 0 deletions src/providers/google/api.ts
@@ -3,6 +3,7 @@ export interface GoogleFetchPayload {
stream: boolean
body: Record<string, any>
model?: string
signal?: AbortSignal
}

export const fetchChatCompletion = async(payload: GoogleFetchPayload) => {
@@ -11,6 +12,7 @@ export const fetchChatCompletion = async(payload: GoogleFetchPayload) => {
headers: { 'Content-Type': 'application/json' },
method: 'POST',
body: JSON.stringify({ ...body }),
signal: payload.signal,
}
return fetch(`https://generativelanguage.googleapis.com/v1beta/models/${model}:streamGenerateContent?${stream ? 'alt=sse&' : ''}key=${apiKey}`, initOptions)
}
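
For context on the abort fix: the new signal field simply forwards an AbortSignal into the underlying fetch call, so aborting the controller now actually cancels the in-flight request. A minimal usage sketch (the caller, the timeout, and the apiKey handling below are illustrative assumptions, not code from this repository):

import { fetchChatCompletion } from './api'

// Illustrative only: cancel a streaming Gemini request after 10 seconds.
// Assumes the payload's apiKey field feeds the `key=` query parameter, as in api.ts.
async function demoAbortableRequest(apiKey: string) {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), 10_000)
  try {
    // fetch rejects with an AbortError once controller.abort() fires.
    return await fetchChatCompletion({
      apiKey,
      stream: true,
      model: 'gemini-pro',
      body: { contents: [{ role: 'user', parts: [{ text: 'Hello' }] }] },
      signal: controller.signal,
    })
  } finally {
    clearTimeout(timer)
  }
}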
12 changes: 12 additions & 0 deletions src/providers/google/handler.ts
@@ -18,6 +18,11 @@ export const handleRapidPrompt: Provider['handleRapidPrompt'] = async(prompt, globalSettings
globalSettings: {
...globalSettings,
model: 'gemini-pro',
temperature: 0.4,
maxTokens: 10240,
maxOutputTokens: 1024,
topP: 0.8,
topK: 1,
},
botSettings: {},
prompt,
@@ -57,7 +62,14 @@ export const handleChatCompletion = async(payload: HandlerPayload, signal?: AbortSignal
stream,
body: {
contents: parseMessageList(messages),
generationConfig: {
temperature: payload.globalSettings.temperature as number,
maxOutputTokens: payload.globalSettings.maxOutputTokens as number,
topP: payload.globalSettings.topP as number,
topK: payload.globalSettings.topK as number,
}
},
signal,
model: payload.globalSettings.model as string,
})

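For reference, the generationConfig block above is nested inside the JSON body that api.ts posts to the streamGenerateContent endpoint, which is the shape the Gemini REST API expects for sampling parameters. A rough sketch of the resulting body (the values are the defaults used by handleRapidPrompt; the contents entry is an illustrative example of what parseMessageList might produce):

// Approximate request body sent to
// https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:streamGenerateContent
const exampleBody = {
  contents: [
    { role: 'user', parts: [{ text: 'Hello' }] },
  ],
  generationConfig: {
    temperature: 0.4,      // globalSettings.temperature
    maxOutputTokens: 1024, // globalSettings.maxOutputTokens
    topP: 0.8,             // globalSettings.topP
    topK: 1,               // globalSettings.topK
  },
}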
42 changes: 41 additions & 1 deletion src/providers/google/index.ts
@@ -32,7 +32,47 @@ const providerGoogle = () => {
type: 'slider',
min: 0,
max: 32768,
default: 2048,
default: 10240,
step: 1,
},
{
key: 'maxOutputTokens',
name: 'Max Output Tokens',
description: 'Specifies the maximum number of tokens that can be generated in the response. A token is approximately four characters. 100 tokens correspond to roughly 60-80 words.',
type: 'slider',
min: 0,
max: 4096,
default: 1024,
step: 1,
},
{
key: 'temperature',
name: 'Temperature',
description: 'The temperature controls the degree of randomness in token selection. Lower temperatures are good for prompts that require a more deterministic or less open-ended response.',
type: 'slider',
min: 0,
max: 1,
default: 0.4,
step: 0.01,
},
{
key: 'topP',
name: 'Top P',
description: 'An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.',
type: 'slider',
min: 0,
max: 1,
default: 0.95,
step: 0.01,
},
{
key: 'topK',
name: 'Top K',
description: 'Top K sampling chooses from the K most likely tokens.',
type: 'slider',
min: 0,
max: 32768,
default: 1,
step: 1,
},
{
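
Each slider's key has to match the property read from payload.globalSettings in handler.ts. A simplified sketch of that mapping (the interface and helper below are assumptions for illustration, not types defined in the repository):

// Assumed shape of the per-provider settings persisted by the sliders above.
interface GoogleGenerationSettings {
  maxOutputTokens?: number // slider key 'maxOutputTokens', default 1024
  temperature?: number     // slider key 'temperature',     default 0.4
  topP?: number            // slider key 'topP',            default 0.95
  topK?: number            // slider key 'topK',            default 1
}

// handler.ts copies these one-to-one into the Gemini generationConfig.
const toGenerationConfig = (s: GoogleGenerationSettings) => ({
  temperature: s.temperature,
  maxOutputTokens: s.maxOutputTokens,
  topP: s.topP,
  topK: s.topK,
})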
