feat(api.ts): solving bad request issue (di-sukharev#187)
* 2.0.18

* patch

* 2.0.19

* style(.prettierrc): reorder properties to follow alphabetical order and improve readability

* feat(generateCommitMessageFromGitDiff.ts): rework the MAX_REQ_TOKENS logic

fix(api.ts): add missing import for GenerateCommitMessageErrorEnum
Token-count validation is added to prevent the request from exceeding the default model token limit. The tokens in each message are counted, with 4 added per message to account for the extra tokens the API adds. If the total token count exceeds the limit, an error is thrown. The missing import for GenerateCommitMessageErrorEnum is also added.
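The guard described above can be sketched as follows. This is a minimal illustration, not the project's actual code: `countTokens` is a hypothetical stand-in for the real `tokenCount` helper (approximated here by whitespace splitting), and the `+ 4` per message mirrors the per-message API overhead the commit describes.

```typescript
// Hypothetical sketch of the request-size guard; countTokens is a crude
// stand-in for the real tokenizer in src/utils/tokenCount.ts.
const DEFAULT_MODEL_TOKEN_LIMIT = 4096;

interface ChatMessage {
  role: string;
  content: string;
}

// Approximation for illustration: one token per whitespace-separated word.
function countTokens(text: string): number {
  return text.split(/\s+/).filter(Boolean).length;
}

function assertRequestFits(messages: ChatMessage[], maxTokens: number): void {
  // Each message costs its content tokens plus ~4 tokens of API overhead.
  const requestTokens = messages
    .map((msg) => countTokens(msg.content) + 4)
    .reduce((a, b) => a + b, 0);

  // The completion itself needs maxTokens of headroom under the model limit.
  if (requestTokens > DEFAULT_MODEL_TOKEN_LIMIT - maxTokens) {
    throw new Error('TOO_MUCH_TOKENS');
  }
}
```

The key design point is that the budget is checked before the API call, so an oversized request fails fast locally instead of producing a 400 Bad Request from the server.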

feat: add support for splitting long line-diffs into smaller pieces
This change adds support for splitting long line-diffs into smaller pieces so that a single request does not exceed the model's token limit. The `splitDiff` function splits a single line into multiple chunks if it exceeds the maximum length, and starts a new piece whenever adding the next line would exceed the maximum. This keeps the generated commit messages readable and consistent.
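The splitting strategy can be sketched like this. For illustration, length is measured in characters; the actual `splitDiff` in the commit measures tokens via `tokenCount`.

```typescript
// Sketch of splitting a diff into pieces no longer than maxLength.
// Here "length" is character count; the real code counts tokens instead.
function splitDiff(diff: string, maxLength: number): string[] {
  const pieces: string[] = [];
  let current = '';

  for (let line of diff.split('\n')) {
    // A single oversized line is chopped into maxLength-sized chunks.
    while (line.length > maxLength) {
      pieces.push(line.slice(0, maxLength));
      line = line.slice(maxLength);
    }
    if (current && current.length + 1 + line.length > maxLength) {
      // Adding this line would overflow the piece: start a new one.
      pieces.push(current);
      current = line;
    } else {
      current = current ? current + '\n' + line : line;
    }
  }
  // Flush the last piece.
  if (current) pieces.push(current);
  return pieces;
}
```

Each returned piece stays within the budget while the concatenation of all pieces preserves the original diff content.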

refactor: improve code readability by adding whitespace and reformatting code
This commit improves the readability of the code by adding whitespace and reformatting; the changes do not affect functionality. Additionally, a new `delay` function has been added to the file.
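The `delay` helper is used to await the commit-message promises one at a time with a pause between them, a common way to stay under per-minute rate limits. A sketch of that pattern (with `collectSequentially` as a hypothetical generalization of the loop in the commit; 2000 ms matches the delay the commit uses):

```typescript
// Sketch: await promises one at a time, pausing between each.
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

async function collectSequentially<T>(
  promises: Array<Promise<T>>,
  pauseMs = 2000
): Promise<T[]> {
  const results: T[] = [];
  for (const promise of promises) {
    // Awaiting inside the loop spaces out consumption of the results.
    results.push(await promise);
    await delay(pauseMs);
  }
  return results;
}
```

Compared with `Promise.all`, this trades total latency for a steady request rate, which matters when each promise triggers an API call.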

---------

Co-authored-by: di-sukharev <[email protected]>
gamoreli and di-sukharev committed May 26, 2023
1 parent 4f57201 commit 32f3e17
Showing 3 changed files with 171 additions and 104 deletions.
12 changes: 11 additions & 1 deletion src/api.ts
@@ -7,7 +7,9 @@ import {
   OpenAIApi
 } from 'openai';

-import { CONFIG_MODES, getConfig } from './commands/config';
+import {CONFIG_MODES, DEFAULT_MODEL_TOKEN_LIMIT, getConfig} from './commands/config';
+import {tokenCount} from './utils/tokenCount';
+import {GenerateCommitMessageErrorEnum} from './generateCommitMessageFromGitDiff';

 const config = getConfig();

@@ -56,6 +58,14 @@ class OpenAi {
       max_tokens: maxTokens || 500
     };
     try {
+      const REQUEST_TOKENS = messages.map(
+        (msg) => tokenCount(msg.content) + 4
+      ).reduce((a, b) => a + b, 0);
+
+      if (REQUEST_TOKENS > (DEFAULT_MODEL_TOKEN_LIMIT - maxTokens)) {
+        throw new Error(GenerateCommitMessageErrorEnum.tooMuchTokens);
+      }
+
       const { data } = await this.openAI.createChatCompletion(params);

       const message = data.choices[0].message;
2 changes: 2 additions & 0 deletions src/commands/config.ts
@@ -22,6 +22,8 @@ export enum CONFIG_KEYS {
   OCO_LANGUAGE = 'OCO_LANGUAGE'
 }

+export const DEFAULT_MODEL_TOKEN_LIMIT = 4096;
+
 export enum CONFIG_MODES {
   get = 'get',
   set = 'set'
261 changes: 158 additions & 103 deletions src/generateCommitMessageFromGitDiff.ts
@@ -1,28 +1,28 @@
 import {
   ChatCompletionRequestMessage,
   ChatCompletionRequestMessageRoleEnum
 } from 'openai';
 import { api } from './api';
-import { getConfig } from './commands/config';
+import { DEFAULT_MODEL_TOKEN_LIMIT, getConfig } from './commands/config';
 import { mergeDiffs } from './utils/mergeDiffs';
 import { i18n, I18nLocals } from './i18n';
 import { tokenCount } from './utils/tokenCount';

 const config = getConfig();
 const translation = i18n[(config?.OCO_LANGUAGE as I18nLocals) || 'en'];

 const INIT_MESSAGES_PROMPT: Array<ChatCompletionRequestMessage> = [
   {
     role: ChatCompletionRequestMessageRoleEnum.System,
     // prettier-ignore
     content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention and explain WHAT were the changes and WHY the changes were done. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message.
 ${config?.OCO_EMOJI ? 'Use GitMoji convention to preface the commit.' : 'Do not preface the commit with anything.'}
 ${config?.OCO_DESCRIPTION ? 'Add a short description of WHY the changes are done after the commit message. Don\'t start it with "This commit", just describe the changes.' : "Don't add any descriptions to the commit, only commit message."}
 Use the present tense. Lines must not be longer than 74 characters. Use ${translation.localLanguage} to answer.`
   },
   {
     role: ChatCompletionRequestMessageRoleEnum.User,
     content: `diff --git a/src/server.ts b/src/server.ts
 index ad4db42..f3b18a9 100644
 --- a/src/server.ts
 +++ b/src/server.ts
@@ -46,128 +46,183 @@ app.use((_, res, next) => {
 +app.listen(process.env.PORT || PORT, () => {
 +  console.log(\`Server listening on port \${PORT}\`);
 });`
   },
   {
     role: ChatCompletionRequestMessageRoleEnum.Assistant,
     content: `${config?.OCO_EMOJI ? '🐛 ' : ''}${translation.commitFix}
 ${config?.OCO_EMOJI ? '✨ ' : ''}${translation.commitFeat}
 ${config?.OCO_DESCRIPTION ? translation.commitDescription : ''}`
   }
 ];

 const generateCommitMessageChatCompletionPrompt = (
   diff: string
 ): Array<ChatCompletionRequestMessage> => {
   const chatContextAsCompletionRequest = [...INIT_MESSAGES_PROMPT];

   chatContextAsCompletionRequest.push({
     role: ChatCompletionRequestMessageRoleEnum.User,
     content: diff
   });

   return chatContextAsCompletionRequest;
 };

 export enum GenerateCommitMessageErrorEnum {
   tooMuchTokens = 'TOO_MUCH_TOKENS',
   internalError = 'INTERNAL_ERROR',
   emptyMessage = 'EMPTY_MESSAGE'
 }

 const INIT_MESSAGES_PROMPT_LENGTH = INIT_MESSAGES_PROMPT.map(
   (msg) => tokenCount(msg.content) + 4
 ).reduce((a, b) => a + b, 0);

-const MAX_REQ_TOKENS = 3000 - INIT_MESSAGES_PROMPT_LENGTH;
+const ADJUSTMENT_FACTOR = 20;

 export const generateCommitMessageByDiff = async (
   diff: string
 ): Promise<string> => {
   try {
-    if (tokenCount(diff) >= MAX_REQ_TOKENS) {
+    const MAX_REQUEST_TOKENS = DEFAULT_MODEL_TOKEN_LIMIT
+      - ADJUSTMENT_FACTOR
+      - INIT_MESSAGES_PROMPT_LENGTH
+      - config?.OCO_OPENAI_MAX_TOKENS;
+
+    if (tokenCount(diff) >= MAX_REQUEST_TOKENS) {
       const commitMessagePromises = getCommitMsgsPromisesFromFileDiffs(
         diff,
-        MAX_REQ_TOKENS
+        MAX_REQUEST_TOKENS
       );

-      const commitMessages = await Promise.all(commitMessagePromises);
+      const commitMessages = [];
+      for (const promise of commitMessagePromises) {
+        commitMessages.push(await promise);
+        await delay(2000);
+      }

       return commitMessages.join('\n\n');
     } else {
       const messages = generateCommitMessageChatCompletionPrompt(diff);

       const commitMessage = await api.generateCommitMessage(messages);

       if (!commitMessage)
         throw new Error(GenerateCommitMessageErrorEnum.emptyMessage);

       return commitMessage;
     }
   } catch (error) {
     throw error;
   }
 };

 function getMessagesPromisesByChangesInFile(
   fileDiff: string,
   separator: string,
   maxChangeLength: number
 ) {
   const hunkHeaderSeparator = '@@ ';
   const [fileHeader, ...fileDiffByLines] = fileDiff.split(hunkHeaderSeparator);

   // merge multiple line-diffs into 1 to save tokens
   const mergedChanges = mergeDiffs(
     fileDiffByLines.map((line) => hunkHeaderSeparator + line),
     maxChangeLength
   );

-  const lineDiffsWithHeader = mergedChanges.map(
-    (change) => fileHeader + change
-  );
+  const lineDiffsWithHeader = [];
+  for (const change of mergedChanges) {
+    const totalChange = fileHeader + change;
+    if (tokenCount(totalChange) > maxChangeLength) {
+      // If the totalChange is too large, split it into smaller pieces
+      const splitChanges = splitDiff(totalChange, maxChangeLength);
+      lineDiffsWithHeader.push(...splitChanges);
+    } else {
+      lineDiffsWithHeader.push(totalChange);
+    }
+  }

   const commitMsgsFromFileLineDiffs = lineDiffsWithHeader.map((lineDiff) => {
     const messages = generateCommitMessageChatCompletionPrompt(
       separator + lineDiff
     );

     return api.generateCommitMessage(messages);
   });

   return commitMsgsFromFileLineDiffs;
 }

+function splitDiff(diff: string, maxChangeLength: number) {
+  const lines = diff.split('\n');
+  const splitDiffs = [];
+  let currentDiff = '';
+
+  for (let line of lines) {
+    // If a single line exceeds maxChangeLength, split it into multiple lines
+    while (tokenCount(line) > maxChangeLength) {
+      const subLine = line.substring(0, maxChangeLength);
+      line = line.substring(maxChangeLength);
+      splitDiffs.push(subLine);
+    }
+
+    // Check the tokenCount of the currentDiff and the line separately
+    if (tokenCount(currentDiff) + tokenCount('\n' + line) > maxChangeLength) {
+      // If adding the next line would exceed the maxChangeLength, start a new diff
+      splitDiffs.push(currentDiff);
+      currentDiff = line;
+    } else {
+      // Otherwise, add the line to the current diff
+      currentDiff += '\n' + line;
+    }
+  }
+
+  // Add the last diff
+  if (currentDiff) {
+    splitDiffs.push(currentDiff);
+  }
+
+  return splitDiffs;
+}
+
 export function getCommitMsgsPromisesFromFileDiffs(
   diff: string,
   maxDiffLength: number
 ) {
   const separator = 'diff --git ';

   const diffByFiles = diff.split(separator).slice(1);

   // merge multiple files-diffs into 1 prompt to save tokens
   const mergedFilesDiffs = mergeDiffs(diffByFiles, maxDiffLength);

   const commitMessagePromises = [];

   for (const fileDiff of mergedFilesDiffs) {
     if (tokenCount(fileDiff) >= maxDiffLength) {
       // if file-diff is bigger than gpt context — split fileDiff into lineDiff
       const messagesPromises = getMessagesPromisesByChangesInFile(
         fileDiff,
         separator,
         maxDiffLength
       );

       commitMessagePromises.push(...messagesPromises);
     } else {
       const messages = generateCommitMessageChatCompletionPrompt(
         separator + fileDiff
       );

       commitMessagePromises.push(api.generateCommitMessage(messages));
     }
   }

   return commitMessagePromises;
 }
+
+function delay(ms: number) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
