Improve Git commit message generation and add docstring functionality:

- Add functionality to generate Git commit messages automatically
- Introduce feature to generate docstrings for Python code
- Implement commands for generating docstrings for files and directories
domainexpert · May 5, 2024 · 5b179ba · 5b179ba
1 parent 66cd182
commit 5b179ba
Show file tree

Hide file tree

Showing 10 changed files with 215 additions and 25 deletions.
diff --git a/README.md b/README.md
@@ -16,11 +16,19 @@ To use certain features of this package, you need to set up your OpenAI API key.
 
 
 ## Usage
-### Generating Git Commit Messages
+
+#### Generating Git Commit Messages
 Sarathi provides a convenient command `sarathi git autocommit` to generate commit messages for Git commits. 
 - Stage the files you want to commit
 - Run `sarathi git autocommit`. This command will automatically analyze your staged changes (using git add .), generate a commit message, and if you confirm will commit your changes to the repository with the generate message
 
+#### Generating docstring messages
+You can generate docstrings for your Python code using the commands below.
+
+- Run `sarathi docstrgen -f /scratchpad/ghmount/sarathi/src/sarathi/utils/formatters.py`. This command analyzes the functions in the given file and generates docstrings for those that do not already have one.
+- Run `sarathi docstrgen -d /scratchpad/ghmount/sarathi/src/sarathi/`. This command analyzes the functions in all files present in the specified directory and generates docstrings for those that do not already have one.
+
+
 ## Helpul references
  - https://dev.to/taikedz/ive-parked-my-side-projects-3o62
  - https://github.com/lightningorb/autocommit

diff --git a/src/sarathi/cli/cli_handler.py b/src/sarathi/cli/cli_handler.py
@@ -6,18 +6,25 @@
 
 
 def parse_cmd_args():
- parser = argparse.ArgumentParser()
+ """This function parses command line arguments using argparse.
 
+ Returns:
+ argparse.Namespace: The parsed arguments from the command line."""
+ parser = argparse.ArgumentParser()
  subparsers = parser.add_subparsers(dest="op")
-
  sgit.setup_args(subparsers, opname="git")
  qahelper.setup_args(subparsers, opname="ask")
  docstrgen.setup_args(subparsers, opname="docstrgen")
-
  return parser.parse_args()
 
 
 def main():
+ """This function is the entry point of the program.
+ It parses the command line arguments and executes the corresponding command based on the value of op attribute in the parsed arguments.
+ - If the op is git, it executes a git command.
+ - If the op is ask, it executes a command related to question-answering.
+ - If the op is docstrgen, it executes a command related to generating docstrings.
+ """
  try:
  parsed_args = parse_cmd_args()
  if parsed_args.op == "git":

diff --git a/src/sarathi/cli/gendocstrings.py b/src/sarathi/cli/gendocstrings.py
@@ -9,7 +9,16 @@
 
 
 def setup_args(subparsers, opname):
+ """
+ Adds arguments for setting up a command line parser.
 
+ Args:
+ subparsers: The subparsers object to add the arguments to.
+ opname: The name of the operation.
+
+ Returns:
+ None
+ """
  gendocstr_parser = subparsers.add_parser(opname)
  gendocstr_parser.add_argument(
  "-f",
@@ -26,16 +35,26 @@ def setup_args(subparsers, opname):
 
 
 def execute_cmd(args):
+ """
+ Execute a command based on the provided arguments.
+
+ Args:
+ args: A dictionary containing filepath and dirpath information.
+
+ Returns:
+ None
+ """
  file_path = args.filepath
  dir_path = args.dirpath
-
  if file_path and dir_path:
  print("Please enter a file or a folder. Both arguments cannot be specified")
  elif file_path:
+ print(f"Generating docstrings for file {file_path}")
  code_transformer = CodeTransformer(file_path)
  code_transformer.transform_code()
  elif dir_path:
  files_to_process = get_filepaths(dir_path)
  for fpath in files_to_process:
+ print(f"Generating docstrings for file {fpath}")
  code_transformer = CodeTransformer(fpath)
  code_transformer.transform_code()
diff --git a/src/sarathi/cli/qahelper.py b/src/sarathi/cli/qahelper.py
@@ -3,17 +3,32 @@
 
 
 def setup_args(subparsers, opname):
+ """Setup the arguments for a subparser.
 
+ Args:
+ subparsers: The subparsers object to add the argument parser to.
+ opname: The name of the operation.
+
+ Returns:
+ None
+ """
  qa_parser = subparsers.add_parser(opname)
  qa_parser.add_argument("-q", "--question", required=True)
 
 
 def execute_cmd(args):
+ """Execute a command.
+
+ Args:
+ args: A dictionary containing the command arguments.
+
+ Returns:
+ None
+ """
  question_asked = args.question
  llm_response = call_llm_model(prompt_dict["qahelper"], question_asked)
  if "Error" not in llm_response:
  answer = llm_response["choices"][0]["message"]["content"]
  print(answer)
- # print(answer)
  else:
  print(llm_response["Error"])
diff --git a/src/sarathi/cli/sgit.py b/src/sarathi/cli/sgit.py
@@ -6,34 +6,64 @@
 
 
 def get_staged_diff():
+ """Returns the staged difference in the git repository.
+
+ Returns:
+ The staged difference in the git repository as a string.
+ """
  return subprocess.run(
  ["git", "diff", "--staged"], stdout=subprocess.PIPE
  ).stdout.decode("utf-8")
 
 
 def generate_commit_message():
+ """Generates a commit message using a language model.
+
+ Returns:
+ The commit message generated by the language model.
+ """
  diff = get_staged_diff()
  prompt_info = prompt_dict["autocommit"]
  llm_response = call_llm_model(prompt_info, diff)
  return llm_response["choices"][0]["message"]["content"]
 
 
 def get_user_confirmation():
+ """Prompts the user for confirmation to proceed.
+
+ Returns:
+ True if user input is y, False otherwise.
+ """
  return input(f"Do you want to proceed " + format_green("y/n") + ": ").strip() == "y"
 
 
 def setup_args(subparsers, opname):
+ """Adds a new sub-parser to the provided subparsers.
+
+ Args:
+ subparsers: The argument parser object to which a new sub-parser will be added.
+ opname: The name of the sub-parser to be added.
 
+ Returns:
+ The newly added sub-parser object.
+ """
  git_parser = subparsers.add_parser(opname)
  git_sub_cmd = git_parser.add_subparsers(dest="git_sub_cmd")
-
  commit_op = git_sub_cmd.add_parser("autocommit")
 
 
 def execute_cmd(args):
+ """
+ Executes a Git sub-command based on the provided arguments.
+
+ Args:
+ args: The arguments containing the Git sub-command to be executed.
+
+ Returns:
+ None
+ """
  if args.git_sub_cmd == "autocommit":
  generated_commit_msg = generate_commit_message()
-
  if generated_commit_msg:
  print(generated_commit_msg)
  if get_user_confirmation():

diff --git a/src/sarathi/code/codetasks.py b/src/sarathi/code/codetasks.py
@@ -1,40 +1,80 @@
 import ast
 import subprocess
-
 import astor
-
 from src.sarathi.llm.call_llm import call_llm_model
 from src.sarathi.llm.prompts import prompt_dict
 from src.sarathi.utils.formatters import format_code
 
 
 class CodeTransformer:
+
  def __init__(self, file_path):
+ """Initializes the class with the provided file path.
+
+ Args:
+ file_path: The path to the file.
+
+ Returns:
+ None.
+ """
  self.file_path = file_path
  self.dosctring_prompt = "update_docstrings"
 
  def get_ast(self):
+ """Parse the content of a file and return the abstract syntax tree (AST).
+
+ Returns:
+ The abstract syntax tree (AST) generated from the content of the file.
+ """
  with open(self.file_path, "r") as file:
  code = file.read()
  return ast.parse(code)
 
  def find_methods(self, tree):
+ """Find all the methods in the given abstract syntax tree.
+
+ Args:
+ self: The instance of the class.
+ tree: The abstract syntax tree to search for methods.
+
+ Returns:
+ A list of method nodes found in the abstract syntax tree.
+ """
  methods = []
  for node in ast.walk(tree):
  if isinstance(node, ast.FunctionDef):
  methods.append(node)
  return methods
 
  def format_node_with_new_docstring(self, new_docstring, method):
+ """
+ Formats a node with a new docstring.
+
+ Args:
+ new_docstring: The new docstring to be formatted.
+ method: The method to which the new docstring will be added.
+
+ Returns:
+ The new docstring node after formatting.
+ """
  indentation = method.body[0].col_offset if method.body else 0
+ new_docstring = new_docstring.replace('"""', "")
+ new_docstring = new_docstring.replace("'", "")
  new_docstring_node = ast.Expr(
- value=ast.Str(new_docstring.replace('"""', "")),
- lineno=method.lineno,
- col_offset=indentation,
+ value=ast.Str(new_docstring), lineno=method.lineno, col_offset=indentation
  )
  return new_docstring_node
 
  def update_docstrings(self, methods, overwrite_existing=False):
+ """Update docstrings for the given methods.
+
+ Args:
+ methods: A list of methods whose docstrings need to be updated.
+ overwrite_existing: A boolean flag indicating whether to overwrite existing docstrings. Default is False.
+
+ Returns:
+ None.
+ """
  for method in methods:
  try:
  existing_docstring = ast.get_docstring(method)
@@ -63,11 +103,18 @@ def update_docstrings(self, methods, overwrite_existing=False):
  new_docstring, method
  )
  method.body.insert(0, new_docstring_node)
- # method.body = new_docstring.body[0].body
  except Exception as e:
  print(f"{e}")
 
  def transform_code(self, op="update_docstrings"):
+ """Transforms the code based on the specified operation.
+
+ Args:
+ op: The operation to be performed. Default value is update_docstrings.
+
+ Returns:
+ None
+ """
  tree = self.get_ast()
  if op == "update_docstrings":
  methods = self.find_methods(tree)
@@ -76,6 +123,14 @@ def transform_code(self, op="update_docstrings"):
  self.update_code(modified_source)
 
  def update_code(self, updated_code):
+ """Update the code in the file with the provided updated code.
+
+ Args:
+ updated_code: The updated code that needs to be written to the file.
+
+ Returns:
+ None
+ """
  formatted_code = format_code(updated_code)
  with open(self.file_path, "w") as f:
  f.write(formatted_code)
diff --git a/src/sarathi/llm/call_llm.py b/src/sarathi/llm/call_llm.py
@@ -4,16 +4,24 @@
 
 
 def call_llm_model(prompt_info, user_msg, resp_type=None):
+ """
+ Generate a response from the OpenAI language model based on the given prompt and user message.
+
+ Args:
+ prompt_info (dict): A dictionary containing information about the prompt, including the model and system message.
+ user_msg (str): The user message to be used as input for the language model.
+ resp_type (str, optional): The type of response expected. Defaults to None.
+
+ Returns:
+ """
  try:
  url = "https://api.openai.com/v1/chat/completions"
- # url = "https://api.openai.com/v1/engines/davinci-codex/completions"
  model = prompt_info["model"]
  system_msg = prompt_info["system_msg"]
  headers = {
  "Authorization": "Bearer " + os.environ["OPENAI_API_KEY"],
  "Content-Type": "application/json",
  }
-
  body = {
  "model": model,
  "messages": [
@@ -33,6 +41,3 @@ def call_llm_model(prompt_info, user_msg, resp_type=None):
  except Exception as e:
  if str(e) == "'OPENAI_API_KEY'":
  raise ValueError("Exception occured " + str(e) + " not found")
- # return {"Error": "Exception occured " + str(e) + " not found"}
-
- # return {"Error": "Exception occured " + str(e) + " occured"}
diff --git a/src/sarathi/llm/prompts.py b/src/sarathi/llm/prompts.py
@@ -25,11 +25,20 @@
  },
  "update_docstrings": {
  "system_msg": """ 
- Your task is to generate a docstring for the python code provided below. Please follow below guidelines while generating the docstring
- - generate the docstrings for the provided function in google format
+ Your task is to generat Google style docstrings format for the python code provided below. Please follow below guidelines while generating the docstring
+ - docstrings should be generated in Google style docstrings format. An example is mentioned below
+ \"\"\"Reads the content of a file.
+
+ Args:
+ file_path: The path to the file to be read.
+
+ Returns:
+ The content of the file as a string.
+ \"\"\"
  - in your response only the docstrings should be send back, make sure not to send any code back in response
  - if you cannot determine the type of parameter or arguments, do not make up the type values in docstrings
- - If you do not know the answer do not make it up, just say sorry I do not know that
+ - do not mention any single quotes or double quotes in the response
+ - if you do not know the answer do not make it up, just say sorry I do not know that
  """,
  "model": "gpt-3.5-turbo",
  },

diff --git a/src/sarathi/utils/formatters.py b/src/sarathi/utils/formatters.py
@@ -13,5 +13,14 @@ def format_green(text):
 
 
 def format_code(code):
+ """
+ Formats the input code using the specified mode.
+
+ Args:
+ code: The input code to be formatted.
+
+ Returns:
+ The formatted code.
+ """
  out = format_str(code, mode=FileMode())
  return out