chore: go live (#5)
stainless-app[bot] committed Feb 15, 2024
1 parent 3907c47 commit af9a838
Showing 15 changed files with 158 additions and 303 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -2,10 +2,10 @@ name: CI
on:
push:
branches:
- main
- stainless
pull_request:
branches:
- main
- stainless

jobs:
lint:
4 changes: 2 additions & 2 deletions README.md
@@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()`
print(completion.id)
```

These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object.
These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object.

The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
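As an illustrative sketch only (not part of this diff): reading the raw HTTP response with the synchronous client might look like the following, assuming `GROQ_API_KEY` is set in the environment; the header name and model name are arbitrary examples.

```python
from groq import Groq

client = Groq()  # reads GROQ_API_KEY from the environment

# .with_raw_response wraps the method and returns an APIResponse
# instead of the parsed model.
response = client.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Say hello"}],
    model="mixtral-8x7b-32768",
)

print(response.headers.get("X-Request-ID"))  # raw HTTP details (example header)
completion = response.parse()  # recover the parsed ChatCompletion object
print(completion.id)
```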

#### `.with_streaming_response`

4 changes: 2 additions & 2 deletions bin/check-release-environment
@@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then
errors+=("The GROQ_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
fi

len=${#errors[@]}
lenErrors=${#errors[@]}

if [[ len -gt 0 ]]; then
if [[ lenErrors -gt 0 ]]; then
echo -e "Found the following errors in the release environment:\n"

for error in "${errors[@]}"; do
Empty file modified: bin/check-test-server (mode 100644 → 100755)
Empty file modified: bin/test (mode 100644 → 100755)
14 changes: 2 additions & 12 deletions examples/chat_completion.py
@@ -10,43 +10,33 @@
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{
"role": "system",
"content": "you are a helpful assistant."
},
{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
}
},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# If set, partial message deltas will be sent.
stream=False,
)
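For reference, the reformatted example collapses to this runnable sketch (assuming `GROQ_API_KEY` is exported and the `groq` package is installed; the trailing `print` mirrors the async example below, since the remainder of this file is not shown in the hunk):

```python
from groq import Groq

client = Groq()  # picks up GROQ_API_KEY from the environment

chat_completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "you are a helpful assistant."},
        {"role": "user", "content": "Explain the importance of low latency LLMs"},
    ],
    model="mixtral-8x7b-32768",
    temperature=0.5,  # lower values give less random completions
    max_tokens=1024,  # cap on tokens shared between prompt and completion
    top_p=1,          # nucleus sampling threshold
    stop=None,        # no stop sequence
    stream=False,     # return a single complete response
)

# Print the completion returned by the LLM.
print(chat_completion.choices[0].message.content)
```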
16 changes: 4 additions & 12 deletions examples/chat_completion_async.py
@@ -2,6 +2,7 @@

from groq import AsyncGroq


async def main():
client = AsyncGroq()

@@ -13,48 +14,39 @@ async def main():
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{
"role": "system",
"content": "you are a helpful assistant."
},
{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
}
},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become
# deterministic and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# If set, partial message deltas will be sent.
stream=False,
)

# Print the completion returned by the LLM.
print(chat_completion.choices[0].message.content)


asyncio.run(main())
15 changes: 4 additions & 11 deletions examples/chat_completion_async_streaming.py
@@ -2,6 +2,7 @@

from groq import AsyncGroq


async def main():
client = AsyncGroq()

@@ -13,39 +14,30 @@ async def main():
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{
"role": "system",
"content": "you are a helpful assistant."
},
{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
}
},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become
# deterministic and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
stream=True,
@@ -55,4 +47,5 @@ async def main():
async for chunk in stream:
print(chunk.choices[0].delta.content, end="")


asyncio.run(main())
16 changes: 3 additions & 13 deletions examples/chat_completion_stop.py
@@ -10,37 +10,28 @@
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{
"role": "system",
"content": "you are a helpful assistant."
},
{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
}
"content": 'Count to 10. Your response must begin with "1, ". example: 1, 2, 3, ...',
},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
@@ -49,7 +40,6 @@
# If multiple stop values are needed, an array of string may be passed,
# stop=[", 6", ", six", ", Six"]
stop=", 6",

# If set, partial message deltas will be sent.
stream=False,
)
14 changes: 2 additions & 12 deletions examples/chat_completion_streaming.py
@@ -10,43 +10,33 @@
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
{
"role": "system",
"content": "you are a helpful assistant."
},
{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
}
},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# If set, partial message deltas will be sent.
stream=True,
)
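With `stream=True` the call returns an iterator of chunks rather than a single completion. A minimal consumption loop for this synchronous example, continuing from the `chat_completion` value above and mirroring the async loop shown earlier (the rest of this file is not visible in the hunk), would be:

```python
# Print each incremental delta as it arrives from the stream.
for chunk in chat_completion:
    print(chunk.choices[0].delta.content, end="")
```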
4 changes: 0 additions & 4 deletions src/groq/_streaming.py
@@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]:
iterator = self._iter_events()

for sse in iterator:
if sse.data.startswith("[DONE]"):
break
yield process_data(data=sse.json(), cast_to=cast_to, response=response)

# Ensure the entire stream is consumed
@@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]:

async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
async for sse in self._decoder.aiter(self.response.aiter_lines()):
if sse.data.startswith("[DONE]"):
break
yield sse

async def __stream__(self) -> AsyncIterator[_T]: