From 4de80dbe4c27d6ecbcc2d2b6192d27ade5da3866 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 Feb 2024 23:01:42 +0000
Subject: [PATCH] feat: Add initial Stainless SDK

---
 .release-please-manifest.json               |   2 +-
 CHANGELOG.md                                |  15 --
 bin/check-test-server                       |   0
 bin/test                                    |   0
 examples/chat_completion.py                 |  55 -----
 examples/chat_completion_async.py           |  62 ------
 examples/chat_completion_async_streaming.py |  60 ------
 examples/chat_completion_stop.py            |  58 -----
 examples/chat_completion_streaming.py       |  56 -----
 pyproject.toml                              |   2 +-
 src/groq/_streaming.py                      |   4 -
 src/groq/_version.py                        |   2 +-
 src/groq/resources/chat/completions.py      | 198 +-----------------
 src/groqcloud/lib/.keep                     |   4 +
 .../lib/chat_completion_chunk.py            |   5 +-
 15 files changed, 14 insertions(+), 509 deletions(-)
 mode change 100755 => 100644 bin/check-test-server
 mode change 100755 => 100644 bin/test
 delete mode 100644 examples/chat_completion.py
 delete mode 100644 examples/chat_completion_async.py
 delete mode 100644 examples/chat_completion_async_streaming.py
 delete mode 100644 examples/chat_completion_stop.py
 delete mode 100644 examples/chat_completion_streaming.py
 create mode 100644 src/groqcloud/lib/.keep
 rename src/{groq => groqcloud}/lib/chat_completion_chunk.py (97%)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index da59f99..3d2ac0b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.4.0"
+  ".": "0.1.0"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a3b8a29..f643576 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,20 +1,5 @@
 # Changelog
 
-## 0.4.0 (2024-02-14)
-
-Full Changelog: [v0.1.0...v0.4.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.4.0)
-
-### Features
-
-* Add initial Stainless SDK ([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00))
-* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9))
-* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([2241036](https://github.com/groq/groq-python/commit/2241036e9dbee6629ad7ebce5e6f4f5e5f1028ce))
-
-
-### Chores
-
-* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([13665ad](https://github.com/groq/groq-python/commit/13665ad76705513d99cbaa497ccccc694932f2c3))
-
 ## 0.1.0 (2024-02-10)
 
 Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0)
diff --git a/bin/check-test-server b/bin/check-test-server
old mode 100755
new mode 100644
diff --git a/bin/test b/bin/test
old mode 100755
new mode 100644
diff --git a/examples/chat_completion.py b/examples/chat_completion.py
deleted file mode 100644
index 77511d0..0000000
--- a/examples/chat_completion.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from groq import Groq
-
-client = Groq()
-
-chat_completion = client.chat.completions.create(
-    #
-    # Required parameters
-    #
-    messages=[
-        # Set an optional system message. This sets the behavior of the
-        # assistant and can be used to provide specific instructions for
-        # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
-        # Set a user message for the assistant to respond to.
-        {
-            "role": "user",
-            "content": "Explain the importance of low latency LLMs",
-        },
-    ],
-
-    # The language model which will generate the completion.
-    model="mixtral-8x7b-32768",
-
-    #
-    # Optional parameters
-    #
-
-    # Controls randomness: lowering results in less random completions.
-    # As the temperature approaches zero, the model will become deterministic
-    # and repetitive.
-    temperature=0.5,
-
-    # The maximum number of tokens to generate. Requests can use up to
-    # 2048 tokens shared between prompt and completion.
-    max_tokens=1024,
-
-    # Controls diversity via nucleus sampling: 0.5 means half of all
-    # likelihood-weighted options are considered.
-    top_p=1,
-
-    # A stop sequence is a predefined or user-specified text string that
-    # signals an AI to stop generating content, ensuring its responses
-    # remain focused and concise. Examples include punctuation marks and
-    # markers like "[end]".
-    stop=None,
-
-    # If set, partial message deltas will be sent.
-    stream=False,
-)
-
-# Print the completion returned by the LLM.
-print(chat_completion.choices[0].message.content)
diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py
deleted file mode 100644
index 99b8fc4..0000000
--- a/examples/chat_completion_async.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import asyncio
-
-from groq import AsyncGroq
-
-
-async def main():
-    client = AsyncGroq()
-
-    chat_completion = await client.chat.completions.create(
-        #
-        # Required parameters
-        #
-        messages=[
-            # Set an optional system message. This sets the behavior of the
-            # assistant and can be used to provide specific instructions for
-            # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
-            # Set a user message for the assistant to respond to.
-            {
-                "role": "user",
-                "content": "Explain the importance of low latency LLMs",
-            },
-        ],
-
-        # The language model which will generate the completion.
-        model="mixtral-8x7b-32768",
-
-        #
-        # Optional parameters
-        #
-
-        # Controls randomness: lowering results in less random completions.
-        # As the temperature approaches zero, the model will become
-        # deterministic and repetitive.
-        temperature=0.5,
-
-        # The maximum number of tokens to generate. Requests can use up to
-        # 2048 tokens shared between prompt and completion.
-        max_tokens=1024,
-
-        # Controls diversity via nucleus sampling: 0.5 means half of all
-        # likelihood-weighted options are considered.
-        top_p=1,
-
-        # A stop sequence is a predefined or user-specified text string that
-        # signals an AI to stop generating content, ensuring its responses
-        # remain focused and concise. Examples include punctuation marks and
-        # markers like "[end]".
-        stop=None,
-
-        # If set, partial message deltas will be sent.
-        stream=False,
-    )
-
-    # Print the completion returned by the LLM.
-    print(chat_completion.choices[0].message.content)
-
-
-asyncio.run(main())
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
deleted file mode 100644
index 39105f9..0000000
--- a/examples/chat_completion_async_streaming.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import asyncio
-
-from groq import AsyncGroq
-
-
-async def main():
-    client = AsyncGroq()
-
-    stream = await client.chat.completions.create(
-        #
-        # Required parameters
-        #
-        messages=[
-            # Set an optional system message. This sets the behavior of the
-            # assistant and can be used to provide specific instructions for
-            # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
-            # Set a user message for the assistant to respond to.
-            {
-                "role": "user",
-                "content": "Explain the importance of low latency LLMs",
-            },
-        ],
-
-        # The language model which will generate the completion.
-        model="mixtral-8x7b-32768",
-
-        #
-        # Optional parameters
-        #
-
-        # Controls randomness: lowering results in less random completions.
-        # As the temperature approaches zero, the model will become
-        # deterministic and repetitive.
-        temperature=0.5,
-
-        # The maximum number of tokens to generate. Requests can use up to
-        # 2048 tokens shared between prompt and completion.
-        max_tokens=1024,
-
-        # A stop sequence is a predefined or user-specified text string that
-        # signals an AI to stop generating content, ensuring its responses
-        # remain focused and concise. Examples include punctuation marks and
-        # markers like "[end]".
-        stop=None,
-
-        # Controls diversity via nucleus sampling: 0.5 means half of all
-        # likelihood-weighted options are considered.
-        stream=True,
-    )
-
-    # Print the incremental deltas returned by the LLM.
-    async for chunk in stream:
-        print(chunk.choices[0].delta.content, end="")
-
-
-asyncio.run(main())
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
deleted file mode 100644
index 4abb63b..0000000
--- a/examples/chat_completion_stop.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from groq import Groq
-
-client = Groq()
-
-chat_completion = client.chat.completions.create(
-    #
-    # Required parameters
-    #
-    messages=[
-        # Set an optional system message. This sets the behavior of the
-        # assistant and can be used to provide specific instructions for
-        # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
-        # Set a user message for the assistant to respond to.
-        {
-            "role": "user",
-            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
-        },
-    ],
-
-    # The language model which will generate the completion.
-    model="mixtral-8x7b-32768",
-
-    #
-    # Optional parameters
-    #
-
-    # Controls randomness: lowering results in less random completions.
-    # As the temperature approaches zero, the model will become deterministic
-    # and repetitive.
-    temperature=0.5,
-
-    # The maximum number of tokens to generate. Requests can use up to
-    # 2048 tokens shared between prompt and completion.
-    max_tokens=1024,
-
-    # Controls diversity via nucleus sampling: 0.5 means half of all
-    # likelihood-weighted options are considered.
-    top_p=1,
-
-    # A stop sequence is a predefined or user-specified text string that
-    # signals an AI to stop generating content, ensuring its responses
-    # remain focused and concise. Examples include punctuation marks and
-    # markers like "[end]".
-    # For this example, we will use ", 6" so that the llm stops counting at 5.
-    # If multiple stop values are needed, an array of string may be passed,
-    # stop=[", 6", ", six", ", Six"]
-    stop=", 6",
-
-    # If set, partial message deltas will be sent.
-    stream=False,
-)
-
-# Print the completion returned by the LLM.
-print(chat_completion.choices[0].message.content)
diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py
deleted file mode 100644
index 78d2607..0000000
--- a/examples/chat_completion_streaming.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from groq import Groq
-
-client = Groq()
-
-stream = client.chat.completions.create(
-    #
-    # Required parameters
-    #
-    messages=[
-        # Set an optional system message. This sets the behavior of the
-        # assistant and can be used to provide specific instructions for
-        # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
-        # Set a user message for the assistant to respond to.
-        {
-            "role": "user",
-            "content": "Explain the importance of low latency LLMs",
-        },
-    ],
-
-    # The language model which will generate the completion.
-    model="mixtral-8x7b-32768",
-
-    #
-    # Optional parameters
-    #
-
-    # Controls randomness: lowering results in less random completions.
-    # As the temperature approaches zero, the model will become deterministic
-    # and repetitive.
-    temperature=0.5,
-
-    # The maximum number of tokens to generate. Requests can use up to
-    # 2048 tokens shared between prompt and completion.
-    max_tokens=1024,
-
-    # Controls diversity via nucleus sampling: 0.5 means half of all
-    # likelihood-weighted options are considered.
-    top_p=1,
-
-    # A stop sequence is a predefined or user-specified text string that
-    # signals an AI to stop generating content, ensuring its responses
-    # remain focused and concise. Examples include punctuation marks and
-    # markers like "[end]".
-    stop=None,
-
-    # If set, partial message deltas will be sent.
-    stream=True,
-)
-
-# Print the incremental deltas returned by the LLM.
-for chunk in stream:
-    print(chunk.choices[0].delta.content, end="")
diff --git a/pyproject.toml b/pyproject.toml
index 5eb419f..f3c156b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "groq"
-version = "0.4.0"
+version = "0.1.0"
 description = "The official Python library for the groq API"
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py
index 2769874..ac0ea8a 100644
--- a/src/groq/_streaming.py
+++ b/src/groq/_streaming.py
@@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]:
         iterator = self._iter_events()
 
         for sse in iterator:
-            if sse.data.startswith("[DONE]"):
-                break
             yield process_data(data=sse.json(), cast_to=cast_to, response=response)
 
         # Ensure the entire stream is consumed
@@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]:
 
     async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
         async for sse in self._decoder.aiter(self.response.aiter_lines()):
-            if sse.data.startswith("[DONE]"):
-                break
             yield sse
 
     async def __stream__(self) -> AsyncIterator[_T]:
diff --git a/src/groq/_version.py b/src/groq/_version.py
index 1612b72..0282ec9 100644
--- a/src/groq/_version.py
+++ b/src/groq/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless.
 
 __title__ = "groq"
-__version__ = "0.4.0" # x-release-please-version
+__version__ = "0.1.0" # x-release-please-version
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
index 67b7ab8..9f332f5 100644
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Union, Literal, Iterable, Optional, overload
+from typing import Dict, List, Union, Iterable, Optional
 
 import httpx
 
@@ -16,12 +16,10 @@
     async_to_raw_response_wrapper,
     async_to_streamed_response_wrapper,
 )
-from ..._streaming import Stream, AsyncStream
 from ...types.chat import ChatCompletion, completion_create_params
 from ..._base_client import (
     make_request_options,
 )
-from ...lib.chat_completion_chunk import ChatCompletionChunk
 
 __all__ = ["Completions", "AsyncCompletions"]
 
@@ -35,7 +33,6 @@ def with_raw_response(self) -> CompletionsWithRawResponse:
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
         return CompletionsWithStreamingResponse(self)
 
-    @overload
     def create(
         self,
         *,
@@ -50,7 +47,7 @@ def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
@@ -64,98 +61,6 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        ...
-
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -200,8 +105,6 @@ def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=Stream[ChatCompletionChunk],
         )
 
 
@@ -214,7 +117,6 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
         return AsyncCompletionsWithStreamingResponse(self)
 
-    @overload
     async def create(
         self,
         *,
@@ -229,7 +131,7 @@ async def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
@@ -243,98 +145,6 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        ...
-
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -379,8 +189,6 @@ async def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=AsyncStream[ChatCompletionChunk],
         )
 
 
diff --git a/src/groqcloud/lib/.keep b/src/groqcloud/lib/.keep
new file mode 100644
index 0000000..5e2c99f
--- /dev/null
+++ b/src/groqcloud/lib/.keep
@@ -0,0 +1,4 @@
+File generated from our OpenAPI spec by Stainless.
+
+This directory can be used to store custom files to expand the SDK.
+It is ignored by Stainless code generation and its content (other than this keep file) won't be touched.
\ No newline at end of file
diff --git a/src/groq/lib/chat_completion_chunk.py b/src/groqcloud/lib/chat_completion_chunk.py
similarity index 97%
rename from src/groq/lib/chat_completion_chunk.py
rename to src/groqcloud/lib/chat_completion_chunk.py
index e4e3533..2aabbcf 100644
--- a/src/groq/lib/chat_completion_chunk.py
+++ b/src/groqcloud/lib/chat_completion_chunk.py
@@ -16,6 +16,8 @@
     "ChoiceDeltaToolCall",
     "ChoiceDeltaToolCallFunction",
 ]
+
+
 class ChoiceDeltaFunctionCall(BaseModel):
     arguments: Optional[str] = None
     """
@@ -28,6 +30,7 @@ class ChoiceDeltaFunctionCall(BaseModel):
     name: Optional[str] = None
     """The name of the function to call."""
 
+
 class ChoiceLogprobsContentTopLogprob(BaseModel):
     token: Optional[str] = None
 
@@ -97,4 +100,4 @@ class ChatCompletionChunk(BaseModel):
 
     object: Optional[str] = None
 
-    system_fingerprint: Optional[str] = None
\ No newline at end of file
+    system_fingerprint: Optional[str] = None