From 4de80dbe4c27d6ecbcc2d2b6192d27ade5da3866 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 15 Feb 2024 23:01:42 +0000
Subject: [PATCH] feat: Add initial Stainless SDK

---
 .release-please-manifest.json               |   2 +-
 CHANGELOG.md                                |  15 --
 bin/check-test-server                       |   0
 bin/test                                    |   0
 examples/chat_completion.py                 |  55 -----
 examples/chat_completion_async.py           |  62 ------
 examples/chat_completion_async_streaming.py |  60 ------
 examples/chat_completion_stop.py            |  58 -----
 examples/chat_completion_streaming.py       |  56 -----
 pyproject.toml                              |   2 +-
 src/groq/_streaming.py                      |   4 -
 src/groq/_version.py                        |   2 +-
 src/groq/resources/chat/completions.py      | 198 +-----------------
 src/groqcloud/lib/.keep                     |   4 +
 .../lib/chat_completion_chunk.py            |   5 +-
 15 files changed, 14 insertions(+), 509 deletions(-)
 mode change 100755 => 100644 bin/check-test-server
 mode change 100755 => 100644 bin/test
 delete mode 100644 examples/chat_completion.py
 delete mode 100644 examples/chat_completion_async.py
 delete mode 100644 examples/chat_completion_async_streaming.py
 delete mode 100644 examples/chat_completion_stop.py
 delete mode 100644 examples/chat_completion_streaming.py
 create mode 100644 src/groqcloud/lib/.keep
 rename src/{groq => groqcloud}/lib/chat_completion_chunk.py (97%)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index da59f99..3d2ac0b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.4.0"
+  ".": "0.1.0"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a3b8a29..f643576 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,20 +1,5 @@
 # Changelog
 
-## 0.4.0 (2024-02-14)
-
-Full Changelog: [v0.1.0...v0.4.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.4.0)
-
-### Features
-
-* Add initial Stainless SDK ([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00))
-* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9))
-* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([2241036](https://github.com/groq/groq-python/commit/2241036e9dbee6629ad7ebce5e6f4f5e5f1028ce))
-
-
-### Chores
-
-* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([13665ad](https://github.com/groq/groq-python/commit/13665ad76705513d99cbaa497ccccc694932f2c3))
-
 ## 0.1.0 (2024-02-10)
 
 Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0)
diff --git a/bin/check-test-server b/bin/check-test-server
old mode 100755
new mode 100644
diff --git a/bin/test b/bin/test
old mode 100755
new mode 100644
diff --git a/examples/chat_completion.py b/examples/chat_completion.py
deleted file mode 100644
index 77511d0..0000000
--- a/examples/chat_completion.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from groq import Groq
-
-client = Groq()
-
-chat_completion = client.chat.completions.create(
-    #
-    # Required parameters
-    #
-    messages=[
-        # Set an optional system message. This sets the behavior of the
-        # assistant and can be used to provide specific instructions for
-        # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
-        # Set a user message for the assistant to respond to.
-        {
-            "role": "user",
-            "content": "Explain the importance of low latency LLMs",
-        },
-    ],
-
-    # The language model which will generate the completion.
-    model="mixtral-8x7b-32768",
-
-    #
-    # Optional parameters
-    #
-
-    # Controls randomness: lowering results in less random completions.
-    # As the temperature approaches zero, the model will become deterministic
-    # and repetitive.
-    temperature=0.5,
-
-    # The maximum number of tokens to generate. Requests can use up to
-    # 2048 tokens shared between prompt and completion.
-    max_tokens=1024,
-
-    # Controls diversity via nucleus sampling: 0.5 means half of all
-    # likelihood-weighted options are considered.
-    top_p=1,
-
-    # A stop sequence is a predefined or user-specified text string that
-    # signals an AI to stop generating content, ensuring its responses
-    # remain focused and concise. Examples include punctuation marks and
-    # markers like "[end]".
-    stop=None,
-
-    # If set, partial message deltas will be sent.
-    stream=False,
-)
-
-# Print the completion returned by the LLM.
-print(chat_completion.choices[0].message.content)
diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py
deleted file mode 100644
index 99b8fc4..0000000
--- a/examples/chat_completion_async.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import asyncio
-
-from groq import AsyncGroq
-
-
-async def main():
-    client = AsyncGroq()
-
-    chat_completion = await client.chat.completions.create(
-        #
-        # Required parameters
-        #
-        messages=[
-            # Set an optional system message. This sets the behavior of the
-            # assistant and can be used to provide specific instructions for
-            # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
-            # Set a user message for the assistant to respond to.
-            {
-                "role": "user",
-                "content": "Explain the importance of low latency LLMs",
-            },
-        ],
-
-        # The language model which will generate the completion.
-        model="mixtral-8x7b-32768",
-
-        #
-        # Optional parameters
-        #
-
-        # Controls randomness: lowering results in less random completions.
-        # As the temperature approaches zero, the model will become
-        # deterministic and repetitive.
-        temperature=0.5,
-
-        # The maximum number of tokens to generate. Requests can use up to
-        # 2048 tokens shared between prompt and completion.
-        max_tokens=1024,
-
-        # Controls diversity via nucleus sampling: 0.5 means half of all
-        # likelihood-weighted options are considered.
-        top_p=1,
-
-        # A stop sequence is a predefined or user-specified text string that
-        # signals an AI to stop generating content, ensuring its responses
-        # remain focused and concise. Examples include punctuation marks and
-        # markers like "[end]".
-        stop=None,
-
-        # If set, partial message deltas will be sent.
-        stream=False,
-    )
-
-    # Print the completion returned by the LLM.
-    print(chat_completion.choices[0].message.content)
-
-
-asyncio.run(main())
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
deleted file mode 100644
index 39105f9..0000000
--- a/examples/chat_completion_async_streaming.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import asyncio
-
-from groq import AsyncGroq
-
-
-async def main():
-    client = AsyncGroq()
-
-    stream = await client.chat.completions.create(
-        #
-        # Required parameters
-        #
-        messages=[
-            # Set an optional system message. This sets the behavior of the
-            # assistant and can be used to provide specific instructions for
-            # how it should behave throughout the conversation.
-            {
-                "role": "system",
-                "content": "you are a helpful assistant."
-            },
-            # Set a user message for the assistant to respond to.
-            {
-                "role": "user",
-                "content": "Explain the importance of low latency LLMs",
-            },
-        ],
-
-        # The language model which will generate the completion.
-        model="mixtral-8x7b-32768",
-
-        #
-        # Optional parameters
-        #
-
-        # Controls randomness: lowering results in less random completions.
-        # As the temperature approaches zero, the model will become
-        # deterministic and repetitive.
-        temperature=0.5,
-
-        # The maximum number of tokens to generate. Requests can use up to
-        # 2048 tokens shared between prompt and completion.
-        max_tokens=1024,
-
-        # A stop sequence is a predefined or user-specified text string that
-        # signals an AI to stop generating content, ensuring its responses
-        # remain focused and concise. Examples include punctuation marks and
-        # markers like "[end]".
-        stop=None,
-
-        # Controls diversity via nucleus sampling: 0.5 means half of all
-        # likelihood-weighted options are considered.
-        stream=True,
-    )
-
-    # Print the incremental deltas returned by the LLM.
-    async for chunk in stream:
-        print(chunk.choices[0].delta.content, end="")
-
-
-asyncio.run(main())
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
deleted file mode 100644
index 4abb63b..0000000
--- a/examples/chat_completion_stop.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from groq import Groq
-
-client = Groq()
-
-chat_completion = client.chat.completions.create(
-    #
-    # Required parameters
-    #
-    messages=[
-        # Set an optional system message. This sets the behavior of the
-        # assistant and can be used to provide specific instructions for
-        # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
-        # Set a user message for the assistant to respond to.
-        {
-            "role": "user",
-            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
-        },
-    ],
-
-    # The language model which will generate the completion.
-    model="mixtral-8x7b-32768",
-
-    #
-    # Optional parameters
-    #
-
-    # Controls randomness: lowering results in less random completions.
-    # As the temperature approaches zero, the model will become deterministic
-    # and repetitive.
-    temperature=0.5,
-
-    # The maximum number of tokens to generate. Requests can use up to
-    # 2048 tokens shared between prompt and completion.
-    max_tokens=1024,
-
-    # Controls diversity via nucleus sampling: 0.5 means half of all
-    # likelihood-weighted options are considered.
-    top_p=1,
-
-    # A stop sequence is a predefined or user-specified text string that
-    # signals an AI to stop generating content, ensuring its responses
-    # remain focused and concise. Examples include punctuation marks and
-    # markers like "[end]".
-    # For this example, we will use ", 6" so that the llm stops counting at 5.
-    # If multiple stop values are needed, an array of string may be passed,
-    # stop=[", 6", ", six", ", Six"]
-    stop=", 6",
-
-    # If set, partial message deltas will be sent.
-    stream=False,
-)
-
-# Print the completion returned by the LLM.
-print(chat_completion.choices[0].message.content)
diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py
deleted file mode 100644
index 78d2607..0000000
--- a/examples/chat_completion_streaming.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from groq import Groq
-
-client = Groq()
-
-stream = client.chat.completions.create(
-    #
-    # Required parameters
-    #
-    messages=[
-        # Set an optional system message. This sets the behavior of the
-        # assistant and can be used to provide specific instructions for
-        # how it should behave throughout the conversation.
-        {
-            "role": "system",
-            "content": "you are a helpful assistant."
-        },
-        # Set a user message for the assistant to respond to.
-        {
-            "role": "user",
-            "content": "Explain the importance of low latency LLMs",
-        },
-    ],
-
-    # The language model which will generate the completion.
-    model="mixtral-8x7b-32768",
-
-    #
-    # Optional parameters
-    #
-
-    # Controls randomness: lowering results in less random completions.
-    # As the temperature approaches zero, the model will become deterministic
-    # and repetitive.
-    temperature=0.5,
-
-    # The maximum number of tokens to generate. Requests can use up to
-    # 2048 tokens shared between prompt and completion.
-    max_tokens=1024,
-
-    # Controls diversity via nucleus sampling: 0.5 means half of all
-    # likelihood-weighted options are considered.
-    top_p=1,
-
-    # A stop sequence is a predefined or user-specified text string that
-    # signals an AI to stop generating content, ensuring its responses
-    # remain focused and concise. Examples include punctuation marks and
-    # markers like "[end]".
-    stop=None,
-
-    # If set, partial message deltas will be sent.
-    stream=True,
-)
-
-# Print the incremental deltas returned by the LLM.
-for chunk in stream:
-    print(chunk.choices[0].delta.content, end="")
diff --git a/pyproject.toml b/pyproject.toml
index 5eb419f..f3c156b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "groq"
-version = "0.4.0"
+version = "0.1.0"
 description = "The official Python library for the groq API"
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py
index 2769874..ac0ea8a 100644
--- a/src/groq/_streaming.py
+++ b/src/groq/_streaming.py
@@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]:
         iterator = self._iter_events()
 
         for sse in iterator:
-            if sse.data.startswith("[DONE]"):
-                break
             yield process_data(data=sse.json(), cast_to=cast_to, response=response)
 
         # Ensure the entire stream is consumed
@@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]:
 
     async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
         async for sse in self._decoder.aiter(self.response.aiter_lines()):
-            if sse.data.startswith("[DONE]"):
-                break
             yield sse
 
     async def __stream__(self) -> AsyncIterator[_T]:
diff --git a/src/groq/_version.py b/src/groq/_version.py
index 1612b72..0282ec9 100644
--- a/src/groq/_version.py
+++ b/src/groq/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless.
 
 __title__ = "groq"
-__version__ = "0.4.0" # x-release-please-version
+__version__ = "0.1.0" # x-release-please-version
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
index 67b7ab8..9f332f5 100644
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Dict, List, Union, Literal, Iterable, Optional, overload
+from typing import Dict, List, Union, Iterable, Optional
 
 import httpx
 
@@ -16,12 +16,10 @@
     async_to_raw_response_wrapper,
     async_to_streamed_response_wrapper,
 )
-from ..._streaming import Stream, AsyncStream
 from ...types.chat import ChatCompletion, completion_create_params
 from ..._base_client import (
     make_request_options,
 )
-from ...lib.chat_completion_chunk import ChatCompletionChunk
 
 __all__ = ["Completions", "AsyncCompletions"]
 
@@ -35,7 +33,6 @@ def with_raw_response(self) -> CompletionsWithRawResponse:
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
         return CompletionsWithStreamingResponse(self)
 
-    @overload
     def create(
         self,
         *,
@@ -50,7 +47,7 @@ def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
@@ -64,98 +61,6 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        ...
-
-    def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -200,8 +105,6 @@ def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=Stream[ChatCompletionChunk],
         )
 
 
@@ -214,7 +117,6 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
         return AsyncCompletionsWithStreamingResponse(self)
 
-    @overload
     async def create(
         self,
         *,
@@ -229,7 +131,7 @@ async def create(
         response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
         seed: int | NotGiven = NOT_GIVEN,
         stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        stream: bool | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
         tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
@@ -243,98 +145,6 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> ChatCompletion:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Literal[True],
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[ChatCompletionChunk]:
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: bool,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        ...
-
-    async def create(
-        self,
-        *,
-        frequency_penalty: float | NotGiven = NOT_GIVEN,
-        logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN,
-        logprobs: bool | NotGiven = NOT_GIVEN,
-        max_tokens: int | NotGiven = NOT_GIVEN,
-        messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        n: int | NotGiven = NOT_GIVEN,
-        presence_penalty: float | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: int | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN,
-        tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN,
-        top_logprobs: int | NotGiven = NOT_GIVEN,
-        top_p: float | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
         """
         Creates a completion for a chat prompt
 
@@ -379,8 +189,6 @@ async def create(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
             cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=AsyncStream[ChatCompletionChunk],
         )
 
 
diff --git a/src/groqcloud/lib/.keep b/src/groqcloud/lib/.keep
new file mode 100644
index 0000000..5e2c99f
--- /dev/null
+++ b/src/groqcloud/lib/.keep
@@ -0,0 +1,4 @@
+File generated from our OpenAPI spec by Stainless.
+
+This directory can be used to store custom files to expand the SDK.
+It is ignored by Stainless code generation and its content (other than this keep file) won't be touched.
\ No newline at end of file
diff --git a/src/groq/lib/chat_completion_chunk.py b/src/groqcloud/lib/chat_completion_chunk.py
similarity index 97%
rename from src/groq/lib/chat_completion_chunk.py
rename to src/groqcloud/lib/chat_completion_chunk.py
index e4e3533..2aabbcf 100644
--- a/src/groq/lib/chat_completion_chunk.py
+++ b/src/groqcloud/lib/chat_completion_chunk.py
@@ -16,6 +16,8 @@
     "ChoiceDeltaToolCall",
     "ChoiceDeltaToolCallFunction",
 ]
+
+
 class ChoiceDeltaFunctionCall(BaseModel):
     arguments: Optional[str] = None
     """
@@ -28,6 +30,7 @@ class ChoiceDeltaFunctionCall(BaseModel):
     name: Optional[str] = None
     """The name of the function to call."""
 
+
 class ChoiceLogprobsContentTopLogprob(BaseModel):
     token: Optional[str] = None
 
@@ -97,4 +100,4 @@ class ChatCompletionChunk(BaseModel):
 
     object: Optional[str] = None
 
-    system_fingerprint: Optional[str] = None
\ No newline at end of file
+    system_fingerprint: Optional[str] = None