2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
{
".": "2.0.0-alpha.9"
".": "2.0.0-alpha.10"
}
8 changes: 4 additions & 4 deletions .stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 43
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-1c6ce663fedf553574b97bea0a29eead80b3c8e508f5a8ef2a3e7694ad59c23c.yml
openapi_spec_hash: 02565e3bb15204f0b97e7d1a47557353
config_hash: 87a5832ab2ecefe567d22108531232f5
configured_endpoints: 44
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-291c169d09f5ccc52f1e20f6f239db136003f4735ebd82f14f10cfdf96bb88fd.yml
openapi_spec_hash: 241fba23e79ab8bcfb06c7781c01aa27
config_hash: 9749f2f8998aa6b15452b2187ff675b9
23 changes: 23 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,28 @@
# Changelog

## 2.0.0-alpha.10 (2025-12-11)

Full Changelog: [v2.0.0-alpha.9...v2.0.0-alpha.10](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.9...v2.0.0-alpha.10)

### Features

* **api:** Add fine_tuning.estimate_price api ([1582cc4](https://github.com/togethercomputer/together-py/commit/1582cc498e17562a3a23ae5120dfff2d39ae1e41))
* **api:** api update ([5341347](https://github.com/togethercomputer/together-py/commit/53413475daeeec382968407d47688cf7926f643c))
* **api:** api update ([96fc9b3](https://github.com/togethercomputer/together-py/commit/96fc9b3b1218bcf0c8dd13a28b8eab5c9690c6fd))
* **api:** api update ([e5cfa45](https://github.com/togethercomputer/together-py/commit/e5cfa45f476c77965a9249e9ae41b55b029abfaa))


### Bug Fixes

* **types:** allow pyright to infer TypedDict types within SequenceNotStr ([048f2b7](https://github.com/togethercomputer/together-py/commit/048f2b7d347aa2ab09a4b49c2770cbf15a70c3e4))


### Chores

* add missing docstrings ([a1c8329](https://github.com/togethercomputer/together-py/commit/a1c8329a0c2562bcdbd22c262eb7a995bfbd0deb))
* **internal:** avoid using unstable Python versions in tests ([6268112](https://github.com/togethercomputer/together-py/commit/62681124a807a4f718e1711039242d2b9037e33b))
* Update model list CLI to use api parameter for dedicated filtering ([#195](https://github.com/togethercomputer/together-py/issues/195)) ([95cc672](https://github.com/togethercomputer/together-py/commit/95cc672583e2a908f54dd557cd0f22465da26a4b))

## 2.0.0-alpha.9 (2025-12-03)

Full Changelog: [v2.0.0-alpha.8...v2.0.0-alpha.9](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.8...v2.0.0-alpha.9)
18 changes: 8 additions & 10 deletions README.md
@@ -186,17 +186,15 @@ from together import Together

client = Together()

chat_completion = client.chat.completions.create(
messages=[
{
"content": "content",
"role": "system",
}
],
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
response_format={},
dedicated_endpoint = client.endpoints.create(
autoscaling={
"max_replicas": 5,
"min_replicas": 2,
},
hardware="1x_nvidia_a100_80gb_sxm",
model="meta-llama/Llama-3-8b-chat-hf",
)
print(chat_completion.response_format)
print(dedicated_endpoint.autoscaling)
```

The async client uses the exact same interface. If you pass a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, the file contents will be read asynchronously automatically.
4 changes: 3 additions & 1 deletion api.md
@@ -78,6 +78,7 @@ from together.types import (
FineTuningListResponse,
FineTuningDeleteResponse,
FineTuningCancelResponse,
FineTuningEstimatePriceResponse,
FineTuningListCheckpointsResponse,
FineTuningListEventsResponse,
)
@@ -90,6 +91,7 @@ Methods:
- <code title="delete /fine-tunes/{id}">client.fine_tuning.<a href="./src/together/resources/fine_tuning.py">delete</a>(id, \*\*<a href="src/together/types/fine_tuning_delete_params.py">params</a>) -> <a href="./src/together/types/fine_tuning_delete_response.py">FineTuningDeleteResponse</a></code>
- <code title="post /fine-tunes/{id}/cancel">client.fine_tuning.<a href="./src/together/resources/fine_tuning.py">cancel</a>(id) -> <a href="./src/together/types/fine_tuning_cancel_response.py">FineTuningCancelResponse</a></code>
- <code title="get /finetune/download">client.fine_tuning.<a href="./src/together/resources/fine_tuning.py">content</a>(\*\*<a href="src/together/types/fine_tuning_content_params.py">params</a>) -> BinaryAPIResponse</code>
- <code title="post /fine-tunes/estimate-price">client.fine_tuning.<a href="./src/together/resources/fine_tuning.py">estimate_price</a>(\*\*<a href="src/together/types/fine_tuning_estimate_price_params.py">params</a>) -> <a href="./src/together/types/fine_tuning_estimate_price_response.py">FineTuningEstimatePriceResponse</a></code>
- <code title="get /fine-tunes/{id}/checkpoints">client.fine_tuning.<a href="./src/together/resources/fine_tuning.py">list_checkpoints</a>(id) -> <a href="./src/together/types/fine_tuning_list_checkpoints_response.py">FineTuningListCheckpointsResponse</a></code>
- <code title="get /fine-tunes/{id}/events">client.fine_tuning.<a href="./src/together/resources/fine_tuning.py">list_events</a>(id) -> <a href="./src/together/types/fine_tuning_list_events_response.py">FineTuningListEventsResponse</a></code>
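The method list above adds `estimate_price` against `POST /fine-tunes/estimate-price`. The diff does not show its request parameters, so the payload in this sketch is purely illustrative — both field names are assumptions, not taken from this PR; the real ones live in the generated `fine_tuning_estimate_price_params.py`.

```python
import os

# Hypothetical request payload: `model` and `training_file` are assumed
# parameter names for illustration only.
params = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    "training_file": "file-abc123",
}

# Guarded so the sketch runs without credentials or the SDK installed.
if os.environ.get("TOGETHER_API_KEY"):
    from together import Together

    client = Together()
    estimate = client.fine_tuning.estimate_price(**params)
    print(estimate)
```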

@@ -202,7 +204,7 @@ from together.types import ModelObject, ModelListResponse, ModelUploadResponse

Methods:

- <code title="get /models">client.models.<a href="./src/together/resources/models.py">list</a>() -> <a href="./src/together/types/model_list_response.py">ModelListResponse</a></code>
- <code title="get /models">client.models.<a href="./src/together/resources/models.py">list</a>(\*\*<a href="src/together/types/model_list_params.py">params</a>) -> <a href="./src/together/types/model_list_response.py">ModelListResponse</a></code>
- <code title="post /models">client.models.<a href="./src/together/resources/models.py">upload</a>(\*\*<a href="src/together/types/model_upload_params.py">params</a>) -> <a href="./src/together/types/model_upload_response.py">ModelUploadResponse</a></code>

# Jobs
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "together"
version = "2.0.0-alpha.9"
version = "2.0.0-alpha.10"
description = "The official Python library for the together API"
dynamic = ["readme"]
license = "Apache-2.0"
17 changes: 11 additions & 6 deletions scripts/test
@@ -54,12 +54,17 @@ fi

export DEFER_PYDANTIC_BUILD=false

# Note that we need to specify the patch version here so that uv
# won't use unstable (alpha, beta, rc) releases for the tests
PY_VERSION_MIN=">=3.9.0"
PY_VERSION_MAX=">=3.14.0"

function run_tests() {
echo "==> Running tests with Pydantic v2"
uv run --isolated --all-extras pytest "$@"

# Pydantic v1 does not support Python 3.14, skip these tests
if [[ "$UV_PYTHON" != "3.14" ]]; then
# Skip Pydantic v1 tests on latest Python (not supported)
if [[ "$UV_PYTHON" != "$PY_VERSION_MAX" ]]; then
Review comment — Bug: Version comparison breaks when UV_PYTHON is externally set

The comparison `"$UV_PYTHON" != "$PY_VERSION_MAX"` now compares against `">=3.14.0"`, but when UV_PYTHON is set externally (e.g. `UV_PYTHON=3.14`) it will not match that string. Previously the check was `"$UV_PYTHON" != "3.14"`, which correctly skipped the Pydantic v1 tests on Python 3.14. With this change, a run with `UV_PYTHON=3.14` executes the Pydantic v1 tests, which fail because Pydantic v1 does not support Python 3.14.

echo "==> Running tests with Pydantic v1"
uv run --isolated --all-extras --group=pydantic-v1 pytest "$@"
fi
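One possible fix for the review comment above, sketched as a hypothetical change (not part of this PR): derive the bare interpreter version from the constraint string and compare against that, so an externally supplied `UV_PYTHON=3.14` still skips the Pydantic v1 run.

```shell
#!/usr/bin/env bash
# Hypothetical fix sketch: turn ">=3.14.0" into "3.14" before comparing.
PY_VERSION_MAX=">=3.14.0"

bare_max="${PY_VERSION_MAX#>=}"   # strip the ">=" prefix -> "3.14.0"
bare_max="${bare_max%.*}"         # drop the patch component -> "3.14"

if [[ "${UV_PYTHON:-}" != "$bare_max" ]]; then
  echo "would run Pydantic v1 tests"
else
  echo "skipping Pydantic v1 tests"
fi
```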
@@ -71,9 +76,9 @@ if [[ -n "$UV_PYTHON" ]]; then
else
# If UV_PYTHON is not set, run the command for min and max versions

echo "==> Running tests for Python 3.9"
UV_PYTHON=3.9 run_tests "$@"
echo "==> Running tests for Python $PY_VERSION_MIN"
UV_PYTHON="$PY_VERSION_MIN" run_tests "$@"

echo "==> Running tests for Python 3.14"
UV_PYTHON=3.14 run_tests "$@"
echo "==> Running tests for Python $PY_VERSION_MAX"
UV_PYTHON="$PY_VERSION_MAX" run_tests "$@"
fi
5 changes: 3 additions & 2 deletions src/together/_types.py
@@ -243,6 +243,9 @@ class HttpxSendArgs(TypedDict, total=False):
if TYPE_CHECKING:
# This works because str.__contains__ does not accept object (either in typeshed or at runtime)
# https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285
#
# Note: index() and count() methods are intentionally omitted to allow pyright to properly
# infer TypedDict types when dict literals are used in lists assigned to SequenceNotStr.
class SequenceNotStr(Protocol[_T_co]):
@overload
def __getitem__(self, index: SupportsIndex, /) -> _T_co: ...
@@ -251,8 +254,6 @@ def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ...
def __contains__(self, value: object, /) -> bool: ...
def __len__(self) -> int: ...
def __iter__(self) -> Iterator[_T_co]: ...
def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ...
def count(self, value: Any, /) -> int: ...
def __reversed__(self) -> Iterator[_T_co]: ...
else:
# just point this to a normal `Sequence` at runtime to avoid having to special case
2 changes: 1 addition & 1 deletion src/together/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "together"
__version__ = "2.0.0-alpha.9" # x-release-please-version
__version__ = "2.0.0-alpha.10" # x-release-please-version
7 changes: 1 addition & 6 deletions src/together/lib/cli/api/models.py
@@ -7,7 +7,6 @@
from together import Together, omit
from together._models import BaseModel
from together._response import APIResponse as APIResponse
from together.lib.resources.models import filter_by_dedicated_models
from together.types.model_upload_response import ModelUploadResponse


@@ -34,11 +33,7 @@ def list(ctx: click.Context, type: Optional[str], json: bool) -> None:
"""List models"""
client: Together = ctx.obj

response = client.models.list()
models_list = response

if type == "dedicated":
models_list = filter_by_dedicated_models(client, models_list)
models_list = client.models.list(dedicated=type == "dedicated" if type else omit)

display_list: List[Dict[str, Any]] = []
model: BaseModel
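The CLI change above swaps the deleted client-side `filter_by_dedicated_models` helper for the new server-side `dedicated` query parameter on `client.models.list()`. A minimal sketch of the same call outside the CLI, assuming `ModelListResponse` is iterable and each model exposes an `id` attribute (neither is shown in this diff):

```python
import os


def list_model_ids(dedicated_only: bool):
    """Return model ids, optionally filtered server-side to dedicated models."""
    # Imported lazily so the sketch parses and runs without the SDK installed.
    from together import Together, omit

    client = Together()
    # `dedicated=True` pushes the filtering to the API; passing `omit` leaves
    # the query parameter off entirely, mirroring the CLI change above.
    response = client.models.list(dedicated=True if dedicated_only else omit)
    return [model.id for model in response]


if os.environ.get("TOGETHER_API_KEY"):
    print(list_model_ids(dedicated_only=True))
```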
35 changes: 0 additions & 35 deletions src/together/lib/resources/models.py

This file was deleted.

48 changes: 48 additions & 0 deletions src/together/resources/chat/completions.py
@@ -136,6 +136,14 @@ def create(

response_format: An object specifying the format that the model must output.

Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the [Structured Outputs guide](https://docs.together.ai/docs/json-mode).

Setting to `{ "type": "json_object" }` enables the older JSON mode, which
ensures the message the model generates is valid JSON. Using `json_schema` is
preferred for models that support it.

safety_model: The name of the moderation model used to validate tokens. Choose from the
available moderation models found
[here](https://docs.together.ai/docs/inference-models#moderation-models).
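The new docstring text describes two `response_format` modes. A minimal sketch of the Structured Outputs variant, assuming the standard JSON Schema shape from the linked guide — the schema, prompt, and guard below are illustrative, not from this PR:

```python
import os

# An illustrative JSON Schema payload for response_format; the model's reply
# is constrained to valid JSON matching `schema`.
city_schema = {
    "type": "json_schema",
    "json_schema": {
        "name": "city",
        "schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

# Guarded so the sketch runs without credentials or the SDK installed.
if os.environ.get("TOGETHER_API_KEY"):
    from together import Together

    client = Together()
    completion = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        messages=[{"role": "user", "content": "Which city is the Eiffel Tower in?"}],
        response_format=city_schema,
    )
    print(completion.choices[0].message.content)
```

For models without `json_schema` support, the older `{"type": "json_object"}` mode guarantees valid JSON but not any particular shape.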
@@ -277,6 +285,14 @@ def create(

response_format: An object specifying the format that the model must output.

Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the [Structured Outputs guide](https://docs.together.ai/docs/json-mode).

Setting to `{ "type": "json_object" }` enables the older JSON mode, which
ensures the message the model generates is valid JSON. Using `json_schema` is
preferred for models that support it.

safety_model: The name of the moderation model used to validate tokens. Choose from the
available moderation models found
[here](https://docs.together.ai/docs/inference-models#moderation-models).
@@ -414,6 +430,14 @@ def create(

response_format: An object specifying the format that the model must output.

Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the [Structured Outputs guide](https://docs.together.ai/docs/json-mode).

Setting to `{ "type": "json_object" }` enables the older JSON mode, which
ensures the message the model generates is valid JSON. Using `json_schema` is
preferred for models that support it.

safety_model: The name of the moderation model used to validate tokens. Choose from the
available moderation models found
[here](https://docs.together.ai/docs/inference-models#moderation-models).
@@ -653,6 +677,14 @@ async def create(

response_format: An object specifying the format that the model must output.

Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the [Structured Outputs guide](https://docs.together.ai/docs/json-mode).

Setting to `{ "type": "json_object" }` enables the older JSON mode, which
ensures the message the model generates is valid JSON. Using `json_schema` is
preferred for models that support it.

safety_model: The name of the moderation model used to validate tokens. Choose from the
available moderation models found
[here](https://docs.together.ai/docs/inference-models#moderation-models).
@@ -794,6 +826,14 @@ async def create(

response_format: An object specifying the format that the model must output.

Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the [Structured Outputs guide](https://docs.together.ai/docs/json-mode).

Setting to `{ "type": "json_object" }` enables the older JSON mode, which
ensures the message the model generates is valid JSON. Using `json_schema` is
preferred for models that support it.

safety_model: The name of the moderation model used to validate tokens. Choose from the
available moderation models found
[here](https://docs.together.ai/docs/inference-models#moderation-models).
@@ -931,6 +971,14 @@ async def create(

response_format: An object specifying the format that the model must output.

Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
Outputs which ensures the model will match your supplied JSON schema. Learn more
in the [Structured Outputs guide](https://docs.together.ai/docs/json-mode).

Setting to `{ "type": "json_object" }` enables the older JSON mode, which
ensures the message the model generates is valid JSON. Using `json_schema` is
preferred for models that support it.

safety_model: The name of the moderation model used to validate tokens. Choose from the
available moderation models found
[here](https://docs.together.ai/docs/inference-models#moderation-models).