CentML · c-fteixeira · May 14, 2026 · May 14, 2026
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@ No description provided (generated by Openapi Generator https://github.com/opena
 This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
 
 - API version: 0.1.0
-- Package version: 4.9.4
+- Package version: 4.9.5
 - Generator version: 7.9.0
 - Build package: org.openapitools.codegen.languages.PythonClientCodegen
 

diff --git a/docs/CreateCServeV3DeploymentRequest.md b/docs/CreateCServeV3DeploymentRequest.md
@@ -24,6 +24,7 @@ Name | Type | Description | Notes
 **env_vars** | **Dict[str, str]** |  | [optional] 
 **enable_logging** | **bool** |  | [optional] [default to True]
 **enable_node_model_cache** | **bool** |  | [optional] [default to False]
+**session_affinity** | **bool** | Enable best-effort sticky routing via the &#x60;X-Session-Id&#x60; request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state. | [optional] [default to False]
 
 ## Example
 

diff --git a/docs/CreateInferenceV3DeploymentRequest.md b/docs/CreateInferenceV3DeploymentRequest.md
@@ -27,6 +27,7 @@ Name | Type | Description | Notes
 **hf_token** | **str** |  | [optional] 
 **backend_protocol** | [**BackendProtocol**](BackendProtocol.md) |  | [optional] 
 **enable_logging** | **bool** |  | [optional] [default to False]
+**session_affinity** | **bool** | Enable best-effort sticky routing via the &#x60;X-Session-Id&#x60; request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state. | [optional] [default to False]
 
 ## Example
 

diff --git a/docs/DeploymentResponse.md b/docs/DeploymentResponse.md
@@ -29,6 +29,7 @@ Name | Type | Description | Notes
 **env_vars** | **Dict[str, str]** |  | [optional] 
 **enable_logging** | **bool** |  | [optional] [default to True]
 **enable_node_model_cache** | **bool** |  | [optional] [default to False]
+**session_affinity** | **bool** | Enable best-effort sticky routing via the &#x60;X-Session-Id&#x60; request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state. | [optional] [default to False]
 **container_port** | **int** |  | 
 **healthcheck** | **str** |  | [optional] 
 **command** | **List[str]** |  | [optional] 

diff --git a/docs/GetCServeV3DeploymentResponse.md b/docs/GetCServeV3DeploymentResponse.md
@@ -29,6 +29,7 @@ Name | Type | Description | Notes
 **env_vars** | **Dict[str, str]** |  | [optional] 
 **enable_logging** | **bool** |  | [optional] [default to True]
 **enable_node_model_cache** | **bool** |  | [optional] [default to False]
+**session_affinity** | **bool** | Enable best-effort sticky routing via the &#x60;X-Session-Id&#x60; request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state. | [optional] [default to False]
 
 ## Example
 

diff --git a/docs/GetInferenceV3DeploymentResponse.md b/docs/GetInferenceV3DeploymentResponse.md
@@ -33,6 +33,7 @@ Name | Type | Description | Notes
 **image_pull_secret_credentials** | [**ImagePullSecretCredentials**](ImagePullSecretCredentials.md) |  | [optional] 
 **backend_protocol** | [**BackendProtocol**](BackendProtocol.md) |  | [optional] 
 **enable_logging** | **bool** |  | [optional] [default to True]
+**session_affinity** | **bool** | Enable best-effort sticky routing via the &#x60;X-Session-Id&#x60; request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state. | [optional] [default to False]
 
 ## Example
 

diff --git a/platform_api_python_client/__init__.py b/platform_api_python_client/__init__.py
@@ -14,7 +14,7 @@
 """  # noqa: E501
 
 
-__version__ = "4.9.4"
+__version__ = "4.9.5"
 
 # import apis into sdk package
 from platform_api_python_client.api.external_api import EXTERNALApi

diff --git a/platform_api_python_client/api_client.py b/platform_api_python_client/api_client.py
@@ -90,7 +90,7 @@ def __init__(
             self.default_headers[header_name] = header_value
         self.cookie = cookie
         # Set default User-Agent.
-        self.user_agent = 'OpenAPI-Generator/4.9.4/python'
+        self.user_agent = 'OpenAPI-Generator/4.9.5/python'
         self.client_side_validation = configuration.client_side_validation
 
     def __enter__(self):

diff --git a/platform_api_python_client/configuration.py b/platform_api_python_client/configuration.py
@@ -392,7 +392,7 @@ def to_debug_report(self):
                "OS: {env}\n"\
                "Python Version: {pyversion}\n"\
                "Version of the API: 0.1.0\n"\
-               "SDK Package Version: 4.9.4".\
+               "SDK Package Version: 4.9.5".\
                format(env=sys.platform, pyversion=sys.version)
 
     def get_host_settings(self):

diff --git a/platform_api_python_client/models/create_c_serve_v3_deployment_request.py b/platform_api_python_client/models/create_c_serve_v3_deployment_request.py
@@ -47,7 +47,8 @@ class CreateCServeV3DeploymentRequest(BaseModel):
     env_vars: Optional[Dict[str, StrictStr]] = None
     enable_logging: Optional[StrictBool] = True
     enable_node_model_cache: Optional[StrictBool] = False
-    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache"]
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "recipe", "cserve_version", "hf_token", "endpoint_bearer_token", "endpoint_certificate_authority", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
 
     @field_validator('name')
     def name_validate_regular_expression(cls, value):
@@ -178,7 +179,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
             "cooldown_period": obj.get("cooldown_period"),
             "env_vars": obj.get("env_vars"),
             "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
-            "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False
+            "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
         })
         return _obj
 

diff --git a/platform_api_python_client/models/create_inference_v3_deployment_request.py b/platform_api_python_client/models/create_inference_v3_deployment_request.py
@@ -51,7 +51,8 @@ class CreateInferenceV3DeploymentRequest(BaseModel):
     hf_token: Optional[StrictStr] = None
     backend_protocol: Optional[BackendProtocol] = None
     enable_logging: Optional[StrictBool] = False
-    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "hf_token", "backend_protocol", "enable_logging"]
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    __properties: ClassVar[List[str]] = ["max_surge", "max_unavailable", "name", "cluster_id", "hardware_instance_id", "user_annotations", "image_url", "image_pull_secret_credentials", "port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "env_vars", "command", "endpoint_bearer_token", "endpoint_certificate_authority", "hf_token", "backend_protocol", "enable_logging", "session_affinity"]
 
     @field_validator('name')
     def name_validate_regular_expression(cls, value):
@@ -200,7 +201,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
             "endpoint_certificate_authority": obj.get("endpoint_certificate_authority"),
             "hf_token": obj.get("hf_token"),
             "backend_protocol": obj.get("backend_protocol"),
-            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else False,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
         })
         return _obj
 

diff --git a/platform_api_python_client/models/get_c_serve_v3_deployment_response.py b/platform_api_python_client/models/get_c_serve_v3_deployment_response.py
@@ -18,7 +18,7 @@
 import json
 
 from datetime import datetime
-from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from platform_api_python_client.models.c_serve_v2_recipe import CServeV2Recipe
 from platform_api_python_client.models.deployment_status import DeploymentStatus
@@ -54,7 +54,8 @@ class GetCServeV3DeploymentResponse(BaseModel):
     env_vars: Optional[Dict[str, StrictStr]] = None
     enable_logging: Optional[StrictBool] = True
     enable_node_model_cache: Optional[StrictBool] = False
-    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache"]
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "recipe", "cserve_version", "min_replicas", "max_replicas", "initial_replicas", "endpoint_certificate_authority", "endpoint_bearer_token", "concurrency", "cooldown_period", "env_vars", "enable_logging", "enable_node_model_cache", "session_affinity"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -168,7 +169,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
             "cooldown_period": obj.get("cooldown_period") if obj.get("cooldown_period") is not None else 1800,
             "env_vars": obj.get("env_vars"),
             "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
-            "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False
+            "enable_node_model_cache": obj.get("enable_node_model_cache") if obj.get("enable_node_model_cache") is not None else False,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
         })
         return _obj
 

diff --git a/platform_api_python_client/models/get_inference_v3_deployment_response.py b/platform_api_python_client/models/get_inference_v3_deployment_response.py
@@ -18,7 +18,7 @@
 import json
 
 from datetime import datetime
-from pydantic import BaseModel, ConfigDict, StrictBool, StrictInt, StrictStr
+from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictInt, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from platform_api_python_client.models.backend_protocol import BackendProtocol
 from platform_api_python_client.models.deployment_status import DeploymentStatus
@@ -59,7 +59,8 @@ class GetInferenceV3DeploymentResponse(BaseModel):
     image_pull_secret_credentials: Optional[ImagePullSecretCredentials] = None
     backend_protocol: Optional[BackendProtocol] = None
     enable_logging: Optional[StrictBool] = True
-    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging"]
+    session_affinity: Optional[StrictBool] = Field(default=False, description="Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state.")
+    __properties: ClassVar[List[str]] = ["creator_email", "cluster_id", "id", "name", "endpoint_url", "image_url", "type", "status", "created_at", "hardware_instance_id", "revision_number", "user_annotations", "container_port", "min_replicas", "max_replicas", "initial_replicas", "concurrency", "cooldown_period", "healthcheck", "endpoint_certificate_authority", "endpoint_bearer_token", "env_vars", "command", "command_args", "original_command", "image_pull_secret_credentials", "backend_protocol", "enable_logging", "session_affinity"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -202,7 +203,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
             "original_command": obj.get("original_command"),
             "image_pull_secret_credentials": ImagePullSecretCredentials.from_dict(obj["image_pull_secret_credentials"]) if obj.get("image_pull_secret_credentials") is not None else None,
             "backend_protocol": obj.get("backend_protocol"),
-            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True
+            "enable_logging": obj.get("enable_logging") if obj.get("enable_logging") is not None else True,
+            "session_affinity": obj.get("session_affinity") if obj.get("session_affinity") is not None else False
         })
         return _obj
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "platform_api_python_client"
-version = "4.9.4"
+version = "4.9.5"
 description = "Platform External API"
 authors = ["OpenAPI Generator Community <team@openapitools.org>"]
 license = "NoLicense"

diff --git a/setup.py b/setup.py
@@ -21,7 +21,7 @@
 # prerequisite: setuptools
 # http://pypi.python.org/pypi/setuptools
 NAME = "platform-api-python-client"
-VERSION = "4.9.4"
+VERSION = "4.9.5"
 PYTHON_REQUIRES = ">= 3.8"
 REQUIRES = [
     "urllib3 >= 1.25.3, < 3.0.0",

diff --git a/test/test_create_c_serve_v3_deployment_request.py b/test/test_create_c_serve_v3_deployment_request.py
@@ -57,7 +57,8 @@ def make_instance(self, include_optional) -> CreateCServeV3DeploymentRequest:
                     'key' : ''
                     },
                 enable_logging = True,
-                enable_node_model_cache = True
+                enable_node_model_cache = True,
+                session_affinity = True
             )
         else:
             return CreateCServeV3DeploymentRequest(

diff --git a/test/test_create_inference_v3_deployment_request.py b/test/test_create_inference_v3_deployment_request.py
@@ -62,7 +62,8 @@ def make_instance(self, include_optional) -> CreateInferenceV3DeploymentRequest:
                 endpoint_certificate_authority = '',
                 hf_token = '',
                 backend_protocol = 'HTTP',
-                enable_logging = True
+                enable_logging = True,
+                session_affinity = True
             )
         else:
             return CreateInferenceV3DeploymentRequest(

diff --git a/test/test_deployment_response.py b/test/test_deployment_response.py
@@ -63,6 +63,7 @@ def make_instance(self, include_optional) -> DeploymentResponse:
                     },
                 enable_logging = True,
                 enable_node_model_cache = True,
+                session_affinity = True,
                 container_port = 56,
                 healthcheck = '',
                 command = [

diff --git a/test/test_get_c_serve_v3_deployment_response.py b/test/test_get_c_serve_v3_deployment_response.py
@@ -62,7 +62,8 @@ def make_instance(self, include_optional) -> GetCServeV3DeploymentResponse:
                     'key' : ''
                     },
                 enable_logging = True,
-                enable_node_model_cache = True
+                enable_node_model_cache = True,
+                session_affinity = True
             )
         else:
             return GetCServeV3DeploymentResponse(

diff --git a/test/test_get_inference_v3_deployment_response.py b/test/test_get_inference_v3_deployment_response.py
@@ -72,7 +72,8 @@ def make_instance(self, include_optional) -> GetInferenceV3DeploymentResponse:
                     username = '', 
                     password = '', ),
                 backend_protocol = 'HTTP',
-                enable_logging = True
+                enable_logging = True,
+                session_affinity = True
             )
         else:
             return GetInferenceV3DeploymentResponse(