Skip to content

Latest commit

 

History

History
52 lines (43 loc) · 2.69 KB

File metadata and controls

52 lines (43 loc) · 2.69 KB

CreateInferenceV3DeploymentRequest

Properties

| Name | Type | Description | Notes |
| --- | --- | --- | --- |
| max_surge | int | | [optional] |
| max_unavailable | int | | [optional] |
| name | str | | |
| cluster_id | int | | |
| hardware_instance_id | int | | |
| user_annotations | Dict[str, str] | | [optional] |
| image_url | str | | |
| image_pull_secret_credentials | ImagePullSecretCredentials | | [optional] |
| port | int | | |
| min_replicas | int | | |
| max_replicas | int | | |
| initial_replicas | int | | [optional] |
| concurrency | int | | [optional] |
| cooldown_period | int | | [optional] |
| healthcheck | str | | [optional] |
| env_vars | Dict[str, str] | | [optional] |
| command | str | | [optional] |
| endpoint_bearer_token | str | | [optional] |
| endpoint_certificate_authority | str | | [optional] |
| hf_token | str | | [optional] |
| backend_protocol | BackendProtocol | | [optional] |
| enable_logging | bool | | [optional] [default to False] |
| session_affinity | bool | Enable best-effort sticky routing via the `X-Session-Id` request header. Requests carrying the same header value land on the same pod, improving KV cache reuse for agentic workloads. Requests without the header are routed at random. Affinity is NOT durable: scaling, rollouts, restarts, or readiness-probe transitions will remap sessions to different pods. Do not use for irreplaceable in-pod state. | [optional] [default to False] |
| config_file | ConfigFileMount | | [optional] |

Example

from platform_api_python_client.models.create_inference_v3_deployment_request import CreateInferenceV3DeploymentRequest

# TODO update the JSON string below. The properties not marked [optional] in
# the table above (name, cluster_id, hardware_instance_id, image_url, port,
# min_replicas, max_replicas) should be supplied.
# Named `json_str` rather than `json` so it does not shadow the stdlib module.
json_str = "{}"
# create an instance of CreateInferenceV3DeploymentRequest from a JSON string
create_inference_v3_deployment_request_instance = CreateInferenceV3DeploymentRequest.from_json(json_str)
# print the JSON string representation of the object
# (to_json is called on the instance, not on the class)
print(create_inference_v3_deployment_request_instance.to_json())

# convert the object into a dict
create_inference_v3_deployment_request_dict = create_inference_v3_deployment_request_instance.to_dict()
# create an instance of CreateInferenceV3DeploymentRequest from a dict
create_inference_v3_deployment_request_from_dict = CreateInferenceV3DeploymentRequest.from_dict(create_inference_v3_deployment_request_dict)

[Back to Model list] [Back to API list] [Back to README]