diff --git a/src/graphn/_generated/api/custom_models/get_supported_architectures.py b/src/graphn/_generated/api/custom_models/get_supported_architectures.py
new file mode 100644
index 0000000..3d05d62
--- /dev/null
+++ b/src/graphn/_generated/api/custom_models/get_supported_architectures.py
@@ -0,0 +1,202 @@
+from http import HTTPStatus
+from typing import Any
+from urllib.parse import quote
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.error import Error
+from ...models.supported_architectures import SupportedArchitectures
+from ...types import Response
+
+
+def _get_kwargs(
+    workspace_id: str,
+) -> dict[str, Any]:
+    """Build the httpx request kwargs for the supported-architectures GET."""
+    # safe="" percent-encodes "/" too, keeping the id inside one path segment.
+    encoded_workspace = quote(str(workspace_id), safe="")
+    url_template = "/v1/{workspace_id}/custom-models/supported-architectures"
+
+    return {
+        "method": "get",
+        "url": url_template.format(workspace_id=encoded_workspace),
+    }
+
+
+def _parse_response(
+    *, client: AuthenticatedClient | Client, response: httpx.Response
+) -> Error | SupportedArchitectures | None:
+    """Decode the response body into the model documented for its status.
+
+    Returns ``SupportedArchitectures`` for 200 and ``Error`` for 401/403.
+    Any other status raises ``errors.UnexpectedStatus`` when
+    ``client.raise_on_unexpected_status`` is truthy, and yields ``None``
+    otherwise.
+    """
+    status = response.status_code
+
+    if status == 200:
+        return SupportedArchitectures.from_dict(response.json())
+
+    # Both documented failure statuses share the same Error payload model.
+    if status == 401 or status == 403:
+        return Error.from_dict(response.json())
+
+    if not client.raise_on_unexpected_status:
+        return None
+    raise errors.UnexpectedStatus(response.status_code, response.content)
+
+
+def _build_response(
+    *, client: AuthenticatedClient | Client, response: httpx.Response
+) -> Response[Error | SupportedArchitectures]:
+    """Wrap the raw httpx response and its parsed body in a ``Response``."""
+    status = HTTPStatus(response.status_code)  # raises ValueError for codes HTTPStatus lacks
+    raw_body, raw_headers = response.content, response.headers
+    parsed_body = _parse_response(client=client, response=response)
+
+    return Response(status_code=status, content=raw_body, headers=raw_headers, parsed=parsed_body)
+
+
+def sync_detailed(
+    workspace_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+) -> Response[Error | SupportedArchitectures]:
+    """List model architectures supported for custom-model import.
+
+    Fetches the static list of HuggingFace model architectures that the
+    platform's serving runtimes can deploy, together with the capability
+    tags (`tool_calling`, `vision`, etc.) each architecture exposes. The
+    result is meant to drive the UI's architecture/capability filters
+    before `validateCustomModel` is called.
+
+    The list changes alongside platform runtime upgrades, so clients
+    should not cache it for more than a build cycle.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the sync httpx client.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Error | SupportedArchitectures]: Status, headers, raw
+        content and the parsed body (None if the status was undocumented).
+    """
+
+    # Assemble the method/URL pair, then send it with the sync httpx client.
+    request_kwargs = _get_kwargs(workspace_id=workspace_id)
+
+    httpx_response = client.get_httpx_client().request(**request_kwargs)
+
+    # Pair the raw response with its parsed payload.
+    return _build_response(client=client, response=httpx_response)
+
+
+def sync(
+    workspace_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+) -> Error | SupportedArchitectures | None:
+    """List model architectures supported for custom-model import.
+
+    Fetches the static list of HuggingFace model architectures that the
+    platform's serving runtimes can deploy, together with the capability
+    tags (`tool_calling`, `vision`, etc.) each architecture exposes. The
+    result is meant to drive the UI's architecture/capability filters
+    before `validateCustomModel` is called.
+
+    The list changes alongside platform runtime upgrades, so clients
+    should not cache it for more than a build cycle.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the sync httpx client.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Error | SupportedArchitectures | None: Parsed body; None if the status was undocumented.
+    """
+
+    detailed = sync_detailed(workspace_id=workspace_id, client=client)
+
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    workspace_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+) -> Response[Error | SupportedArchitectures]:
+    """List model architectures supported for custom-model import.
+
+    Fetches the static list of HuggingFace model architectures that the
+    platform's serving runtimes can deploy, together with the capability
+    tags (`tool_calling`, `vision`, etc.) each architecture exposes. The
+    result is meant to drive the UI's architecture/capability filters
+    before `validateCustomModel` is called.
+
+    The list changes alongside platform runtime upgrades, so clients
+    should not cache it for more than a build cycle.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the async httpx client.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[Error | SupportedArchitectures]: Status, headers, raw
+        content and the parsed body (None if the status was undocumented).
+    """
+
+    # Assemble the method/URL pair, then await it on the async httpx client.
+    request_kwargs = _get_kwargs(workspace_id=workspace_id)
+    httpx_response = await client.get_async_httpx_client().request(**request_kwargs)
+
+    return _build_response(client=client, response=httpx_response)
+
+
+async def asyncio(
+    workspace_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+) -> Error | SupportedArchitectures | None:
+    """List model architectures supported for custom-model import.
+
+    Fetches the static list of HuggingFace model architectures that the
+    platform's serving runtimes can deploy, together with the capability
+    tags (`tool_calling`, `vision`, etc.) each architecture exposes. The
+    result is meant to drive the UI's architecture/capability filters
+    before `validateCustomModel` is called.
+
+    The list changes alongside platform runtime upgrades, so clients
+    should not cache it for more than a build cycle.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the async httpx client.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Error | SupportedArchitectures | None: Parsed body; None if the status was undocumented.
+    """
+
+    detailed = await asyncio_detailed(
+        workspace_id=workspace_id,
+        client=client,
+    )
+    return detailed.parsed
diff --git a/src/graphn/_generated/api/custom_models/update_custom_model.py b/src/graphn/_generated/api/custom_models/update_custom_model.py
new file mode 100644
index 0000000..d56a354
--- /dev/null
+++ b/src/graphn/_generated/api/custom_models/update_custom_model.py
@@ -0,0 +1,311 @@
+from http import HTTPStatus
+from typing import Any
+from urllib.parse import quote
+
+import httpx
+
+from ... import errors
+from ...client import AuthenticatedClient, Client
+from ...models.custom_model import CustomModel
+from ...models.custom_model_update import CustomModelUpdate
+from ...models.error import Error
+from ...types import Response
+
+
+def _get_kwargs(
+    workspace_id: str,
+    model_id: str,
+    *,
+    body: CustomModelUpdate,
+) -> dict[str, Any]:
+    """Build the httpx request kwargs for the custom-model PATCH call.
+
+    Path parameters are percent-encoded with ``safe=""`` (so "/" is escaped
+    too); ``body`` is serialised via ``to_dict()`` and sent as JSON.
+    """
+    url_template = "/v1/{workspace_id}/custom-models/{model_id}"
+    request_url = url_template.format(
+        workspace_id=quote(str(workspace_id), safe=""),
+        model_id=quote(str(model_id), safe=""),
+    )
+    return {
+        "method": "patch",
+        "url": request_url,
+        "json": body.to_dict(),
+        "headers": {"Content-Type": "application/json"},
+    }
+
+
+def _parse_response(
+    *, client: AuthenticatedClient | Client, response: httpx.Response
+) -> CustomModel | Error | None:
+    """Decode the response body into the model documented for its status.
+
+    Bodies are decoded with ``response.json()`` and handed to the matching
+    model's ``from_dict``.
+
+    Returns:
+        ``CustomModel`` for 200, ``Error`` for 400/401/403/404, or ``None``
+        for any other status when ``client.raise_on_unexpected_status``
+        is falsy.
+
+    Raises:
+        errors.UnexpectedStatus: For any other status when
+            ``client.raise_on_unexpected_status`` is truthy.
+    """
+    # Statuses whose body is documented as an Error payload.
+    documented_errors = (400, 401, 403, 404)
+    status = response.status_code
+
+    if status == 200:
+        return CustomModel.from_dict(response.json())
+
+    if status in documented_errors:
+        return Error.from_dict(response.json())
+
+    # Undocumented status: surface it or swallow it, per the client setting.
+    if not client.raise_on_unexpected_status:
+        return None
+
+    raise errors.UnexpectedStatus(response.status_code, response.content)
+
+
+def _build_response(
+    *, client: AuthenticatedClient | Client, response: httpx.Response
+) -> Response[CustomModel | Error]:
+    """Wrap the raw httpx response and its parsed body in a ``Response``."""
+    status = HTTPStatus(response.status_code)  # raises ValueError for codes HTTPStatus lacks
+    raw_body, raw_headers = response.content, response.headers
+    parsed_body = _parse_response(client=client, response=response)
+
+    return Response(status_code=status, content=raw_body, headers=raw_headers, parsed=parsed_body)
+
+
+def sync_detailed(
+    workspace_id: str,
+    model_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+    body: CustomModelUpdate,
+) -> Response[CustomModel | Error]:
+    """Update mutable fields on a custom model.
+
+    Writes the change to the model record (cp-side) and, when applicable,
+    propagates it to the live deployment. Only a small set of fields can
+    be changed after creation; immutable fields (`huggingface_model_id`,
+    `weight_source`, GPU topology, …) are deliberately absent from this
+    endpoint — to change them, delete and re-create the model.
+
+    Currently the only fields that take effect on the live deployment
+    right away are `min_replicas` and `max_replicas`: cp updates the
+    DynamoDB record and then issues an AF-side in-place patch of the
+    underlying InferenceService's predictor scale bounds (no rolling
+    restart, no downtime). KServe carries `max_replicas` through to
+    the KEDA `ScaledObject`'s `maxReplicaCount` on its next
+    reconcile.
+
+    All other fields change the model record only and apply from the
+    next deployment onwards.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        model_id (str): Custom-model id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the sync httpx client.
+        body (CustomModelUpdate): Partial-update payload for
+            `PATCH /v1/{workspaceId}/custom-models/{modelId}`. Every field
+            is independently optional and omitted fields are left
+            unchanged, but at least one field MUST be supplied.
+
+            Immutable fields are not part of this payload; change them
+            by deleting and re-creating the model.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[CustomModel | Error]: Status, headers, raw content and the
+        parsed body (None if the status was undocumented).
+    """
+
+    # PATCH kwargs: method, encoded URL, JSON body and Content-Type header.
+    request_kwargs = _get_kwargs(
+        workspace_id=workspace_id,
+        model_id=model_id,
+        body=body,
+    )
+
+    httpx_response = client.get_httpx_client().request(**request_kwargs)
+
+    # Pair the raw response with its parsed payload.
+    return _build_response(client=client, response=httpx_response)
+
+
+def sync(
+    workspace_id: str,
+    model_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+    body: CustomModelUpdate,
+) -> CustomModel | Error | None:
+    """Update mutable fields on a custom model.
+
+    Writes the change to the model record (cp-side) and, when applicable,
+    propagates it to the live deployment. Only a small set of fields can
+    be changed after creation; immutable fields (`huggingface_model_id`,
+    `weight_source`, GPU topology, …) are deliberately absent from this
+    endpoint — to change them, delete and re-create the model.
+
+    Currently the only fields that take effect on the live deployment
+    right away are `min_replicas` and `max_replicas`: cp updates the
+    DynamoDB record and then issues an AF-side in-place patch of the
+    underlying InferenceService's predictor scale bounds (no rolling
+    restart, no downtime). KServe carries `max_replicas` through to
+    the KEDA `ScaledObject`'s `maxReplicaCount` on its next
+    reconcile.
+
+    All other fields change the model record only and apply from the
+    next deployment onwards.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        model_id (str): Custom-model id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the sync httpx client.
+        body (CustomModelUpdate): Partial-update payload for
+            `PATCH /v1/{workspaceId}/custom-models/{modelId}`. Every field
+            is independently optional and omitted fields are left
+            unchanged, but at least one field MUST be supplied.
+
+            Immutable fields are not part of this payload; change them
+            by deleting and re-creating the model.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        CustomModel | Error | None: Parsed body; None if the status was undocumented.
+    """
+
+    detailed = sync_detailed(
+        workspace_id=workspace_id,
+        model_id=model_id,
+        client=client,
+        body=body,
+    )
+    return detailed.parsed
+
+
+async def asyncio_detailed(
+    workspace_id: str,
+    model_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+    body: CustomModelUpdate,
+) -> Response[CustomModel | Error]:
+    """Update mutable fields on a custom model.
+
+    Writes the change to the model record (cp-side) and, when applicable,
+    propagates it to the live deployment. Only a small set of fields can
+    be changed after creation; immutable fields (`huggingface_model_id`,
+    `weight_source`, GPU topology, …) are deliberately absent from this
+    endpoint — to change them, delete and re-create the model.
+
+    Currently the only fields that take effect on the live deployment
+    right away are `min_replicas` and `max_replicas`: cp updates the
+    DynamoDB record and then issues an AF-side in-place patch of the
+    underlying InferenceService's predictor scale bounds (no rolling
+    restart, no downtime). KServe carries `max_replicas` through to
+    the KEDA `ScaledObject`'s `maxReplicaCount` on its next
+    reconcile.
+
+    All other fields change the model record only and apply from the
+    next deployment onwards.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        model_id (str): Custom-model id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the async httpx client.
+        body (CustomModelUpdate): Partial-update payload for
+            `PATCH /v1/{workspaceId}/custom-models/{modelId}`. Every field
+            is independently optional and omitted fields are left
+            unchanged, but at least one field MUST be supplied.
+
+            Immutable fields are not part of this payload; change them
+            by deleting and re-creating the model.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        Response[CustomModel | Error]: Status, headers, raw content and the
+        parsed body (None if the status was undocumented).
+    """
+
+    # PATCH kwargs: method, encoded URL, JSON body and Content-Type header.
+    request_kwargs = _get_kwargs(
+        workspace_id=workspace_id,
+        model_id=model_id,
+        body=body,
+    )
+    httpx_response = await client.get_async_httpx_client().request(**request_kwargs)
+
+    return _build_response(client=client, response=httpx_response)
+
+
+async def asyncio(
+    workspace_id: str,
+    model_id: str,
+    *,
+    client: AuthenticatedClient | Client,
+    body: CustomModelUpdate,
+) -> CustomModel | Error | None:
+    """Update mutable fields on a custom model.
+
+    Writes the change to the model record (cp-side) and, when applicable,
+    propagates it to the live deployment. Only a small set of fields can
+    be changed after creation; immutable fields (`huggingface_model_id`,
+    `weight_source`, GPU topology, …) are deliberately absent from this
+    endpoint — to change them, delete and re-create the model.
+
+    Currently the only fields that take effect on the live deployment
+    right away are `min_replicas` and `max_replicas`: cp updates the
+    DynamoDB record and then issues an AF-side in-place patch of the
+    underlying InferenceService's predictor scale bounds (no rolling
+    restart, no downtime). KServe carries `max_replicas` through to
+    the KEDA `ScaledObject`'s `maxReplicaCount` on its next
+    reconcile.
+
+    All other fields change the model record only and apply from the
+    next deployment onwards.
+
+    Args:
+        workspace_id (str): Workspace id, percent-encoded into the URL path.
+        model_id (str): Custom-model id, percent-encoded into the URL path.
+        client (AuthenticatedClient | Client): Supplies the async httpx client.
+        body (CustomModelUpdate): Partial-update payload for
+            `PATCH /v1/{workspaceId}/custom-models/{modelId}`. Every field
+            is independently optional and omitted fields are left
+            unchanged, but at least one field MUST be supplied.
+
+            Immutable fields are not part of this payload; change them
+            by deleting and re-creating the model.
+
+    Raises:
+        errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True.
+        httpx.TimeoutException: If the request takes longer than Client.timeout.
+
+    Returns:
+        CustomModel | Error | None: Parsed body; None if the status was undocumented.
+    """
+
+    # Delegate to the detailed variant and keep only the parsed payload.
+    detailed = await asyncio_detailed(
+        workspace_id=workspace_id,
+        model_id=model_id,
+        client=client,
+        body=body,
+    )
+
+    return detailed.parsed
diff --git a/src/graphn/_generated/models/__init__.py b/src/graphn/_generated/models/__init__.py
index 8629c74..8bdfe18 100644
--- a/src/graphn/_generated/models/__init__.py
+++ b/src/graphn/_generated/models/__init__.py
@@ -1,5 +1,6 @@
 """Contains all the data models used in inputs/outputs"""
 
+from .architecture_info import ArchitectureInfo
 from .capability import Capability
 from .chat_completion_request import ChatCompletionRequest
 from .chat_completion_request_response_format import ChatCompletionRequestResponseFormat
@@ -16,11 +17,13 @@
 from .chat_message_tool_calls_item import ChatMessageToolCallsItem
 from .custom_model import CustomModel
 from .custom_model_access import CustomModelAccess
+from .custom_model_artifact_type import CustomModelArtifactType
 from .custom_model_create import CustomModelCreate
 from .custom_model_create_quantization import CustomModelCreateQuantization
 from .custom_model_list import CustomModelList
 from .custom_model_quantization import CustomModelQuantization
 from .custom_model_status import CustomModelStatus
+from .custom_model_update import CustomModelUpdate
 from .discover_imported_models_request import DiscoverImportedModelsRequest
 from .discover_imported_models_response import DiscoverImportedModelsResponse
 from .discovered_imported_model import DiscoveredImportedModel
@@ -38,6 +41,7 @@
 from .secret_create import SecretCreate
 from .secret_list import SecretList
 from .secret_update import SecretUpdate
+from .supported_architectures import SupportedArchitectures
 from .test_connection_request import TestConnectionRequest
 from .test_connection_response import TestConnectionResponse
 from .test_connection_response_usage import TestConnectionResponseUsage
@@ -46,9 +50,11 @@
 from .validate_model_request import ValidateModelRequest
 from .validate_model_request_quantization import ValidateModelRequestQuantization
 from .validate_model_response import ValidateModelResponse
+from .validate_model_response_artifact_type import ValidateModelResponseArtifactType
 from .weight_source import WeightSource
 
 __all__ = (
+    "ArchitectureInfo",
     "Capability",
     "ChatCompletionRequest",
     "ChatCompletionRequestResponseFormat",
@@ -63,11 +69,13 @@
     "ChatMessageToolCallsItem",
     "CustomModel",
     "CustomModelAccess",
+    "CustomModelArtifactType",
     "CustomModelCreate",
     "CustomModelCreateQuantization",
     "CustomModelList",
     "CustomModelQuantization",
     "CustomModelStatus",
+    "CustomModelUpdate",
     "DiscoveredImportedModel",
     "DiscoverImportedModelsRequest",
     "DiscoverImportedModelsResponse",
@@ -85,6 +93,7 @@
     "SecretCreate",
     "SecretList",
     "SecretUpdate",
+    "SupportedArchitectures",
     "TestConnectionRequest",
     "TestConnectionResponse",
     "TestConnectionResponseUsage",
@@ -93,5 +102,6 @@
     "ValidateModelRequest",
     "ValidateModelRequestQuantization",
     "ValidateModelResponse",
+    "ValidateModelResponseArtifactType",
     "WeightSource",
 )
diff --git a/src/graphn/_generated/models/architecture_info.py b/src/graphn/_generated/models/architecture_info.py
new file mode 100644
index 0000000..168c72d
--- /dev/null
+++ b/src/graphn/_generated/models/architecture_info.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, TypeVar, cast
+
+from attrs import define as _attrs_define
+
+T = TypeVar("T", bound="ArchitectureInfo")
+
+
+@_attrs_define
+class ArchitectureInfo:
+    """One supported model architecture and the capability tags it exposes.
+
+    Attributes:
+        name (str): HuggingFace `architectures[0]` value (e.g. `LlamaForCausalLM`,
+            `Qwen3VLMoeForConditionalGeneration`).
+        capabilities (list[str]): Capability tags this architecture exposes — `tool_calling`,
+            `vision`, `image_input`, `video_input`, `streaming`, `json_mode`.
+            Drives the UI capability filters and AF's per-feature gating.
+    """
+
+    name: str
+    capabilities: list[str]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialise to a JSON-ready dict with `name` and `capabilities` keys.
+
+        The `capabilities` list is returned as-is, not copied.
+        """
+        return {
+            "name": self.name,
+            "capabilities": self.capabilities,
+        }
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a decoded JSON mapping.
+
+        Both keys are required; a missing one raises `KeyError`. Extra keys
+        are ignored.
+        """
+        # Work on a shallow copy so the caller's mapping is not mutated.
+        payload = dict(src_dict)
+        arch_name = payload.pop("name")
+
+        # cast() only informs the type checker; the value is stored unchecked.
+        capability_tags = cast(list[str], payload.pop("capabilities"))
+
+        return cls(
+            name=arch_name,
+            capabilities=capability_tags,
+        )
diff --git a/src/graphn/_generated/models/custom_model.py b/src/graphn/_generated/models/custom_model.py
index 60e86f0..b39c286 100644
--- a/src/graphn/_generated/models/custom_model.py
+++ b/src/graphn/_generated/models/custom_model.py
@@ -9,6 +9,7 @@
 from dateutil.parser import isoparse
 
 from ..models.capability import Capability
+from ..models.custom_model_artifact_type import CustomModelArtifactType
 from ..models.custom_model_quantization import CustomModelQuantization
 from ..models.custom_model_status import CustomModelStatus
 from ..models.weight_source import WeightSource
@@ -50,6 +51,29 @@ class CustomModel:
             huggingface_model_id (str | Unset): Set when `weight_source` is `huggingface`.
             s3_url (None | str | Unset): Set when `weight_source` is `s3_presigned` or `s3_assume_role`.
             s3_role_arn (None | str | Unset): Set when `weight_source` is `s3_assume_role`.
+            artifact_type (CustomModelArtifactType | Unset): Whether this import is a full base checkpoint or a LoRA
+                adapter on top of an allowlisted base. Set eagerly at
+                create-time:
+
+                * **`huggingface`** imports are classified by probing
+                  `adapter_config.json` on the upstream repo.
+                * **`s3_*`** imports are classified as `lora` iff
+                  `base_model_id` is supplied on the create request;
+                  otherwise the bundle is treated as `base`. If the
+                  downloaded S3 bundle later turns out to be a LoRA
+                  adapter (caller forgot the hint), the model deploys to
+                  `failed` with an actionable `error_message`.
+            base_model_id (None | str | Unset): Populated when `artifact_type=lora`. The base model id the
+                adapter loads on top of. For HuggingFace imports this is
+                either `adapter_config.json::base_model_name_or_path` or
+                the caller's `base_model_id` override on
+                `CustomModelCreate` (the override wins on disagreement).
+                For S3 imports it is the caller-supplied
+                `base_model_id` from `CustomModelCreate`.
+            lora_adapter_name (None | str | Unset): vLLM routing name the LoRA adapter is served under.
+                Defaults to the model's short name; clients address it
+                via `model=<lora_adapter_name>` in chat completions.
+            lora_rank (int | None | Unset): `r` value from `adapter_config.json` when `artifact_type=lora`.
             max_model_len (int | None | Unset): Maximum context length in tokens.
             quantization (CustomModelQuantization | Unset): Weight quantization scheme, if any.
             replicas_available (int | None | Unset): Currently serving replicas (live status).
@@ -78,6 +102,10 @@ class CustomModel:
     huggingface_model_id: str | Unset = UNSET
     s3_url: None | str | Unset = UNSET
     s3_role_arn: None | str | Unset = UNSET
+    artifact_type: CustomModelArtifactType | Unset = UNSET
+    base_model_id: None | str | Unset = UNSET
+    lora_adapter_name: None | str | Unset = UNSET
+    lora_rank: int | None | Unset = UNSET
     max_model_len: int | None | Unset = UNSET
     quantization: CustomModelQuantization | Unset = UNSET
     replicas_available: int | None | Unset = UNSET
@@ -134,6 +162,28 @@ def to_dict(self) -> dict[str, Any]:
         else:
             s3_role_arn = self.s3_role_arn
 
+        artifact_type: str | Unset = UNSET
+        if not isinstance(self.artifact_type, Unset):
+            artifact_type = self.artifact_type.value
+
+        base_model_id: None | str | Unset
+        if isinstance(self.base_model_id, Unset):
+            base_model_id = UNSET
+        else:
+            base_model_id = self.base_model_id
+
+        lora_adapter_name: None | str | Unset
+        if isinstance(self.lora_adapter_name, Unset):
+            lora_adapter_name = UNSET
+        else:
+            lora_adapter_name = self.lora_adapter_name
+
+        lora_rank: int | None | Unset
+        if isinstance(self.lora_rank, Unset):
+            lora_rank = UNSET
+        else:
+            lora_rank = self.lora_rank
+
         max_model_len: int | None | Unset
         if isinstance(self.max_model_len, Unset):
             max_model_len = UNSET
@@ -210,6 +260,14 @@ def to_dict(self) -> dict[str, Any]:
             field_dict["s3_url"] = s3_url
         if s3_role_arn is not UNSET:
             field_dict["s3_role_arn"] = s3_role_arn
+        if artifact_type is not UNSET:
+            field_dict["artifact_type"] = artifact_type
+        if base_model_id is not UNSET:
+            field_dict["base_model_id"] = base_model_id
+        if lora_adapter_name is not UNSET:
+            field_dict["lora_adapter_name"] = lora_adapter_name
+        if lora_rank is not UNSET:
+            field_dict["lora_rank"] = lora_rank
         if max_model_len is not UNSET:
             field_dict["max_model_len"] = max_model_len
         if quantization is not UNSET:
@@ -285,6 +343,40 @@ def _parse_s3_role_arn(data: object) -> None | str | Unset:
 
         s3_role_arn = _parse_s3_role_arn(d.pop("s3_role_arn", UNSET))
 
+        _artifact_type = d.pop("artifact_type", UNSET)
+        artifact_type: CustomModelArtifactType | Unset
+        if isinstance(_artifact_type, Unset):
+            artifact_type = UNSET
+        else:
+            artifact_type = CustomModelArtifactType(_artifact_type)
+
+        def _parse_base_model_id(data: object) -> None | str | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            return cast(None | str | Unset, data)
+
+        base_model_id = _parse_base_model_id(d.pop("base_model_id", UNSET))
+
+        def _parse_lora_adapter_name(data: object) -> None | str | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            return cast(None | str | Unset, data)
+
+        lora_adapter_name = _parse_lora_adapter_name(d.pop("lora_adapter_name", UNSET))
+
+        def _parse_lora_rank(data: object) -> int | None | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            return cast(int | None | Unset, data)
+
+        lora_rank = _parse_lora_rank(d.pop("lora_rank", UNSET))
+
         def _parse_max_model_len(data: object) -> int | None | Unset:
             if data is None:
                 return data
@@ -385,6 +477,10 @@ def _parse_architectures(data: object) -> list[str] | None | Unset:
             huggingface_model_id=huggingface_model_id,
             s3_url=s3_url,
             s3_role_arn=s3_role_arn,
+            artifact_type=artifact_type,
+            base_model_id=base_model_id,
+            lora_adapter_name=lora_adapter_name,
+            lora_rank=lora_rank,
             max_model_len=max_model_len,
             quantization=quantization,
             replicas_available=replicas_available,
diff --git a/src/graphn/_generated/models/custom_model_artifact_type.py b/src/graphn/_generated/models/custom_model_artifact_type.py
new file mode 100644
index 0000000..f2adb23
--- /dev/null
+++ b/src/graphn/_generated/models/custom_model_artifact_type.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class CustomModelArtifactType(str, Enum):
+    BASE = "base"  # full base checkpoint
+    LORA = "lora"  # LoRA adapter on top of a base model
+
+    def __str__(self) -> str:  # render as the raw wire value, e.g. "lora"
+        return f"{self.value}"
diff --git a/src/graphn/_generated/models/custom_model_create.py b/src/graphn/_generated/models/custom_model_create.py
index 6eeb477..49f1ac9 100644
--- a/src/graphn/_generated/models/custom_model_create.py
+++ b/src/graphn/_generated/models/custom_model_create.py
@@ -36,9 +36,14 @@ class CustomModelCreate:
             `s3_assume_role`. Conditional requirement is enforced by
             the server (returns 422); not encoded as a JSON Schema
             keyword for OAS-3.0-tooling compatibility.
-        s3_role_arn (str | Unset): Required when `weight_source` is `s3_assume_role`.
-            Conditional requirement is enforced by the server (returns
-            422); not encoded as a JSON Schema keyword.
+        s3_role_arn (str | Unset): Required when `weight_source` is `s3_assume_role`. The role
+            name (the segment after `:role/`) must start with
+            `graphn-byom-`; GraphN's platform IAM policy is scoped to
+            that prefix as a defense-in-depth boundary, and the
+            customer-facing CloudFormation template enforces the same
+            constraint at stack-create time. Conditional requirement
+            (s3_assume_role only) is enforced by the server (returns
+            422); the format itself is checked against this pattern.
         hf_token_secret_id (str | Unset): ID of a workspace secret holding a HuggingFace access token.
             Required for gated HuggingFace models.
         gpu_count (int | Unset):  Default: 1.
@@ -49,6 +54,27 @@ class CustomModelCreate:
         min_replicas (int | Unset):  Default: 0.
         max_replicas (int | Unset):  Default: 1.
         cooldown_seconds (int | Unset):  Default: 600.
+        base_model_id (str | Unset): Override / hint for LoRA imports. Must be one of the
+            platform's allowlisted base models (see
+            `GET /v1/{workspaceId}/custom-models/supported-architectures`).
+
+            * **`weight_source=s3_*`**: this is the **only** way to
+              classify the bundle as a LoRA adapter at create-time --
+              omitting it routes the import through the base path,
+              and a bundle that later turns out to be a LoRA adapter
+              will deploy to `failed` with an actionable error
+              ("re-create with `base_model_id` set").
+            * **`weight_source=huggingface`**: the field **overrides**
+              `adapter_config.json::base_model_name_or_path` from the
+              adapter repo. Useful for adapters trained against a
+              local filesystem path (e.g. `C:/users/.../base`) whose
+              recorded base id isn't a valid HF id. When the override
+              disagrees with the adapter's declared base the caller's
+              value wins; the disagreement is logged server-side for
+              debuggability.
+
+            Ignored when the resolved artifact type is `base`.
+             Example: Qwen/Qwen3.5-4B.
     """
 
     name: str
@@ -66,6 +92,7 @@ class CustomModelCreate:
     min_replicas: int | Unset = 0
     max_replicas: int | Unset = 1
     cooldown_seconds: int | Unset = 600
+    base_model_id: str | Unset = UNSET
 
     def to_dict(self) -> dict[str, Any]:
         name = self.name
@@ -107,6 +134,8 @@ def to_dict(self) -> dict[str, Any]:
 
         cooldown_seconds = self.cooldown_seconds
 
+        base_model_id = self.base_model_id
+
         field_dict: dict[str, Any] = {}
 
         field_dict.update(
@@ -141,6 +170,8 @@ def to_dict(self) -> dict[str, Any]:
             field_dict["max_replicas"] = max_replicas
         if cooldown_seconds is not UNSET:
             field_dict["cooldown_seconds"] = cooldown_seconds
+        if base_model_id is not UNSET:
+            field_dict["base_model_id"] = base_model_id
 
         return field_dict
 
@@ -194,6 +225,8 @@ def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
 
         cooldown_seconds = d.pop("cooldown_seconds", UNSET)
 
+        base_model_id = d.pop("base_model_id", UNSET)
+
         custom_model_create = cls(
             name=name,
             huggingface_model_id=huggingface_model_id,
@@ -210,6 +243,7 @@ def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
             min_replicas=min_replicas,
             max_replicas=max_replicas,
             cooldown_seconds=cooldown_seconds,
+            base_model_id=base_model_id,
         )
 
         return custom_model_create
diff --git a/src/graphn/_generated/models/custom_model_update.py b/src/graphn/_generated/models/custom_model_update.py
new file mode 100644
index 0000000..da74ce8
--- /dev/null
+++ b/src/graphn/_generated/models/custom_model_update.py
@@ -0,0 +1,83 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any, TypeVar
+
+from attrs import define as _attrs_define
+
+from ..types import UNSET, Unset
+
+T = TypeVar("T", bound="CustomModelUpdate")
+
+
+@_attrs_define
+class CustomModelUpdate:
+    """Body of `PATCH /v1/{workspaceId}/custom-models/{modelId}` (partial update).
+
+    Every attribute is optional on its own and an omitted one is left
+    unchanged, but at least one MUST be supplied. Only this small, vetted
+    set is mutable post-create; immutable fields (`huggingface_model_id`,
+    `weight_source`, GPU topology, …) are not exposed here and are changed
+    by deleting and re-creating the model.
+
+    Attributes:
+        name (str | Unset): Display name. Persisted to the model record only.
+        min_replicas (int | Unset): New floor for the autoscaler. `0`
+            re-enables scale-to-zero; any value `>= 1` keeps the model warm.
+            Applied to the live deployment in place — no rolling restart,
+            no downtime.
+        max_replicas (int | Unset): New ceiling for the autoscaler. KServe
+            propagates the value to the underlying KEDA `ScaledObject`'s
+            `maxReplicaCount` on its next reconcile. Applied to the live
+            deployment in place.
+        cooldown_seconds (int | Unset): Idle period (in seconds) the
+            controller waits before scaling an idle replica back to zero.
+            Persisted to the model record; the controller picks it up on
+            the next reconcile.
+    """
+
+    name: str | Unset = UNSET
+    min_replicas: int | Unset = UNSET
+    max_replicas: int | Unset = UNSET
+    cooldown_seconds: int | Unset = UNSET
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize the update into a plain dict.
+
+        Returns:
+            A new dict holding only the fields whose value is not UNSET, in
+            the order name, min_replicas, max_replicas, cooldown_seconds.
+        """
+        # Insertion order below fixes the key order of the emitted payload.
+        candidates: dict[str, Any] = {
+            "name": self.name,
+            "min_replicas": self.min_replicas,
+            "max_replicas": self.max_replicas,
+            "cooldown_seconds": self.cooldown_seconds,
+        }
+
+        return {
+            key: value for key, value in candidates.items() if value is not UNSET
+        }
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a mapping of field names to values.
+
+        Args:
+            src_dict: Source mapping. It is shallow-copied first, so the
+                caller's object is not modified.
+
+        Returns:
+            A new instance. Absent keys become UNSET, present values are
+            stored as-is (no coercion or validation), and keys other than
+            the four known fields are ignored.
+        """
+        payload = dict(src_dict)
+
+        return cls(
+            name=payload.pop("name", UNSET),
+            min_replicas=payload.pop("min_replicas", UNSET),
+            max_replicas=payload.pop("max_replicas", UNSET),
+            cooldown_seconds=payload.pop("cooldown_seconds", UNSET),
+        )
diff --git a/src/graphn/_generated/models/supported_architectures.py b/src/graphn/_generated/models/supported_architectures.py
new file mode 100644
index 0000000..6133402
--- /dev/null
+++ b/src/graphn/_generated/models/supported_architectures.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import TYPE_CHECKING, Any, TypeVar
+
+from attrs import define as _attrs_define
+
+if TYPE_CHECKING:
+    from ..models.architecture_info import ArchitectureInfo
+
+
+T = TypeVar("T", bound="SupportedArchitectures")
+
+
+@_attrs_define
+class SupportedArchitectures:
+    """Collection of the architectures the platform's serving runtimes can deploy.
+
+    Attributes:
+        architectures (list[ArchitectureInfo]): Sorted (by `name`) list of
+            architectures the platform's serving runtimes can deploy.
+    """
+
+    architectures: list[ArchitectureInfo]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a plain dict.
+
+        Returns:
+            A dict with the single key `"architectures"`, mapped to a list
+            holding each entry's own `to_dict()` result in the same order.
+        """
+        serialized = [entry.to_dict() for entry in self.architectures]
+        return {"architectures": serialized}
+
+    @classmethod
+    def from_dict(cls: type[T], src_dict: Mapping[str, Any]) -> T:
+        """Build an instance from a mapping.
+
+        Args:
+            src_dict: Source mapping; a shallow copy is consumed, not the original.
+
+        Returns:
+            A new instance whose entries are produced by running
+            `ArchitectureInfo.from_dict` on each item, in order.
+
+        Raises:
+            KeyError: If `src_dict` has no `"architectures"` key.
+        """
+        # Imported here because the module-level import is TYPE_CHECKING-only.
+        from ..models.architecture_info import ArchitectureInfo
+
+        payload = dict(src_dict)
+        parsed = [
+            ArchitectureInfo.from_dict(raw) for raw in payload.pop("architectures")
+        ]
+        return cls(architectures=parsed)
diff --git a/src/graphn/_generated/models/validate_model_request.py b/src/graphn/_generated/models/validate_model_request.py
index acf74bb..197b62f 100644
--- a/src/graphn/_generated/models/validate_model_request.py
+++ b/src/graphn/_generated/models/validate_model_request.py
@@ -21,12 +21,18 @@ class ValidateModelRequest:
         hf_token_secret_id (None | str | Unset): ID of a workspace secret holding a HuggingFace token.
         quantization (ValidateModelRequestQuantization | Unset):
         gpu_memory_utilization (float | Unset):  Default: 0.9.
+        model_size_gb (int | None | Unset): Optional caller-supplied estimate of the on-disk weights size,
+            in GiB. When provided, the platform sizes the model-weights PVC
+            from this hint instead of waiting for a HuggingFace head-bytes
+            probe; useful for very large models where the probe would
+            otherwise stall the validate response.
     """
 
     huggingface_model_id: str
     hf_token_secret_id: None | str | Unset = UNSET
     quantization: ValidateModelRequestQuantization | Unset = UNSET
     gpu_memory_utilization: float | Unset = 0.9
+    model_size_gb: int | None | Unset = UNSET
 
     def to_dict(self) -> dict[str, Any]:
         huggingface_model_id = self.huggingface_model_id
@@ -43,6 +49,12 @@ def to_dict(self) -> dict[str, Any]:
 
         gpu_memory_utilization = self.gpu_memory_utilization
 
+        model_size_gb: int | None | Unset
+        if isinstance(self.model_size_gb, Unset):
+            model_size_gb = UNSET
+        else:
+            model_size_gb = self.model_size_gb
+
         field_dict: dict[str, Any] = {}
 
         field_dict.update(
@@ -56,6 +68,8 @@ def to_dict(self) -> dict[str, Any]:
             field_dict["quantization"] = quantization
         if gpu_memory_utilization is not UNSET:
             field_dict["gpu_memory_utilization"] = gpu_memory_utilization
+        if model_size_gb is not UNSET:
+            field_dict["model_size_gb"] = model_size_gb
 
         return field_dict
 
@@ -84,11 +98,21 @@ def _parse_hf_token_secret_id(data: object) -> None | str | Unset:
 
         gpu_memory_utilization = d.pop("gpu_memory_utilization", UNSET)
 
+        def _parse_model_size_gb(data: object) -> int | None | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            return cast(int | None | Unset, data)
+
+        model_size_gb = _parse_model_size_gb(d.pop("model_size_gb", UNSET))
+
         validate_model_request = cls(
             huggingface_model_id=huggingface_model_id,
             hf_token_secret_id=hf_token_secret_id,
             quantization=quantization,
             gpu_memory_utilization=gpu_memory_utilization,
+            model_size_gb=model_size_gb,
         )
 
         return validate_model_request
diff --git a/src/graphn/_generated/models/validate_model_response.py b/src/graphn/_generated/models/validate_model_response.py
index 35ba902..b2519ba 100644
--- a/src/graphn/_generated/models/validate_model_response.py
+++ b/src/graphn/_generated/models/validate_model_response.py
@@ -6,6 +6,9 @@
 from attrs import define as _attrs_define
 from attrs import field as _attrs_field
 
+from ..models.validate_model_response_artifact_type import (
+    ValidateModelResponseArtifactType,
+)
 from ..types import UNSET, Unset
 
 T = TypeVar("T", bound="ValidateModelResponse")
@@ -22,6 +25,24 @@ class ValidateModelResponse:
         num_params (int | None | Unset):
         estimated_memory_gb (float | None | Unset):
         max_context_length (int | None | Unset):
+        artifact_type (ValidateModelResponseArtifactType | Unset): `lora` when AF detected an `adapter_config.json` in
+            the HuggingFace
+            repo at validate time; `base` otherwise (the default — what every
+            existing caller saw before the LoRA auto-detect work landed). Use
+            this to branch in client code without keeping track of two
+            different `weight_source` enum values for the HF case.
+
+            When `artifact_type=lora`, the `architectures`, `num_params`,
+            `estimated_memory_gb`, and `max_context_length` fields describe
+            the **base** model (resolved from `adapter_config.json`), not
+            the adapter itself.
+             Default: ValidateModelResponseArtifactType.BASE.
+        detected_base_model_id (None | str | Unset): Populated only when `artifact_type=lora`. The base model id read
+            from `adapter_config.json::base_model_name_or_path`. Use to pin
+            the base on subsequent deploy calls or to surface a "detected as
+            LoRA adapter for X" affordance in your UI.
+        lora_rank (int | None | Unset): Populated only when `artifact_type=lora`. The `r` value from
+            `adapter_config.json` (LoRA rank).
     """
 
     valid: bool
@@ -31,6 +52,11 @@ class ValidateModelResponse:
     num_params: int | None | Unset = UNSET
     estimated_memory_gb: float | None | Unset = UNSET
     max_context_length: int | None | Unset = UNSET
+    artifact_type: ValidateModelResponseArtifactType | Unset = (
+        ValidateModelResponseArtifactType.BASE
+    )
+    detected_base_model_id: None | str | Unset = UNSET
+    lora_rank: int | None | Unset = UNSET
     additional_properties: dict[str, Any] = _attrs_field(init=False, factory=dict)
 
     def to_dict(self) -> dict[str, Any]:
@@ -66,6 +92,22 @@ def to_dict(self) -> dict[str, Any]:
         else:
             max_context_length = self.max_context_length
 
+        artifact_type: str | Unset = UNSET
+        if not isinstance(self.artifact_type, Unset):
+            artifact_type = self.artifact_type.value
+
+        detected_base_model_id: None | str | Unset
+        if isinstance(self.detected_base_model_id, Unset):
+            detected_base_model_id = UNSET
+        else:
+            detected_base_model_id = self.detected_base_model_id
+
+        lora_rank: int | None | Unset
+        if isinstance(self.lora_rank, Unset):
+            lora_rank = UNSET
+        else:
+            lora_rank = self.lora_rank
+
         field_dict: dict[str, Any] = {}
         field_dict.update(self.additional_properties)
         field_dict.update(
@@ -85,6 +127,12 @@ def to_dict(self) -> dict[str, Any]:
             field_dict["estimated_memory_gb"] = estimated_memory_gb
         if max_context_length is not UNSET:
             field_dict["max_context_length"] = max_context_length
+        if artifact_type is not UNSET:
+            field_dict["artifact_type"] = artifact_type
+        if detected_base_model_id is not UNSET:
+            field_dict["detected_base_model_id"] = detected_base_model_id
+        if lora_rank is not UNSET:
+            field_dict["lora_rank"] = lora_rank
 
         return field_dict
 
@@ -137,6 +185,33 @@ def _parse_max_context_length(data: object) -> int | None | Unset:
             d.pop("max_context_length", UNSET)
         )
 
+        _artifact_type = d.pop("artifact_type", UNSET)
+        artifact_type: ValidateModelResponseArtifactType | Unset
+        if isinstance(_artifact_type, Unset):
+            artifact_type = UNSET
+        else:
+            artifact_type = ValidateModelResponseArtifactType(_artifact_type)
+
+        def _parse_detected_base_model_id(data: object) -> None | str | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            return cast(None | str | Unset, data)
+
+        detected_base_model_id = _parse_detected_base_model_id(
+            d.pop("detected_base_model_id", UNSET)
+        )
+
+        def _parse_lora_rank(data: object) -> int | None | Unset:
+            if data is None:
+                return data
+            if isinstance(data, Unset):
+                return data
+            return cast(int | None | Unset, data)
+
+        lora_rank = _parse_lora_rank(d.pop("lora_rank", UNSET))
+
         validate_model_response = cls(
             valid=valid,
             error=error,
@@ -145,6 +220,9 @@ def _parse_max_context_length(data: object) -> int | None | Unset:
             num_params=num_params,
             estimated_memory_gb=estimated_memory_gb,
             max_context_length=max_context_length,
+            artifact_type=artifact_type,
+            detected_base_model_id=detected_base_model_id,
+            lora_rank=lora_rank,
         )
 
         validate_model_response.additional_properties = d
diff --git a/src/graphn/_generated/models/validate_model_response_artifact_type.py b/src/graphn/_generated/models/validate_model_response_artifact_type.py
new file mode 100644
index 0000000..07adb92
--- /dev/null
+++ b/src/graphn/_generated/models/validate_model_response_artifact_type.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class ValidateModelResponseArtifactType(str, Enum):  # str mixin: members compare equal to their raw strings
+    BASE = "base"  # per ValidateModelResponse.artifact_type docs: the default, no adapter detected
+    LORA = "lora"  # per the same docs: an adapter_config.json was detected in the HuggingFace repo
+
+    def __str__(self) -> str:  # raw value ("base"/"lora"), not Enum's "Class.MEMBER" form
+        return str(self.value)