From a18627188f803d9219c9ea4e62ddcece4d413c4e Mon Sep 17 00:00:00 2001 From: Patrick Ogenstad Date: Sun, 10 May 2026 19:27:44 +0200 Subject: [PATCH 1/4] Add raises section to existing docstrings --- .../sdk_ref/infrahub_sdk/client.mdx | 32 +++++++++ .../sdk_ref/infrahub_sdk/node/node.mdx | 30 +++++++- .../sdk_ref/infrahub_sdk/node/parsers.mdx | 4 ++ .../infrahub_sdk/node/relationship.mdx | 8 +++ .../mdx/mdx_collapsed_overload_section.py | 4 ++ infrahub_sdk/_importer.py | 3 + infrahub_sdk/client.py | 68 +++++++++++++++---- infrahub_sdk/config.py | 4 ++ infrahub_sdk/graphql/query_renderer.py | 12 +++- infrahub_sdk/graphql/utils.py | 4 ++ infrahub_sdk/node/node.py | 38 +++++++++-- infrahub_sdk/node/parsers.py | 7 +- infrahub_sdk/node/relationship.py | 20 +++++- infrahub_sdk/object_store.py | 25 ++++++- infrahub_sdk/pytest_plugin/items/base.py | 7 +- infrahub_sdk/schema/__init__.py | 9 +++ infrahub_sdk/schema/repository.py | 5 +- .../spec/processors/range_expand_processor.py | 9 ++- infrahub_sdk/spec/range_expansion.py | 7 +- infrahub_sdk/task/manager.py | 14 +++- infrahub_sdk/template/infrahub_filters.py | 29 ++++++-- infrahub_sdk/utils.py | 17 ++++- pyproject.toml | 8 ++- 23 files changed, 322 insertions(+), 42 deletions(-) diff --git a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/client.mdx b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/client.mdx index 639b3611..e8e42017 100644 --- a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/client.mdx +++ b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/client.mdx @@ -241,6 +241,10 @@ If retry_on_failure is True, the query will retry until the server becomes reach **Raises:** - `GraphQLError`: When the GraphQL response contains errors. +- `ServerNotReachableError`: If the server is not reachable after exhausting retries. +- `AuthenticationError`: If the server returns a 401 or 403 response. +- `URLNotFoundError`: If the server returns a 404 response. +- `Error`: If the response is unexpectedly missing. **Returns:** @@ -286,6 +290,10 @@ Get complete diff tree with metadata and nodes. Returns None if no diff exists. +**Raises:** + +- `ValueError`: If ``from_time`` is later than ``to_time``. + #### `allocate_next_ip_address` ```python @@ -324,6 +332,10 @@ Allocate a new IP address by using the provided resource pool. - Node corresponding to the allocated resource. +**Raises:** + +- `ValueError`: If ``resource_pool`` is not a ``CoreIPAddressPool``. + #### `allocate_next_ip_prefix` @@ -364,6 +376,10 @@ Allocate a new IP prefix by using the provided resource pool. - Node corresponding to the allocated resource. +**Raises:** + +- `ValueError`: If ``resource_pool`` is not a ``CoreIPPrefixPool``. + #### `create_batch` @@ -524,6 +540,10 @@ If retry_on_failure is True, the query will retry until the server becomes reach **Raises:** - `GraphQLError`: When the GraphQL response contains errors. +- `ServerNotReachableError`: If the server is not reachable after exhausting retries. +- `AuthenticationError`: If the server returns a 401 or 403 response. +- `URLNotFoundError`: If the server returns a 404 response. +- `Error`: If the response is unexpectedly missing. **Returns:** @@ -676,6 +696,10 @@ Get complete diff tree with metadata and nodes. Returns None if no diff exists. +**Raises:** + +- `ValueError`: If ``from_time`` is later than ``to_time``. + #### `allocate_next_ip_address` ```python @@ -714,6 +738,10 @@ Allocate a new IP address by using the provided resource pool. - Node corresponding to the allocated resource. 
+**Raises:** + +- `ValueError`: If ``resource_pool`` is not a ``CoreIPAddressPool``. + #### `allocate_next_ip_prefix` @@ -754,6 +782,10 @@ Allocate a new IP prefix by using the provided resource pool. - Node corresponding to the allocated resource. +**Raises:** + +- `ValueError`: If ``resource_pool`` is not a ``CoreIPPrefixPool``. + #### `repository_update_commit` diff --git a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx index e23120dd..4b8a2ab9 100644 --- a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx +++ b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx @@ -155,6 +155,10 @@ Fetch all nodes that were allocated for the pool and a given resource. - list\[InfrahubNode]: The allocated nodes. +**Raises:** + +- `ValueError`: If the node is not a resource pool. + #### `get_pool_resources_utilization` ```python @@ -167,19 +171,28 @@ Fetch the utilization of each resource for the pool. - list\[dict\[str, Any]]: A list containing the allocation numbers for each resource of the pool. +**Raises:** + +- `ValueError`: If the node is not a resource pool. + #### `get_flat_value` ```python get_flat_value(self, key: str, separator: str = '__') -> Any ``` -Query recursively a value defined in a flat notation (string), on a hierarchy of objects +Query recursively a value defined in a flat notation (string), on a hierarchy of objects. **Examples:** name__value module.object.value +**Raises:** + +- `ValueError`: If ``key`` references an unknown attribute or relationship, +or if a referenced relationship is not of cardinality ``ONE``. + #### `extract` ```python @@ -336,6 +349,10 @@ Fetch all nodes that were allocated for the pool and a given resource. - list\[InfrahubNodeSync]: The allocated nodes. +**Raises:** + +- `ValueError`: If the node is not a resource pool. + #### `get_pool_resources_utilization` ```python @@ -348,19 +365,28 @@ Fetch the utilization of each resource for the pool. - list\[dict\[str, Any]]: A list containing the allocation numbers for each resource of the pool. +**Raises:** + +- `ValueError`: If the node is not a resource pool. + #### `get_flat_value` ```python get_flat_value(self, key: str, separator: str = '__') -> Any ``` -Query recursively a value defined in a flat notation (string), on a hierarchy of objects +Query recursively a value defined in a flat notation (string), on a hierarchy of objects. **Examples:** name__value module.object.value +**Raises:** + +- `ValueError`: If ``key`` references an unknown attribute or relationship, +or if a referenced relationship is not of cardinality ``ONE``. + #### `extract` ```python diff --git a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/parsers.mdx b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/parsers.mdx index f70c6788..fb9f6ae5 100644 --- a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/parsers.mdx +++ b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/parsers.mdx @@ -14,3 +14,7 @@ parse_human_friendly_id(hfid: str | list[str]) -> tuple[str | None, list[str]] ``` Parse a human-friendly ID into a kind and an identifier. + +**Raises:** + +- `ValueError`: If ``hfid`` is neither a string nor a list of strings. 
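The `get_flat_value` flat-notation lookup documented in the node.mdx hunk above is easier to see with a usage sketch. This is illustrative only and not part of the patch; the `InfraDevice` kind and its attribute are assumptions.

```python
from infrahub_sdk import InfrahubClient


async def read_device_name(client: InfrahubClient) -> str:
    # "InfraDevice" is a hypothetical kind; substitute one from your own schema.
    device = await client.get(kind="InfraDevice", name__value="atl1-edge1")
    # Flat notation: "name__value" walks device -> name attribute -> value.
    name = await device.get_flat_value("name__value")
    # An unknown attribute/relationship, or a relationship that is not
    # cardinality ONE, raises ValueError per the new Raises section.
    return name
```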
diff --git a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/relationship.mdx b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/relationship.mdx index 567b7c8d..ec21f962 100644 --- a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/relationship.mdx +++ b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/relationship.mdx @@ -65,6 +65,10 @@ add(self, data: str | RelatedNode | dict) -> None Add a new peer to this relationship. +**Raises:** + +- `UninitializedError`: If ``fetch()`` has not been called on the manager yet. + #### `extend` ```python @@ -99,6 +103,10 @@ add(self, data: str | RelatedNodeSync | dict) -> None Add a new peer to this relationship. +**Raises:** + +- `UninitializedError`: If ``fetch()`` has not been called on the manager yet. + #### `extend` ```python diff --git a/docs/docs_generation/content_gen_methods/mdx/mdx_collapsed_overload_section.py b/docs/docs_generation/content_gen_methods/mdx/mdx_collapsed_overload_section.py index 85b6339d..afb98006 100644 --- a/docs/docs_generation/content_gen_methods/mdx/mdx_collapsed_overload_section.py +++ b/docs/docs_generation/content_gen_methods/mdx/mdx_collapsed_overload_section.py @@ -48,6 +48,10 @@ def from_overloads(cls, sections: list[MdxSection]) -> CollapsedOverloadSection: Selects the overload with the most parameters as *primary*. On ties, the first in source order wins. + + Raises: + ValueError: If ``sections`` is empty. + """ if not sections: raise ValueError("Cannot create CollapsedOverloadSection from an empty list") diff --git a/infrahub_sdk/_importer.py b/infrahub_sdk/_importer.py index f51a9cd0..48c75f70 100644 --- a/infrahub_sdk/_importer.py +++ b/infrahub_sdk/_importer.py @@ -21,6 +21,9 @@ def import_module(module_path: Path, import_root: str | None = None, relative_pa import_root (Optional[str]): Absolute string path to the current repository. relative_path (Optional[str]): Relative string path between module_path and import_root. + Raises: + ModuleImportError: If the module cannot be found or contains a syntax error. + """ import_root = import_root or str(module_path.parent) diff --git a/infrahub_sdk/client.py b/infrahub_sdk/client.py index 6855f4de..dcedd311 100644 --- a/infrahub_sdk/client.py +++ b/infrahub_sdk/client.py @@ -956,6 +956,10 @@ async def execute_graphql( Raises: GraphQLError: When the GraphQL response contains errors. + ServerNotReachableError: If the server is not reachable after exhausting retries. + AuthenticationError: If the server returns a 401 or 403 response. + URLNotFoundError: If the server returns a 404 response. + Error: If the response is unexpectedly missing. Returns: dict: The GraphQL data payload (response["data"]). @@ -1125,7 +1129,13 @@ def _build_proxy_config(self) -> ProxyConfig: async def _request_multipart( self, url: str, headers: dict[str, Any], timeout: int, files: dict[str, Any] ) -> httpx.Response: - """Execute a multipart HTTP POST request.""" + """Execute a multipart HTTP POST request. + + Raises: + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. + + """ async with httpx.AsyncClient(**self._build_proxy_config(), verify=self.config.tls_context) as client: try: response = await client.post(url=url, headers=headers, timeout=timeout, files=files) @@ -1148,8 +1158,8 @@ async def _post( """Execute a HTTP POST with HTTPX. 
Raises: - ServerNotReachableError if we are not able to connect to the server - ServerNotResponsiveError if the server didn't respond before the timeout expired + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. """ await self.login() @@ -1171,8 +1181,8 @@ async def _get(self, url: str, headers: dict | None = None, timeout: int | None """Execute a HTTP GET with HTTPX. Raises: - ServerNotReachableError if we are not able to connect to the server - ServerNotResponsiveError if the server didnd't respond before the timeout expired + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. """ await self.login() @@ -1198,8 +1208,8 @@ async def _get_streaming( Use this for downloading large files without loading into memory. Raises: - ServerNotReachableError if we are not able to connect to the server - ServerNotResponsiveError if the server didn't respond before the timeout expired + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. """ await self.login() @@ -1455,6 +1465,10 @@ async def get_diff_tree( """Get complete diff tree with metadata and nodes. Returns None if no diff exists. + + Raises: + ValueError: If ``from_time`` is later than ``to_time``. + """ query = get_diff_tree_query() input_data = {"branch_name": branch} @@ -1554,6 +1568,9 @@ async def allocate_next_ip_address( Returns: InfrahubNode: Node corresponding to the allocated resource. + Raises: + ValueError: If ``resource_pool`` is not a ``CoreIPAddressPool``. + """ if resource_pool.get_kind() != "CoreIPAddressPool": raise ValueError("resource_pool is not an IP address pool") @@ -1639,6 +1656,9 @@ async def allocate_next_ip_prefix( Returns: InfrahubNode: Node corresponding to the allocated resource. + Raises: + ValueError: If ``resource_pool`` is not a ``CoreIPPrefixPool``. + """ if resource_pool.get_kind() != "CoreIPPrefixPool": raise ValueError("resource_pool is not an IP prefix pool") @@ -1870,6 +1890,10 @@ def execute_graphql( Raises: GraphQLError: When the GraphQL response contains errors. + ServerNotReachableError: If the server is not reachable after exhausting retries. + AuthenticationError: If the server returns a 401 or 403 response. + URLNotFoundError: If the server returns a 404 response. + Error: If the response is unexpectedly missing. Returns: dict: The GraphQL data payload (`response["data"]`). @@ -2037,7 +2061,13 @@ def _build_proxy_config(self) -> ProxyConfigSync: def _request_multipart( self, url: str, headers: dict[str, Any], timeout: int, files: dict[str, Any] ) -> httpx.Response: - """Execute a multipart HTTP POST request.""" + """Execute a multipart HTTP POST request. + + Raises: + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. + + """ with httpx.Client(**self._build_proxy_config(), verify=self.config.tls_context) as client: try: response = client.post(url=url, headers=headers, timeout=timeout, files=files) @@ -2769,6 +2799,10 @@ def get_diff_tree( """Get complete diff tree with metadata and nodes. Returns None if no diff exists. + + Raises: + ValueError: If ``from_time`` is later than ``to_time``. 
+ """ query = get_diff_tree_query() input_data = {"branch_name": branch} @@ -2868,6 +2902,9 @@ def allocate_next_ip_address( Returns: InfrahubNodeSync: Node corresponding to the allocated resource. + Raises: + ValueError: If ``resource_pool`` is not a ``CoreIPAddressPool``. + """ if resource_pool.get_kind() != "CoreIPAddressPool": raise ValueError("resource_pool is not an IP address pool") @@ -2953,6 +2990,9 @@ def allocate_next_ip_prefix( Returns: InfrahubNodeSync: Node corresponding to the allocated resource. + Raises: + ValueError: If ``resource_pool`` is not a ``CoreIPPrefixPool``. + """ if resource_pool.get_kind() != "CoreIPPrefixPool": raise ValueError("resource_pool is not an IP prefix pool") @@ -2996,8 +3036,8 @@ def _get(self, url: str, headers: dict | None = None, timeout: int | None = None """Execute a HTTP GET with HTTPX. Raises: - ServerNotReachableError if we are not able to connect to the server - ServerNotResponsiveError if the server didnd't respond before the timeout expired + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. """ self.login() @@ -3023,8 +3063,8 @@ def _get_streaming( Use this for downloading large files without loading into memory. Raises: - ServerNotReachableError if we are not able to connect to the server - ServerNotResponsiveError if the server didn't respond before the timeout expired + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. """ self.login() @@ -3055,8 +3095,8 @@ def _post( """Execute a HTTP POST with HTTPX. Raises: - ServerNotReachableError if we are not able to connect to the server - ServerNotResponsiveError if the server didnd't respond before the timeout expired + ServerNotReachableError: If we are not able to connect to the server. + ServerNotResponsiveError: If the server didn't respond before the timeout expired. """ self.login() diff --git a/infrahub_sdk/config.py b/infrahub_sdk/config.py index 7efa4dcf..87a0f82c 100644 --- a/infrahub_sdk/config.py +++ b/infrahub_sdk/config.py @@ -143,6 +143,10 @@ def validate_mix_authentication_schemes(cls, values: dict[str, Any]) -> dict[str we prioritize the explicitly provided method. If we can determine which fields were explicitly set, we use that; otherwise, we prefer password auth when both username and password are present. + + Raises: + ValueError: If both token and username/password authentication are explicitly configured. + """ # Extract tracking information about explicitly provided fields explicit_fields = values.pop("_explicit_fields", set()) diff --git a/infrahub_sdk/graphql/query_renderer.py b/infrahub_sdk/graphql/query_renderer.py index ccde2597..615fbd04 100644 --- a/infrahub_sdk/graphql/query_renderer.py +++ b/infrahub_sdk/graphql/query_renderer.py @@ -37,7 +37,10 @@ def _collect_spread_names(node: ASTNode) -> list[str]: def build_fragment_index(fragment_files: list[str]) -> dict[str, FragmentDefinitionNode]: """Parse all fragment file contents and return a mapping from fragment name to its AST node. - Raises DuplicateFragmentError if the same fragment name appears more than once. + Raises: + QuerySyntaxError: A fragment file contains invalid GraphQL syntax. + DuplicateFragmentError: The same fragment name appears more than once. 
+ """ index: dict[str, FragmentDefinitionNode] = {} for content in fragment_files: @@ -61,8 +64,11 @@ def collect_required_fragments( """Walk query_doc and collect all fragment names required (transitively). Returns a topologically ordered list of unique fragment names. - Raises FragmentNotFoundError for any unresolved name. - Raises CircularFragmentError for cyclic dependencies. + + Raises: + FragmentNotFoundError: An unresolved fragment name was referenced. + CircularFragmentError: A cyclic dependency was detected among fragments. + """ # Collect spreads only from operation definitions — any fragment definitions already # present in the query document are self-contained and do not need external resolution. diff --git a/infrahub_sdk/graphql/utils.py b/infrahub_sdk/graphql/utils.py index ed15c407..abd56d21 100644 --- a/infrahub_sdk/graphql/utils.py +++ b/infrahub_sdk/graphql/utils.py @@ -20,6 +20,10 @@ def strip_typename_from_selection_set(selection_set: SelectionSetNode | None) -> This function removes all __typename fields from the selection set, allowing code generation to proceed without errors. + + Raises: + TypeError: If the selection set contains an unexpected GraphQL node type. + """ if selection_set is None: return None diff --git a/infrahub_sdk/node/node.py b/infrahub_sdk/node/node.py index 4c94f6aa..9885ed72 100644 --- a/infrahub_sdk/node/node.py +++ b/infrahub_sdk/node/node.py @@ -705,7 +705,12 @@ def __getattr__(self, name: str) -> Attribute | RelationshipManager | RelatedNod raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") def __setattr__(self, name: str, value: Any) -> None: - """Set values for relationship names that exist or revert to normal behaviour""" + """Set values for relationship names that exist or revert to normal behaviour. + + Raises: + SchemaNotFoundError: If a matching relationship schema cannot be found for ``name``. + + """ if "_relationship_cardinality_one_data" in self.__dict__ and name in self._relationship_cardinality_one_data: rel_schemas = [rel_schema for rel_schema in self._schema.relationships if rel_schema.name == name] if not rel_schemas: @@ -1251,6 +1256,9 @@ async def get_pool_allocated_resources(self, resource: InfrahubNode) -> list[Inf Returns: list[InfrahubNode]: The allocated nodes. + Raises: + ValueError: If the node is not a resource pool. + """ if not self.is_resource_pool(): raise ValueError("Allocated resources can only be fetched from resource pool nodes.") @@ -1310,6 +1318,9 @@ async def get_pool_resources_utilization(self) -> list[dict[str, Any]]: Returns: list[dict[str, Any]]: A list containing the allocation numbers for each resource of the pool. + Raises: + ValueError: If the node is not a resource pool. + """ if not self.is_resource_pool(): raise ValueError("Pool utilization can only be fetched for resource pool nodes.") @@ -1359,12 +1370,16 @@ def _get_relationship_one(self, name: str) -> RelatedNode: raise ResourceNotDefinedError(message=f"The node doesn't have a cardinality=one relationship for {name}") async def get_flat_value(self, key: str, separator: str = "__") -> Any: - """Query recursively a value defined in a flat notation (string), on a hierarchy of objects + """Query recursively a value defined in a flat notation (string), on a hierarchy of objects. Examples: name__value module.object.value + Raises: + ValueError: If ``key`` references an unknown attribute or relationship, + or if a referenced relationship is not of cardinality ``ONE``. 
+ """ if separator not in key: return getattr(self, key) @@ -1528,7 +1543,12 @@ def __getattr__(self, name: str) -> Attribute | RelationshipManagerSync | Relate raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") def __setattr__(self, name: str, value: Any) -> None: - """Set values for relationship names that exist or revert to normal behaviour""" + """Set values for relationship names that exist or revert to normal behaviour. + + Raises: + SchemaNotFoundError: If a matching relationship schema cannot be found for ``name``. + + """ if "_relationship_cardinality_one_data" in self.__dict__ and name in self._relationship_cardinality_one_data: rel_schemas = [rel_schema for rel_schema in self._schema.relationships if rel_schema.name == name] if not rel_schemas: @@ -2068,6 +2088,9 @@ def get_pool_allocated_resources(self, resource: InfrahubNodeSync) -> list[Infra Returns: list[InfrahubNodeSync]: The allocated nodes. + Raises: + ValueError: If the node is not a resource pool. + """ if not self.is_resource_pool(): raise ValueError("Allocate resources can only be fetched from resource pool nodes.") @@ -2127,6 +2150,9 @@ def get_pool_resources_utilization(self) -> list[dict[str, Any]]: Returns: list[dict[str, Any]]: A list containing the allocation numbers for each resource of the pool. + Raises: + ValueError: If the node is not a resource pool. + """ if not self.is_resource_pool(): raise ValueError("Pool utilization can only be fetched for resource pool nodes.") @@ -2176,12 +2202,16 @@ def _get_relationship_one(self, name: str) -> RelatedNode | RelatedNodeSync: raise ResourceNotDefinedError(message=f"The node doesn't have a cardinality=one relationship for {name}") def get_flat_value(self, key: str, separator: str = "__") -> Any: - """Query recursively a value defined in a flat notation (string), on a hierarchy of objects + """Query recursively a value defined in a flat notation (string), on a hierarchy of objects. Examples: name__value module.object.value + Raises: + ValueError: If ``key`` references an unknown attribute or relationship, + or if a referenced relationship is not of cardinality ``ONE``. + """ if separator not in key: return getattr(self, key) diff --git a/infrahub_sdk/node/parsers.py b/infrahub_sdk/node/parsers.py index c5d2fbbd..b8790627 100644 --- a/infrahub_sdk/node/parsers.py +++ b/infrahub_sdk/node/parsers.py @@ -4,7 +4,12 @@ def parse_human_friendly_id(hfid: str | list[str]) -> tuple[str | None, list[str]]: - """Parse a human-friendly ID into a kind and an identifier.""" + """Parse a human-friendly ID into a kind and an identifier. + + Raises: + ValueError: If ``hfid`` is neither a string nor a list of strings. + + """ if isinstance(hfid, str): hfid_parts = hfid.split(HFID_STR_SEPARATOR) if len(hfid_parts) == 1: diff --git a/infrahub_sdk/node/relationship.py b/infrahub_sdk/node/relationship.py index 5915ae81..ba5a8736 100644 --- a/infrahub_sdk/node/relationship.py +++ b/infrahub_sdk/node/relationship.py @@ -133,6 +133,9 @@ def __init__( schema (RelationshipSchema): The schema of the relationship. data (Union[Any, dict]): Initial data for the relationships. + Raises: + ValueError: If ``data`` is in an unexpected format. + """ self.client = client self.node = node @@ -198,7 +201,12 @@ async def fetch(self) -> None: pass def add(self, data: str | RelatedNode | dict) -> None: - """Add a new peer to this relationship.""" + """Add a new peer to this relationship. 
+ + Raises: + UninitializedError: If ``fetch()`` has not been called on the manager yet. + + """ if not self.initialized: raise UninitializedError("Must call fetch() on RelationshipManager before editing members") new_node = RelatedNode(schema=self.schema, client=self.client, branch=self.branch, data=data) @@ -256,6 +264,9 @@ def __init__( schema (RelationshipSchema): The schema of the relationship. data (Union[Any, dict]): Initial data for the relationships. + Raises: + ValueError: If ``data`` is in an unexpected format. + """ self.client = client self.node = node @@ -321,7 +332,12 @@ def fetch(self) -> None: pass def add(self, data: str | RelatedNodeSync | dict) -> None: - """Add a new peer to this relationship.""" + """Add a new peer to this relationship. + + Raises: + UninitializedError: If ``fetch()`` has not been called on the manager yet. + + """ if not self.initialized: raise UninitializedError("Must call fetch() on RelationshipManager before editing members") new_node = RelatedNodeSync(schema=self.schema, client=self.client, branch=self.branch, data=data) diff --git a/infrahub_sdk/object_store.py b/infrahub_sdk/object_store.py index d5ed36cc..628c06e9 100644 --- a/infrahub_sdk/object_store.py +++ b/infrahub_sdk/object_store.py @@ -22,7 +22,12 @@ def _extract_content_type(response: httpx.Response) -> str: class ObjectStoreBase: @staticmethod def _validate_text_content(response: httpx.Response, identifier: str) -> str: - """Validate that a file response has a text-based content-type and return the text.""" + """Validate that a file response has a text-based content-type and return the text. + + Raises: + ValueError: If the response content-type is not text-based. + + """ content_type = _extract_content_type(response) if not content_type.startswith("text/") and content_type not in ALLOWED_TEXT_CONTENT_TYPES: raise ValueError( @@ -81,7 +86,14 @@ async def upload(self, content: str, tracker: str | None = None) -> dict[str, st return resp.json() async def _get_file(self, url: str, identifier: str, tracker: str | None = None) -> str: - """Fetch a file endpoint and validate that the response is text-based.""" + """Fetch a file endpoint and validate that the response is text-based. + + Raises: + ServerNotReachableError: If the Infrahub server is not reachable. + AuthenticationError: If the server returns a 401 or 403 response. + HTTPStatusError: For other non-2xx HTTP responses. + + """ headers = copy.copy(self.client.headers or {}) if self.client.insert_tracker and tracker: headers["X-Infrahub-Tracker"] = tracker @@ -169,7 +181,14 @@ def upload(self, content: str, tracker: str | None = None) -> dict[str, str]: return resp.json() def _get_file(self, url: str, identifier: str, tracker: str | None = None) -> str: - """Fetch a file endpoint and validate that the response is text-based.""" + """Fetch a file endpoint and validate that the response is text-based. + + Raises: + ServerNotReachableError: If the Infrahub server is not reachable. + AuthenticationError: If the server returns a 401 or 403 response. + HTTPStatusError: For other non-2xx HTTP responses. 
+ + """ headers = copy.copy(self.client.headers or {}) if self.client.insert_tracker and tracker: headers["X-Infrahub-Tracker"] = tracker diff --git a/infrahub_sdk/pytest_plugin/items/base.py b/infrahub_sdk/pytest_plugin/items/base.py index ae08f036..bb7687d0 100644 --- a/infrahub_sdk/pytest_plugin/items/base.py +++ b/infrahub_sdk/pytest_plugin/items/base.py @@ -36,7 +36,12 @@ def __init__( self.test.spec.update_paths(base_dir=self.path.parent) def validate_resource_config(self) -> None: - """Make sure that a test resource config is properly defined.""" + """Make sure that a test resource config is properly defined. + + Raises: + InvalidResourceConfigError: If the resource config is missing. + + """ if self.resource_config is None: raise InvalidResourceConfigError(self.resource_name) diff --git a/infrahub_sdk/schema/__init__.py b/infrahub_sdk/schema/__init__.py index 43621853..8b58f98b 100644 --- a/infrahub_sdk/schema/__init__.py +++ b/infrahub_sdk/schema/__init__.py @@ -580,6 +580,9 @@ async def get_graphql_schema(self, branch: str | None = None) -> str: Returns: The GraphQL schema as a string. + Raises: + ValueError: If the server returns a non-200 response when fetching the schema. + """ branch = branch or self.client.default_branch url = f"{self.client.address}/schema.graphql?branch={branch}" @@ -656,6 +659,9 @@ def get( Returns: MainSchemaTypes: The schema object. + Raises: + SchemaNotFoundError: If the requested schema kind is not present on the branch. + """ branch = branch or self.client.default_branch @@ -852,6 +858,9 @@ def get_graphql_schema(self, branch: str | None = None) -> str: Returns: The GraphQL schema as a string. + Raises: + ValueError: If the server returns a non-200 response when fetching the schema. + """ branch = branch or self.client.default_branch url = f"{self.client.address}/schema.graphql?branch={branch}" diff --git a/infrahub_sdk/schema/repository.py b/infrahub_sdk/schema/repository.py index cbca6a60..9ecee7ca 100644 --- a/infrahub_sdk/schema/repository.py +++ b/infrahub_sdk/schema/repository.py @@ -177,7 +177,10 @@ def load_fragments(self, relative_path: str = ".") -> list[str]: If file_path is a .gql file, returns a single-element list. If file_path is a directory, returns one entry per .gql file found (sorted alphabetically). - Raises FragmentFileNotFoundError if file_path does not exist. + + Raises: + FragmentFileNotFoundError: If ``file_path`` does not exist. + """ resolved = Path(f"{relative_path}/{self.file_path}") if not resolved.exists(): diff --git a/infrahub_sdk/spec/processors/range_expand_processor.py b/infrahub_sdk/spec/processors/range_expand_processor.py index 53e159e4..f0e4290c 100644 --- a/infrahub_sdk/spec/processors/range_expand_processor.py +++ b/infrahub_sdk/spec/processors/range_expand_processor.py @@ -20,7 +20,14 @@ async def process_data( cls, data: list[dict[str, Any]], ) -> list[dict[str, Any]]: - """Expand any item in data with range pattern in any value. Supports multiple fields, requires equal expansion length.""" + """Expand any item in data with range pattern in any value. + + Supports multiple fields, requires equal expansion length. + + Raises: + ValidationError: If multiple range fields on the same item expand to different lengths. 
+ + """ range_pattern = re.compile(MATCH_PATTERN) expanded = [] for item in data: diff --git a/infrahub_sdk/spec/range_expansion.py b/infrahub_sdk/spec/range_expansion.py index 46c893d2..4f231777 100644 --- a/infrahub_sdk/spec/range_expansion.py +++ b/infrahub_sdk/spec/range_expansion.py @@ -13,7 +13,12 @@ def _unescape_brackets(s: str) -> str: def _char_range_expand(char_range_str: str) -> list[str]: - """Expands a string of numbers or single-character letters.""" + """Expands a string of numbers or single-character letters. + + Raises: + ValueError: If the input contains non-alphanumeric characters that cannot be expanded. + + """ expanded_values: list[str] = [] # Special case: if no dash and no comma, and multiple characters, error if not all alphanumeric if "," not in char_range_str and "-" not in char_range_str and len(char_range_str) > 1: diff --git a/infrahub_sdk/task/manager.py b/infrahub_sdk/task/manager.py index 500299e2..1931f896 100644 --- a/infrahub_sdk/task/manager.py +++ b/infrahub_sdk/task/manager.py @@ -291,7 +291,12 @@ async def process_non_batch( include_logs: bool = False, include_related_nodes: bool = False, ) -> list[Task]: - """Process queries without parallel mode.""" + """Process queries without parallel mode. + + Raises: + ValueError: If the underlying GraphQL query did not return a count. + + """ tasks = [] has_remaining_items = True page_number = 1 @@ -526,7 +531,12 @@ def process_non_batch( include_logs: bool = False, include_related_nodes: bool = False, ) -> list[Task]: - """Process queries without parallel mode.""" + """Process queries without parallel mode. + + Raises: + ValueError: If the underlying GraphQL query did not return a count. + + """ tasks = [] has_remaining_items = True page_number = 1 diff --git a/infrahub_sdk/template/infrahub_filters.py b/infrahub_sdk/template/infrahub_filters.py index 6279177d..c2b8faf1 100644 --- a/infrahub_sdk/template/infrahub_filters.py +++ b/infrahub_sdk/template/infrahub_filters.py @@ -42,7 +42,13 @@ def _require_client(self, filter_name: str) -> InfrahubClient: return self._client async def artifact_content(self, storage_id: str) -> str: - """Retrieve artifact content by storage_id.""" + """Retrieve artifact content by storage_id. + + Raises: + JinjaFilterError: If ``storage_id`` is missing/empty, the request fails authentication, + or content retrieval fails for another reason. + + """ client = self._require_client(filter_name="artifact_content") if storage_id is None: raise JinjaFilterError( @@ -120,7 +126,12 @@ async def file_object_content_by_id(self, node_id: str) -> str: ) async def file_object_content_by_hfid(self, hfid: str | list[str], kind: str = "") -> str: - """Retrieve file object content by Human-Friendly ID.""" + """Retrieve file object content by Human-Friendly ID. + + Raises: + JinjaFilterError: If ``kind`` is missing or ``hfid`` contains empty elements. + + """ client = self._require_client(filter_name="file_object_content_by_hfid") if not kind: raise JinjaFilterError( @@ -144,7 +155,12 @@ async def file_object_content_by_hfid(self, hfid: str | list[str], kind: str = " def from_json(value: str) -> dict | list: - """Parse a JSON string into a Python dict or list.""" + """Parse a JSON string into a Python dict or list. + + Raises: + JinjaFilterError: If ``value`` is not valid JSON. 
+ + """ if not value: return {} try: @@ -154,7 +170,12 @@ def from_json(value: str) -> dict | list: def from_yaml(value: str) -> dict | list: - """Parse a YAML string into a Python dict or list.""" + """Parse a YAML string into a Python dict or list. + + Raises: + JinjaFilterError: If ``value`` is not valid YAML. + + """ if not value: return {} try: diff --git a/infrahub_sdk/utils.py b/infrahub_sdk/utils.py index ab2c7469..ef7c9bb6 100644 --- a/infrahub_sdk/utils.py +++ b/infrahub_sdk/utils.py @@ -135,7 +135,12 @@ def compare_lists(list1: list[Any], list2: list[Any]) -> tuple[list[Any], list[A def deep_merge_dict(dicta: dict, dictb: dict, path: list | None = None) -> dict: """Deep Merge Dictionary B into Dictionary A. + Code is inspired by https://stackoverflow.com/a/7205107 + + Raises: + ValueError: If both dictionaries hold incompatible non-mergeable values for the same key. + """ if path is None: path = [] @@ -160,7 +165,13 @@ def deep_merge_dict(dicta: dict, dictb: dict, path: list | None = None) -> dict: def str_to_bool(value: str | bool | int) -> bool: - """Convert a String to a Boolean""" + """Convert a String to a Boolean. + + Raises: + TypeError: If ``value`` is not a string, boolean, or integer. + ValueError: If ``value`` is a string that doesn't map to a boolean. + + """ if isinstance(value, bool): return value @@ -306,6 +317,10 @@ def write_to_file(path: Path, value: Any) -> bool: """Write a given value into a file and return if the operation was successful. If the file does not exist, the function will attempt to create it. + + Raises: + FileExistsError: If ``path`` exists but is a directory. + """ if not path.exists(): path.touch() diff --git a/pyproject.toml b/pyproject.toml index 1943e09f..654ced5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -360,8 +360,7 @@ ignore = [ "DOC102", # Docstring contains extraneous parameter(s) "DOC201", # `return` is not documented in docstring "DOC402", # `yield` is not documented in docstring - "DOC501", # Raised exception missing from docstring - "DOC502", # Raised exception is not explicitly raised + "DOC502", # Raised exception is not explicitly raised (false positives for transitive raises through helpers) ] @@ -426,6 +425,10 @@ max-complexity = 14 "PLR0912", # Too many branches ] +"infrahub_sdk/ctl/**/*.py" = [ + "DOC501", # CLI commands raise typer.Exit/Abort as control flow; not part of the public API +] + "infrahub_sdk/pytest_plugin/models.py" = [ "S105", # 'PASS' is not a password but a state ] @@ -469,6 +472,7 @@ max-complexity = 14 "tasks.py" = [ "PLC0415", # `import` should be at the top-level of a file + "DOC501", # invoke task helpers raise Exit/ValueError for build-script flow; not part of the public API ] [tool.towncrier] From 6a7ddab04a859939d635ba282d6673d9a00c5f74 Mon Sep 17 00:00:00 2001 From: Alex Gittings Date: Mon, 11 May 2026 09:17:31 +0100 Subject: [PATCH 2/4] feat: add SHA-1 idempotency primitives for CoreFileObject (#963) --------- Co-authored-by: Claude Sonnet 4.6 Co-authored-by: Guillaume Mazoyer --- changelog/+idempotent-file-ops.added.md | 7 + .../sdk_ref/infrahub_sdk/node/node.mdx | 237 ++++++++- infrahub_sdk/file_handler.py | 48 ++ infrahub_sdk/node/__init__.py | 7 +- infrahub_sdk/node/constants.py | 6 + infrahub_sdk/node/node.py | 313 +++++++++++- tests/unit/sdk/test_file_handler.py | 47 +- tests/unit/sdk/test_file_object.py | 463 +++++++++++++++++- 8 files changed, 1115 insertions(+), 13 deletions(-) create mode 100644 changelog/+idempotent-file-ops.added.md diff --git 
a/changelog/+idempotent-file-ops.added.md b/changelog/+idempotent-file-ops.added.md new file mode 100644 index 00000000..2329358a --- /dev/null +++ b/changelog/+idempotent-file-ops.added.md @@ -0,0 +1,7 @@ +Added SHA-1 idempotency primitives for `CoreFileObject` nodes: + +- `InfrahubNode.matches_local_checksum(source)` / sync variant — compare a local `bytes | Path | BinaryIO` source against the node's server-stored checksum without invoking a transfer. +- `InfrahubNode.upload_if_changed(source, name=None)` / sync variant — stage + save only when the local source differs from the server, returning an `UploadResult(was_uploaded, checksum)` dataclass. +- `download_file(..., skip_if_unchanged=True)` — short-circuit the download when `dest` already exists on disk with a matching SHA-1. Returns `0` bytes written when skipped. + +A shared `sha1_of_source` helper (streaming, 64 KiB chunks) centralises the hashing convention in `infrahub_sdk.file_handler`. diff --git a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx index e23120dd..c5ca26c2 100644 --- a/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx +++ b/docs/docs/python-sdk/sdk_ref/infrahub_sdk/node/node.mdx @@ -40,7 +40,22 @@ artifact_fetch(self, name: str) -> str | dict[str, Any] #### `download_file` ```python -download_file(self, dest: Path | None = None) -> bytes | int +download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes +``` + +
+Show 2 other overloads + +#### `download_file` + +```python +download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int +``` + +#### `download_file` + +```python +download_file(self, dest: Path | None = None, skip_if_unchanged: bool = False) -> bytes | int ``` Download the file content from this FileObject node. @@ -54,16 +69,25 @@ The node must have been saved (have an id) before calling this method. directly to this path (memory-efficient for large files) and the number of bytes written will be returned. If not provided, the file content will be returned as bytes. +- `skip_if_unchanged`: When ``True``, compute the SHA-1 of the file at + ``dest`` (which must be provided) and compare against the + node's ``checksum`` attribute. If they match, return ``0`` + without hitting the network. The ``checksum`` is the value + loaded when this node was fetched — a later server-side + change to the file will not be detected unless the caller + re-fetches the node first. **Returns:** - If ``dest`` is None: The file content as bytes. - If ``dest`` is provided: The number of bytes written to the file. +- If ``skip_if_unchanged=True`` and the local file matches the server checksum: ``0``. **Raises:** - `FeatureNotSupportedError`: If this node doesn't inherit from CoreFileObject. -- `ValueError`: If the node hasn't been saved yet or file not found. +- `ValueError`: If the node hasn't been saved yet, file not found, or + ``skip_if_unchanged=True`` was passed without a ``dest``. - `AuthenticationError`: If authentication fails. **Examples:** @@ -73,8 +97,88 @@ The node must have been saved (have an id) before calling this method. >>> content = await contract.download_file() >>> # Stream to file (memory-efficient for large files) >>> bytes_written = await contract.download_file(dest=Path("/tmp/contract.pdf")) +>>> # Skip download if local file already matches server checksum +>>> bytes_written = await contract.download_file( +... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True +... ) +``` + +
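The new `skip_if_unchanged` flag combines with the checksum loaded at fetch time to make downloads idempotent; a minimal sketch of the async round trip follows, assuming a hypothetical `ContractFileObject` kind that inherits from `CoreFileObject` (not part of this patch).

```python
from pathlib import Path

from infrahub_sdk import InfrahubClient


async def refresh_contract(client: InfrahubClient) -> None:
    # "ContractFileObject" is a placeholder kind inheriting from CoreFileObject.
    contract = await client.get(kind="ContractFileObject", name__value="msa-2026")
    dest = Path("/tmp/contract.pdf")
    # Returns 0 and skips the transfer when the file at dest already matches
    # the checksum loaded with the node; otherwise streams the download to dest.
    written = await contract.download_file(dest=dest, skip_if_unchanged=True)
    if written == 0:
        print("local copy already matches the server checksum")
```

As the docstring notes, the comparison uses the checksum captured when the node was fetched, so re-fetch the node if the server-side file may have changed in the meantime.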
+#### `matches_local_checksum` + +```python +matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool +``` + +Return True if ``source``'s SHA-1 matches this node's server checksum. + +Only available for nodes inheriting from ``CoreFileObject``. Callers +that want to branch on the comparison without invoking a transfer +should use this primitive instead of reading ``node.checksum.value`` +and hashing ``source`` themselves, so the hashing convention stays +centralised in the SDK. + +The comparison is against the ``checksum`` attribute as loaded +when this node was retrieved from the server. If the server's +file has been replaced since the node was fetched, this method +will not see that change — re-fetch the node to refresh the +checksum before comparing. + +**Args:** + +- `source`: Local content to hash and compare. Accepts the same +shapes as \:func\:`infrahub_sdk.file_handler.sha1_of_source`. + +**Returns:** + +- True if the local digest equals the server's stored checksum. + +**Raises:** + +- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``. +- `ValueError`: Node has no server-side checksum yet (unsaved or +file never attached). + +#### `upload_if_changed` + +```python +upload_if_changed(self, source: bytes | Path | BinaryIO, name: str | None = None) -> UploadResult ``` +Upload ``source`` only if its SHA-1 differs from the server checksum. + +Composes :meth:`matches_local_checksum` with :meth:`upload_from_path` +(or :meth:`upload_from_bytes`) and :meth:`save`. For unsaved nodes or +nodes that have no prior server-side file, the upload is always +performed — there is nothing to compare against. + +Idempotency is content-only: when the local SHA-1 matches the server +checksum the upload is skipped even if ``name`` differs from the +server-side filename. Use a regular :meth:`upload_from_path` / +:meth:`save` round-trip if you need to rename without changing +content. + +**Args:** + +- `source`: Content to upload. ``bytes`` and ``BinaryIO`` sources +must supply ``name``; for a ``Path`` the filename is derived +from ``source.name`` when ``name`` is omitted. +- `name`: Filename to use on the server. Required for ``bytes`` / +``BinaryIO`` sources. + +**Returns:** + +- class:`UploadResult` with ``was_uploaded=False`` (skipped) or +- ``was_uploaded=True`` (transfer occurred), and the resulting server +- checksum (``None`` only when no server checksum was available +- after the operation). + +**Raises:** + +- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``. +- `ValueError`: ``source`` is ``bytes`` or ``BinaryIO`` and no +``name`` was supplied. + #### `delete` ```python @@ -221,7 +325,22 @@ artifact_fetch(self, name: str) -> str | dict[str, Any] #### `download_file` ```python -download_file(self, dest: Path | None = None) -> bytes | int +download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes +``` + +
+Show 2 other overloads + +#### `download_file` + +```python +download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int +``` + +#### `download_file` + +```python +download_file(self, dest: Path | None = None, skip_if_unchanged: bool = False) -> bytes | int ``` Download the file content from this FileObject node. @@ -235,16 +354,25 @@ The node must have been saved (have an id) before calling this method. directly to this path (memory-efficient for large files) and the number of bytes written will be returned. If not provided, the file content will be returned as bytes. +- `skip_if_unchanged`: When ``True``, compute the SHA-1 of the file at + ``dest`` (which must be provided) and compare against the + node's ``checksum`` attribute. If they match, return ``0`` + without hitting the network. The ``checksum`` is the value + loaded when this node was fetched — a later server-side + change to the file will not be detected unless the caller + re-fetches the node first. **Returns:** - If ``dest`` is None: The file content as bytes. - If ``dest`` is provided: The number of bytes written to the file. +- If ``skip_if_unchanged=True`` and the local file matches the server checksum: ``0``. **Raises:** - `FeatureNotSupportedError`: If this node doesn't inherit from CoreFileObject. -- `ValueError`: If the node hasn't been saved yet or file not found. +- `ValueError`: If the node hasn't been saved yet, file not found, or + ``skip_if_unchanged=True`` was passed without a ``dest``. - `AuthenticationError`: If authentication fails. **Examples:** @@ -254,8 +382,88 @@ The node must have been saved (have an id) before calling this method. >>> content = contract.download_file() >>> # Stream to file (memory-efficient for large files) >>> bytes_written = contract.download_file(dest=Path("/tmp/contract.pdf")) +>>> # Skip download if local file already matches server checksum +>>> bytes_written = contract.download_file( +... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True +... ) +``` + +
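The sync variant pairs the same download behaviour with `upload_if_changed` and the `UploadResult` dataclass documented below; a minimal sketch, again using a placeholder kind.

```python
from pathlib import Path

from infrahub_sdk import InfrahubClientSync


def push_contract(client: InfrahubClientSync) -> None:
    # "ContractFileObject" is a placeholder kind inheriting from CoreFileObject.
    contract = client.get(kind="ContractFileObject", name__value="msa-2026")
    # For a Path source the filename is taken from the path; the content is
    # staged and saved only when its SHA-1 differs from the server checksum.
    result = contract.upload_if_changed(source=Path("/tmp/contract.pdf"))
    if result.was_uploaded:
        print(f"uploaded; server checksum is now {result.checksum}")
    else:
        print("content unchanged; upload skipped")
```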
+#### `matches_local_checksum` + +```python +matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool ``` +Return True if ``source``'s SHA-1 matches this node's server checksum. + +Only available for nodes inheriting from ``CoreFileObject``. Callers +that want to branch on the comparison without invoking a transfer +should use this primitive instead of reading ``node.checksum.value`` +and hashing ``source`` themselves, so the hashing convention stays +centralised in the SDK. + +The comparison is against the ``checksum`` attribute as loaded +when this node was retrieved from the server. If the server's +file has been replaced since the node was fetched, this method +will not see that change — re-fetch the node to refresh the +checksum before comparing. + +**Args:** + +- `source`: Local content to hash and compare. Accepts the same +shapes as \:func\:`infrahub_sdk.file_handler.sha1_of_source`. + +**Returns:** + +- True if the local digest equals the server's stored checksum. + +**Raises:** + +- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``. +- `ValueError`: Node has no server-side checksum yet (unsaved or +file never attached). + +#### `upload_if_changed` + +```python +upload_if_changed(self, source: bytes | Path | BinaryIO, name: str | None = None) -> UploadResult +``` + +Upload ``source`` only if its SHA-1 differs from the server checksum. + +Composes :meth:`matches_local_checksum` with :meth:`upload_from_path` +(or :meth:`upload_from_bytes`) and :meth:`save`. For unsaved nodes or +nodes that have no prior server-side file, the upload is always +performed — there is nothing to compare against. + +Idempotency is content-only: when the local SHA-1 matches the server +checksum the upload is skipped even if ``name`` differs from the +server-side filename. Use a regular :meth:`upload_from_path` / +:meth:`save` round-trip if you need to rename without changing +content. + +**Args:** + +- `source`: Content to upload. ``bytes`` and ``BinaryIO`` sources +must supply ``name``; for a ``Path`` the filename is derived +from ``source.name`` when ``name`` is omitted. +- `name`: Filename to use on the server. Required for ``bytes`` / +``BinaryIO`` sources. + +**Returns:** + +- class:`UploadResult` with ``was_uploaded=False`` (skipped) or +- ``was_uploaded=True`` (transfer occurred), and the resulting server +- checksum (``None`` only when no server checksum was available +- after the operation). + +**Raises:** + +- `FeatureNotSupportedError`: Node is not a ``CoreFileObject``. +- `ValueError`: ``source`` is ``bytes`` or ``BinaryIO`` and no +``name`` was supplied. + #### `delete` ```python @@ -369,6 +577,27 @@ extract(self, params: dict[str, str]) -> dict[str, Any] Extract some data points defined in a flat notation. +### `UploadResult` + +Outcome of an idempotent upload attempt. + +Returned by :meth:`InfrahubNode.upload_if_changed` and its sync twin. +``was_uploaded`` tells the caller whether a network transfer actually +happened; ``checksum`` carries the SHA-1 of the content held on the +server after the operation — on skip paths that is the server's +pre-existing value, on upload paths it is the locally-computed SHA-1 +used as a proxy (which matches what a standard CoreFileObject server +stores, since the server computes SHA-1 of received bytes). ``None`` +only when no server checksum was available (either the node was +unsaved and nothing was transferred, or the save returned no checksum +value). 
+ +The comparison used by ``upload_if_changed`` reads the node's +``checksum`` attribute, which was populated when the node was +fetched via ``client.get(...)``. A server-side change to the file +between the fetch and the call will not be detected unless the +caller re-fetches the node first. + ### `InfrahubNodeBase` Base class for InfrahubNode and InfrahubNodeSync diff --git a/infrahub_sdk/file_handler.py b/infrahub_sdk/file_handler.py index 779a1bb9..56a7f106 100644 --- a/infrahub_sdk/file_handler.py +++ b/infrahub_sdk/file_handler.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib from dataclasses import dataclass from io import BytesIO from pathlib import Path @@ -13,6 +14,53 @@ if TYPE_CHECKING: from .client import InfrahubClient, InfrahubClientSync +_SHA1_CHUNK_BYTES = 64 * 1024 + + +def sha1_of_source(source: bytes | Path | BinaryIO) -> str: + """Compute the SHA-1 hex digest of an upload/download source. + + Accepts the same shapes as :meth:`FileHandlerBase.prepare_upload` so + callers can compare local content against a server-stored checksum + without materialising the full file in memory. + + Args: + source: The content to hash. ``bytes`` are hashed in one shot. + A ``Path`` is read in 64 KiB chunks. A ``BinaryIO`` is read + from its current position, then rewound so downstream + callers can re-read it. + + Returns: + Lowercase SHA-1 hex digest, matching the algorithm Infrahub + stores in ``CoreFileObject.checksum``. + + Raises: + TypeError: If ``source`` is not one of the supported types. + + """ + hasher = hashlib.sha1(usedforsecurity=False) + + if isinstance(source, bytes): + hasher.update(source) + return hasher.hexdigest() + + if isinstance(source, Path): + with source.open("rb") as fh: + while chunk := fh.read(_SHA1_CHUNK_BYTES): + hasher.update(chunk) + return hasher.hexdigest() + + if hasattr(source, "read") and hasattr(source, "seek"): + start = source.tell() + try: + while chunk := source.read(_SHA1_CHUNK_BYTES): + hasher.update(chunk) + finally: + source.seek(start) + return hasher.hexdigest() + + raise TypeError(f"sha1_of_source expects bytes, Path, or BinaryIO; got {type(source).__name__}") + @dataclass class PreparedFile: diff --git a/infrahub_sdk/node/__init__.py b/infrahub_sdk/node/__init__.py index 2a1c39e5..136100e8 100644 --- a/infrahub_sdk/node/__init__.py +++ b/infrahub_sdk/node/__init__.py @@ -7,11 +7,13 @@ ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE, HFID_STR_SEPARATOR, IP_TYPES, + MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE, PROPERTIES_FLAG, PROPERTIES_OBJECT, SAFE_VALUE, + UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE, ) -from .node import InfrahubNode, InfrahubNodeBase, InfrahubNodeSync +from .node import InfrahubNode, InfrahubNodeBase, InfrahubNodeSync, UploadResult from .parsers import parse_human_friendly_id from .property import NodeProperty from .related_node import RelatedNode, RelatedNodeBase, RelatedNodeSync @@ -23,9 +25,11 @@ "ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE", "HFID_STR_SEPARATOR", "IP_TYPES", + "MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE", "PROPERTIES_FLAG", "PROPERTIES_OBJECT", "SAFE_VALUE", + "UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE", "Attribute", "InfrahubNode", "InfrahubNodeBase", @@ -37,5 +41,6 @@ "RelationshipManager", "RelationshipManagerBase", "RelationshipManagerSync", + "UploadResult", "parse_human_friendly_id", ] diff --git a/infrahub_sdk/node/constants.py b/infrahub_sdk/node/constants.py index 7a0bc6fd..6a56584e 100644 --- a/infrahub_sdk/node/constants.py +++ 
b/infrahub_sdk/node/constants.py @@ -30,6 +30,12 @@ FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE = ( "calling download_file is only supported for nodes that inherit from CoreFileObject" ) +MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE = ( + "calling matches_local_checksum is only supported for nodes that inherit from CoreFileObject" +) +UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE = ( + "calling upload_if_changed is only supported for nodes that inherit from CoreFileObject" +) HIERARCHY_FETCH_FEATURE_NOT_SUPPORTED_MESSAGE = "Hierarchical fields are not supported for this node." diff --git a/infrahub_sdk/node/node.py b/infrahub_sdk/node/node.py index 4c94f6aa..5dea69ee 100644 --- a/infrahub_sdk/node/node.py +++ b/infrahub_sdk/node/node.py @@ -2,12 +2,13 @@ from collections.abc import Iterable from copy import copy, deepcopy +from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, BinaryIO +from typing import TYPE_CHECKING, Any, BinaryIO, overload from ..constants import InfrahubClientMode from ..exceptions import FeatureNotSupportedError, NodeNotFoundError, ResourceNotDefinedError, SchemaNotFoundError -from ..file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile +from ..file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile, sha1_of_source from ..graphql import Mutation, Query from ..schema import ( GenericSchemaAPI, @@ -22,7 +23,9 @@ ARTIFACT_FETCH_FEATURE_NOT_SUPPORTED_MESSAGE, ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE, FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE, + MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE, PROPERTIES_OBJECT, + UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE, ) from .metadata import NodeMetadata from .related_node import RelatedNode, RelatedNodeBase, RelatedNodeSync @@ -37,6 +40,32 @@ from ..types import Order +@dataclass(frozen=True) +class UploadResult: + """Outcome of an idempotent upload attempt. + + Returned by :meth:`InfrahubNode.upload_if_changed` and its sync twin. + ``was_uploaded`` tells the caller whether a network transfer actually + happened; ``checksum`` carries the SHA-1 of the content held on the + server after the operation — on skip paths that is the server's + pre-existing value, on upload paths it is the locally-computed SHA-1 + used as a proxy (which matches what a standard CoreFileObject server + stores, since the server computes SHA-1 of received bytes). ``None`` + only when no server checksum was available (either the node was + unsaved and nothing was transferred, or the save returned no checksum + value). + + The comparison used by ``upload_if_changed`` reads the node's + ``checksum`` attribute, which was populated when the node was + fetched via ``client.get(...)``. A server-side change to the file + between the fetch and the call will not be detected unless the + caller re-fetches the node first. + """ + + was_uploaded: bool + checksum: str | None + + class InfrahubNodeBase: """Base class for InfrahubNode and InfrahubNodeSync""" @@ -742,7 +771,17 @@ async def artifact_fetch(self, name: str) -> str | dict[str, Any]: artifact = await self._client.get(kind="CoreArtifact", name__value=name, object__ids=[self.id]) return await self._client.object_store.get(identifier=artifact._get_attribute(name="storage_id").value) - async def download_file(self, dest: Path | None = None) -> bytes | int: + @overload + async def download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes: ... 
+ + @overload + async def download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int: ... + + async def download_file( + self, + dest: Path | None = None, + skip_if_unchanged: bool = False, + ) -> bytes | int: """Download the file content from this FileObject node. This method is only available for nodes that inherit from CoreFileObject. @@ -753,14 +792,23 @@ async def download_file(self, dest: Path | None = None) -> bytes | int: directly to this path (memory-efficient for large files) and the number of bytes written will be returned. If not provided, the file content will be returned as bytes. + skip_if_unchanged: When ``True``, compute the SHA-1 of the file at + ``dest`` (which must be provided) and compare against the + node's ``checksum`` attribute. If they match, return ``0`` + without hitting the network. The ``checksum`` is the value + loaded when this node was fetched — a later server-side + change to the file will not be detected unless the caller + re-fetches the node first. Returns: If ``dest`` is None: The file content as bytes. If ``dest`` is provided: The number of bytes written to the file. + If ``skip_if_unchanged=True`` and the local file matches the server checksum: ``0``. Raises: FeatureNotSupportedError: If this node doesn't inherit from CoreFileObject. - ValueError: If the node hasn't been saved yet or file not found. + ValueError: If the node hasn't been saved yet, file not found, or + ``skip_if_unchanged=True`` was passed without a ``dest``. AuthenticationError: If authentication fails. Examples: @@ -770,14 +818,131 @@ async def download_file(self, dest: Path | None = None) -> bytes | int: >>> # Stream to file (memory-efficient for large files) >>> bytes_written = await contract.download_file(dest=Path("/tmp/contract.pdf")) + >>> # Skip download if local file already matches server checksum + >>> bytes_written = await contract.download_file( + ... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True + ... ) + """ self._validate_file_object_support(message=FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE) if not self.id: raise ValueError("Cannot download file for a node that hasn't been saved yet.") + if skip_if_unchanged: + if dest is None: + raise ValueError("skip_if_unchanged requires dest to be provided") + if dest.exists() and dest.is_file(): + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is not None and sha1_of_source(dest) == server_checksum.value: # type: ignore[union-attr] + return 0 + return await self._file_handler.download(node_id=self.id, branch=self._branch, dest=dest) + async def matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool: + """Return True if ``source``'s SHA-1 matches this node's server checksum. + + Only available for nodes inheriting from ``CoreFileObject``. Callers + that want to branch on the comparison without invoking a transfer + should use this primitive instead of reading ``node.checksum.value`` + and hashing ``source`` themselves, so the hashing convention stays + centralised in the SDK. + + The comparison is against the ``checksum`` attribute as loaded + when this node was retrieved from the server. If the server's + file has been replaced since the node was fetched, this method + will not see that change — re-fetch the node to refresh the + checksum before comparing. + + Args: + source: Local content to hash and compare. Accepts the same + shapes as :func:`infrahub_sdk.file_handler.sha1_of_source`. 
+ + Returns: + True if the local digest equals the server's stored checksum. + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: Node has no server-side checksum yet (unsaved or + file never attached). + + """ + self._validate_file_object_support(message=MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE) + + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is None: # type: ignore[union-attr] + raise ValueError( + f"{self._schema.kind} node has no server-side checksum; " + "ensure the node has been saved with file content attached before comparing." + ) + + return sha1_of_source(source) == server_checksum.value # type: ignore[union-attr] + + async def upload_if_changed( + self, + source: bytes | Path | BinaryIO, + name: str | None = None, + ) -> UploadResult: + """Upload ``source`` only if its SHA-1 differs from the server checksum. + + Composes :meth:`matches_local_checksum` with :meth:`upload_from_path` + (or :meth:`upload_from_bytes`) and :meth:`save`. For unsaved nodes or + nodes that have no prior server-side file, the upload is always + performed — there is nothing to compare against. + + Idempotency is content-only: when the local SHA-1 matches the server + checksum the upload is skipped even if ``name`` differs from the + server-side filename. Use a regular :meth:`upload_from_path` / + :meth:`save` round-trip if you need to rename without changing + content. + + Args: + source: Content to upload. ``bytes`` and ``BinaryIO`` sources + must supply ``name``; for a ``Path`` the filename is derived + from ``source.name`` when ``name`` is omitted. + name: Filename to use on the server. Required for ``bytes`` / + ``BinaryIO`` sources. + + Returns: + :class:`UploadResult` with ``was_uploaded=False`` (skipped) or + ``was_uploaded=True`` (transfer occurred), and the resulting server + checksum (``None`` only when no server checksum was available + after the operation). + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: ``source`` is ``bytes`` or ``BinaryIO`` and no + ``name`` was supplied. + + """ + self._validate_file_object_support(message=UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE) + + resolved_name: str | None = name + if resolved_name is None and isinstance(source, Path): + resolved_name = source.name + if resolved_name is None: + raise ValueError("name is required when source is bytes or BinaryIO") + + # Short-circuit only if we have a server checksum to compare against. + server_checksum = self.checksum # type: ignore[attr-defined] + have_server_state = bool(self.id) and server_checksum.value is not None # type: ignore[union-attr] + + # Compute digest before staging — source may only be readable once. + local_digest = sha1_of_source(source) + + if have_server_state and local_digest == server_checksum.value: # type: ignore[union-attr] + return UploadResult(was_uploaded=False, checksum=server_checksum.value) # type: ignore[union-attr] + + # Either no server state, or checksum mismatched — stage + save. 
+ if isinstance(source, Path): + self.upload_from_path(path=source) + else: + self.upload_from_bytes(content=source, name=resolved_name) + + await self.save() + + return UploadResult(was_uploaded=True, checksum=local_digest) + async def delete(self, timeout: int | None = None, request_context: RequestContext | None = None) -> None: input_data = {"data": {"id": self.id}} if context_data := self._get_request_context(request_context=request_context): @@ -1562,7 +1727,17 @@ def artifact_fetch(self, name: str) -> str | dict[str, Any]: artifact = self._client.get(kind="CoreArtifact", name__value=name, object__ids=[self.id]) return self._client.object_store.get(identifier=artifact._get_attribute(name="storage_id").value) - def download_file(self, dest: Path | None = None) -> bytes | int: + @overload + def download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes: ... + + @overload + def download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int: ... + + def download_file( + self, + dest: Path | None = None, + skip_if_unchanged: bool = False, + ) -> bytes | int: """Download the file content from this FileObject node. This method is only available for nodes that inherit from CoreFileObject. @@ -1573,14 +1748,23 @@ def download_file(self, dest: Path | None = None) -> bytes | int: directly to this path (memory-efficient for large files) and the number of bytes written will be returned. If not provided, the file content will be returned as bytes. + skip_if_unchanged: When ``True``, compute the SHA-1 of the file at + ``dest`` (which must be provided) and compare against the + node's ``checksum`` attribute. If they match, return ``0`` + without hitting the network. The ``checksum`` is the value + loaded when this node was fetched — a later server-side + change to the file will not be detected unless the caller + re-fetches the node first. Returns: If ``dest`` is None: The file content as bytes. If ``dest`` is provided: The number of bytes written to the file. + If ``skip_if_unchanged=True`` and the local file matches the server checksum: ``0``. Raises: FeatureNotSupportedError: If this node doesn't inherit from CoreFileObject. - ValueError: If the node hasn't been saved yet or file not found. + ValueError: If the node hasn't been saved yet, file not found, or + ``skip_if_unchanged=True`` was passed without a ``dest``. AuthenticationError: If authentication fails. Examples: @@ -1590,14 +1774,131 @@ def download_file(self, dest: Path | None = None) -> bytes | int: >>> # Stream to file (memory-efficient for large files) >>> bytes_written = contract.download_file(dest=Path("/tmp/contract.pdf")) + >>> # Skip download if local file already matches server checksum + >>> bytes_written = contract.download_file( + ... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True + ... 
) + """ self._validate_file_object_support(message=FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE) if not self.id: raise ValueError("Cannot download file for a node that hasn't been saved yet.") + if skip_if_unchanged: + if dest is None: + raise ValueError("skip_if_unchanged requires dest to be provided") + if dest.exists() and dest.is_file(): + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is not None and sha1_of_source(dest) == server_checksum.value: # type: ignore[union-attr] + return 0 + return self._file_handler.download(node_id=self.id, branch=self._branch, dest=dest) + def matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool: + """Return True if ``source``'s SHA-1 matches this node's server checksum. + + Only available for nodes inheriting from ``CoreFileObject``. Callers + that want to branch on the comparison without invoking a transfer + should use this primitive instead of reading ``node.checksum.value`` + and hashing ``source`` themselves, so the hashing convention stays + centralised in the SDK. + + The comparison is against the ``checksum`` attribute as loaded + when this node was retrieved from the server. If the server's + file has been replaced since the node was fetched, this method + will not see that change — re-fetch the node to refresh the + checksum before comparing. + + Args: + source: Local content to hash and compare. Accepts the same + shapes as :func:`infrahub_sdk.file_handler.sha1_of_source`. + + Returns: + True if the local digest equals the server's stored checksum. + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: Node has no server-side checksum yet (unsaved or + file never attached). + + """ + self._validate_file_object_support(message=MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE) + + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is None: # type: ignore[union-attr] + raise ValueError( + f"{self._schema.kind} node has no server-side checksum; " + "ensure the node has been saved with file content attached before comparing." + ) + + return sha1_of_source(source) == server_checksum.value # type: ignore[union-attr] + + def upload_if_changed( + self, + source: bytes | Path | BinaryIO, + name: str | None = None, + ) -> UploadResult: + """Upload ``source`` only if its SHA-1 differs from the server checksum. + + Composes :meth:`matches_local_checksum` with :meth:`upload_from_path` + (or :meth:`upload_from_bytes`) and :meth:`save`. For unsaved nodes or + nodes that have no prior server-side file, the upload is always + performed — there is nothing to compare against. + + Idempotency is content-only: when the local SHA-1 matches the server + checksum the upload is skipped even if ``name`` differs from the + server-side filename. Use a regular :meth:`upload_from_path` / + :meth:`save` round-trip if you need to rename without changing + content. + + Args: + source: Content to upload. ``bytes`` and ``BinaryIO`` sources + must supply ``name``; for a ``Path`` the filename is derived + from ``source.name`` when ``name`` is omitted. + name: Filename to use on the server. Required for ``bytes`` / + ``BinaryIO`` sources. + + Returns: + :class:`UploadResult` with ``was_uploaded=False`` (skipped) or + ``was_uploaded=True`` (transfer occurred), and the resulting server + checksum (``None`` only when no server checksum was available + after the operation). + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. 
+ ValueError: ``source`` is ``bytes`` or ``BinaryIO`` and no + ``name`` was supplied. + + """ + self._validate_file_object_support(message=UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE) + + resolved_name: str | None = name + if resolved_name is None and isinstance(source, Path): + resolved_name = source.name + if resolved_name is None: + raise ValueError("name is required when source is bytes or BinaryIO") + + # Short-circuit only if we have a server checksum to compare against. + server_checksum = self.checksum # type: ignore[attr-defined] + have_server_state = bool(self.id) and server_checksum.value is not None # type: ignore[union-attr] + + # Compute digest before staging — source may only be readable once. + local_digest = sha1_of_source(source) + + if have_server_state and local_digest == server_checksum.value: # type: ignore[union-attr] + return UploadResult(was_uploaded=False, checksum=server_checksum.value) # type: ignore[union-attr] + + # Either no server state, or checksum mismatched — stage + save. + if isinstance(source, Path): + self.upload_from_path(path=source) + else: + self.upload_from_bytes(content=source, name=resolved_name) + + self.save() + + return UploadResult(was_uploaded=True, checksum=local_digest) + def delete(self, timeout: int | None = None, request_context: RequestContext | None = None) -> None: input_data = {"data": {"id": self.id}} if context_data := self._get_request_context(request_context=request_context): diff --git a/tests/unit/sdk/test_file_handler.py b/tests/unit/sdk/test_file_handler.py index ae59c842..f8ffacfa 100644 --- a/tests/unit/sdk/test_file_handler.py +++ b/tests/unit/sdk/test_file_handler.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib import tempfile from io import BytesIO from pathlib import Path @@ -10,7 +11,7 @@ import pytest from infrahub_sdk.exceptions import AuthenticationError, NodeNotFoundError -from infrahub_sdk.file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile +from infrahub_sdk.file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile, sha1_of_source if TYPE_CHECKING: from pytest_httpx import HTTPXMock @@ -303,3 +304,47 @@ async def test_file_handler_build_url_without_branch(client_type: str, clients: url = handler._build_url(node_id="node-456", branch=None) assert url == "http://mock/api/storage/files/node-456" + + +KNOWN_CONTENT = b"hello infrahub" +KNOWN_SHA1 = hashlib.sha1(KNOWN_CONTENT, usedforsecurity=False).hexdigest() + + +class TestSha1OfSource: + def test_bytes_matches_known_digest(self) -> None: + assert sha1_of_source(KNOWN_CONTENT) == KNOWN_SHA1 + + def test_path_matches_known_digest(self, tmp_path: Path) -> None: + target = tmp_path / "sample.bin" + target.write_bytes(KNOWN_CONTENT) + assert sha1_of_source(target) == KNOWN_SHA1 + + def test_binaryio_matches_known_digest(self) -> None: + stream = BytesIO(KNOWN_CONTENT) + assert sha1_of_source(stream) == KNOWN_SHA1 + + def test_binaryio_resets_position(self) -> None: + stream = BytesIO(KNOWN_CONTENT) + sha1_of_source(stream) + # Hashing must not consume the stream — later callers (upload_from_bytes) + # still need to read it. + assert stream.read() == KNOWN_CONTENT + + def test_large_file_streams_without_full_read(self, tmp_path: Path) -> None: + # 2 MiB — bigger than the 64 KiB chunk to exercise the streaming loop. 
+ payload = b"x" * (2 * 1024 * 1024) + target = tmp_path / "big.bin" + target.write_bytes(payload) + expected = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + assert sha1_of_source(target) == expected + + def test_rejects_none(self) -> None: + with pytest.raises(TypeError): + sha1_of_source(None) # type: ignore[arg-type] + + def test_binaryio_resets_to_original_position_not_start(self) -> None: + stream = BytesIO(b"prefixhello") + stream.read(6) # advance to position 6, so only b"hello" remains + digest = sha1_of_source(stream) + assert digest == hashlib.sha1(b"hello", usedforsecurity=False).hexdigest() + assert stream.tell() == 6 # rewound to the original non-zero position, not 0 diff --git a/tests/unit/sdk/test_file_object.py b/tests/unit/sdk/test_file_object.py index f6267003..b64d9f09 100644 --- a/tests/unit/sdk/test_file_object.py +++ b/tests/unit/sdk/test_file_object.py @@ -1,3 +1,4 @@ +import hashlib import tempfile from pathlib import Path @@ -6,7 +7,7 @@ from pytest_httpx import HTTPXMock from infrahub_sdk.exceptions import FeatureNotSupportedError -from infrahub_sdk.node import InfrahubNode, InfrahubNodeSync +from infrahub_sdk.node import InfrahubNode, InfrahubNodeSync, UploadResult from infrahub_sdk.schema import NodeSchemaAPI from tests.unit.sdk.conftest import BothClients @@ -293,3 +294,463 @@ async def test_node_download_file_unsaved_node_raises( node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") with pytest.raises(ValueError, match=r"Cannot download file for a node that hasn't been saved yet"): node.download_file() + + +class TestUploadResult: + def test_carries_was_uploaded_and_checksum(self) -> None: + result = UploadResult(was_uploaded=True, checksum="abc123") + assert result.was_uploaded is True + assert result.checksum == "abc123" + + def test_checksum_optional(self) -> None: + result = UploadResult(was_uploaded=False, checksum=None) + assert result.checksum is None + + +@pytest.mark.parametrize("client_type", client_types) +class TestMatchesLocalChecksum: + async def test_bytes_match(self, client_type: str, clients: BothClients, file_object_schema: NodeSchemaAPI) -> None: + payload = b"matching content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + node.checksum.value = digest # type: ignore[attr-defined] + + if isinstance(node, InfrahubNode): + assert await node.matches_local_checksum(payload) is True + else: + assert node.matches_local_checksum(payload) is True + + async def test_bytes_differ( + self, client_type: str, clients: BothClients, file_object_schema: NodeSchemaAPI + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + node.checksum.value = "different-digest" # type: ignore[attr-defined] + + if isinstance(node, InfrahubNode): + assert await node.matches_local_checksum(b"hello world") is False + else: + assert node.matches_local_checksum(b"hello world") is False + + async def test_path_source( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + ) -> None: + 
payload = b"file on disk" + target = tmp_path / "f.bin" + target.write_bytes(payload) + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + node.checksum.value = digest # type: ignore[attr-defined] + + if isinstance(node, InfrahubNode): + assert await node.matches_local_checksum(target) is True + else: + assert node.matches_local_checksum(target) is True + + async def test_raises_for_non_file_object( + self, client_type: str, clients: BothClients, non_file_object_schema: NodeSchemaAPI + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=non_file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=non_file_object_schema, branch="main") + + if isinstance(node, InfrahubNode): + with pytest.raises( + FeatureNotSupportedError, + match=r"calling matches_local_checksum is only supported", + ): + await node.matches_local_checksum(b"anything") + else: + with pytest.raises( + FeatureNotSupportedError, + match=r"calling matches_local_checksum is only supported", + ): + node.matches_local_checksum(b"anything") + + async def test_raises_when_no_server_checksum( + self, client_type: str, clients: BothClients, file_object_schema: NodeSchemaAPI + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + # Do NOT set node.checksum.value — default is None. + + if isinstance(node, InfrahubNode): + with pytest.raises(ValueError, match=r"has no server-side checksum"): + await node.matches_local_checksum(b"anything") + else: + with pytest.raises(ValueError, match=r"has no server-side checksum"): + node.matches_local_checksum(b"anything") + + +@pytest.mark.parametrize("client_type", client_types) +class TestUploadIfChanged: + async def test_skips_when_checksum_matches( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + httpx_mock: HTTPXMock, + ) -> None: + payload = b"unchanged content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "already-on-server" + node._existing = True + node.checksum.value = digest # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=payload, name="f.bin") + else: + result = node.upload_if_changed(source=payload, name="f.bin") + + assert isinstance(result, UploadResult) + assert result.was_uploaded is False + assert result.checksum == digest + # No HTTP request should have been issued. 
+ assert httpx_mock.get_requests() == [] + + async def test_uploads_when_checksum_differs( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_node_update_with_file: HTTPXMock, + ) -> None: + new_content = b"new content" + expected_digest = hashlib.sha1(new_content, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "existing-file-node-456" + node._existing = True + node.checksum.value = "old-server-digest" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=new_content, name="f.bin") + else: + result = node.upload_if_changed(source=new_content, name="f.bin") + + assert result.was_uploaded is True + # Post-save checksum is the locally computed SHA-1 of the uploaded content. + assert result.checksum == expected_digest + # Positive-path HTTP verification: the update mutation must have been dispatched as a multipart request. + requests = mock_node_update_with_file.get_requests() + assert len(requests) == 1 + assert requests[0].headers.get("x-infrahub-tracker") == "mutation-networkcircuitcontract-update" + assert requests[0].headers.get("content-type").startswith("multipart/form-data;") + assert b'filename="f.bin"' in requests[0].content + + async def test_uploads_when_node_unsaved( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_node_create_with_file: HTTPXMock, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + # Do NOT set node.id — unsaved. + + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=b"initial content", name=FILE_NAME) + else: + result = node.upload_if_changed(source=b"initial content", name=FILE_NAME) + + assert result.was_uploaded is True + assert result.checksum is not None + # Positive-path HTTP verification: the create mutation must have been dispatched as a multipart request. + requests = mock_node_create_with_file.get_requests() + assert len(requests) == 1 + assert requests[0].headers.get("x-infrahub-tracker") == "mutation-networkcircuitcontract-create" + assert requests[0].headers.get("content-type").startswith("multipart/form-data;") + assert f'filename="{FILE_NAME}"'.encode() in requests[0].content + + async def test_derives_name_from_path( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_node_update_with_file: HTTPXMock, + tmp_path: Path, + ) -> None: + target = tmp_path / "derived-name.bin" + target.write_bytes(b"content") + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "existing-file-node-456" + node._existing = True + node.checksum.value = "old-server-digest" # type: ignore[attr-defined, union-attr] + + # No explicit name — should derive from target.name internally. 
+ if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=target) + else: + result = node.upload_if_changed(source=target) + + assert result.was_uploaded is True + + async def test_requires_name_for_bytes( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "some-id" + node.checksum.value = "x" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + with pytest.raises(ValueError, match=r"name is required"): + await node.upload_if_changed(source=b"bytes content") # no name supplied + else: + with pytest.raises(ValueError, match=r"name is required"): + node.upload_if_changed(source=b"bytes content") # no name supplied + + async def test_raises_for_non_file_object( + self, + client_type: str, + clients: BothClients, + non_file_object_schema: NodeSchemaAPI, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=non_file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=non_file_object_schema, branch="main") + + if isinstance(node, InfrahubNode): + with pytest.raises( + FeatureNotSupportedError, + match=r"calling upload_if_changed is only supported", + ): + await node.upload_if_changed(source=b"x", name="f.bin") + else: + with pytest.raises( + FeatureNotSupportedError, + match=r"calling upload_if_changed is only supported", + ): + node.upload_if_changed(source=b"x", name="f.bin") + + +@pytest.mark.parametrize("client_type", client_types) +class TestDownloadSkipIfUnchanged: + async def test_skip_when_local_matches( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + httpx_mock: HTTPXMock, + ) -> None: + payload = b"identical content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + dest = tmp_path / "local.bin" + dest.write_bytes(payload) + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-skip" + node.checksum.value = digest # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + bytes_written = await node.download_file(dest=dest, skip_if_unchanged=True) + else: + bytes_written = node.download_file(dest=dest, skip_if_unchanged=True) + + assert bytes_written == 0 + # pytest-httpx raises if any unregistered request is attempted; this also asserts + # that zero requests were made at all. 
+ assert httpx_mock.get_requests() == [] + + async def test_downloads_when_local_differs( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + mock_download_file_to_disk: HTTPXMock, # existing fixture + ) -> None: + dest = tmp_path / "local.bin" + dest.write_bytes(b"stale content") # different from FILE_CONTENT + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-stream" # id matches mock_download_file_to_disk + node.checksum.value = "server-digest-different-from-local" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + bytes_written = await node.download_file(dest=dest, skip_if_unchanged=True) + else: + bytes_written = node.download_file(dest=dest, skip_if_unchanged=True) + + assert bytes_written == len(FILE_CONTENT) + assert dest.read_bytes() == FILE_CONTENT + # Positive-path HTTP verification: the GET to the storage endpoint must have fired. + download_requests = [ + r + for r in mock_download_file_to_disk.get_requests() + if r.method == "GET" and "/api/storage/files/" in r.url.path + ] + assert len(download_requests) == 1 + + async def test_downloads_when_dest_missing( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + mock_download_file_to_disk: HTTPXMock, + ) -> None: + dest = tmp_path / "missing.bin" # does not exist + assert not dest.exists() + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-stream" + node.checksum.value = "any-digest" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + bytes_written = await node.download_file(dest=dest, skip_if_unchanged=True) + else: + bytes_written = node.download_file(dest=dest, skip_if_unchanged=True) + + assert bytes_written == len(FILE_CONTENT) + assert dest.exists() + + async def test_raises_when_skip_without_dest( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-1" + node.checksum.value = "any-digest" # type: ignore[attr-defined, union-attr] + + with pytest.raises(ValueError, match=r"skip_if_unchanged requires dest"): + if isinstance(node, InfrahubNode): + await node.download_file(dest=None, skip_if_unchanged=True) + else: + node.download_file(dest=None, skip_if_unchanged=True) + + async def test_default_behavior_unchanged( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_download_file: HTTPXMock, # existing fixture for in-memory download + ) -> None: + # skip_if_unchanged defaults to False — download always occurs. 
+ client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-123" # matches mock_download_file + + if isinstance(node, InfrahubNode): + content = await node.download_file() # no flag + else: + content = node.download_file() # no flag + + assert isinstance(content, bytes) + assert content == FILE_CONTENT + + async def test_skip_raises_for_unsaved_node( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + ) -> None: + # Unsaved node (no id) with a dest whose checksum happens to match + # the node's checksum attribute should still raise the unsaved-node + # ValueError, not silently return 0. + payload = b"content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + dest = tmp_path / "local.bin" + dest.write_bytes(payload) + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + # Do NOT set node.id — unsaved. + node.checksum.value = digest # type: ignore[attr-defined, union-attr] + + with pytest.raises(ValueError, match=r"hasn't been saved yet"): + if isinstance(node, InfrahubNode): + await node.download_file(dest=dest, skip_if_unchanged=True) + else: + node.download_file(dest=dest, skip_if_unchanged=True) From 5f4f656964e80d8989bea832fbed277a5c6d6f2b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 08:49:54 -0700 Subject: [PATCH 3/4] chore(deps-dev): bump infrahub-testcontainers from 1.9.2 to 1.9.3 (#1012) Bumps [infrahub-testcontainers](https://github.com/opsmill/infrahub) from 1.9.2 to 1.9.3. - [Release notes](https://github.com/opsmill/infrahub/releases) - [Changelog](https://github.com/opsmill/infrahub/blob/stable/CHANGELOG.md) - [Commits](https://github.com/opsmill/infrahub/compare/infrahub-v1.9.2...infrahub-v1.9.3) --- updated-dependencies: - dependency-name: infrahub-testcontainers dependency-version: 1.9.3 dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index 673a4e2c..1446e0a0 100644 --- a/uv.lock +++ b/uv.lock @@ -878,7 +878,7 @@ types = [ [[package]] name = "infrahub-testcontainers" -version = "1.9.2" +version = "1.9.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -888,9 +888,9 @@ dependencies = [ { name = "pytest" }, { name = "testcontainers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/b8/c04007377ec72e497935b0a947efff90ed3fd65fedb5d43ea5f5a8a6014d/infrahub_testcontainers-1.9.2.tar.gz", hash = "sha256:77beccfb5818b58ae4bc4c33271bd61e0ba122b204b236c3a0d9e58d32ea44e2", size = 17363, upload-time = "2026-04-30T15:17:43.049Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/d7/e94e83dbed6db2a8f3aa1c2edc6daef18c134603a5a7ea85f2df2fc19160/infrahub_testcontainers-1.9.3.tar.gz", hash = "sha256:aefbe23de210df9a2f30a2b1324219261e4c657ed204691f4a62b5b8650b3db9", size = 17362, upload-time = "2026-05-05T10:01:29.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/35/50/4c70ad3fa2f423332e694f00900d7a3e951872aacf9a8bdd1ade3ae7ca9e/infrahub_testcontainers-1.9.2-py3-none-any.whl", hash = "sha256:dfae768d7b67cf27c451cf2e49feeef7515db301a71310d8fc01b998487f3bbd", size = 23189, upload-time = "2026-04-30T15:17:42.075Z" }, + { url = "https://files.pythonhosted.org/packages/59/cd/5d98a82b31dfba2b115a7054055e48dfb76fffed1f95f3c1f8921c98bf7a/infrahub_testcontainers-1.9.3-py3-none-any.whl", hash = "sha256:ba56a1dcdd205b238741c6f18e152d91216341f2a54c3a7ee8e924c4db20cdaa", size = 23192, upload-time = "2026-05-05T10:01:30.626Z" }, ] [[package]] From 012bec5b5e4c7d4678ca4a6fe7772b286e5b7c99 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 11:51:36 -0700 Subject: [PATCH 4/4] chore(deps): bump urllib3 from 2.6.3 to 2.7.0 (#1013) Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.6.3 to 2.7.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.6.3...2.7.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.7.0 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index 1446e0a0..6e64d9e4 100644 --- a/uv.lock +++ b/uv.lock @@ -2953,11 +2953,11 @@ wheels = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, + { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" }, ] [[package]]
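The file-object helpers introduced in PATCH 1/4 are meant to compose into an idempotent sync workflow. Below is a minimal usage sketch, not part of the patch itself: the server address, the `NetworkCircuitContract` kind, and the `name__value` filter are illustrative placeholders (the kind name is borrowed from the unit-test tracker labels); any node kind inheriting from `CoreFileObject` would work the same way.

```python
# Hypothetical end-to-end sketch of the new CoreFileObject helpers.
# The server address, node kind, and filter value are assumptions for
# illustration only; substitute values from your own environment.
from pathlib import Path

from infrahub_sdk import Config, InfrahubClientSync

client = InfrahubClientSync(config=Config(address="http://localhost:8000"))
contract = client.get(kind="NetworkCircuitContract", name__value="contract-001")

local_copy = Path("/tmp/contract.pdf")

# Pull only when the local copy is missing or stale; returns 0 when skipped.
bytes_written = contract.download_file(dest=local_copy, skip_if_unchanged=True)

# Cheap comparison primitive: hash the local file and compare it against the
# checksum loaded when the node was fetched, without transferring anything.
in_sync = contract.matches_local_checksum(local_copy)

# Push only when the content actually changed; the filename is derived from
# the Path, and the result reports whether a transfer happened.
result = contract.upload_if_changed(source=local_copy)
print(result.was_uploaded, result.checksum)
```

The synchronous client is shown for brevity; the asynchronous `InfrahubNode` variants take the same arguments and are awaited.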