diff --git a/docs/user/next/advanced/HackTheToolchain.md b/docs/user/next/advanced/HackTheToolchain.md index 785cc0b24d..74cf66e1da 100644 --- a/docs/user/next/advanced/HackTheToolchain.md +++ b/docs/user/next/advanced/HackTheToolchain.md @@ -59,7 +59,7 @@ class PureCpp2WorkflowFactory(gtx.program_processors.runners.gtfn.GTFNCompileWor translation: workflow.Workflow[ gtx.otf.definitions.CompilableProgramDef, gtx.otf.stages.ProgramSource ] = MyCodeGen() - bindings: workflow.Workflow[gtx.otf.stages.ProgramSource, gtx.otf.stages.CompilableProject] = ( + bindings: workflow.Workflow[gtx.otf.stages.ProgramSource, gtx.otf.stages.ExtensionSource] = ( Cpp2BindingsGen() ) diff --git a/src/gt4py/next/config.py b/src/gt4py/next/config.py index a9fa290941..330362c667 100644 --- a/src/gt4py/next/config.py +++ b/src/gt4py/next/config.py @@ -104,21 +104,18 @@ def _get_build_cache_version_id() -> str: #: Build type to be used when CMake is used to compile generated code. #: Might have no effect when CMake is not used as part of the toolchain. -# FIXME[#2447](egparedes): compile-time setting, should be included in the build cache key. CMAKE_BUILD_TYPE: CMakeBuildType = CMakeBuildType[ os.environ.get("GT4PY_CMAKE_BUILD_TYPE", "debug" if DEBUG else "release").upper() ] #: Experimental, use at your own risk: assume horizontal dimension has stride 1 -# FIXME[#2447](egparedes): compile-time setting, should be included in the build cache key. UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE: bool = env_flag_to_bool( "GT4PY_UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE", default=False ) #: Add GPU trace markers (NVTX, ROC-TX) to the generated code, at compile time. -# FIXME[#2447](egparedes): compile-time setting, should be included in the build cache key. 
ADD_GPU_TRACE_MARKERS: bool = env_flag_to_bool("GT4PY_ADD_GPU_TRACE_MARKERS", default=False) diff --git a/src/gt4py/next/otf/binding/nanobind.py b/src/gt4py/next/otf/binding/nanobind.py index b92355a85f..a353dcff3e 100644 --- a/src/gt4py/next/otf/binding/nanobind.py +++ b/src/gt4py/next/otf/binding/nanobind.py @@ -10,6 +10,7 @@ from __future__ import annotations +import dataclasses from collections.abc import Collection from typing import Any, Optional, Sequence, TypeVar, Union @@ -200,7 +201,9 @@ def _tuple_get(index: int, var: str) -> str: return f"gridtools::tuple_util::get<{index}>({var})" -def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: +def make_argument( + name: str, type_: ts.TypeSpec, unstructured_horizontal_has_unit_stride: bool +) -> str | BufferSID | Tuple: if isinstance(type_, ts.FieldType): return BufferSID( source_buffer=name, @@ -209,7 +212,7 @@ def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: name=dim.value, static_stride=1 if ( - config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + unstructured_horizontal_has_unit_stride and dim.kind == common.DimensionKind.HORIZONTAL ) else None, @@ -219,7 +222,10 @@ def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: scalar_type=type_.dtype, ) elif isinstance(type_, ts.TupleType): - elements = [make_argument(_tuple_get(i, name), t) for i, t in enumerate(type_.types)] + elements = [ + make_argument(_tuple_get(i, name), t, unstructured_horizontal_has_unit_stride) + for i, t in enumerate(type_.types) + ] return Tuple(elems=elements) elif isinstance(type_, ts.ScalarType): return name @@ -228,7 +234,7 @@ def make_argument(name: str, type_: ts.TypeSpec) -> str | BufferSID | Tuple: def create_bindings( - program_source: stages.ProgramSource[CodeSpecT], + program_source: stages.ProgramSource[CodeSpecT], unstructured_horizontal_has_unit_stride: bool ) -> stages.BindingSource[CodeSpecT, code_specs.PythonCodeSpec]: """ Generate Python bindings 
through which a C++ function can be called. @@ -274,7 +280,9 @@ def create_bindings( expr=FunctionCall( target=program_source.entry_point, args=[ - make_argument(param.name, param.type_) + make_argument( + param.name, param.type_, unstructured_horizontal_has_unit_stride + ) for param in program_source.entry_point.parameters ], ) @@ -305,7 +313,18 @@ def create_bindings( return stages.BindingSource(src, (interface.LibraryDependency("nanobind", "2.0.0"),)) -def bind_source( - inp: stages.ProgramSource[CodeSpecT], -) -> stages.CompilableProject[CodeSpecT, code_specs.PythonCodeSpec]: - return stages.CompilableProject(program_source=inp, binding_source=create_bindings(inp)) +@dataclasses.dataclass(frozen=True) +class ExtensionGenerator: + """ + Generate a Python extension module that contains the bindings for a C++ function. + """ + + unstructured_horizontal_has_unit_stride: bool = config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + + def __call__( + self, program_source: stages.ProgramSource[CodeSpecT] + ) -> stages.ExtensionSource[CodeSpecT, code_specs.PythonCodeSpec]: + binding_source = create_bindings( + program_source, self.unstructured_horizontal_has_unit_stride + ) + return stages.ExtensionSource(program_source=program_source, binding_source=binding_source) diff --git a/src/gt4py/next/otf/compilation/build_systems/cmake.py b/src/gt4py/next/otf/compilation/build_systems/cmake.py index 1b79cad6e4..9942de6fa8 100644 --- a/src/gt4py/next/otf/compilation/build_systems/cmake.py +++ b/src/gt4py/next/otf/compilation/build_systems/cmake.py @@ -66,7 +66,7 @@ def get_cmake_device_arch_option() -> str: class CMakeFactory( compiler.BuildSystemProjectGenerator[CPPLikeCodeSpecT, code_specs.PythonCodeSpec] ): - """Create a CMakeProject from a ``CompilableSource`` stage object with given CMake settings.""" + """Create a CMakeProject from an ``ExtensionSource`` stage object with given CMake settings.""" cmake_generator_name: str = "Ninja" cmake_build_type: config.CMakeBuildType = 
config.CMakeBuildType.DEBUG @@ -74,7 +74,7 @@ class CMakeFactory( def __call__( self, - source: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + source: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], cache_lifetime: config.BuildCacheLifetime, ) -> CMakeProject: if not source.binding_source: diff --git a/src/gt4py/next/otf/compilation/build_systems/compiledb.py b/src/gt4py/next/otf/compilation/build_systems/compiledb.py index 347b0e25e9..b0f9466d53 100644 --- a/src/gt4py/next/otf/compilation/build_systems/compiledb.py +++ b/src/gt4py/next/otf/compilation/build_systems/compiledb.py @@ -17,7 +17,7 @@ from typing import Optional, TypeVar from gt4py._core import locking -from gt4py.next import config, errors +from gt4py.next import config, errors, fingerprinting from gt4py.next.otf import code_specs, stages from gt4py.next.otf.binding import interface from gt4py.next.otf.compilation import build_data, cache, compiler @@ -32,7 +32,7 @@ class CompiledbFactory( compiler.BuildSystemProjectGenerator[CPPLikeCodeSpecT, code_specs.PythonCodeSpec] ): """ - Create a CompiledbProject from a ``CompilableSource`` stage object with given CMake settings. + Create a CompiledbProject from an ``ExtensionSource`` stage object with given CMake settings. Use CMake to generate a compiledb with the required sequence of build commands. 
Generate a compiledb only if there isn't one for the given combination of cmake configuration @@ -45,7 +45,7 @@ class CompiledbFactory( def __call__( self, - source: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + source: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], cache_lifetime: config.BuildCacheLifetime, ) -> CompiledbProject: if not source.binding_source: @@ -72,7 +72,11 @@ def __call__( ) return CompiledbProject( - root_path=cache.get_cache_folder(source, cache_lifetime), + root_path=cache.get_cache_folder( + source, + cache_lifetime, + build_context_id=fingerprinting.strict_fingerprinter(self), + ), program_name=name, source_files={ header_name: source.program_source.source_code, @@ -264,16 +268,19 @@ def _cc_get_compiledb( cmake_flags: list[str], cache_lifetime: config.BuildCacheLifetime, ) -> pathlib.Path: - cache_path = cache.get_cache_folder( - stages.CompilableProject(prototype_program_source, None), cache_lifetime + # Use the same prototype source (with empty bindings) for both locating and creating the + # compiledb, so `get_cache_folder` names the same folder in either path. + prototype_source: stages.ExtensionSource = stages.ExtensionSource( + prototype_program_source, stages.BindingSource(source_code="", library_deps=()) ) + cache_path = cache.get_cache_folder(prototype_source, cache_lifetime) # In a multi-threaded environment, multiple threads may try to create the compiledb at the same time # leading to compilation errors. 
with locking.lock(cache_path): if renew_compiledb or not (compiled_db := _cc_find_compiledb(path=cache_path)): compiled_db = _cc_create_compiledb( - prototype_program_source=prototype_program_source, + prototype_source=prototype_source, build_type=build_type, cmake_flags=cmake_flags, cache_lifetime=cache_lifetime, @@ -292,7 +299,7 @@ def _cc_find_compiledb(path: pathlib.Path) -> Optional[pathlib.Path]: def _cc_create_compiledb( - prototype_program_source: stages.ProgramSource, + prototype_source: stages.ExtensionSource, build_type: config.CMakeBuildType, cmake_flags: list[str], cache_lifetime: config.BuildCacheLifetime, @@ -302,18 +309,17 @@ def _cc_create_compiledb( cmake_build_type=build_type, cmake_extra_flags=cmake_flags, )( - stages.CompilableProject( - prototype_program_source, stages.BindingSource(source_code="", library_deps=()) - ), + prototype_source, cache_lifetime, ) path = prototype_project.root_path name = prototype_project.program_name + file_extension = prototype_source.program_source.code_spec.file_extension binding_src_name = next( name for name in prototype_project.source_files.keys() - if name.endswith(f"_bindings.{prototype_program_source.code_spec.file_extension}") + if name.endswith(f"_bindings.{file_extension}") ) prototype_project.build() diff --git a/src/gt4py/next/otf/compilation/cache.py b/src/gt4py/next/otf/compilation/cache.py index ebc28f10a5..76f00eebce 100644 --- a/src/gt4py/next/otf/compilation/cache.py +++ b/src/gt4py/next/otf/compilation/cache.py @@ -8,13 +8,11 @@ """Caching for compiled backend artifacts.""" -import hashlib import pathlib import tempfile -from gt4py.next import config +from gt4py.next import config, fingerprinting from gt4py.next.otf import stages -from gt4py.next.otf.binding import interface _session_cache_dir = tempfile.TemporaryDirectory(prefix="gt4py_session_") @@ -22,33 +20,6 @@ _session_cache_dir_path = pathlib.Path(_session_cache_dir.name) -def _serialize_param(parameter: interface.Parameter) -> str: - 
return f"{parameter.name}: {parameter.type_!s}" - - -def _serialize_library_dependency(dependency: interface.LibraryDependency) -> str: - return f"{dependency.name}/{dependency.version}" - - -def _serialize_source(source: stages.ProgramSource) -> str: - parameters = [_serialize_param(param) for param in source.entry_point.parameters] - dependencies = [_serialize_library_dependency(dep) for dep in source.library_deps] - return f"""\ - language: {source.code_spec} - name: {source.entry_point.name} - params: {", ".join(parameters)} - deps: {", ".join(dependencies)} - src: {source.source_code} - """ - - -def _cache_folder_name(source: stages.ProgramSource) -> str: - serialized = _serialize_source(source) - fingerprint = hashlib.sha256(serialized.encode(encoding="utf-8")) - fingerprint_hex_str = fingerprint.hexdigest() - return source.entry_point.name + "_" + fingerprint_hex_str - - def get_cache_base_path(lifetime: config.BuildCacheLifetime) -> pathlib.Path: """Return the base directory for cached artifacts with the given lifetime.""" match lifetime: @@ -61,15 +32,24 @@ def get_cache_base_path(lifetime: config.BuildCacheLifetime) -> pathlib.Path: def get_cache_folder( - compilable_source: stages.CompilableProject, lifetime: config.BuildCacheLifetime + ext_source: stages.ExtensionSource, + lifetime: config.BuildCacheLifetime, + build_context_id: str = "", ) -> pathlib.Path: """ - Construct the path to where the build system project artifact of a compilable source should be cached. + Construct the path to where the build system project artifact of an extension source should be cached. + An optional ``build_context_id`` can be provided to distinguish between different contexts + that may produce different artifacts for the same extension source. The returned path points to an existing folder in all cases. 
""" - # TODO(ricoh): make dependent on binding source too or add alternative that depends on bindings - folder_name = _cache_folder_name(compilable_source.program_source) + fingerprinter = fingerprinting.strict_fingerprinter + slug = ext_source.program_source.entry_point.name + if ext_source.binding_source: + slug = f"{slug}_pyext" + folder_name = f"{slug}_{fingerprinter(ext_source)}" + if build_context_id: + folder_name = f"{folder_name}_{build_context_id}" base_path = get_cache_base_path(lifetime) base_path.mkdir(exist_ok=True) diff --git a/src/gt4py/next/otf/compilation/compiler.py b/src/gt4py/next/otf/compilation/compiler.py index 3748d95192..af72677031 100644 --- a/src/gt4py/next/otf/compilation/compiler.py +++ b/src/gt4py/next/otf/compilation/compiler.py @@ -15,7 +15,7 @@ import factory from gt4py._core import locking -from gt4py.next import config +from gt4py.next import config, fingerprinting from gt4py.next.otf import code_specs, definitions, stages, workflow from gt4py.next.otf.compilation import build_data, cache, importer @@ -39,7 +39,7 @@ def module_exists(data: build_data.BuildData, src_dir: pathlib.Path) -> bool: class BuildSystemProjectGenerator(Protocol[CodeSpecT, TargetCodeSpecT]): def __call__( self, - source: stages.CompilableProject[CodeSpecT, TargetCodeSpecT], + source: stages.ExtensionSource[CodeSpecT, TargetCodeSpecT], cache_lifetime: config.BuildCacheLifetime, ) -> stages.BuildSystemProject[CodeSpecT, TargetCodeSpecT]: ... 
@@ -47,11 +47,11 @@ def __call__( @dataclasses.dataclass(frozen=True) class Compiler( workflow.ChainableWorkflowMixin[ - stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], stages.ExecutableProgram, ], workflow.ReplaceEnabledWorkflowMixin[ - stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], stages.ExecutableProgram, ], definitions.CompilationStep[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], @@ -60,13 +60,19 @@ class Compiler( cache_lifetime: config.BuildCacheLifetime builder_factory: BuildSystemProjectGenerator[CPPLikeCodeSpecT, code_specs.PythonCodeSpec] + fingerprint_builder_factory: bool = True force_recompile: bool = False def __call__( self, - inp: stages.CompilableProject[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], + inp: stages.ExtensionSource[CPPLikeCodeSpecT, code_specs.PythonCodeSpec], ) -> stages.ExecutableProgram: - src_dir = cache.get_cache_folder(inp, self.cache_lifetime) + build_context_id = ( + fingerprinting.strict_fingerprinter(self.builder_factory) + if self.fingerprint_builder_factory + else "" + ) + src_dir = cache.get_cache_folder(inp, self.cache_lifetime, build_context_id) # If we are compiling the same program at the same time (e.g. multiple MPI ranks), # we need to make sure that only one of them accesses the same build directory for compilation. diff --git a/src/gt4py/next/otf/definitions.py b/src/gt4py/next/otf/definitions.py index 11b42dc6ce..2366695242 100644 --- a/src/gt4py/next/otf/definitions.py +++ b/src/gt4py/next/otf/definitions.py @@ -44,25 +44,23 @@ class TranslationStep( class BindingStep(Protocol[CodeSpecT, TargetCodeSpecT]): """ - Generate Bindings for program source and package both together (ProgramSource -> CompilableSource). + Generate Bindings for program source and package both together (ProgramSource -> ExtensionSource). 
In the special cases where bindings are not required, such a step could also simply construct - a ``CompilableSource`` from the ``ProgramSource`` with bindings set to ``None``. + an ``ExtensionSource`` from the ``ProgramSource`` with bindings set to ``None``. """ def __call__( self, program_source: stages.ProgramSource[CodeSpecT] - ) -> stages.CompilableProject[CodeSpecT, TargetCodeSpecT]: ... + ) -> stages.ExtensionSource[CodeSpecT, TargetCodeSpecT]: ... class CompilationStep( - workflow.Workflow[ - stages.CompilableProject[CodeSpecT, TargetCodeSpecT], stages.ExecutableProgram - ], + workflow.Workflow[stages.ExtensionSource[CodeSpecT, TargetCodeSpecT], stages.ExecutableProgram], Protocol[CodeSpecT, TargetCodeSpecT], ): - """Compile program source code and bindings into a python callable (CompilableSource -> CompiledProgram).""" + """Compile program source code and bindings into a python callable (ExtensionSource -> CompiledProgram).""" def __call__( - self, source: stages.CompilableProject[CodeSpecT, TargetCodeSpecT] + self, source: stages.ExtensionSource[CodeSpecT, TargetCodeSpecT] ) -> stages.ExecutableProgram: ... 
diff --git a/src/gt4py/next/otf/recipes.py b/src/gt4py/next/otf/recipes.py index 79cd17162b..0057ef46ed 100644 --- a/src/gt4py/next/otf/recipes.py +++ b/src/gt4py/next/otf/recipes.py @@ -18,6 +18,6 @@ class OTFCompileWorkflow(workflow.NamedStepSequence): """The typical compiled backend steps composed into a workflow.""" translation: definitions.TranslationStep - bindings: workflow.Workflow[stages.ProgramSource, stages.CompilableProject] - compilation: workflow.Workflow[stages.CompilableProject, stages.ExecutableProgram] + bindings: workflow.Workflow[stages.ProgramSource, stages.ExtensionSource] + compilation: workflow.Workflow[stages.ExtensionSource, stages.ExecutableProgram] decoration: workflow.Workflow[stages.ExecutableProgram, stages.ExecutableProgram] diff --git a/src/gt4py/next/otf/stages.py b/src/gt4py/next/otf/stages.py index c0bdddee1c..f731690e83 100644 --- a/src/gt4py/next/otf/stages.py +++ b/src/gt4py/next/otf/stages.py @@ -90,9 +90,8 @@ class BindingSource(Generic[CodeSpecT, TargetCodeSpecT]): library_deps: tuple[interface.LibraryDependency, ...] -# TODO(ricoh): reconsider name in view of future backends producing standalone compilable ProgramSource code @dataclasses.dataclass(frozen=True) -class CompilableProject(Generic[CodeSpecT, TargetCodeSpecT]): +class ExtensionSource(Generic[CodeSpecT, TargetCodeSpecT]): """ Encapsulate all the source code required for OTF compilation. @@ -117,7 +116,7 @@ def library_deps(self) -> tuple[interface.LibraryDependency, ...]: class BuildSystemProject(Protocol[CodeSpecT_co, TargetCodeSpecT_co]): """ - Use source code extracted from a ``CompilableSource`` to configure and build a GT4Py program. + Use source code extracted from an ``ExtensionSource`` to configure and build a GT4Py program. Should only be considered an OTF stage if used as an endpoint, as this only runs commands on source files and is not responsible for importing the results into Python. 
diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py b/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py index 7a729804fc..172668af65 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/bindings.py @@ -288,13 +288,13 @@ def _create_sdfg_bindings( def bind_sdfg( inp: stages.ProgramSource[code_specs.SDFGCodeSpec], bind_func_name: str, -) -> stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec]: +) -> stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec]: """ Method to be used as workflow stage for generation of SDFG bindings. Refer to `_create_sdfg_bindings` documentation. """ - return stages.CompilableProject( + return stages.ExtensionSource( program_source=inp, binding_source=_create_sdfg_bindings(inp, bind_func_name), ) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/common.py b/src/gt4py/next/program_processors/runners/dace/workflow/common.py index 6ef363d924..738caffc13 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/common.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/common.py @@ -20,10 +20,18 @@ """Name of SDFG argument to input the GT4Py metrics level.""" +SDFG_ARG_METRIC_LEVEL_DTYPE: Final[dace.dtypes.typeclass] = dace.int32 +"""DaCe datatype of `SDFG_ARG_METRIC_LEVEL` argument.""" + + SDFG_ARG_METRIC_COMPUTE_TIME: Final[str] = "gt_compute_time" """Name of SDFG argument to return the total compute time to GT4Py.""" +SDFG_ARG_METRIC_COMPUTE_TIME_DTYPE: Final[dace.dtypes.typeclass] = dace.float64 +"""DaCe datatype of `SDFG_ARG_METRIC_COMPUTE_TIME` argument.""" + + def set_dace_config( device_type: core_defs.DeviceType, cmake_build_type: Optional[gtx_config.CMakeBuildType] = None, diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py 
b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py index b8e18382d8..44861582e9 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/compilation.py @@ -18,7 +18,7 @@ import factory from gt4py._core import definitions as core_defs, locking -from gt4py.next import common, config +from gt4py.next import common, config, fingerprinting from gt4py.next.otf import code_specs, definitions, stages, workflow from gt4py.next.otf.compilation import cache as gtx_cache from gt4py.next.program_processors.runners.dace.workflow import common as gtx_wfdcommon @@ -131,11 +131,11 @@ def __call__(self, **kwargs: Any) -> None: @dataclasses.dataclass(frozen=True) class DaCeCompiler( workflow.ChainableWorkflowMixin[ - stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], CompiledDaceProgram, ], workflow.ReplaceEnabledWorkflowMixin[ - stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], CompiledDaceProgram, ], definitions.CompilationStep[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], @@ -151,16 +151,31 @@ class DaCeCompiler( cmake_build_type: config.CMakeBuildType = dataclasses.field( default_factory=lambda: config.CMAKE_BUILD_TYPE ) + # we store the non-default values of `dace.Config` in order to include it in the stage fingerprint + dace_config_nondefaults: dict[str, Any] = dataclasses.field(init=False) + + def __post_init__(self) -> None: + with gtx_wfdcommon.dace_context( + device_type=self.device_type, + cmake_build_type=self.cmake_build_type, + ): + object.__setattr__(self, "dace_config_nondefaults", dace.Config._data.nondefaults()) def __call__( self, - inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], + inp: 
stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec], ) -> CompiledDaceProgram: with gtx_wfdcommon.dace_context( device_type=self.device_type, cmake_build_type=self.cmake_build_type, ): - sdfg_build_folder = gtx_cache.get_cache_folder(inp, self.cache_lifetime) + # We use the fingerprint of the compilation stage to ensure that the SDFG + # will be rebuilt if the user changes the backend configuration. + sdfg_build_folder = gtx_cache.get_cache_folder( + inp, + self.cache_lifetime, + build_context_id=fingerprinting.strict_fingerprinter(self.dace_config_nondefaults), + ) sdfg_build_folder.mkdir(parents=True, exist_ok=True) sdfg = dace.SDFG.from_json(inp.program_source.source_code) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py b/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py index 103e7af33b..e0bcfec6b6 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/decoration.py @@ -14,7 +14,7 @@ import numpy as np from gt4py._core import definitions as core_defs -from gt4py.next import common as gtx_common, config, utils as gtx_utils +from gt4py.next import common as gtx_common, utils as gtx_utils from gt4py.next.instrumentation import metrics from gt4py.next.otf import stages from gt4py.next.program_processors.runners.dace import sdfg_callable @@ -30,7 +30,9 @@ def convert_args( ) -> stages.ExecutableProgram: # Retieve metrics level from GT4Py environment variable. collect_time = metrics.is_level_enabled(metrics.PERFORMANCE) - collect_time_arg = np.array([1], dtype=np.float64) + collect_time_arg = np.array( + [1], dtype=gtx_wfdcommon.SDFG_ARG_METRIC_COMPUTE_TIME_DTYPE.as_numpy_dtype() + ) # We use the callback function provided by the compiled program to update the SDFG arglist. 
update_sdfg_call_args = functools.partial( fun.update_sdfg_ctype_arglist, device, fun.sdfg_argtypes @@ -64,7 +66,7 @@ def decorated_program( filter_args=False, ) this_call_args |= { - gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL: config.COLLECT_METRICS_LEVEL, + gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL: metrics.get_current_level(), gtx_wfdcommon.SDFG_ARG_METRIC_COMPUTE_TIME: collect_time_arg, } fun.construct_arguments(**this_call_args) diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py index 5c8e0cc260..8beecdd7e3 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/translation.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/translation.py @@ -173,7 +173,9 @@ def add_instrumentation(sdfg: dace.SDFG, gpu: bool) -> None: """ output, _ = sdfg.add_array(gtx_wfdcommon.SDFG_ARG_METRIC_COMPUTE_TIME, [1], dace.float64) start_time, _ = sdfg.add_scalar("gt_start_time", dace.int64, transient=True) - metrics_level = sdfg.add_symbol(gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL, dace.int32) + metrics_level = sdfg.add_symbol( + gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL, gtx_wfdcommon.SDFG_ARG_METRIC_LEVEL_DTYPE + ) #### 1. Synchronize the CUDA device, in order to wait for kernels completion. 
# Even when the target device is GPU, it can happen that dace emits code without diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py index c4b4d3d698..fc5e9c99eb 100644 --- a/src/gt4py/next/program_processors/runners/gtfn.py +++ b/src/gt4py/next/program_processors/runners/gtfn.py @@ -112,6 +112,9 @@ class Params: cmake_build_type: config.CMakeBuildType = factory.LazyFunction( # type: ignore[assignment] # factory-boy typing not precise enough lambda: config.CMAKE_BUILD_TYPE ) + unstructured_horizontal_has_unit_stride: bool = factory.LazyFunction( # type: ignore[assignment] # factory-boy typing not precise enough + lambda: config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + ) builder_factory: compiler.BuildSystemProjectGenerator = factory.LazyAttribute( # type: ignore[assignment] # factory-boy typing not precise enough lambda o: compiledb.CompiledbFactory(cmake_build_type=o.cmake_build_type) ) @@ -134,8 +137,12 @@ class Params: ) translation = factory.LazyAttribute(lambda o: o.bare_translation) - bindings: workflow.Workflow[stages.ProgramSource, stages.CompilableProject] = ( - nanobind.bind_source + bindings: workflow.Workflow[stages.ProgramSource, stages.ExtensionSource] = ( + factory.LazyAttribute( # type: ignore[assignment] # factory-boy typing not precise enough + lambda o: nanobind.ExtensionGenerator( + unstructured_horizontal_has_unit_stride=o.unstructured_horizontal_has_unit_stride + ) + ) ) compilation = factory.SubFactory( compiler.CompilerFactory, diff --git a/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py b/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py index 49bd7b8f87..aa239b08d5 100644 --- a/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py +++ b/tests/next_tests/integration_tests/feature_tests/otf_tests/test_nanobind_build.py @@ -23,9 +23,11 @@ def 
test_gtfn_cpp_with_cmake(program_source_with_name): example_program_source = program_source_with_name("gtfn_cpp_with_cmake") - build_the_program = workflow.make_step(nanobind.bind_source).chain( + build_the_program = workflow.make_step(nanobind.ExtensionGenerator()).chain( compiler.Compiler( - cache_lifetime=config.BuildCacheLifetime.SESSION, builder_factory=cmake.CMakeFactory() + cache_lifetime=config.BuildCacheLifetime.SESSION, + builder_factory=cmake.CMakeFactory(), + fingerprint_builder_factory=False, ) ) compiled_program = build_the_program(example_program_source) @@ -41,7 +43,7 @@ def test_gtfn_cpp_with_cmake(program_source_with_name): def test_gtfn_cpp_with_compiledb(program_source_with_name): example_program_source = program_source_with_name("gtfn_cpp_with_compiledb") - build_the_program = workflow.make_step(nanobind.bind_source).chain( + build_the_program = workflow.make_step(nanobind.ExtensionGenerator()).chain( compiler.Compiler( cache_lifetime=config.BuildCacheLifetime.SESSION, builder_factory=compiledb.CompiledbFactory(), diff --git a/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py b/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py index 93a491f04f..50a82692ba 100644 --- a/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py +++ b/tests/next_tests/unit_tests/otf_tests/binding_tests/test_nanobind.py @@ -6,6 +6,7 @@ # Please, refer to the LICENSE file in the root directory. 
# SPDX-License-Identifier: BSD-3-Clause +from gt4py.next import config from gt4py.next.otf.binding import nanobind from next_tests.unit_tests.otf_tests.compilation_tests.build_systems_tests.conftest import ( @@ -14,5 +15,7 @@ def test_bindings(program_source_example): - module = nanobind.create_bindings(program_source_example) + module = nanobind.create_bindings( + program_source_example, config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + ) assert module.library_deps[0].name == "nanobind" diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py index d3bd748af0..9e524fcfeb 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py +++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/conftest.py @@ -88,16 +88,10 @@ def program_source_example(): @pytest.fixture -def compilable_source_example(program_source_example): - return stages.CompilableProject( +def extension_source_example(program_source_example): + return stages.ExtensionSource( program_source=program_source_example, - binding_source=nanobind.create_bindings(program_source_example), + binding_source=nanobind.create_bindings( + program_source_example, config.UNSTRUCTURED_HORIZONTAL_HAS_UNIT_STRIDE + ), ) - - -@pytest.fixture -def clean_example_session_cache(compilable_source_example): - cache_dir = cache.get_cache_folder(compilable_source_example, config.BuildCacheLifetime.SESSION) - if cache_dir.exists(): - shutil.rmtree(cache_dir) - yield diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py index 1903468da9..f976f9b98f 100644 --- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py +++ 
b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_cmake.py
@@ -8,14 +8,25 @@
 
 import os
 import pathlib
+import shutil
 from unittest import mock
 
+import pytest
+
 from gt4py._core import definitions as core_defs
 from gt4py.next import config
-from gt4py.next.otf.compilation import build_data, importer
+from gt4py.next.otf.compilation import build_data, cache, importer
 from gt4py.next.otf.compilation.build_systems import cmake
 
 
+@pytest.fixture
+def clean_cmake_cache(extension_source_example):
+    cache_dir = cache.get_cache_folder(extension_source_example, config.BuildCacheLifetime.SESSION)
+    if cache_dir.exists():  # NOTE(review): dir computed without build_context_id; verify
+        shutil.rmtree(cache_dir)
+    yield  # setup-only: nothing to clean up after the test
+
+
 def test_get_cmake_device_arch_option_cuda():
     with (
         mock.patch("gt4py._core.definitions.CUPY_DEVICE_TYPE", core_defs.DeviceType.CUDA),
@@ -50,9 +61,9 @@ def test_get_cmake_device_arch_option_rocm():
         assert cmake.get_cmake_device_arch_option() == "-DCMAKE_HIP_ARCHITECTURES=gfx90a"
 
 
-def test_default_cmake_factory(compilable_source_example, clean_example_session_cache):
+def test_default_cmake_factory(extension_source_example, clean_cmake_cache):
     otf_builder = cmake.CMakeFactory()(
-        source=compilable_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION
+        source=extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION
     )
 
     assert not build_data.contains_data(otf_builder.root_path)
diff --git a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py
index 4c1ab0dba8..b208b7cd6f 100644
--- a/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py
+++ b/tests/next_tests/unit_tests/otf_tests/compilation_tests/build_systems_tests/test_compiledb.py
@@ -10,14 +10,28 @@
 import shutil
 import tempfile
 
-from gt4py.next import config
-from gt4py.next.otf.compilation import build_data, importer
+import pytest
+
+from gt4py.next import config, fingerprinting
+from gt4py.next.otf.compilation import build_data, cache, importer
 from gt4py.next.otf.compilation.build_systems import compiledb
 
 
-def test_default_compiledb_factory(compilable_source_example, clean_example_session_cache):
+@pytest.fixture
+def clean_compiledb_cache(extension_source_example):
+    cache_dir = cache.get_cache_folder(
+        ext_source=extension_source_example,
+        lifetime=config.BuildCacheLifetime.SESSION,
+        build_context_id=fingerprinting.strict_fingerprinter(compiledb.CompiledbFactory()),
+    )
+    if cache_dir.exists():
+        shutil.rmtree(cache_dir)
+    yield  # setup-only: nothing to clean up after the test
+
+
+def test_default_compiledb_factory(extension_source_example, clean_compiledb_cache):
     otf_builder = compiledb.CompiledbFactory()(
-        compilable_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION
+        extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION
     )
 
     # make sure the example project has not been written yet
@@ -35,9 +49,9 @@ def test_default_compiledb_factory(compilable_source_example, clean_example_sess
     assert (otf_builder.root_path / "build.sh").exists()
 
 
-def test_compiledb_project_is_relocatable(compilable_source_example, clean_example_session_cache):
+def test_compiledb_project_is_relocatable(extension_source_example, clean_compiledb_cache):
     builder = compiledb.CompiledbFactory()(
-        compilable_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION
+        extension_source_example, cache_lifetime=config.BuildCacheLifetime.SESSION
     )
 
     # make sure the example project has not been written yet
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py
index 8a144fefdc..2c4811dc7d 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py
+++ 
b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py
@@ -228,7 +228,7 @@ def {_bind_func_name}(device, sdfg_argtypes, args, sdfg_call_args, offset_provid
 
 def mocked_compile_call(
     self,
-    inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec],
+    inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec],
     binding_source_ref: str,
 ):
     assert len(inp.library_deps) == 0
@@ -245,7 +245,7 @@ def mocked_compile_call(
 
 def mocked_compile_call_cartesian(
     self,
-    inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec],
+    inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec],
     use_metrics: bool,
     use_zero_origin: bool,
 ):
@@ -257,7 +257,7 @@ def mocked_compile_call_cartesian(
 
 def mocked_compile_call_unstructured(
     self,
-    inp: stages.CompilableProject[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec],
+    inp: stages.ExtensionSource[code_specs.SDFGCodeSpec, code_specs.PythonCodeSpec],
     use_metrics: bool,
     use_zero_origin: bool,
 ):
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py
index 1cbf9d3c2e..890cbe71d3 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py
+++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_compilation.py
@@ -19,6 +19,8 @@
 
 from gt4py._core import definitions as core_defs
 from gt4py.next import config
+from gt4py.next.otf import code_specs, stages
+from gt4py.next.otf.binding import interface
 from gt4py.next.program_processors.runners.dace.workflow import compilation as dace_wf_compilation
 
 
@@ -66,16 +68,31 @@ def _make_sdfg_with_gpu_map() -> dace.SDFG:
     return sdfg
 
 
+def _make_extension_source() -> stages.ExtensionSource:
+    """Build a real `ExtensionSource` wrapping the GPU SDFG, as the dace translation step emits.
+
+    Using a real source (rather than a `MagicMock`) lets the unmocked `get_cache_folder`
+    fingerprint the program source for the build-folder name.
+    """
+    program_source = stages.ProgramSource(
+        entry_point=interface.Function("gpu_program", parameters=()),
+        source_code=_make_sdfg_with_gpu_map().to_json(),
+        library_deps=(),
+        code_spec=code_specs.SDFGCodeSpec(),
+    )
+    binding_source = stages.BindingSource(source_code="", library_deps=())
+    return stages.ExtensionSource(program_source=program_source, binding_source=binding_source)
+
+
 def _run_compiler(
-    tmp_path, *, add_gpu_trace_markers: bool, device_type: core_defs.DeviceType
+    *, add_gpu_trace_markers: bool, device_type: core_defs.DeviceType
 ) -> tuple[mock.MagicMock, dace.SDFG]:
     """Run `DaCeCompiler` on a GPU SDFG with compilation stubbed out.
 
     Returns the spy wrapping `_add_tx_markers` and the SDFG that was handed to
     ``SDFG.compile`` (i.e. the SDFG after any marker processing).
     """
-    inp = mock.MagicMock()
-    inp.program_source.source_code = _make_sdfg_with_gpu_map().to_json()
+    inp = _make_extension_source()
 
     compiler = dace_wf_compilation.DaCeCompiler(
         bind_func_name="bind",
@@ -92,12 +109,6 @@ def _run_compiler(
         ) as spy,
         mock.patch.object(dace.SDFG, "compile", autospec=True) as compile_mock,
         mock.patch.object(dace_wf_compilation, "CompiledDaceProgram"),
-        mock.patch.object(
-            dace_wf_compilation.gtx_wfdcommon,
-            "dace_context",
-            lambda **kwargs: contextlib.nullcontext(),
-        ),
-        mock.patch.object(dace_wf_compilation.gtx_cache, "get_cache_folder", return_value=tmp_path),
         mock.patch.object(
             dace_wf_compilation.locking, "lock", lambda *args, **kwargs: contextlib.nullcontext()
         ),
@@ -116,7 +127,7 @@ def _run_compiler(
 def test_compiler_applies_tx_markers_for_gpu(tmp_path):
     """On a CUDA target with the flag on, the compiler applies the markers to the SDFG."""
     spy, compiled_sdfg = _run_compiler(
-        tmp_path, add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CUDA
+        add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CUDA
     )
 
     spy.assert_called_once()
@@ -132,7 +143,7 @@ def test_compiler_applies_tx_markers_for_gpu(tmp_path):
 def test_compiler_skips_tx_markers_when_flag_disabled(tmp_path):
     """With the flag off the compiler must not touch instrumentation, even on CUDA."""
     spy, compiled_sdfg = _run_compiler(
-        tmp_path, add_gpu_trace_markers=False, device_type=core_defs.DeviceType.CUDA
+        add_gpu_trace_markers=False, device_type=core_defs.DeviceType.CUDA
     )
 
     spy.assert_not_called()
@@ -142,8 +153,37 @@ def test_compiler_skips_tx_markers_when_flag_disabled(tmp_path):
 def test_compiler_skips_tx_markers_for_non_gpu_device(tmp_path):
     """On a CPU target the markers must not be applied even with the flag on."""
     spy, compiled_sdfg = _run_compiler(
-        tmp_path, add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CPU
+        add_gpu_trace_markers=True, device_type=core_defs.DeviceType.CPU
     )
 
     spy.assert_not_called()
     assert compiled_sdfg.instrument == _NONE
+
+
+# `CXXFLAGS`, `CUDAFLAGS` and `HIPFLAGS` feed `compiler.cpu.args`, `compiler.cuda.args`
+# and `compiler.cuda.hip_args` respectively (see `set_dace_config`).
+@pytest.mark.parametrize(
+    ("device_type", "compiler_flags_env"),
+    [
+        (core_defs.DeviceType.CPU, "CXXFLAGS"),
+        (core_defs.DeviceType.CUDA, "CUDAFLAGS"),
+        (core_defs.DeviceType.ROCM, "HIPFLAGS"),
+    ],
+)
+def test_compiler_flags_change_build_folder(monkeypatch, device_type, compiler_flags_env):
+    """Different compiler flags must produce a different build folder.
+
+    The flags are captured in `dace_config_nondefaults`, whose fingerprint the compiler
+    passes to `get_cache_folder` as the `build_context_id`. That id is appended to the
+    build-folder name, so changing any flag lands the build in a different folder of the
+    build cache.
+    """
+    monkeypatch.delenv(compiler_flags_env, raising=False)  # baseline run: variable unset
+    _, sdfg_default = _run_compiler(add_gpu_trace_markers=False, device_type=device_type)
+
+    monkeypatch.setenv(compiler_flags_env, "-O0 -some-custom-flag")
+    _, sdfg_custom = _run_compiler(add_gpu_trace_markers=False, device_type=device_type)
+
+    # The differing `dace_config_nondefaults` make the two compilers fingerprint differently,
+    # so `get_cache_folder` names two distinct build folders.
+    assert sdfg_default.build_folder != sdfg_custom.build_folder