From 93cab05584dc15cf9f12236ec0ef24becd21beb0 Mon Sep 17 00:00:00 2001 From: Mark Dittmer Date: Mon, 8 Jun 2026 14:54:09 +0000 Subject: [PATCH] [anneal] Check archive Lake cache reuse in tests Findings: the symlink-free v1 contract depends on a fully read-only extracted Aeneas archive being usable by a fresh generated workspace with a complete relative manifest, lake --old, and no package reconfiguration or cache rebuild. In response, v1 and v2 integration tests now install the Nix-built archive, assert Aeneas is read-only, create a tiny generated Lake workspace with a relative locked manifest, then run lake --old build Generated and lake env lean --json against the archive to catch regressions. gherrit-pr-id: Gk6ir6ucgyyji67sqkhteqq3wjxijq7at --- .../archive_lake_cache_reuse/anneal.toml | 4 + anneal/tests/integration.rs | 211 ++++++++++++++- anneal/v2/Cargo.lock | 9 + anneal/v2/Cargo.toml | 5 + anneal/v2/src/main.rs | 256 +++++++++++++++++- 5 files changed, 474 insertions(+), 11 deletions(-) create mode 100644 anneal/tests/fixtures/archive_lake_cache_reuse/anneal.toml diff --git a/anneal/tests/fixtures/archive_lake_cache_reuse/anneal.toml b/anneal/tests/fixtures/archive_lake_cache_reuse/anneal.toml new file mode 100644 index 0000000000..bd44e3c3bc --- /dev/null +++ b/anneal/tests/fixtures/archive_lake_cache_reuse/anneal.toml @@ -0,0 +1,4 @@ +description = """ +Purpose: Verifies that the installed Nix-built archive can be used read-only by a fresh generated Lake workspace. +Behavior: Builds and diagnoses a tiny workspace with a complete relative Lake manifest pointing at the installed Aeneas archive. +""" diff --git a/anneal/tests/integration.rs b/anneal/tests/integration.rs index 96f27102c8..8fc6f6b9b4 100644 --- a/anneal/tests/integration.rs +++ b/anneal/tests/integration.rs @@ -12,7 +12,7 @@ use std::{ use assert_cmd::assert::Assert; use fs2::FileExt as _; use serde::Deserialize; -use serde_json::json; +use serde_json::{Value, json}; use walkdir::WalkDir; fn new_sorted_walkdir(path: impl AsRef) -> WalkDir { @@ -294,6 +294,210 @@ fn get_toolchain_bin_dir() -> PathBuf { get_toolchain_install_dir().join("aeneas").join("bin") } +fn run_archive_lake_cache_reuse_test(test_name: &str) -> datatest_stable::Result<()> { + let _permit = + profile_step(test_name, None, "wait_toolchain_run_slot", || acquire_toolchain_run_slot()); + let temp = tempfile::Builder::new() + .prefix("anneal-archive-cache-reuse-") + .tempdir_in(get_target_dir())?; + assert_archive_lake_cache_reuse(test_name, &get_toolchain_install_dir(), temp.path()) +} + +fn assert_archive_lake_cache_reuse( + test_name: &str, + toolchain_root: &Path, + temp_root: &Path, +) -> Result<(), Box> { + let aeneas_root = toolchain_root.join("aeneas"); + let aeneas_lean = aeneas_root.join("backends/lean"); + let lean_root = toolchain_root.join("lean"); + let workspace = temp_root.join("generated-workspace"); + + assert_no_write_bits(&aeneas_root)?; + + fs::create_dir_all(workspace.join("generated"))?; + fs::copy(aeneas_lean.join("lean-toolchain"), workspace.join("lean-toolchain"))?; + fs::write(workspace.join("generated/Generated.lean"), "import Aeneas\n")?; + fs::write( + workspace.join("lakefile.lean"), + format!( + r#"import Lake +open Lake DSL + +require aeneas from "{}" + +package anneal_verification + +@[default_target] +lean_lib Generated where + srcDir := "generated" + roots := #[`Generated] +"#, + lake_string(&aeneas_lean) + ), + )?; + write_relative_archive_manifest(&workspace, &aeneas_lean)?; + + // This mirrors v1's generated workspace contract with the Nix-built + // archive: dependency paths are locked relative to the workspace, package + // caches are read-only, and `--old` must reuse the prebuilt Lake outputs. + run_lake_archive_command( + test_name, + &workspace, + &lean_root, + &["--keep-toolchain", "--old", "build", "Generated"], + )?; + run_lake_archive_command( + test_name, + &workspace, + &lean_root, + &["--keep-toolchain", "env", "lean", "--json", "generated/Generated.lean"], + )?; + + Ok(()) +} + +fn assert_no_write_bits(root: &Path) -> Result<(), Box> { + for entry in new_sorted_walkdir(root) { + let entry = entry?; + let metadata = fs::symlink_metadata(entry.path())?; + if metadata.file_type().is_symlink() { + continue; + } + if metadata.permissions().mode() & 0o222 != 0 { + panic!("archive path should be read-only: {}", entry.path().display()); + } + } + Ok(()) +} + +fn write_relative_archive_manifest( + workspace: &Path, + aeneas_lean: &Path, +) -> Result<(), Box> { + let aeneas_lean = fs::canonicalize(aeneas_lean)?; + let workspace = fs::canonicalize(workspace)?; + let manifest_path = aeneas_lean.join("lake-manifest.json"); + let manifest: Value = serde_json::from_reader(fs::File::open(&manifest_path)?)?; + let aeneas_packages = manifest.get("packages").and_then(Value::as_array).ok_or_else(|| { + invalid_data(format!( + "Aeneas Lake manifest {} is missing packages", + manifest_path.display() + )) + })?; + + let aeneas_dir = relative_manifest_string(&aeneas_lean, &workspace)?; + let mut packages = vec![json!({ + "type": "path", + "name": "aeneas", + "dir": aeneas_dir, + "inherited": false, + })]; + + for entry in aeneas_packages { + let mut entry = entry + .as_object() + .cloned() + .ok_or_else(|| invalid_data("Aeneas Lake manifest package entry is not an object"))?; + let package_type = entry + .get("type") + .and_then(Value::as_str) + .ok_or_else(|| invalid_data("Aeneas Lake manifest package entry is missing type"))?; + if package_type != "path" { + return Err(invalid_data(format!( + "Aeneas Lake manifest package entry is {package_type:?}, not a path dependency" + )) + .into()); + } + let package_dir = entry + .get("dir") + .and_then(Value::as_str) + .ok_or_else(|| invalid_data("Aeneas Lake manifest package entry is missing dir"))?; + let package_dir = Path::new(package_dir); + let package_dir = if package_dir.is_absolute() { + package_dir.to_path_buf() + } else { + aeneas_lean.join(package_dir) + }; + let package_dir = fs::canonicalize(package_dir)?; + entry.insert("dir".to_string(), json!(relative_manifest_string(&package_dir, &workspace)?)); + entry.insert("inherited".to_string(), json!(true)); + packages.push(Value::Object(entry)); + } + + let manifest = json!({ + "version": "1.2.0", + "packagesDir": ".lake/packages", + "packages": packages, + "name": "anneal_verification", + "lakeDir": ".lake", + "fixedToolchain": false, + }); + fs::write( + workspace.join("lake-manifest.json"), + format!("{}\n", serde_json::to_string_pretty(&manifest)?), + )?; + Ok(()) +} + +fn relative_manifest_string( + path: &Path, + base: &Path, +) -> Result> { + let path = pathdiff::diff_paths(path, base).ok_or_else(|| { + invalid_data(format!( + "failed to compute relative path from {} to {}", + base.display(), + path.display() + )) + })?; + Ok(path.to_string_lossy().into_owned()) +} + +fn lake_string(path: &Path) -> String { + path.to_string_lossy().replace('\\', "\\\\").replace('"', "\\\"") +} + +fn run_lake_archive_command( + test_name: &str, + workspace: &Path, + lean_root: &Path, + args: &[&str], +) -> Result<(), Box> { + let lean_bin = lean_root.join("bin"); + let mut cmd = process::Command::new(lean_bin.join("lake")); + cmd.args(args) + .current_dir(workspace) + .env_remove("CI") + .env("LEAN_SYSROOT", lean_root) + .env("MATHLIB_NO_CACHE_ON_UPDATE", "1") + .env("PATH", prepend_env_paths("PATH", &[lean_bin])?); + + let lib_var = if cfg!(target_os = "macos") { "DYLD_LIBRARY_PATH" } else { "LD_LIBRARY_PATH" }; + cmd.env( + lib_var, + prepend_env_paths(lib_var, &[lean_root.join("lib"), lean_root.join("lib/lean")])?, + ); + + run_command_with_profile(test_name, Some("archive_lake_cache_reuse"), cmd)?.assert.success(); + Ok(()) +} + +fn prepend_env_paths( + var_name: &str, + new_paths: &[PathBuf], +) -> Result> { + let mut paths = new_paths.to_vec(); + if let Some(existing) = std::env::var_os(var_name) { + paths.extend(std::env::split_paths(&existing)); + } + Ok(std::env::join_paths(paths)?) +} + +fn invalid_data(message: impl Into) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, message.into()) +} + fn toolchain_run_jobs() -> usize { *TOOLCHAIN_RUN_JOBS.get_or_init(|| { std::env::var("ANNEAL_INTEGRATION_REAL_JOBS") @@ -949,6 +1153,11 @@ fn run_integration_test(path: &Path) -> datatest_stable::Result<()> { run_dirty_sandbox_test(path) }); } + if path_str.contains("archive_lake_cache_reuse/anneal.toml") { + return profile_step(&test_name, None, "archive_lake_cache_reuse_test", || { + run_archive_lake_cache_reuse_test(&test_name) + }); + } // Load the test configuration from the associated 'anneal.toml' manifest. let config = anneal_toml.test.unwrap_or_default(); diff --git a/anneal/v2/Cargo.lock b/anneal/v2/Cargo.lock index b546668876..4a297be25c 100644 --- a/anneal/v2/Cargo.lock +++ b/anneal/v2/Cargo.lock @@ -108,6 +108,9 @@ dependencies = [ "env_logger", "exocrate", "log", + "pathdiff", + "serde_json", + "tempfile", "toml_const", ] @@ -550,6 +553,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + [[package]] name = "percent-encoding" version = "2.3.2" diff --git a/anneal/v2/Cargo.toml b/anneal/v2/Cargo.toml index 853b7f7f5c..ff669f7e7f 100644 --- a/anneal/v2/Cargo.toml +++ b/anneal/v2/Cargo.toml @@ -50,3 +50,8 @@ toml_const = "1.3.0" clap = { version = "4.5", features = ["derive"] } env_logger = "0.11" log = "0.4" + +[dev-dependencies] +pathdiff = "0.2" +serde_json = "1" +tempfile = "3" diff --git a/anneal/v2/src/main.rs b/anneal/v2/src/main.rs index d5b672d035..0cb74addc8 100644 --- a/anneal/v2/src/main.rs +++ b/anneal/v2/src/main.rs @@ -46,7 +46,7 @@ exocrate::parse_remote_archive! { ]; } -fn setup(args: SetupArgs) { +fn setup_installation_dir(args: SetupArgs) -> std::path::PathBuf { let location = if std::env::var("__ANNEAL_LOCAL_DEV").is_ok() { exocrate::Location::LocalDev } else { @@ -57,10 +57,14 @@ fn setup(args: SetupArgs) { None => exocrate::Source::Remote(REMOTE), }; - let installation_dir = CONFIG + CONFIG .resolve_installation_dir_or_install(location, source) // FIXME: Implement unified error reporting (e.g., via `anyhow`). - .expect("failed to resolve-or-install dependencies"); + .expect("failed to resolve-or-install dependencies") +} + +fn setup(args: SetupArgs) { + let installation_dir = setup_installation_dir(args); log::info!("anneal toolchain is installed at {:?}", installation_dir); } @@ -85,12 +89,244 @@ fn main() { #[cfg(test)] mod tests { #[cfg(feature = "exocrate_tests")] - #[test] - fn test_setup() { - super::setup(super::SetupArgs { - // ASSUMPTION: Dependency builder installs archive at - // `target/anneal-exocrate.tar.zst`. - local_archive: Some("target/anneal-exocrate.tar.zst".into()), - }) + mod exocrate_tests { + use std::{ + fs, io, + path::{Path, PathBuf}, + process::Command, + sync::OnceLock, + }; + + use serde_json::{Value, json}; + + const LOCAL_ARCHIVE: &str = "target/anneal-exocrate.tar.zst"; + static INSTALLATION_DIR: OnceLock = OnceLock::new(); + + #[test] + fn test_setup() { + install_local_archive(); + } + + #[test] + fn test_archive_lake_cache_reuse() { + let installation_dir = install_local_archive(); + let temp = tempfile::Builder::new() + .prefix("anneal-v2-archive-cache-reuse-") + .tempdir() + .expect("failed to create archive cache reuse tempdir"); + assert_archive_lake_cache_reuse(&installation_dir, temp.path()) + .expect("archive Lake cache reuse test failed"); + } + + fn install_local_archive() -> PathBuf { + // ASSUMPTION: The CI dependency builder downloads the Nix-built + // archive artifact to this path before running v2 tests. + INSTALLATION_DIR + .get_or_init(|| { + super::super::setup_installation_dir(super::super::SetupArgs { + local_archive: Some(LOCAL_ARCHIVE.into()), + }) + }) + .clone() + } + + fn assert_archive_lake_cache_reuse( + toolchain_root: &Path, + temp_root: &Path, + ) -> Result<(), Box> { + let aeneas_root = toolchain_root.join("aeneas"); + let aeneas_lean = aeneas_root.join("backends/lean"); + let lean_root = toolchain_root.join("lean"); + let workspace = temp_root.join("generated-workspace"); + + assert_no_write_bits(&aeneas_root)?; + + fs::create_dir_all(workspace.join("generated"))?; + fs::copy(aeneas_lean.join("lean-toolchain"), workspace.join("lean-toolchain"))?; + fs::write(workspace.join("generated/Generated.lean"), "import Aeneas\n")?; + fs::write( + workspace.join("lakefile.lean"), + format!( + r#"import Lake +open Lake DSL + +require aeneas from "{}" + +package anneal_verification + +@[default_target] +lean_lib Generated where + srcDir := "generated" + roots := #[`Generated] +"#, + lake_string(&aeneas_lean) + ), + )?; + write_relative_archive_manifest(&workspace, &aeneas_lean)?; + + // The Nix archive must support fresh generated workspaces without + // reconfiguring packages or rebuilding read-only Lake artifacts. + run_lake_archive_command( + &workspace, + &lean_root, + &["--keep-toolchain", "--old", "build", "Generated"], + )?; + run_lake_archive_command( + &workspace, + &lean_root, + &["--keep-toolchain", "env", "lean", "--json", "generated/Generated.lean"], + )?; + + Ok(()) + } + + fn assert_no_write_bits(root: &Path) -> Result<(), Box> { + let metadata = fs::symlink_metadata(root)?; + if metadata.file_type().is_symlink() { + return Ok(()); + } + if has_write_bits(&metadata.permissions()) { + panic!("archive path should be read-only: {}", root.display()); + } + if metadata.is_dir() { + for entry in fs::read_dir(root)? { + assert_no_write_bits(&entry?.path())?; + } + } + Ok(()) + } + + #[cfg(unix)] + fn has_write_bits(permissions: &fs::Permissions) -> bool { + use std::os::unix::fs::PermissionsExt as _; + permissions.mode() & 0o222 != 0 + } + + #[cfg(not(unix))] + fn has_write_bits(permissions: &fs::Permissions) -> bool { + !permissions.readonly() + } + + fn write_relative_archive_manifest( + workspace: &Path, + aeneas_lean: &Path, + ) -> Result<(), Box> { + let aeneas_lean = fs::canonicalize(aeneas_lean)?; + let workspace = fs::canonicalize(workspace)?; + let manifest_path = aeneas_lean.join("lake-manifest.json"); + let manifest: Value = serde_json::from_reader(fs::File::open(&manifest_path)?)?; + let aeneas_packages = + manifest.get("packages").and_then(Value::as_array).ok_or_else(|| { + invalid_data(format!( + "Aeneas Lake manifest {} is missing packages", + manifest_path.display() + )) + })?; + + let aeneas_dir = relative_manifest_string(&aeneas_lean, &workspace)?; + let mut packages = vec![json!({ + "type": "path", + "name": "aeneas", + "dir": aeneas_dir, + "inherited": false, + })]; + + for entry in aeneas_packages { + let mut entry = entry.as_object().cloned().ok_or_else(|| { + invalid_data("Aeneas Lake manifest package entry is not an object") + })?; + let package_type = entry.get("type").and_then(Value::as_str).ok_or_else(|| { + invalid_data("Aeneas Lake manifest package entry is missing type") + })?; + if package_type != "path" { + return Err(invalid_data(format!( + "Aeneas Lake manifest package entry is {package_type:?}, not a path dependency" + )) + .into()); + } + let package_dir = entry.get("dir").and_then(Value::as_str).ok_or_else(|| { + invalid_data("Aeneas Lake manifest package entry is missing dir") + })?; + let package_dir = Path::new(package_dir); + let package_dir = if package_dir.is_absolute() { + package_dir.to_path_buf() + } else { + aeneas_lean.join(package_dir) + }; + let package_dir = fs::canonicalize(package_dir)?; + entry.insert( + "dir".to_string(), + json!(relative_manifest_string(&package_dir, &workspace)?), + ); + entry.insert("inherited".to_string(), json!(true)); + packages.push(Value::Object(entry)); + } + + let manifest = json!({ + "version": "1.2.0", + "packagesDir": ".lake/packages", + "packages": packages, + "name": "anneal_verification", + "lakeDir": ".lake", + "fixedToolchain": false, + }); + fs::write( + workspace.join("lake-manifest.json"), + format!("{}\n", serde_json::to_string_pretty(&manifest)?), + )?; + Ok(()) + } + + fn relative_manifest_string( + path: &Path, + base: &Path, + ) -> Result> { + let path = pathdiff::diff_paths(path, base).ok_or_else(|| { + invalid_data(format!( + "failed to compute relative path from {} to {}", + base.display(), + path.display() + )) + })?; + Ok(path.to_string_lossy().into_owned()) + } + + fn lake_string(path: &Path) -> String { + path.to_string_lossy().replace('\\', "\\\\").replace('"', "\\\"") + } + + fn run_lake_archive_command( + workspace: &Path, + lean_root: &Path, + args: &[&str], + ) -> Result<(), Box> { + let lean_bin = lean_root.join("bin"); + let mut cmd = Command::new(lean_bin.join("lake")); + cmd.args(args).current_dir(workspace).env_clear(); + + let lib_var = + if cfg!(target_os = "macos") { "DYLD_LIBRARY_PATH" } else { "LD_LIBRARY_PATH" }; + cmd.env( + lib_var, + std::env::join_paths([lean_root.join("lib"), lean_root.join("lib/lean")])?, + ); + + let output = cmd.output()?; + if !output.status.success() { + return Err(io::Error::other(format!( + "lake {:?} failed with status {}\nstdout:\n{}\nstderr:\n{}", + args, + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + )) + .into()); + } + Ok(()) + } + + fn invalid_data(message: impl Into) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, message.into()) + } } }