diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 342475e..aef24a3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -19,6 +19,10 @@ jobs:
       - name: Run pytest with coverage gate
         working-directory: build/dashboard
         run: python -m pytest --cov=mining_dashboard --cov-report=term-missing --cov-fail-under=80
+      - name: Fake-daemon contract test (real clients vs controllable fakes)
+        # Points the real Monero/Tari clients at the integration fakes and asserts they parse
+        # every state (synced/syncing/down). Docker-free, so it runs on every PR (issue #54).
+        run: PYTHONPATH=build/dashboard python -m pytest tests/integration/fakes -q
 
   frontend:
     name: Frontend logic tests (node --test)
@@ -53,14 +57,23 @@ jobs:
       # the job when one is briefly out of sync — see issue #64.
       - name: Lint pithead and test scripts
         # Gate on warnings+errors (real issues); info-level style nits vary by shellcheck version.
-        run: shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh
+        run: shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh tests/inventory.sh tests/integration/*.sh tests/integration/mini-stack/*.sh
       - name: Run pithead test suite
         run: bash tests/stack/run.sh
+      - name: Run integration harness self-test
+        # Pure-logic checks for the tests/integration/ harness (config rendering, matrix
+        # coverage, redaction). The LIVE matrix (tests/integration/run.sh) needs a real test
+        # server and runs as a gated/manual release gate (#54), not on every PR.
+        run: bash tests/integration/selftest.sh
+      - name: Check the test inventory is up to date
+        # docs/test-inventory.md is generated from the suites; fail if a test was added/removed
+        # without regenerating it (run `make test-inventory`).
+        run: make test-inventory-check
 
   compose:
-    name: Compose config validation
+    name: Compose config + security hardening
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - name: Validate docker-compose.yml interpolation
+      - name: Validate docker-compose.yml interpolation + hardening invariants (#90)
         run: bash tests/stack/test_compose.sh
diff --git a/.github/workflows/integration-mini-stack.yml b/.github/workflows/integration-mini-stack.yml
new file mode 100644
index 0000000..958a41c
--- /dev/null
+++ b/.github/workflows/integration-mini-stack.yml
@@ -0,0 +1,27 @@
+name: Integration mini-stack
+
+# The fake-daemon docker mini-stack (issue #54, tier 3): brings up the REAL dashboard +
+# docker-control proxy against controllable fake monerod/Tari and asserts the control plane
+# (sync hold/release, node-down reject/readmit) end-to-end. It needs a Docker daemon, so it
+# runs as its own job (not part of the always-on CI matrix), triggered on changes to the
+# integration harness or the dashboard, and on demand.
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - "tests/integration/**"
+      - "build/dashboard/**"
+      - ".github/workflows/integration-mini-stack.yml"
+
+jobs:
+  mini-stack:
+    name: Fake-daemon mini-stack (docker)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # ubuntu-latest ships Docker with the Compose v2 plugin — no setup needed.
+      - name: Run the fake-daemon mini-stack
+        run: bash tests/integration/mini-stack/run-mini-stack.sh
+      - name: Dump dashboard logs on failure
+        if: failure()
+        run: docker compose -f tests/integration/mini-stack/docker-compose.fake.yml logs --no-color || true
diff --git a/.github/workflows/release-gate.yml b/.github/workflows/release-gate.yml
new file mode 100644
index 0000000..421a2e7
--- /dev/null
+++ b/.github/workflows/release-gate.yml
@@ -0,0 +1,80 @@
+name: Release gate (self-hosted)
+
+# Tier-4 end-to-end validation against the REAL synced Monero + Tari nodes — the pre-release
+# gate (#54). It runs on the dedicated, self-hosted release server (which holds real wallet /
+# onion keys), so it MUST only ever run code we trust.
+#
+# SECURITY: there is deliberately NO `pull_request` trigger. A fork PR's code running on this
+# runner could steal the box's keys or persist a backdoor (GitHub recommends against self-hosted
+# runners on public repos for exactly this reason). The gate runs only on:
+#   - workflow_dispatch — a maintainer manually runs it on a ref they've reviewed, OR
+#   - push to main      — post-merge, on trusted code.
+# To end-to-end a specific fork PR, review it first, then dispatch this workflow on that ref.
+# See docs/release-server.md.
+on:
+  workflow_dispatch:
+    inputs:
+      stack_dir:
+        description: "Path to the deployed Pithead stack on the runner (absolute; default $HOME/code/pithead)"
+        required: false
+        default: ""
+      mode:
+        description: "check = non-destructive; matrix = full destructive config matrix (with a safety backup + auto-rollback)"
+        required: false
+        default: "check"
+        type: choice
+        options: [check, matrix]
+  push:
+    branches: [main]
+
+# Never run two gates against the one shared box at the same time.
+concurrency:
+  group: release-gate
+  cancel-in-progress: false
+
+# Least privilege: the gate only needs to read the repo. Keep the job token read-only so
+# nothing running on the key-holding box can push, tag, or publish with it.
+permissions:
+  contents: read
+
+jobs:
+  release-gate:
+    name: Tier-4 live matrix (real nodes)
+    # Register the server with these labels: `pithead-release` scopes the gate to the dedicated
+    # box; prefer an ephemeral / just-in-time runner in its own runner group.
+    runs-on: [self-hosted, pithead-release]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Don't store the job token in the checkout's git config on the key-holding runner.
+          persist-credentials: false
+
+      - name: Validate against the real synced nodes
+        # Inputs go through env (not interpolated into the script) to avoid shell injection.
+        # On `push` there are no dispatch inputs, so both expand to empty and the defaults
+        # below apply (default stack dir, non-destructive `check` mode).
+        env:
+          STACK_DIR_INPUT: ${{ github.event.inputs.stack_dir }}
+          MODE_INPUT: ${{ github.event.inputs.mode }}
+        run: |
+          set -euo pipefail
+          DIR="${STACK_DIR_INPUT:-$HOME/code/pithead}"
+          MODE="${MODE_INPUT:-check}"
+          echo "Release gate: stack dir=$DIR, mode=$MODE"
+
+          # Always assess fitness + the non-destructive live state first.
+          bash tests/integration/run.sh --local --dir "$DIR" --readiness
+          bash tests/integration/run.sh --local --dir "$DIR" --check
+
+          # The full destructive config matrix is opt-in; --safety-backup rolls the box back if
+          # anything fails, so a red run leaves the server as it found it.
+          if [ "$MODE" = "matrix" ]; then
+            bash tests/integration/run.sh --local --dir "$DIR" --workers 2 --safety-backup --lifecycle
+          fi
+
+      - name: Upload artifacts (redacted)
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-gate-results
+          path: tests/integration/results/
+          if-no-files-found: ignore
+          retention-days: 14
diff --git a/.gitignore b/.gitignore
index 3f43412..729558f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,5 +21,8 @@ htmlcov/
 *.egg-info/
 .eggs/
 
+# Integration test artifacts (manifest, per-scenario logs, captured state)
+/tests/integration/results/
+
 # OS
 .DS_Store
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d9c1de..2aae36f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,53 @@ per the process in [`docs/releasing.md`](docs/releasing.md).
 
 ### Added
 
+- A four-tier test strategy for simulating every runtime situation (#54), documented in
+  `docs/testing-strategy.md` with a full scenario catalog:
+  - **Live config-matrix suite** (`tests/integration/`, tier 4) that drives a real, synced
+    server through the config matrix and asserts the stack behaves — containers healthy, nodes
+    synced, miners mining, dashboard reading correct live state, `status` exit codes, secrets
+    preserved. Runs over SSH or `--local`; the blocking pre-release gate. A `--fault-injection`
+    phase deliberately breaks monerod (stop / SIGSTOP / remove) to assert `pithead status`'
+    down/unhealthy/missing verdicts and the failover→recovery cycle. `make test-integration`.
+  - **Controllable fake monerod/Tari + a contract test** (`tests/integration/fakes/`, tier 2)
+    that points the real dashboard clients at the fakes and asserts they parse every state —
+    docker-free, runs on every PR. `make test-fakes`.
+  - **Fake-daemon docker mini-stack** (`tests/integration/mini-stack/`, tier 3) running the real
+    dashboard + docker-control proxy against the fakes, asserting sync hold/release and Tari
+    reject/readmit end-to-end with real containers (`make test-mini-stack`). Validated green
+    (11/11) on a real Docker host, and isolated (namespaced container names + non-colliding
+    ports) so it can run safely beside a live deployment.
+  - New dashboard unit tests for the required-Tari sync gate, the #35-latch × #31-failover
+    interaction, and simultaneous double outages.
+  - A generated **test inventory** (`docs/test-inventory.md`, `make test-inventory`) listing
+    every test/scenario across all suites, kept honest by a CI drift check.
+  - A non-destructive **`--check`** mode for the live harness (assert the box's current state —
+    no config change/apply/restore); the safe first run / ongoing health check. Validated with
+    a 22/22 green run against a real synced, mining box, which calibrated the harness to trust
+    monerod's own sync flag (a synced local node's dashboard sync panel reads "loading") and
+    `proxy_workers` for mining liveness (`stratum.conns` can read 0 while mining).
+  - A developer testing guide (`docs/testing-guide.md`): per-change recipes, conventions, and
+    the calibration gotchas learned on real hardware.
+  - Regression guards for past bugs/security fixes: extended the #90 hardening section of
+    `tests/stack/test_compose.sh` with per-service least-privilege checks for the Docker socket
+    proxies (the read proxy can't POST; the control proxy is start/stop-only; both mount the
+    socket read-only) and the Tari `[m]inotari` self-match guard — alongside the existing
+    no-new-privileges / cap_drop / credential-free-healthcheck assertions. Plus a
+    `dashboard.host` "auto"-revert test and the schema-migration test that caught the DB upgrade
+    bug above.
+  - Release/validation-server tooling: a `--readiness` mode for the live harness (non-destructive
+    assessment that a box is fit to be a release server — synced chains reusable, snapshot-capable
+    filesystem, disk headroom, secrets owner-only, dashboard localhost-only), a
+    `docs/release-server.md` guide (why end-to-end validation needs a dedicated server vs. what
+    GitHub Actions runs free on every PR, the hardening checklist, and the **safe**
+    self-hosted-runner setup), and a `release-gate.yml` workflow that runs the tier-4 matrix on a self-hosted
+    runner only on trusted code (manual dispatch / push to main — never on a fork PR).
+  - A `--safety-backup` rollback net for the live harness: takes a real `pithead backup` before
+    the destructive scenarios and automatically rolls the box back (down → restore → up) if
+    anything fails, removing the archive on success — so the destructive matrix can run on a
+    precious box. The `--lifecycle` phase also does a `backup` → `restore` round-trip (assert the
+    pool reverts and secrets survive), exercising both verbs end-to-end.
+  - `UPDATE_INTERVAL` is now env-configurable (lets the mini-stack loop fast in CI).
 - Dashboard header shows the host's **IP address** next to the hostname when the configured
   `dashboard.host` is a name, as `hostname @ ip` (e.g. `pithead.local @ 192.168.1.42`), so you can still reach the
   dashboard when the hostname doesn't resolve from your phone or another machine on the LAN. The
@@ -62,6 +109,14 @@ per the process in [`docs/releasing.md`](docs/releasing.md).
 
 ### Changed
 
+- The Compose **project name is now pinned to `pithead`** (`name:` in `docker-compose.yml`), so
+  the stack's images, network and volumes are prefixed `pithead*` regardless of the checkout
+  directory — instead of inheriting the directory's name (which left older checkouts named after
+  the repo's previous name). `pithead up`/`apply`/`upgrade` detect a stack still running under
+  the old, directory-derived project name and migrate it automatically (only that project's
+  containers are removed so the renamed project can take over — bind-mounted chain data and the
+  Tor onion keys are untouched). One-time after the rename, Caddy re-issues its local TLS cert
+  under the new project, so re-trust the dashboard cert if you'd installed the old one.
 - Hardened the leaf containers (caddy, xmrig-proxy, dashboard, docker-proxy, docker-control)
   with `no-new-privileges`. All except the dashboard also `cap_drop: [ALL]` (caddy keeps
   `NET_BIND_SERVICE` for `:80`/`:443`); the dashboard keeps its default capabilities because it
@@ -88,6 +143,16 @@ per the process in [`docs/releasing.md`](docs/releasing.md).
   before resetting (without an `apply`) can no longer wipe a directory the stack never used. It
   also refuses to run rather than guess if `.env` doesn't name them (#139).
 
+### Fixed
+
+- Dashboard pruned/full label (#32) always showed **Full** on local nodes: the dashboard parsed
+  `MONERO_PRUNE` with `== "true"`, but pithead writes it as `1`/`0`, so a pruned node read as
+  Full. Now accepts `1`/`true`/`yes`/`on`. Found by the live integration harness on a real box.
+- Dashboard DB upgrade path: opening a database created by an early (pre-`timestamp`) schema
+  threw `no such column: timestamp` and aborted the migration, leaving the DB half-upgraded —
+  `_create_tables` built the `idx_ts` index on a column `_migrate_db` hadn't added yet. Indexes
+  are now created after migrations. Found by a new schema-migration intent test.
+
 ### Security
 
 - The monerod RPC credentials are no longer interpolated into the compose healthcheck command
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b1501be..b801517 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -21,17 +21,26 @@ whole new feature, contributions are very welcome. This guide covers the workflo
    make test
    ```
 
-   This runs everything CI does:
+   This runs everything CI does without a server or Docker:
 
-   - **lint** — `shellcheck` over `pithead` and the test scripts. Keep `pithead`
-     shellcheck-clean (no new warnings).
-   - **test-dashboard** — the dashboard `pytest` suite, which must stay at or above the
-     **80% coverage gate**.
+   - **lint** — `shellcheck` over `pithead` and the test scripts (keep them
+     `--severity=warning` clean).
+   - **test-dashboard** — the dashboard `pytest` suite (must stay ≥ the **80% coverage gate**).
    - **test-stack** — the `pithead` shell test suite.
    - **test-compose** — `docker-compose.yml` interpolation validation.
-
-4. Update the docs in [`docs/`](docs/) (and the README, if relevant) for any
-   user-facing change.
+   - **test-integration-selftest** — the integration harness's own pure logic.
+   - **test-fakes** — the tier-2 contract test (real dashboard clients vs controllable fakes).
+   - the **test-inventory drift check** — fails if a test was added/removed without
+     regenerating [`docs/test-inventory.md`](docs/test-inventory.md) (`make test-inventory`).
+
+   Bigger, infra-dependent suites run separately: `make test-mini-stack` (tier-3 docker) and
+   `make test-integration` (tier-4 live, against a real box — start with `--check`).
+
+4. **Add or update tests** for your change — cover the *intent* (a behavior/contract), not just
+   the line. The [Testing Guide](docs/testing-guide.md) has per-change recipes; the
+   [Testing Strategy](docs/testing-strategy.md) explains the tiers.
+5. Update the docs in [`docs/`](docs/) (and the README, if relevant) for any
+   user-facing change, and run `make test-inventory` if you touched the test suites.
 
 ## Opening a pull request
 
diff --git a/Makefile b/Makefile
index bfa8c12..91a51fb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # Local test entry points (mirror the GitHub Actions CI jobs).
-.PHONY: test test-dashboard test-stack test-compose lint
+.PHONY: test test-dashboard test-stack test-compose test-integration test-integration-selftest test-fakes test-mini-stack test-inventory test-inventory-check lint
 
-test: lint test-dashboard test-stack test-compose ## Run everything
+test: lint test-dashboard test-stack test-compose test-integration-selftest test-fakes test-inventory-check ## Run everything that doesn't need a server/docker
 
 test-dashboard: ## Dashboard unit/component tests with coverage gate
 	cd build/dashboard && PYTHONPATH=. python3 -m pytest \
@@ -10,8 +10,33 @@ test-dashboard: ## Dashboard unit/component tests with coverage gate
 test-stack: ## pithead shell test suite
 	bash tests/stack/run.sh
 
-test-compose: ## Validate docker-compose.yml interpolation
+test-compose: ## Validate docker-compose.yml interpolation + hardening invariants (#90)
 	bash tests/stack/test_compose.sh
 
+test-integration-selftest: ## Integration harness pure-logic self-test (no server needed)
+	bash tests/integration/selftest.sh
+
+test-fakes: ## Fake-daemon contract test — real dashboard clients vs controllable fakes (no docker)
+	PYTHONPATH=build/dashboard python3 -m pytest tests/integration/fakes -q
+
+test-mini-stack: ## Fake-daemon docker mini-stack end-to-end (needs docker; CI)
+	bash tests/integration/mini-stack/run-mini-stack.sh
+
+test-inventory: ## Regenerate the test coverage inventory (docs/test-inventory.md)
+	bash tests/inventory.sh > docs/test-inventory.md
+
+test-inventory-check: ## Fail if docs/test-inventory.md is stale (CI drift guard)
+	@bash tests/inventory.sh | diff -u docs/test-inventory.md - \
+		&& echo "test-inventory is up to date" \
+		|| { echo "docs/test-inventory.md is stale — run 'make test-inventory'"; exit 1; }
+
+# End-to-end matrix against a REAL test server (issue #54). Needs a provisioned box; pass
+# connection + options through ARGS, e.g.:
+#   make test-integration ARGS="--host miner@10.0.0.5 --dir pithead --lifecycle"
+# See docs/integration-testing.md.
+test-integration: ## Run the live config-matrix integration suite (requires a test box; pass ARGS=...)
+	bash tests/integration/run.sh $(ARGS)
+
 lint: ## shellcheck the stack scripts
-	shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh
+	shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh \
+		tests/inventory.sh tests/integration/*.sh tests/integration/mini-stack/*.sh
diff --git a/build/dashboard/mining_dashboard/config/config.py b/build/dashboard/mining_dashboard/config/config.py
index 7aa4c3a..606d2fd 100644
--- a/build/dashboard/mining_dashboard/config/config.py
+++ b/build/dashboard/mining_dashboard/config/config.py
@@ -31,7 +31,12 @@
 # XMRig Worker API Configuration
 XMRIG_API_PORT = 8080
 API_TIMEOUT = 1         # Connection timeout (seconds) for worker API calls
-UPDATE_INTERVAL = 30    # Frequency (seconds) of the main data aggregation loop
+try:
+    # Main data-loop period (s); lowered in integration tests. A malformed or non-finite override
+    # ("inf" raises OverflowError) falls back to 30 instead of crashing at import; floor at 1s
+    # so a zero/negative/sub-second value can't truncate to a 0s-or-less period.
+    UPDATE_INTERVAL = max(1, int(float(os.environ.get("UPDATE_INTERVAL", "30"))))
+except (TypeError, ValueError, OverflowError):
 
 # --- XvB Algorithm Constants ---
 # Duration of the donation switching cycle (10 minutes)
@@ -127,7 +132,10 @@
 # Whether the bundled monerod is configured to prune the blockchain (config.json
 # monero.prune → MONERO_PRUNE). Used to label the node Pruned/Full in the UI (Issue #32);
 # only meaningful for a local node (we don't control a remote node's pruning).
-MONERO_PRUNE = os.environ.get("MONERO_PRUNE", "true").strip().lower() == "true"
+# pithead renders this as 1/0 (the form monerod's CLI wants), so accept the numeric/boolean
+# truthy forms — not just "true", which silently read pruned nodes as Full before (the
+# pruned/full label is purely display, #32).
+MONERO_PRUNE = os.environ.get("MONERO_PRUNE", "true").strip().lower() in ("true", "1", "yes", "on")
 
 # --- Tari Configuration ---
 # Connection details for the Tari Base Node and Block Explorer
diff --git a/build/dashboard/mining_dashboard/service/storage_service.py b/build/dashboard/mining_dashboard/service/storage_service.py
index 7853d72..2b361ef 100644
--- a/build/dashboard/mining_dashboard/service/storage_service.py
+++ b/build/dashboard/mining_dashboard/service/storage_service.py
@@ -63,6 +63,11 @@ def _init_db(self):
                 with self._conn:
                     self._create_tables()
                     self._migrate_db()
+                    # Indexes come AFTER migration: idx_ts is on history(timestamp), a column
+                    # _migrate_db adds when upgrading a pre-timestamp DB. Creating it in
+                    # _create_tables would throw "no such column: timestamp" on that old schema
+                    # and abort the whole migration, leaving the DB half-upgraded.
+                    self._create_indexes()
         except sqlite3.Error as e:
             self.logger.error(f"DB Init Error: {e}")
 
@@ -72,6 +77,10 @@ def _create_tables(self):
         self._conn.execute("CREATE TABLE IF NOT EXISTS workers (name TEXT PRIMARY KEY, ip TEXT, last_seen REAL)")
         self._conn.execute("CREATE TABLE IF NOT EXISTS kv_store (key TEXT PRIMARY KEY, value TEXT)")
         self._conn.execute("CREATE TABLE IF NOT EXISTS shares (ts REAL PRIMARY KEY, difficulty REAL)")
+
+    def _create_indexes(self):
+        """Creates indexes. Called after migrations so the indexed columns are guaranteed to
+        exist even on a database created by an older schema version."""
         self._conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON history(timestamp)")
         self._conn.execute("CREATE INDEX IF NOT EXISTS idx_share_ts ON shares(ts)")
 
diff --git a/build/dashboard/tests/config/test_config.py b/build/dashboard/tests/config/test_config.py
index ef8153e..eef8715 100644
--- a/build/dashboard/tests/config/test_config.py
+++ b/build/dashboard/tests/config/test_config.py
@@ -27,6 +27,24 @@ def test_donation_level_env_override(self):
             cfg = _reload_config()
             assert cfg.XVB_DONATION_LEVEL == "auto"  # normalized to lowercase
 
+    def test_monero_prune_accepts_truthy_forms(self):
+        # pithead writes MONERO_PRUNE=1, so "1" (and friends) must read as pruned — not just
+        # the literal "true". Regression for the Pruned/Full label always showing Full (#32).
+        for v in ("true", "1", "yes", "On", " 1 ", "TRUE"):
+            with patch.dict(os.environ, {"MONERO_PRUNE": v}):
+                assert _reload_config().MONERO_PRUNE is True, f"{v!r} should be pruned"
+
+    def test_monero_prune_accepts_falsy_forms(self):
+        for v in ("false", "0", "no", "off", ""):
+            with patch.dict(os.environ, {"MONERO_PRUNE": v}):
+                assert _reload_config().MONERO_PRUNE is False, f"{v!r} should be full"
+
+    def test_update_interval_tolerates_bad_values(self):
+        # A malformed override must fall back to the default, not crash the dashboard at import.
+        for v, expected in [("2", 2), ("2.5", 2), ("", 30), ("nonsense", 30)]:
+            with patch.dict(os.environ, {"UPDATE_INTERVAL": v}):
+                assert _reload_config().UPDATE_INTERVAL == expected, f"{v!r} -> {expected}"
+
     def test_tier_config_env_override_valid(self):
         custom = {"donor_ultra": 5_000_000, "donor_basic": 500}
         # deploy injects the JSON wrapped in single quotes
diff --git a/build/dashboard/tests/service/test_data_service.py b/build/dashboard/tests/service/test_data_service.py
index 304c681..ee881d7 100644
--- a/build/dashboard/tests/service/test_data_service.py
+++ b/build/dashboard/tests/service/test_data_service.py
@@ -580,3 +580,73 @@ async def test_iteration_survives_collector_error(self):
             # The error is caught inside the loop; the sleep after it raises to stop us.
             with pytest.raises(StopAsyncIteration):
                 await svc.run()
+
+
+class TestControlPlaneComposition:
+    """Compositions of the sync-gate (#35) and failover (#31) the per-feature tests don't
+    cover on their own: the required-Tari hold, and the two features coexisting after release."""
+
+    async def test_run_holds_when_tari_required_and_only_monero_synced(self):
+        # Monero synced, Tari still syncing, Tari REQUIRED: the gate condition
+        # `monero_synced AND (tari_synced OR NOT TARI_REQUIRED)` is NOT satisfied, so the
+        # miner stays held until Tari also finishes — the mirror of the non-blocking case.
+        svc, sm, proxy = _make_service()
+        proxy.get_workers.return_value = {"workers": []}
+        svc._apply_worker_rejection = AsyncMock()
+
+        worker_client = MagicMock()
+        worker_client.get_stats = AsyncMock(return_value={})
+        tari_client = MagicMock()
+        tari_client.get_sync_status = AsyncMock(
+            return_value={"is_syncing": True, "reachable": True, "percent": 80, "current": 80, "target": 100})
+        tari_client.close = AsyncMock()
+
+        with patch.object(ds_mod, "ClientSession", _FakeClientSession), \
+             patch.object(ds_mod, "XMRigWorkerClient", return_value=worker_client), \
+             patch.object(ds_mod, "TariClient", return_value=tari_client), \
+             patch.object(ds_mod, "SYNC_GATE_CONTAINERS", ["p2pool", "xmrig-proxy"]), \
+             patch.object(ds_mod, "TARI_REQUIRED", True), \
+             patch.object(ds_mod, "get_stratum_stats", return_value=({}, [])), \
+             patch.object(ds_mod, "get_network_stats", return_value={"height": 100}), \
+             patch.object(ds_mod, "get_tari_stats", return_value={"active": True, "status": "OK", "height": 3}), \
+             patch.object(ds_mod, "get_p2pool_stats", return_value={"pool": {"last_share_time": 0, "difficulty": 0}}), \
+             patch.object(ds_mod, "get_monero_sync_status", AsyncMock(return_value={"is_syncing": False, "reachable": True})), \
+             patch.object(ds_mod, "get_disk_usage", return_value={}), \
+             patch.object(ds_mod, "get_hugepages_status", return_value=("Enabled", "ok", "1/2")), \
+             patch.object(ds_mod, "get_memory_usage", return_value={}), \
+             patch.object(ds_mod, "get_load_average", return_value="0"), \
+             patch.object(ds_mod, "get_cpu_usage", return_value="0%"), \
+             patch("asyncio.sleep", AsyncMock(side_effect=StopAsyncIteration)):
+            with pytest.raises(StopAsyncIteration):
+                await svc.run()
+
+        stopped = {c.args[0] for c in svc.docker_control.stop.await_args_list}
+        assert stopped == {"p2pool", "xmrig-proxy"}
+        svc.docker_control.start.assert_not_called()
+        assert svc.miner_released is False
+        assert svc.latest_data["miner_held"] is True
+
+    async def test_post_release_blip_lets_failover_act_without_rehold(self):
+        # After release, a node-down event must NOT be re-held by the sync gate (the #35
+        # one-way latch), yet #31 failover must still stop the proxy so workers fail over.
+        # The two coexist: gate no-ops, rejection acts on the proxy only.
+        svc, _sm, _proxy = _make_service()
+        svc.miner_released = True
+        with patch.object(ds_mod, "SYNC_GATE_CONTAINERS", ["p2pool", "xmrig-proxy"]), \
+             patch.object(ds_mod, "REJECT_WORKERS_CONTAINER", "xmrig-proxy"), \
+             patch.object(ds_mod, "TARI_REQUIRED", True):
+            await svc._apply_sync_gate(gate_satisfied=False)   # latch → no-op
+            await svc._apply_worker_rejection(monero_down=True, tari_down=False)
+        stopped = [c.args[0] for c in svc.docker_control.stop.await_args_list]
+        assert stopped == ["xmrig-proxy"]          # p2pool was NOT re-held
+        svc.docker_control.start.assert_not_called()
+        assert svc.workers_rejected is True
+
+    async def test_both_nodes_down_rejects_once(self):
+        # A simultaneous Monero+Tari outage (both required) is a single rejection, not two.
+        svc, _sm, _proxy = _make_service()
+        with patch.object(ds_mod, "REJECT_WORKERS_CONTAINER", "xmrig-proxy"), \
+             patch.object(ds_mod, "TARI_REQUIRED", True):
+            await svc._apply_worker_rejection(monero_down=True, tari_down=True)
+        svc.docker_control.stop.assert_awaited_once_with("xmrig-proxy")
+        assert svc.workers_rejected is True
diff --git a/build/dashboard/tests/service/test_storage_service.py b/build/dashboard/tests/service/test_storage_service.py
index 0c75cbb..9ee3996 100644
--- a/build/dashboard/tests/service/test_storage_service.py
+++ b/build/dashboard/tests/service/test_storage_service.py
@@ -1,9 +1,10 @@
+import sqlite3
 import time
 
 import pytest
 
 from mining_dashboard.service.storage_service import StateManager
-from mining_dashboard.config.config import TIER_DEFAULTS
+from mining_dashboard.config.config import TIER_DEFAULTS, HISTORY_RETENTION_SEC, WORKER_RETENTION_SEC
 
 
 class TestDefaults:
@@ -153,3 +154,81 @@ def test_corrupted_kv_value_skipped(self, tmp_path):
         sm.load()  # must not raise
         assert sm.get_xvb_stats()["avg_1h"] == 0.0  # falls back to default
         sm.close()
+
+
+class TestSchemaMigration:
+    """The upgrade path: opening a DB created by an older version must migrate in place
+    without losing data. These exercise branches a fresh DB never hits."""
+
+    def test_history_timestamp_backfilled_from_iso_on_upgrade(self, tmp_path):
+        # Intent: a pre-timestamp history table (only the original t/v columns) must gain the
+        # v_p2pool/v_xvb/timestamp columns AND have timestamp backfilled from the ISO `t`
+        # string — otherwise old points become undatable and drop out of the chart/retention.
+        db = str(tmp_path / "old_schema.db")
+        # Recent UTC ISO strings (SQLite's strftime('%s', t) treats t as UTC) so the migrated
+        # rows fall inside load()'s 30-day retention window and aren't filtered out.
+        now = time.time()
+        t1 = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now - 7200))  # 2h ago
+        t2 = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now - 3600))  # 1h ago
+        conn = sqlite3.connect(db)
+        conn.execute("CREATE TABLE history (t TEXT, v REAL)")  # the original schema
+        conn.execute("INSERT INTO history (t, v) VALUES (?, ?)", (t1, 1000.0))
+        conn.execute("INSERT INTO history (t, v) VALUES (?, ?)", (t2, 1100.0))
+        conn.commit()
+        conn.close()
+
+        sm = StateManager(db_path=db)  # __init__ runs _create_tables, then _migrate_db, then _create_indexes
+        try:
+            hist = sm.get_history()
+            assert len(hist) == 2
+            assert all(h["timestamp"] > 0 for h in hist), "timestamp backfilled from ISO t"
+            # ordering preserved: the earlier ISO time sorts first (load() orders by timestamp)
+            assert hist[0]["timestamp"] < hist[1]["timestamp"]
+            # the new split-rate columns default to 0, not NULL
+            assert hist[0]["v_p2pool"] == 0 and hist[0]["v_xvb"] == 0
+        finally:
+            sm.close()
+
+
+class TestRetention:
+    """Long-running behavior: history/workers must not grow unbounded. Tests are white-box
+    (they backdate timestamps) so they don't need to actually wait days."""
+
+    def test_history_older_than_retention_pruned_from_memory(self, state_manager):
+        # Intent: appending a fresh sample drops in-memory points older than the 30-day window
+        # (the popleft loop), so the deque can't grow without bound on a long-running dashboard.
+        state_manager.state["hashrate_history"].append({
+            "t": "old", "v": 1.0, "v_p2pool": 0, "v_xvb": 0,
+            "timestamp": time.time() - HISTORY_RETENTION_SEC - 3600,  # 30d + 1h ago
+        })
+        assert len(state_manager.get_history()) == 1
+        state_manager.update_history(2000.0)  # a fresh sample at "now"
+        hist = state_manager.get_history()
+        assert len(hist) == 1 and hist[0]["v"] == 2000.0  # the ancient point was pruned
+
+    def test_old_history_pruned_from_db_when_cleanup_fires(self, state_manager, monkeypatch):
+        # Intent: the probabilistic DB cleanup actually deletes expired rows when it fires, so
+        # the on-disk DB stays bounded. We force the 5% path deterministically.
+        old_ts = time.time() - HISTORY_RETENTION_SEC - 10 * 24 * 3600  # 40 days ago
+        with state_manager._db_lock:
+            state_manager._conn.execute(
+                "INSERT INTO history (t, v, v_p2pool, v_xvb, timestamp) VALUES (?,?,?,?,?)",
+                ("old", 1.0, 0, 0, old_ts))
+            state_manager._conn.commit()
+        monkeypatch.setattr("mining_dashboard.service.storage_service.random.random", lambda: 0.0)
+        state_manager.update_history(2000.0)
+        with state_manager._db_lock:
+            remaining = state_manager._conn.execute(
+                "SELECT COUNT(*) FROM history WHERE timestamp < ?",
+                (time.time() - HISTORY_RETENTION_SEC,)).fetchone()[0]
+        assert remaining == 0, "expired DB rows are pruned"
+
+    def test_stale_workers_pruned_after_retention_window(self, state_manager):
+        # Intent: a worker not seen within WORKER_RETENTION_SEC (7d) is dropped when any worker
+        # next checks in — so stale name→IP mappings don't linger and leak memory.
+        state_manager.update_known_workers([{"name": "rig1", "ip": "10.0.0.1"}])
+        # Backdate rig1 so it's now older than the retention window.
+        state_manager.state["known_workers"]["rig1"]["last_seen"] = time.time() - WORKER_RETENTION_SEC - 3600
+        state_manager.update_known_workers([{"name": "rig2", "ip": "10.0.0.2"}])  # a fresh check-in
+        names = {w["name"] for w in state_manager.get_known_workers()}
+        assert "rig2" in names and "rig1" not in names
diff --git a/docker-compose.yml b/docker-compose.yml
index 782fb42..aaeab9d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,3 +1,9 @@
+# Pin the Compose project name so the stack is always "pithead" — its images, network and
+# volumes are prefixed `pithead*` regardless of the checkout directory's name. Without this,
+# Compose derives the project from the directory, which left older checkouts named after the
+# repo's previous name. `pithead up`/`apply`/`upgrade` migrate an old-named stack automatically.
+name: pithead
+
 x-logging: &default-logging
   driver: "json-file"
   options:
diff --git a/docs/README.md b/docs/README.md
index b30dd61..718aa24 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -17,7 +17,12 @@ deeper on individual topics once you're up and running.
 | [Connecting Miners](workers.md) | Pointing any existing rig at the stack, plus [RigForge](https://github.com/p2pool-starter-stack/rigforge) for setting up new miners. |
 | [Architecture](architecture.md) | The nine services, how they fit together, the privacy model, and the algorithmic XvB switching engine. |
 | [Operations & Maintenance](operations.md) | The full `pithead` command reference, upgrades, backups, and troubleshooting. |
+| [Testing Strategy](testing-strategy.md) | The four test tiers (unit → contract → fake-daemon mini-stack → live matrix), the full scenario catalog, and which tier proves each situation. |
+| [Testing Guide](testing-guide.md) | For developers: how to write and run tests, per-change recipes, conventions, and real-hardware gotchas. |
+| [Test Inventory](test-inventory.md) | Generated, exhaustive list of every test/scenario across all suites — the inventory of what's covered. |
+| [Integration Testing](integration-testing.md) | The end-to-end config-matrix suite that validates the stack against real Monero + Tari nodes — the blocking pre-release gate. |
 | [Releasing](releasing.md) | How Pithead is versioned and released — one product, one version, the `VERSION` source of truth, and the GHCR stage→promote pipeline. |
+| [Release / Validation Server](release-server.md) | Why end-to-end validation needs a dedicated server (and what GitHub Actions does free on every PR), how to provision and **harden** it, and the safe self-hosted-runner setup. |
 | [FAQ](faq.md) | Common questions, plus why Pithead vs. doing it yourself or Gupax. |
 
 ## Quick links
diff --git a/docs/integration-testing.md b/docs/integration-testing.md
new file mode 100644
index 0000000..05d7c73
--- /dev/null
+++ b/docs/integration-testing.md
@@ -0,0 +1,222 @@
+# Integration Testing
+
+How Pithead is validated end-to-end against a **real Ubuntu server** running full Monero and
+full Tari nodes — the runtime/integration half of our testing, and the **blocking pre-release
+gate** described in [Releasing](releasing.md) (issue
+[#54](https://github.com/p2pool-starter-stack/pithead/issues/54)).
+
+Our other suites are client-side and never touch a daemon: the `pithead` shell tests stub out
+`docker`/`sudo`, the compose test only checks `docker compose config` interpolation, and the
+dashboard pytest mocks its clients. They prove the *code* is correct; they can't prove that a
+real `apply → sync-gate → mine → status` flow works on a real host. That's what this suite is
+for.
+
+> This live matrix is **tier 4** of a four-tier plan. The runtime *situations* a healthy box
+> can't show (cold sync, node-down, unhealthy containers, XvB tiers) are simulated more cheaply
+> at lower tiers — unit tests, a client **contract test** against controllable fakes
+> ([`tests/integration/fakes/`](../tests/integration/fakes/)), and a **fake-daemon docker
+> mini-stack** ([`tests/integration/mini-stack/`](../tests/integration/mini-stack/)). See
+> [Testing Strategy](testing-strategy.md) for the full picture and scenario catalog.
+
+The suite lives under [`tests/integration/`](../tests/integration/):
+
+| File | Role |
+|---|---|
+| `run.sh` | Entry point. Connects to the box (SSH or `--local`), iterates the config matrix, asserts, captures artifacts, restores. |
+| `scenarios.sh` | The **declarative config matrix** — adding a case is a one-line data edit. |
+| `lib.sh` | Shared helpers: target I/O (SSH/local), assertions, readiness waiters, config rendering, secret redaction. |
+| `selftest.sh` | Pure-logic self-test (no server). Runs in CI on every PR. |
+
+---
+
+## How it works
+
+The suite assumes the box is **already deployed and synced with miners connected** — the whole
+point of a dedicated test server is that the full Monero and Tari nodes are synced once and
+*reused*, so each scenario runs in minutes instead of waiting days for a chain sync.
+
+Given that, the harness moves between matrix scenarios with non-interactive **`pithead apply
+-y`**, which:
+
+- recreates only the containers whose resolved config changed,
+- **reuses the synced chain data dirs** (it never re-syncs, never re-provisions Tor), and
+- **preserves secrets** (`PROXY_AUTH_TOKEN`, onion addresses).
+
+For each scenario it writes a `config.json`, applies it, **waits on real readiness signals**
+(container health, `pithead status`, dashboard sync %, miner-released) with timeouts — never a
+fixed `sleep` — then runs the assertion battery below. All reads happen *on the box*
+(`pithead status`/`doctor` and `curl http://127.0.0.1:8000/api/state`), so SSH and `--local`
+behave identically and we never depend on resolving the box's dashboard hostname.
+
+Before the first scenario it snapshots the box's original `config.json` and a fingerprint of
+its secrets; after the run it **restores the original config** and re-applies (unless
+`--keep`).
+
+### Safety model
+
+The test box holds real synced nodes and real keys — treat it as production-sensitive.
+
+- **Never mutates the canonical chains.** The harness only ever writes `config.json` and lets
+  `apply` recreate containers. It does not `rm -rf` data dirs. The destructive `monero.prune`
+  axis (a pruned and a full DB differ on disk) is only exercised against a *separate*
+  synced data dir you pass with `--pruned-data-dir` / `--full-data-dir`; without it the case
+  is reported **SKIPPED**, never run against the canonical DB.
+- **No silent coverage drops.** Any scenario whose prerequisite is missing (an alt data dir, a
+  remote endpoint) is logged as `SKIPPED` with the reason — it never quietly disappears.
+- **Secrets hygiene.** RPC creds, the proxy token, and onion addresses are never printed.
+  Secret-preservation is checked by hashing them **on the box** (`sha256sum`) and comparing the
+  hash — the plaintext never crosses the wire. All captured artifacts are passed through a
+  redactor.
+- **Continue-on-error.** A failing assertion doesn't abort the run; the whole matrix is
+  collected and summarized, with per-scenario artifacts for the failures.
+
+---
+
+## Provisioning the test box
+
+A one-time setup. Target the Ubuntu LTS releases we support (22.04 / 24.04).
+
+1. **Install and deploy Pithead** normally (see [Getting Started](getting-started.md)) and let
+   it fully sync. You want the box in the steady state: all containers healthy, Monero + Tari
+   synced, and at least one miner (ideally two) connected and submitting shares.
+2. **Reusable synced data.** The synced `monero.data_dir` and `tari.data_dir` are the key
+   enabler — they're reused across every scenario. The same synced full monerod is also what
+   the `remote` scenario points at as an external node (see `--remote-monero-host`).
+3. **Tools on the box:** `jq`, `curl`, `docker` (with compose v2), and `sha256sum`. The first
+   three are already Pithead prerequisites; `sha256sum` ships with coreutils.
+4. **Access.** Key-based SSH from wherever you run the suite (or run it on the box with
+   `--local`). If Docker needs root there, use `--pithead "sudo ./pithead"`.
+5. *(Optional)* A second synced data dir for the **opposite** prune mode if you want to cover
+   both pruned and full in one run — see the prune axis above.
+
+> **Runner security.** Keep the box least-privilege and network-isolated; it holds real keys.
+> This is a self-hosted/manual gate, not something we run on public CI.
+
+---
+
+## Running it
+
+```bash
+# Non-destructive health check first (recommended): no config changes, no apply
+tests/integration/run.sh --host miner@10.0.0.5 --dir pithead --check
+
+# Whole matrix over SSH
+make test-integration ARGS="--host miner@10.0.0.5 --dir pithead"
+
+# …or directly
+tests/integration/run.sh --host miner@10.0.0.5 --dir pithead
+
+# On the box itself, plus the lifecycle + node-down failover phase
+tests/integration/run.sh --local --dir /home/miner/pithead --lifecycle
+
+# A single scenario (see --list for names)
+tests/integration/run.sh --host miner@10.0.0.5 --scenario remote-main-secure-tari \
+    --remote-monero-host 10.0.0.5:18081
+
+# Cover the OPPOSITE prune mode. The box mines one mode against its live chain; the other is
+# skipped unless you supply a chain for it (it's otherwise covered by the fake mini-stack). A
+# pruned box supplies a full chain; a full box supplies a pruned one (build one with
+# tests/integration/build-pruned-chain.sh). See docs/release-server.md → prune-axis recipe.
+tests/integration/run.sh --host miner@10.0.0.5 --full-data-dir /srv/monero-full
+```
+
+Useful flags (full list in `run.sh --help`):
+
+| Flag | Purpose |
+|---|---|
+| `--host <user@host>` / `--local` | Drive the box over SSH, or a stack on this machine. |
+| `--dir <path>` | The Pithead stack directory **on the box** — relative to the SSH login dir or absolute (default `pithead`). Avoid a literal `~`; your local shell expands it before the box sees it. |
+| `--pithead <cmd>` | How to invoke pithead there (e.g. `"sudo ./pithead"`). |
+| `--check` | **Non-destructive**: assert the box's current live state only — no config change, no apply, no restore. The safe first run / ongoing health check. |
+| `--readiness` | **Non-destructive**: assess whether the box is fit to be a release/validation server (synced chains reusable, snapshot-capable FS, disk headroom, secrets owner-only, dashboard localhost-only). See [Release Server](release-server.md). |
+| `--scenario <name>` | Run just one scenario. |
+| `--workers <n>` | Miners expected online while mining (default `2`). |
+| `--remote-monero-host <h>` | External node endpoint for the `remote` scenario. |
+| `--pruned-data-dir` / `--full-data-dir` | Synced alt DB to enable the opposite prune mode. |
+| `--lifecycle` | Also run the lifecycle + failover phase (restart, secret-preserving `apply`, node-down failover → recovery). |
+| `--fault-injection` | Also break monerod (stop / SIGSTOP / remove) and assert `status`' down/unhealthy/missing verdicts and the failover→recovery cycle. Destructive-then-restored; local mode only; slow. |
+| `--safety-backup` | Take a `pithead backup` before the destructive scenarios and **auto-roll-back** (down → restore → up) if anything fails; the archive is removed on success. Recommended for the destructive matrix on a precious box; also exercises backup/restore end-to-end. |
+| `--keep` | Don't restore the original config (leave the box on the last scenario). |
+| `--out <dir>` | Where to write the manifest and failure artifacts. |
+| `--list` | Print the matrix and axis coverage and exit. |
+
+The runner exits non-zero if any assertion failed.
+
+---
+
+## The config matrix
+
+Every axis below changes a real runtime path. The matrix covers the realistic combinations and
+guarantees **every value of every axis is exercised at least once** (the `selftest` enforces
+this, and `--list` prints it).
+
+| Axis | Values | What it exercises |
+|---|---|---|
+| `monero.mode` | `local` / `remote` | profile gating, RPC wiring, `status` ignoring monerod in remote mode |
+| `monero.prune` | `true` (pruned) / `false` (full) | pruned vs. full display ([#32](https://github.com/p2pool-starter-stack/pithead/issues/32)), DB size |
+| `monero.rpc_lan_access` | `false` (127.0.0.1) / `true` (LAN) | RPC bind address, security posture |
+| `p2pool.pool` | `main` / `mini` / `nano` | `P2POOL_FLAGS`, sidechain selection |
+| `xvb.enabled` | `true` / `false` | XvB tunnel/donor wiring |
+| `dashboard.secure` | `true` (Caddy TLS) / `false` | Caddy config / scheme |
+| `dashboard.tari_required` | `true` (blocking) / `false` | sync-gate behavior ([#35](https://github.com/p2pool-starter-stack/pithead/issues/35)/[#51](https://github.com/p2pool-starter-stack/pithead/issues/51)) |
+
+### What each scenario asserts
+
+- **Expected containers up, unexpected absent** — every service for that config is running and
+  healthy; in `remote` mode there is **no** `monerod`.
+- **`pithead status` exit code** — `0` for a healthy config.
+- **Dashboard reads live state** — `/api/state` is reachable; Monero is synced (`done`);
+  pruned/full display matches `monero.prune` ([#32](https://github.com/p2pool-starter-stack/pithead/issues/32)); the sidechain `pool.type` matches `p2pool.pool`.
+- **End-to-end mining** — workers are online (`proxy_workers >= --workers`), stratum has
+  connections, and total hashes are accumulating ([#28](https://github.com/p2pool-starter-stack/pithead/issues/28)).
+- **Posture propagated** — `MONERO_RPC_BIND`, `DASHBOARD_SECURE`, `XVB_ENABLED`, and
+  `TARI_REQUIRED` in `.env` match the config; the Caddyfile uses the right scheme.
+- **Idempotency** — a second `apply -y` with no change is a clean no-op.
+- **Secrets preserved** — the proxy token and onion addresses are unchanged across every apply.
+
+### Lifecycle + failover (`--lifecycle`)
+
+For one representative config:
+
+- `restart` brings the stack back healthy (`status` → `0`).
+- An `apply` that changes the sidechain recreates only the affected containers and
+  **preserves secrets**; the dashboard reflects the new pool; then it's reverted.
+- **Node-down failover ([#31](https://github.com/p2pool-starter-stack/pithead/issues/31)):**
+  stop `monerod` → `status` returns non-zero (node down) and the dashboard rejects workers
+  (stops `xmrig-proxy`) → start `monerod` → workers readmitted → `status` → `0`.
+
+> `upgrade` (which rebuilds/pulls images) is intentionally **not** run unattended — it's slow
+> and changes the bundle under test. Validate it as part of the [release](releasing.md)
+> staging smoke test instead.
+
+---
+
+## Artifacts & triage
+
+Each run writes a **manifest** (`results/manifest.txt`) recording exactly what was under test
+— the stack `VERSION`, git revision, and `docker compose images` — so a run is reproducible.
+
+On a scenario failure, the harness captures (redacted) to `results/<scenario>/`:
+`compose-ps.txt`, `status.txt`, `doctor.txt`, `config.json`, `env.redacted.txt`,
+`api-state.json`, and `logs.txt` (last 200 lines per service). The end-of-run summary lists
+each failed assertion and points at these.
+
+---
+
+## The self-test (CI)
+
+`tests/integration/selftest.sh` exercises the harness's pure logic — config rendering and
+value typing, expectation derivation (profile gating), secret redaction, the SSH/local exec
+wrapper, JSON parsing, and **matrix axis coverage** — with no server. It runs in CI on every
+PR (the `shell` job) and via `make test-integration-selftest`, so the harness itself is held to
+the same lint/test standard as the rest of the stack.
+
+---
+
+## Release gate (#44)
+
+The live matrix is the **required, blocking pre-release gate**: a release is not promoted or
+published unless it's green against the real Monero + Tari nodes. It's surfaced today as `make
+test-integration`; making it a blocking step inside the `make release` pipeline is still to be
+wired — see [Releasing › Pre-release gate](releasing.md#pre-release-gate-54). The version
+tagged/published is the exact bundle this run validated.
diff --git a/docs/release-server.md b/docs/release-server.md
new file mode 100644
index 0000000..69c966f
--- /dev/null
+++ b/docs/release-server.md
@@ -0,0 +1,210 @@
+# The Release / Validation Server
+
+How we validate a build **end-to-end before release**, why that needs a dedicated server, what
+GitHub Actions does for free on every PR, and how to harden the server so it can't become a
+liability. This is the operational companion to [Releasing](releasing.md) (the version/promote
+pipeline) and [Integration Testing](integration-testing.md) (the harness it runs).
+
+## Can GitHub Actions do the full end-to-end? (short answer: no — and that's fine)
+
+**GitHub-hosted runners can't do the real-chain tier.** On a public repo the hosted Ubuntu
+runners are generous and **free** (4 vCPU / 16 GiB RAM), but they are **ephemeral** — a fresh VM
+per job, ~14 GiB of free disk, and a 6-hour job ceiling. A Monero chain is ~95 GiB pruned /
+~270 GiB full and takes **days** to sync; Tari adds ~50 GiB. There is nowhere to keep that
+synced state between runs, and no time to sync it inside a job. So the **real-daemon, real
+merge-mining tier (tier 4) is simply not possible on hosted runners** — which is the whole
+reason a dedicated, already-synced server exists ([#54](https://github.com/p2pool-starter-stack/pithead/issues/54)).
+
+**But GitHub already runs almost everything else, free, on every PR.** Tiers 1–3 of the
+[testing strategy](testing-strategy.md) need no real chain and run on the hosted runners in
+minutes:
+
+- **Tier 1 — unit/component** (dashboard pytest + coverage gate, frontend, the `pithead` shell
+  suite, compose interpolation **and the #90 security/hardening invariants**).
+- **Tier 2 — contract** (the real Monero/Tari clients vs. controllable fakes).
+- **Tier 3 — the fake-daemon mini-stack** (the **real** dashboard + docker-control proxy driven
+  against fake daemons, with **real Docker** on the hosted runner) — this proves the control
+  plane end-to-end (sync hold/release, reject/readmit) on every PR.
+
+So the split is clean:
+
+| | Runs | Cost | Triggered |
+|---|---|---|---|
+| **Tiers 1–3** (logic, wiring, control plane, hardening) | GitHub-hosted runners | free (public repo) | **every PR** — the merge gate |
+| **Tier 4** (real synced Monero+Tari, real merge-mining, prune/full DB, TLS/Tor, the config matrix, the staging smoke test) | the **dedicated server** | your hardware | pre-release / on-demand — the **release gate** |
+
+The hosted runners catch the vast majority of regressions before merge; the dedicated server
+proves the things only reality can — and it's the **blocking pre-release gate**.
+
+## Validating PRs on the dedicated server — possible, but security-loaded
+
+You *can* register the server as a GitHub Actions **self-hosted runner** so Actions dispatches
+the tier-4 job to it (self-hosted minutes don't count against anything — also free). But there
+is a sharp edge, and it's the single most important thing on this page:
+
+> **GitHub explicitly recommends against self-hosted runners on public repositories.** Any user
+> can open a pull request, and a malicious PR can run **arbitrary code on the runner**. Our
+> server holds real **wallet payout addresses, Tor onion private keys, and RPC credentials**, so
+> a compromised runner is a key-theft / persistent-backdoor event, not a flaky build.
+
+The safe rule: **the keyed server only ever runs code we trust.** Concretely:
+
+- **Do NOT trigger tier-4 on `pull_request`** (and never on a fork PR). "Require approval" only
+  gates *starting* the run — once it starts, the PR's code still executes on the box.
+- **Trigger tier-4 only on trusted code:** `workflow_dispatch` (a maintainer manually runs it on
+  a ref they've reviewed) and/or `push` to `main` (post-merge). To E2E a specific fork PR, a
+  maintainer reviews it first, then dispatches the workflow on that ref.
+- Register the runner as **ephemeral / just-in-time** (one job, then auto-removed) in its own
+  **runner group**, isolated from any private repos.
+- Keep the runner **least-privilege**: a dedicated unprivileged user, the box runs nothing else
+  sensitive, and ideally the runner can reach the stack only through `pithead`/`docker`, not the
+  raw key files.
+
+This is exactly how the workflow ships:
+[`.github/workflows/release-gate.yml`](../.github/workflows/release-gate.yml) runs **only** on
+`workflow_dispatch` (and `push` to `main`) on a `[self-hosted, pithead-release]` runner — never
+automatically on a PR.
+
+## Provisioning the server
+
+Target an LTS Ubuntu (22.04 / 24.04). One-time:
+
+1. **Install Pithead and let it fully sync** ([Getting Started](getting-started.md)) — full
+   Monero + full Tari, all containers healthy, a worker (ideally two) mining. The synced
+   `monero.data_dir` / `tari.data_dir` are the asset the harness reuses.
+2. **Keep the active chain on fast storage (SSD/NVMe).** monerod is random-I/O heavy, so the
+   chain it runs against must not sit on a spinning HDD — that alone makes every scenario crawl.
+   A snapshot/reflink-capable filesystem (**btrfs**/**zfs**/**xfs reflink**) is a *bonus*: it lets
+   the harness snapshot/restore a chain cheaply for the prune axis. But it's optional — on plain
+   ext4-on-SSD the matrix only edits `config.json` and reuses one chain, with `--safety-backup`
+   isolating destructive runs. See the recipe below for the prune-axis details.
+3. **Disk headroom** — enough for the chains plus a snapshot / second DB (budget ≥ ~150 GiB
+   free beyond the live chains).
+4. **Tools** — `jq`, `curl`, `docker` (compose v2), `sha256sum`, `git`, `tar`.
+
+Check the box is fit at any time, **non-destructively**:
+
+```bash
+tests/integration/run.sh --host you@server --dir pithead --readiness
+```
+
+It asserts: chains synced (reusable), the prune axis is exercisable (the live chain FS is
+snapshot-capable **or** a pre-built variant chain is supplied), disk headroom, `.env` is
+owner-only, the dashboard is bound to localhost, and the backup/rollback net is usable.
+
+### Recipe: prune-axis coverage, and the storage that actually matters
+
+**Put the active chain on fast storage.** The biggest factor is the *disk*, not the filesystem:
+monerod does heavy random LMDB I/O, so a chain on a 7200 rpm HDD makes every scenario crawl.
+Check what you have before placing chains:
+
+```bash
+lsblk -d -o NAME,ROTA,SIZE,MODEL   # ROTA=0 is SSD/NVMe, ROTA=1 is a spinning HDD
+```
+
+Keep the chain monerod runs against on an **SSD/NVMe**. A spare **HDD** is fine for cold backups
+and `pithead backup` archives — but *not* for an active test chain.
+
+**A CoW filesystem (btrfs/zfs/xfs-reflink) is a bonus, not a requirement.** On a CoW volume the
+harness can snapshot/restore a chain cheaply for per-scenario isolation — but only if it's on
+fast storage. A loopback btrfs on a spare HDD gives you CoW semantics at HDD speed, which is the
+wrong trade for an *active* chain. If your root FS is ext4 on an SSD (the common case) you don't
+need CoW at all: the matrix only edits `config.json` and reuses one chain, and `--safety-backup`
+(a `pithead backup` + auto-rollback) isolates the destructive scenarios.
+
+**Covering both prune modes.** The box mines one mode (its real config). The harness exercises
+that mode against the live chain and **skips** the other unless you supply a chain for it
+(`--full-data-dir` / `--pruned-data-dir`). You usually don't need to: the opposite mode is
+covered by the fake mini-stack ([integration-testing](integration-testing.md)) plus the
+compose/config tests, which need no real chain. Supply the opposite-mode chain only to exercise
+it end-to-end — and build it on fast storage:
+
+- **Pruned chain next to a full one?** [`build-pruned-chain.sh`](../tests/integration/build-pruned-chain.sh)
+  copies the LMDB consistently (brief monerod stop, then immediate restart) and prunes the *copy*,
+  leaving the canonical chain untouched. Fetch `monero-blockchain-prune` at the **same version**
+  as the running monerod and verify it against the hash the image pins (`build/monero/Dockerfile`
+  → `MONERO_VERSION` / `MONERO_HASH`).
+- **Full chain?** Pruning is irreversible, so a full chain means a fresh full sync
+  (`MONERO_PRUNE=0`, ~1–3 days) — rarely worth it just for test coverage.
+
+`gouda` (the reference box) is a **pruned** node on NVMe: it validates pruned mode live with
+`--safety-backup`, and full mode comes from the fakes. `--readiness` reports exactly this:
+
+```bash
+tests/integration/run.sh --host you@server --dir pithead --readiness
+```
+
+> **Gotcha — a pruned chain's file stays large.** An in-place prune does *not* shrink the LMDB
+> file: it stays at the full-chain high-water mark (~250 GiB) with the freed space sitting as
+> internal free pages (Monero reuses them as the chain grows). To actually reclaim it you must
+> rewrite the DB with `monero-blockchain-prune --copy-pruned-database` (see
+> [`compact-chain.sh`](../tests/integration/compact-chain.sh)) — slow (it copies every block over
+> hours), though it reads through a snapshot so monerod keeps mining; you then swap the compact
+> copy in during a ~2 min window. The generic `mdb_copy -c` does **not** work: Monero ships a
+> patched LMDB and stock mdb_copy rejects the format (`MDB_VERSION_MISMATCH`). Often it's simplest
+> to leave the free pages.
+
+## Hardening checklist (the pitfalls)
+
+Treat the box as **production-sensitive** — it holds keys *and* it's the thing that signs off
+releases.
+
+- **Secrets.** `.env` (RPC creds), `config.json` (wallet addresses), and the Tor data dir
+  (onion private keys) must be **owner-only** (`chmod 600 .env`; the `--readiness` check verifies
+  this). Never print secrets in logs; the harness hashes them on the box and redacts artifacts.
+  If the box also *publishes* releases, the GHCR token lives in the environment / a secret store,
+  never in the repo.
+- **Network.** Firewall to least exposure: inbound **SSH** (key-only, no root login, fail2ban)
+  and the **stratum** port scoped to the LAN ([workers › firewall](workers.md#firewall)); the
+  **dashboard stays on localhost behind Caddy** and the **monerod RPC on localhost** (both
+  asserted by `--readiness`). Nothing else should be reachable from the internet.
+- **Untrusted code.** The runner only runs trusted code (see above). Prefer ephemeral/JIT
+  runners; don't share the runner with private repos.
+- **Least privilege.** A dedicated unprivileged user; the stack already runs least-privilege
+  containers (`no-new-privileges`, `cap_drop`, read-only roots, scoped Docker socket proxies —
+  regression-guarded in `tests/stack/test_compose.sh`).
+- **Reproducible, clean baseline.** The matrix reuses the synced chains and never mutates the
+  canonical copies (config-only changes, snapshot/restore for the prune axis), restores the
+  original `config.json` at the end, and `--safety-backup` takes a `pithead backup` first and
+  **rolls the box back** (down → restore → up) if anything fails.
+- **Build isolation & integrity.** Build images in containers with pinned upstream versions and
+  SHA256-verified binaries (the stack already does this); promote releases **by digest** so the
+  published bundle is bit-for-bit what was validated ([Releasing](releasing.md)).
+
+## How a release is validated end-to-end
+
+1. **Every PR** → GitHub-hosted runners run tiers 1–3 (the merge gate). Cheap, free, fast.
+2. **Pre-release (or on-demand for a reviewed PR)** → a maintainer dispatches the release-gate
+   workflow on the dedicated server: `make test` (tiers 1–2 on the trusted box) **+** the tier-4
+   live matrix against the real synced nodes (`run.sh --safety-backup`), then — per
+   [Releasing](releasing.md) — the staging smoke test (pull the GHCR images on a clean host,
+   real `setup → up → status → mine` check).
+3. **Nothing is tagged or published until that's green**, and promotion is by digest, so the
+   version users get is the exact bundle the server validated.
+
+## End-to-end coverage & gaps
+
+What the live tier-4 gate actually exercises, and what it doesn't — so a release decision is made
+with eyes open. (The reference box `gouda` is a **pruned** Monero node on NVMe; its own snapshot
+and this table also live at `~/pithead-testbench/` on the box, for operators and AI agents.)
+
+**Validated live** (real synced chains): the config matrix (remote/local node, dashboard
+secure/insecure, Tari required/optional, RPC LAN access, XvB on/off) applied + asserted; lifecycle
+(restart, secret-preserving `apply`, backup→restore round-trip); node-down failover → recovery;
+release readiness; pruned monerod (the real prod config). **Covered without a real chain**
+(tiers 1–3): client↔daemon contract tests, the fake-daemon mini-stack (incl. full/unpruned-mode behavior),
+compose hardening, config rendering, dashboard tests.
+
+| Gap (not tested live) | Worth filling before release? |
+|---|---|
+| **Full (unpruned) Monero** live — a pruned box can't exercise it | **Low** — stack paths don't differ by prune mode; fakes/config cover it. A multi-day full sync isn't justified. |
+| **Privacy / Tor egress** — no clearnet-leak assertions in the live harness (#160) | **High** — privacy is a core promise. Add egress checks (no clearnet to XvB stats, p2pool, Tari DNS). |
+| **Automated PR gate** — the self-hosted runner is manual/opt-in | **Medium-high, high-leverage** — wire the live harness as a required check on `workflow_dispatch`/push-to-`main` only (never fork PRs). |
+| **Upgrade / migration** across image versions with chain continuity | **Medium** — add a scenario: pull new images → `apply` → assert no re-sync + secrets intact. |
+| **XvB live routing** end-to-end (the raffle optimization) | **Medium** — core value-prop but unit/sim-tested today; a periodic live smoke test would help. |
+| **Multi-worker scale** — the harness assumes ~2 workers | **Medium** — add a load-gen worker + assert proxy routing/hashrate for perf confidence. |
+| **Real Tari merge-mined block** acceptance | **Low** — probabilistic; rely on template/connectivity checks. |
+| **Fault injection over SSH** (currently local-mode only) | **Low-Medium** — extend the SIGSTOP/remove cases to the `--host` path. |
+
+**Recommended before release:** the privacy-egress checks and the automated PR gate; then the
+upgrade scenario and an XvB live smoke test. The remainder are nice-to-have.
diff --git a/docs/releasing.md b/docs/releasing.md
index a6daf5e..9fb3dea 100644
--- a/docs/releasing.md
+++ b/docs/releasing.md
@@ -64,6 +64,10 @@ nodes (the integration-test environment from
 point — `make release` (or `pithead release`) — runs the whole pipeline. **Nothing is
 promoted or published until every gate is green.**
 
+> How to provision and **harden** that server, why end-to-end validation can't run on
+> GitHub-hosted runners (and what does run free on every PR), and the safe self-hosted-runner
+> setup are covered in **[Release / Validation Server](release-server.md)**.
+
 ### Pipeline: stage → smoke-test → promote
 
 1. **Preflight** — clean working tree; read the product version from the top-level
@@ -136,6 +140,10 @@ What exists today:
 - ✅ Top-level `VERSION` file (single source of truth).
 - ✅ `CHANGELOG.md` (Keep a Changelog + SemVer, with an `Unreleased` section).
 - ✅ This document.
+- ✅ The [#54](https://github.com/p2pool-starter-stack/pithead/issues/54) integration test
+  suite — the live config-matrix gate against real nodes (`tests/integration/`, `make
+  test-integration`). See [Integration Testing](integration-testing.md). Still to wire: making
+  it a *blocking step* inside the (not-yet-built) `make release` pipeline.
 - ✅ The dashboard version badge ([#58](https://github.com/p2pool-starter-stack/pithead/issues/58)) —
   `VERSION` + git build-args baked into the dashboard image (env + OCI labels); shows `vX.Y.Z` on
   releases and `dev · branch @ hash` otherwise.
diff --git a/docs/test-inventory.md b/docs/test-inventory.md
new file mode 100644
index 0000000..6ff0d13
--- /dev/null
+++ b/docs/test-inventory.md
@@ -0,0 +1,667 @@
+# Test Inventory
+
+_Generated by `make test-inventory` ([`tests/inventory.sh`](../tests/inventory.sh)). **Do not
+edit by hand** — re-run the target to refresh. See [Testing Strategy](testing-strategy.md) for
+how the tiers fit together._
+
+**Totals:** 418 dashboard unit tests · 12 contract tests · 25 frontend
+tests · 21 `pithead` shell sections · 11 harness self-test sections ·
+8 live config scenarios (15 axis values) · 6 mini-stack scenarios.
+
+> Counts are **test functions / named cases** (parametrized pytest cases expand to more at
+> run time — e.g. the dashboard suite collects ~381). Generated statically by grep, so it's
+> stable regardless of what's installed.
+
+| Tier | Suite | Cases |
+|---|---|---|
+| 1 — Unit | dashboard pytest | 418 |
+| 1 — Unit | frontend (node --test) | 25 |
+| 1 — Unit | `pithead` shell suite | 21 sections |
+| 1 — Unit | compose interpolation + hardening (#90) | 1 |
+| 2 — Contract | fake-daemon clients | 12 |
+| 3 — Mini-stack | docker control-plane scenarios | 6 |
+| 4 — Live matrix | config scenarios | 8 (15 axis values) |
+| 4 — Live matrix | harness self-test | 11 sections |
+
+---
+
+## Tier 1 — Unit & component
+
+### Dashboard (pytest) — 418 tests
+
+#### tests/client/test_docker_control.py — 6
+- test_tcp_scheme_rewritten_to_http
+- test_stop_success_204
+- test_already_stopped_304_is_success
+- test_start_success
+- test_error_status_returns_false
+- test_connection_error_returns_false
+
+#### tests/client/test_monero_client.py — 12
+- test_url_and_digest_auth_built
+- test_no_username_means_no_auth
+- test_success_returns_payload
+- test_network_error_returns_none
+- test_non_200_returns_none
+- test_non_json_returns_none
+- test_busy_status_returns_none
+- test_syncing
+- test_synced_via_flag
+- test_synced_via_zero_target
+- test_synced_when_height_reaches_target
+- test_unreachable_returns_none
+
+#### tests/client/test_tari_client.py — 7
+- test_fully_synced
+- test_syncing_with_target
+- test_syncing_without_reliable_target
+- test_grpc_error_returns_default_when_no_cache
+- test_serves_last_known_state_on_transient_failure
+- test_stale_cache_expires
+- test_close_closes_channel
+
+#### tests/client/test_xmrig_client.py — 5
+- test_first_success_returns_payload_and_short_circuits
+- test_all_attempts_fail_returns_empty
+- test_exceptions_are_swallowed
+- test_zero_ip_skipped_uses_name_host
+- test_name_token_strips_plus_suffix
+
+#### tests/client/test_xmrig_proxy_client.py — 7
+- test_auth_header_set
+- test_get_summary
+- test_get_workers
+- test_get_config
+- test_update_config_returns_json
+- test_update_config_204_returns_empty
+- test_get_summary_raises_on_http_error
+
+#### tests/client/test_xvb_client.py — 7
+- test_missing_wallet_returns_none
+- test_get_stats_success_parses_html
+- test_get_stats_non_200_returns_none
+- test_get_stats_network_error_returns_none
+- test_fail_count_only
+- test_no_critical_stats_returns_none
+- test_hashrate_units
+
+#### tests/collector/test_logs.py — 18
+- test_parses_multiple_frames
+- test_skips_blank_lines
+- test_truncated_frame_breaks_cleanly
+- test_success
+- test_non_200_returns_error
+- test_connection_error_handled
+- test_syncing
+- test_synced
+- test_file_not_found
+- test_bad_json
+- test_new_format_top_block_candidate
+- test_old_synced_format
+- test_already_synchronized
+- test_error_logs
+- test_rpc_result_used_when_available
+- test_falls_back_to_logs_when_rpc_unreachable
+- test_local_when_default_host
+- test_remote_when_other_host
+
+#### tests/collector/test_pools.py — 15
+- test_empty_is_unknown
+- test_majority_wins
+- test_unknown_ports
+- test_port_matched_exactly_not_as_substring
+- test_aggregates_sources
+- test_empty_files_give_defaults
+- test_hashrate_derived_when_missing
+- test_hashrate_passthrough
+- test_worker_parsing
+- test_worker_without_name_defaults_to_miner
+- test_active_chain_converts_utari
+- test_no_chains_inactive
+- test_missing_file_returns_empty
+- test_malformed_json_returns_empty
+- test_valid_json
+
+#### tests/collector/test_system.py — 11
+- test_normal
+- test_error_returns_zeros
+- test_parses_meminfo
+- test_error_returns_zeros
+- test_formats
+- test_error
+- test_delta_calculation
+- test_malformed_line
+- test_enabled_when_used
+- test_allocated_when_unused
+- test_unknown_when_missing
+
+#### tests/config/test_config.py — 8
+- test_defaults_load
+- test_donation_level_env_override
+- test_monero_prune_accepts_truthy_forms
+- test_monero_prune_accepts_falsy_forms
+- test_update_interval_tolerates_bad_values
+- test_tier_config_env_override_valid
+- test_tier_config_env_override_invalid_json_falls_back
+- test_xvb_enabled_flag
+
+#### tests/helper/test_utils.py — 29
+- test_plain_numbers
+- test_unit_suffixes_case_insensitive
+- test_unrecognized_suffix_is_raw
+- test_bad_data_returns_zero
+- test_unit_boundaries
+- test_bad_data
+- test_branches
+- test_bad_data
+- test_formats_localtime
+- test_falsy_is_never
+- test_invalid_type_does_not_crash
+- test_default_tiers
+- test_custom_tiers
+- test_zero_threshold_ignored
+- test_auto_picks_highest_sustainable
+- test_auto_zero_when_nothing_sustainable
+- test_named_tier_honored
+- test_named_tier_not_downgraded_but_flagged_unsustainable
+- test_cannot_sustain_named_tier_is_flagged
+- test_numeric_level_honored
+- test_unknown_level_falls_back_to_lowest
+- test_ipv4_is_an_address
+- test_ipv6_is_an_address
+- test_hostname_is_not_an_address
+- test_surrounding_whitespace_tolerated
+- test_non_string_and_empty_are_not_addresses
+- test_returns_socket_source_address
+- test_none_when_no_route
+- test_socket_is_closed_even_on_error
+
+#### tests/service/test_algo_service.py — 28
+- test_xvb_disabled_forces_p2pool
+- test_zero_shares_forces_p2pool
+- test_excessive_failures_forces_p2pool
+- test_low_hashrate_no_tier_is_p2pool
+- test_cold_start_seeds_feedforward
+- test_loop_ramps_up_when_below_reference
+- test_loop_backs_off_when_above_reference
+- test_advance_false_does_not_move_the_loop
+- test_nano_pool_uses_longer_window
+- test_difficulty_reserve_caps_donation
+- test_falls_back_to_flat_cap_without_difficulty
+- test_reserve_never_exceeds_hard_cap
+- test_loop_clamped_to_reserve
+- test_reference_cushion_is_absolute_capped
+- test_fraction_to_ms_zero_and_positive
+- test_advance_noop_when_no_hashrate
+- test_advance_clamps_to_bounds
+- test_routed_fraction_for_instrumentation
+- test_get_target_uses_state_manager_tiers
+- test_default_auto_targets_highest_sustainable
+- test_explicit_tier_not_downgraded
+- test_switch_updates_proxy_and_state
+- test_switch_aborts_on_bad_config
+- test_aborts_early_when_decision_flips_to_donate
+- test_aborts_early_when_below_tier
+- test_sleeps_full_duration_when_in_tier_on_p2pool
+- test_run_invokes_switch_then_stops
+- test_run_skips_switching_while_workers_rejected
+
+#### tests/service/test_data_service.py — 51
+- test_parse_list_row_named_fields
+- test_parse_list_row_share_counts
+- test_parse_list_row_offline_and_uptime
+- test_parse_legacy_dict_row
+- test_parse_legacy_dict_share_counts
+- test_list_format_online
+- test_list_format_offline_when_no_connections
+- test_list_format_uptime_estimate_from_last_share
+- test_short_list_row_is_skipped
+- test_legacy_dict_format
+- test_legacy_dict_defaults
+- test_missing_payload_returns_empty
+- test_extracts_results_and_best
+- test_best_defaults_to_zero_when_empty
+- test_missing_results_block_zeros_out
+- test_malformed_payload_returns_empty
+- test_proxy_kind_scales_khs_to_hs
+- test_xmrig_kind_not_scaled
+- test_unreachable_direct_api_keeps_proxy_values_online
+- test_short_hashrate_total_ignored
+- test_prefers_h15
+- test_falls_back_to_h60_then_h10
+- test_offline_excluded
+- test_empty
+- test_restores_snapshot
+- test_ignores_non_dict_snapshot
+- test_restores_workers_rejected_flag
+- test_restores_miner_released_latch
+- test_holds_miner_when_restart_mid_sync
+- test_stop_when_monero_down
+- test_stop_when_tari_down_and_required
+- test_tari_down_ignored_when_non_blocking
+- test_stop_failure_keeps_flag_false_for_retry
+- test_no_double_stop_when_already_rejected
+- test_readmit_when_relevant_nodes_healthy
+- test_no_readmit_while_a_relevant_node_unconfirmed
+- test_readmit_ignores_tari_when_non_blocking
+- test_no_readmit_until_monero_healthy_even_if_tari_non_blocking
+- test_holds_all_containers_when_not_synced
+- test_releases_when_gate_satisfied
+- test_noop_once_released
+- test_partial_start_failure_keeps_latch_closed
+- test_rehold_stops_quietly_after_first_cycle
+- test_single_iteration_aggregates
+- test_run_holds_miner_while_syncing
+- test_run_releases_despite_height_override
+- test_run_nonblocking_tari_releases_and_stays_operational
+- test_iteration_survives_collector_error
+- test_run_holds_when_tari_required_and_only_monero_synced
+- test_post_release_blip_lets_failover_act_without_rehold
+- test_both_nodes_down_rejects_once
+
+#### tests/service/test_earnings.py — 4
+- test_matches_closed_form
+- test_worked_field_example
+- test_linear_in_inputs
+- test_missing_or_bad_inputs_are_zero
+
+#### tests/service/test_metrics.py — 34
+- test_empty_history_returns_zero
+- test_averages_v_p2pool_in_window
+- test_excludes_samples_outside_window
+- test_legacy_rows_count_as_p2pool
+- test_xvb_samples_drag_average_down
+- test_total_and_stratum_passthrough
+- test_p2pool_averages_from_history
+- test_xvb_averages_from_stats
+- test_xvb_routed_is_fraction_of_hashrate
+- test_xvb_routed_zero_without_fraction
+- test_mode_default
+- test_xvb_disabled_overrides_mode_and_tiers
+- test_current_tier_from_xvb_24h
+- test_low_hr_warning_for_unsustainable_explicit_tier
+- test_no_warning_for_auto
+- test_no_warning_when_sustainable
+- test_fail_count_and_last_update
+- test_counts_online_and_total
+- test_empty
+- test_counts_recent_within_pplns_window
+- test_nano_block_time
+- test_loading_when_no_target
+- test_done_when_full
+- test_mid_sync_remaining
+- test_down_flag
+- test_global_syncing
+- test_local_pruned
+- test_local_full
+- test_remote_unknown
+- test_pool_and_network_figures
+- test_tari_mining_flag
+- test_empty_snapshot_does_not_crash
+- test_history_fetched_when_not_passed
+- test_passed_history_avoids_refetch
+
+#### tests/service/test_node_health.py — 6
+- test_not_down_before_threshold
+- test_down_after_threshold
+- test_single_blip_does_not_trip
+- test_never_reachable_never_down
+- test_down_clears_only_after_recovery_window
+- test_healthy_requires_stable_window_from_unknown
+
+#### tests/service/test_storage_service.py — 24
+- test_get_tiers
+- test_default_xvb_stats
+- test_partial_updates
+- test_kwargs_update_and_type_coercion
+- test_none_kwargs_skipped
+- test_unknown_kwarg_ignored
+- test_add_share_and_dedup
+- test_old_shares_pruned_from_memory
+- test_update_history_roundtrip
+- test_history_bad_values_default_zero
+- test_update_and_get_known_workers
+- test_worker_without_ip_skipped
+- test_none_list_is_noop
+- test_roundtrip
+- test_empty_snapshot_not_saved
+- test_load_missing_snapshot_returns_none
+- test_share_stats_persist_across_instances
+- test_state_persists_across_instances
+- test_legacy_kv_keys_migrated_on_load
+- test_corrupted_kv_value_skipped
+- test_history_timestamp_backfilled_from_iso_on_upgrade
+- test_history_older_than_retention_pruned_from_memory
+- test_old_history_pruned_from_db_when_cleanup_fires
+- test_stale_workers_pruned_after_retention_window
+
+#### tests/sim/test_donation_model.py — 10
+- test_holds_tier_without_overshoot
+- test_no_windup_from_cold_start
+- test_more_headroom_means_more_p2pool
+- test_holds_tier_across_credit_factor
+- test_overcredit_frees_p2pool
+- test_stable_under_lag
+- test_reserve_keeps_p2pool_in_the_window
+- test_low_tier_high_difficulty_caps_donation_for_vip
+- test_zero_reads_do_not_run_away
+- test_recovers_after_worker_drop
+
+#### tests/test_main.py — 1
+- test_build_app_returns_wired_application
+
+#### tests/test_version.py — 11
+- test_version_with_no_git_metadata_is_a_release
+- test_leading_v_in_version_is_not_doubled
+- test_explicit_release_flag_wins_over_git_metadata
+- test_release_flag_accepts_common_truthy_spellings
+- test_branch_and_commit
+- test_commit_only
+- test_branch_only
+- test_dirty_marker_passes_through
+- test_a_versioned_build_with_a_commit_is_still_dev
+- test_no_metadata_falls_back_to_generic_dev
+- test_blank_values_treated_as_absent
+
+#### tests/web/test_server.py — 23
+- test_index_serves_shell
+- test_get_state_ok_json
+- test_range_query_accepted
+- test_from_to_window_accepted
+- test_malformed_from_to_falls_back
+- test_window_filters_history_end_to_end
+- test_node_down_badges_in_state
+- test_passive_tari_badge_in_state
+- test_state_error_is_sanitized_json
+- test_security_headers_present
+- test_csp_has_no_unsafe_inline_or_eval
+- test_state_response_also_carries_headers
+- test_apply_security_headers_unit
+- test_js_mimetypes_registered
+- test_frontend_modules_served
+- test_static_assets_revalidate
+- test_shell_revalidates
+- test_css_has_phone_breakpoint
+- test_css_has_horizontal_scroll_rule
+- test_workers_table_opts_into_scroll_wrapper
+- test_css_lets_stat_values_wrap
+- test_css_lets_hostname_wrap
+- test_host_at_separator_styled_and_rendered
+
+#### tests/web/test_views.py — 101
+- test_point_shape_is_xy_with_epoch_ms
+- test_legacy_rows_attributed_to_p2pool
+- test_range_filtering
+- test_downsampling_caps_points
+- test_outage_inserts_null_break
+- test_regular_data_has_no_breaks
+- test_single_missing_sample_does_not_break
+- test_break_sits_inside_the_gap
+- test_threshold_adapts_to_spacing
+- test_downsampled_outage_still_breaks
+- test_single_point_no_break
+- test_share_points_sparse_and_top_pinned
+- test_share_marker_top_pinned_when_value_zero
+- test_no_shares_no_points
+- test_unknown_range_keeps_everything
+- test_empty_history
+- test_custom_window_filters_both_bounds
+- test_window_overrides_range
+- test_short_window_kept_at_native_resolution
+- test_long_window_downsamples_to_tier
+- test_target_points_tiers
+- test_chart_tension_tiers
+- test_stacked_series_sum_to_the_total
+- test_zoom_reveals_more_detail
+- test_all_range_adapts_density_to_data_extent
+- test_formats_hashrates
+- test_routed_distinct_from_credited
+- test_p2pool_mode_grays_xvb
+- test_xvb_mode_grays_p2pool
+- test_split_mode_both_active
+- test_low_hr_badge_present_only_when_warned
+- test_tiers_and_fail_count_passthrough
+- test_loading_done_syncing_states
+- test_done_state
+- test_monero_mode_and_db_passthrough
+- test_syncing_shows_syncing_only
+- test_operational_shows_mode_and_pool
+- test_low_hr_badge
+- test_node_down_and_rejected
+- test_miner_held
+- test_passive_tari_with_and_without_percent
+- test_monero_pruned_badge
+- test_monero_full_badge
+- test_no_prune_badge_when_unknown
+- test_disk_badge_critical
+- test_disk_badge_warn
+- test_no_disk_badge_when_ample
+- test_no_disk_badge_when_missing
+- test_high_usage_levels_and_fill
+- test_warning_fill_between_70_and_90
+- test_unparseable_cpu_is_ok
+- test_empty_system_defaults
+- test_pool_tokens
+- test_formatted_and_raw_fields
+- test_online_sorted_before_offline
+- test_malformed_worker_skipped
+- test_bad_ip_sorts_to_zero
+- test_name_passthrough
+- test_share_counts_raw_and_formatted
+- test_invalid_appended_to_rejected_string_only_when_nonzero
+- test_missing_share_fields_default_to_zero
+- test_reject_flag_set_on_high_reject_rate
+- test_none_without_rejects
+- test_none_below_noise_floor
+- test_none_when_rate_low
+- test_flags_high_rate_above_floor
+- test_flags_all_rejects_at_floor
+- test_active
+- test_inactive_defaults
+- test_long_wallet_shortened
+- test_formats_totals_and_best
+- test_reject_pct_and_level
+- test_best_dash_when_unknown
+- test_empty_summary_has_no_data
+- test_formats_from_metrics_and_data
+- test_db_size_dash_when_unknown
+- test_resolves_ip_for_a_hostname
+- test_none_when_host_is_already_an_ip
+- test_none_when_ip_undetectable
+- test_none_when_detected_ip_equals_host
+- test_publishes_rate_and_inputs
+- test_default_hashrate_is_the_displayed_p2pool_1h
+- test_no_p2pool_hashrate_when_average_is_zero
+- test_unavailable_when_network_reward_missing
+- test_unavailable_when_difficulty_missing
+- test_p2pool_hr_passthrough_is_raw
+- test_has_all_sections
+- test_version_section_shape
+- test_is_json_serializable
+- test_range_echoed
+- test_window_null_on_preset
+- test_window_echoed_when_zoomed
+- test_syncing_flag_and_title
+- test_proxy_workers_from_metrics
+- test_chart_uses_timestamps
+- test_propagates_state_errors
+- test_valid_pair
+- test_absent_is_none
+- test_malformed_falls_back_to_none
+- test_returns_html_referencing_module
+- test_error_fallback
+
+### Frontend logic (node --test) — 25 tests
+- sortWorkers: null index keeps the server-provided order
+- sortWorkers: numeric columns sort numerically, not lexically
+- sortWorkers: hashrate column also sorts numerically
+- sortWorkers: descending reverses the order
+- sortWorkers: name column sorts as text
+- sortWorkers: does not mutate the input array
+- WORKER_COLUMNS: keys match the worker fields the server sends
+- sortWorkers: rejected column sorts numerically (find problem rigs)
+- fmtTimestamp: returns a non-empty string for an epoch-ms value
+- normalizeTheme: passes valid modes through, defaults the rest to auto
+- THEME_ORDER: the control renders every theme exactly once
+- clampZoomWindow: orders endpoints and enforces a minimum span
+- clampZoomWindow: rejects unusable input
+- fmtWindowDuration: two coarsest units, trailing zeros dropped
+- normalizeSeries: defaults every series to visible, only explicit false hides
+- parseHashrate: accepts bare numbers and k/M/G suffixes
+- parseHashrate: rejects empty / unparseable input
+- computeEarnings: scales the daily rate to day/month/year + time-to-share
+- computeEarnings: returns nulls when unavailable or hashrate is non-positive
+- computeEarnings: no time-to-share when share difficulty is unknown
+- heroKpis: surfaces the five headline numbers under stable labels, in order
+- heroKpis: wires each KPI to its build_state field
+- heroKpis: shares colour reflects the ok flag
+- heroKpis: mode colour follows the server mode_variant token
+- heroKpis: total is accent-coloured; blocks and tier carry no colour class
+
+### `pithead` shell suite (tests/stack/run.sh) — 21 sections
+- unit: resolve_default
+- unit: assert_safe_dir
+- unit: is_ipv4
+- unit: resolve_dashboard_host (dashboard.host 'auto' revert, 247c5a0)
+- unit: docker_boot_enabled (#137)
+- unit: is_valid_host (#130)
+- unit: describe_change
+- unit: env helpers
+- unit: export_build_provenance (Issue #58)
+- unit: node credential helpers
+- unit: disk_component_gib
+- unit: check_disk_grouped (mocked df)
+- black-box: CLI dispatch
+- black-box: guards
+- black-box: config validation
+- black-box: apply preserves secrets + propagates
+- black-box: local node creds auto-generated + persisted (#50)
+- black-box: status health check
+- black-box: doctor exit code (#127)
+- black-box: reset-dashboard targets .env dirs, not config.json (#139)
+- black-box: reset-dashboard refuses to guess without .env dirs (#139)
+
+### Compose validation + hardening (tests/stack/test_compose.sh)
+- docker-compose.yml `${VAR}` interpolation resolves against a representative .env
+- #90 hardening invariants: no-new-privileges / cap_drop / read-only roots, credential-free
+  healthchecks, least-privilege Docker socket proxies, and the pinned `pithead` project name
+
+## Tier 2 — Contract (real clients vs controllable fakes)
+
+### tests/integration/fakes/test_contract.py — 12 tests
+- test_monero_synced_reads_no_sync_and_db_size
+- test_monero_syncing_reports_percent
+- test_monero_down_is_unreachable
+- test_monero_busy_status_is_unreachable
+- test_monero_synced_by_height_even_without_flag
+- test_monero_db_size_unknown_reads_zero
+- test_monero_http_control_mutates_state
+- test_tari_synced_reads_done
+- test_tari_syncing_reports_percent
+- test_tari_down_is_unreachable_with_no_cache
+- test_tari_syncing_without_reliable_target_avoids_false_100
+- test_tari_serves_cached_reading_when_briefly_unreachable
+
+## Tier 3 — Fake-daemon mini-stack (docker)
+
+### tests/integration/mini-stack/run-mini-stack.sh — 6 scenarios
+- scenario 1: holds the miner while both chains sync
+- scenario 2: keeps holding while Tari (required) is still syncing
+- scenario 3: releases the miner once both chains are synced
+- scenario 4: rejects workers when required Tari is down
+- scenario 5: readmits workers when Tari recovers
+- scenario 6: a dashboard restart does not re-hold a released miner
+
+## Tier 4 — Live config matrix (real synced server)
+
+### Config scenarios (tests/integration/scenarios.sh) — 8
+- local-pruned-main-secure-tari
+- local-full-main-secure-tari
+- local-pruned-mini-secure-tari
+- local-pruned-nano-insecure
+- local-pruned-main-rpclan
+- local-pruned-main-xvb-off
+- local-pruned-main-tari-optional
+- remote-main-secure-tari
+
+### Axis coverage (every value exercised at least once) — 15
+- monero.mode=local
+- monero.mode=remote
+- monero.prune=true
+- monero.prune=false
+- monero.rpc_lan_access=true
+- monero.rpc_lan_access=false
+- p2pool.pool=main
+- p2pool.pool=mini
+- p2pool.pool=nano
+- xvb.enabled=true
+- xvb.enabled=false
+- dashboard.secure=true
+- dashboard.secure=false
+- dashboard.tari_required=true
+- dashboard.tari_required=false
+
+### Per-scenario assertions (tests/integration/run.sh)
+- .env is owner-only (mode $envmode)
+- Caddyfile uses correct scheme
+- DASHBOARD_SECURE matches config
+- MONERO_RPC_BIND matches rpc_lan_access
+- Monero is synced (chain reusable by the matrix)
+- TARI_REQUIRED env matches config
+- XVB_ENABLED matches config
+- backup archive contains .env
+- backup archive contains config.json
+- backup/rollback prerequisites present (writable backups/, tar)
+- both prune modes exercisable (live=$baseline_mode + supplied $opp_label chain at $opp_dir)
+- check
+- container up: $svc
+- dashboard /api/state reachable
+- dashboard bound to localhost only (Caddy fronts it)
+- disk headroom on the live chain FS (${avail} GiB free)
+- monero display mode determinate ($dmode)
+- monero display mode present ($dmode)
+- monerod absent in remote mode
+- monerod reported missing
+- monerod reports synced (RPC)
+- monerod running-but-unhealthy
+- pool actually changed
+- pool type
+- prune axis: live FS is snapshot-capable ($fstype) — the $opp_label variant can be built cheaply
+- re-apply is a no-op
+- restore preserves secrets
+- restore reverts the pool to the backed-up value
+- secrets intact (token + onions)
+- secrets intact after restore
+- secrets preserved across pool change
+- snapshot-isolated $baseline_mode chain on a CoW FS ($same_dir, $sfs) — destructive scenarios needn't touch the live chain
+- stack is healthy (pithead status)
+- status OK after monerod recovery
+- status OK after node recovery
+- status OK after restart
+- status exit code is 0 (healthy)
+- status non-zero when monerod is down
+- status non-zero when monerod missing
+- status non-zero when monerod unhealthy
+- status non-zero when node down
+- stratum total hashes > 0
+- tari synced (required)
+- workers online (>= $EXPECTED_WORKERS)
+- xmrig-proxy stopped for failover
+
+### Harness self-test (tests/integration/selftest.sh) — 11 sections
+- overrides_to_jq: value typing
+- resolve_overrides: prerequisite gate (never mutates the canonical chain)
+- render_scenario_config: applies overrides, stays valid JSON
+- expected/absent services: profile gating
+- redact: secrets never leak into artifacts
+- matrix: every axis value is covered
+- scenarios: lookup helpers
+- rx: local exec runs in the stack dir
+- api_state + jq_get: parse a fixture
+- service_state parsing (fault-injection predicates)
+- assertion helpers: counters behave
+
+---
+
+_Grand total: **501** enumerated cases/sections across the four tiers (plus the live
+lifecycle and fault-injection phases, which are exercised on a real server)._
diff --git a/docs/test-server-architecture.md b/docs/test-server-architecture.md
new file mode 100644
index 0000000..b396f14
--- /dev/null
+++ b/docs/test-server-architecture.md
@@ -0,0 +1,143 @@
+# Test / build server architecture & recreation
+
+How the Pithead reference test server (`gouda`) is structured, and how to **recreate it on another
+box** with minimal pain. The whole point: the synced chains are the slow-to-acquire asset (days to
+sync) — everything else is reproducible in minutes from the repo.
+
+## What this server is
+
+A single box that runs the **live Pithead stack** against real, synced chains and serves three jobs:
+
+1. **Tier-4 release gate** — the integration harness ([integration-testing](integration-testing.md))
+   validates a release end-to-end here before it ships.
+2. **Developer + AI-agent test bench** — a consistent place to reproduce, test, and debug stack
+   behavior against real daemons.
+3. **Reference deployment** — a known-good, documented install other boxes can be cloned from.
+
+It is **not** a production miner — downtime is fine; tear-down/redeploy is fine. The one rule:
+**don't lose the synced chains** (reuse them, don't re-sync).
+
+## Hardware & storage policy
+
+```
+NVMe SSD (fast, PRIMARY)               HDD (slow, SPARING)
+  /boot, /boot/efi                      /home   ← cold backups / archives only
+  / (root, LVM ext4)
+    ├─ /var/lib/docker        (images)
+    ├─ /srv/code/pithead      (CHECKOUT, = ~/code/pithead)
+    └─ /srv/code/pithead-data (CHAINS)  ← the asset, on fast storage
+```
+
+**Policy — the single most important hardware rule:** the chains monerod/Tari actively run against
+live on the **SSD/NVMe**. monerod is random-I/O heavy; a chain on a spinning HDD makes every test
+crawl. The HDD is for **cold** things only (backup tarballs, archived snapshots). Check `lsblk -d
+-o NAME,ROTA` before placing any chain — `ROTA=0` is SSD/NVMe, `ROTA=1` is HDD.
+
+**Sizing (a pruned-Monero test bench fits comfortably in ~1 TB):**
+
+| Component | Size |
+|---|---|
+| Pruned Monero (compacted) | ~95 GB |
+| Tari (archival/full) | ~132 GB |
+| Docker images + cache | ~20 GB |
+| OS + working headroom | ~30 GB |
+| **Total** | **~280 GB** |
+| *+ optional full Monero node* | *+250 GB* |
+| *+ a few full chain copies* | *95–250 GB each* |
+
+A 1 TB NVMe holds the pruned bench with ~650 GB to spare — room for a full node and copies too.
+
+> **⚠️ Verify the disk is actually fast.** "SSD" in the model name is not enough — check the *bus*.
+> On the reference box (`gouda`), the "1 TB SSD" enumerated as `/dev/sdb` on **SATA** (not
+> `/dev/nvme0n1`), on a link that negotiated down to 1.5 Gbps, and benchmarked at **~37–98 MB/s —
+> HDD-class**. There was **no NVMe** in the machine at all. That single fact bottlenecks monerod
+> and builds, and makes LMDB compaction (heavy random I/O) impractical (~16 h instead of ~10 min).
+>
+> **The highest-value upgrade by far is a real M.2 PCIe NVMe** (~$80–150): ~20–50× faster, which
+> turns chain compaction into minutes and makes CoW snapshots actually worth doing. Confirm a
+> drive is genuinely NVMe before relying on it:
+> ```bash
+> lsblk -d -o NAME,TRAN,ROTA,MODEL   # want TRAN=nvme, not sata
+> ls /dev/nvme*                       # an NVMe drive appears as /dev/nvme0n1
+> # quick reality check: 4 KiB-block read from deep inside the file (dd reads sequentially, so monerod's truly random I/O will be slower still):
+> dd if=/path/to/data.mdb of=/dev/null bs=4k count=200000 skip=10000000   # want >>100 MB/s
+> ```
+> Until a fast disk is added, keep the chain at its working ~253 GB (it's correctly pruned; the
+> size is reclaimable free-page bloat, not a full chain) and skip CoW. The matching
+> `mdb_copy` (LMDB 0.9.70, built from Monero's vendored source) is staged at
+> `~/pithead-testbench/bin/mdb_copy` — on a fast disk, `mdb_copy -c <chain>/lmdb <dest>` compacts
+> in minutes.
+
+A **second M.2 NVMe (PCIe) with btrfs/zfs** additionally enables **copy-on-write snapshots** —
+instant, near-free chain clones for isolated/parallel test runs — the upgrade that helps a busy
+multi-agent bench.
+
+## Directory layout
+
+| Path | Disk | What | Lose it? |
+|---|---|---|---|
+| `~/code/pithead/` (`/srv/code/pithead`) | SSD | stack checkout (`docker-compose.yml`, the `pithead` CLI), `config.json`, `.env` | reproducible (clone) |
+| `/srv/code/pithead-data/{monero,tari,p2pool,dashboard,tor}/` | **SSD** | the chains + Tor onion keys — **the asset** | **don't** (days to re-sync) |
+| `/var/lib/docker/` | SSD | images / build cache | reproducible (rebuild) |
+| `~/pithead-testbench/` | HDD | build-server docs + ops tools (see its `README.md`) | reproducible (repo) |
+| `/home` … `/mnt/chains` | HDD | cold backups / archives | — |
+
+The chains live **outside** the checkout (`/srv/...`, absolute paths in `config.json`), so the stack
+can be refreshed/redeployed without ever touching them.
+
+## Recreate on another box
+
+Goal: stand up an equivalent server. Minutes of work + one chain transfer (vs days of re-sync).
+
+**1. Prereqs.** Ubuntu LTS, Docker (Compose v2), `git jq curl tar`, and your user in the `docker`
+group. Put the chains' target on the **SSD**.
+
+**2. Clone + configure:**
+```bash
+git clone https://github.com/p2pool-starter-stack/pithead.git ~/pithead && cd ~/pithead
+cp /path/to/your/config.json .         # your wallets/settings — or run `./pithead setup` to make one
+# point data dirs at fast storage (checkout-independent):
+jq '.monero.data_dir="/srv/<you>/pithead/data/monero"
+  | .tari.data_dir="/srv/<you>/pithead/data/tari"
+  | .p2pool.data_dir="/srv/<you>/pithead/data/p2pool"
+  | .dashboard.data_dir="/srv/<you>/pithead/data/dashboard"
+  | .tor.data_dir="/srv/<you>/pithead/data/tor"' config.json > config.json.tmp && mv config.json.tmp config.json
+```
+
+**3. Bring the chains (the painless part — reuse, don't re-sync).** Stop the stack on the source
+box (`./pithead down`) so the LMDBs are consistent, then copy `/srv/.../pithead/data/` to the new
+box's SSD — over the network or a fast external drive:
+```bash
+# from the new box, pulling from the old one (chains are ~230 GB; hours over GbE, faster over USB3/10G):
+rsync -aP --info=progress2 olduser@oldbox:/srv/code/pithead-data/ /srv/<you>/pithead/data/
+```
+The Tor onion keys travel in `data/tor`, so the box keeps its onion identity. (No old box yet? Omit
+this and let monerod/Tari sync from scratch — days, but hands-off.)
+
+**4. Deploy + verify:**
+```bash
+cd ~/pithead && ./pithead setup        # deps, .env, Tor, Caddy; then `up`
+./pithead status                        # all healthy; monerod just catches up the gap
+```
+
+**5. Test-bench tooling:**
+```bash
+mkdir -p ~/pithead-testbench/bin
+cp ~/pithead/tests/integration/{build-pruned-chain,compact-chain,system-info}.sh ~/pithead-testbench/
+cp ~/pithead/tests/integration/gouda-testbench-README.md ~/pithead-testbench/README.md
+# manually fetch monero-blockchain-prune at the running monerod's version (see build/monero/Dockerfile pins); then regenerate system-info.md:
+~/pithead-testbench/system-info.sh > ~/pithead-testbench/system-info.md
+```
+
+**6. Validate it's release-fit:**
+```bash
+tests/integration/run.sh --host you@newbox --dir pithead --readiness
+```
+
+## Migrating gouda → a bigger box later
+
+Same as above, steps 3–6: `rsync` the chains + `config.json` over, `pithead setup`, redeploy,
+copy the test-bench tooling, regenerate `system-info.md`. Because the chains are decoupled and the
+config is captured, the move is a transfer + a redeploy — no reconfiguration, no re-sync. Put the
+chains on the bigger box's fastest disk; if it has spare NVMe, that's the place for a btrfs/zfs CoW
+volume to get cheap test-chain snapshots.
diff --git a/docs/testing-guide.md b/docs/testing-guide.md
new file mode 100644
index 0000000..1efed35
--- /dev/null
+++ b/docs/testing-guide.md
@@ -0,0 +1,110 @@
+# Testing Guide (for developers)
+
+Practical "how do I test the change I just made?" companion to the
+[Testing Strategy](testing-strategy.md) (which explains *why* the tiers exist) and the
+generated [Test Inventory](test-inventory.md) (which lists *what* exists today).
+
+## Philosophy
+
+- **Test the intent, not the line.** A test should pin down a *behavior or contract* —
+  "a pruned node displays Pruned", "the gate holds until both chains sync", "an old DB
+  migrates without losing history" — and read clearly enough that its name + one-line comment
+  explain *why it exists*. Don't add a test purely to move the coverage number.
+- **The 80% coverage gate is a floor, not a target.** Uncovered defensive error-handling is
+  fine; uncovered *behavior* (a migration path, a retention rule, a decision branch) is a gap.
+- **Tests are real code.** They're linted (`shellcheck`), version-controlled with the change
+  they protect, and listed in the inventory (a CI drift check fails if you add/remove a test
+  without regenerating it).
+
+## Commands
+
+```bash
+make test                 # everything that needs no server/docker (run before every PR)
+make test-dashboard       # dashboard pytest + 80% coverage gate
+make test-stack           # pithead shell suite
+make test-fakes           # tier-2 contract test (real clients vs fakes)
+make test-integration-selftest   # the integration harness's own logic
+make test-inventory       # regenerate docs/test-inventory.md (do this when adding/removing tests)
+make test-mini-stack      # tier-3 docker mini-stack (needs docker)
+make test-integration ARGS="--host user@box --dir pithead --check"   # tier-4 live, non-destructive
+```
+
+## Where tests live
+
+| You changed… | Write the test here | Tier |
+|---|---|---|
+| Dashboard logic (a decision, metric, `/api/state` field) | `build/dashboard/tests/**/test_*.py` (pytest) | 1 |
+| Frontend logic (worker sort, formatting) | `build/dashboard/tests/frontend/*.test.mjs` (`node --test`) | 1 |
+| A client that parses a daemon (monerod RPC, Tari gRPC) | `tests/integration/fakes/test_contract.py` (+ extend the fakes) | 2 |
+| The control plane (sync-gate #35, failover #31) | `tests/service/test_data_service.py` (+ a `mini-stack` scenario) | 1 + 3 |
+| `pithead` CLI behavior | `tests/stack/run.sh` | 1 |
+| A compose **security/hardening** invariant (caps, `no-new-privileges`, no secret in a healthcheck, socket-proxy scope) | the #90 section of `tests/stack/test_compose.sh` | 1 |
+| A new `config.json` axis | one row in `tests/integration/scenarios.sh` | 4 |
+| A failure mode needing real containers | `run.sh` `--fault-injection` and/or a `mini-stack` scenario | 4 / 3 |
+| The integration harness's own logic | `tests/integration/selftest.sh` | — |
+
+## Recipes
+
+### Dashboard behavior (tier 1)
+Add a `test_*` to the matching file under `build/dashboard/tests/`. Name it for the behavior,
+add a one-line docstring stating the intent, mock at the client boundary (the conftest gives
+you an in-memory `state_manager`). Run `make test-dashboard` — coverage must stay ≥ 80%.
+
+```python
+def test_pruned_node_is_labelled_pruned(...):
+    # Intent: a local pruned node shows "Pruned" so a config/DB mismatch is visible (#32).
+    ...
+```
+
+### A client parsing a new daemon state (tier 2)
+1. Teach the fake to produce the state: edit `tests/integration/fakes/fake_monerod.py` or
+   `fake_tari.py` (add a `mode`, or a field the daemon returns).
+2. Assert the *real* client parses it: add a test to `fakes/test_contract.py` that points the
+   real `MoneroClient`/`TariClient` at the fake and checks the parsed result.
+3. `make test-fakes`. This is the seam that catches "the daemon changed its wire format".
+
+### A config axis (tier 4)
+Add a `NAME<TAB>overrides` row to `scenario_matrix()` in `scenarios.sh`, and the value to
+`axis_coverage()`. The self-test **enforces** that every axis value appears in some scenario,
+so a half-added axis fails `make test-integration-selftest`. No code changes needed.
+
+### A control-plane scenario (tier 3)
+Add a scenario to `tests/integration/mini-stack/run-mini-stack.sh`: drive the fakes via their
+`/control` endpoints (`set_monerod`/`set_tari`) and assert real container state with
+`assert_state` / `assert_stays`. `make test-mini-stack` (needs docker).
+
+## Conventions
+
+- **Determinism, no sleep-and-hope.** Wait on a real signal with a timeout (`wait_for`,
+  `assert_state`, `wait_status_ok`). For time-based logic, **backdate timestamps white-box**
+  rather than patching the global clock — e.g. push an old point into the deque, then act
+  (see `test_history_older_than_retention_pruned_from_memory`).
+- **Shell:** pure logic goes in `lib.sh`/`scenarios.sh` and is tested by `selftest.sh`; I/O
+  (ssh, docker, RPC) is thin wrappers that aren't unit-tested. Everything stays
+  `shellcheck --severity=warning` clean.
+- **Regenerate the inventory** (`make test-inventory`) when you add/remove a test — CI's
+  drift check (`make test-inventory-check`) fails otherwise.
+- **Secrets:** never print tokens/creds/onions; the harness redacts artifacts and hashes
+  secrets on the box. If you add a secret-bearing field, confirm `redact()` covers it (there's
+  a self-test for the patterns).
+
+## Gotchas learned on real hardware
+
+The live harness was first run against a real synced, mining box — these are the
+calibration lessons baked into the tests now. Keep them in mind:
+
+- **A synced *local* monerod shows `state: "loading"` in `/api/state`**, not `"done"` — it has
+  no target height once caught up. Assert "synced" via monerod's own `get_info.synchronized`
+  (the harness's `monero_caught_up`), not the dashboard UI field.
+- **`stratum.conns` can read 0 on a healthy, mining box.** Use `proxy_workers` /
+  `total_hashes` for mining-liveness; `conns` is informational.
+- **The mini-stack must be isolated.** Containers are named `itest-*` and control ports are
+  28081/28152 so it can't collide with — or control — a real deployment on the same host.
+  A fake server inside a container must bind `0.0.0.0` (binding `127.0.0.1` makes it
+  unreachable from peer containers — this once broke release in the mini-stack).
+- **monerod-down failover isn't simulated in the mini-stack** (the dashboard's monerod
+  down-path log-scrapes a real `monerod` container the fake stack lacks); it's covered on real
+  hardware by `run.sh --fault-injection`. Tari-down, by contrast, is simulated cleanly in the mini-stack.
+- **Run `--check` first.** Against any real box, `run.sh --check` asserts the current live
+  state non-destructively (no config change) — the safe way to validate before the
+  config-churning matrix.
diff --git a/docs/testing-strategy.md b/docs/testing-strategy.md
new file mode 100644
index 0000000..89c32b0
--- /dev/null
+++ b/docs/testing-strategy.md
@@ -0,0 +1,210 @@
+# Testing Strategy
+
+How Pithead simulates **every situation the stack can be in** — and which layer proves each
+one. This is the map behind the [integration suite](integration-testing.md); read that for how
+to run the live matrix, and this for *what we test where, and why*.
+
+The guiding idea: the stack's runtime behaviour is a **state machine** (syncing → held →
+released; healthy → down → rejected → recovered → readmitted; XvB tiers; container health),
+and a healthy, already-synced box only ever shows you one corner of it. So we simulate the rest
+— at the cheapest layer that can prove each situation honestly.
+
+## The four tiers
+
+| Tier | What it is | Simulates | Where it runs |
+|---|---|---|---|
+| **1 — Unit** | `build/dashboard/tests/` (pytest, mocked clients) and `tests/stack/` (shell, `docker`/`sudo` stubbed) | Decision logic & field mapping: sync-gate, failover, node-health debounce, XvB engine, `/api/state` shapes, `pithead` config/status logic | Every PR (`make test`) |
+| **2 — Contract** | `tests/integration/fakes/test_contract.py` | The real Monero/Tari **clients** parsing the real daemons' wire format — points the actual clients at controllable fakes | Every PR (docker-free) |
+| **3 — Mini-stack** | `tests/integration/mini-stack/` (real dashboard + docker-control vs fake daemons) | The control plane **end-to-end with real containers**: hold/release and reject/readmit actually stopping/starting `p2pool`/`xmrig-proxy`, driven deterministically | CI with Docker (`make test-mini-stack`) |
+| **4 — Live matrix** | `tests/integration/run.sh` against a real, synced box | What only reality proves: real merge-mining, prune/full DB size, Caddy TLS, Tor onions, HugePages, plus fault injection for real container health verdicts | Manual / release gate (`make test-integration`) |
+
+**Why this shape, and the answer to "should we use stubs?"** Stubs already do the heavy
+lifting — the dashboard has ~140 unit tests that exhaustively drive the hard runtime states with
+mocked clients. Adding *more* mocks for the same logic would be duplication. What stubs **can't**
+prove is wiring: that the real clients parse real daemon output (tier 2), that the dashboard's
+stop/start actually moves real containers (tier 3), and that real daemons sync/merge-mine and
+real containers go unhealthy (tier 4). So the strategy is **stubs for logic, controllable fake
+daemons for the control-plane wiring, and the real box for the irreducibly-real** — each
+situation tested once, at the lowest tier that's honest.
+
+The fakes are the key enabler: because the whole control plane is env-configurable
+(`MONERO_RPC_URL`, `TARI_GRPC_ADDRESS`, `DOCKER_CONTROL_URL`, `NODE_DOWN_AFTER_SEC`,
+`UPDATE_INTERVAL`, …), we can point the real code at tiny controllable servers and drive the
+entire state machine in seconds, in CI, with no chain and no test box.
+
+## Scenario catalog
+
+Every situation we care about, what triggers it, and the tier(s) that cover it. ✅ = covered
+today; ▶ = exercised by the live matrix / mini-stack when run.
+
+### A. Configuration permutations
+The deploy-time axes — each changes a real runtime path. Full table and assertions in
+[Integration Testing › The config matrix](integration-testing.md#the-config-matrix).
+
+| Situation | Trigger | Tier |
+|---|---|---|
+| `monero.mode` local vs remote (monerod present/absent, profile gating) | config | 4 ▶ |
+| `monero.prune` pruned vs full (DB size, #32 display) | config | 1 ✅ (display) · 4 ▶ (real DB) |
+| `monero.rpc_lan_access`, `dashboard.secure`, `xvb.enabled`, `dashboard.tari_required` | config → `.env`/Caddyfile | 4 ▶ |
+| `p2pool.pool` main / mini / nano (sidechain, flags) | config | 4 ▶ |
+
+### B. Sync lifecycle (#35)
+| Situation | Trigger | Tier |
+|---|---|---|
+| Cold start, chains syncing → **hold** `p2pool`+`xmrig-proxy` | both `is_syncing` | 1 ✅ · 3 ▶ |
+| Monero synced, Tari **required** but still syncing → keep holding | `monero_synced ∧ ¬tari_synced ∧ TARI_REQUIRED` | 1 ✅ (added) · 3 ▶ |
+| Monero synced, Tari **non-blocking** → release, passive Tari badge (#51) | `¬TARI_REQUIRED` | 1 ✅ · 4 ▶ |
+| Both synced → **release** (one-way latch) | gate satisfied | 1 ✅ · 3 ▶ |
+| Network-height UI override doesn't deadlock the gate | p2pool held → height 0 | 1 ✅ |
+| Restart mid-sync / post-release (latch persisted) | snapshot reload | 1 ✅ |
+
+### C. Node health & failover (#31)
+| Situation | Trigger | Tier |
+|---|---|---|
+| monerod down → **reject workers** (stop `xmrig-proxy`) | unreachable ≥ `NODE_DOWN_AFTER_SEC` | 1 ✅ · 3 ▶ · 4 ▶ |
+| Tari down + required → reject; Tari down + non-blocking → **ignore** | `tari_down ∧ TARI_REQUIRED?` | 1 ✅ |
+| Recovery hysteresis — readmit only after stable `NODE_RECOVERY_AFTER_SEC` | reachable again | 1 ✅ |
+| Transient blip / never-reachable → **no** false reject | debounce / `ever_up` | 1 ✅ |
+| Double outage; readmit only when **both** healthy | both down → both up | 1 ✅ (added) |
+| #35 latch × #31 failover coexist after release | down post-release | 1 ✅ (added) · 3 ▶ |
+| Stop/start fails → retry next cycle (idempotent) | docker error | 1 ✅ |
+
+### D. Container health verdicts (`pithead status`)
+| Situation | Trigger | Tier |
+|---|---|---|
+| All healthy → exit 0 | steady state | 1 ✅ · 4 ▶ |
+| Required node **down** / **missing** → exit 1 | stop / `rm` monerod | 1 ✅ (node-down) · 4 ▶ (`--fault-injection`) |
+| Running but **unhealthy** → exit 1 | healthcheck fails (SIGSTOP) | 4 ▶ (`--fault-injection`) |
+| Miner stopped under sync-hold / failover → exit **0** (intentional) | held / rejected | 1 ✅ · 4 ▶ |
+| Remote mode ignores monerod | profile off | 1 ✅ · 4 ▶ |
+
+### E. XvB switching engine
+| Situation | Trigger | Tier |
+|---|---|---|
+| Disabled / zero shares / `fail_count ≥ 3` / no sustainable tier → P2POOL | guards | 1 ✅ |
+| Closed-loop ramp/back-off, cold-start seed, VIP-reserve anti-overshoot (#70) | controller | 1 ✅ |
+| P2POOL / XVB / SPLIT modes, tiers, smart-sleep early exit | decision | 1 ✅ |
+| Real XvB endpoint reachable / failing | network | 4 (real endpoint) |
+
+### F. Dashboard `/api/state` field states
+| Situation | Trigger | Tier |
+|---|---|---|
+| sync state loading/syncing/done; pruned/full/unknown; db_size | metrics | 1 ✅ |
+| badges (node-down, workers-rejected, miner-held, passive-Tari, pruned/full, low-HR) | metrics | 1 ✅ |
+| system levels (cpu/mem/disk/hugepages), worker pool/online, chart outage breaks | metrics | 1 ✅ |
+| Dashboard reads correct live state on a real stack | real daemons | 4 ▶ |
+
+### G. CLI lifecycle (`pithead`)
+| Situation | Trigger | Tier |
+|---|---|---|
+| Config validation, secret preservation, `apply` no-op/destructive guards | sourced fns | 1 ✅ |
+| `setup`→`up`→`status`→`apply`→`restart`→`down`; idempotency; secret preservation | real box | 4 ▶ (`--lifecycle`) |
+| `upgrade` (image pull/rebuild) | real box | release staging smoke (docs) |
+| `backup`/`restore`, `reset-dashboard`, `doctor` | real box | 1 ✅ (partial) · 4 (future) |
+
+### H. Host / infrastructure (real-only)
+| Situation | Trigger | Tier |
+|---|---|---|
+| Real merge-mining share lands; real hashrate on dashboard | live mining | 4 ▶ |
+| Caddy TLS scheme; Tor onion provisioning; HugePages/AVX2; real disk pressure; prune DB size | real host | 4 ▶ |
+
+## Running each tier
+
+```bash
+make test                 # tiers 1 + 2 (+ harness self-test) — every-PR, no docker/server
+make test-fakes           # tier 2 contract test on its own
+make test-mini-stack      # tier 3 — needs docker
+make test-integration ARGS="--host user@box --dir pithead --lifecycle --fault-injection"  # tier 4
+```
+
+## Production-readiness posture
+
+What gates a merge vs. a release, the engineering standards every test holds to, and the gaps
+we know about. The full enumerated coverage is in the generated
+[Test Inventory](test-inventory.md) (kept honest by a CI drift check).
+
+### What runs where
+
+| Check | Tier | When | Blocking? |
+|---|---|---|---|
+| Dashboard pytest + **≥80% coverage gate** | 1 | every PR | ✅ required |
+| Frontend logic (`node --test`) | 1 | every PR | ✅ required |
+| Dashboard image test stage (in-container) | 1 | every PR | ✅ required |
+| `pithead` shell suite + shellcheck | 1 | every PR | ✅ required |
+| Compose interpolation + **security/hardening** invariants | 1 | every PR | ✅ required |
+| Fake-daemon **contract test** | 2 | every PR | ✅ required |
+| Integration harness **self-test** | 4 | every PR | ✅ required |
+| **Test-inventory drift** check | — | every PR | ✅ required |
+| Fake-daemon **docker mini-stack** | 3 | PRs touching the harness/dashboard | ✅ (own workflow) |
+| **Live config matrix** on real nodes | 4 | manual / pre-release | ✅ **release gate** ([#44](https://github.com/p2pool-starter-stack/pithead/issues/44)) |
+
+Tiers 1–2 run on every PR with no special infrastructure and tier 3 runs in its own Docker workflow;
+tier 4 is the blocking **pre-release** gate (see [Releasing](releasing.md)) — it needs real synced nodes.
+
+### Engineering standards
+
+Every scenario, at every tier, holds to the same discipline:
+
+- **Deterministic, no sleep-and-hope.** Wait on real readiness signals — container health,
+  `pithead status`, dashboard sync %, miner-released — with timeouts. The only fixed sleeps are
+  *poll intervals* and the deliberate "stays in state" windows that prove the gate does **not**
+  act prematurely.
+- **Isolated & idempotent.** Each scenario starts from a known baseline and restores it; the
+  live matrix snapshots `config.json` and reuses (never mutates) the canonical chain dirs; the
+  mini-stack tears down with `down -v`.
+- **Actionable failures.** Per-scenario pass/fail, continue-on-error to collect the whole
+  matrix, and artifact capture (redacted logs, `compose ps`, `.env`-minus-secrets, dashboard
+  responses) on failure.
+- **Secrets hygiene.** Tokens / RPC creds / onions are never printed; preservation is checked
+  by hashing on the box; all artifacts pass a redactor.
+- **Reproducible.** The live run records a manifest (stack `VERSION`, git rev, image digests).
+- **Test code is real code.** Same lint (shellcheck), the coverage gate, and the inventory
+  drift check apply to the tests themselves.
+
+### Flake policy
+
+Integration scenarios **quarantine, never blind-retry**: a scenario that fails intermittently
+is marked and investigated, not wrapped in a retry loop that hides a real race. The waiters
+have generous timeouts so a slow-but-correct stack passes while a genuinely broken one fails
+fast with artifacts.
+
+### Known gaps (honest)
+
+These are deliberately **not** yet covered and are the road to full production confidence:
+
+- **First green run on real hardware.** ✅ Two of the three real-environment tiers are green:
+  the live harness `--check` (tier 4 read path — 22/22 against a synced, mining box) and the
+  fake-daemon mini-stack (tier 3 — 11/11 on a real Docker host). Between them they surfaced and
+  fixed four bugs: the dashboard pruned/full label (#32); the harness's three over-strict
+  assertions (monero-synced, conns, prune display); the fake Tari binding gRPC to loopback; and
+  the mini-stack's container-name/port isolation. Still pending: the full **destructive** config
+  matrix run on the box (its read path is already proven via `--check`).
+- **Destructive-matrix safety.** ✅ `run.sh --safety-backup` takes a real `pithead backup`
+  before the destructive scenarios and **automatically rolls the box back** (down → restore →
+  up) if anything fails; the archive is removed on success. So the matrix can run on a precious
+  box with a one-command rollback net.
+- **CLI breadth in automation.** ✅ `backup`/`restore` are now exercised end-to-end — by
+  `--safety-backup` and by a `--lifecycle` backup→restore round-trip (assert the pool reverts
+  and secrets survive). `reset-dashboard` and `upgrade` are still only unit-covered (upgrade
+  belongs to the release staging smoke test, since it rebuilds/pulls the bundle under test).
+- **Soak / longevity.** No multi-hour run asserting no leaks, no log/DB growth runaway, and that
+  the XvB controller converges over a realistic window.
+- **Load / capacity.** No test drives many workers or high share rates to find limits.
+- **Security review.** The compose **hardening invariants are regression-guarded** (the #90
+  section of `tests/stack/test_compose.sh`: RPC creds never in a healthcheck command,
+  `no-new-privileges` / `cap_drop` on the leaf containers, the Docker socket proxies stay
+  least-privilege), so a past fix can't be silently undone. A full security *audit* is still a
+  separate exercise (`SECURITY.md`) — these tests pin the decisions we've already made, they
+  don't find new ones.
+
+## Adding a scenario
+
+- **Logic** (a new decision/branch) → a unit test (tier 1). Cheapest, fastest.
+- **A new daemon state** the clients must parse → extend the fakes + the contract test (tier 2),
+  and it becomes drivable in the mini-stack (tier 3).
+- **A config axis** → one row in `tests/integration/scenarios.sh` (tier 4). The self-test
+  enforces every axis value is covered.
+- **A failure mode needing real containers** → a fault in `run.sh`'s fault-injection phase
+  (tier 4) and/or a mini-stack scenario (tier 3).
+
+Keep each situation at the lowest honest tier; don't re-prove logic with a heavier harness.
diff --git a/pithead b/pithead
index 59ad23b..b01a9a4 100755
--- a/pithead
+++ b/pithead
@@ -60,8 +60,49 @@ fi
 
 # --- Lifecycle Helpers ---
 
+# The Compose project name is pinned to "pithead" (docker-compose.yml `name:`). A stack first
+# deployed under the old directory-derived project name still has containers holding our
+# container_names (tor, monerod, …) under that old project — they'd block `up` with a name
+# clash. Remove ONLY those — the containers belonging to the exact project THIS directory used
+# to create — so the renamed project can take over. We never touch a container that merely
+# shares a service name with us (e.g. someone else's `caddy` from an unrelated project). Chain
+# data lives in the bind-mounted data dirs and the Tor onion keys in a bind mount too, so
+# nothing is lost; Caddy re-issues its local TLS cert once under the new name.
+migrate_compose_project() {
+    local cfg our_project dir_project names name cid proj
+    # Best-effort and must never abort pithead, so every substitution is guarded (a bare
+    # `var=$(failing)` would trip `set -e`).
+    cfg=$(docker compose config --format json 2>/dev/null) || return 0
+    [ -n "$cfg" ] || return 0
+    our_project=$(printf '%s' "$cfg" | jq -r '.name // "pithead"' 2>/dev/null) || our_project="pithead"
+    [ -n "$our_project" ] || our_project="pithead"
+    # The old project name is the one Compose derived from this directory's basename: lowercased,
+    # reduced to [a-z0-9_-], then stripped of any leading '_'/'-'. Matching it exactly is what
+    # keeps us from removing an unrelated container. If it equals our pinned name, nothing to do.
+    dir_project=$(basename "$PWD" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9_-' | sed 's/^[_-]*//') || return 0
+    { [ -n "$dir_project" ] && [ "$dir_project" != "$our_project" ]; } || return 0
+    names=$(printf '%s' "$cfg" | jq -r '.services[].container_name // empty' 2>/dev/null) || return 0
+    [ -n "$names" ] || return 0
+
+    local stale=()
+    while IFS= read -r name; do
+        [ -n "$name" ] || continue
+        cid=$(docker ps -aq --filter "name=^${name}$" 2>/dev/null | head -n1) || cid=""
+        [ -n "$cid" ] || continue
+        proj=$(docker inspect --format '{{index .Config.Labels "com.docker.compose.project"}}' "$cid" 2>/dev/null) || proj=""
+        [ "$proj" = "$dir_project" ] && stale+=("$name")
+    done <<< "$names"
+
+    [ "${#stale[@]}" -gt 0 ] || return 0
+    warn "Migrating this stack from the old Compose project '$dir_project' to '$our_project'."
+    log "Removing the old-named containers so the renamed project can take over. Chain data dirs"
+    log "and Tor onion keys are bind-mounted (untouched); Caddy re-issues its local TLS cert."
+    docker rm -f "${stale[@]}" >/dev/null 2>&1 || true
+}
+
 stack_up() {
     log "Starting stack..."
+    migrate_compose_project
     # Docker Compose automatically picks up COMPOSE_PROFILES from .env
     docker compose up -d
     log "Stack started successfully!"
@@ -82,6 +123,7 @@ stack_restart() {
 
 stack_upgrade() {
     log "Upgrading stack (rebuilding containers)..."
+    migrate_compose_project
     docker compose up -d --build
     log "Stack upgraded."
 }
@@ -1754,6 +1796,7 @@ apply() {
     generate_caddyfile
 
     log "Updating containers..."
+    migrate_compose_project
     # Compose recreates only the services whose resolved config changed; --remove-orphans
     # drops monerod when a local→remote switch deactivates the local_node profile.
     docker compose up -d --remove-orphans
diff --git a/tests/integration/README.md b/tests/integration/README.md
new file mode 100644
index 0000000..760aeba
--- /dev/null
+++ b/tests/integration/README.md
@@ -0,0 +1,37 @@
+# Integration tests (`tests/integration/`)
+
+End-to-end suite that drives a **real, already-provisioned Pithead server** through the
+config matrix and asserts the stack behaves (issue
+[#54](https://github.com/p2pool-starter-stack/pithead/issues/54)).
+
+```
+run.sh         entry point — connects (SSH or --local) and runs the matrix (+ --lifecycle,
+               --fault-injection)
+scenarios.sh   the declarative config matrix (data, not code)
+lib.sh         shared helpers: target I/O, assertions, readiness waiters, redaction
+selftest.sh    pure-logic self-test (no server) — runs in CI on every PR
+fakes/         controllable fake monerod/Tari + a contract test pointing the REAL clients at
+               them (tier 2; runs in CI, no docker)
+mini-stack/    docker overlay running the real dashboard + docker-control vs the fakes, with a
+               scenario runner for hold/release + reject/readmit (tier 3; needs docker)
+```
+
+The live matrix here is **tier 4** of the broader plan — see
+[`docs/testing-strategy.md`](../../docs/testing-strategy.md) for all four tiers and the full
+scenario catalog.
+
+Quick start:
+
+```bash
+# Against a remote box over SSH
+make test-integration ARGS="--host miner@10.0.0.5 --dir pithead"
+
+# On the box itself
+./run.sh --local --dir /home/miner/pithead --lifecycle
+
+# Just the pure-logic checks (no server)
+make test-integration-selftest
+```
+
+**Full guide — provisioning the box, the safety model, the matrix, artifacts, and
+CI/release wiring — is in [`docs/integration-testing.md`](../../docs/integration-testing.md).**
diff --git a/tests/integration/build-pruned-chain.sh b/tests/integration/build-pruned-chain.sh
new file mode 100644
index 0000000..40dbcbc
--- /dev/null
+++ b/tests/integration/build-pruned-chain.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# build-pruned-chain.sh — one-shot builder for a pruned Monero chain alongside the
+# canonical full chain, used to give the live test harness BOTH prune modes on one box.
+#
+# Strategy (minimal mining downtime, full chain never modified):
+#   1. stop monerod            -> makes the live LMDB consistent for copying
+#   2. copy full data.mdb      -> onto the CoW (btrfs) volume   [downtime window]
+#   3. start monerod           -> mining resumes immediately after the copy
+#   4. prune the COPY in place -> shrinks ~250G -> ~95G, full chain untouched
+#
+# Self-contained + idempotent-ish: preflights its inputs before any downtime, logs with timestamps,
+# writes a status sentinel, and always restarts monerod even if the copy fails. Run it under nohup.
+set -uo pipefail
+
+SRC_DIR="${SRC_DIR:-$HOME/code/p2pool-starter-stack/data/monero}"   # full chain; only read from
+DST_DIR="${DST_DIR:-/mnt/chains/monero-pruned}"                     # where the pruned copy is built
+PRUNE_BIN="${PRUNE_BIN:-$HOME/pithead-testbench/bin/monero-blockchain-prune}"
+STATUS="${STATUS:-$HOME/pithead-testbench/status}"                  # one-word progress sentinel
+CONTAINER="${CONTAINER:-monerod}"
+
+ts() { date '+%Y-%m-%dT%H:%M:%S%z'; }
+say() { echo "[$(ts)] $*"; }
+set_status() { echo "$1" > "$STATUS"; }
+
+say "START build-pruned-chain"
+say "src=$SRC_DIR dst=$DST_DIR"
+
+# Preflight: verify every input BEFORE stopping monerod, so a missing piece costs no mining downtime.
+src_mdb="$SRC_DIR/lmdb/data.mdb"
+if [ ! -f "$src_mdb" ]; then say "FATAL: source $src_mdb not found"; set_status "FAIL_NO_SRC"; exit 1; fi
+if ! command -v "$PRUNE_BIN" >/dev/null 2>&1; then say "FATAL: prune tool $PRUNE_BIN not found or not executable"; set_status "FAIL_NO_PRUNE_BIN"; exit 1; fi
+mkdir -p "$DST_DIR/lmdb" || { say "FATAL: cannot create $DST_DIR/lmdb"; set_status "FAIL_NO_DST"; exit 1; }
+say "source size: $(du -h "$src_mdb" | cut -f1)"
+
+set_status "STOPPING"
+say "stopping $CONTAINER (downtime begins)"
+docker stop "$CONTAINER" >/dev/null 2>&1 || { say "WARN docker stop failed (already stopped?)"; }
+
+set_status "COPYING"
+say "copy begin"
+copy_start=$(date +%s)
+cp "$src_mdb" "$DST_DIR/lmdb/data.mdb"
+rc=$?
+copy_end=$(date +%s)
+say "copy done rc=$rc in $((copy_end - copy_start))s"
+
+# Restart monerod immediately, regardless of copy outcome — minimise downtime.
+set_status "RESTARTING"
+say "starting $CONTAINER (downtime ends)"
+docker start "$CONTAINER" >/dev/null 2>&1 || say "WARN docker start failed"
+if [ $rc -ne 0 ]; then say "FATAL: copy failed"; set_status "FAIL_COPY"; exit 1; fi
+
+set_status "PRUNING"
+say "prune begin (full chain is back online; pruning the copy)"
+prune_start=$(date +%s)
+"$PRUNE_BIN" --data-dir "$DST_DIR" 2>&1
+rc=$?
+prune_end=$(date +%s)
+say "prune done rc=$rc in $((prune_end - prune_start))s"
+if [ $rc -ne 0 ]; then say "FATAL: prune failed"; set_status "FAIL_PRUNE"; exit 1; fi
+say "pruned size: $(du -h "$DST_DIR/lmdb/data.mdb" | cut -f1)"
+set_status "DONE"
+say "ALL DONE"
diff --git a/tests/integration/compact-chain.sh b/tests/integration/compact-chain.sh
new file mode 100644
index 0000000..d27971a
--- /dev/null
+++ b/tests/integration/compact-chain.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#
+# compact-chain.sh — reclaim LMDB file bloat from an already-pruned Monero chain.
+#
+# An in-place prune leaves the LMDB file at its full-chain high-water mark (LMDB never shrinks
+# its file), so a pruned chain can sit at ~270 GiB on disk while holding only ~95 GiB of live
+# data. `monero-blockchain-prune --copy-pruned-database` rewrites the chain into a fresh DB at
+# <data-dir>/lmdb-pruned, which comes out at its true compact size.
+#
+# IMPORTANT — speed & safety:
+#  * It copies every block one-by-one, so it is SLOW (multiple HOURS for a mainnet chain). It is
+#    NOT a page-level copy.
+#  * It only READS the source, through a consistent LMDB snapshot, so it is safe to run while
+#    monerod is up and mining — zero downtime during the copy; the source is never modified.
+#  * The generic `mdb_copy -c` does NOT work on a Monero chain: Monero ships a patched LMDB and
+#    stock mdb_copy rejects the on-disk format (MDB_VERSION_MISMATCH). This tool is the only path.
+#
+# When it finishes, swap the compact copy in (brief downtime) and verify:
+#   docker stop monerod
+#   mv <data-dir>/lmdb <data-dir>/lmdb.bloated && mv <data-dir>/lmdb-pruned <data-dir>/lmdb
+#   docker start monerod      # re-syncs the few blocks added during the copy
+#   # confirm healthy (get_info: synchronized), then: rm -rf <data-dir>/lmdb.bloated
+#
+# This script ONLY builds the compact copy; it does not stop/start containers or swap. Logs the
+# source size before, the source + compact-copy (lmdb-pruned) sizes after, and a status sentinel.
+set -uo pipefail
+
+DATA_DIR="${1:?usage: compact-chain.sh <data-dir>}"
+PRUNE_BIN="${PRUNE_BIN:-$HOME/pithead-testbench/bin/monero-blockchain-prune}"
+LOG="${LOG:-$HOME/pithead-testbench/compact.log}"
+STATUS="${STATUS:-$HOME/pithead-testbench/compact-status}"
+
+ts() { date '+%Y-%m-%dT%H:%M:%S%z'; }
+say() { echo "[$(ts)] $*" | tee -a "$LOG"; }
+
+{
+  echo "===== compact run $(ts) ====="
+  echo "data-dir: $DATA_DIR"
+  echo "--- lmdb BEFORE ---"
+  ls -la "$DATA_DIR/lmdb/" 2>/dev/null
+  echo "data.mdb apparent+disk:"; du -h --apparent-size "$DATA_DIR/lmdb/data.mdb" 2>/dev/null; du -h "$DATA_DIR/lmdb/data.mdb" 2>/dev/null
+} >> "$LOG" 2>&1
+
+echo COMPACTING > "$STATUS"
+say "compaction begin (data-dir=$DATA_DIR)"
+t0=$(date +%s)
+"$PRUNE_BIN" --data-dir "$DATA_DIR" --copy-pruned-database >> "$LOG" 2>&1
+rc=$?
+t1=$(date +%s)
+say "compaction done rc=$rc in $((t1 - t0))s"
+# The compact copy lands in lmdb-pruned/ (lmdb/ is only read), so report BOTH to show the saving.
+{
+  echo "--- lmdb AFTER ---"
+  ls -la "$DATA_DIR"/lmdb{,-pruned}/ 2>/dev/null
+  echo "data.mdb apparent+disk:"; du -h --apparent-size "$DATA_DIR"/lmdb{,-pruned}/data.mdb 2>/dev/null; du -h "$DATA_DIR"/lmdb{,-pruned}/data.mdb 2>/dev/null
+} >> "$LOG" 2>&1
+
+if [ $rc -eq 0 ]; then echo DONE > "$STATUS"; else echo "FAIL_rc$rc" > "$STATUS"; fi
+say "status=$(cat "$STATUS")"
diff --git a/tests/integration/fakes/fake_monerod.py b/tests/integration/fakes/fake_monerod.py
new file mode 100644
index 0000000..82d3db7
--- /dev/null
+++ b/tests/integration/fakes/fake_monerod.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+"""
+Controllable fake monerod for the integration mini-stack (issue #54, tier 3).
+
+Speaks just enough of monerod's `get_info` RPC for the dashboard's MoneroClient to read it,
+plus a `/control` endpoint to drive its state from a test. Lets us reproduce the whole Monero
+side of the runtime state machine — syncing %, synced, unreachable, pruned/full DB size —
+deterministically, with no real chain.
+
+Run standalone (in the docker mini-stack):
+    python3 fake_monerod.py --port 18081
+
+Drive it:
+    curl -s localhost:18081/control -d '{"mode":"syncing","height":1500,"target_height":3000}'
+    curl -s localhost:18081/control -d '{"mode":"down"}'
+    curl -s localhost:18081/get_info
+
+Use in-process (the contract test):
+    with FakeMonerod() as m:
+        m.set(mode="syncing", height=1500, target_height=3000)
+        ...point a real MoneroClient at m.url...
+"""
+import argparse
+import json
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+# mode ∈ {"synced", "syncing", "busy", "down"}. height/target_height/database_size are what
+# get_info returns; the client derives sync %/DB size from them (MoneroClient.get_sync_status).
+DEFAULT_STATE = {
+    "mode": "synced",
+    "height": 3_000_000,
+    "target_height": 3_000_000,
+    "database_size": 85 * 10**9,
+}
+
+
+class _Handler(BaseHTTPRequestHandler):
+    def log_message(self, *_args):  # keep the test output clean
+        pass
+
+    def _send(self, code, payload):
+        body = json.dumps(payload).encode()
+        self.send_response(code)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def do_GET(self):
+        if self.path.rstrip("/") != "/get_info":
+            self._send(404, {"status": "NOT_FOUND"})
+            return
+        st = self.server.state
+        # "down" → unreachable: monerod's RPC not answering. A non-200 makes MoneroClient
+        # treat the node as unreachable (get_info returns None), which is what we want.
+        if st["mode"] == "down":
+            self._send(503, {"status": "BUSY"})
+            return
+        # "busy" → RPC answers HTTP 200 but reports a non-OK status (e.g. mid-reorg). The
+        # client must distrust the heights and treat it as unreachable, not as synced.
+        if st["mode"] == "busy":
+            self._send(200, {"status": "BUSY", "height": st["height"],
+                             "target_height": st["target_height"]})
+            return
+        if st["mode"] == "syncing":
+            payload = {
+                "status": "OK",
+                "synchronized": False,
+                "height": st["height"],
+                "target_height": st["target_height"],
+                "database_size": st["database_size"],
+            }
+        else:  # synced — monerod reports synchronized and target_height 0 once caught up
+            payload = {
+                "status": "OK",
+                "synchronized": True,
+                "height": st["height"],
+                "target_height": 0,
+                "database_size": st["database_size"],
+            }
+        self._send(200, payload)
+
+    def do_POST(self):
+        if self.path.rstrip("/") != "/control":
+            self._send(404, {"status": "NOT_FOUND"})
+            return
+        length = int(self.headers.get("Content-Length", 0))
+        try:
+            data = json.loads(self.rfile.read(length) or b"{}")
+        except ValueError:
+            self._send(400, {"error": "bad json"})
+            return
+        self.server.state.update(data)
+        self._send(200, self.server.state)
+
+
+class _Server(ThreadingHTTPServer):
+    daemon_threads = True  # per-request threads are daemonic, so they never block interpreter exit
+
+    def __init__(self, addr, state):  # `state` is the dict _Handler reads/mutates via self.server.state
+        super().__init__(addr, _Handler)
+        self.state = state
+
+
+class FakeMonerod:
+    """Context manager that runs the fake on an ephemeral port in a background thread."""
+
+    def __init__(self, port=0, host="127.0.0.1", **state):  # **state overrides DEFAULT_STATE keys
+        self.state = {**DEFAULT_STATE, **state}
+        self._srv = _Server((host, port), self.state)  # socket is bound here; serving starts in __enter__
+        self.host, self.port = self._srv.server_address  # the port actually bound (resolves port=0)
+
+    @property
+    def url(self):  # base URL to hand to a client, e.g. MoneroClient(url=m.url, ...)
+        return f"http://{self.host}:{self.port}"
+
+    def set(self, **kwargs):  # mutates the same dict the server holds, so it takes effect immediately
+        self.state.update(kwargs)
+
+    def __enter__(self):
+        self._thread = threading.Thread(target=self._srv.serve_forever, daemon=True)
+        self._thread.start()
+        return self
+
+    def __exit__(self, *_exc):
+        self._srv.shutdown()  # stops the serve_forever() loop running in the background thread
+        self._srv.server_close()  # releases the listening socket
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Controllable fake monerod")
+    ap.add_argument("--port", type=int, default=18081)
+    ap.add_argument("--host", default="0.0.0.0")  # noqa: S104 — test-only container
+    ap.add_argument("--mode", default="synced", choices=["synced", "syncing", "down"],
+                    help="initial state (the mini-stack boots 'syncing' to exercise the hold)")
+    args = ap.parse_args()
+    state = dict(DEFAULT_STATE, mode=args.mode)
+    # "syncing" needs height < target_height to read as syncing (else it looks caught up).
+    if args.mode == "syncing" and state["height"] >= state["target_height"]:
+        state["height"], state["target_height"] = 1_500_000, 3_000_000
+    srv = _Server((args.host, args.port), state)
+    print(f"fake-monerod listening on {args.host}:{args.port} (mode={args.mode})", flush=True)
+    try:
+        srv.serve_forever()
+    except KeyboardInterrupt:
+        pass
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration/fakes/fake_tari.py b/tests/integration/fakes/fake_tari.py
new file mode 100644
index 0000000..3f913d5
--- /dev/null
+++ b/tests/integration/fakes/fake_tari.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""
+Controllable fake Tari base node for the integration mini-stack (issue #54, tier 3).
+
+Implements just the two BaseNode gRPC methods the dashboard's TariClient calls — GetTipInfo
+and GetSyncProgress — against the project's own vendored protobuf stubs, so the real client
+talks to it unchanged (the client uses an insecure channel, so there's no auth to fake). A
+small HTTP `/control` side-channel drives its state.
+
+Run standalone (in the docker mini-stack):
+    python3 fake_tari.py --grpc-port 18142 --control-port 18152
+
+Drive it:
+    curl -s localhost:18152/control -d '{"mode":"syncing","height":500,"target_height":2000}'
+    curl -s localhost:18152/control -d '{"mode":"down"}'
+
+Use in-process (the contract test) via start_server().
+"""
+import argparse
+import asyncio
+import json
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+import grpc
+
+from mining_dashboard.client.tari.generated import base_node_pb2 as bn
+from mining_dashboard.client.tari.generated import base_node_pb2_grpc as bn_grpc
+
+# mode ∈ {"synced", "syncing", "down"}.
+DEFAULT_STATE = {"mode": "synced", "height": 2_000_000, "target_height": 2_000_000}
+
+
+class FakeBaseNode(bn_grpc.BaseNodeServicer):  # answers GetTipInfo/GetSyncProgress from a shared state dict
+    def __init__(self, state):  # `state` is kept by reference, so later mutations show up in replies
+        self.state = state
+
+    async def GetTipInfo(self, request, context):
+        st = self.state
+        if st["mode"] == "down":
+            await context.abort(grpc.StatusCode.UNAVAILABLE, "fake node down")  # abort() raises; nothing below runs
+        resp = bn.TipInfoResponse()
+        resp.metadata.best_block_height = st["height"]
+        # initial_sync_achieved is the authoritative "fully synced" flag the client trusts.
+        resp.initial_sync_achieved = st["mode"] == "synced"
+        return resp
+
+    async def GetSyncProgress(self, request, context):
+        st = self.state
+        if st["mode"] == "down":
+            await context.abort(grpc.StatusCode.UNAVAILABLE, "fake node down")  # abort() raises; nothing below runs
+        resp = bn.SyncProgressResponse()
+        resp.local_height = st["height"]  # the fake's current height
+        resp.tip_height = st["target_height"]  # the height it is syncing towards
+        return resp
+
+
+async def start_server(port, state, host="127.0.0.1"):
+    """Start a gRPC server on `host:port` (port 0 = ephemeral). Returns (server, bound_port).
+
+    Defaults to loopback for the in-process contract test; the standalone container passes
+    0.0.0.0 so the dashboard can reach it across the docker network (binding 127.0.0.1 inside
+    a container makes the port unreachable from peer containers).
+    """
+    server = grpc.aio.server()
+    bn_grpc.add_BaseNodeServicer_to_server(FakeBaseNode(state), server)  # servicer shares the caller's `state` dict
+    bound = server.add_insecure_port(f"{host}:{port}")  # no TLS/auth; value is the port actually bound
+    await server.start()
+    return server, bound
+
+
+# --- standalone HTTP control side-channel (docker mini-stack only) ----------
+class _ControlHandler(BaseHTTPRequestHandler):
+    def log_message(self, *_args):
+        pass
+
+    def do_POST(self):
+        if self.path.rstrip("/") != "/control":
+            self.send_response(404)
+            self.end_headers()
+            return
+        length = int(self.headers.get("Content-Length", 0))
+        try:
+            data = json.loads(self.rfile.read(length) or b"{}")
+        except ValueError:
+            self.send_response(400)
+            self.end_headers()
+            return
+        self.server.state.update(data)
+        body = json.dumps(self.server.state).encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(body)
+
+
+class _ControlServer(ThreadingHTTPServer):
+    daemon_threads = True  # per-request threads are daemonic, so they never block interpreter exit
+
+    def __init__(self, addr, state):  # `state` is the dict _ControlHandler updates via self.server.state
+        super().__init__(addr, _ControlHandler)
+        self.state = state
+
+
+async def _main_async(args, state):  # runs the gRPC fake plus its HTTP control server until terminated
+    server, _ = await start_server(args.grpc_port, state, host="0.0.0.0")  # noqa: S104 — test container
+    ctrl = _ControlServer(("0.0.0.0", args.control_port), state)  # noqa: S104 — test-only
+    threading.Thread(target=ctrl.serve_forever, daemon=True).start()  # control HTTP runs on a daemon thread
+    print(
+        f"fake-tari gRPC on :{args.grpc_port}, control on :{args.control_port}",
+        flush=True,
+    )
+    await server.wait_for_termination()  # both servers were given the same `state` dict
+
+
+def main():  # CLI entry point for the standalone fake Tari base node
+    ap = argparse.ArgumentParser(description="Controllable fake Tari base node")
+    ap.add_argument("--grpc-port", type=int, default=18142)
+    ap.add_argument("--control-port", type=int, default=18152)
+    ap.add_argument("--mode", default="synced", choices=["synced", "syncing", "down"],
+                    help="initial state (the mini-stack boots 'syncing' to exercise the hold)")
+    args = ap.parse_args()
+    state = dict(DEFAULT_STATE, mode=args.mode)
+    if args.mode == "syncing" and state["height"] >= state["target_height"]:  # DEFAULT_STATE has them equal
+        state["height"], state["target_height"] = 1_000_000, 2_000_000  # give "syncing" a height below its target
+    try:
+        asyncio.run(_main_async(args, state))
+    except KeyboardInterrupt:  # Ctrl-C ends the foreground server without a traceback
+        pass
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/integration/fakes/test_contract.py b/tests/integration/fakes/test_contract.py
new file mode 100644
index 0000000..019f829
--- /dev/null
+++ b/tests/integration/fakes/test_contract.py
@@ -0,0 +1,140 @@
+"""
+Contract test: point the REAL dashboard clients at the controllable fakes and assert they
+parse every state we need to drive in the mini-stack (issue #54, tier 3 / tier 2 seam).
+
+This is the proof that the fakes speak the daemons' wire format closely enough for the real
+MoneroClient / TariClient — and it runs anywhere (no docker, no real chain). If a future
+monerod/Tari change breaks the parser, this goes red here instead of only on the live box.
+
+Run: PYTHONPATH=build/dashboard python3 -m pytest tests/integration/fakes -q
+"""
+import asyncio
+import pathlib
+import sys
+
+import requests
+from unittest.mock import MagicMock
+
+_HERE = pathlib.Path(__file__).resolve().parent
+_REPO = _HERE.parents[2]
+# Make the dashboard package and the fakes importable regardless of how pytest is invoked.
+sys.path.insert(0, str(_REPO / "build" / "dashboard"))
+sys.path.insert(0, str(_HERE))
+
+from fake_monerod import FakeMonerod  # noqa: E402
+from fake_tari import start_server  # noqa: E402
+from mining_dashboard.client.monero.monero_client import MoneroClient  # noqa: E402
+from mining_dashboard.client.tari.tari_client import TariClient  # noqa: E402
+
+
+# --- Monero (HTTP get_info) -------------------------------------------------
+def test_monero_synced_reads_no_sync_and_db_size():  # the fake's default mode is "synced"
+    with FakeMonerod(database_size=85 * 10**9) as m:
+        client = MoneroClient(url=m.url, username="")
+        st = client.get_sync_status()
+    assert st == {"is_syncing": False, "db_size": 85 * 10**9}  # exact dict: no other keys expected when synced
+
+
+def test_monero_syncing_reports_percent():  # mid-sync get_info must surface current/target/percent and DB size
+    with FakeMonerod() as m:
+        m.set(mode="syncing", height=1500, target_height=3000, database_size=40 * 10**9)
+        client = MoneroClient(url=m.url, username="")
+        st = client.get_sync_status()
+    assert st["is_syncing"] is True
+    assert st["current"] == 1500 and st["target"] == 3000 and st["percent"] == 50  # 1500 of 3000 → 50 %
+    assert st["db_size"] == 40 * 10**9
+
+
+def test_monero_down_is_unreachable():  # a non-200 get_info must read as "no status", not as synced
+    with FakeMonerod() as m:
+        m.set(mode="down")  # the fake now answers /get_info with HTTP 503
+        client = MoneroClient(url=m.url, username="")
+        assert client.get_sync_status() is None
+
+
+def test_monero_busy_status_is_unreachable():
+    # HTTP 200 but status=BUSY (e.g. mid-reorg): the client must distrust it, not read it synced.
+    with FakeMonerod() as m:
+        m.set(mode="busy")
+        assert MoneroClient(url=m.url, username="").get_sync_status() is None
+
+
+def test_monero_synced_by_height_even_without_flag():
+    # synchronized=false but height has reached target → caught up (mirrors monerod at the tip).
+    with FakeMonerod() as m:
+        m.set(mode="syncing", height=3_000_000, target_height=3_000_000)
+        st = MoneroClient(url=m.url, username="").get_sync_status()
+    assert st["is_syncing"] is False
+
+
+def test_monero_db_size_unknown_reads_zero():  # database_size 0 must pass through as db_size 0
+    with FakeMonerod(database_size=0) as m:
+        st = MoneroClient(url=m.url, username="").get_sync_status()
+    assert st == {"is_syncing": False, "db_size": 0}  # still the exact synced-shape dict
+
+
+def test_monero_http_control_mutates_state():
+    # Validates the /control path the docker mini-stack drives over the network.
+    with FakeMonerod() as m:
+        requests.post(m.url + "/control", json={"mode": "syncing", "height": 10, "target_height": 100}, timeout=5)
+        info = requests.get(m.url + "/get_info", timeout=5).json()
+    assert info["synchronized"] is False and info["height"] == 10 and info["target_height"] == 100
+
+
+# --- Tari (gRPC BaseNode) ---------------------------------------------------
+# Driven via asyncio.run so they don't depend on pytest-asyncio being active (the dashboard's
+# asyncio_mode=auto only applies when pytest's rootdir is build/dashboard).
+async def _tari_get_status(state):  # one get_sync_status() reading from a real TariClient against a fresh fake
+    server, bound = await start_server(0, state)  # port 0 → ephemeral; `bound` is the real port
+    client = TariClient(MagicMock())  # NOTE(review): MagicMock stands in for the constructor arg — confirm what TariClient expects
+    client.grpc_address = f"127.0.0.1:{bound}"  # point the client at the fake instead of its configured address
+    try:
+        return await client.get_sync_status()
+    finally:  # always tear down, even when the call raises
+        await client.close()
+        await server.stop(None)
+
+
+def test_tari_synced_reads_done():  # mode "synced" makes the fake set initial_sync_achieved
+    st = asyncio.run(_tari_get_status({"mode": "synced", "height": 2000, "target_height": 2000}))
+    assert st["is_syncing"] is False and st["reachable"] is True and st["percent"] == 100
+
+
+def test_tari_syncing_reports_percent():  # 500 of 2000 blocks → 25 %
+    st = asyncio.run(_tari_get_status({"mode": "syncing", "height": 500, "target_height": 2000}))
+    assert st["is_syncing"] is True and st["percent"] == 25 and st["reachable"] is True
+
+
+def test_tari_down_is_unreachable_with_no_cache():
+    # No prior good reading to cache, so a down node is reported unreachable immediately.
+    st = asyncio.run(_tari_get_status({"mode": "down", "height": 0, "target_height": 0}))
+    assert st["reachable"] is False
+
+
+def test_tari_syncing_without_reliable_target_avoids_false_100():
+    # Early sync: the node can't give a target above local height yet → report syncing at 0%,
+    # never a premature ✔ (target 0, not a bogus 100%).
+    st = asyncio.run(_tari_get_status({"mode": "syncing", "height": 1000, "target_height": 1000}))
+    assert st["is_syncing"] is True and st["target"] == 0 and st["percent"] == 0
+
+
+def test_tari_serves_cached_reading_when_briefly_unreachable():
+    # A busy-but-alive node (gRPC blips) should keep showing its last good reading, flagged
+    # reachable=False so node-down detection still sees the outage.
+    async def _impl():
+        state = {"mode": "synced", "height": 2000, "target_height": 2000}
+        server, bound = await start_server(0, state)
+        client = TariClient(MagicMock())
+        client.grpc_address = f"127.0.0.1:{bound}"
+        try:
+            first = await client.get_sync_status()    # live: synced + reachable
+            state["mode"] = "down"
+            second = await client.get_sync_status()    # cached: last reading, reachable False
+            return first, second
+        finally:
+            await client.close()
+            await server.stop(None)
+
+    first, second = asyncio.run(_impl())
+    assert first["reachable"] is True and first["is_syncing"] is False
+    assert second["reachable"] is False and second["is_syncing"] is False
diff --git a/tests/integration/gouda-testbench-README.md b/tests/integration/gouda-testbench-README.md
new file mode 100644
index 0000000..1737b95
--- /dev/null
+++ b/tests/integration/gouda-testbench-README.md
@@ -0,0 +1,110 @@
+# Pithead reference build & test server (`gouda`)
+
+A dedicated **dev + AI-agent test platform** that runs the **live Pithead stack** (Monero node +
+P2Pool + Tari merge-mining + dashboard) against real, synced chains, and serves as the **Tier-4
+release gate** — changes are validated end-to-end here before release. Read this first.
+
+See **`docs/test-server-architecture.md`** in the repo for the full architecture + how to recreate
+this box on another machine. `system-info.md` (next to this file) is a live hardware snapshot:
+regenerate with `~/pithead-testbench/system-info.sh > ~/pithead-testbench/system-info.md`.
+
+## ⚠️ Golden rules
+
+This is a **test bench, not a production miner** — downtime and teardown/redeploy are fine. The
+constraints that matter:
+
+1. **Never lose the synced chains.** They are the only slow-to-acquire asset (days to re-sync) —
+   reuse them. They live at `/srv/code/pithead-data/`, decoupled from the checkout, so you can
+   refresh/redeploy the stack freely without touching them.
+2. **Storage is the bottleneck (no NVMe yet).** `sdb` (the SATA "SSD") benchmarks at ~37–98 MB/s —
+   HDD-class — so monerod, builds, and especially LMDB compaction are slow. Chains live on it at
+   `/srv/code/pithead-data` (still better than the `/home` HDD for random I/O, which stays cold
+   storage). A real **m.2 PCIe NVMe is the #1 upgrade** — see `docs/test-server-architecture.md`.
+3. **Least privilege.** `sudo` is password-protected and interactive-only — don't expect or leave
+   passwordless grants. Almost everything here needs **no sudo** (your user is in the `docker` group).
+4. **Secrets stay put.** `.env` (RPC creds) and `config.json` (wallet addresses) are owner-only.
+   Never print, copy, or commit them.
+
+## Where things are
+
+| Path | What |
+|---|---|
+| `~/code/pithead/` (`/srv/code/pithead`, SATA SSD) | the stack checkout: `docker-compose.yml`, the `pithead` CLI, your `config.json`/`.env` |
+| `/srv/code/pithead-data/{monero,tari,p2pool,dashboard,tor}/` | the chains — **the asset**, on the SATA SSD (`sdb`), decoupled from the checkout |
+| `~/pithead-testbench/` | **this dir** — build-server docs + tools |
+| `~/pithead-testbench/bin/monero-blockchain-prune` | verified offline Monero tool (version matches monerod) |
+| `~/pithead-testbench/{build-pruned-chain,compact-chain,system-info}.sh` | chain ops + system snapshot (also versioned in the repo `tests/integration/`) |
+| `/home`, `/mnt/chains` | HDD — cold backups / archives only |
+
+## The chains (this was the confusing part)
+
+- **Monero is PRUNED** (`MONERO_PRUNE=1`) and compacted to its true ~95 GiB. If it ever reads
+  ~250 GiB again, that is **LMDB free-page bloat** from an in-place prune — *not* a full chain.
+  Compact it (below). Note: the generic `mdb_copy` **cannot** read Monero's patched LMDB
+  (`MDB_VERSION_MISMATCH`); only `monero-blockchain-prune` works.
+- **Tari is ARCHIVAL/full** (~132 GiB, no pruning configured). That size is genuine data, not
+  bloat — there is nothing to compact. Shrinking it would mean *pruning* Tari (a config change +
+  re-sync), which is a product decision, not housekeeping.
+
+**Compacting the Monero chain** (reclaim bloat; hours, but no downtime until the swap):
+```bash
+~/pithead-testbench/compact-chain.sh /srv/code/pithead-data/monero   # builds lmdb-pruned/ (monerod stays up)
+# when DONE, swap it in (brief downtime):
+docker stop monerod
+cd /srv/code/pithead-data/monero && mv lmdb lmdb.bloated && mv lmdb-pruned lmdb
+docker start monerod        # re-syncs the few blocks added during the copy
+# confirm `pithead status` healthy, then: rm -rf lmdb.bloated
+```
+
+## Running the stack
+```bash
+cd ~/code/pithead
+./pithead status         # health summary
+./pithead doctor         # deeper diagnostics
+./pithead up | down | apply | backup
+```
+
+## Running the test harness (the point of this box)
+
+Tiers 1–3 run anywhere with no real chains; **Tier 4 (the live matrix) runs here.**
+```bash
+# Drive gouda over SSH from a dev checkout (start non-destructive):
+tests/integration/run.sh --host vijit@gouda --dir code/pithead --check       # assert current live state
+tests/integration/run.sh --host vijit@gouda --dir code/pithead --readiness   # is the box fit to gate a release?
+# Full destructive config matrix, with a pithead backup + auto-rollback on failure:
+tests/integration/run.sh --host vijit@gouda --dir code/pithead --safety-backup
+# On the box itself:
+cd ~/code/pithead && tests/integration/run.sh --local --dir "$PWD" --lifecycle
+```
+Always start with `--check`/`--readiness`. Use `--safety-backup` for the destructive matrix so a
+failure rolls the box back (down → restore → up). See `docs/integration-testing.md` in the repo.
+
+## End-to-end coverage: validated live vs. gaps
+
+**Validated live on gouda (Tier 4):** the config matrix (remote/local node, dashboard secure/insecure,
+Tari required/optional, RPC LAN access, XvB on/off) applied + asserted on real synced chains;
+lifecycle (restart, secret-preserving `apply`, backup→restore round-trip); node-down failover →
+recovery; release readiness; **pruned** monerod (the real prod config).
+**Covered without a real chain:** client↔daemon contract tests, the fake daemon mini-stack
+(incl. full-prune behavior), compose hardening, config rendering, dashboard unit/frontend tests.
+
+| # | Gap (not tested live) | Worth filling before release? |
+|---|---|---|
+| 1 | **Full (unpruned) Monero** mode live — gouda is pruned-only | **Low.** Stack code paths don't differ by prune mode (monerod-internal); fakes/config cover it. A multi-day full sync isn't justified. |
+| 2 | **Privacy / Tor egress** — no clearnet-leak assertions in the live harness (issue #160) | **High.** Privacy is a core promise. Add egress checks (no clearnet to XvB stats, p2pool, Tari DNS) to the live harness. |
+| 3 | **Automated PR gate** — self-hosted runner exists but is manual/opt-in | **Medium-high, high-leverage.** Wire the live harness as a required check on `workflow_dispatch`/push-to-`main` only (never fork PRs). |
+| 4 | **Upgrade / migration** across image versions with chain continuity | **Medium.** Real users upgrade. Add a scenario: pull new images → `apply` → assert chain continuity + no re-sync + secrets intact. |
+| 5 | **XvB live routing** end-to-end (the raffle optimization) | **Medium.** Core value-prop, but unit/sim-tested today. A periodic live XvB smoke test would help; hard to assert deterministically. |
+| 6 | **Multi-worker scale** — harness assumes ~2 workers | **Medium.** For perf confidence add a load-gen worker + assert proxy routing/hashrate. Not a blocker. |
+| 7 | **Real Tari merge-mined block** acceptance | **Low.** Finding a block is probabilistic; rely on template/connectivity checks. |
+| 8 | **Fault injection over SSH** (currently local-mode only) | **Low-Medium.** Extend SIGSTOP/remove fault cases to the `--host` path. |
+
+**Recommended before release:** #2 (privacy egress) and #3 (automated PR gate); then #4 (upgrade)
+and #5 (XvB smoke). The rest are nice-to-have.
+
+## Notes for AI agents
+- SSH from a sandboxed agent needs the LAN allowance (e.g. `dangerouslyDisableSandbox`); gouda is on the LAN.
+- **Avoid literal `( )` in remote command strings** — they break the non-interactive remote shell.
+- `pkill -f <pattern>` self-matches your own command line — kill by PID, or use the `[x]`-bracket trick.
+- Don't stop monerod without reason; check `docker ps` health first and narrate any downtime.
+- Long jobs: launch detached (`nohup … &`) and poll a status file; SSH sessions drop.
diff --git a/tests/integration/lib.sh b/tests/integration/lib.sh
new file mode 100644
index 0000000..6508dbb
--- /dev/null
+++ b/tests/integration/lib.sh
@@ -0,0 +1,266 @@
+# shellcheck shell=bash
+#
+# Shared library for the Pithead integration test harness (tests/integration/).
+#
+# This file is *sourced*, never executed. It defines pure helpers (config rendering,
+# expectation derivation, redaction) plus thin I/O wrappers (run a command on the target,
+# poll for readiness) that the runner and the self-test build on. Keeping the pure logic
+# here lets tests/integration/selftest.sh exercise it without a real server.
+#
+# Target model: every command runs *on the box* — either over SSH or, with --local, directly.
+# Reads (dashboard JSON, pithead status) therefore behave identically in both modes, and we
+# never depend on the runner being able to resolve the box's dashboard hostname.
+
+# --- Output -----------------------------------------------------------------
+# Colour only on a TTY with NO_COLOR unset (https://no-color.org), matching pithead.
+if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
+    IT_RESET='\033[0m'; IT_GREEN='\033[1;32m'; IT_YELLOW='\033[1;33m'; IT_RED='\033[1;31m'; IT_DIM='\033[2m'
+else
+    IT_RESET=''; IT_GREEN=''; IT_YELLOW=''; IT_RED=''; IT_DIM=''
+fi
+
+it_log()  { echo -e "${IT_GREEN}[ITEST]${IT_RESET} $1"; }
+it_warn() { echo -e "${IT_YELLOW}[ITEST]${IT_RESET} $1" >&2; }
+it_err()  { echo -e "${IT_RED}[ITEST]${IT_RESET} $1" >&2; }
+it_step() { echo -e "${IT_DIM}  → $1${IT_RESET}"; }
+
+# --- Secrets hygiene --------------------------------------------------------
+# The box holds real RPC creds, a proxy token, and onion addresses. Redact anything that
+# looks secret before it reaches a log file or the terminal. Defence-in-depth: we also avoid
+# printing these values in the first place. Patterns cover .env KEY=VALUE lines and .onion
+# hostnames. Keep this conservative — over-redaction is safe, leaks are not.
+redact() {
+    sed -E \
+        -e 's/(PROXY_AUTH_TOKEN|MONERO_NODE_PASSWORD|MONERO_NODE_USERNAME|.*_PASSWORD|.*_TOKEN|.*_SECRET)=.*/\1=<redacted>/' \
+        -e 's/[a-z2-7]{56}\.onion/<redacted>.onion/g'
+}
+
+# --- Assertions -------------------------------------------------------------
+# Counters are global so the runner can total them across scenarios.
+IT_PASS=0
+IT_FAIL=0
+IT_FAILED_NAMES=""
+
+it_pass() { IT_PASS=$((IT_PASS + 1)); printf '    %b✓%b %s\n' "$IT_GREEN" "$IT_RESET" "$1"; }
+it_fail() {
+    IT_FAIL=$((IT_FAIL + 1))
+    IT_FAILED_NAMES="${IT_FAILED_NAMES}\n    - ${IT_CURRENT_SCENARIO:-?}: $1"
+    printf '    %b✗%b %s\n        %s\n' "$IT_RED" "$IT_RESET" "$1" "${2:-}"
+}
+
+assert_eq()       { if [ "$2" = "$3" ]; then it_pass "$1"; else it_fail "$1" "expected [$3], got [$2]"; fi; }
+assert_ne()       { if [ "$2" != "$3" ]; then it_pass "$1"; else it_fail "$1" "expected not [$3]"; fi; }
+assert_rc()       { if [ "$2" = "$3" ]; then it_pass "$1"; else it_fail "$1" "expected rc $3, got $2"; fi; }
+assert_contains() { case "$2" in *"$3"*) it_pass "$1" ;; *) it_fail "$1" "[$2] missing [$3]" ;; esac; }
+# Numeric "greater than / >=" with a graceful non-number guard.
+assert_num_ge()   {
+    if [ -n "$2" ] && [ "$2" -ge "$3" ] 2>/dev/null; then it_pass "$1"; else it_fail "$1" "expected >= $3, got [$2]"; fi
+}
+assert_num_gt()   {
+    if [ -n "$2" ] && [ "$2" -gt "$3" ] 2>/dev/null; then it_pass "$1"; else it_fail "$1" "expected > $3, got [$2]"; fi
+}
+
+# --- Config rendering (pure) ------------------------------------------------
+# Map a list of `dotted.path=value` overrides (one per argument) into a jq program that
+# applies them to a config.json. Pure and deterministic so selftest.sh can verify it.
+#
+# Values are typed:
+#   true / false              -> JSON boolean
+#   optional '-' then digits  -> JSON number
+#   everything else           -> JSON string, with backslash and double-quote escaped so the
+#                                value cannot break out of (or corrupt) the jq string literal
+# Values such as "1-2" or "-" contain only digit/dash characters but are not integers; they
+# are emitted as strings rather than as invalid jq.
+#
+# Args:   zero or more `path=value` pairs; empty arguments are ignored.
+# Output: the jq program on stdout, without a trailing newline ("." when nothing applies).
+overrides_to_jq() {
+    local program="." pair path value jsonval
+    for pair in "$@"; do
+        [ -z "$pair" ] && continue
+        path="${pair%%=*}"      # text before the first '='
+        value="${pair#*=}"      # text after the first '=' (may itself contain '=')
+        if [ "$value" = "true" ] || [ "$value" = "false" ]; then
+            jsonval="$value"
+        elif [[ "$value" =~ ^-?[0-9]+$ ]]; then
+            jsonval="$value"
+        else
+            # Escape backslashes first, so the ones added for quotes are not doubled.
+            value=${value//\\/\\\\}
+            value=${value//\"/\\\"}
+            jsonval="\"$value\""
+        fi
+        program="${program} | .${path}=${jsonval}"
+    done
+    printf '%s' "$program"
+}
+
+# Render a scenario's config.json to stdout. $1 is the box's baseline config JSON (real
+# wallets / data dirs / host preserved); the remaining arguments are `path=value` overrides
+# applied on top of it. Requires jq; the exit status is jq's.
+render_scenario_config() {
+    local baseline_json="$1" jq_program
+    shift
+    jq_program="$(overrides_to_jq "$@")"
+    jq "$jq_program" <<<"$baseline_json"
+}
+
+# Decide whether a scenario can run on this box, augmenting its overrides where needed (an alt
+# data dir for the prune axis, a remote endpoint for remote mode). On success sets RESOLVED to
+# the final override string and returns 0; on a missing prerequisite sets SKIP_REASON and
+# returns 1 — no silent drops, and never a prune flip on the canonical synced DB (which would
+# invalidate it). Reads the globals BASELINE_PRUNE / PRUNED_DATA_DIR / FULL_DATA_DIR /
+# REMOTE_MONERO_HOST (all optional). Pure given those globals, so the self-test exercises it.
+RESOLVED=""
+SKIP_REASON=""
+# shellcheck disable=SC2034  # RESOLVED/SKIP_REASON are output globals consumed by run.sh & selftest.sh
+resolve_overrides() {
+    local overrides="$1" prune mode out="$1"
+    RESOLVED=""; SKIP_REASON=""
+
+    prune="$(printf '%s' "$overrides" | tr ' ' '\n' | sed -n 's/^monero\.prune=//p')"
+    mode="$(printf '%s' "$overrides"  | tr ' ' '\n' | sed -n 's/^monero\.mode=//p')"
+
+    # Prune axis: only flip away from the baseline DB if a matching synced dir is provided —
+    # flipping prune on the canonical dir would invalidate it (a DEST change).
+    if [ "$prune" = "true" ] && [ "${BASELINE_PRUNE:-}" = "0" ]; then
+        [ -n "${PRUNED_DATA_DIR:-}" ] || { SKIP_REASON="needs --pruned-data-dir (box baseline is full)"; return 1; }
+        out="$out monero.data_dir=$PRUNED_DATA_DIR"
+    fi
+    if [ "$prune" = "false" ] && [ "${BASELINE_PRUNE:-}" = "1" ]; then
+        [ -n "${FULL_DATA_DIR:-}" ] || { SKIP_REASON="needs --full-data-dir (box baseline is pruned)"; return 1; }
+        out="$out monero.data_dir=$FULL_DATA_DIR"
+    fi
+
+    # Remote mode needs an external endpoint to point at.
+    if [ "$mode" = "remote" ]; then
+        [ -n "${REMOTE_MONERO_HOST:-}" ] || { SKIP_REASON="needs --remote-monero-host"; return 1; }
+        out="$out monero.remote.host=$REMOTE_MONERO_HOST"
+    fi
+
+    RESOLVED="$out"
+    return 0
+}
+
+# --- Expectation derivation (pure) ------------------------------------------
+# Services expected to be running for a rendered config.json ($1), one per line, sorted.
+# The bundled monerod only runs in local mode (the local_node compose profile); in any other
+# mode it is left out. Everything in EXPECTED_ALWAYS is expected regardless. A missing
+# .monero.mode counts as "local". Mirrors stack_status()'s profile gating.
+EXPECTED_ALWAYS="caddy dashboard docker-control docker-proxy p2pool tari tor xmrig-proxy"
+
+expected_services() {
+    local config_json="$1" mode services="$EXPECTED_ALWAYS"
+    mode="$(jq -r '.monero.mode // "local"' <<<"$config_json")"
+    if [ "$mode" = "local" ]; then
+        services="monerod $services"
+    fi
+    printf '%s\n' "$services" | tr ' ' '\n' | sort
+}
+
+# Services that must NOT exist for this config (remote mode -> no local monerod).
+# Arg:    $1 — the rendered config.json. A missing .monero.mode counts as "local".
+# Output: one service name per line; nothing when no service has to be absent.
+# Always returns 0: an empty list is a normal answer, not a failure, so the function is safe
+# as the last command of a caller's `&&` chain or under `set -e`.
+absent_services() {
+    local config_json="$1" mode
+    mode="$(printf '%s' "$config_json" | jq -r '.monero.mode // "local"')"
+    if [ "$mode" = "remote" ]; then
+        printf 'monerod\n'
+    fi
+    return 0
+}
+
+# Human-readable pool label as the dashboard reports it, from the config pool key.
+pool_label() {
+    case "$1" in
+        main) printf 'Main' ;;
+        mini) printf 'Mini' ;;
+        nano) printf 'Nano' ;;
+        *)    printf '%s' "$1" ;;
+    esac
+}
+
+# --- Target I/O (SSH or local) ----------------------------------------------
+# Globals set by the runner: IT_MODE (ssh|local), IT_SSH_DEST, IT_SSH_OPTS (array),
+# IT_REMOTE_DIR, IT_PITHEAD (the pithead invocation, e.g. "./pithead" or "sudo ./pithead").
+
+# Run a shell snippet on the target, in the stack directory. The snippet is our own trusted
+# code; we never interpolate untrusted data into it. Returns the remote command's exit code.
+#
+# Arg:     $1 — the shell snippet to run.
+# Globals: IT_MODE, IT_REMOTE_DIR, and (SSH path only) IT_SSH_OPTS / IT_SSH_DEST.
+# Any IT_MODE other than "local" takes the SSH path.
+rx() {
+    local snippet="$1"
+    if [ "$IT_MODE" = "local" ]; then
+        # Subshell, so the cd does not leak into the caller's working directory.
+        ( cd "$IT_REMOTE_DIR" && bash -c "$snippet" )
+    else
+        local remote
+        # The directory is quoted for the remote shell; the snippet is spliced in verbatim
+        # inside a { …; } group, so it only runs if the cd succeeded.
+        remote="cd $(quote_arg "$IT_REMOTE_DIR") && { $snippet; }"
+        ssh "${IT_SSH_OPTS[@]}" "$IT_SSH_DEST" "$remote"
+    fi
+}
+
+# Quote a single argument for safe expansion inside the remote shell string.
+quote_arg() { printf '%q' "$1"; }
+
+# Run pithead with a subcommand on the target, e.g. `pithead status` or `pithead apply -y`.
+pithead() { rx "$IT_PITHEAD $*"; }
+
+# Fetch the dashboard state JSON from the box (dashboard binds 127.0.0.1:8000 on the host
+# network). stderr is discarded and curl runs with -f, so a failed fetch yields empty output
+# that callers can treat as "unreachable".
+api_state() {
+    local fetch="curl -fsS --max-time 10 http://127.0.0.1:8000/api/state"
+    rx "$fetch" 2>/dev/null
+}
+
+# Split a "<state> <health>" string (from service_state) into its two fields. Pure helpers so
+# the self-test can verify the fault-injection predicates classify correctly.
+# svc_state_of prints everything before the first space, svc_health_of everything after the
+# last one; neither prints a trailing newline.
+svc_state_of() {
+    local pair="$1"
+    printf '%s' "${pair%% *}"
+}
+svc_health_of() {
+    local pair="$1"
+    printf '%s' "${pair##* }"
+}
+
+# Pull a jq path out of a JSON blob, printing nothing for an absent/null value. The `?`
+# swallows "cannot index null" on a missing parent, and `values` drops nulls — but NOT
+# boolean false (so `.monero.prune == false` reads as "false", not ""; `// empty` would
+# wrongly swallow it because false is falsy in jq).
+#
+# Args:   $1 — the JSON text; $2 — a jq expression (wrapped in parentheses before use).
+# Output: the raw (-r) result on stdout. jq's stderr is discarded, so invalid JSON or a
+#         bad expression prints nothing.
+jq_get() { printf '%s' "$1" | jq -r "($2)? | values" 2>/dev/null; }
+
+# Authoritative "is Monero caught up?" — query monerod's own get_info on the box (creds stay
+# on the box) and trust its `synchronized` flag / target_height 0, exactly like the sync gate.
+# We do NOT use the dashboard's `.sync.monero.state`: a synced LOCAL node has no target height,
+# so that field reads "loading", not "done" (a real-hardware gotcha). Returns 0 when synced.
+#
+# The snippet runs on the target via rx, in the stack directory. It:
+#   1. reads MONERO_NODE_USERNAME, MONERO_NODE_PASSWORD and MONERO_RPC_URL from .env,
+#      defaulting the URL to http://127.0.0.1:18081 when it is unset or empty;
+#   2. fetches <url>/get_info, using HTTP digest auth only when a username is configured;
+#   3. succeeds only if status is "OK" and either synchronized is true or target_height is 0.
+# A failed fetch leaves the body empty, so the final jq -e check fails and this returns
+# non-zero. The snippet is single-quoted: every $var expands on the target, not here.
+monero_caught_up() {
+    rx 'u=$(grep -E "^MONERO_NODE_USERNAME=" .env 2>/dev/null | cut -d= -f2-);
+        p=$(grep -E "^MONERO_NODE_PASSWORD=" .env 2>/dev/null | cut -d= -f2-);
+        url=$(grep -E "^MONERO_RPC_URL=" .env 2>/dev/null | cut -d= -f2-); [ -n "$url" ] || url="http://127.0.0.1:18081";
+        if [ -n "$u" ]; then body=$(curl -fsS --max-time 8 --digest -u "$u:$p" "$url/get_info" 2>/dev/null);
+        else body=$(curl -fsS --max-time 8 "$url/get_info" 2>/dev/null); fi;
+        printf "%s" "$body" | jq -e "(.status==\"OK\") and ((.synchronized==true) or (.target_height==0))" >/dev/null 2>&1'
+}
+
+# --- Readiness waiters ------------------------------------------------------
+# Poll a predicate until it succeeds or the timeout elapses. The interval is a *poll* cadence
+# against a real readiness signal — not a fixed "sleep and hope" (issue #54).
+
+# Current time as whole seconds since the epoch.
+now_s() {
+    date +%s
+}
+
+# wait_for <timeout_s> <interval_s> <desc> <predicate-cmd...>
+# Runs the predicate at once, then again after every <interval_s> sleep, until it succeeds
+# (returns 0) or the deadline has passed (warns and returns 1). The predicate is always
+# tried at least once, even with a zero timeout.
+wait_for() {
+    local timeout="$1" interval="$2" desc="$3"
+    shift 3
+    local started deadline
+    started="$(now_s)"
+    deadline=$((started + timeout))
+    it_step "waiting for ${desc} (timeout ${timeout}s)…"
+    until "$@"; do
+        if [ "$(now_s)" -ge "$deadline" ]; then
+            it_warn "timed out after ${timeout}s waiting for ${desc}"
+            return 1
+        fi
+        sleep "$interval"
+    done
+    return 0
+}
+
+# Predicate: pithead status exits 0 (all expected services healthy / intentional-stops aside).
+# Its output is discarded; only the exit status matters.
+_pred_status_ok() {
+    pithead status >/dev/null 2>&1
+}
+
+# Predicate: monerod itself reports caught up (authoritative; see monero_caught_up).
+_pred_monero_synced() {
+    monero_caught_up
+}
+
+# Predicate: the sync gate has released the miner — at least one worker is online on the proxy.
+# (proxy_workers is the reliable signal; stratum.conns can read 0 on a healthy, mining box.)
+# Fails when the dashboard is unreachable or proxy_workers is absent or non-numeric.
+_pred_miner_running() {
+    local state workers
+    state="$(api_state)"
+    [ -n "$state" ] || return 1
+    workers="$(jq_get "$state" '.proxy_workers')"
+    [ -n "$workers" ] && [ "$workers" -ge 1 ] 2>/dev/null
+}
+
+# Convenience waiters: $1 overrides the default timeout in seconds (180 / 300 / 180).
+wait_status_ok() {
+    wait_for "${1:-180}" 5 "pithead status OK" _pred_status_ok
+}
+wait_monero_synced() {
+    wait_for "${1:-300}" 10 "Monero sync complete" _pred_monero_synced
+}
+wait_miner_running() {
+    wait_for "${1:-180}" 5 "miner released" _pred_miner_running
+}
+
+# --- Artifact capture -------------------------------------------------------
+# On a scenario failure, collect everything needed to debug it — redacted. Writes into
+# <outdir>/<scenario>/. Best-effort: never let capture failures mask the test result.
+#
+# Args: $1 — scenario name (used as the sub-directory name); $2 — output root directory.
+# Every capture merges stderr into stdout where shown, passes through redact, and ends in
+# `|| true`, so a failing command leaves a (possibly empty) file rather than aborting.
+capture_artifacts() {
+    local scenario="$1" outdir="$2"
+    local dir="${outdir}/${scenario}"
+    mkdir -p "$dir"
+    it_step "capturing artifacts to ${dir}"
+    rx "docker compose ps"                 2>&1 | redact > "${dir}/compose-ps.txt"      || true
+    rx "$IT_PITHEAD status"                2>&1 | redact > "${dir}/status.txt"          || true
+    rx "$IT_PITHEAD doctor"                2>&1 | redact > "${dir}/doctor.txt"          || true
+    rx "cat config.json"                   2>&1 | redact > "${dir}/config.json"         || true
+    # .env is where the credentials live; it is only ever stored after redaction.
+    rx "cat .env"                          2>&1 | redact > "${dir}/env.redacted.txt"    || true
+    # api_state already discards stderr, so an unreachable dashboard yields an empty file.
+    api_state                                   | redact > "${dir}/api-state.json"      || true
+    # Last 200 lines of each service's logs, redacted.
+    rx "docker compose logs --tail=200 --no-color" 2>&1 | redact > "${dir}/logs.txt"   || true
+}
diff --git a/tests/integration/mini-stack/docker-compose.fake.yml b/tests/integration/mini-stack/docker-compose.fake.yml
new file mode 100644
index 0000000..2e66fc1
--- /dev/null
+++ b/tests/integration/mini-stack/docker-compose.fake.yml
@@ -0,0 +1,110 @@
+# Integration mini-stack (issue #54, tier 3).
+#
+# Runs the REAL dashboard + the REAL docker-control/-proxy socket proxies against CONTROLLABLE
+# fake monerod/Tari, with lightweight p2pool/xmrig-proxy containers the dashboard can actually
+# stop/start. This reproduces the runtime control plane end-to-end — sync-hold/release (#35) and
+# node-down → reject → readmit (#31) — deterministically, in CI, with no real chain or test box.
+#
+# Driven by run-mini-stack.sh. The dashboard and the fakes share one image (the dashboard's,
+# which already has mining_dashboard + grpc installed, so fake_tari can use the vendored stubs).
+name: pithead-itest
+
+x-fake-image: &fake_image pithead-dashboard:itest
+
+networks:
+  itestnet:
+    driver: bridge
+
+volumes:
+  dashboard_data:
+  dashboard_stats:
+
+services:
+  # The real dashboard, pointed at the fakes and the socket proxies. Fast loop + short debounce
+  # so scenarios converge in seconds. Binds 127.0.0.1:8000 inside the container; the runner
+  # reads /api/state via `compose exec`, so no published port is needed.
+  dashboard:
+    build: ../../../build/dashboard
+    # Tagged with the shared anchor so the fake-monerod / fake-tari services reuse this build.
+    image: *fake_image
+    container_name: itest-dashboard
+    networks: [itestnet]
+    volumes:
+      - dashboard_data:/data
+      - dashboard_stats:/app/stats:ro
+    environment:
+      HOST_IP: "127.0.0.1"
+      TZ: "Etc/UTC"
+      # Chain endpoints: the fakes, addressed by compose service name on itestnet. The
+      # credentials are empty strings.
+      MONERO_RPC_URL: "http://fake-monerod:18081"
+      MONERO_NODE_USERNAME: ""
+      MONERO_NODE_PASSWORD: ""
+      MONERO_NODE_HOST: "fake-monerod"
+      MONERO_PRUNE: "true"
+      TARI_GRPC_ADDRESS: "fake-tari:18142"
+      # Docker API access goes through the two socket-proxy services defined in this file.
+      DOCKER_PROXY_URL: "tcp://docker-proxy:2375"
+      DOCKER_CONTROL_URL: "tcp://docker-control:2375"
+      # Namespaced container names so the mini-stack never collides with — or controls — a real
+      # deployment's p2pool/xmrig-proxy on the same host.
+      SYNC_GATE_CONTAINERS: "itest-p2pool,itest-xmrig-proxy"
+      REJECT_WORKERS_CONTAINER: "itest-xmrig-proxy"
+      TARI_REQUIRED: "true"
+      XVB_ENABLED: "false"
+      XVB_POOL_URL: ""
+      XVB_DONOR_ID: ""
+      P2POOL_URL: "itest-p2pool:3333"
+      MONERO_WALLET_ADDRESS: "49iTestWalletPlaceholderXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+      PROXY_HOST: "xmrig-proxy"
+      PROXY_API_PORT: "3344"
+      PROXY_AUTH_TOKEN: "itest"
+      # Fast control loop and short down/recovery windows (the values quoted are strings;
+      # presumably seconds — confirm against the dashboard's config parsing).
+      UPDATE_INTERVAL: "2"
+      NODE_DOWN_AFTER_SEC: "4"
+      NODE_RECOVERY_AFTER_SEC: "3"
+    depends_on: [fake-monerod, fake-tari, docker-control, docker-proxy, p2pool, xmrig-proxy]
+
+  # Controllable fake monerod: serves RPC on 18081 inside the network and accepts mode
+  # changes on its /control endpoint, which the runner drives from the host.
+  fake-monerod:
+    image: *fake_image
+    container_name: itest-fake-monerod
+    networks: [itestnet]
+    entrypoint: []
+    # Boot mid-sync so the dashboard holds the miner; the runner later flips the mode.
+    command: ["python3", "/fakes/fake_monerod.py", "--port", "18081", "--mode", "syncing"]
+    # Host port 28081 (avoids a real monerod's 18081) → 18081 inside. Published on loopback
+    # only: /control is unauthenticated and the runner reaches it via 127.0.0.1, so it must
+    # not be exposed on the host's other interfaces.
+    ports: ["127.0.0.1:28081:18081"]
+    volumes: ["../fakes:/fakes:ro"]
+
+  # Controllable fake Tari node: gRPC on 18142 inside the network, plus an HTTP control
+  # side-channel on 18152 that the runner drives from the host.
+  fake-tari:
+    image: *fake_image
+    container_name: itest-fake-tari
+    networks: [itestnet]
+    entrypoint: []
+    command: ["python3", "/fakes/fake_tari.py", "--grpc-port", "18142", "--control-port", "18152", "--mode", "syncing"]
+    # HTTP control side-channel on host port 28152. Published on loopback only: it is
+    # unauthenticated and the runner reaches it via 127.0.0.1, so it must not be exposed on
+    # the host's other interfaces.
+    ports: ["127.0.0.1:28152:18152"]
+    volumes: ["../fakes:/fakes:ro"]
+
+  # Stand-ins for the miner containers: real, named containers the dashboard genuinely
+  # stops/starts via docker-control. They run an idle sleep loop and nothing else.
+  p2pool:
+    image: busybox:1.36
+    container_name: itest-p2pool
+    networks:
+      - itestnet
+    command:
+      - "sh"
+      - "-c"
+      - "while true; do sleep 30; done"
+
+  xmrig-proxy:
+    image: busybox:1.36
+    container_name: itest-xmrig-proxy
+    networks:
+      - itestnet
+    command:
+      - "sh"
+      - "-c"
+      - "while true; do sleep 30; done"
+
+  # Read-only socket proxy (stats/logs) — mirrors the production docker-proxy.
+  docker-proxy:
+    image: tecnativa/docker-socket-proxy:v0.4.2
+    container_name: itest-docker-proxy
+    networks:
+      - itestnet
+    environment:
+      - "CONTAINERS=1"
+      - "LOGS=1"
+    volumes:
+      - "/var/run/docker.sock:/var/run/docker.sock:ro"
+
+  # Write proxy scoped to start/stop only — mirrors the production docker-control.
+  docker-control:
+    image: tecnativa/docker-socket-proxy:v0.4.2
+    container_name: itest-docker-control
+    networks:
+      - itestnet
+    environment:
+      - "POST=1"
+      - "ALLOW_START=1"
+      - "ALLOW_STOP=1"
+    volumes:
+      - "/var/run/docker.sock:/var/run/docker.sock:ro"
diff --git a/tests/integration/mini-stack/run-mini-stack.sh b/tests/integration/mini-stack/run-mini-stack.sh
new file mode 100755
index 0000000..42a00c6
--- /dev/null
+++ b/tests/integration/mini-stack/run-mini-stack.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+#
+# Drive the integration mini-stack (issue #54, tier 3) through the control-plane state machine
+# and assert the REAL dashboard holds/releases and rejects/readmits the REAL miner containers,
+# driven by the controllable fakes. Needs docker (compose v2). Runs in CI; also `make
+# test-mini-stack`.
+#
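+# Scenarios:
+#   1. boot syncing                 → dashboard HOLDS itest-p2pool + itest-xmrig-proxy (#35)
+#   2. monerod synced, Tari syncing → dashboard KEEPS HOLDING (Tari is required)
+#   3. both chains synced           → dashboard RELEASES them (#35)
+#   4. Tari down                    → dashboard REJECTS workers (stops itest-xmrig-proxy) (#31)
+#   5. Tari back                    → dashboard READMITS workers
+#   6. dashboard restart            → a released miner is NOT re-held (#35 persistence)
+#
+# monerod-down failover is deliberately not simulated here; see the note at scenario 4.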
+#
+set -uo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPOSE_FILE="$HERE/docker-compose.fake.yml"
+PASS=0
+FAIL=0
+
+c_ok()  { PASS=$((PASS + 1)); printf '  \033[1;32m✓\033[0m %s\n' "$1"; }
+c_bad() { FAIL=$((FAIL + 1)); printf '  \033[1;31m✗\033[0m %s\n      %s\n' "$1" "${2:-}"; }
+log()   { printf '\033[1;36m[mini-stack]\033[0m %s\n' "$1"; }
+
+if ! docker compose version >/dev/null 2>&1; then
+    echo "SKIP: docker compose not available"
+    exit 0
+fi
+
+compose() { docker compose -f "$COMPOSE_FILE" "$@"; }
+cstate()  { docker inspect -f '{{.State.Status}}' "$1" 2>/dev/null || echo "missing"; }
+ctl()     { curl -fsS --max-time 5 "$1" -d "$2" >/dev/null; }   # POST JSON to a fake /control
+
+# Poll a container until it reaches an expected state, or time out.
+wait_state() {  # wait_state <container> <expected-state> [timeout_s]
+    local c="$1" want="$2" timeout="${3:-60}" end
+    end=$(( $(date +%s) + timeout ))
+    while :; do
+        [ "$(cstate "$c")" = "$want" ] && return 0
+        [ "$(date +%s)" -ge "$end" ] && return 1
+        sleep 1
+    done
+}
+
+# assert_state <label> <container> <expected> [timeout]
+# Waits (default 60s) for the container to reach the expected state and records the result.
+# On failure the detail line shows the state the container is actually in.
+assert_state() {
+    local label="$1" container="$2" expected="$3" timeout="${4:-60}"
+    if ! wait_state "$container" "$expected" "$timeout"; then
+        c_bad "$label" "$container is '$(cstate "$container")', expected '$expected'"
+        return
+    fi
+    c_ok "$label ($container → $expected)"
+}
+
+# Assert a container STAYS in a state for a window — proves the gate does NOT release/act
+# prematurely (e.g. holds while only one required chain is synced). At UPDATE_INTERVAL=2 a
+# few seconds spans multiple control-loop cycles.
+#
+# assert_stays <label> <container> <state> <seconds>
+# The state is sampled immediately and then once per second across the window, so a
+# transient flip (leave the state, then return to it) is caught — a single sample at the end
+# of the window would miss it. A non-integer <seconds> falls back to one end-of-window sample.
+assert_stays() {
+    local label="$1" container="$2" want="$3" window="$4"
+    local remaining="$4" elapsed=0 seen
+    case "$remaining" in
+        ''|*[!0-9]*)
+            # Cannot be counted down in whole seconds: sleep the full window, sample once.
+            sleep "$remaining"
+            remaining=0
+            ;;
+    esac
+    while :; do
+        seen="$(cstate "$container")"
+        if [ "$seen" != "$want" ]; then
+            c_bad "$label" "$container became '$seen' after ~${elapsed}s, expected to stay '$want'"
+            return
+        fi
+        [ "$elapsed" -ge "$remaining" ] && break
+        sleep 1
+        elapsed=$((elapsed + 1))
+    done
+    c_ok "$label ($container stays $want for ${window}s)"
+}
+
+# POST a new mode to a fake's /control endpoint with a clear failure label. Host ports are
+# 28081/28152 (namespaced away from a real monerod/dashboard on the same host).
+set_monerod() { ctl "http://127.0.0.1:28081/control" "{\"mode\":\"$1\"}" || c_bad "set monerod $1" "control POST failed"; }
+set_tari()    { ctl "http://127.0.0.1:28152/control" "{\"mode\":\"$1\"}" || c_bad "set tari $1" "control POST failed"; }
+
+# Remove the mini-stack: containers, named volumes (-v) and orphans. Best-effort — the
+# output is discarded and a failing `down` is ignored.
+teardown() {
+    log "tearing down"
+    compose down -v --remove-orphans >/dev/null 2>&1 || true
+}
+# Runs on every exit path from here on, including the early `exit 1` taken on a build failure.
+trap teardown EXIT
+
+# Build the shared dashboard image; nothing can run without it, so a failure is fatal.
+log "building images"
+if ! compose build >/dev/null 2>&1; then
+    c_bad "build" "docker compose build failed"
+    exit 1
+fi
+
+# Start everything. Fail fast if the stack does not come up: otherwise each scenario below
+# would wait out its own timeout and report a misleading state-mismatch failure.
+log "starting the mini-stack (fakes boot mid-sync)"
+if ! compose up -d >/dev/null 2>&1; then
+    c_bad "up" "docker compose up -d failed"
+    exit 1
+fi
+
+# Wait for the dashboard's API to answer (it binds 127.0.0.1:8000 inside the container, so
+# the probe runs inside it via `compose exec`). Up to 30 attempts, 2s apart.
+log "waiting for the dashboard API"
+api_up=0
+for _ in $(seq 1 30); do
+    compose exec -T dashboard python3 -c \
+        "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/api/state', timeout=3)" >/dev/null 2>&1 \
+        && { api_up=1; break; }
+    sleep 2
+done
+if [ "$api_up" = 1 ]; then
+    c_ok "dashboard API is up"
+else
+    c_bad "dashboard API is up" "no /api/state after ~60s"
+fi
+
+# Scenarios 1-5 run in order against one long-lived stack: each starts from the state the
+# previous one left behind. Every assert_state below allows up to 90s for the transition.
+
+# 1. Booting mid-sync → the gate holds both miner containers (stops them). (#35)
+log "scenario 1: holds the miner while both chains sync"
+assert_state "held: itest-p2pool stopped"      itest-p2pool      exited  90
+assert_state "held: itest-xmrig-proxy stopped" itest-xmrig-proxy exited  90
+
+# 2. Monerod synced but Tari still syncing, Tari REQUIRED → STILL held (the gate needs both).
+log "scenario 2: keeps holding while Tari (required) is still syncing"
+set_monerod synced
+# Negative check: 8s is several control-loop cycles at UPDATE_INTERVAL=2.
+assert_stays "still held on monerod-only" itest-p2pool exited 8
+
+# 3. Tari synced too → release both. (#35)
+log "scenario 3: releases the miner once both chains are synced"
+set_tari synced
+assert_state "released: itest-p2pool running"      itest-p2pool      running 90
+assert_state "released: itest-xmrig-proxy running" itest-xmrig-proxy running 90
+
+# 4. Tari down while required → reject workers (stop the proxy); itest-p2pool keeps running. (#31)
+#    NOTE: monerod-down failover is deliberately NOT simulated here — the dashboard's monerod
+#    down-path falls back to log-scraping a real `monerod` container, which this fake stack has
+#    no equivalent of. That path is covered on real hardware by the tier-4 --fault-injection
+#    phase. Tari has no such fallback, so its reject/readmit exercises the failover cleanly.
+log "scenario 4: rejects workers when required Tari is down"
+set_tari down
+assert_state "rejected on Tari outage: itest-xmrig-proxy stopped" itest-xmrig-proxy exited 90
+# Sampled once, right after the proxy was seen stopped: p2pool must not have been stopped too.
+if [ "$(cstate itest-p2pool)" = "running" ]; then
+    c_ok "rejection leaves itest-p2pool running (only the proxy fails over)"
+else
+    c_bad "rejection leaves itest-p2pool running" "itest-p2pool is '$(cstate itest-p2pool)'"
+fi
+
+# 5. Tari recovers → readmit (after the recovery-hysteresis window).
+log "scenario 5: readmits workers when Tari recovers"
+set_tari synced
+assert_state "readmitted after Tari recovery: itest-xmrig-proxy running" itest-xmrig-proxy running 90
+
+# 6. Dashboard restart after release → the one-way latch is persisted, so the miner is NOT
+#    re-held: both containers stay running across the restart. (#35 persistence)
+log "scenario 6: a dashboard restart does not re-hold a released miner"
+compose restart dashboard >/dev/null 2>&1
+# The dashboard must actually be back before the "stays running" checks mean anything: if it
+# never returned, nothing would be controlling the containers and they would stay up
+# vacuously. So the API coming back is asserted, not merely waited for.
+restart_api_up=0
+for _ in $(seq 1 30); do
+    if compose exec -T dashboard python3 -c \
+        "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/api/state', timeout=3)" >/dev/null 2>&1; then
+        restart_api_up=1
+        break
+    fi
+    sleep 2
+done
+if [ "$restart_api_up" = 1 ]; then
+    c_ok "dashboard API is back after restart"
+else
+    c_bad "dashboard API is back after restart" "no /api/state after ~60s"
+fi
+assert_stays "itest-p2pool stays up across restart"      itest-p2pool      running 6
+assert_stays "itest-xmrig-proxy stays up across restart" itest-xmrig-proxy running 6
+
+echo ""
+log "mini-stack: $PASS passed, $FAIL failed"
+[ "$FAIL" -eq 0 ]
diff --git a/tests/integration/run.sh b/tests/integration/run.sh
new file mode 100755
index 0000000..99e0317
--- /dev/null
+++ b/tests/integration/run.sh
@@ -0,0 +1,772 @@
+#!/usr/bin/env bash
+#
+# Pithead end-to-end integration test runner (issue #54).
+#
+# Drives a REAL, already-provisioned Pithead server through the config matrix and asserts the
+# stack behaves — containers healthy, nodes synced, miners mining, the dashboard reading the
+# right live state, status exit codes correct, and secrets preserved across re-applies.
+#
+# The box is assumed already deployed and synced with miners connected; the harness moves
+# between scenarios with non-interactive `pithead apply -y` (recreates only changed
+# containers, reuses the synced chain data dirs — never re-syncs, never re-provisions Tor).
+# It saves the box's original config.json up front and restores it at the end.
+#
+#   ./run.sh --host user@1.2.3.4 [--dir ~/pithead] [options]
+#   ./run.sh --local             [--dir /path/to/stack] [options]
+#
+# Read-only against the canonical chain data dirs; safe to run against the live box. See
+# docs/integration-testing.md for provisioning, the safety model, and CI/release wiring.
+#
+set -uo pipefail   # NOT -e: we deliberately continue-on-error to collect the whole matrix.
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=tests/integration/lib.sh
+source "$HERE/lib.sh"
+# shellcheck source=tests/integration/scenarios.sh
+source "$HERE/scenarios.sh"
+
+# --- Defaults / globals -----------------------------------------------------
+# Defaults for everything parse_args can override. The IT_* names are the target-I/O globals
+# that lib.sh reads.
+IT_MODE="ssh"                  # ssh | local (--host / --local)
+IT_SSH_DEST=""                 # --host <user@host>; required in ssh mode
+# Base ssh options; --identity appends -i <keyfile>, --ssh-opt appends -o <opt>.
+IT_SSH_OPTS=(-o BatchMode=yes -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new)
+IT_REMOTE_DIR="pithead"        # --dir: the stack directory on the box
+IT_PITHEAD="./pithead"         # --pithead: how pithead is invoked on the box
+IT_CURRENT_SCENARIO=""         # read by lib.sh:it_fail to label failures
+ONLY_SCENARIO=""               # --scenario <name>
+CHECK_ONLY=0                   # --check
+READINESS=0                    # --readiness
+RUN_LIFECYCLE=0                # --lifecycle
+RUN_FAULTS=0                   # --fault-injection
+SAFETY_BACKUP=0                # --safety-backup
+SAFETY_ARCHIVE=""              # not set by parse_args; presumably the safety backup's archive — confirm
+KEEP_STATE=0                   # --keep
+EXPECTED_WORKERS=2             # --workers <n>
+REMOTE_MONERO_HOST=""          # --remote-monero-host; read by lib.sh:resolve_overrides
+PRUNED_DATA_DIR=""             # --pruned-data-dir; read by lib.sh:resolve_overrides
+FULL_DATA_DIR=""               # --full-data-dir; read by lib.sh:resolve_overrides
+OUT_DIR="$HERE/results"        # --out <dir>
+BASELINE_CONFIG=""             # not set by parse_args; presumably the box's original config.json — confirm
+BASELINE_PRUNE=""              # lib.sh:resolve_overrides compares this against "0" / "1"
+BASELINE_SECRET_FP=""          # not set by parse_args; presumably the baseline secrets fingerprint — confirm
+
+# Print the help text to stdout. The heredoc delimiter is quoted ('EOF'), so the text is
+# emitted verbatim — no variable or command expansion happens inside it.
+usage() {
+    cat <<'EOF'
+Pithead integration test runner
+
+USAGE:
+  run.sh --host <user@host> [options]     drive the box over SSH
+  run.sh --local            [options]     drive a stack on this machine
+
+CONNECTION:
+  --host <user@host>     SSH destination of the test server
+  --identity <keyfile>   SSH private key (adds -i <keyfile>)
+  --ssh-opt <opt>        extra ssh -o option (repeatable), e.g. --ssh-opt Port=2222
+  --local                run against a stack on this machine instead of over SSH
+  --dir <path>           the Pithead stack directory ON THE BOX, relative to the SSH login
+                         dir or absolute (default: pithead). Avoid a literal ~ — your local
+                         shell would expand it before the box sees it.
+  --pithead <cmd>        how to invoke pithead on the box (default: ./pithead;
+                         use "sudo ./pithead" if docker needs root there)
+
+MATRIX:
+  --check                NON-DESTRUCTIVE: assert the box's current live state only — no config
+                         changes, no apply, no restore. The safe first run / health check.
+  --readiness            NON-DESTRUCTIVE: assess whether the box is fit to be a release/
+                         validation server (synced chains reusable, snapshot-capable FS, disk
+                         headroom, secrets not world-readable, dashboard localhost-only).
+  --scenario <name>      run only one scenario (see --list)
+  --workers <n>          miners expected online while mining (default: 2)
+  --remote-monero-host <h>  external node endpoint for the remote-mode scenario
+                            (e.g. the box's own synced node on its LAN IP)
+  --pruned-data-dir <d>  synced PRUNED monero data dir (enables the pruned case when the
+                         box's baseline is full)
+  --full-data-dir <d>    synced FULL monero data dir (enables the full case when the box's
+                         baseline is pruned)
+  --lifecycle            also run the lifecycle phase (restart, apply secret-preservation)
+  --safety-backup        take a `pithead backup` BEFORE the destructive scenarios; if anything
+                         fails, automatically roll the box back to it (down → restore → up).
+                         The archive is removed on success. Recommended for the destructive
+                         matrix on a precious box. Also exercises backup/restore end-to-end.
+  --fault-injection      also run the fault-injection phase: deliberately break monerod
+                         (stop / SIGSTOP / remove) and assert pithead's status verdicts
+                         (down / unhealthy / missing) and the failover→recovery cycle.
+                         DESTRUCTIVE-then-restored; local mode only. Slow (healthcheck +
+                         node-health debounce).
+  --keep                 do NOT restore the original config.json at the end (leaves the box
+                         on the last scenario — useful for debugging)
+
+OUTPUT:
+  --out <dir>            where to write artifacts (default: tests/integration/results)
+  --list                 print the scenario matrix and axis coverage, then exit
+  -h, --help             this help
+
+Scenarios whose prerequisites are missing (a full/pruned alt data dir, or a remote endpoint)
+are reported SKIPPED — never silently dropped, never mutating the canonical synced chain.
+EOF
+}
+
+# --- Arg parsing ------------------------------------------------------------
+# Parse the command line into the globals declared above.
+#
+# Exits 0 after --list / --help. Exits 2 on a usage error: an unknown option, a value-taking
+# option given without its value, a --workers value that is not a non-negative integer, or
+# ssh mode without --host.
+# shellcheck disable=SC2034  # the data-dir / remote-host globals are consumed by lib.sh:resolve_overrides
+parse_args() {
+    while [ $# -gt 0 ]; do
+        # Value-taking options need a following argument. Check it up front: otherwise the
+        # "$2" below would trip `set -u` with a cryptic "unbound variable" abort.
+        case "$1" in
+            --host|--identity|--ssh-opt|--dir|--pithead|--scenario|--workers|--remote-monero-host|--pruned-data-dir|--full-data-dir|--out)
+                if [ $# -lt 2 ]; then
+                    it_err "Option $1 requires a value (try --help)"
+                    exit 2
+                fi
+                ;;
+        esac
+        case "$1" in
+            --host)       IT_SSH_DEST="$2"; IT_MODE="ssh"; shift 2 ;;
+            --identity)   IT_SSH_OPTS+=(-i "$2"); shift 2 ;;
+            --ssh-opt)    IT_SSH_OPTS+=(-o "$2"); shift 2 ;;
+            --local)      IT_MODE="local"; shift ;;
+            --dir)        IT_REMOTE_DIR="$2"; shift 2 ;;
+            --pithead)    IT_PITHEAD="$2"; shift 2 ;;
+            --check)      CHECK_ONLY=1; shift ;;
+            --readiness)  READINESS=1; shift ;;
+            --scenario)   ONLY_SCENARIO="$2"; shift 2 ;;
+            --workers)    EXPECTED_WORKERS="$2"; shift 2 ;;
+            --remote-monero-host) REMOTE_MONERO_HOST="$2"; shift 2 ;;
+            --pruned-data-dir)    PRUNED_DATA_DIR="$2"; shift 2 ;;
+            --full-data-dir)      FULL_DATA_DIR="$2"; shift 2 ;;
+            --lifecycle)  RUN_LIFECYCLE=1; shift ;;
+            --fault-injection) RUN_FAULTS=1; shift ;;
+            --safety-backup)   SAFETY_BACKUP=1; shift ;;
+            --keep)       KEEP_STATE=1; shift ;;
+            --out)        OUT_DIR="$2"; shift 2 ;;
+            --list)       print_list; exit 0 ;;
+            -h|--help)    usage; exit 0 ;;
+            *)            it_err "Unknown option: $1 (try --help)"; exit 2 ;;
+        esac
+    done
+
+    # The help text documents --workers as a count of miners, so reject anything that is
+    # not a non-negative integer here instead of letting it reach a numeric comparison.
+    case "$EXPECTED_WORKERS" in
+        ''|*[!0-9]*)
+            it_err "--workers expects a non-negative integer, got: $EXPECTED_WORKERS"
+            exit 2
+            ;;
+    esac
+
+    if [ "$IT_MODE" = "ssh" ] && [ -z "$IT_SSH_DEST" ]; then
+        it_err "Provide --host <user@host> or --local. See --help."
+        exit 2
+    fi
+}
+
+print_list() {
+    local scen overrides
+    # Scenario table: name padded to 32 columns, followed by its override tokens.
+    printf '%s\n' "Scenarios:"
+    while IFS=$'\t' read -r scen overrides; do
+        printf '  %-32s %s\n' "$scen" "$overrides"
+    done < <(scenario_matrix)
+    printf '\n%s\n' "Axis coverage (every value below must appear at least once):"
+    axis_coverage | sed 's/^/  /'
+}
+
+# --- Target I/O helpers (depend on globals set above) -----------------------
+# Write a config.json onto the box from stdin-less arg.
+push_config() {
+    local json="$1"
+    if [ "$IT_MODE" = "local" ]; then
+        printf '%s\n' "$json" > "$IT_REMOTE_DIR/config.json"
+    else
+        printf '%s\n' "$json" | ssh "${IT_SSH_OPTS[@]}" "$IT_SSH_DEST" \
+            "cd $(quote_arg "$IT_REMOTE_DIR") && cat > config.json"
+    fi
+}
+
+# Print the value of key $1 from the box's .env (first match; empty if absent). Non-secret keys only.
+env_on_box() { rx "grep -E '^$1=' .env 2>/dev/null | head -n1 | cut -d= -f2-"; }
+
+# Names of the compose services currently in the "running" state, one per line, sorted. Honours
+# active compose profiles, so monerod is absent in remote mode.
+running_services() { rx "docker compose ps --services --status running 2>/dev/null | sort"; }
+
+# Print "<state> <health>" for service $1, exactly as stack_status reads it: state is the
+# container State.Status (running/exited/paused/restarting/…) and health is the healthcheck
+# verdict (healthy/unhealthy/starting/none). Prints "missing none" when no container exists and
+# "unknown none" when docker inspect fails. The fault-injection predicates assert against this.
+service_state() {
+    rx 'cid=$(docker compose ps -aq '"$1"' 2>/dev/null | head -n1); if [ -z "$cid" ]; then echo "missing none"; else docker inspect --format "{{.State.Status}} {{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}" "$cid" 2>/dev/null || echo "unknown none"; fi'
+}
+
+# SHA-256 over the sorted PROXY_AUTH_TOKEN / *_ONION_ADDRESS lines of .env. Hashed ON THE BOX so
+# plaintext never crosses the wire or hits a log. NOTE: with no matching lines it hashes empty input.
+secret_fingerprint() {
+    rx "grep -E '^(PROXY_AUTH_TOKEN|[A-Z]+_ONION_ADDRESS)=' .env 2>/dev/null | sort | sha256sum | cut -d' ' -f1"
+}
+
+# --- Preflight --------------------------------------------------------------
+preflight() {  # Verify the target is reachable, deployed and tooled (exit 1 otherwise), then snapshot the baseline.
+    it_log "Connecting to target ($IT_MODE${IT_SSH_DEST:+ $IT_SSH_DEST}) at $IT_REMOTE_DIR …"
+    if ! rx "true" >/dev/null 2>&1; then
+        it_err "Cannot reach the target. Check --host/--local, --dir, and SSH access."
+        exit 1
+    fi
+
+    # The stack dir must contain a deployed pithead. --pithead is user-supplied, so quote it.
+    if ! rx "test -x $(quote_arg "$IT_PITHEAD")" >/dev/null 2>&1; then
+        it_err "pithead not found/executable at $IT_REMOTE_DIR/$IT_PITHEAD (set --dir/--pithead)."
+        exit 1
+    fi
+    if ! rx "grep -q '^DEPLOYMENT_COMPLETED=true' .env" >/dev/null 2>&1; then
+        it_err "Box is not fully deployed (.env missing DEPLOYMENT_COMPLETED). Run 'pithead setup' there first."
+        exit 1
+    fi
+
+    # Tools the harness leans on, on the box.
+    local tool
+    for tool in jq curl docker sha256sum; do
+        if ! rx "command -v $tool" >/dev/null 2>&1; then
+            it_err "Required tool '$tool' missing on the box."
+            exit 1
+        fi
+    done
+
+    mkdir -p "$OUT_DIR"
+    record_manifest
+
+    # Snapshot the baseline so we can restore it and compare secrets later.
+    BASELINE_CONFIG="$(rx 'cat config.json')"
+    BASELINE_PRUNE="$(env_on_box MONERO_PRUNE)"   # 1 = pruned, 0 = full
+    BASELINE_SECRET_FP="$(secret_fingerprint)"
+    if [ -z "$BASELINE_CONFIG" ]; then
+        it_err "Could not read baseline config.json from the box."
+        exit 1
+    fi
+    it_log "Baseline captured (prune=$BASELINE_PRUNE). Original config will be restored at the end."
+}
+
+# Record exactly what's under test, so a run is reproducible (#54 manifest).
+record_manifest() {
+    local f="$OUT_DIR/manifest.txt"
+    {
+        echo "# Pithead integration run manifest"
+        echo "stack_version: $(rx 'cat VERSION 2>/dev/null' | tr -d '\n')"
+        echo "git_rev:       $(rx 'git rev-parse --short HEAD 2>/dev/null' | tr -d '\n')"
+        echo "target_mode:   $IT_MODE"
+        echo "remote_dir:    $IT_REMOTE_DIR"
+        echo "expected_workers: $EXPECTED_WORKERS"
+        echo ""
+        echo "# docker compose images"
+        rx "docker compose images 2>/dev/null"
+    } | redact > "$f" 2>/dev/null || true
+    it_step "wrote run manifest to $f"
+}
+
+# --- Scenario execution -----------------------------------------------------
+# resolve_overrides (the prerequisite gate that decides whether a scenario can run on this box,
+# and never mutates the canonical chain) lives in lib.sh so the self-test can exercise it. It
+# reads BASELINE_PRUNE / PRUNED_DATA_DIR / FULL_DATA_DIR / REMOTE_MONERO_HOST and sets the
+# globals RESOLVED / SKIP_REASON.
+
+run_scenario() {  # run_scenario <name> <overrides>: apply one matrix row and assert it; always returns 0
+    local name="$1" overrides="$2"
+    IT_CURRENT_SCENARIO="$name"
+    echo ""
+    it_log "── scenario: ${name} ───────────────────────────────"
+
+    if ! resolve_overrides "$overrides"; then
+        it_warn "SKIPPED ${name}: ${SKIP_REASON}"
+        IT_SKIPPED=$((IT_SKIPPED + 1))
+        return 0
+    fi
+
+    # Render + push config, then apply non-interactively.
+    local config
+    # shellcheck disable=SC2086  # RESOLVED is a space-separated list of override tokens, on purpose
+    config="$(render_scenario_config "$BASELINE_CONFIG" $RESOLVED)"
+    if ! printf '%s' "$config" | jq empty 2>/dev/null; then
+        it_fail "rendered config is valid JSON" "jq rejected the rendered config"
+        return 0
+    fi
+    push_config "$config" || { it_fail "config pushed to the box" "could not write config.json"; return 0; }  # never apply a stale config
+
+    it_step "applying config (pithead apply -y)…"
+    if ! pithead apply -y > "$OUT_DIR/${name}.apply.log" 2>&1; then
+        it_fail "apply succeeded" "see $OUT_DIR/${name}.apply.log"
+        capture_artifacts "$name" "$OUT_DIR"
+        return 0
+    fi
+
+    # Wait for the stack to settle on real readiness signals before asserting.
+    wait_status_ok 240    || true
+    wait_monero_synced 120 || true
+    wait_miner_running 180 || true
+
+    local fails_before="$IT_FAIL"
+    assert_scenario "$name" "$config"
+    # If this scenario turned anything red, grab artifacts for it.
+    [ "$IT_FAIL" -gt "$fails_before" ] && capture_artifacts "$name" "$OUT_DIR"
+    return 0
+}
+
+# The read-only assertion battery (infrastructure-level). Asserts the live running state of
+# the stack for a given config WITHOUT changing anything — so it backs both a post-apply
+# scenario check and the non-destructive `--check` mode. Calibrated against real hardware:
+# it trusts monerod's own sync flag (the dashboard's UI state reads "loading" for a synced
+# local node) and proxy_workers for mining liveness (stratum.conns can read 0 while mining).
+assert_running_state() {
+    local name="$1" config="$2"
+    local st mode pool secure tari_req xvb rpc_lan
+    mode="$(jq_get "$config" '.monero.mode')";          mode="${mode:-local}"
+    pool="$(jq_get "$config" '.p2pool.pool')";          pool="${pool:-main}"
+    secure="$(jq_get "$config" '.dashboard.secure')"
+    tari_req="$(jq_get "$config" '.dashboard.tari_required')"
+    xvb="$(jq_get "$config" '.xvb.enabled')"
+    rpc_lan="$(jq_get "$config" '.monero.rpc_lan_access')"
+
+    # 1. Expected containers up; unexpected ones absent. Names are matched as WHOLE lines of the
+    local running expected svc   # running list (grep -xF), so one name can't match inside a longer one.
+    running="$(running_services)"
+    expected="$(expected_services "$config")"
+    while IFS= read -r svc; do
+        [ -z "$svc" ] && continue
+        if grep -qxF -- "$svc" <<< "$running"; then
+            it_pass "container up: $svc"
+        else it_fail "container up: $svc" "not in running services"
+        fi
+    done <<< "$expected"
+    if [ "$mode" = "remote" ]; then
+        if grep -qxF -- monerod <<< "$running"; then
+            it_fail "monerod absent in remote mode" "monerod is running"
+        else it_pass "monerod absent in remote mode"
+        fi
+    fi
+
+    # 2. pithead status is green for a healthy config.
+    pithead status >/dev/null 2>&1; assert_rc "status exit code is 0 (healthy)" "$?" "0"
+
+    # 3. Dashboard reachable and reading live state.
+    st="$(api_state)"
+    if [ -z "$st" ]; then
+        it_fail "dashboard /api/state reachable" "empty response"
+        return 0
+    fi
+    it_pass "dashboard /api/state reachable"
+
+    # 4. Monero caught up — per monerod's own get_info, not the dashboard UI field.
+    if monero_caught_up; then it_pass "monerod reports synced (RPC)"; else it_fail "monerod reports synced (RPC)" "get_info not synchronized"; fi
+    # Pruned/full panel (#32): determinate (Pruned|Full) for a local node; remote is often Unknown.
+    local dmode; dmode="$(jq_get "$st" '.monero.mode')"
+    if [ "$mode" = "remote" ]; then
+        it_pass "monero display mode present ($dmode)"
+    else
+        case "$dmode" in Pruned|Full) it_pass "monero display mode determinate ($dmode)" ;;
+                         *)            it_fail "monero display mode determinate" "got [$dmode], want Pruned|Full" ;; esac
+    fi
+
+    # 5. Sidechain selection matches the pool axis.
+    assert_eq "pool type" "$(jq_get "$st" '.pool.type')" "$(pool_label "$pool")"
+
+    # 6. End-to-end mining: workers online + hashes accumulating (#28). proxy_workers is the
+    #    reliable liveness signal; stratum.conns is reported but informational (can be 0).
+    local workers conns hashes
+    workers="$(jq_get "$st" '.proxy_workers')"
+    conns="$(jq_get "$st" '.stratum.conns')"
+    hashes="$(jq_get "$st" '.stratum.total_hashes')"
+    assert_num_ge "workers online (>= $EXPECTED_WORKERS)" "${workers:-0}" "$EXPECTED_WORKERS"
+    assert_num_gt "stratum total hashes > 0" "${hashes:-0}" 0
+    it_step "stratum conns=${conns:-?} (informational)"
+
+    # 7. Tari sync-gate posture matches tari_required (an unset value defaults to true in BOTH checks).
+    assert_eq "TARI_REQUIRED env matches config" "$(env_on_box TARI_REQUIRED)" "${tari_req:-true}"
+    if [ "${tari_req:-true}" = "true" ]; then
+        assert_eq "tari synced (required)" "$(jq_get "$st" '.sync.tari.state')" "done"
+    fi
+
+    # 8. Security/posture axes propagated to .env.
+    local want_bind; [ "$rpc_lan" = "true" ] && want_bind="0.0.0.0" || want_bind="127.0.0.1"
+    assert_eq "MONERO_RPC_BIND matches rpc_lan_access" "$(env_on_box MONERO_RPC_BIND)" "$want_bind"
+    assert_eq "DASHBOARD_SECURE matches config" "$(env_on_box DASHBOARD_SECURE)" "${secure:-true}"
+    assert_eq "XVB_ENABLED matches config" "$(env_on_box XVB_ENABLED)" "${xvb:-true}"
+
+    # 9. Caddy scheme matches dashboard.secure.
+    local scheme; [ "$secure" = "false" ] && scheme="http://" || scheme="https://"
+    assert_contains "Caddyfile uses correct scheme" "$(rx 'head -n1 Caddyfile 2>/dev/null')" "$scheme"
+
+    # 10. Secrets intact (proxy token + onions unchanged vs the baseline we captured).
+    assert_eq "secrets intact (token + onions)" "$(secret_fingerprint)" "$BASELINE_SECRET_FP"
+}
+
+# Per-scenario battery: run the read-only state assertions, then apply a second time and
+# require pithead to report that nothing changed (the apply-only idempotency check).
+assert_scenario() {
+    local name="$1" config="$2" reapply_out
+    assert_running_state "$name" "$config"
+    reapply_out="$(pithead apply -y 2>&1)"
+    assert_contains "re-apply is a no-op" "$reapply_out" "No configuration changes detected"
+}
+
+# Non-destructive --check: assert the box's CURRENT live state (its own config), no apply.
+assert_current_state() {
+    IT_CURRENT_SCENARIO="check"
+    echo ""
+    it_log "── read-only check against the live stack ──────────"
+    local fails_before="$IT_FAIL"
+    assert_running_state "check" "$BASELINE_CONFIG"
+    if [ "$IT_FAIL" -gt "$fails_before" ]; then capture_artifacts "check" "$OUT_DIR"; fi  # 'if', not '&&': a clean check must not return 1
+}
+
+# --- Release-server readiness (--readiness) ---------------------------------
+# Read-only assessment of whether the box is fit to be a RELEASE / validation server: it must
+# reuse already-synced chains, vary configs cheaply, and keep its keys/secrets and dashboard
+# from leaking. Complements `pithead doctor` (stack health) — this checks the server's fitness
+# for the integration harness's job. A WARN is "works, but not ideal"; a FAIL is "fix before
+# using as a release gate".
+box_fstype()   { rx "df --output=fstype $(quote_arg "$1") 2>/dev/null | tail -n1 | tr -d ' '"; }  # filesystem type holding path $1 (empty on error)
+box_avail_gb() { rx "df -BG --output=avail $(quote_arg "$1") 2>/dev/null | tail -n1 | tr -dc '0-9'"; }  # free space in GiB, digits only
+box_mode()     { rx "stat -c %a $(quote_arg "$1") 2>/dev/null"; }  # octal permission bits of path $1
+
+assert_release_readiness() {  # Read-only --readiness battery; results go through it_pass/it_warn/it_fail.
+    IT_CURRENT_SCENARIO="readiness"
+    echo ""
+    it_log "── release-server readiness ────────────────────────"
+
+    # 1. The whole point of a release server: chains already synced, reused in minutes.
+    if monero_caught_up; then it_pass "Monero is synced (chain reusable by the matrix)"; else it_fail "Monero is synced" "monerod not caught up — the matrix would have to re-sync"; fi
+    pithead status >/dev/null 2>&1; assert_rc "stack is healthy (pithead status)" "$?" "0"
+
+    # 2. The prune axis must vary the DB without re-syncing or mutating the canonical chain. The
+    #    OTHER prune mode is unlocked either by (a) a snapshot/reflink-capable live FS (so a
+    #    variant can be made cheaply) or (b) supplying a pre-built chain of the OPPOSITE mode
+    #    (--full-data-dir when the box is pruned, --pruned-data-dir when it's full). A SAME-mode
+    #    copy on a CoW volume is also useful: it lets destructive scenarios run off the live chain.
+    #    gouda is a pruned box (MONERO_PRUNE=1) with a pruned copy on a btrfs CoW loopback, so it
+    #    exercises pruned mode live with snapshot isolation; full mode is covered by the fakes.
+    local mdir fstype="" cow_live=0 baseline_mode="full" bp
+    mdir="$(env_on_box MONERO_DATA_DIR)"
+    bp="${BASELINE_PRUNE:-$(env_on_box MONERO_PRUNE)}"   # so standalone --readiness sees it too
+    [ -n "$mdir" ] && fstype="$(box_fstype "$mdir")"
+    case "$fstype" in btrfs|zfs|xfs) cow_live=1 ;; esac   # judged by FS type name only; nothing here probes for reflink support
+    [ "$bp" = "1" ] && baseline_mode="pruned"
+    it_log "   live chain: ${mdir:-?} (${fstype:-unknown}, ${baseline_mode})"
+
+    # Classify any supplied chains by prune mode relative to the live baseline.
+    local opp_dir opp_label same_dir
+    if [ "$bp" = "1" ]; then opp_dir="${FULL_DATA_DIR:-}"; opp_label="full"; same_dir="${PRUNED_DATA_DIR:-}"
+    else opp_dir="${PRUNED_DATA_DIR:-}"; opp_label="pruned"; same_dir="${FULL_DATA_DIR:-}"; fi
+
+    # A same-mode copy (e.g. the CoW pruned chain) — snapshot isolation for destructive scenarios.
+    if [ -n "$same_dir" ]; then
+        local sfs; sfs="$(box_fstype "$same_dir")"
+        if rx "test -e $(quote_arg "$same_dir")/lmdb/data.mdb" >/dev/null 2>&1; then
+            case "$sfs" in
+                btrfs|zfs|xfs) it_pass "snapshot-isolated $baseline_mode chain on a CoW FS ($same_dir, $sfs) — destructive scenarios needn't touch the live chain" ;;
+                *)             it_log "   same-mode copy at $same_dir ($sfs — not CoW)" ;;
+            esac
+        else
+            it_warn "supplied same-mode dir has no lmdb/data.mdb ($same_dir)"
+        fi
+    fi
+
+    # The opposite-mode chain is what unlocks the OTHER value of the prune axis.
+    if [ -n "$opp_dir" ]; then
+        if rx "test -e $(quote_arg "$opp_dir")/lmdb/data.mdb" >/dev/null 2>&1; then
+            it_pass "both prune modes exercisable (live=$baseline_mode + supplied $opp_label chain at $opp_dir)"
+        else
+            it_fail "supplied $opp_label chain present" "$opp_dir has no lmdb/data.mdb"
+        fi
+    elif [ "$cow_live" -eq 1 ]; then
+        it_pass "prune axis: live FS is snapshot-capable ($fstype) — the $opp_label variant can be built cheaply"
+    else
+        it_warn "prune axis: only $baseline_mode is testable live — no $opp_label chain supplied, so $opp_label scenarios skip (cover that mode via the fake mini-stack, or build one)"
+    fi
+
+    # 3. Disk headroom on the live chain FS (room to operate + hold a co-located second chain).
+    if [ -n "$mdir" ]; then
+        local avail; avail="$(box_avail_gb "$mdir")"
+        if [ -n "$avail" ] && [ "$avail" -ge 100 ] 2>/dev/null; then   # empty/non-numeric avail falls through to the WARN
+            it_pass "disk headroom on the live chain FS (${avail} GiB free)"
+        else
+            it_warn "low disk headroom on the live chain FS (${avail:-?} GiB free) — snapshots / a full+pruned matrix may not fit"
+        fi
+    fi
+
+    # 4. Secrets must not be world/group readable (the box holds wallet/RPC creds + onion keys).
+    local envmode; envmode="$(box_mode .env)"
+    case "$envmode" in
+        ""|*[!0-9]*) it_warn ".env permissions unknown" ;;
+        ?00)         it_pass ".env is owner-only (mode $envmode)" ;;   # any owner digit, group and other digits both 0
+        *)           it_fail ".env is owner-only" "mode is $envmode — group/other can read RPC creds & onions; run: chmod 600 .env" ;;
+    esac
+
+    # 5. The dashboard must sit behind Caddy on localhost, never bound to a public interface.
+    local d_addrs exposed=0 st _q1 _q2 laddr
+    d_addrs="$(rx "ss -tlnH 'sport = :8000' 2>/dev/null")"
+    if [ -z "$d_addrs" ]; then
+        it_warn "nothing listening on :8000 (dashboard) — can't assess exposure"
+    else
+        while read -r st _q1 _q2 laddr _; do   # 4th whitespace-separated field is taken as the local address:port
+            [ -n "$laddr" ] || continue
+            case "$laddr" in 127.0.0.1:*|"[::1]:"*) : ;; *) exposed=1 ;; esac   # only these two loopback forms count as safe
+        done <<< "$d_addrs"
+        if [ "$exposed" -eq 0 ]; then it_pass "dashboard bound to localhost only (Caddy fronts it)"; else it_fail "dashboard bound to localhost only" "it is listening on a non-loopback address — do not expose the dashboard directly"; fi
+    fi
+
+    # 6. The backup/rollback safety net must be usable (writable backups dir + tar).
+    if rx "mkdir -p backups && touch backups/.itest-rw 2>/dev/null && rm -f backups/.itest-rw && command -v tar" >/dev/null 2>&1; then
+        it_pass "backup/rollback prerequisites present (writable backups/, tar)"
+    else
+        it_fail "backup prerequisites present" "backups/ not writable or tar missing — --safety-backup won't work"
+    fi
+}
+
+# --- Lifecycle + edge phase (--lifecycle) -----------------------------------
+run_lifecycle() {  # --lifecycle phase: restart, pool flip (+ revert), node-down failover, backup/restore round-trip
+    IT_CURRENT_SCENARIO="lifecycle"
+    echo ""
+    it_log "── lifecycle + failover phase ──────────────────────"
+
+    # restart brings the stack back healthy.
+    it_step "pithead restart…"
+    pithead restart >/dev/null 2>&1
+    wait_status_ok 240 || true
+    pithead status >/dev/null 2>&1; assert_rc "status OK after restart" "$?" "0"
+
+    # apply that changes the sidechain must preserve secrets. We flip main<->mini, assert the
+    # token/onions are untouched, then revert to the baseline (the restore test below relies on it).
+    local cur_pool fp_before
+    cur_pool="$(jq_get "$BASELINE_CONFIG" '.p2pool.pool')"; cur_pool="${cur_pool:-main}"
+    local other; [ "$cur_pool" = "mini" ] && other="main" || other="mini"
+    fp_before="$(secret_fingerprint)"
+    push_config "$(render_scenario_config "$BASELINE_CONFIG" "p2pool.pool=$other")"
+    it_step "apply pool $cur_pool -> $other…"
+    pithead apply -y >/dev/null 2>&1; wait_status_ok 180 || true
+    assert_eq "secrets preserved across pool change" "$(secret_fingerprint)" "$fp_before"
+    assert_eq "pool actually changed" "$(jq_get "$(api_state)" '.pool.type')" "$(pool_label "$other")"
+    push_config "$BASELINE_CONFIG"; pithead apply -y >/dev/null 2>&1; wait_status_ok 180 || true   # the revert
+
+    # Node-down failover (#31): stop monerod -> status non-zero (node down), dashboard rejects
+    # workers (xmrig-proxy stopped) -> start monerod -> readmitted -> status 0 again.
+    if [ "$(env_on_box COMPOSE_PROFILES)" = "local_node" ]; then
+        it_step "stopping monerod to exercise node-down failover…"
+        rx "docker compose stop monerod" >/dev/null 2>&1
+        wait_for 120 5 "status to report node down" _pred_status_down || true
+        pithead status >/dev/null 2>&1; assert_rc "status non-zero when node down" "$?" "1"
+        it_step "starting monerod and waiting for readmit…"
+        rx "docker compose start monerod" >/dev/null 2>&1
+        wait_status_ok 240 || true
+        pithead status >/dev/null 2>&1; assert_rc "status OK after node recovery" "$?" "0"
+    else
+        it_warn "skipping node-down failover (remote mode: no local monerod to stop)"
+    fi
+
+    # backup → restore round-trip (#102): a backup archives config/.env/onions/dashboard; a
+    # restore brings them back. We change the pool, restore, and assert the pool reverted and
+    # secrets survived — exercising both CLI verbs end-to-end (not just the rollback net).
+    it_step "backup → restore round-trip…"
+    if pithead backup -y >/dev/null 2>&1; then
+        local arch; arch="$(rx 'ls -t backups/pithead-backup-*.tar.gz 2>/dev/null | head -n1')"
+        if [ -n "$arch" ]; then
+            local fp_b; fp_b="$(secret_fingerprint)"
+            local backed_pool; backed_pool="$(jq_get "$(api_state)" '.pool.type')"
+            # Diverge from the backed-up state, then restore it back.
+            push_config "$(render_scenario_config "$BASELINE_CONFIG" "p2pool.pool=$other")"
+            pithead apply -y >/dev/null 2>&1
+            pithead down >/dev/null 2>&1
+            pithead restore -y "$arch" >/dev/null 2>&1
+            pithead up >/dev/null 2>&1
+            wait_status_ok 240 || true
+            assert_eq "restore reverts the pool to the backed-up value" "$(jq_get "$(api_state)" '.pool.type')" "$backed_pool"
+            assert_eq "restore preserves secrets" "$(secret_fingerprint)" "$fp_b"
+            rx "rm -f $(quote_arg "$arch")" >/dev/null 2>&1 || true
+        else
+            it_fail "backup produced an archive" "no backups/pithead-backup-*.tar.gz"
+        fi
+    else
+        it_fail "pithead backup succeeded" "backup returned non-zero"
+    fi
+}
+
+# Predicate for wait_for: succeeds as soon as `pithead status` exits non-zero (a problem is reported).
+_pred_status_down() { ! pithead status >/dev/null 2>&1; }
+
+# --- Fault-injection phase (--fault-injection) ------------------------------
+# Deliberately break monerod three ways and assert pithead's status verdicts plus the
+# dashboard's failover, then restore. Local mode only (needs a local monerod to break).
+# These are destructive-then-restored and slow (healthcheck + node-health debounce), so the
+# phase is opt-in.
+_monerod_is() {  # _monerod_is <state> [<health>] — succeeds when monerod's state (and health, if given) match
+    local s; s="$(service_state monerod)"   # the "<state> <health>" pair printed by service_state
+    [ "$(svc_state_of "$s")" = "$1" ] && { [ -z "${2:-}" ] || [ "$(svc_health_of "$s")" = "$2" ]; }
+}
+_pred_monerod_missing()   { _monerod_is missing; }            # state "missing", health not checked
+_pred_monerod_unhealthy() { _monerod_is running unhealthy; }  # running, healthcheck failing
+_pred_monerod_healthy()   { _monerod_is running healthy; }    # running, healthcheck passing
+_pred_proxy_stopped()     { [ "$(svc_state_of "$(service_state xmrig-proxy)")" != "running" ]; }  # any non-running state counts
+
+fault_node_down() {  # Stop monerod; assert status fails and xmrig-proxy ends up "exited"; then start monerod again.
+    it_step "fault: stop monerod (required node down)…"
+    rx "docker compose stop monerod" >/dev/null 2>&1
+    wait_for 60 5 "status to report a problem" _pred_status_down || true   # a timed-out wait is tolerated; the assert below decides
+    pithead status >/dev/null 2>&1; assert_rc "status non-zero when monerod is down" "$?" "1"
+    # The dashboard rejects workers (stops xmrig-proxy) after its node-health debounce so they
+    # fail over to backup pools (#31).
+    wait_for 180 10 "xmrig-proxy stopped by failover" _pred_proxy_stopped || true
+    assert_eq "xmrig-proxy stopped for failover" "$(svc_state_of "$(service_state xmrig-proxy)")" "exited"
+    it_step "recover: start monerod…"
+    rx "docker compose start monerod" >/dev/null 2>&1
+    wait_for 240 5 "monerod healthy" _pred_monerod_healthy || true
+    wait_status_ok 240 || true
+    pithead status >/dev/null 2>&1; assert_rc "status OK after monerod recovery" "$?" "0"
+}
+
+fault_unhealthy() {  # Freeze monerod with SIGSTOP; assert "running unhealthy" and a failing status; then SIGCONT.
+    it_step "fault: freeze monerod (SIGSTOP) so its healthcheck fails…"
+    rx "docker compose kill -s SIGSTOP monerod" >/dev/null 2>&1
+    # The get_info healthcheck now times out; after its retries the container flips to
+    # running-but-unhealthy — the verdict stack_status flags as a problem.
+    wait_for 200 10 "monerod to report unhealthy" _pred_monerod_unhealthy || true
+    assert_eq "monerod running-but-unhealthy" "$(service_state monerod)" "running unhealthy"
+    pithead status >/dev/null 2>&1; assert_rc "status non-zero when monerod unhealthy" "$?" "1"
+    it_step "recover: thaw monerod (SIGCONT)…"
+    rx "docker compose kill -s SIGCONT monerod" >/dev/null 2>&1   # runs regardless of the asserts above
+    wait_for 120 5 "monerod healthy" _pred_monerod_healthy || true
+}
+
+fault_missing() {  # Remove the monerod container; assert state "missing" and a failing status; then recreate it.
+    it_step "fault: remove the monerod container…"
+    rx "docker compose rm -sf monerod" >/dev/null 2>&1   # -s: stop it first if needed; -f: no confirmation prompt
+    wait_for 30 3 "monerod to be missing" _pred_monerod_missing || true
+    assert_eq "monerod reported missing" "$(svc_state_of "$(service_state monerod)")" "missing"
+    pithead status >/dev/null 2>&1; assert_rc "status non-zero when monerod missing" "$?" "1"
+    it_step "recover: recreate monerod…"
+    rx "docker compose up -d monerod" >/dev/null 2>&1
+    wait_for 240 5 "monerod healthy" _pred_monerod_healthy || true
+}
+
+run_fault_injection() {
+    # shellcheck disable=SC2034  # read by lib.sh:it_fail to label captured failures
+    IT_CURRENT_SCENARIO="fault-injection"
+    echo ""
+    it_log "── fault-injection phase ───────────────────────────"
+    if [ "$(env_on_box COMPOSE_PROFILES)" != "local_node" ]; then
+        it_warn "skipping fault injection (remote mode: no local monerod to break)"
+        return 0
+    fi
+
+    local fails_before="$IT_FAIL"
+    fault_node_down
+    fault_unhealthy
+    fault_missing
+    [ "$IT_FAIL" -gt "$fails_before" ] && capture_artifacts "fault-injection" "$OUT_DIR"
+
+    # Belt-and-braces: whatever happened above, leave monerod up and the stack healthy.
+    rx "docker compose up -d monerod" >/dev/null 2>&1 || true
+    wait_for 240 5 "monerod healthy after fault phase" _pred_monerod_healthy || true
+    wait_status_ok 240 || true
+}
+
+# --- Safety backup / rollback (--safety-backup) -----------------------------
+# Take a real `pithead backup` before the destructive scenarios so a failed run can be rolled
+# all the way back (config, .env, Caddyfile, Tor onion keys, dashboard DB). This both protects
+# a precious box AND exercises backup/restore end-to-end (#102) — closing that CLI-breadth gap.
+safety_backup() {
+    if [ "$SAFETY_BACKUP" != "1" ]; then return 0; fi
+    it_log "Taking a safety backup before destructive scenarios (pithead backup -y)…"
+    pithead backup -y > "$OUT_DIR/backup.log" 2>&1 || {
+        it_fail "safety backup created" "see $OUT_DIR/backup.log"
+        return 0
+    }
+    SAFETY_ARCHIVE="$(rx 'ls -t backups/pithead-backup-*.tar.gz 2>/dev/null | head -n1')"
+    [ -n "$SAFETY_ARCHIVE" ] || {
+        it_fail "safety backup archive located" "no backups/pithead-backup-*.tar.gz on the box"
+        return 0
+    }
+    it_log "Safety backup: $SAFETY_ARCHIVE"
+    # The backup doubles as an assertion: its listing must name the core files a rollback needs.
+    local archive_listing; archive_listing="$(rx "tar -tzf $(quote_arg "$SAFETY_ARCHIVE") 2>/dev/null")"
+    assert_contains "backup archive contains config.json" "$archive_listing" "config.json"
+    assert_contains "backup archive contains .env"        "$archive_listing" ".env"
+}
+
+# On a failed run, roll the box back to the pre-test safety backup.
+safety_rollback_if_failed() {
+    [ "$SAFETY_BACKUP" = "1" ] && [ -n "$SAFETY_ARCHIVE" ] || return 0
+    [ "$IT_FAIL" -gt 0 ] || return 0
+    it_warn "failures detected — rolling back to the safety backup ($SAFETY_ARCHIVE)…"
+    pithead down >/dev/null 2>&1 || true
+    if pithead restore -y "$SAFETY_ARCHIVE" >/dev/null 2>&1; then
+        pithead up >/dev/null 2>&1 || true
+        wait_status_ok 240 || true
+        it_log "rollback complete — config/.env/onions/dashboard restored from the pre-test backup."
+    else
+        it_err "restore FAILED — the box may be in a partial state; archive kept at $SAFETY_ARCHIVE"
+        return 0
+    fi
+}
+
+# Remove the generated safety archive once we're done (kept on --keep, or if restore failed).
+safety_cleanup() {
+    [ -n "$SAFETY_ARCHIVE" ] || return 0
+    if [ "$KEEP_STATE" = "1" ]; then
+        it_warn "--keep: leaving the safety backup at $SAFETY_ARCHIVE"
+        return 0
+    fi
+    rx "rm -f $(quote_arg "$SAFETY_ARCHIVE")" >/dev/null 2>&1 || true
+    it_step "removed the safety backup archive"
+}
+
+# --- Restore + summary ------------------------------------------------------
+restore_baseline() {
+    [ "$KEEP_STATE" = "1" ] && { it_warn "--keep set: leaving the box on the last scenario."; return; }
+    [ -z "$BASELINE_CONFIG" ] && return
+    it_log "Restoring original config.json and re-applying…"
+    push_config "$BASELINE_CONFIG"
+    pithead apply -y >/dev/null 2>&1 || it_warn "restore apply reported a non-zero exit; check the box."
+    wait_status_ok 240 || true
+    assert_eq "secrets intact after restore" "$(secret_fingerprint)" "$BASELINE_SECRET_FP"
+}
+
+summary() {  # Print the pass/skip/fail totals; returns 1 when any assertion failed, 0 otherwise.
+    echo ""
+    it_log "════════════════ summary ════════════════"
+    it_log "passed:  $IT_PASS"
+    it_log "skipped: $IT_SKIPPED"
+    if ! [ "$IT_FAIL" -gt 0 ]; then
+        it_log "failed:  0"
+        it_log "All assertions passed. Artifacts/manifest under $OUT_DIR/"
+        return 0
+    fi
+    it_err "failed:  $IT_FAIL"
+    echo -e "$IT_FAILED_NAMES" >&2
+    it_err "Artifacts for failed scenarios are under $OUT_DIR/"
+    return 1
+}
+
+# --- Main -------------------------------------------------------------------
+IT_SKIPPED=0
+
+main() {  # Entry point: parse flags, preflight, then run the selected mode; the return status is summary's.
+    parse_args "$@"
+    preflight
+
+    # Non-destructive release-server fitness assessment.
+    if [ "$READINESS" = "1" ]; then
+        assert_release_readiness
+        summary
+        return
+    fi
+
+    # Non-destructive health check: assert the current live state and stop.
+    if [ "$CHECK_ONLY" = "1" ]; then
+        assert_current_state
+        summary
+        return
+    fi
+
+    # Optional rollback net for the destructive phases that follow.
+    safety_backup
+
+    local name rest
+    if [ -n "$ONLY_SCENARIO" ]; then
+        rest="$(scenario_overrides "$ONLY_SCENARIO")" || { it_err "Unknown scenario: $ONLY_SCENARIO"; exit 2; }
+        run_scenario "$ONLY_SCENARIO" "$rest"
+    else
+        while IFS=$'\t' read -r name rest <&3; do   # matrix arrives on fd 3, not stdin
+            [ -z "$name" ] && continue
+            run_scenario "$name" "$rest"
+        done 3< <(scenario_matrix)   # so ssh/docker/pithead inside run_scenario cannot swallow the remaining rows
+    fi
+
+    [ "$RUN_LIFECYCLE" = "1" ] && run_lifecycle
+    [ "$RUN_FAULTS" = "1" ] && run_fault_injection
+
+    # Failure → roll the box back to the safety backup; success → leave it (restore_baseline
+    # just puts config.json back to where we found it). Then drop the generated archive.
+    safety_rollback_if_failed
+    restore_baseline
+    safety_cleanup
+    summary
+}
+
+main "$@"
diff --git a/tests/integration/scenarios.sh b/tests/integration/scenarios.sh
new file mode 100644
index 0000000..1db4373
--- /dev/null
+++ b/tests/integration/scenarios.sh
@@ -0,0 +1,80 @@
+# shellcheck shell=bash
+#
+# Declarative config matrix for the integration suite (issue #54).
+#
+# Each scenario is a NAME and a set of `dotted.path=value` overrides applied to the box's
+# baseline config.json (see lib.sh:render_scenario_config). Keeping the matrix as data — not
+# code — means adding a case is a one-line edit, and selftest.sh can prove that every value
+# of every axis is exercised at least once (an acceptance criterion of #54).
+#
+# The full cross-product is large; we cover the realistic combinations and guarantee each
+# axis value appears at least once. Axes (from the issue):
+#   monero.mode .............. local | remote
+#   monero.prune ............. true (pruned) | false (full)
+#   monero.rpc_lan_access .... false (127.0.0.1) | true (LAN bind)
+#   p2pool.pool .............. main | mini | nano
+#   xvb.enabled .............. true | false
+#   dashboard.secure ......... true (Caddy TLS) | false
+#   dashboard.tari_required .. true (blocking) | false (non-blocking)
+#
+# Prerequisite-gated axes (skipped-with-a-loud-log, never silently, when the box can't host
+# them — see run.sh):
+#   * monero.prune=false (full) and =true (pruned) are different on-disk DBs. We only flip
+#     prune when a matching synced data dir is available; otherwise the case is reported
+#     SKIPPED so we never silently drop coverage or mutate the canonical chain.
+#   * monero.mode=remote needs a reachable external node (REMOTE_MONERO_HOST); the natural
+#     choice is the box's own synced monerod on its LAN address.
+
+# Emit the matrix as `NAME<TAB>overrides…`, one scenario per line, with the canonical (cheapest,
+# most-common) config first. tests/inventory.sh greps this heredoc as text: keep its shape.
+scenario_matrix() {
+    cat <<'EOF'
+local-pruned-main-secure-tari	monero.mode=local monero.prune=true monero.rpc_lan_access=false p2pool.pool=main xvb.enabled=true dashboard.secure=true dashboard.tari_required=true
+local-full-main-secure-tari	monero.mode=local monero.prune=false p2pool.pool=main xvb.enabled=true dashboard.secure=true dashboard.tari_required=true
+local-pruned-mini-secure-tari	monero.mode=local monero.prune=true p2pool.pool=mini xvb.enabled=true dashboard.secure=true dashboard.tari_required=true
+local-pruned-nano-insecure	monero.mode=local monero.prune=true p2pool.pool=nano xvb.enabled=true dashboard.secure=false dashboard.tari_required=true
+local-pruned-main-rpclan	monero.mode=local monero.prune=true monero.rpc_lan_access=true p2pool.pool=main xvb.enabled=true dashboard.secure=true dashboard.tari_required=true
+local-pruned-main-xvb-off	monero.mode=local monero.prune=true p2pool.pool=main xvb.enabled=false dashboard.secure=true dashboard.tari_required=true
+local-pruned-main-tari-optional	monero.mode=local monero.prune=true p2pool.pool=main xvb.enabled=true dashboard.secure=true dashboard.tari_required=false
+remote-main-secure-tari	monero.mode=remote p2pool.pool=main xvb.enabled=true dashboard.secure=true dashboard.tari_required=true
+EOF
+}
+
+# The axis=value pairs the matrix must cover. selftest.sh asserts each one appears in at least
+# one scenario's overrides; tests/inventory.sh greps this heredoc, so keep one pair per line.
+axis_coverage() {
+    cat <<'EOF'
+monero.mode=local
+monero.mode=remote
+monero.prune=true
+monero.prune=false
+monero.rpc_lan_access=true
+monero.rpc_lan_access=false
+p2pool.pool=main
+p2pool.pool=mini
+p2pool.pool=nano
+xvb.enabled=true
+xvb.enabled=false
+dashboard.secure=true
+dashboard.secure=false
+dashboard.tari_required=true
+dashboard.tari_required=false
+EOF
+}
+
+# Print the override string of scenario $1 and return 0; print nothing and return 1 if unknown.
+scenario_overrides() {
+    local wanted="$1" key overrides
+    while IFS=$'\t' read -r key overrides; do
+        if [ "$key" = "$wanted" ]; then printf '%s' "$overrides"; return 0; fi
+    done < <(scenario_matrix)
+    return 1
+}
+
+# List the scenario names (the first tab-separated field of each matrix row), one per line.
+scenario_names() {
+    local key _overrides
+    while IFS=$'\t' read -r key _overrides; do
+        if [ -n "$key" ]; then printf '%s\n' "$key"; fi
+    done < <(scenario_matrix)
+}
diff --git a/tests/integration/selftest.sh b/tests/integration/selftest.sh
new file mode 100755
index 0000000..cf4ce2e
--- /dev/null
+++ b/tests/integration/selftest.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+#
+# Self-test for the integration harness's pure logic (config rendering, expectation
+# derivation, redaction, matrix coverage, the SSH/local exec wrapper, JSON parsing).
+#
+# This runs anywhere — no real server needed — so it can gate every PR (unlike the live
+# matrix in run.sh, which needs the test box). It dogfoods the very assertion helpers the
+# harness ships. Run: tests/integration/selftest.sh
+#
+set -uo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=tests/integration/lib.sh
+source "$HERE/lib.sh"
+# shellcheck source=tests/integration/scenarios.sh
+source "$HERE/scenarios.sh"
+
+echo "== overrides_to_jq: value typing =="
+assert_contains "boolean stays unquoted" "$(overrides_to_jq monero.prune=false)" '.monero.prune=false'
+assert_contains "string gets quoted"     "$(overrides_to_jq monero.mode=remote)"  '.monero.mode="remote"'
+assert_contains "integer stays unquoted"  "$(overrides_to_jq monero.remote.rpc_port=18081)" '.monero.remote.rpc_port=18081'
+assert_contains "negative int unquoted"   "$(overrides_to_jq foo=-5)" '.foo=-5'
+assert_contains "dotted ip is a string"   "$(overrides_to_jq monero.remote.host=10.0.0.5)" '.monero.remote.host="10.0.0.5"'
+assert_eq       "no overrides is identity" "$(overrides_to_jq)" '.'
+assert_eq       "empty token is skipped"   "$(overrides_to_jq '' a=1)" '. | .a=1'
+
+echo "== resolve_overrides: prerequisite gate (never mutates the canonical chain) =="
+# Happy path: a scenario needing no alt resources resolves unchanged.
+BASELINE_PRUNE=1; PRUNED_DATA_DIR=""; FULL_DATA_DIR=""; REMOTE_MONERO_HOST=""
+resolve_overrides "monero.mode=local monero.prune=true p2pool.pool=main"; rc=$?
+assert_rc "no-prereq scenario resolves" "$rc" "0"
+assert_eq "RESOLVED unchanged when no prereq" "$RESOLVED" "monero.mode=local monero.prune=true p2pool.pool=main"
+# prune=false (full) on a pruned box: SKIP without a dir, augment with one — never flips the canonical DB.
+BASELINE_PRUNE=1; FULL_DATA_DIR=""
+resolve_overrides "monero.prune=false"; rc=$?
+assert_rc "full-on-pruned-box skips without dir" "$rc" "1"
+assert_contains "skip names --full-data-dir" "$SKIP_REASON" "--full-data-dir"
+FULL_DATA_DIR="/srv/full"
+resolve_overrides "monero.prune=false"; rc=$?
+assert_rc "full-on-pruned-box ok with dir" "$rc" "0"
+assert_contains "augments full data_dir" "$RESOLVED" "monero.data_dir=/srv/full"
+# prune=true (pruned) on a full box: SKIP without a dir.
+BASELINE_PRUNE=0; PRUNED_DATA_DIR=""
+resolve_overrides "monero.prune=true"; rc=$?
+assert_rc "pruned-on-full-box skips without dir" "$rc" "1"
+assert_contains "skip names --pruned-data-dir" "$SKIP_REASON" "--pruned-data-dir"
+# remote mode: SKIP without an endpoint, augment with one.
+BASELINE_PRUNE=1; REMOTE_MONERO_HOST=""
+resolve_overrides "monero.mode=remote"; rc=$?
+assert_rc "remote skips without endpoint" "$rc" "1"
+assert_contains "skip names --remote-monero-host" "$SKIP_REASON" "--remote-monero-host"
+REMOTE_MONERO_HOST="10.0.0.5:18081"
+resolve_overrides "monero.mode=remote"; rc=$?
+assert_rc "remote ok with endpoint" "$rc" "0"
+assert_contains "augments remote host" "$RESOLVED" "monero.remote.host=10.0.0.5:18081"
+# Compound prerequisites both augment.
+BASELINE_PRUNE=1; FULL_DATA_DIR="/srv/full"; REMOTE_MONERO_HOST="10.0.0.5:18081"
+resolve_overrides "monero.mode=remote monero.prune=false"; rc=$?
+assert_rc "compound prereqs resolve" "$rc" "0"
+assert_contains "compound: data_dir" "$RESOLVED" "monero.data_dir=/srv/full"
+assert_contains "compound: remote host" "$RESOLVED" "monero.remote.host=10.0.0.5:18081"
+unset BASELINE_PRUNE PRUNED_DATA_DIR FULL_DATA_DIR REMOTE_MONERO_HOST
+
+echo "== render_scenario_config: applies overrides, stays valid JSON =="
+BASE='{"monero":{"mode":"local","prune":true,"wallet_address":"49keep"},"p2pool":{"pool":"main"}}'
+RENDERED="$(render_scenario_config "$BASE" monero.mode=remote monero.prune=false p2pool.pool=mini)"
+if printf '%s' "$RENDERED" | jq empty 2>/dev/null; then it_pass "rendered config is valid JSON"; else it_fail "rendered config is valid JSON" "jq rejected it"; fi  # if/else: a non-zero it_pass must not also record a failure
+assert_eq "override: mode"   "$(jq_get "$RENDERED" '.monero.mode')"  "remote"
+assert_eq "override: prune"  "$(jq_get "$RENDERED" '.monero.prune')" "false"
+assert_eq "override: pool"   "$(jq_get "$RENDERED" '.p2pool.pool')"  "mini"
+assert_eq "preserved: wallet" "$(jq_get "$RENDERED" '.monero.wallet_address')" "49keep"
+
+echo "== expected/absent services: profile gating =="
+LOCAL='{"monero":{"mode":"local"}}'
+REMOTE='{"monero":{"mode":"remote"}}'
+assert_contains "local includes monerod"  "$(expected_services "$LOCAL")"  "monerod"
+assert_contains "local includes p2pool"   "$(expected_services "$LOCAL")"  "p2pool"
+case "$(expected_services "$REMOTE")" in *monerod*) it_fail "remote excludes monerod" "monerod present" ;; *) it_pass "remote excludes monerod" ;; esac
+assert_eq "remote marks monerod absent"   "$(absent_services "$REMOTE")" "monerod"
+assert_eq "local marks nothing absent"    "$(absent_services "$LOCAL")" ""
+assert_eq "pool_label main"  "$(pool_label main)" "Main"
+assert_eq "pool_label mini"  "$(pool_label mini)" "Mini"
+assert_eq "pool_label nano"  "$(pool_label nano)" "Nano"
+assert_eq "pool_label unknown passes through" "$(pool_label custom)" "custom"
+
+echo "== redact: secrets never leak into artifacts =="
+ONION="$(printf 'a%.0s' {1..56}).onion"  # 56 x 'a' + ".onion"; brace range instead of an unquoted $(seq)
+SECRETS="$(printf 'PROXY_AUTH_TOKEN=deadbeefcafe\nMONERO_NODE_PASSWORD=hunter2\nMONERO_RPC_PASSWORD=p\nBACKUP_SECRET=s3kr3t\nMONERO_ONION_ADDRESS=%s\nHOST_IP=box.lan\n' "$ONION")"
+REDACTED="$(printf '%s' "$SECRETS" | redact)"
+assert_contains "token redacted"      "$REDACTED" "PROXY_AUTH_TOKEN=<redacted>"
+assert_contains "password redacted"   "$REDACTED" "MONERO_NODE_PASSWORD=<redacted>"
+assert_contains "*_PASSWORD redacted" "$REDACTED" "MONERO_RPC_PASSWORD=<redacted>"
+assert_contains "*_SECRET redacted"   "$REDACTED" "BACKUP_SECRET=<redacted>"
+assert_contains "onion redacted"      "$REDACTED" "<redacted>.onion"
+assert_contains "non-secret kept"     "$REDACTED" "HOST_IP=box.lan"
+case "$REDACTED" in *deadbeefcafe*) it_fail "raw token absent" "token leaked" ;; *) it_pass "raw token absent" ;; esac
+case "$REDACTED" in *s3kr3t*) it_fail "raw secret absent" "secret leaked" ;; *) it_pass "raw secret absent" ;; esac
+
+echo "== matrix: every axis value is covered =="
+# Haystack = every scenario's overrides, space-joined; each axis=value must occur as a whole token.
+CORPUS="$(scenario_matrix | cut -f2 | tr '\n' ' ')"
+while IFS= read -r val; do
+    [ -n "$val" ] || continue
+    if [[ " $CORPUS " == *" $val "* ]]; then it_pass "axis covered: $val"
+    else it_fail "axis covered: $val" "no scenario sets $val"
+    fi
+done < <(axis_coverage)
+
+echo "== scenarios: lookup helpers =="
+assert_ne "scenario_names is non-empty" "$(scenario_names | head -n1)" ""
+assert_eq "scenario count matches matrix" "$(scenario_names | grep -c .)" "$(scenario_matrix | grep -c .)"
+assert_contains "overrides lookup works" "$(scenario_overrides remote-main-secure-tari)" "monero.mode=remote"
+# An unknown scenario name must fail (return 1) and print nothing — never silently resolve.
+miss="$(scenario_overrides no-such-scenario)"; rc=$?
+assert_rc "unknown scenario returns 1" "$rc" "1"
+assert_eq "unknown scenario prints nothing" "$miss" ""
+
+echo "== rx: local exec runs in the stack dir =="
+TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT
+printf 'marker' > "$TMP/sentinel"
+IT_MODE="local"; IT_REMOTE_DIR="$TMP"
+assert_eq "rx runs command on target" "$(rx 'cat sentinel')" "marker"
+assert_eq "rx cwd is the stack dir"   "$(rx 'pwd')" "$TMP"
+
+echo "== api_state + jq_get: parse a fixture =="
+# Stub rx so api_state returns a representative /api/state payload.
+FIXTURE='{"sync":{"monero":{"state":"done"},"tari":{"state":"done"}},"monero":{"mode":"Pruned"},"pool":{"type":"Main"},"proxy_workers":2,"stratum":{"conns":2,"total_hashes":12345}}'
+rx() { printf '%s' "$FIXTURE"; }
+ST="$(api_state)"
+assert_eq "parse monero sync state" "$(jq_get "$ST" '.sync.monero.state')" "done"
+assert_eq "parse pool type"         "$(jq_get "$ST" '.pool.type')" "Main"
+assert_eq "parse worker count"      "$(jq_get "$ST" '.proxy_workers')" "2"
+assert_eq "missing key -> empty"    "$(jq_get "$ST" '.nope.nope')" ""
+
+echo "== service_state parsing (fault-injection predicates) =="
+assert_eq "state of 'running healthy'"  "$(svc_state_of 'running healthy')"  "running"
+assert_eq "health of 'running healthy'" "$(svc_health_of 'running healthy')" "healthy"
+assert_eq "state of 'missing none'"     "$(svc_state_of 'missing none')"     "missing"
+assert_eq "health of 'running unhealthy'" "$(svc_health_of 'running unhealthy')" "unhealthy"
+assert_eq "state of 'exited none'"      "$(svc_state_of 'exited none')"      "exited"
+
+echo "== assertion helpers: counters behave =="
+_p="$IT_PASS"; _f="$IT_FAIL"  # snapshot both counters around two assertions that must pass
+assert_num_ge "num_ge passes when equal" 5 5
+assert_num_gt "num_gt passes when greater" 6 5
+if [ "$IT_PASS" -gt "$_p" ] && [ "$IT_FAIL" -eq "$_f" ]; then it_pass "passing assertions increment IT_PASS"; else it_fail "passing assertions increment IT_PASS" "IT_PASS $_p -> $IT_PASS, IT_FAIL $_f -> $IT_FAIL"; fi
+
+# --- Tally ------------------------------------------------------------------
+echo ""
+echo "selftest: $IT_PASS passed, $IT_FAIL failed"
+[ "$IT_FAIL" -eq 0 ] || exit 1
diff --git a/tests/integration/system-info.sh b/tests/integration/system-info.sh
new file mode 100644
index 0000000..8b65430
--- /dev/null
+++ b/tests/integration/system-info.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# system-info.sh — snapshot the hardware/layout of a Pithead build/test server as Markdown.
+# Re-run any time (no sudo, safe while the stack is live):
+#   ~/pithead-testbench/system-info.sh > ~/pithead-testbench/system-info.md
+#
+# Lives in the repo (tests/integration/) so it is versioned; deployed onto the box for operators
+# and AI agents to discover what they are working with.
+set -uo pipefail
+
+STACK_DIR="${STACK_DIR:-$HOME/code/pithead}"
+
+h()    { printf '\n%s\n\n' "## $1"; }  # Markdown H2 heading with a blank line on each side
+fence(){ printf '%s\n' '```'; cat; printf '%s\n' '```'; }  # wrap stdin in a ``` code fence
+
+printf '# Pithead build server — system snapshot\n\n'
+printf '_Host `%s` — generated %s_\n' "$(hostname)" "$(date '+%Y-%m-%d %H:%M:%S %z')"
+printf '\n> Regenerate: `~/pithead-testbench/system-info.sh > ~/pithead-testbench/system-info.md`\n'
+
+h "OS & kernel"
+{
+    # shellcheck disable=SC1091
+    { . /etc/os-release 2>/dev/null && echo "Distro: ${PRETTY_NAME:-?}"; } || true
+    echo "Kernel: $(uname -sr)"
+    echo "Uptime:$(uptime -p 2>/dev/null | sed 's/^up//' || true)"
+} | fence
+
+h "CPU"
+lscpu 2>/dev/null | grep -E 'Model name|^CPU\(s\)|Thread\(s\) per core|Core\(s\) per socket' | sed 's/  */ /g' | fence
+
+h "Memory"
+free -h 2>/dev/null | fence
+
+h "Disks — ROTA 0 = SSD/NVMe (fast), 1 = HDD (slow)"
+lsblk -d -o NAME,ROTA,SIZE,MODEL,TYPE 2>/dev/null | fence
+printf '\n**Storage policy:** active monerod/Tari chains live on the **NVMe** (root LV). The **HDD**\n'
+printf '(`/home`) is for backups / cold storage / the CoW loopback only — a chain on the HDD makes\n'
+printf 'every test scenario crawl. Check `ROTA` before placing any chain.\n'
+
+h "Filesystems"
+df -hT / /home /mnt/chains 2>/dev/null | fence
+
+h "Chains"
+{
+    # Read the real data-dir locations from .env (they're decoupled from the checkout).
+    mdir="$(grep -E '^MONERO_DATA_DIR=' "$STACK_DIR/.env" 2>/dev/null | cut -d= -f2-)"
+    tdir="$(grep -E '^TARI_DATA_DIR=' "$STACK_DIR/.env" 2>/dev/null | cut -d= -f2-)"
+    [ -n "$mdir" ] || mdir="$(readlink -f "$STACK_DIR/data/monero" 2>/dev/null)"
+    [ -n "$tdir" ] || tdir="$(readlink -f "$STACK_DIR/data/tari" 2>/dev/null)"
+    echo "Monero : ${mdir:-?}"
+    echo "         size $(du -sh "$mdir" 2>/dev/null | cut -f1)  |  MONERO_PRUNE=$(grep -E '^MONERO_PRUNE=' "$STACK_DIR/.env" 2>/dev/null | cut -d= -f2-) (1=pruned)"
+    echo "Tari   : ${tdir:-?}"
+    echo "         size $(du -sh "$tdir" 2>/dev/null | cut -f1)  |  archival/full (no pruning configured)"
+} | fence
+printf '\n_A pruned Monero chain that reads ~250 GiB on disk is LMDB free-page bloat, not a full chain;\n'
+printf 'see the build-server README for the compaction procedure._\n'
+
+h "Docker & containers"
+docker --version 2>/dev/null | fence
+printf '\n'
+docker ps --format 'table {{.Names}}\t{{.Status}}' 2>/dev/null | fence
+
+h "monerod version"
+docker exec monerod monerod --version 2>/dev/null | head -1 | fence
+
+h "Listening sockets — dashboard :8000 must be 127.0.0.1"
+{ PATH="/usr/sbin:/sbin:$PATH"; if command -v ss >/dev/null 2>&1; then ss -tlnH 2>/dev/null | awk '{print $4}' | grep -E ':(8000|3333|18081|18083|37889|3344)$' | sort -u || echo "(no matching listeners reported)"; else echo "(ss not available)"; fi; } | fence  # pipefail: grep's no-match is not a missing ss
+
+printf '\n---\n_See `README.md` in this directory for how to run the stack and the test harness._\n'
diff --git a/tests/inventory.sh b/tests/inventory.sh
new file mode 100755
index 0000000..d80fb86
--- /dev/null
+++ b/tests/inventory.sh
@@ -0,0 +1,155 @@
+#!/usr/bin/env bash
+#
+# Generate a coverage inventory across every test suite in the repo and print it as Markdown.
+# Source of truth for "what is covered" (issue #54). Regenerate with `make test-inventory`,
+# which writes docs/test-inventory.md — that file is generated, do not hand-edit it.
+#
+# Static (grep-based): no test run, no dependencies, deterministic — so the output never
+# depends on whether a daemon/server happens to be available.
+set -uo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT" || exit 1
+
+# Names of the test functions (def test_… / async def test_…) in python file $1, in source order.
+py_tests() {
+    grep -E '^[[:space:]]*(async[[:space:]]+)?def test_' "$1" 2>/dev/null \
+        | sed -E 's/^[[:space:]]*(async[[:space:]]+)?def (test_[A-Za-z0-9_]+).*/\2/'  # keep just the name
+}
+# Names of test('…') / test("…") cases in node test file $1; a name may contain the other quote kind.
+node_tests() { grep -oE "test\(('[^']+'|\"[^\"]+\")" "$1" 2>/dev/null | sed -E "s/^test\(['\"]//; s/['\"]$//"; }
+# Section titles taken from the `== title ==` headers in shell suite $1 (the == markers stripped).
+sh_sections() { grep -oE '== [^=]+ ==' "$1" 2>/dev/null | sed -E 's/^== //; s/ ==$//'; }
+count() { grep -c . 2>/dev/null || true; }  # count non-empty stdin lines; `|| true` masks grep's exit 1 on zero
+
+# Print "- name" bullets for each line on stdin.
+bullets() { sed 's/^/- /'; }
+
+# --- gather ---------------------------------------------------------------
+# LC_ALL=C pins sort's collation so file order (and the generated doc) is locale-independent.
+PY_DASH_FILES=$(find build/dashboard/tests -name 'test_*.py' | LC_ALL=C sort)
+PY_FAKE_FILE="tests/integration/fakes/test_contract.py"
+NODE_FILES=$(find build/dashboard/tests/frontend -name '*.test.mjs' 2>/dev/null | LC_ALL=C sort)
+n_py_dash=0
+for f in $PY_DASH_FILES; do n_py_dash=$((n_py_dash + $(py_tests "$f" | count))); done
+n_py_fake=$(py_tests "$PY_FAKE_FILE" | count)
+n_node=0
+for f in $NODE_FILES; do n_node=$((n_node + $(node_tests "$f" | count))); done
+n_stack=$(sh_sections tests/stack/run.sh | count)
+n_selftest=$(sh_sections tests/integration/selftest.sh | count)
+n_scen=$(sed -n '/scenario_matrix() {/,/^EOF/p' tests/integration/scenarios.sh | awk -F'\t' 'NF>1{print $1}' | count)  # rows = lines containing a TAB
+n_axes=$(sed -n '/axis_coverage() {/,/^EOF/p' tests/integration/scenarios.sh | grep -cE '^[a-z].*=')
+n_mini=$(grep -cE 'log "scenario [0-9]' tests/integration/mini-stack/run-mini-stack.sh)
+
+total=$((n_py_dash + n_py_fake + n_node + n_stack + n_selftest + n_scen + n_mini))
+
+# --- emit -----------------------------------------------------------------
+cat <<EOF
+# Test Inventory
+
+_Generated by \`make test-inventory\` ([\`tests/inventory.sh\`](../tests/inventory.sh)). **Do not
+edit by hand** — re-run the target to refresh. See [Testing Strategy](testing-strategy.md) for
+how the tiers fit together._
+
+**Totals:** ${n_py_dash} dashboard unit tests · ${n_py_fake} contract tests · ${n_node} frontend
+tests · ${n_stack} \`pithead\` shell sections · ${n_selftest} harness self-test sections ·
+${n_scen} live config scenarios (${n_axes} axis values) · ${n_mini} mini-stack scenarios.
+
+> Counts are **test functions / named cases** (parametrized pytest cases expand to more at
+> run time — e.g. the dashboard suite collects ~381). Generated statically by grep, so it's
+> stable regardless of what's installed.
+
+| Tier | Suite | Cases |
+|---|---|---|
+| 1 — Unit | dashboard pytest | ${n_py_dash} |
+| 1 — Unit | frontend (node --test) | ${n_node} |
+| 1 — Unit | \`pithead\` shell suite | ${n_stack} sections |
+| 1 — Unit | compose interpolation + hardening (#90) | 1 |
+| 2 — Contract | fake-daemon clients | ${n_py_fake} |
+| 3 — Mini-stack | docker control-plane scenarios | ${n_mini} |
+| 4 — Live matrix | config scenarios | ${n_scen} (${n_axes} axis values) |
+| 4 — Live matrix | harness self-test | ${n_selftest} sections |
+
+---
+
+## Tier 1 — Unit & component
+
+### Dashboard (pytest) — ${n_py_dash} tests
+EOF
+
+for f in $PY_DASH_FILES; do
+    n=$(py_tests "$f" | count)
+    printf '\n#### %s — %s\n' "${f#build/dashboard/}" "$n"
+    py_tests "$f" | bullets
+done
+
+cat <<EOF
+
+### Frontend logic (node --test) — ${n_node} tests
+EOF
+for f in $NODE_FILES; do node_tests "$f" | bullets; done
+
+cat <<EOF
+
+### \`pithead\` shell suite (tests/stack/run.sh) — ${n_stack} sections
+EOF
+sh_sections tests/stack/run.sh | bullets
+
+cat <<EOF
+
+### Compose validation + hardening (tests/stack/test_compose.sh)
+- docker-compose.yml \`\${VAR}\` interpolation resolves against a representative .env
+- #90 hardening invariants: no-new-privileges / cap_drop / read-only roots, credential-free
+  healthchecks, least-privilege Docker socket proxies, and the pinned \`pithead\` project name
+
+## Tier 2 — Contract (real clients vs controllable fakes)
+
+### tests/integration/fakes/test_contract.py — ${n_py_fake} tests
+EOF
+py_tests "$PY_FAKE_FILE" | bullets
+
+cat <<EOF
+
+## Tier 3 — Fake-daemon mini-stack (docker)
+
+### tests/integration/mini-stack/run-mini-stack.sh — ${n_mini} scenarios
+EOF
+grep -oE 'log "scenario [0-9]+: [^"]+"' tests/integration/mini-stack/run-mini-stack.sh \
+    | sed -E 's/^log "//; s/"$//' | bullets
+
+cat <<EOF
+
+## Tier 4 — Live config matrix (real synced server)
+
+### Config scenarios (tests/integration/scenarios.sh) — ${n_scen}
+EOF
+sed -n '/scenario_matrix() {/,/^EOF/p' tests/integration/scenarios.sh \
+    | grep -E $'\t' | awk -F'\t' '{print $1}' | bullets
+
+cat <<EOF
+
+### Axis coverage (every value exercised at least once) — ${n_axes}
+EOF
+sed -n '/axis_coverage() {/,/^EOF/p' tests/integration/scenarios.sh | grep -E '^[a-z].*=' | bullets
+
+cat <<EOF
+
+### Per-scenario assertions (tests/integration/run.sh)
+EOF
+grep -hoE '(assert_[a-z_]+|it_pass) "[^"]+"' tests/integration/run.sh \
+    | sed -E 's/^(assert_[a-z_]+|it_pass) "//; s/"$//' \
+    | grep -vE '^\$[A-Za-z_]+$' | LC_ALL=C sort -u | bullets  # C collation: same order on every machine
+
+cat <<EOF
+
+### Harness self-test (tests/integration/selftest.sh) — ${n_selftest} sections
+EOF
+sh_sections tests/integration/selftest.sh | bullets
+
+cat <<EOF
+
+---
+
+_Grand total: **${total}** enumerated cases/sections across the four tiers (plus the live
+lifecycle and fault-injection phases, which are exercised on a real server)._
+EOF
diff --git a/tests/stack/run.sh b/tests/stack/run.sh
index 88216ae..5fc4120 100755
--- a/tests/stack/run.sh
+++ b/tests/stack/run.sh
@@ -73,6 +73,16 @@ run_sourced "$SANDBOX" is_ipv4 "192.168.1.0/24" >/dev/null 2>&1; assert_rc "reje
 run_sourced "$SANDBOX" is_ipv4 "example.com"  >/dev/null 2>&1; assert_rc "rejects hostname"    "$?" "1"
 run_sourced "$SANDBOX" is_ipv4 ""             >/dev/null 2>&1; assert_rc "rejects empty"       "$?" "1"
 
+echo "== unit: resolve_dashboard_host (dashboard.host 'auto' revert, 247c5a0) =="
+# A configured dashboard.host is used verbatim.
+# shellcheck disable=SC1090,SC2034  # $STACK path is dynamic; DASHBOARD_HOST is read by the sourced function
+got="$( cd "$SANDBOX" && source "$STACK" 2>/dev/null; set +e; DASHBOARD_HOST='my.box.lan'; resolve_dashboard_host >/dev/null 2>&1; printf '%s' "$HOST_IP" )"
+assert_eq "configured dashboard.host is used" "$got" "my.box.lan"
+# 'auto' (no dashboard.host) on a non-interactive run must REVERT HOST_IP to the machine
+# hostname, not keep a stale prior value — the regression fixed in 247c5a0.
+# shellcheck disable=SC1090,SC2034
+got="$( cd "$SANDBOX" && source "$STACK" 2>/dev/null; set +e; DASHBOARD_HOST=''; HOST_IP='STALE'; resolve_dashboard_host >/dev/null 2>&1; printf '%s' "$HOST_IP" )"
+assert_eq "dashboard.host 'auto' reverts to hostname" "$got" "$(hostname)"
 echo "== unit: docker_boot_enabled (#137) =="
 # A systemctl stub on PATH; FAKE_BOOT picks which unit reports "enabled". Docker counts as
 # boot-enabled if EITHER docker.service or docker.socket is enabled.
diff --git a/tests/stack/test_compose.sh b/tests/stack/test_compose.sh
index 5001464..b61022a 100755
--- a/tests/stack/test_compose.sh
+++ b/tests/stack/test_compose.sh
@@ -106,6 +106,25 @@ expect_present "tecnativa socket-proxy pinned by digest" "tecnativa/docker-socke
 expect_present "caddy pinned by digest" "caddy:2.11@sha256:"
 expect_present "tari node pinned by digest" "minotari_node:v5.3.1-mainnet@sha256:"
 
+# Per-service precision checks via the JSON render (cleaner than grepping the flat YAML): the
+# Docker socket proxies must stay least-privilege, and the Tari probe must self-match safely.
+JSON="$(docker compose --env-file "$ENV_FILE" -f "$ROOT/docker-compose.yml" config --format json 2>/dev/null)"
+jq_assert() { # <label> <filter> — tick if the jq filter is truthy on $JSON, else cross and bump $fails
+    if ! jq -e "$2" <<<"$JSON" >/dev/null 2>&1; then echo "  ✗ $1: failed [$2]"; fails=$((fails + 1)); else echo "  ✓ $1"; fi
+}
+# The read proxy must never gain write (POST) access; the control proxy is start/stop ONLY.
+jq_assert "docker-proxy cannot POST (read-only API)" '(.services["docker-proxy"].environment.POST // "0") != "1"'
+jq_assert "docker-control is start/stop only (no exec/image ops)" \
+    '.services["docker-control"].environment | (.POST=="1" and .ALLOW_START=="1" and .ALLOW_STOP=="1" and ((.EXEC // "0") != "1") and ((.IMAGES // "0") != "1"))'
+# Both proxies mount the Docker socket read-only.
+jq_assert "docker socket mounted read-only in both proxies" \
+    '[.services["docker-proxy"], .services["docker-control"]] | all((.volumes // []) | any((.source == "/var/run/docker.sock") and (.read_only == true)))'
+# The Tari probe uses the [m] bracket so grep can't match its own argv (a false-healthy bug).
+jq_assert "tari healthcheck uses the [m]inotari self-match guard" \
+    '(.services.tari.healthcheck.test | tostring) | contains("[m]inotari")'
+# The Compose project name is pinned to "pithead" (not derived from the checkout directory).
+jq_assert "compose project name is pinned to pithead" '.name == "pithead"'
+
 if [ "$fails" -ne 0 ]; then
     echo "  ✗ $fails hardening check(s) failed"
     exit 1