diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 342475e..aef24a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,10 @@ jobs: - name: Run pytest with coverage gate working-directory: build/dashboard run: python -m pytest --cov=mining_dashboard --cov-report=term-missing --cov-fail-under=80 + - name: Fake-daemon contract test (real clients vs controllable fakes) + # Points the real Monero/Tari clients at the integration fakes and asserts they parse + # every state (synced/syncing/down). Docker-free, so it runs on every PR (issue #54). + run: PYTHONPATH=build/dashboard python -m pytest tests/integration/fakes -q frontend: name: Frontend logic tests (node --test) @@ -53,14 +57,23 @@ jobs: # the job when one is briefly out of sync — see issue #64. - name: Lint pithead and test scripts # Gate on warnings+errors (real issues); info-level style nits vary by shellcheck version. - run: shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh + run: shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh tests/inventory.sh tests/integration/*.sh tests/integration/mini-stack/*.sh - name: Run pithead test suite run: bash tests/stack/run.sh + - name: Run integration harness self-test + # Pure-logic checks for the tests/integration/ harness (config rendering, matrix + # coverage, redaction). The LIVE matrix (tests/integration/run.sh) needs a real test + # server and runs as a gated/manual release gate (#54), not on every PR. + run: bash tests/integration/selftest.sh + - name: Check the test inventory is up to date + # docs/test-inventory.md is generated from the suites; fail if a test was added/removed + # without regenerating it (run `make test-inventory`). 
+ run: make test-inventory-check compose: - name: Compose config validation + name: Compose config + security hardening runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Validate docker-compose.yml interpolation + - name: Validate docker-compose.yml interpolation + hardening invariants (#90) run: bash tests/stack/test_compose.sh diff --git a/.github/workflows/integration-mini-stack.yml b/.github/workflows/integration-mini-stack.yml new file mode 100644 index 0000000..958a41c --- /dev/null +++ b/.github/workflows/integration-mini-stack.yml @@ -0,0 +1,27 @@ +name: Integration mini-stack + +# The fake-daemon docker mini-stack (issue #54, tier 3): brings up the REAL dashboard + +# docker-control proxy against controllable fake monerod/Tari and asserts the control plane +# (sync hold/release, node-down reject/readmit) end-to-end. It needs a Docker daemon, so it +# runs as its own job (not part of the always-on CI matrix), triggered on changes to the +# integration harness or the dashboard, and on demand. +on: + workflow_dispatch: + pull_request: + paths: + - "tests/integration/**" + - "build/dashboard/**" + - ".github/workflows/integration-mini-stack.yml" + +jobs: + mini-stack: + name: Fake-daemon mini-stack (docker) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + # ubuntu-latest ships Docker with the Compose v2 plugin — no setup needed. + - name: Run the fake-daemon mini-stack + run: bash tests/integration/mini-stack/run-mini-stack.sh + - name: Dump dashboard logs on failure + if: failure() + run: docker compose -f tests/integration/mini-stack/docker-compose.fake.yml logs --no-color || true diff --git a/.github/workflows/release-gate.yml b/.github/workflows/release-gate.yml new file mode 100644 index 0000000..421a2e7 --- /dev/null +++ b/.github/workflows/release-gate.yml @@ -0,0 +1,72 @@ +name: Release gate (self-hosted) + +# Tier-4 end-to-end validation against the REAL synced Monero + Tari nodes — the pre-release +# gate (#54). 
It runs on the dedicated, self-hosted release server (which holds real wallet / +# onion keys), so it MUST only ever run code we trust. +# +# SECURITY: there is deliberately NO `pull_request` trigger. A fork PR's code running on this +# runner could steal the box's keys or persist a backdoor (GitHub recommends against self-hosted +# runners on public repos for exactly this reason). The gate runs only on: +# - workflow_dispatch — a maintainer manually runs it on a ref they've reviewed, OR +# - push to main — post-merge, on trusted code. +# To validate a specific fork PR, review it first, then dispatch this workflow on that ref. +# See docs/release-server.md. +on: + workflow_dispatch: + inputs: + stack_dir: + description: "Path to the deployed Pithead stack on the runner (absolute; default $HOME/code/pithead)" + required: false + default: "" + mode: + description: "check = non-destructive; matrix = full destructive config matrix (with a safety backup + auto-rollback)" + required: false + default: "check" + type: choice + options: [check, matrix] + push: + branches: [main] + +# Never run two gates against the one shared box at the same time. +concurrency: + group: release-gate + cancel-in-progress: false + +jobs: + release-gate: + name: Tier-4 live matrix (real nodes) + # Register the server with these labels: `pithead-release` scopes the gate to the dedicated + # box; prefer an ephemeral / just-in-time runner in its own runner group. + runs-on: [self-hosted, pithead-release] + steps: + - uses: actions/checkout@v4 + + - name: Validate against the real synced nodes + # Inputs go through env (not interpolated into the script) to avoid shell injection. 
+ env: + STACK_DIR_INPUT: ${{ github.event.inputs.stack_dir }} + MODE_INPUT: ${{ github.event.inputs.mode }} + run: | + set -euo pipefail + DIR="${STACK_DIR_INPUT:-$HOME/code/pithead}" + MODE="${MODE_INPUT:-check}" + echo "Release gate: stack dir=$DIR, mode=$MODE" + + # Always assess fitness + the non-destructive live state first. + bash tests/integration/run.sh --local --dir "$DIR" --readiness + bash tests/integration/run.sh --local --dir "$DIR" --check + + # The full destructive config matrix is opt-in; --safety-backup rolls the box back if + # anything fails, so a red run leaves the server as it found it. + if [ "$MODE" = "matrix" ]; then + bash tests/integration/run.sh --local --dir "$DIR" --workers 2 --safety-backup --lifecycle + fi + + - name: Upload artifacts (redacted) + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-gate-results + path: tests/integration/results/ + if-no-files-found: ignore + retention-days: 14 diff --git a/.gitignore b/.gitignore index 3f43412..729558f 100644 --- a/.gitignore +++ b/.gitignore @@ -21,5 +21,8 @@ htmlcov/ *.egg-info/ .eggs/ +# Integration test artifacts (manifest, per-scenario logs, captured state) +/tests/integration/results/ + # OS .DS_Store \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d9c1de..2aae36f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,53 @@ per the process in [`docs/releasing.md`](docs/releasing.md). ### Added +- A four-tier test strategy for simulating every runtime situation (#54), documented in + `docs/testing-strategy.md` with a full scenario catalog: + - **Live config-matrix suite** (`tests/integration/`, tier 4) that drives a real, synced + server through the config matrix and asserts the stack behaves — containers healthy, nodes + synced, miners mining, dashboard reading correct live state, `status` exit codes, secrets + preserved. Runs over SSH or `--local`; the blocking pre-release gate. 
A `--fault-injection` + phase deliberately breaks monerod (stop / SIGSTOP / remove) to assert `pithead status`' + down/unhealthy/missing verdicts and the failover→recovery cycle. `make test-integration`. + - **Controllable fake monerod/Tari + a contract test** (`tests/integration/fakes/`, tier 2) + that points the real dashboard clients at the fakes and asserts they parse every state — + docker-free, runs on every PR. `make test-fakes`. + - **Fake-daemon docker mini-stack** (`tests/integration/mini-stack/`, tier 3) running the real + dashboard + docker-control proxy against the fakes, asserting sync hold/release and Tari + reject/readmit end-to-end with real containers (`make test-mini-stack`). Validated green + (11/11) on a real Docker host, and isolated (namespaced container names + non-colliding + ports) so it can run safely beside a live deployment. + - New dashboard unit tests for the required-Tari sync gate, the #35-latch × #31-failover + interaction, and simultaneous double outages. + - A generated **test inventory** (`docs/test-inventory.md`, `make test-inventory`) listing + every test/scenario across all suites, kept honest by a CI drift check. + - A non-destructive **`--check`** mode for the live harness (assert the box's current state — + no config change/apply/restore); the safe first run / ongoing health check. Validated with + a 22/22 green run against a real synced, mining box, which calibrated the harness to trust + monerod's own sync flag (a synced local node's dashboard sync panel reads "loading") and + `proxy_workers` for mining liveness (`stratum.conns` can read 0 while mining). + - A developer testing guide (`docs/testing-guide.md`): per-change recipes, conventions, and + the calibration gotchas learned on real hardware. 
+ - Regression guards for past bugs/security fixes: extended the #90 hardening section of + `tests/stack/test_compose.sh` with per-service least-privilege checks for the Docker socket + proxies (the read proxy can't POST; the control proxy is start/stop-only; both mount the + socket read-only) and the Tari `[m]inotari` self-match guard — alongside the existing + no-new-privileges / cap_drop / credential-free-healthcheck assertions. Plus a + `dashboard.host` "auto"-revert test and the schema-migration test that caught the DB upgrade + bug above. + - Release/validation-server tooling: a `--readiness` mode for the live harness (non-destructive + assessment that a box is fit to be a release server — synced chains reusable, snapshot-capable + filesystem, disk headroom, secrets owner-only, dashboard localhost-only), a + `docs/release-server.md` guide (why end-to-end validation needs a dedicated server vs. what + GitHub Actions runs free on every PR, the hardening checklist, and the **safe** self-hosted- + runner setup), and a `release-gate.yml` workflow that runs the tier-4 matrix on a self-hosted + runner only on trusted code (manual dispatch / push to main — never on a fork PR). + - A `--safety-backup` rollback net for the live harness: takes a real `pithead backup` before + the destructive scenarios and automatically rolls the box back (down → restore → up) if + anything fails, removing the archive on success — so the destructive matrix can run on a + precious box. The `--lifecycle` phase also does a `backup` → `restore` round-trip (assert the + pool reverts and secrets survive), exercising both verbs end-to-end. + - `UPDATE_INTERVAL` is now env-configurable (lets the mini-stack loop fast in CI). - Dashboard header shows the host's **IP address** next to the hostname when the configured `dashboard.host` is a name, as `hostname @ ip` (e.g. 
`pithead.local @ 192.168.1.42`), so you can still reach the dashboard when the hostname doesn't resolve from your phone or another machine on the LAN. The @@ -62,6 +109,14 @@ per the process in [`docs/releasing.md`](docs/releasing.md). ### Changed +- The Compose **project name is now pinned to `pithead`** (`name:` in `docker-compose.yml`), so + the stack's images, network and volumes are prefixed `pithead*` regardless of the checkout + directory — instead of inheriting the directory's name (which left older checkouts named after + the repo's previous name). `pithead up`/`apply`/`upgrade` detect a stack still running under + the old, directory-derived project name and migrate it automatically (only that project's + containers are removed so the renamed project can take over — bind-mounted chain data and the + Tor onion keys are untouched). One-time after the rename, Caddy re-issues its local TLS cert + under the new project, so re-trust the dashboard cert if you'd installed the old one. - Hardened the leaf containers (caddy, xmrig-proxy, dashboard, docker-proxy, docker-control) with `no-new-privileges`. All except the dashboard also `cap_drop: [ALL]` (caddy keeps `NET_BIND_SERVICE` for `:80`/`:443`); the dashboard keeps its default capabilities because it @@ -88,6 +143,16 @@ per the process in [`docs/releasing.md`](docs/releasing.md). before resetting (without an `apply`) can no longer wipe a directory the stack never used. It also refuses to run rather than guess if `.env` doesn't name them (#139). +### Fixed + +- Dashboard pruned/full label (#32) always showed **Full** on local nodes: the dashboard parsed + `MONERO_PRUNE` with `== "true"`, but pithead writes it as `1`/`0`, so a pruned node read as + Full. Now accepts `1`/`true`/`yes`/`on`. Found by the live integration harness on a real box. 
+- Dashboard DB upgrade path: opening a database created by an early (pre-`timestamp`) schema + threw `no such column: timestamp` and aborted the migration, leaving the DB half-upgraded — + `_create_tables` built the `idx_ts` index on a column `_migrate_db` hadn't added yet. Indexes + are now created after migrations. Found by a new schema-migration intent test. + ### Security - The monerod RPC credentials are no longer interpolated into the compose healthcheck command diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b1501be..b801517 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,17 +21,26 @@ whole new feature, contributions are very welcome. This guide covers the workflo make test ``` - This runs everything CI does: + This runs everything CI does without a server or Docker: - - **lint** — `shellcheck` over `pithead` and the test scripts. Keep `pithead` - shellcheck-clean (no new warnings). - - **test-dashboard** — the dashboard `pytest` suite, which must stay at or above the - **80% coverage gate**. + - **lint** — `shellcheck` over `pithead` and the test scripts (keep them + `--severity=warning` clean). + - **test-dashboard** — the dashboard `pytest` suite (must stay ≥ the **80% coverage gate**). - **test-stack** — the `pithead` shell test suite. - **test-compose** — `docker-compose.yml` interpolation validation. - -4. Update the docs in [`docs/`](docs/) (and the README, if relevant) for any - user-facing change. + - **test-integration-selftest** — the integration harness's own pure logic. + - **test-fakes** — the tier-2 contract test (real dashboard clients vs controllable fakes). + - the **test-inventory drift check** — fails if a test was added/removed without + regenerating [`docs/test-inventory.md`](docs/test-inventory.md) (`make test-inventory`). + + Bigger, infra-dependent suites run separately: `make test-mini-stack` (tier-3 docker) and + `make test-integration` (tier-4 live, against a real box — start with `--check`). + +4. 
**Add or update tests** for your change — cover the *intent* (a behavior/contract), not just + the line. The [Testing Guide](docs/testing-guide.md) has per-change recipes; the + [Testing Strategy](docs/testing-strategy.md) explains the tiers. +5. Update the docs in [`docs/`](docs/) (and the README, if relevant) for any + user-facing change, and run `make test-inventory` if you touched the test suites. ## Opening a pull request diff --git a/Makefile b/Makefile index bfa8c12..91a51fb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Local test entry points (mirror the GitHub Actions CI jobs). -.PHONY: test test-dashboard test-stack test-compose lint +.PHONY: test test-dashboard test-stack test-compose test-integration test-integration-selftest test-fakes test-mini-stack lint -test: lint test-dashboard test-stack test-compose ## Run everything +test: lint test-dashboard test-stack test-compose test-integration-selftest test-fakes ## Run everything that doesn't need a server/docker test-dashboard: ## Dashboard unit/component tests with coverage gate cd build/dashboard && PYTHONPATH=. 
python3 -m pytest \ @@ -10,8 +10,33 @@ test-dashboard: ## Dashboard unit/component tests with coverage gate test-stack: ## pithead shell test suite bash tests/stack/run.sh -test-compose: ## Validate docker-compose.yml interpolation +test-compose: ## Validate docker-compose.yml interpolation + hardening invariants (#90) bash tests/stack/test_compose.sh +test-integration-selftest: ## Integration harness pure-logic self-test (no server needed) + bash tests/integration/selftest.sh + +test-fakes: ## Fake-daemon contract test — real dashboard clients vs controllable fakes (no docker) + PYTHONPATH=build/dashboard python3 -m pytest tests/integration/fakes -q + +test-mini-stack: ## Fake-daemon docker mini-stack end-to-end (needs docker; CI) + bash tests/integration/mini-stack/run-mini-stack.sh + +test-inventory: ## Regenerate the test coverage inventory (docs/test-inventory.md) + bash tests/inventory.sh > docs/test-inventory.md + +test-inventory-check: ## Fail if docs/test-inventory.md is stale (CI drift guard) + @bash tests/inventory.sh | diff -u docs/test-inventory.md - \ + && echo "test-inventory is up to date" \ + || { echo "docs/test-inventory.md is stale — run 'make test-inventory'"; exit 1; } + +# End-to-end matrix against a REAL test server (issue #54). Needs a provisioned box; pass +# connection + options through ARGS, e.g.: +# make test-integration ARGS="--host miner@10.0.0.5 --dir pithead --lifecycle" +# See docs/integration-testing.md. +test-integration: ## Run the live config-matrix integration suite (requires a test box; pass ARGS=...) 
+ bash tests/integration/run.sh $(ARGS) + lint: ## shellcheck the stack scripts - shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh + shellcheck --severity=warning pithead tests/stack/run.sh tests/stack/test_compose.sh \ + tests/inventory.sh tests/integration/*.sh tests/integration/mini-stack/*.sh diff --git a/build/dashboard/mining_dashboard/config/config.py b/build/dashboard/mining_dashboard/config/config.py index 7aa4c3a..606d2fd 100644 --- a/build/dashboard/mining_dashboard/config/config.py +++ b/build/dashboard/mining_dashboard/config/config.py @@ -31,7 +31,12 @@ # XMRig Worker API Configuration XMRIG_API_PORT = 8080 API_TIMEOUT = 1 # Connection timeout (seconds) for worker API calls -UPDATE_INTERVAL = 30 # Frequency (seconds) of the main data aggregation loop +try: + # main data-loop period (s); lowered in integration tests. Tolerate a malformed override + # rather than crashing the dashboard at import. + UPDATE_INTERVAL = int(float(os.environ.get("UPDATE_INTERVAL", "30"))) +except (TypeError, ValueError): + UPDATE_INTERVAL = 30 # --- XvB Algorithm Constants --- # Duration of the donation switching cycle (10 minutes) @@ -127,7 +132,10 @@ # Whether the bundled monerod is configured to prune the blockchain (config.json # monero.prune → MONERO_PRUNE). Used to label the node Pruned/Full in the UI (Issue #32); # only meaningful for a local node (we don't control a remote node's pruning). -MONERO_PRUNE = os.environ.get("MONERO_PRUNE", "true").strip().lower() == "true" +# pithead renders this as 1/0 (the form monerod's CLI wants), so accept the numeric/boolean +# truthy forms — not just "true", which silently read pruned nodes as Full before (the +# pruned/full label is purely display, #32). 
+MONERO_PRUNE = os.environ.get("MONERO_PRUNE", "true").strip().lower() in ("true", "1", "yes", "on") # --- Tari Configuration --- # Connection details for the Tari Base Node and Block Explorer diff --git a/build/dashboard/mining_dashboard/service/storage_service.py b/build/dashboard/mining_dashboard/service/storage_service.py index 7853d72..2b361ef 100644 --- a/build/dashboard/mining_dashboard/service/storage_service.py +++ b/build/dashboard/mining_dashboard/service/storage_service.py @@ -63,6 +63,11 @@ def _init_db(self): with self._conn: self._create_tables() self._migrate_db() + # Indexes come AFTER migration: idx_ts is on history(timestamp), a column + # _migrate_db adds when upgrading a pre-timestamp DB. Creating it in + # _create_tables would throw "no such column: timestamp" on that old schema + # and abort the whole migration, leaving the DB half-upgraded. + self._create_indexes() except sqlite3.Error as e: self.logger.error(f"DB Init Error: {e}") @@ -72,6 +77,10 @@ def _create_tables(self): self._conn.execute("CREATE TABLE IF NOT EXISTS workers (name TEXT PRIMARY KEY, ip TEXT, last_seen REAL)") self._conn.execute("CREATE TABLE IF NOT EXISTS kv_store (key TEXT PRIMARY KEY, value TEXT)") self._conn.execute("CREATE TABLE IF NOT EXISTS shares (ts REAL PRIMARY KEY, difficulty REAL)") + + def _create_indexes(self): + """Creates indexes. 
Called after migrations so the indexed columns are guaranteed to + exist even on a database created by an older schema version.""" self._conn.execute("CREATE INDEX IF NOT EXISTS idx_ts ON history(timestamp)") self._conn.execute("CREATE INDEX IF NOT EXISTS idx_share_ts ON shares(ts)") diff --git a/build/dashboard/tests/config/test_config.py b/build/dashboard/tests/config/test_config.py index ef8153e..eef8715 100644 --- a/build/dashboard/tests/config/test_config.py +++ b/build/dashboard/tests/config/test_config.py @@ -27,6 +27,24 @@ def test_donation_level_env_override(self): cfg = _reload_config() assert cfg.XVB_DONATION_LEVEL == "auto" # normalized to lowercase + def test_monero_prune_accepts_truthy_forms(self): + # pithead writes MONERO_PRUNE=1, so "1" (and friends) must read as pruned — not just + # the literal "true". Regression for the Pruned/Full label always showing Full (#32). + for v in ("true", "1", "yes", "On", " 1 ", "TRUE"): + with patch.dict(os.environ, {"MONERO_PRUNE": v}): + assert _reload_config().MONERO_PRUNE is True, f"{v!r} should be pruned" + + def test_monero_prune_accepts_falsy_forms(self): + for v in ("false", "0", "no", "off", ""): + with patch.dict(os.environ, {"MONERO_PRUNE": v}): + assert _reload_config().MONERO_PRUNE is False, f"{v!r} should be full" + + def test_update_interval_tolerates_bad_values(self): + # A malformed override must fall back to the default, not crash the dashboard at import. 
+ for v, expected in [("2", 2), ("2.5", 2), ("", 30), ("nonsense", 30)]: + with patch.dict(os.environ, {"UPDATE_INTERVAL": v}): + assert _reload_config().UPDATE_INTERVAL == expected, f"{v!r} -> {expected}" + def test_tier_config_env_override_valid(self): custom = {"donor_ultra": 5_000_000, "donor_basic": 500} # deploy injects the JSON wrapped in single quotes diff --git a/build/dashboard/tests/service/test_data_service.py b/build/dashboard/tests/service/test_data_service.py index 304c681..ee881d7 100644 --- a/build/dashboard/tests/service/test_data_service.py +++ b/build/dashboard/tests/service/test_data_service.py @@ -580,3 +580,73 @@ async def test_iteration_survives_collector_error(self): # The error is caught inside the loop; the sleep after it raises to stop us. with pytest.raises(StopAsyncIteration): await svc.run() + + +class TestControlPlaneComposition: + """Compositions of the sync-gate (#35) and failover (#31) the per-feature tests don't + cover on their own: the required-Tari hold, and the two features coexisting after release.""" + + async def test_run_holds_when_tari_required_and_only_monero_synced(self): + # Monero synced, Tari still syncing, Tari REQUIRED: the gate condition + # `monero_synced AND (tari_synced OR NOT TARI_REQUIRED)` is NOT satisfied, so the + # miner stays held until Tari also finishes — the mirror of the non-blocking case. 
+ svc, sm, proxy = _make_service() + proxy.get_workers.return_value = {"workers": []} + svc._apply_worker_rejection = AsyncMock() + + worker_client = MagicMock() + worker_client.get_stats = AsyncMock(return_value={}) + tari_client = MagicMock() + tari_client.get_sync_status = AsyncMock( + return_value={"is_syncing": True, "reachable": True, "percent": 80, "current": 80, "target": 100}) + tari_client.close = AsyncMock() + + with patch.object(ds_mod, "ClientSession", _FakeClientSession), \ + patch.object(ds_mod, "XMRigWorkerClient", return_value=worker_client), \ + patch.object(ds_mod, "TariClient", return_value=tari_client), \ + patch.object(ds_mod, "SYNC_GATE_CONTAINERS", ["p2pool", "xmrig-proxy"]), \ + patch.object(ds_mod, "TARI_REQUIRED", True), \ + patch.object(ds_mod, "get_stratum_stats", return_value=({}, [])), \ + patch.object(ds_mod, "get_network_stats", return_value={"height": 100}), \ + patch.object(ds_mod, "get_tari_stats", return_value={"active": True, "status": "OK", "height": 3}), \ + patch.object(ds_mod, "get_p2pool_stats", return_value={"pool": {"last_share_time": 0, "difficulty": 0}}), \ + patch.object(ds_mod, "get_monero_sync_status", AsyncMock(return_value={"is_syncing": False, "reachable": True})), \ + patch.object(ds_mod, "get_disk_usage", return_value={}), \ + patch.object(ds_mod, "get_hugepages_status", return_value=("Enabled", "ok", "1/2")), \ + patch.object(ds_mod, "get_memory_usage", return_value={}), \ + patch.object(ds_mod, "get_load_average", return_value="0"), \ + patch.object(ds_mod, "get_cpu_usage", return_value="0%"), \ + patch("asyncio.sleep", AsyncMock(side_effect=StopAsyncIteration)): + with pytest.raises(StopAsyncIteration): + await svc.run() + + stopped = {c.args[0] for c in svc.docker_control.stop.await_args_list} + assert stopped == {"p2pool", "xmrig-proxy"} + svc.docker_control.start.assert_not_called() + assert svc.miner_released is False + assert svc.latest_data["miner_held"] is True + + async def 
test_post_release_blip_lets_failover_act_without_rehold(self): + # After release, a node-down event must NOT be re-held by the sync gate (the #35 + # one-way latch), yet #31 failover must still stop the proxy so workers fail over. + # The two coexist: gate no-ops, rejection acts on the proxy only. + svc, _sm, _proxy = _make_service() + svc.miner_released = True + with patch.object(ds_mod, "SYNC_GATE_CONTAINERS", ["p2pool", "xmrig-proxy"]), \ + patch.object(ds_mod, "REJECT_WORKERS_CONTAINER", "xmrig-proxy"), \ + patch.object(ds_mod, "TARI_REQUIRED", True): + await svc._apply_sync_gate(gate_satisfied=False) # latch → no-op + await svc._apply_worker_rejection(monero_down=True, tari_down=False) + stopped = [c.args[0] for c in svc.docker_control.stop.await_args_list] + assert stopped == ["xmrig-proxy"] # p2pool was NOT re-held + svc.docker_control.start.assert_not_called() + assert svc.workers_rejected is True + + async def test_both_nodes_down_rejects_once(self): + # A simultaneous Monero+Tari outage (both required) is a single rejection, not two. 
+ svc, _sm, _proxy = _make_service() + with patch.object(ds_mod, "REJECT_WORKERS_CONTAINER", "xmrig-proxy"), \ + patch.object(ds_mod, "TARI_REQUIRED", True): + await svc._apply_worker_rejection(monero_down=True, tari_down=True) + svc.docker_control.stop.assert_awaited_once_with("xmrig-proxy") + assert svc.workers_rejected is True diff --git a/build/dashboard/tests/service/test_storage_service.py b/build/dashboard/tests/service/test_storage_service.py index 0c75cbb..9ee3996 100644 --- a/build/dashboard/tests/service/test_storage_service.py +++ b/build/dashboard/tests/service/test_storage_service.py @@ -1,9 +1,10 @@ +import sqlite3 import time import pytest from mining_dashboard.service.storage_service import StateManager -from mining_dashboard.config.config import TIER_DEFAULTS +from mining_dashboard.config.config import TIER_DEFAULTS, HISTORY_RETENTION_SEC, WORKER_RETENTION_SEC class TestDefaults: @@ -153,3 +154,81 @@ def test_corrupted_kv_value_skipped(self, tmp_path): sm.load() # must not raise assert sm.get_xvb_stats()["avg_1h"] == 0.0 # falls back to default sm.close() + + +class TestSchemaMigration: + """The upgrade path: opening a DB created by an older version must migrate in place + without losing data. These exercise branches a fresh DB never hits.""" + + def test_history_timestamp_backfilled_from_iso_on_upgrade(self, tmp_path): + # Intent: a pre-timestamp history table (only the original t/v columns) must gain the + # v_p2pool/v_xvb/timestamp columns AND have timestamp backfilled from the ISO `t` + # string — otherwise old points become undatable and drop out of the chart/retention. + db = str(tmp_path / "old_schema.db") + # Recent UTC ISO strings (SQLite's strftime('%s', t) treats t as UTC) so the migrated + # rows fall inside load()'s 30-day retention window and aren't filtered out. 
+ now = time.time() + t1 = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now - 7200)) # 2h ago + t2 = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now - 3600)) # 1h ago + conn = sqlite3.connect(db) + conn.execute("CREATE TABLE history (t TEXT, v REAL)") # the original schema + conn.execute("INSERT INTO history (t, v) VALUES (?, ?)", (t1, 1000.0)) + conn.execute("INSERT INTO history (t, v) VALUES (?, ?)", (t2, 1100.0)) + conn.commit() + conn.close() + + sm = StateManager(db_path=db) # __init__ runs _create_tables (no-op) + _migrate_db + try: + hist = sm.get_history() + assert len(hist) == 2 + assert all(h["timestamp"] > 0 for h in hist), "timestamp backfilled from ISO t" + # ordering preserved: the earlier ISO time sorts first (load() orders by timestamp) + assert hist[0]["timestamp"] < hist[1]["timestamp"] + # the new split-rate columns default to 0, not NULL + assert hist[0]["v_p2pool"] == 0 and hist[0]["v_xvb"] == 0 + finally: + sm.close() + + +class TestRetention: + """Long-running behavior: history/workers must not grow unbounded. Tests are white-box + (they backdate timestamps) so they don't need to actually wait days.""" + + def test_history_older_than_retention_pruned_from_memory(self, state_manager): + # Intent: appending a fresh sample drops in-memory points older than the 30-day window + # (the popleft loop), so the deque can't grow without bound on a long-running dashboard. 
+ state_manager.state["hashrate_history"].append({ + "t": "old", "v": 1.0, "v_p2pool": 0, "v_xvb": 0, + "timestamp": time.time() - HISTORY_RETENTION_SEC - 3600, # 30d + 1h ago + }) + assert len(state_manager.get_history()) == 1 + state_manager.update_history(2000.0) # a fresh sample at "now" + hist = state_manager.get_history() + assert len(hist) == 1 and hist[0]["v"] == 2000.0 # the ancient point was pruned + + def test_old_history_pruned_from_db_when_cleanup_fires(self, state_manager, monkeypatch): + # Intent: the probabilistic DB cleanup actually deletes expired rows when it fires, so + # the on-disk DB stays bounded. We force the 5% path deterministically. + old_ts = time.time() - HISTORY_RETENTION_SEC - 10 * 24 * 3600 # 40 days ago + with state_manager._db_lock: + state_manager._conn.execute( + "INSERT INTO history (t, v, v_p2pool, v_xvb, timestamp) VALUES (?,?,?,?,?)", + ("old", 1.0, 0, 0, old_ts)) + state_manager._conn.commit() + monkeypatch.setattr("mining_dashboard.service.storage_service.random.random", lambda: 0.0) + state_manager.update_history(2000.0) + with state_manager._db_lock: + remaining = state_manager._conn.execute( + "SELECT COUNT(*) FROM history WHERE timestamp < ?", + (time.time() - HISTORY_RETENTION_SEC,)).fetchone()[0] + assert remaining == 0, "expired DB rows are pruned" + + def test_stale_workers_pruned_after_retention_window(self, state_manager): + # Intent: a worker not seen within WORKER_RETENTION_SEC (7d) is dropped when any worker + # next checks in — so stale name→IP mappings don't linger and leak memory. + state_manager.update_known_workers([{"name": "rig1", "ip": "10.0.0.1"}]) + # Backdate rig1 so it's now older than the retention window. 
+ state_manager.state["known_workers"]["rig1"]["last_seen"] = time.time() - WORKER_RETENTION_SEC - 3600 + state_manager.update_known_workers([{"name": "rig2", "ip": "10.0.0.2"}]) # a fresh check-in + names = {w["name"] for w in state_manager.get_known_workers()} + assert "rig2" in names and "rig1" not in names diff --git a/docker-compose.yml b/docker-compose.yml index 782fb42..aaeab9d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,3 +1,9 @@ +# Pin the Compose project name so the stack is always "pithead" — its images, network and +# volumes are prefixed `pithead*` regardless of the checkout directory's name. Without this, +# Compose derives the project from the directory, which left older checkouts named after the +# repo's previous name. `pithead up`/`apply`/`upgrade` migrate an old-named stack automatically. +name: pithead + x-logging: &default-logging driver: "json-file" options: diff --git a/docs/README.md b/docs/README.md index b30dd61..718aa24 100644 --- a/docs/README.md +++ b/docs/README.md @@ -17,7 +17,12 @@ deeper on individual topics once you're up and running. | [Connecting Miners](workers.md) | Pointing any existing rig at the stack, plus [RigForge](https://github.com/p2pool-starter-stack/rigforge) for setting up new miners. | | [Architecture](architecture.md) | The nine services, how they fit together, the privacy model, and the algorithmic XvB switching engine. | | [Operations & Maintenance](operations.md) | The full `pithead` command reference, upgrades, backups, and troubleshooting. | +| [Testing Strategy](testing-strategy.md) | The four test tiers (unit → contract → fake-daemon mini-stack → live matrix), the full scenario catalog, and which tier proves each situation. | +| [Testing Guide](testing-guide.md) | For developers: how to write and run tests, per-change recipes, conventions, and real-hardware gotchas. 
| +| [Test Inventory](test-inventory.md) | Generated, exhaustive list of every test/scenario across all suites — the inventory of what's covered. | +| [Integration Testing](integration-testing.md) | The end-to-end config-matrix suite that validates the stack against real Monero + Tari nodes — the blocking pre-release gate. | | [Releasing](releasing.md) | How Pithead is versioned and released — one product, one version, the `VERSION` source of truth, and the GHCR stage→promote pipeline. | +| [Release / Validation Server](release-server.md) | Why end-to-end validation needs a dedicated server (and what GitHub Actions does free on every PR), how to provision and **harden** it, and the safe self-hosted-runner setup. | | [FAQ](faq.md) | Common questions, plus why Pithead vs. doing it yourself or Gupax. | ## Quick links diff --git a/docs/integration-testing.md b/docs/integration-testing.md new file mode 100644 index 0000000..05d7c73 --- /dev/null +++ b/docs/integration-testing.md @@ -0,0 +1,222 @@ +# Integration Testing + +How Pithead is validated end-to-end against a **real Ubuntu server** running full Monero and +full Tari nodes — the runtime/integration half of our testing, and the **blocking pre-release +gate** described in [Releasing](releasing.md) (issue +[#54](https://github.com/p2pool-starter-stack/pithead/issues/54)). + +Our other suites are client-side and never touch a daemon: the `pithead` shell tests stub out +`docker`/`sudo`, the compose test only checks `docker compose config` interpolation, and the +dashboard pytest mocks its clients. They prove the *code* is correct; they can't prove that a +real `apply → sync-gate → mine → status` flow works on a real host. That's what this suite is +for. + +> This live matrix is **tier 4** of a four-tier plan. 
The runtime *situations* a healthy box +> can't show (cold sync, node-down, unhealthy containers, XvB tiers) are simulated more cheaply +> at lower tiers — unit tests, a client **contract test** against controllable fakes +> ([`tests/integration/fakes/`](../tests/integration/fakes/)), and a **fake-daemon docker +> mini-stack** ([`tests/integration/mini-stack/`](../tests/integration/mini-stack/)). See +> [Testing Strategy](testing-strategy.md) for the full picture and scenario catalog. + +The suite lives under [`tests/integration/`](../tests/integration/): + +| File | Role | +|---|---| +| `run.sh` | Entry point. Connects to the box (SSH or `--local`), iterates the config matrix, asserts, captures artifacts, restores. | +| `scenarios.sh` | The **declarative config matrix** — adding a case is a one-line data edit. | +| `lib.sh` | Shared helpers: target I/O (SSH/local), assertions, readiness waiters, config rendering, secret redaction. | +| `selftest.sh` | Pure-logic self-test (no server). Runs in CI on every PR. | + +--- + +## How it works + +The suite assumes the box is **already deployed and synced with miners connected** — the whole +point of a dedicated test server is that the full Monero and Tari nodes are synced once and +*reused*, so each scenario runs in minutes instead of waiting days for a chain sync. + +Given that, the harness moves between matrix scenarios with non-interactive **`pithead apply +-y`**, which: + +- recreates only the containers whose resolved config changed, +- **reuses the synced chain data dirs** (it never re-syncs, never re-provisions Tor), and +- **preserves secrets** (`PROXY_AUTH_TOKEN`, onion addresses). + +For each scenario it writes a `config.json`, applies it, **waits on real readiness signals** +(container health, `pithead status`, dashboard sync %, miner-released) with timeouts — never a +fixed `sleep` — then runs the assertion battery below.
All reads happen *on the box* +(`pithead status`/`doctor` and `curl http://127.0.0.1:8000/api/state`), so SSH and `--local` +behave identically and we never depend on resolving the box's dashboard hostname. + +Before the first scenario it snapshots the box's original `config.json` and a fingerprint of +its secrets; after the run it **restores the original config** and re-applies (unless +`--keep`). + +### Safety model + +The test box holds real synced nodes and real keys — treat it as production-sensitive. + +- **Never mutates the canonical chains.** The harness only ever writes `config.json` and lets + `apply` recreate containers. It does not `rm -rf` data dirs. The destructive `monero.prune` + axis (a pruned vs. full DB are different on disk) is only exercised against a *separate* + synced data dir you pass with `--pruned-data-dir` / `--full-data-dir`; without it the case + is reported **SKIPPED**, never run against the canonical DB. +- **No silent coverage drops.** Any scenario whose prerequisite is missing (an alt data dir, a + remote endpoint) is logged as `SKIPPED` with the reason — it never quietly disappears. +- **Secrets hygiene.** RPC creds, the proxy token, and onion addresses are never printed. + Secret-preservation is checked by hashing them **on the box** (`sha256sum`) and comparing the + hash — the plaintext never crosses the wire. All captured artifacts are passed through a + redactor. +- **Continue-on-error.** A failing assertion doesn't abort the run; the whole matrix is + collected and summarized, with per-scenario artifacts for the failures. + +--- + +## Provisioning the test box + +A one-time setup. Target the Ubuntu LTS releases we support (22.04 / 24.04). + +1. **Install and deploy Pithead** normally (see [Getting Started](getting-started.md)) and let + it fully sync. You want the box in the steady state: all containers healthy, Monero + Tari + synced, and at least one miner (ideally two) connected and submitting shares. +2. 
**Reusable synced data.** The synced `monero.data_dir` and `tari.data_dir` are the key + enabler — they're reused across every scenario. The same synced full monerod is also what + the `remote` scenario points at as an external node (see `--remote-monero-host`). +3. **Tools on the box:** `jq`, `curl`, `docker` (with compose v2), and `sha256sum`. The first + three are already Pithead prerequisites; `sha256sum` ships with coreutils. +4. **Access.** Key-based SSH from wherever you run the suite (or run it on the box with + `--local`). If Docker needs root there, use `--pithead "sudo ./pithead"`. +5. *(Optional)* A second synced data dir for the **opposite** prune mode if you want to cover + both pruned and full in one run — see the prune axis above. + +> **Runner security.** Keep the box least-privilege and network-isolated; it holds real keys. +> This is a self-hosted/manual gate, not something we run on public CI. + +--- + +## Running it + +```bash +# Non-destructive health check first (recommended): no config changes, no apply +tests/integration/run.sh --host miner@10.0.0.5 --dir pithead --check + +# Whole matrix over SSH +make test-integration ARGS="--host miner@10.0.0.5 --dir pithead" + +# …or directly +tests/integration/run.sh --host miner@10.0.0.5 --dir pithead + +# On the box itself, plus the lifecycle + node-down failover phase +tests/integration/run.sh --local --dir /home/miner/pithead --lifecycle + +# A single scenario (see --list for names) +tests/integration/run.sh --host miner@10.0.0.5 --scenario remote-main-secure-tari \ + --remote-monero-host 10.0.0.5:18081 + +# Cover the OPPOSITE prune mode. The box mines one mode against its live chain; the other is +# skipped unless you supply a chain for it (it's otherwise covered by the fake mini-stack). A +# pruned box supplies a full chain; a full box supplies a pruned one (build one with +# tests/integration/build-pruned-chain.sh). See docs/release-server.md → prune-axis recipe. 
+tests/integration/run.sh --host miner@10.0.0.5 --full-data-dir /srv/monero-full +``` + +Useful flags (full list in `run.sh --help`): + +| Flag | Purpose | +|---|---| +| `--host <user@host>` / `--local` | Drive the box over SSH, or a stack on this machine. | +| `--dir <path>` | The Pithead stack directory **on the box** — relative to the SSH login dir or absolute (default `pithead`). Avoid a literal `~`; your local shell expands it before the box sees it. | +| `--pithead <cmd>` | How to invoke pithead there (e.g. `"sudo ./pithead"`). | +| `--check` | **Non-destructive**: assert the box's current live state only — no config change, no apply, no restore. The safe first run / ongoing health check. | +| `--readiness` | **Non-destructive**: assess whether the box is fit to be a release/validation server (synced chains reusable, snapshot-capable FS, disk headroom, secrets owner-only, dashboard localhost-only). See [Release Server](release-server.md). | +| `--scenario <name>` | Run just one scenario. | +| `--workers <n>` | Miners expected online while mining (default `2`). | +| `--remote-monero-host <host:port>` | External node endpoint for the `remote` scenario. | +| `--pruned-data-dir` / `--full-data-dir` | Synced alt DB to enable the opposite prune mode. | +| `--lifecycle` | Also run the lifecycle phase (restart, apply secret-preservation). | +| `--fault-injection` | Also break monerod (stop / SIGSTOP / remove) and assert `status`' down/unhealthy/missing verdicts and the failover→recovery cycle. Destructive-then-restored; local mode only; slow. | +| `--safety-backup` | Take a `pithead backup` before the destructive scenarios and **auto-roll-back** (down → restore → up) if anything fails; the archive is removed on success. Recommended for the destructive matrix on a precious box; also exercises backup/restore end-to-end. | +| `--keep` | Don't restore the original config (leave the box on the last scenario). | +| `--out <dir>` | Where to write the manifest and failure artifacts.
| +| `--list` | Print the matrix and axis coverage and exit. | + +The runner exits non-zero if any assertion failed. + +--- + +## The config matrix + +Every axis below changes a real runtime path. The matrix covers the realistic combinations and +guarantees **every value of every axis is exercised at least once** (the `selftest` enforces +this, and `--list` prints it). + +| Axis | Values | What it exercises | +|---|---|---| +| `monero.mode` | `local` / `remote` | profile gating, RPC wiring, `status` ignoring monerod in remote mode | +| `monero.prune` | `true` (pruned) / `false` (full) | pruned vs. full display ([#32](https://github.com/p2pool-starter-stack/pithead/issues/32)), DB size | +| `monero.rpc_lan_access` | `false` (127.0.0.1) / `true` (LAN) | RPC bind address, security posture | +| `p2pool.pool` | `main` / `mini` / `nano` | `P2POOL_FLAGS`, sidechain selection | +| `xvb.enabled` | `true` / `false` | XvB tunnel/donor wiring | +| `dashboard.secure` | `true` (Caddy TLS) / `false` | Caddy config / scheme | +| `dashboard.tari_required` | `true` (blocking) / `false` | sync-gate behavior ([#35](https://github.com/p2pool-starter-stack/pithead/issues/35)/[#51](https://github.com/p2pool-starter-stack/pithead/issues/51)) | + +### What each scenario asserts + +- **Expected containers up, unexpected absent** — every service for that config is running and + healthy; in `remote` mode there is **no** `monerod`. +- **`pithead status` exit code** — `0` for a healthy config. +- **Dashboard reads live state** — `/api/state` is reachable; Monero is synced (`done`); + pruned/full display matches `monero.prune` ([#32](https://github.com/p2pool-starter-stack/pithead/issues/32)); the sidechain `pool.type` matches `p2pool.pool`. +- **End-to-end mining** — workers are online (`proxy_workers >= --workers`), stratum has + connections, and total hashes are accumulating ([#28](https://github.com/p2pool-starter-stack/pithead/issues/28)). 
+- **Posture propagated** — `MONERO_RPC_BIND`, `DASHBOARD_SECURE`, `XVB_ENABLED`, and + `TARI_REQUIRED` in `.env` match the config; the Caddyfile uses the right scheme. +- **Idempotency** — a second `apply -y` with no change is a clean no-op. +- **Secrets preserved** — the proxy token and onion addresses are unchanged across every apply. + +### Lifecycle + failover (`--lifecycle`) + +For one representative config: + +- `restart` brings the stack back healthy (`status` → `0`). +- An `apply` that changes the sidechain recreates only the affected containers and + **preserves secrets**; the dashboard reflects the new pool; then it's reverted. +- **Node-down failover ([#31](https://github.com/p2pool-starter-stack/pithead/issues/31)):** + stop `monerod` → `status` returns non-zero (node down) and the dashboard rejects workers + (stops `xmrig-proxy`) → start `monerod` → workers readmitted → `status` → `0`. + +> `upgrade` (which rebuilds/pulls images) is intentionally **not** run unattended — it's slow +> and changes the bundle under test. Validate it as part of the [release](releasing.md) +> staging smoke test instead. + +--- + +## Artifacts & triage + +Each run writes a **manifest** (`results/manifest.txt`) recording exactly what was under test +— the stack `VERSION`, git revision, and `docker compose images` — so a run is reproducible. + +On a scenario failure, the harness captures (redacted) to `results/<scenario>/`: +`compose-ps.txt`, `status.txt`, `doctor.txt`, `config.json`, `env.redacted.txt`, +`api-state.json`, and `logs.txt` (last 200 lines per service). The end-of-run summary lists +each failed assertion and points at these. + +--- + +## The self-test (CI) + +`tests/integration/selftest.sh` exercises the harness's pure logic — config rendering and +value typing, expectation derivation (profile gating), secret redaction, the SSH/local exec +wrapper, JSON parsing, and **matrix axis coverage** — with no server.
It runs in CI on every +PR (the `shell` job) and via `make test-integration-selftest`, so the harness itself is held to +the same lint/test standard as the rest of the stack. + +--- + +## Release gate (#44) + +The live matrix is the **required, blocking pre-release gate**: a release is not promoted or +published unless it's green against the real Monero + Tari nodes. It's surfaced as `make +test-integration` and wired into the `make release` pipeline's test gate — see +[Releasing › Pre-release gate](releasing.md#pre-release-gate-54). The version tagged/published +is the exact bundle this run validated. diff --git a/docs/release-server.md b/docs/release-server.md new file mode 100644 index 0000000..69c966f --- /dev/null +++ b/docs/release-server.md @@ -0,0 +1,210 @@ +# The Release / Validation Server + +How we validate a build **end-to-end before release**, why that needs a dedicated server, what +GitHub Actions does for free on every PR, and how to harden the server so it can't become a +liability. This is the operational companion to [Releasing](releasing.md) (the version/promote +pipeline) and [Integration Testing](integration-testing.md) (the harness it runs). + +## Can GitHub Actions do the full end-to-end? (short answer: no — and that's fine) + +**GitHub-hosted runners can't do the real-chain tier.** On a public repo the hosted Ubuntu +runners are generous and **free** (4 vCPU / 16 GiB RAM), but they are **ephemeral** — a fresh VM +per job, ~14 GiB of free disk, and a 6-hour job ceiling. A Monero chain is ~95 GiB pruned / +~270 GiB full and takes **days** to sync; Tari adds ~50 GiB. There is nowhere to keep that +synced state between runs, and no time to sync it inside a job. So the **real-daemon, real +merge-mining tier (tier 4) is simply not possible on hosted runners** — which is the whole +reason a dedicated, already-synced server exists ([#54](https://github.com/p2pool-starter-stack/pithead/issues/54)). 
+ +**But GitHub already runs almost everything else, free, on every PR.** Tiers 1–3 of the +[testing strategy](testing-strategy.md) need no real chain and run on the hosted runners in +minutes: + +- **Tier 1 — unit/component** (dashboard pytest + coverage gate, frontend, the `pithead` shell + suite, compose interpolation **and the #90 security/hardening invariants**). +- **Tier 2 — contract** (the real Monero/Tari clients vs. controllable fakes). +- **Tier 3 — the fake-daemon mini-stack** (the **real** dashboard + docker-control proxy driven + against fake daemons, with **real Docker** on the hosted runner) — this proves the control + plane end-to-end (sync hold/release, reject/readmit) on every PR. + +So the split is clean: + +| | Runs | Cost | Triggered | +|---|---|---|---| +| **Tiers 1–3** (logic, wiring, control plane, hardening) | GitHub-hosted runners | free (public repo) | **every PR** — the merge gate | +| **Tier 4** (real synced Monero+Tari, real merge-mining, prune/full DB, TLS/Tor, the config matrix, the staging smoke test) | the **dedicated server** | your hardware | pre-release / on-demand — the **release gate** | + +The hosted runners catch the vast majority of regressions before merge; the dedicated server +proves the things only reality can — and it's the **blocking pre-release gate**. + +## Validating PRs on the dedicated server — possible, but security-loaded + +You *can* register the server as a GitHub Actions **self-hosted runner** so Actions dispatches +the tier-4 job to it (self-hosted minutes don't count against anything — also free). But there +is a sharp edge, and it's the single most important thing on this page: + +> **GitHub explicitly recommends against self-hosted runners on public repositories.** Any user +> can open a pull request, and a malicious PR can run **arbitrary code on the runner**. 
Our +> server holds real **wallet payout addresses, Tor onion private keys, and RPC credentials**, so +> a compromised runner is a key-theft / persistent-backdoor event, not a flaky build. + +The safe rule: **the keyed server only ever runs code we trust.** Concretely: + +- **Do NOT trigger tier-4 on `pull_request`** (and never on a fork PR). "Require approval" only + gates *starting* the run — once it starts, the PR's code still executes on the box. +- **Trigger tier-4 only on trusted code:** `workflow_dispatch` (a maintainer manually runs it on + a ref they've reviewed) and/or `push` to `main` (post-merge). To E2E a specific fork PR, a + maintainer reviews it first, then dispatches the workflow on that ref. +- Register the runner as **ephemeral / just-in-time** (one job, then auto-removed) in its own + **runner group**, isolated from any private repos. +- Keep the runner **least-privilege**: a dedicated unprivileged user, the box runs nothing else + sensitive, and ideally the runner can reach the stack only through `pithead`/`docker`, not the + raw key files. + +This is exactly how the workflow ships: +[`.github/workflows/release-gate.yml`](../.github/workflows/release-gate.yml) runs **only** on +`workflow_dispatch` (and `push` to `main`) on a `[self-hosted, pithead-release]` runner — never +automatically on a PR. + +## Provisioning the server + +Target an LTS Ubuntu (22.04 / 24.04). One-time: + +1. **Install Pithead and let it fully sync** ([Getting Started](getting-started.md)) — full + Monero + full Tari, all containers healthy, a worker (ideally two) mining. The synced + `monero.data_dir` / `tari.data_dir` are the asset the harness reuses. +2. **Keep the active chain on fast storage (SSD/NVMe).** monerod is random-I/O heavy, so the + chain it runs against must not sit on a spinning HDD — that alone makes every scenario crawl. 
+ A snapshot/reflink-capable filesystem (**btrfs**/**zfs**/**xfs reflink**) is a *bonus*: it lets + the harness snapshot/restore a chain cheaply for the prune axis. But it's optional — on plain + ext4-on-SSD the matrix only edits `config.json` and reuses one chain, with `--safety-backup` + isolating destructive runs. See the recipe below for the prune-axis details. +3. **Disk headroom** — enough for the chains plus a snapshot / second DB (budget ≥ ~150 GiB + free beyond the live chains). +4. **Tools** — `jq`, `curl`, `docker` (compose v2), `sha256sum`, `git`, `tar`. + +Check the box is fit at any time, **non-destructively**: + +```bash +tests/integration/run.sh --host you@server --dir pithead --readiness +``` + +It asserts: chains synced (reusable), the prune axis is exercisable (the live chain FS is +snapshot-capable **or** a pre-built variant chain is supplied), disk headroom, `.env` is +owner-only, the dashboard is bound to localhost, and the backup/rollback net is usable. + +### Recipe: prune-axis coverage, and the storage that actually matters + +**Put the active chain on fast storage.** The biggest factor is the *disk*, not the filesystem: +monerod does heavy random LMDB I/O, so a chain on a 7200 rpm HDD makes every scenario crawl. +Check what you have before placing chains: + +```bash +lsblk -d -o NAME,ROTA,SIZE,MODEL # ROTA=0 is SSD/NVMe, ROTA=1 is a spinning HDD +``` + +Keep the chain monerod runs against on an **SSD/NVMe**. A spare **HDD** is fine for cold backups +and `pithead backup` archives — but *not* for an active test chain. + +**A CoW filesystem (btrfs/zfs/xfs-reflink) is a bonus, not a requirement.** On a CoW volume the +harness can snapshot/restore a chain cheaply for per-scenario isolation — but only if it's on +fast storage. A loopback btrfs on a spare HDD gives you CoW semantics at HDD speed, which is the +wrong trade for an *active* chain. 
If your root FS is ext4 on an SSD (the common case) you don't +need CoW at all: the matrix only edits `config.json` and reuses one chain, and `--safety-backup` +(a `pithead backup` + auto-rollback) isolates the destructive scenarios. + +**Covering both prune modes.** The box mines one mode (its real config). The harness exercises +that mode against the live chain and **skips** the other unless you supply a chain for it +(`--full-data-dir` / `--pruned-data-dir`). You usually don't need to: the opposite mode is +covered by the fake mini-stack ([integration-testing](integration-testing.md)) plus the +compose/config tests, which need no real chain. Supply the opposite-mode chain only to exercise +it end-to-end — and build it on fast storage: + +- **Pruned chain next to a full one?** [`build-pruned-chain.sh`](../tests/integration/build-pruned-chain.sh) + copies the LMDB consistently (brief monerod stop, then immediate restart) and prunes the *copy*, + leaving the canonical chain untouched. Fetch `monero-blockchain-prune` at the **same version** + as the running monerod and verify it against the hash the image pins (`build/monero/Dockerfile` + → `MONERO_VERSION` / `MONERO_HASH`). +- **Full chain?** Pruning is irreversible, so a full chain means a fresh full sync + (`MONERO_PRUNE=0`, ~1–3 days) — rarely worth it just for test coverage. + +`gouda` (the reference box) is a **pruned** node on NVMe: it validates pruned mode live with +`--safety-backup`, and full mode comes from the fakes. `--readiness` reports exactly this: + +```bash +tests/integration/run.sh --host you@server --dir pithead --readiness +``` + +> **Gotcha — a pruned chain's file stays large.** An in-place prune does *not* shrink the LMDB +> file: it stays at the full-chain high-water mark (~250 GiB) with the freed space sitting as +> internal free pages (Monero reuses them as the chain grows). 
To actually reclaim it you must +> rewrite the DB with `monero-blockchain-prune --copy-pruned-database` (see +> [`compact-chain.sh`](../tests/integration/compact-chain.sh)) — slow (it copies every block over +> hours), though it reads through a snapshot so monerod keeps mining; you then swap the compact +> copy in during a ~2 min window. The generic `mdb_copy -c` does **not** work: Monero ships a +> patched LMDB and stock mdb_copy rejects the format (`MDB_VERSION_MISMATCH`). Often it's simplest +> to leave the free pages. + +## Hardening checklist (the pitfalls) + +Treat the box as **production-sensitive** — it holds keys *and* it's the thing that signs off +releases. + +- **Secrets.** `.env` (RPC creds), `config.json` (wallet addresses), and the Tor data dir + (onion private keys) must be **owner-only** (`chmod 600 .env`; the `--readiness` check verifies + this). Never print secrets in logs; the harness hashes them on the box and redacts artifacts. + If the box also *publishes* releases, the GHCR token lives in the environment / a secret store, + never in the repo. +- **Network.** Firewall to least exposure: inbound **SSH** (key-only, no root login, fail2ban) + and the **stratum** port scoped to the LAN ([workers › firewall](workers.md#firewall)); the + **dashboard stays on localhost behind Caddy** and the **monerod RPC on localhost** (both + asserted by `--readiness`). Nothing else should be reachable from the internet. +- **Untrusted code.** The runner only runs trusted code (see above). Prefer ephemeral/JIT + runners; don't share the runner with private repos. +- **Least privilege.** A dedicated unprivileged user; the stack already runs least-privilege + containers (`no-new-privileges`, `cap_drop`, read-only roots, scoped Docker socket proxies — + regression-guarded in `tests/stack/test_compose.sh`). 
+- **Reproducible, clean baseline.** The matrix reuses the synced chains and never mutates the + canonical copies (config-only changes, snapshot/restore for the prune axis), restores the + original `config.json` at the end, and `--safety-backup` takes a `pithead backup` first and + **rolls the box back** (down → restore → up) if anything fails. +- **Build isolation & integrity.** Build images in containers with pinned upstream versions and + SHA256-verified binaries (the stack already does this); promote releases **by digest** so the + published bundle is bit-for-bit what was validated ([Releasing](releasing.md)). + +## How a release is validated end-to-end + +1. **Every PR** → GitHub-hosted runners run tiers 1–3 (the merge gate). Cheap, free, fast. +2. **Pre-release (or on-demand for a reviewed PR)** → a maintainer dispatches the release-gate + workflow on the dedicated server: `make test` (tiers 1–2 on the trusted box) **+** the tier-4 + live matrix against the real synced nodes (`run.sh --safety-backup`), then — per + [Releasing](releasing.md) — the staging smoke test (pull the GHCR images on a clean host, + real `setup → up → status → mine` check). +3. **Nothing is tagged or published until that's green**, and promotion is by digest, so the + version users get is the exact bundle the server validated. + +## End-to-end coverage & gaps + +What the live tier-4 gate actually exercises, and what it doesn't — so a release decision is made +with eyes open. (The reference box `gouda` is a **pruned** Monero node on NVMe; its own snapshot +and this table also live at `~/pithead-testbench/` on the box, for operators and AI agents.) 
+ +**Validated live** (real synced chains): the config matrix (remote/local node, dashboard +secure/insecure, Tari required/optional, RPC LAN access, XvB on/off) applied + asserted; lifecycle +(restart, secret-preserving `apply`, backup→restore round-trip); node-down failover → recovery; +release readiness; pruned monerod (the real prod config). **Covered without a real chain** +(tiers 1–3): client↔daemon contract tests, the fake-daemon mini-stack (incl. full-prune behavior), +compose hardening, config rendering, dashboard tests. + +| Gap (not tested live) | Worth filling before release? | +|---|---| +| **Full (unpruned) Monero** live — a pruned box can't exercise it | **Low** — stack paths don't differ by prune mode; fakes/config cover it. A multi-day full sync isn't justified. | +| **Privacy / Tor egress** — no clearnet-leak assertions in the live harness (#160) | **High** — privacy is a core promise. Add egress checks (no clearnet to XvB stats, p2pool, Tari DNS). | +| **Automated PR gate** — the self-hosted runner is manual/opt-in | **Medium-high, high-leverage** — wire the live harness as a required check on `workflow_dispatch`/push-to-`main` only (never fork PRs). | +| **Upgrade / migration** across image versions with chain continuity | **Medium** — add a scenario: pull new images → `apply` → assert no re-sync + secrets intact. | +| **XvB live routing** end-to-end (the raffle optimization) | **Medium** — core value-prop but unit/sim-tested today; a periodic live smoke test would help. | +| **Multi-worker scale** — the harness assumes ~2 workers | **Medium** — add a load-gen worker + assert proxy routing/hashrate for perf confidence. | +| **Real Tari merge-mined block** acceptance | **Low** — probabilistic; rely on template/connectivity checks. | +| **Fault injection over SSH** (currently local-mode only) | **Low-Medium** — extend the SIGSTOP/remove cases to the `--host` path. 
| + +**Recommended before release:** the privacy-egress checks and the automated PR gate; then the +upgrade scenario and an XvB live smoke test. The remainder are nice-to-have. diff --git a/docs/releasing.md b/docs/releasing.md index a6daf5e..9fb3dea 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -64,6 +64,10 @@ nodes (the integration-test environment from point — `make release` (or `pithead release`) — runs the whole pipeline. **Nothing is promoted or published until every gate is green.** +> How to provision and **harden** that server, why end-to-end validation can't run on +> GitHub-hosted runners (and what does run free on every PR), and the safe self-hosted-runner +> setup are covered in **[Release / Validation Server](release-server.md)**. + ### Pipeline: stage → smoke-test → promote 1. **Preflight** — clean working tree; read the product version from the top-level @@ -136,6 +140,10 @@ What exists today: - ✅ Top-level `VERSION` file (single source of truth). - ✅ `CHANGELOG.md` (Keep a Changelog + SemVer, with an `Unreleased` section). - ✅ This document. +- ✅ The [#54](https://github.com/p2pool-starter-stack/pithead/issues/54) integration test + suite — the live config-matrix gate against real nodes (`tests/integration/`, `make + test-integration`). See [Integration Testing](integration-testing.md). Still to wire: making + it a *blocking step* inside the (not-yet-built) `make release` pipeline. - ✅ The dashboard version badge ([#58](https://github.com/p2pool-starter-stack/pithead/issues/58)) — `VERSION` + git build-args baked into the dashboard image (env + OCI labels); shows `vX.Y.Z` on releases and `dev · branch @ hash` otherwise. diff --git a/docs/test-inventory.md b/docs/test-inventory.md new file mode 100644 index 0000000..6ff0d13 --- /dev/null +++ b/docs/test-inventory.md @@ -0,0 +1,667 @@ +# Test Inventory + +_Generated by `make test-inventory` ([`tests/inventory.sh`](../tests/inventory.sh)). 
**Do not +edit by hand** — re-run the target to refresh. See [Testing Strategy](testing-strategy.md) for +how the tiers fit together._ + +**Totals:** 418 dashboard unit tests · 12 contract tests · 25 frontend +tests · 21 `pithead` shell sections · 11 harness self-test sections · +8 live config scenarios (15 axis values) · 6 mini-stack scenarios. + +> Counts are **test functions / named cases** (parametrized pytest cases expand to more at +> run time — e.g. the dashboard suite collects ~381). Generated statically by grep, so it's +> stable regardless of what's installed. + +| Tier | Suite | Cases | +|---|---|---| +| 1 — Unit | dashboard pytest | 418 | +| 1 — Unit | frontend (node --test) | 25 | +| 1 — Unit | `pithead` shell suite | 21 sections | +| 1 — Unit | compose interpolation + hardening (#90) | 1 | +| 2 — Contract | fake-daemon clients | 12 | +| 3 — Mini-stack | docker control-plane scenarios | 6 | +| 4 — Live matrix | config scenarios | 8 (15 axis values) | +| 4 — Live matrix | harness self-test | 11 sections | + +--- + +## Tier 1 — Unit & component + +### Dashboard (pytest) — 418 tests + +#### tests/client/test_docker_control.py — 6 +- test_tcp_scheme_rewritten_to_http +- test_stop_success_204 +- test_already_stopped_304_is_success +- test_start_success +- test_error_status_returns_false +- test_connection_error_returns_false + +#### tests/client/test_monero_client.py — 12 +- test_url_and_digest_auth_built +- test_no_username_means_no_auth +- test_success_returns_payload +- test_network_error_returns_none +- test_non_200_returns_none +- test_non_json_returns_none +- test_busy_status_returns_none +- test_syncing +- test_synced_via_flag +- test_synced_via_zero_target +- test_synced_when_height_reaches_target +- test_unreachable_returns_none + +#### tests/client/test_tari_client.py — 7 +- test_fully_synced +- test_syncing_with_target +- test_syncing_without_reliable_target +- test_grpc_error_returns_default_when_no_cache +- 
test_serves_last_known_state_on_transient_failure +- test_stale_cache_expires +- test_close_closes_channel + +#### tests/client/test_xmrig_client.py — 5 +- test_first_success_returns_payload_and_short_circuits +- test_all_attempts_fail_returns_empty +- test_exceptions_are_swallowed +- test_zero_ip_skipped_uses_name_host +- test_name_token_strips_plus_suffix + +#### tests/client/test_xmrig_proxy_client.py — 7 +- test_auth_header_set +- test_get_summary +- test_get_workers +- test_get_config +- test_update_config_returns_json +- test_update_config_204_returns_empty +- test_get_summary_raises_on_http_error + +#### tests/client/test_xvb_client.py — 7 +- test_missing_wallet_returns_none +- test_get_stats_success_parses_html +- test_get_stats_non_200_returns_none +- test_get_stats_network_error_returns_none +- test_fail_count_only +- test_no_critical_stats_returns_none +- test_hashrate_units + +#### tests/collector/test_logs.py — 18 +- test_parses_multiple_frames +- test_skips_blank_lines +- test_truncated_frame_breaks_cleanly +- test_success +- test_non_200_returns_error +- test_connection_error_handled +- test_syncing +- test_synced +- test_file_not_found +- test_bad_json +- test_new_format_top_block_candidate +- test_old_synced_format +- test_already_synchronized +- test_error_logs +- test_rpc_result_used_when_available +- test_falls_back_to_logs_when_rpc_unreachable +- test_local_when_default_host +- test_remote_when_other_host + +#### tests/collector/test_pools.py — 15 +- test_empty_is_unknown +- test_majority_wins +- test_unknown_ports +- test_port_matched_exactly_not_as_substring +- test_aggregates_sources +- test_empty_files_give_defaults +- test_hashrate_derived_when_missing +- test_hashrate_passthrough +- test_worker_parsing +- test_worker_without_name_defaults_to_miner +- test_active_chain_converts_utari +- test_no_chains_inactive +- test_missing_file_returns_empty +- test_malformed_json_returns_empty +- test_valid_json + +#### tests/collector/test_system.py — 
11 +- test_normal +- test_error_returns_zeros +- test_parses_meminfo +- test_error_returns_zeros +- test_formats +- test_error +- test_delta_calculation +- test_malformed_line +- test_enabled_when_used +- test_allocated_when_unused +- test_unknown_when_missing + +#### tests/config/test_config.py — 8 +- test_defaults_load +- test_donation_level_env_override +- test_monero_prune_accepts_truthy_forms +- test_monero_prune_accepts_falsy_forms +- test_update_interval_tolerates_bad_values +- test_tier_config_env_override_valid +- test_tier_config_env_override_invalid_json_falls_back +- test_xvb_enabled_flag + +#### tests/helper/test_utils.py — 29 +- test_plain_numbers +- test_unit_suffixes_case_insensitive +- test_unrecognized_suffix_is_raw +- test_bad_data_returns_zero +- test_unit_boundaries +- test_bad_data +- test_branches +- test_bad_data +- test_formats_localtime +- test_falsy_is_never +- test_invalid_type_does_not_crash +- test_default_tiers +- test_custom_tiers +- test_zero_threshold_ignored +- test_auto_picks_highest_sustainable +- test_auto_zero_when_nothing_sustainable +- test_named_tier_honored +- test_named_tier_not_downgraded_but_flagged_unsustainable +- test_cannot_sustain_named_tier_is_flagged +- test_numeric_level_honored +- test_unknown_level_falls_back_to_lowest +- test_ipv4_is_an_address +- test_ipv6_is_an_address +- test_hostname_is_not_an_address +- test_surrounding_whitespace_tolerated +- test_non_string_and_empty_are_not_addresses +- test_returns_socket_source_address +- test_none_when_no_route +- test_socket_is_closed_even_on_error + +#### tests/service/test_algo_service.py — 28 +- test_xvb_disabled_forces_p2pool +- test_zero_shares_forces_p2pool +- test_excessive_failures_forces_p2pool +- test_low_hashrate_no_tier_is_p2pool +- test_cold_start_seeds_feedforward +- test_loop_ramps_up_when_below_reference +- test_loop_backs_off_when_above_reference +- test_advance_false_does_not_move_the_loop +- test_nano_pool_uses_longer_window +- 
test_difficulty_reserve_caps_donation +- test_falls_back_to_flat_cap_without_difficulty +- test_reserve_never_exceeds_hard_cap +- test_loop_clamped_to_reserve +- test_reference_cushion_is_absolute_capped +- test_fraction_to_ms_zero_and_positive +- test_advance_noop_when_no_hashrate +- test_advance_clamps_to_bounds +- test_routed_fraction_for_instrumentation +- test_get_target_uses_state_manager_tiers +- test_default_auto_targets_highest_sustainable +- test_explicit_tier_not_downgraded +- test_switch_updates_proxy_and_state +- test_switch_aborts_on_bad_config +- test_aborts_early_when_decision_flips_to_donate +- test_aborts_early_when_below_tier +- test_sleeps_full_duration_when_in_tier_on_p2pool +- test_run_invokes_switch_then_stops +- test_run_skips_switching_while_workers_rejected + +#### tests/service/test_data_service.py — 51 +- test_parse_list_row_named_fields +- test_parse_list_row_share_counts +- test_parse_list_row_offline_and_uptime +- test_parse_legacy_dict_row +- test_parse_legacy_dict_share_counts +- test_list_format_online +- test_list_format_offline_when_no_connections +- test_list_format_uptime_estimate_from_last_share +- test_short_list_row_is_skipped +- test_legacy_dict_format +- test_legacy_dict_defaults +- test_missing_payload_returns_empty +- test_extracts_results_and_best +- test_best_defaults_to_zero_when_empty +- test_missing_results_block_zeros_out +- test_malformed_payload_returns_empty +- test_proxy_kind_scales_khs_to_hs +- test_xmrig_kind_not_scaled +- test_unreachable_direct_api_keeps_proxy_values_online +- test_short_hashrate_total_ignored +- test_prefers_h15 +- test_falls_back_to_h60_then_h10 +- test_offline_excluded +- test_empty +- test_restores_snapshot +- test_ignores_non_dict_snapshot +- test_restores_workers_rejected_flag +- test_restores_miner_released_latch +- test_holds_miner_when_restart_mid_sync +- test_stop_when_monero_down +- test_stop_when_tari_down_and_required +- test_tari_down_ignored_when_non_blocking +- 
test_stop_failure_keeps_flag_false_for_retry +- test_no_double_stop_when_already_rejected +- test_readmit_when_relevant_nodes_healthy +- test_no_readmit_while_a_relevant_node_unconfirmed +- test_readmit_ignores_tari_when_non_blocking +- test_no_readmit_until_monero_healthy_even_if_tari_non_blocking +- test_holds_all_containers_when_not_synced +- test_releases_when_gate_satisfied +- test_noop_once_released +- test_partial_start_failure_keeps_latch_closed +- test_rehold_stops_quietly_after_first_cycle +- test_single_iteration_aggregates +- test_run_holds_miner_while_syncing +- test_run_releases_despite_height_override +- test_run_nonblocking_tari_releases_and_stays_operational +- test_iteration_survives_collector_error +- test_run_holds_when_tari_required_and_only_monero_synced +- test_post_release_blip_lets_failover_act_without_rehold +- test_both_nodes_down_rejects_once + +#### tests/service/test_earnings.py — 4 +- test_matches_closed_form +- test_worked_field_example +- test_linear_in_inputs +- test_missing_or_bad_inputs_are_zero + +#### tests/service/test_metrics.py — 34 +- test_empty_history_returns_zero +- test_averages_v_p2pool_in_window +- test_excludes_samples_outside_window +- test_legacy_rows_count_as_p2pool +- test_xvb_samples_drag_average_down +- test_total_and_stratum_passthrough +- test_p2pool_averages_from_history +- test_xvb_averages_from_stats +- test_xvb_routed_is_fraction_of_hashrate +- test_xvb_routed_zero_without_fraction +- test_mode_default +- test_xvb_disabled_overrides_mode_and_tiers +- test_current_tier_from_xvb_24h +- test_low_hr_warning_for_unsustainable_explicit_tier +- test_no_warning_for_auto +- test_no_warning_when_sustainable +- test_fail_count_and_last_update +- test_counts_online_and_total +- test_empty +- test_counts_recent_within_pplns_window +- test_nano_block_time +- test_loading_when_no_target +- test_done_when_full +- test_mid_sync_remaining +- test_down_flag +- test_global_syncing +- test_local_pruned +- test_local_full +- 
test_remote_unknown +- test_pool_and_network_figures +- test_tari_mining_flag +- test_empty_snapshot_does_not_crash +- test_history_fetched_when_not_passed +- test_passed_history_avoids_refetch + +#### tests/service/test_node_health.py — 6 +- test_not_down_before_threshold +- test_down_after_threshold +- test_single_blip_does_not_trip +- test_never_reachable_never_down +- test_down_clears_only_after_recovery_window +- test_healthy_requires_stable_window_from_unknown + +#### tests/service/test_storage_service.py — 24 +- test_get_tiers +- test_default_xvb_stats +- test_partial_updates +- test_kwargs_update_and_type_coercion +- test_none_kwargs_skipped +- test_unknown_kwarg_ignored +- test_add_share_and_dedup +- test_old_shares_pruned_from_memory +- test_update_history_roundtrip +- test_history_bad_values_default_zero +- test_update_and_get_known_workers +- test_worker_without_ip_skipped +- test_none_list_is_noop +- test_roundtrip +- test_empty_snapshot_not_saved +- test_load_missing_snapshot_returns_none +- test_share_stats_persist_across_instances +- test_state_persists_across_instances +- test_legacy_kv_keys_migrated_on_load +- test_corrupted_kv_value_skipped +- test_history_timestamp_backfilled_from_iso_on_upgrade +- test_history_older_than_retention_pruned_from_memory +- test_old_history_pruned_from_db_when_cleanup_fires +- test_stale_workers_pruned_after_retention_window + +#### tests/sim/test_donation_model.py — 10 +- test_holds_tier_without_overshoot +- test_no_windup_from_cold_start +- test_more_headroom_means_more_p2pool +- test_holds_tier_across_credit_factor +- test_overcredit_frees_p2pool +- test_stable_under_lag +- test_reserve_keeps_p2pool_in_the_window +- test_low_tier_high_difficulty_caps_donation_for_vip +- test_zero_reads_do_not_run_away +- test_recovers_after_worker_drop + +#### tests/test_main.py — 1 +- test_build_app_returns_wired_application + +#### tests/test_version.py — 11 +- test_version_with_no_git_metadata_is_a_release +- 
test_leading_v_in_version_is_not_doubled +- test_explicit_release_flag_wins_over_git_metadata +- test_release_flag_accepts_common_truthy_spellings +- test_branch_and_commit +- test_commit_only +- test_branch_only +- test_dirty_marker_passes_through +- test_a_versioned_build_with_a_commit_is_still_dev +- test_no_metadata_falls_back_to_generic_dev +- test_blank_values_treated_as_absent + +#### tests/web/test_server.py — 23 +- test_index_serves_shell +- test_get_state_ok_json +- test_range_query_accepted +- test_from_to_window_accepted +- test_malformed_from_to_falls_back +- test_window_filters_history_end_to_end +- test_node_down_badges_in_state +- test_passive_tari_badge_in_state +- test_state_error_is_sanitized_json +- test_security_headers_present +- test_csp_has_no_unsafe_inline_or_eval +- test_state_response_also_carries_headers +- test_apply_security_headers_unit +- test_js_mimetypes_registered +- test_frontend_modules_served +- test_static_assets_revalidate +- test_shell_revalidates +- test_css_has_phone_breakpoint +- test_css_has_horizontal_scroll_rule +- test_workers_table_opts_into_scroll_wrapper +- test_css_lets_stat_values_wrap +- test_css_lets_hostname_wrap +- test_host_at_separator_styled_and_rendered + +#### tests/web/test_views.py — 101 +- test_point_shape_is_xy_with_epoch_ms +- test_legacy_rows_attributed_to_p2pool +- test_range_filtering +- test_downsampling_caps_points +- test_outage_inserts_null_break +- test_regular_data_has_no_breaks +- test_single_missing_sample_does_not_break +- test_break_sits_inside_the_gap +- test_threshold_adapts_to_spacing +- test_downsampled_outage_still_breaks +- test_single_point_no_break +- test_share_points_sparse_and_top_pinned +- test_share_marker_top_pinned_when_value_zero +- test_no_shares_no_points +- test_unknown_range_keeps_everything +- test_empty_history +- test_custom_window_filters_both_bounds +- test_window_overrides_range +- test_short_window_kept_at_native_resolution +- 
test_long_window_downsamples_to_tier +- test_target_points_tiers +- test_chart_tension_tiers +- test_stacked_series_sum_to_the_total +- test_zoom_reveals_more_detail +- test_all_range_adapts_density_to_data_extent +- test_formats_hashrates +- test_routed_distinct_from_credited +- test_p2pool_mode_grays_xvb +- test_xvb_mode_grays_p2pool +- test_split_mode_both_active +- test_low_hr_badge_present_only_when_warned +- test_tiers_and_fail_count_passthrough +- test_loading_done_syncing_states +- test_done_state +- test_monero_mode_and_db_passthrough +- test_syncing_shows_syncing_only +- test_operational_shows_mode_and_pool +- test_low_hr_badge +- test_node_down_and_rejected +- test_miner_held +- test_passive_tari_with_and_without_percent +- test_monero_pruned_badge +- test_monero_full_badge +- test_no_prune_badge_when_unknown +- test_disk_badge_critical +- test_disk_badge_warn +- test_no_disk_badge_when_ample +- test_no_disk_badge_when_missing +- test_high_usage_levels_and_fill +- test_warning_fill_between_70_and_90 +- test_unparseable_cpu_is_ok +- test_empty_system_defaults +- test_pool_tokens +- test_formatted_and_raw_fields +- test_online_sorted_before_offline +- test_malformed_worker_skipped +- test_bad_ip_sorts_to_zero +- test_name_passthrough +- test_share_counts_raw_and_formatted +- test_invalid_appended_to_rejected_string_only_when_nonzero +- test_missing_share_fields_default_to_zero +- test_reject_flag_set_on_high_reject_rate +- test_none_without_rejects +- test_none_below_noise_floor +- test_none_when_rate_low +- test_flags_high_rate_above_floor +- test_flags_all_rejects_at_floor +- test_active +- test_inactive_defaults +- test_long_wallet_shortened +- test_formats_totals_and_best +- test_reject_pct_and_level +- test_best_dash_when_unknown +- test_empty_summary_has_no_data +- test_formats_from_metrics_and_data +- test_db_size_dash_when_unknown +- test_resolves_ip_for_a_hostname +- test_none_when_host_is_already_an_ip +- test_none_when_ip_undetectable +- 
test_none_when_detected_ip_equals_host +- test_publishes_rate_and_inputs +- test_default_hashrate_is_the_displayed_p2pool_1h +- test_no_p2pool_hashrate_when_average_is_zero +- test_unavailable_when_network_reward_missing +- test_unavailable_when_difficulty_missing +- test_p2pool_hr_passthrough_is_raw +- test_has_all_sections +- test_version_section_shape +- test_is_json_serializable +- test_range_echoed +- test_window_null_on_preset +- test_window_echoed_when_zoomed +- test_syncing_flag_and_title +- test_proxy_workers_from_metrics +- test_chart_uses_timestamps +- test_propagates_state_errors +- test_valid_pair +- test_absent_is_none +- test_malformed_falls_back_to_none +- test_returns_html_referencing_module +- test_error_fallback + +### Frontend logic (node --test) — 25 tests +- sortWorkers: null index keeps the server-provided order +- sortWorkers: numeric columns sort numerically, not lexically +- sortWorkers: hashrate column also sorts numerically +- sortWorkers: descending reverses the order +- sortWorkers: name column sorts as text +- sortWorkers: does not mutate the input array +- WORKER_COLUMNS: keys match the worker fields the server sends +- sortWorkers: rejected column sorts numerically (find problem rigs) +- fmtTimestamp: returns a non-empty string for an epoch-ms value +- normalizeTheme: passes valid modes through, defaults the rest to auto +- THEME_ORDER: the control renders every theme exactly once +- clampZoomWindow: orders endpoints and enforces a minimum span +- clampZoomWindow: rejects unusable input +- fmtWindowDuration: two coarsest units, trailing zeros dropped +- normalizeSeries: defaults every series to visible, only explicit false hides +- parseHashrate: accepts bare numbers and k/M/G suffixes +- parseHashrate: rejects empty / unparseable input +- computeEarnings: scales the daily rate to day/month/year + time-to-share +- computeEarnings: returns nulls when unavailable or hashrate is non-positive +- computeEarnings: no time-to-share when 
share difficulty is unknown +- heroKpis: surfaces the five headline numbers under stable labels, in order +- heroKpis: wires each KPI to its build_state field +- heroKpis: shares colour reflects the ok flag +- heroKpis: mode colour follows the server mode_variant token +- heroKpis: total is accent-coloured; blocks and tier carry no colour class + +### `pithead` shell suite (tests/stack/run.sh) — 21 sections +- unit: resolve_default +- unit: assert_safe_dir +- unit: is_ipv4 +- unit: resolve_dashboard_host (dashboard.host 'auto' revert, 247c5a0) +- unit: docker_boot_enabled (#137) +- unit: is_valid_host (#130) +- unit: describe_change +- unit: env helpers +- unit: export_build_provenance (Issue #58) +- unit: node credential helpers +- unit: disk_component_gib +- unit: check_disk_grouped (mocked df) +- black-box: CLI dispatch +- black-box: guards +- black-box: config validation +- black-box: apply preserves secrets + propagates +- black-box: local node creds auto-generated + persisted (#50) +- black-box: status health check +- black-box: doctor exit code (#127) +- black-box: reset-dashboard targets .env dirs, not config.json (#139) +- black-box: reset-dashboard refuses to guess without .env dirs (#139) + +### Compose validation + hardening (tests/stack/test_compose.sh) +- docker-compose.yml `${VAR}` interpolation resolves against a representative .env +- #90 hardening invariants: no-new-privileges / cap_drop / read-only roots, credential-free + healthchecks, least-privilege Docker socket proxies, and the pinned `pithead` project name + +## Tier 2 — Contract (real clients vs controllable fakes) + +### tests/integration/fakes/test_contract.py — 12 tests +- test_monero_synced_reads_no_sync_and_db_size +- test_monero_syncing_reports_percent +- test_monero_down_is_unreachable +- test_monero_busy_status_is_unreachable +- test_monero_synced_by_height_even_without_flag +- test_monero_db_size_unknown_reads_zero +- test_monero_http_control_mutates_state +- 
test_tari_synced_reads_done +- test_tari_syncing_reports_percent +- test_tari_down_is_unreachable_with_no_cache +- test_tari_syncing_without_reliable_target_avoids_false_100 +- test_tari_serves_cached_reading_when_briefly_unreachable + +## Tier 3 — Fake-daemon mini-stack (docker) + +### tests/integration/mini-stack/run-mini-stack.sh — 6 scenarios +- scenario 1: holds the miner while both chains sync +- scenario 2: keeps holding while Tari (required) is still syncing +- scenario 3: releases the miner once both chains are synced +- scenario 4: rejects workers when required Tari is down +- scenario 5: readmits workers when Tari recovers +- scenario 6: a dashboard restart does not re-hold a released miner + +## Tier 4 — Live config matrix (real synced server) + +### Config scenarios (tests/integration/scenarios.sh) — 8 +- local-pruned-main-secure-tari +- local-full-main-secure-tari +- local-pruned-mini-secure-tari +- local-pruned-nano-insecure +- local-pruned-main-rpclan +- local-pruned-main-xvb-off +- local-pruned-main-tari-optional +- remote-main-secure-tari + +### Axis coverage (every value exercised at least once) — 15 +- monero.mode=local +- monero.mode=remote +- monero.prune=true +- monero.prune=false +- monero.rpc_lan_access=true +- monero.rpc_lan_access=false +- p2pool.pool=main +- p2pool.pool=mini +- p2pool.pool=nano +- xvb.enabled=true +- xvb.enabled=false +- dashboard.secure=true +- dashboard.secure=false +- dashboard.tari_required=true +- dashboard.tari_required=false + +### Per-scenario assertions (tests/integration/run.sh) +- .env is owner-only (mode $envmode) +- Caddyfile uses correct scheme +- DASHBOARD_SECURE matches config +- MONERO_RPC_BIND matches rpc_lan_access +- Monero is synced (chain reusable by the matrix) +- TARI_REQUIRED env matches config +- XVB_ENABLED matches config +- backup archive contains .env +- backup archive contains config.json +- backup/rollback prerequisites present (writable backups/, tar) +- both prune modes exercisable 
(live=$baseline_mode + supplied $opp_label chain at $opp_dir) +- check +- container up: $svc +- dashboard /api/state reachable +- dashboard bound to localhost only (Caddy fronts it) +- disk headroom on the live chain FS (${avail} GiB free) +- monero display mode determinate ($dmode) +- monero display mode present ($dmode) +- monerod absent in remote mode +- monerod reported missing +- monerod reports synced (RPC) +- monerod running-but-unhealthy +- pool actually changed +- pool type +- prune axis: live FS is snapshot-capable ($fstype) — the $opp_label variant can be built cheaply +- re-apply is a no-op +- restore preserves secrets +- restore reverts the pool to the backed-up value +- secrets intact (token + onions) +- secrets intact after restore +- secrets preserved across pool change +- snapshot-isolated $baseline_mode chain on a CoW FS ($same_dir, $sfs) — destructive scenarios needn't touch the live chain +- stack is healthy (pithead status) +- status OK after monerod recovery +- status OK after node recovery +- status OK after restart +- status exit code is 0 (healthy) +- status non-zero when monerod is down +- status non-zero when monerod missing +- status non-zero when monerod unhealthy +- status non-zero when node down +- stratum total hashes > 0 +- tari synced (required) +- workers online (>= $EXPECTED_WORKERS) +- xmrig-proxy stopped for failover + +### Harness self-test (tests/integration/selftest.sh) — 11 sections +- overrides_to_jq: value typing +- resolve_overrides: prerequisite gate (never mutates the canonical chain) +- render_scenario_config: applies overrides, stays valid JSON +- expected/absent services: profile gating +- redact: secrets never leak into artifacts +- matrix: every axis value is covered +- scenarios: lookup helpers +- rx: local exec runs in the stack dir +- api_state + jq_get: parse a fixture +- service_state parsing (fault-injection predicates) +- assertion helpers: counters behave + +--- + +_Grand total: **501** enumerated 
cases/sections across the four tiers (plus the live +lifecycle and fault-injection phases, which are exercised on a real server)._ diff --git a/docs/test-server-architecture.md b/docs/test-server-architecture.md new file mode 100644 index 0000000..b396f14 --- /dev/null +++ b/docs/test-server-architecture.md @@ -0,0 +1,143 @@ +# Test / build server architecture & recreation + +How the Pithead reference test server (`gouda`) is structured, and how to **recreate it on another +box** with minimal pain. The whole point: the synced chains are the slow-to-acquire asset (days to +sync) — everything else is reproducible in minutes from the repo. + +## What this server is + +A single box that runs the **live Pithead stack** against real, synced chains and serves three jobs: + +1. **Tier-4 release gate** — the integration harness ([integration-testing](integration-testing.md)) + validates a release end-to-end here before it ships. +2. **Developer + AI-agent test bench** — a consistent place to reproduce, test, and debug stack + behavior against real daemons. +3. **Reference deployment** — a known-good, documented install other boxes can be cloned from. + +It is **not** a production miner — downtime is fine; tear-down/redeploy is fine. The one rule: +**don't lose the synced chains** (reuse them, don't re-sync). + +## Hardware & storage policy + +``` +NVMe SSD (fast, PRIMARY) HDD (slow, SPARING) + /boot, /boot/efi /home ← cold backups / archives only + / (root, LVM ext4) + ├─ /var/lib/docker (images) + ├─ /srv/code/pithead (CHECKOUT, = ~/code/pithead) + └─ /srv/code/pithead-data (CHAINS) ← the asset, on fast storage +``` + +**Policy — the single most important hardware rule:** the chains monerod/Tari actively run against +live on the **SSD/NVMe**. monerod is random-I/O heavy; a chain on a spinning HDD makes every test +crawl. The HDD is for **cold** things only (backup tarballs, archived snapshots). 
Check `lsblk -d +-o NAME,ROTA` before placing any chain — `ROTA=0` is SSD/NVMe, `ROTA=1` is HDD. + +**Sizing (a pruned-Monero test bench fits comfortably in ~1 TB):** + +| Component | Size | +|---|---| +| Pruned Monero (compacted) | ~95 GB | +| Tari (archival/full) | ~132 GB | +| Docker images + cache | ~20 GB | +| OS + working headroom | ~30 GB | +| **Total** | **~280 GB** | +| *+ optional full Monero node* | *+250 GB* | +| *+ a few full chain copies* | *95–250 GB each* | + +A 1 TB NVMe holds the pruned bench with ~650 GB to spare — room for a full node and copies too. + +> **⚠️ Verify the disk is actually fast.** "SSD" in the model name is not enough — check the *bus*. +> On the reference box (`gouda`), the "1 TB SSD" enumerated as `/dev/sdb` on **SATA** (not +> `/dev/nvme0n1`), on a link that negotiated down to 1.5 Gbps, and benchmarked at **~37–98 MB/s — +> HDD-class**. There was **no NVMe** in the machine at all. That single fact bottlenecks monerod, +> builds, and makes LMDB compaction (heavy random I/O) impractical (~16 h instead of ~10 min). +> +> **The highest-value upgrade by far is a real m.2 PCIe NVMe** (~$80–150): ~20–50× faster, which +> turns chain compaction into minutes and makes CoW snapshots actually worth doing. Confirm a +> drive is genuinely NVMe before relying on it: +> ```bash +> lsblk -d -o NAME,TRAN,ROTA,MODEL # want TRAN=nvme, not sata +> ls /dev/nvme* # an NVMe drive appears as /dev/nvme0n1 +> # quick reality check on random read (what monerod does): +> dd if=/path/to/data.mdb of=/dev/null bs=4k count=200000 skip=10000000 # want >>100 MB/s +> ``` +> Until a fast disk is added, keep the chain at its working ~253 GB (it's correctly pruned; the +> size is reclaimable free-page bloat, not a full chain) and skip CoW. The matching +> `mdb_copy` (LMDB 0.9.70, built from Monero's vendored source) is staged at +> `~/pithead-testbench/bin/mdb_copy` — on a fast disk, `mdb_copy -c /lmdb ` compacts +> in minutes. 
+ +A **second m.2 NVMe (PCIe) with btrfs/zfs** additionally enables **copy-on-write snapshots** — +instant, near-free chain clones for isolated/parallel test runs — the upgrade that helps a busy +multi-agent bench. + +## Directory layout + +| Path | Disk | What | Lose it? | +|---|---|---|---| +| `~/code/pithead/` (`/srv/code/pithead`) | SSD | stack checkout (`docker-compose.yml`, the `pithead` CLI), `config.json`, `.env` | reproducible (clone) | +| `/srv/code/pithead-data/{monero,tari,p2pool,dashboard,tor}/` | **SSD** | the chains + Tor onion keys — **the asset** | **don't** (days to re-sync) | +| `/var/lib/docker/` | SSD | images / build cache | reproducible (rebuild) | +| `~/pithead-testbench/` | HDD | build-server docs + ops tools (see its `README.md`) | reproducible (repo) | +| `/home` … `/mnt/chains` | HDD | cold backups / archives | — | + +The chains live **outside** the checkout (`/srv/...`, absolute paths in `config.json`), so the stack +can be refreshed/redeployed without ever touching them. + +## Recreate on another box + +Goal: stand up an equivalent server. Minutes of work + one chain transfer (vs days of re-sync). + +**1. Prereqs.** Ubuntu LTS, Docker (Compose v2), `git jq curl tar`, and your user in the `docker` +group. Put the chains' target on the **SSD**. + +**2. Clone + configure:** +```bash +git clone https://github.com/p2pool-starter-stack/pithead.git ~/pithead && cd ~/pithead +cp /path/to/your/config.json . # your wallets/settings — or run `./pithead setup` to make one +# point data dirs at fast storage (checkout-independent; `sponge` comes from moreutils): +jq '.monero.data_dir="/srv/<fast-disk>/pithead/data/monero" + | .tari.data_dir="/srv/<fast-disk>/pithead/data/tari" + | .p2pool.data_dir="/srv/<fast-disk>/pithead/data/p2pool" + | .dashboard.data_dir="/srv/<fast-disk>/pithead/data/dashboard" + | .tor.data_dir="/srv/<fast-disk>/pithead/data/tor"' config.json | sponge config.json +``` + +**3.
Bring the chains (the painless part — reuse, don't re-sync).** Stop the stack on the source +box (`./pithead down`) so the LMDBs are consistent, then copy `/srv/.../pithead/data/` to the new +box's SSD — over the network or a fast external drive: +```bash +# from the new box, pulling from the old one (chains are ~230 GB; hours over GbE, faster over USB3/10G): +rsync -aP --info=progress2 olduser@oldbox:/srv/code/pithead-data/ /srv/<fast-disk>/pithead/data/ +``` +The Tor onion keys travel in `data/tor`, so the box keeps its onion identity. (No old box yet? Omit +this and let monerod/Tari sync from scratch — days, but hands-off.) + +**4. Deploy + verify:** +```bash +cd ~/pithead && ./pithead setup # deps, .env, Tor, Caddy; then `up` +./pithead status # all healthy; monerod just catches up the gap +``` + +**5. Test-bench tooling:** +```bash +mkdir -p ~/pithead-testbench/bin +cp ~/pithead/tests/integration/{build-pruned-chain,compact-chain,system-info}.sh ~/pithead-testbench/ +cp ~/pithead/tests/integration/gouda-testbench-README.md ~/pithead-testbench/README.md +# fetch monero-blockchain-prune at the running monerod's version (see build/monero/Dockerfile pins): +~/pithead-testbench/system-info.sh > ~/pithead-testbench/system-info.md +``` + +**6. Validate it's release-fit:** +```bash +tests/integration/run.sh --host you@newbox --dir pithead --readiness +``` + +## Migrating gouda → a bigger box later + +Same as above, steps 3–6: `rsync` the chains + `config.json` over, `pithead setup`, redeploy, +copy the test-bench tooling, regenerate `system-info.md`. Because the chains are decoupled and the +config is captured, the move is a transfer + a redeploy — no reconfiguration, no re-sync. Put the +chains on the bigger box's fastest disk; if it has spare NVMe, that's the place for a btrfs/zfs CoW +volume to get cheap test-chain snapshots.
diff --git a/docs/testing-guide.md b/docs/testing-guide.md new file mode 100644 index 0000000..1efed35 --- /dev/null +++ b/docs/testing-guide.md @@ -0,0 +1,110 @@ +# Testing Guide (for developers) + +Practical "how do I test the change I just made?" companion to the +[Testing Strategy](testing-strategy.md) (which explains *why* the tiers exist) and the +generated [Test Inventory](test-inventory.md) (which lists *what* exists today). + +## Philosophy + +- **Test the intent, not the line.** A test should pin down a *behavior or contract* — + "a pruned node displays Pruned", "the gate holds until both chains sync", "an old DB + migrates without losing history" — and read clearly enough that its name + one-line comment + explain *why it exists*. Don't add a test purely to move the coverage number. +- **The 80% coverage gate is a floor, not a target.** Uncovered defensive error-handling is + fine; uncovered *behavior* (a migration path, a retention rule, a decision branch) is a gap. +- **Tests are real code.** They're linted (`shellcheck`), version-controlled with the change + they protect, and listed in the inventory (a CI drift check fails if you add/remove a test + without regenerating it). 
+ +## Commands + +```bash +make test # everything that needs no server/docker (run before every PR) +make test-dashboard # dashboard pytest + 80% coverage gate +make test-stack # pithead shell suite +make test-fakes # tier-2 contract test (real clients vs fakes) +make test-integration-selftest # the integration harness's own logic +make test-inventory # regenerate docs/test-inventory.md (do this when adding/removing tests) +make test-mini-stack # tier-3 docker mini-stack (needs docker) +make test-integration ARGS="--host user@box --dir pithead --check" # tier-4 live, non-destructive +``` + +## Where tests live + +| You changed… | Write the test here | Tier | +|---|---|---| +| Dashboard logic (a decision, metric, `/api/state` field) | `build/dashboard/tests/**/test_*.py` (pytest) | 1 | +| Frontend logic (worker sort, formatting) | `build/dashboard/tests/frontend/*.test.mjs` (`node --test`) | 1 | +| A client that parses a daemon (monerod RPC, Tari gRPC) | `tests/integration/fakes/test_contract.py` (+ extend the fakes) | 2 | +| The control plane (sync-gate #35, failover #31) | `tests/service/test_data_service.py` (+ a `mini-stack` scenario) | 1 + 3 | +| `pithead` CLI behavior | `tests/stack/run.sh` | 1 | +| A compose **security/hardening** invariant (caps, `no-new-privileges`, no secret in a healthcheck, socket-proxy scope) | the #90 section of `tests/stack/test_compose.sh` | 1 | +| A new `config.json` axis | one row in `tests/integration/scenarios.sh` | 4 | +| A failure mode needing real containers | `run.sh` `--fault-injection` and/or a `mini-stack` scenario | 4 / 3 | +| The integration harness's own logic | `tests/integration/selftest.sh` | — | + +## Recipes + +### Dashboard behavior (tier 1) +Add a `test_*` to the matching file under `build/dashboard/tests/`. Name it for the behavior, +add a one-line docstring stating the intent, mock at the client boundary (the conftest gives +you an in-memory `state_manager`). 
Run `make test-dashboard` — coverage must stay ≥ 80%. + +```python +def test_pruned_node_is_labelled_pruned(...): + # Intent: a local pruned node shows "Pruned" so a config/DB mismatch is visible (#32). + ... +``` + +### A client parsing a new daemon state (tier 2) +1. Teach the fake to produce the state: edit `tests/integration/fakes/fake_monerod.py` or + `fake_tari.py` (add a `mode`, or a field the daemon returns). +2. Assert the *real* client parses it: add a test to `fakes/test_contract.py` that points the + real `MoneroClient`/`TariClient` at the fake and checks the parsed result. +3. `make test-fakes`. This is the seam that catches "the daemon changed its wire format". + +### A config axis (tier 4) +Add a `NAMEoverrides` row to `scenario_matrix()` in `scenarios.sh`, and the value to +`axis_coverage()`. The self-test **enforces** that every axis value appears in some scenario, +so a half-added axis fails `make test-integration-selftest`. No code changes needed. + +### A control-plane scenario (tier 3) +Add a scenario to `tests/integration/mini-stack/run-mini-stack.sh`: drive the fakes via their +`/control` endpoints (`set_monerod`/`set_tari`) and assert real container state with +`assert_state` / `assert_stays`. `make test-mini-stack` (needs docker). + +## Conventions + +- **Determinism, no sleep-and-hope.** Wait on a real signal with a timeout (`wait_for`, + `assert_state`, `wait_status_ok`). For time-based logic, **backdate timestamps white-box** + rather than patching the global clock — e.g. push an old point into the deque, then act + (see `test_history_older_than_retention_pruned_from_memory`). +- **Shell:** pure logic goes in `lib.sh`/`scenarios.sh` and is tested by `selftest.sh`; I/O + (ssh, docker, RPC) is thin wrappers that aren't unit-tested. Everything stays + `shellcheck --severity=warning` clean. +- **Regenerate the inventory** (`make test-inventory`) when you add/remove a test — CI's + drift check (`make test-inventory-check`) fails otherwise. 
+- **Secrets:** never print tokens/creds/onions; the harness redacts artifacts and hashes + secrets on the box. If you add a secret-bearing field, confirm `redact()` covers it (there's + a self-test for the patterns). + +## Gotchas learned on real hardware + +The live harness was first run against a real synced, mining box — these are the +calibration lessons baked into the tests now. Keep them in mind: + +- **A synced *local* monerod shows `state: "loading"` in `/api/state`**, not `"done"` — it has + no target height once caught up. Assert "synced" via monerod's own `get_info.synchronized` + (the harness's `monero_caught_up`), not the dashboard UI field. +- **`stratum.conns` can read 0 on a healthy, mining box.** Use `proxy_workers` / + `total_hashes` for mining-liveness; `conns` is informational. +- **The mini-stack must be isolated.** Containers are named `itest-*` and control ports are + 28081/28152 so it can't collide with — or control — a real deployment on the same host. + A fake server inside a container must bind `0.0.0.0` (binding `127.0.0.1` makes it + unreachable from peer containers — this once broke release in the mini-stack). +- **monerod-down failover isn't simulated in the mini-stack** (the dashboard's monerod + down-path log-scrapes a real `monerod` container the fake stack lacks); it's covered on real + hardware by `run.sh --fault-injection`. Tari-down is simulated there cleanly. +- **Run `--check` first.** Against any real box, `run.sh --check` asserts the current live + state non-destructively (no config change) — the safe way to validate before the + config-churning matrix. diff --git a/docs/testing-strategy.md b/docs/testing-strategy.md new file mode 100644 index 0000000..89c32b0 --- /dev/null +++ b/docs/testing-strategy.md @@ -0,0 +1,210 @@ +# Testing Strategy + +How Pithead simulates **every situation the stack can be in** — and which layer proves each +one. 
This is the map behind the [integration suite](integration-testing.md); read that for how +to run the live matrix, and this for *what we test where, and why*. + +The guiding idea: the stack's runtime behaviour is a **state machine** (syncing → held → +released; healthy → down → rejected → recovered → readmitted; XvB tiers; container health), +and a healthy, already-synced box only ever shows you one corner of it. So we simulate the rest +— at the cheapest layer that can prove each situation honestly. + +## The four tiers + +| Tier | What it is | Simulates | Where it runs | +|---|---|---|---| +| **1 — Unit** | `build/dashboard/tests/` (pytest, mocked clients) and `tests/stack/` (shell, `docker`/`sudo` stubbed) | Decision logic & field mapping: sync-gate, failover, node-health debounce, XvB engine, `/api/state` shapes, `pithead` config/status logic | Every PR (`make test`) | +| **2 — Contract** | `tests/integration/fakes/test_contract.py` | The real Monero/Tari **clients** parsing the real daemons' wire format — points the actual clients at controllable fakes | Every PR (docker-free) | +| **3 — Mini-stack** | `tests/integration/mini-stack/` (real dashboard + docker-control vs fake daemons) | The control plane **end-to-end with real containers**: hold/release and reject/readmit actually stopping/starting `p2pool`/`xmrig-proxy`, driven deterministically | CI with Docker (`make test-mini-stack`) | +| **4 — Live matrix** | `tests/integration/run.sh` against a real, synced box | What only reality proves: real merge-mining, prune/full DB size, Caddy TLS, Tor onions, HugePages, plus fault injection for real container health verdicts | Manual / release gate (`make test-integration`) | + +**Why this shape, and the answer to "should we use stubs?"** Stubs already do the heavy +lifting — the dashboard has ~140 unit tests that exhaustively drive the hard runtime states with +mocked clients. Adding *more* mocks for the same logic would be duplication. 
What stubs **can't** +prove is wiring: that the real clients parse real daemon output (tier 2), that the dashboard's +stop/start actually moves real containers (tier 3), and that real daemons sync/merge-mine and +real containers go unhealthy (tier 4). So the strategy is **stubs for logic, controllable fake +daemons for the control-plane wiring, and the real box for the irreducibly-real** — each +situation tested once, at the lowest tier that's honest. + +The fakes are the key enabler: because the whole control plane is env-configurable +(`MONERO_RPC_URL`, `TARI_GRPC_ADDRESS`, `DOCKER_CONTROL_URL`, `NODE_DOWN_AFTER_SEC`, +`UPDATE_INTERVAL`, …), we can point the real code at tiny controllable servers and drive the +entire state machine in seconds, in CI, with no chain and no test box. + +## Scenario catalog + +Every situation we care about, what triggers it, and the tier(s) that cover it. ✅ = covered +today; ▶ = exercised by the live matrix / mini-stack when run. + +### A. Configuration permutations +The deploy-time axes — each changes a real runtime path. Full table and assertions in +[Integration Testing › The config matrix](integration-testing.md#the-config-matrix). + +| Situation | Trigger | Tier | +|---|---|---| +| `monero.mode` local vs remote (monerod present/absent, profile gating) | config | 4 ▶ | +| `monero.prune` pruned vs full (DB size, #32 display) | config | 1 ✅ (display) · 4 ▶ (real DB) | +| `monero.rpc_lan_access`, `dashboard.secure`, `xvb.enabled`, `dashboard.tari_required` | config → `.env`/Caddyfile | 4 ▶ | +| `p2pool.pool` main / mini / nano (sidechain, flags) | config | 4 ▶ | + +### B. 
Sync lifecycle (#35) +| Situation | Trigger | Tier | +|---|---|---| +| Cold start, chains syncing → **hold** `p2pool`+`xmrig-proxy` | both `is_syncing` | 1 ✅ · 3 ▶ | +| Monero synced, Tari **required** but still syncing → keep holding | `monero_synced ∧ ¬tari_synced ∧ TARI_REQUIRED` | 1 ✅ (added) · 3 ▶ | +| Monero synced, Tari **non-blocking** → release, passive Tari badge (#51) | `¬TARI_REQUIRED` | 1 ✅ · 4 ▶ | +| Both synced → **release** (one-way latch) | gate satisfied | 1 ✅ · 3 ▶ | +| Network-height UI override doesn't deadlock the gate | p2pool held → height 0 | 1 ✅ | +| Restart mid-sync / post-release (latch persisted) | snapshot reload | 1 ✅ | + +### C. Node health & failover (#31) +| Situation | Trigger | Tier | +|---|---|---| +| monerod down → **reject workers** (stop `xmrig-proxy`) | unreachable ≥ `NODE_DOWN_AFTER_SEC` | 1 ✅ · 3 ▶ · 4 ▶ | +| Tari down + required → reject; Tari down + non-blocking → **ignore** | `tari_down ∧ TARI_REQUIRED?` | 1 ✅ | +| Recovery hysteresis — readmit only after stable `NODE_RECOVERY_AFTER_SEC` | reachable again | 1 ✅ | +| Transient blip / never-reachable → **no** false reject | debounce / `ever_up` | 1 ✅ | +| Double outage; readmit only when **both** healthy | both down → both up | 1 ✅ (added) | +| #35 latch × #31 failover coexist after release | down post-release | 1 ✅ (added) · 3 ▶ | +| Stop/start fails → retry next cycle (idempotent) | docker error | 1 ✅ | + +### D. Container health verdicts (`pithead status`) +| Situation | Trigger | Tier | +|---|---|---| +| All healthy → exit 0 | steady state | 1 ✅ · 4 ▶ | +| Required node **down** / **missing** → exit 1 | stop / `rm` monerod | 1 ✅ (node-down) · 4 ▶ (`--fault-injection`) | +| Running but **unhealthy** → exit 1 | healthcheck fails (SIGSTOP) | 4 ▶ (`--fault-injection`) | +| Miner stopped under sync-hold / failover → exit **0** (intentional) | held / rejected | 1 ✅ · 4 ▶ | +| Remote mode ignores monerod | profile off | 1 ✅ · 4 ▶ | + +### E. 
XvB switching engine +| Situation | Trigger | Tier | +|---|---|---| +| Disabled / zero shares / `fail_count ≥ 3` / no sustainable tier → P2POOL | guards | 1 ✅ | +| Closed-loop ramp/back-off, cold-start seed, VIP-reserve anti-overshoot (#70) | controller | 1 ✅ | +| P2POOL / XVB / SPLIT modes, tiers, smart-sleep early exit | decision | 1 ✅ | +| Real XvB endpoint reachable / failing | network | 4 (real endpoint) | + +### F. Dashboard `/api/state` field states +| Situation | Trigger | Tier | +|---|---|---| +| sync state loading/syncing/done; pruned/full/unknown; db_size | metrics | 1 ✅ | +| badges (node-down, workers-rejected, miner-held, passive-Tari, pruned/full, low-HR) | metrics | 1 ✅ | +| system levels (cpu/mem/disk/hugepages), worker pool/online, chart outage breaks | metrics | 1 ✅ | +| Dashboard reads correct live state on a real stack | real daemons | 4 ▶ | + +### G. CLI lifecycle (`pithead`) +| Situation | Trigger | Tier | +|---|---|---| +| Config validation, secret preservation, `apply` no-op/destructive guards | sourced fns | 1 ✅ | +| `setup`→`up`→`status`→`apply`→`restart`→`down`; idempotency; secret preservation | real box | 4 ▶ (`--lifecycle`) | +| `upgrade` (image pull/rebuild) | real box | release staging smoke (docs) | +| `backup`/`restore`, `reset-dashboard`, `doctor` | real box | 1 ✅ (partial) · 4 (future) | + +### H. 
Host / infrastructure (real-only) +| Situation | Trigger | Tier | +|---|---|---| +| Real merge-mining share lands; real hashrate on dashboard | live mining | 4 ▶ | +| Caddy TLS scheme; Tor onion provisioning; HugePages/AVX2; real disk pressure; prune DB size | real host | 4 ▶ | + +## Running each tier + +```bash +make test # tiers 1 + 2 (+ harness self-test) — every-PR, no docker/server +make test-fakes # tier 2 contract test on its own +make test-mini-stack # tier 3 — needs docker +make test-integration ARGS="--host user@box --dir pithead --lifecycle --fault-injection" # tier 4 +``` + +## Production-readiness posture + +What gates a merge vs. a release, the engineering standards every test holds to, and the gaps +we know about. The full enumerated coverage is in the generated +[Test Inventory](test-inventory.md) (kept honest by a CI drift check). + +### What runs where + +| Check | Tier | When | Blocking? | +|---|---|---|---| +| Dashboard pytest + **≥80% coverage gate** | 1 | every PR | ✅ required | +| Frontend logic (`node --test`) | 1 | every PR | ✅ required | +| Dashboard image test stage (in-container) | 1 | every PR | ✅ required | +| `pithead` shell suite + shellcheck | 1 | every PR | ✅ required | +| Compose interpolation + **security/hardening** invariants | 1 | every PR | ✅ required | +| Fake-daemon **contract test** | 2 | every PR | ✅ required | +| Integration harness **self-test** | — | every PR | ✅ required | +| **Test-inventory drift** check | — | every PR | ✅ required | +| Fake-daemon **docker mini-stack** | 3 | PRs touching the harness/dashboard | ✅ (own workflow) | +| **Live config matrix** on real nodes | 4 | manual / pre-release | ✅ **release gate** ([#44](https://github.com/p2pool-starter-stack/pithead/issues/44)) | + +Tiers 1 and 2 run on every PR with no special infrastructure; tier 3 needs Docker and runs on PRs that +touch the harness or dashboard; tier 4 is the blocking **pre-release** gate (see [Releasing](releasing.md)) because it needs the real synced nodes.
+ +### Engineering standards + +Every scenario, at every tier, holds to the same discipline: + +- **Deterministic, no sleep-and-hope.** Wait on real readiness signals — container health, + `pithead status`, dashboard sync %, miner-released — with timeouts. The only fixed sleeps are + *poll intervals* and the deliberate "stays in state" windows that prove the gate does **not** + act prematurely. +- **Isolated & idempotent.** Each scenario starts from a known baseline and restores it; the + live matrix snapshots `config.json` and reuses (never mutates) the canonical chain dirs; the + mini-stack tears down with `down -v`. +- **Actionable failures.** Per-scenario pass/fail, continue-on-error to collect the whole + matrix, and artifact capture (redacted logs, `compose ps`, `.env`-minus-secrets, dashboard + responses) on failure. +- **Secrets hygiene.** Tokens / RPC creds / onions are never printed; preservation is checked + by hashing on the box; all artifacts pass a redactor. +- **Reproducible.** The live run records a manifest (stack `VERSION`, git rev, image digests). +- **Test code is real code.** Same lint (shellcheck), the coverage gate, and the inventory + drift check apply to the tests themselves. + +### Flake policy + +Integration scenarios **quarantine, never blind-retry**: a scenario that fails intermittently +is marked and investigated, not wrapped in a retry loop that hides a real race. The waiters +have generous timeouts so a slow-but-correct stack passes while a genuinely broken one fails +fast with artifacts. + +### Known gaps (honest) + +These are deliberately **not** yet covered and are the road to full production confidence: + +- **First green run on real hardware.** ✅ Two of the three real-environment tiers are green: + the live harness `--check` (tier 4 read path — 22/22 against a synced, mining box) and the + fake-daemon mini-stack (tier 3 — 11/11 on a real Docker host). 
Between them they surfaced and + fixed four bugs: the dashboard pruned/full label (#32); the harness's three over-strict + assertions (monero-synced, conns, prune display); the fake Tari binding gRPC to loopback; and + the mini-stack's container-name/port isolation. Still pending: the full **destructive** config + matrix run on the box (its read path is already proven via `--check`). +- **Destructive-matrix safety.** ✅ `run.sh --safety-backup` takes a real `pithead backup` + before the destructive scenarios and **automatically rolls the box back** (down → restore → + up) if anything fails; the archive is removed on success. So the matrix can run on a precious + box with a one-command rollback net. +- **CLI breadth in automation.** ✅ `backup`/`restore` are now exercised end-to-end — by + `--safety-backup` and by a `--lifecycle` backup→restore round-trip (assert the pool reverts + and secrets survive). `reset-dashboard` and `upgrade` are still only unit-covered (upgrade + belongs to the release staging smoke test, since it rebuilds/pulls the bundle under test). +- **Soak / longevity.** No multi-hour run asserting no leaks, no log/DB growth runaway, and that + the XvB controller converges over a realistic window. +- **Load / capacity.** No test drives many workers or high share rates to find limits. +- **Security review.** The compose **hardening invariants are regression-guarded** (the #90 + section of `tests/stack/test_compose.sh`: RPC creds never in a healthcheck command, + `no-new-privileges` / `cap_drop` on the leaf containers, the Docker socket proxies stay + least-privilege), so a past fix can't be silently undone. A full security *audit* is still a + separate exercise (`SECURITY.md`) — these tests pin the decisions we've already made, they + don't find new ones. + +## Adding a scenario + +- **Logic** (a new decision/branch) → a unit test (tier 1). Cheapest, fastest. 
+- **A new daemon state** the clients must parse → extend the fakes + the contract test (tier 2), + and it becomes drivable in the mini-stack (tier 3). +- **A config axis** → one row in `tests/integration/scenarios.sh` (tier 4). The self-test + enforces every axis value is covered. +- **A failure mode needing real containers** → a fault in `run.sh`'s fault-injection phase + (tier 4) and/or a mini-stack scenario (tier 3). + +Keep each situation at the lowest honest tier; don't re-prove logic with a heavier harness. diff --git a/pithead b/pithead index 59ad23b..b01a9a4 100755 --- a/pithead +++ b/pithead @@ -60,8 +60,49 @@ fi # --- Lifecycle Helpers --- +# The Compose project name is pinned to "pithead" (docker-compose.yml `name:`). A stack first +# deployed under the old directory-derived project name still has containers holding our +# container_names (tor, monerod, …) under that old project — they'd block `up` with a name +# clash. Remove ONLY those — the containers belonging to the exact project THIS directory used +# to create — so the renamed project can take over. We never touch a container that merely +# shares a service name with us (e.g. someone else's `caddy` from an unrelated project). Chain +# data lives in the bind-mounted data dirs and the Tor onion keys in a bind mount too, so +# nothing is lost; Caddy re-issues its local TLS cert once under the new name. +migrate_compose_project() { + local cfg our_project dir_project names name cid proj + # Best-effort and must never abort pithead, so every substitution is guarded (a bare + # `var=$(failing)` would trip `set -e`). + cfg=$(docker compose config --format json 2>/dev/null) || return 0 + [ -n "$cfg" ] || return 0 + our_project=$(printf '%s' "$cfg" | jq -r '.name // "pithead"' 2>/dev/null) || our_project="pithead" + [ -n "$our_project" ] || our_project="pithead" + # The old project name is the one Compose derived from this directory's basename (lowercased, + # sanitised to [a-z0-9_-], leading `_`/`-` trimmed).
Matching it exactly is what keeps us from removing an unrelated + # container. If it already equals our pinned name there's nothing to migrate. + dir_project=$(basename "$PWD" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9_-' | sed 's/^[_-]*//') # Compose also trims leading _ and - + { [ -n "$dir_project" ] && [ "$dir_project" != "$our_project" ]; } || return 0 + names=$(printf '%s' "$cfg" | jq -r '.services[].container_name // empty' 2>/dev/null) || return 0 + [ -n "$names" ] || return 0 + + local stale=() + while IFS= read -r name; do + [ -n "$name" ] || continue + cid=$(docker ps -aq --filter "name=^${name}$" 2>/dev/null | head -n1) || cid="" + [ -n "$cid" ] || continue + proj=$(docker inspect --format '{{index .Config.Labels "com.docker.compose.project"}}' "$cid" 2>/dev/null) || proj="" + [ "$proj" = "$dir_project" ] && stale+=("$name") + done <<< "$names" + + [ "${#stale[@]}" -gt 0 ] || return 0 + warn "Migrating this stack from the old Compose project '$dir_project' to '$our_project'." + log "Removing the old-named containers so the renamed project can take over. Chain data dirs" + log "and Tor onion keys are bind-mounted (untouched); Caddy re-issues its local TLS cert." + docker rm -f "${stale[@]}" >/dev/null 2>&1 || true +} + stack_up() { log "Starting stack..." + migrate_compose_project # Docker Compose automatically picks up COMPOSE_PROFILES from .env docker compose up -d log "Stack started successfully!" @@ -82,6 +123,7 @@ stack_restart() { stack_upgrade() { log "Upgrading stack (rebuilding containers)..." + migrate_compose_project docker compose up -d --build log "Stack upgraded." } @@ -1754,6 +1796,7 @@ apply() { generate_caddyfile log "Updating containers..." + migrate_compose_project # Compose recreates only the services whose resolved config changed; --remove-orphans # drops monerod when a local→remote switch deactivates the local_node profile.
docker compose up -d --remove-orphans diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..760aeba --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,37 @@ +# Integration tests (`tests/integration/`) + +End-to-end suite that drives a **real, already-provisioned Pithead server** through the +config matrix and asserts the stack behaves (issue +[#54](https://github.com/p2pool-starter-stack/pithead/issues/54)). + +``` +run.sh entry point — connects (SSH or --local) and runs the matrix (+ --lifecycle, + --fault-injection) +scenarios.sh the declarative config matrix (data, not code) +lib.sh shared helpers: target I/O, assertions, readiness waiters, redaction +selftest.sh pure-logic self-test (no server) — runs in CI on every PR +fakes/ controllable fake monerod/Tari + a contract test pointing the REAL clients at + them (tier 2; runs in CI, no docker) +mini-stack/ docker overlay running the real dashboard + docker-control vs the fakes, with a + scenario runner for hold/release + reject/readmit (tier 3; needs docker) +``` + +The live matrix here is **tier 4** of the broader plan — see +[`docs/testing-strategy.md`](../../docs/testing-strategy.md) for all four tiers and the full +scenario catalog. 
+ +Quick start: + +```bash +# Against a remote box over SSH +make test-integration ARGS="--host miner@10.0.0.5 --dir pithead" + +# On the box itself +./run.sh --local --dir /home/miner/pithead --lifecycle + +# Just the pure-logic checks (no server) +make test-integration-selftest +``` + +**Full guide — provisioning the box, the safety model, the matrix, artifacts, and +CI/release wiring — is in [`docs/integration-testing.md`](../../docs/integration-testing.md).** diff --git a/tests/integration/build-pruned-chain.sh b/tests/integration/build-pruned-chain.sh new file mode 100644 index 0000000..40dbcbc --- /dev/null +++ b/tests/integration/build-pruned-chain.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# build-pruned-chain.sh — one-shot builder for a pruned Monero chain alongside the +# canonical full chain, used to give the live test harness BOTH prune modes on one box. +# +# Strategy (minimal mining downtime, full chain never modified): +# 1. stop monerod -> makes the live LMDB consistent for copying +# 2. copy full data.mdb -> onto the CoW (btrfs) volume [downtime window] +# 3. start monerod -> mining resumes immediately after the copy +# 4. prune the COPY in place -> shrinks ~250G -> ~95G, full chain untouched +# +# Self-contained + idempotent-ish: logs with timestamps, writes a status sentinel, +# and always restarts monerod even if the copy fails. Designed to be run under nohup. +set -uo pipefail + +SRC_DIR="${SRC_DIR:-$HOME/code/p2pool-starter-stack/data/monero}" +DST_DIR="${DST_DIR:-/mnt/chains/monero-pruned}" +PRUNE_BIN="${PRUNE_BIN:-$HOME/pithead-testbench/bin/monero-blockchain-prune}" +STATUS="${STATUS:-$HOME/pithead-testbench/status}" +CONTAINER="${CONTAINER:-monerod}" + +ts() { date '+%Y-%m-%dT%H:%M:%S%z'; } +say() { echo "[$(ts)] $*"; } +set_status() { echo "$1" > "$STATUS"; } + +say "START build-pruned-chain" +say "src=$SRC_DIR dst=$DST_DIR" +mkdir -p "$DST_DIR/lmdb" + +src_mdb="$SRC_DIR/lmdb/data.mdb" +if [ ! 
-f "$src_mdb" ]; then say "FATAL: source $src_mdb not found"; set_status "FAIL_NO_SRC"; exit 1; fi +say "source size: $(du -h "$src_mdb" | cut -f1)" + +set_status "STOPPING" +say "stopping $CONTAINER (downtime begins)" +docker stop "$CONTAINER" >/dev/null 2>&1 || { say "WARN docker stop failed (already stopped?)"; } + +set_status "COPYING" +say "copy begin" +copy_start=$(date +%s) +cp "$src_mdb" "$DST_DIR/lmdb/data.mdb" +rc=$? +copy_end=$(date +%s) +say "copy done rc=$rc in $((copy_end - copy_start))s" + +# Restart monerod immediately, regardless of copy outcome — minimise downtime. +set_status "RESTARTING" +say "starting $CONTAINER (downtime ends)" +docker start "$CONTAINER" >/dev/null 2>&1 || say "WARN docker start failed" + +if [ $rc -ne 0 ]; then say "FATAL: copy failed"; set_status "FAIL_COPY"; exit 1; fi + +set_status "PRUNING" +say "prune begin (full chain is back online; pruning the copy)" +prune_start=$(date +%s) +"$PRUNE_BIN" --data-dir "$DST_DIR" 2>&1 +rc=$? +prune_end=$(date +%s) +say "prune done rc=$rc in $((prune_end - prune_start))s" +if [ $rc -ne 0 ]; then say "FATAL: prune failed"; set_status "FAIL_PRUNE"; exit 1; fi + +say "pruned size: $(du -h "$DST_DIR/lmdb/data.mdb" | cut -f1)" +set_status "DONE" +say "ALL DONE" diff --git a/tests/integration/compact-chain.sh b/tests/integration/compact-chain.sh new file mode 100644 index 0000000..d27971a --- /dev/null +++ b/tests/integration/compact-chain.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# +# compact-chain.sh — reclaim LMDB file bloat from an already-pruned Monero chain. +# +# An in-place prune leaves the LMDB file at its full-chain high-water mark (LMDB never shrinks +# its file), so a pruned chain can sit at ~270 GiB on disk while holding only ~95 GiB of live +# data. `monero-blockchain-prune --copy-pruned-database` rewrites the chain into a fresh DB at +# /lmdb-pruned, which comes out at its true compact size. 
+# +# IMPORTANT — speed & safety: +# * It copies every block one-by-one, so it is SLOW (multiple HOURS for a mainnet chain). It is +# NOT a page-level copy. +# * It only READS the source, through a consistent LMDB snapshot, so it is safe to run while +# monerod is up and mining — zero downtime during the copy; the source is never modified. +# * The generic `mdb_copy -c` does NOT work on a Monero chain: Monero ships a patched LMDB and +# stock mdb_copy rejects the on-disk format (MDB_VERSION_MISMATCH). This tool is the only path. +# +# When it finishes, swap the compact copy in (brief downtime) and verify: +# docker stop monerod +# mv <data-dir>/lmdb <data-dir>/lmdb.bloated && mv <data-dir>/lmdb-pruned <data-dir>/lmdb +# docker start monerod # re-syncs the few blocks added during the copy +# # confirm healthy (get_info: synchronized), then: rm -rf <data-dir>/lmdb.bloated +# +# This script ONLY builds the compact copy; it does not stop/start containers or swap. Logs +# before/after sizes and a status sentinel. +set -uo pipefail + +DATA_DIR="${1:?usage: compact-chain.sh <data-dir>}" +PRUNE_BIN="${PRUNE_BIN:-$HOME/pithead-testbench/bin/monero-blockchain-prune}" +LOG="${LOG:-$HOME/pithead-testbench/compact.log}" +STATUS="${STATUS:-$HOME/pithead-testbench/compact-status}" + +ts() { date '+%Y-%m-%dT%H:%M:%S%z'; } +say() { echo "[$(ts)] $*" | tee -a "$LOG"; } + +{ + echo "===== compact run $(ts) =====" + echo "data-dir: $DATA_DIR" + echo "--- lmdb BEFORE ---" + ls -la "$DATA_DIR/lmdb/" 2>/dev/null + echo "data.mdb apparent+disk:"; du -h --apparent-size "$DATA_DIR/lmdb/data.mdb" 2>/dev/null; du -h "$DATA_DIR/lmdb/data.mdb" 2>/dev/null +} >> "$LOG" 2>&1 + +echo COMPACTING > "$STATUS" +say "compaction begin (data-dir=$DATA_DIR)" +t0=$(date +%s) +"$PRUNE_BIN" --data-dir "$DATA_DIR" --copy-pruned-database >> "$LOG" 2>&1 +rc=$?
+t1=$(date +%s) +say "compaction done rc=$rc in $((t1 - t0))s" + +{ + echo "--- lmdb AFTER ---" + ls -la "$DATA_DIR/lmdb/" 2>/dev/null + echo "data.mdb apparent+disk:"; du -h --apparent-size "$DATA_DIR/lmdb/data.mdb" 2>/dev/null; du -h "$DATA_DIR/lmdb/data.mdb" 2>/dev/null +} >> "$LOG" 2>&1 + +if [ $rc -eq 0 ]; then echo DONE > "$STATUS"; else echo "FAIL_rc$rc" > "$STATUS"; fi +say "status=$(cat "$STATUS")" diff --git a/tests/integration/fakes/fake_monerod.py b/tests/integration/fakes/fake_monerod.py new file mode 100644 index 0000000..82d3db7 --- /dev/null +++ b/tests/integration/fakes/fake_monerod.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +""" +Controllable fake monerod for the integration mini-stack (issue #54, tier 3). + +Speaks just enough of monerod's `get_info` RPC for the dashboard's MoneroClient to read it, +plus a `/control` endpoint to drive its state from a test. Lets us reproduce the whole Monero +side of the runtime state machine — syncing %, synced, unreachable, pruned/full DB size — +deterministically, with no real chain. + +Run standalone (in the docker mini-stack): + python3 fake_monerod.py --port 18081 + +Drive it: + curl -s localhost:18081/control -d '{"mode":"syncing","height":1500,"target_height":3000}' + curl -s localhost:18081/control -d '{"mode":"down"}' + curl -s localhost:18081/get_info + +Use in-process (the contract test): + with FakeMonerod() as m: + m.set(mode="syncing", height=1500, target_height=3000) + ...point a real MoneroClient at m.url... +""" +import argparse +import json +import threading +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + +# mode ∈ {"synced", "syncing", "busy", "down"}. height/target_height/database_size are the figures +# get_info returns; the client derives sync %/DB size from them (MoneroClient.get_sync_status).
+DEFAULT_STATE = { + "mode": "synced", + "height": 3_000_000, + "target_height": 3_000_000, + "database_size": 85 * 10**9, +} + + +class _Handler(BaseHTTPRequestHandler): + def log_message(self, *_args): # keep the test output clean + pass + + def _send(self, code, payload): + body = json.dumps(payload).encode() + self.send_response(code) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self): + if self.path.rstrip("/") != "/get_info": + self._send(404, {"status": "NOT_FOUND"}) + return + st = self.server.state + # "down" → unreachable: monerod's RPC not answering. A non-200 makes MoneroClient + # treat the node as unreachable (get_info returns None), which is what we want. + if st["mode"] == "down": + self._send(503, {"status": "BUSY"}) + return + # "busy" → RPC answers HTTP 200 but reports a non-OK status (e.g. mid-reorg). The + # client must distrust the heights and treat it as unreachable, not as synced. 
+ if st["mode"] == "busy": + self._send(200, {"status": "BUSY", "height": st["height"], + "target_height": st["target_height"]}) + return + if st["mode"] == "syncing": + payload = { + "status": "OK", + "synchronized": False, + "height": st["height"], + "target_height": st["target_height"], + "database_size": st["database_size"], + } + else: # synced — monerod reports synchronized and target_height 0 once caught up + payload = { + "status": "OK", + "synchronized": True, + "height": st["height"], + "target_height": 0, + "database_size": st["database_size"], + } + self._send(200, payload) + + def do_POST(self): # POST /control: merge a JSON object into the shared fake state + if self.path.rstrip("/") != "/control": + self._send(404, {"status": "NOT_FOUND"}) + return + try: # everything that can reject the request lives in here, so a bad request gets a 400 + length = int(self.headers.get("Content-Length", 0)) # non-numeric header → ValueError + data = json.loads(self.rfile.read(length) or b"{}") + self.server.state.update(**data) # ** rejects any non-object body with TypeError + except (TypeError, ValueError): # malformed length, bad JSON, or a body that isn't an object + self._send(400, {"error": "bad json"}) + return + self._send(200, self.server.state) + + +class _Server(ThreadingHTTPServer): + daemon_threads = True + + def __init__(self, addr, state): + super().__init__(addr, _Handler) + self.state = state + + +class FakeMonerod: + """Context manager that runs the fake on an ephemeral port in a background thread.""" + + def __init__(self, port=0, host="127.0.0.1", **state): + self.state = {**DEFAULT_STATE, **state} + self._srv = _Server((host, port), self.state) + self.host, self.port = self._srv.server_address + + @property + def url(self): + return f"http://{self.host}:{self.port}" + + def set(self, **kwargs): + self.state.update(kwargs) + + def __enter__(self): + self._thread = threading.Thread(target=self._srv.serve_forever, daemon=True) + self._thread.start() + return self + + def __exit__(self, *_exc): + self._srv.shutdown() + self._srv.server_close() + + +def main(): + ap = argparse.ArgumentParser(description="Controllable fake monerod") + ap.add_argument("--port", type=int, default=18081) + ap.add_argument("--host", default="0.0.0.0") # noqa: S104 —
test-only container + ap.add_argument("--mode", default="synced", choices=["synced", "syncing", "down"], + help="initial state (the mini-stack boots 'syncing' to exercise the hold)") + args = ap.parse_args() + state = dict(DEFAULT_STATE, mode=args.mode) + # "syncing" needs height < target_height to read as syncing (else it looks caught up). + if args.mode == "syncing" and state["height"] >= state["target_height"]: + state["height"], state["target_height"] = 1_500_000, 3_000_000 + srv = _Server((args.host, args.port), state) + print(f"fake-monerod listening on {args.host}:{args.port} (mode={args.mode})", flush=True) + try: + srv.serve_forever() + except KeyboardInterrupt: + pass + + +if __name__ == "__main__": + main() diff --git a/tests/integration/fakes/fake_tari.py b/tests/integration/fakes/fake_tari.py new file mode 100644 index 0000000..3f913d5 --- /dev/null +++ b/tests/integration/fakes/fake_tari.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +""" +Controllable fake Tari base node for the integration mini-stack (issue #54, tier 3). + +Implements just the two BaseNode gRPC methods the dashboard's TariClient calls — GetTipInfo +and GetSyncProgress — against the project's own vendored protobuf stubs, so the real client +talks to it unchanged (the client uses an insecure channel, so there's no auth to fake). A +small HTTP `/control` side-channel drives its state. + +Run standalone (in the docker mini-stack): + python3 fake_tari.py --grpc-port 18142 --control-port 18152 + +Drive it: + curl -s localhost:18152/control -d '{"mode":"syncing","height":500,"target_height":2000}' + curl -s localhost:18152/control -d '{"mode":"down"}' + +Use in-process (the contract test) via start_server(). 
#!/usr/bin/env python3
"""
Controllable fake Tari base node for the integration mini-stack (issue #54, tier 3).

Implements just the two BaseNode gRPC methods the dashboard's TariClient calls — GetTipInfo
and GetSyncProgress — against the project's own vendored protobuf stubs, so the real client
talks to it unchanged (the client uses an insecure channel, so there's no auth to fake). A
small HTTP `/control` side-channel drives its state.

Run standalone (in the docker mini-stack):
    python3 fake_tari.py --grpc-port 18142 --control-port 18152

Drive it:
    curl -s localhost:18152/control -d '{"mode":"syncing","height":500,"target_height":2000}'
    curl -s localhost:18152/control -d '{"mode":"down"}'

Use in-process (the contract test) via start_server().
"""
import argparse
import asyncio
import json
import threading
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

import grpc

from mining_dashboard.client.tari.generated import base_node_pb2 as bn
from mining_dashboard.client.tari.generated import base_node_pb2_grpc as bn_grpc

# mode ∈ {"synced", "syncing", "down"}.
DEFAULT_STATE = {"mode": "synced", "height": 2_000_000, "target_height": 2_000_000}


class FakeBaseNode(bn_grpc.BaseNodeServicer):
    """BaseNode servicer whose replies are computed from a shared, mutable `state` dict."""

    def __init__(self, state):
        self.state = state

    async def GetTipInfo(self, request, context):
        """Report the tip height and sync flag; aborts with UNAVAILABLE when mode is "down"."""
        st = self.state
        if st["mode"] == "down":
            await context.abort(grpc.StatusCode.UNAVAILABLE, "fake node down")
        resp = bn.TipInfoResponse()
        resp.metadata.best_block_height = st["height"]
        # initial_sync_achieved is the authoritative "fully synced" flag the client trusts.
        resp.initial_sync_achieved = st["mode"] == "synced"
        return resp

    async def GetSyncProgress(self, request, context):
        """Report local vs. tip height; aborts with UNAVAILABLE when mode is "down"."""
        st = self.state
        if st["mode"] == "down":
            await context.abort(grpc.StatusCode.UNAVAILABLE, "fake node down")
        resp = bn.SyncProgressResponse()
        resp.local_height = st["height"]
        resp.tip_height = st["target_height"]
        return resp


async def start_server(port, state, host="127.0.0.1"):
    """Start a gRPC server on `host:port` (port 0 = ephemeral). Returns (server, bound_port).

    Defaults to loopback for the in-process contract test; the standalone container passes
    0.0.0.0 so the dashboard can reach it across the docker network (binding 127.0.0.1 inside
    a container makes the port unreachable from peer containers).
    """
    server = grpc.aio.server()
    bn_grpc.add_BaseNodeServicer_to_server(FakeBaseNode(state), server)
    bound = server.add_insecure_port(f"{host}:{port}")
    await server.start()
    return server, bound


# --- standalone HTTP control side-channel (docker mini-stack only) ----------
class _ControlHandler(BaseHTTPRequestHandler):
    """POST /control merges a JSON object into the shared state and echoes the result."""

    def log_message(self, *_args):
        pass

    def _reply(self, code, payload=None):
        """Send status `code`; when `payload` is given, send it as a JSON body as well."""
        self.send_response(code)
        if payload is None:
            self.end_headers()
            return
        body = json.dumps(payload).encode()
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_POST(self):
        if self.path.rstrip("/") != "/control":
            self._reply(404)
            return
        try:
            length = int(self.headers.get("Content-Length", 0))
            if length < 0:
                # rfile.read(-1) would wait for EOF and hang the request.
                raise ValueError("negative Content-Length")
            data = json.loads(self.rfile.read(length) or b"{}")
        except ValueError:  # covers JSONDecodeError and UnicodeDecodeError too
            self._reply(400)
            return
        if not isinstance(data, dict):
            # dict.update() would raise on a list or scalar; reject it as a bad request.
            self._reply(400)
            return
        self.server.state.update(data)
        self._reply(200, self.server.state)


class _ControlServer(ThreadingHTTPServer):
    """Threading HTTP server that carries the `state` dict shared with the gRPC servicer."""

    daemon_threads = True

    def __init__(self, addr, state):
        super().__init__(addr, _ControlHandler)
        self.state = state


async def _main_async(args, state):
    """Serve gRPC and the HTTP control channel from one `state` until gRPC terminates."""
    server, _ = await start_server(args.grpc_port, state, host="0.0.0.0")  # noqa: S104 — test container
    ctrl = _ControlServer(("0.0.0.0", args.control_port), state)  # noqa: S104 — test-only
    threading.Thread(target=ctrl.serve_forever, daemon=True).start()
    print(
        f"fake-tari gRPC on :{args.grpc_port}, control on :{args.control_port}",
        flush=True,
    )
    await server.wait_for_termination()


def main():
    """Parse the command line and run the fake standalone until interrupted."""
    ap = argparse.ArgumentParser(description="Controllable fake Tari base node")
    ap.add_argument("--grpc-port", type=int, default=18142)
    ap.add_argument("--control-port", type=int, default=18152)
    ap.add_argument("--mode", default="synced", choices=["synced", "syncing", "down"],
                    help="initial state (the mini-stack boots 'syncing' to exercise the hold)")
    args = ap.parse_args()
    state = dict(DEFAULT_STATE, mode=args.mode)
    # Booting "syncing" with height == target would not look mid-sync; open a gap.
    if args.mode == "syncing" and state["height"] >= state["target_height"]:
        state["height"], state["target_height"] = 1_000_000, 2_000_000
    try:
        asyncio.run(_main_async(args, state))
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    main()
"""
Contract test: point the REAL dashboard clients at the controllable fakes and assert they
parse every state we need to drive in the mini-stack (issue #54, tier 3 / tier 2 seam).

This is the proof that the fakes speak the daemons' wire format closely enough for the real
MoneroClient / TariClient — and it runs anywhere (no docker, no real chain). If a future
monerod/Tari change breaks the parser, this goes red here instead of only on the live box.

Run:  PYTHONPATH=build/dashboard python3 -m pytest tests/integration/fakes -q
"""
import asyncio
import contextlib
import pathlib
import sys
from unittest.mock import MagicMock

import requests

_HERE = pathlib.Path(__file__).resolve().parent
_REPO = _HERE.parents[2]
# Make the dashboard package and the fakes importable regardless of how pytest is invoked.
sys.path.insert(0, str(_REPO / "build" / "dashboard"))
sys.path.insert(0, str(_HERE))

from fake_monerod import FakeMonerod  # noqa: E402
from fake_tari import start_server  # noqa: E402
from mining_dashboard.client.monero.monero_client import MoneroClient  # noqa: E402
from mining_dashboard.client.tari.tari_client import TariClient  # noqa: E402


# --- Monero (HTTP get_info) -------------------------------------------------
def test_monero_synced_reads_no_sync_and_db_size():
    with FakeMonerod(database_size=85 * 10**9) as m:
        client = MoneroClient(url=m.url, username="")
        st = client.get_sync_status()
        assert st == {"is_syncing": False, "db_size": 85 * 10**9}


def test_monero_syncing_reports_percent():
    with FakeMonerod() as m:
        m.set(mode="syncing", height=1500, target_height=3000, database_size=40 * 10**9)
        client = MoneroClient(url=m.url, username="")
        st = client.get_sync_status()
        assert st["is_syncing"] is True
        assert st["current"] == 1500 and st["target"] == 3000 and st["percent"] == 50
        assert st["db_size"] == 40 * 10**9


def test_monero_down_is_unreachable():
    with FakeMonerod() as m:
        m.set(mode="down")
        client = MoneroClient(url=m.url, username="")
        assert client.get_sync_status() is None


def test_monero_busy_status_is_unreachable():
    # HTTP 200 but status=BUSY (e.g. mid-reorg): the client must distrust it, not read it synced.
    with FakeMonerod() as m:
        m.set(mode="busy")
        assert MoneroClient(url=m.url, username="").get_sync_status() is None


def test_monero_synced_by_height_even_without_flag():
    # synchronized=false but height has reached target → caught up (mirrors monerod at the tip).
    with FakeMonerod() as m:
        m.set(mode="syncing", height=3_000_000, target_height=3_000_000)
        st = MoneroClient(url=m.url, username="").get_sync_status()
        assert st["is_syncing"] is False


def test_monero_db_size_unknown_reads_zero():
    with FakeMonerod(database_size=0) as m:
        st = MoneroClient(url=m.url, username="").get_sync_status()
        assert st == {"is_syncing": False, "db_size": 0}


def test_monero_http_control_mutates_state():
    # Validates the /control path the docker mini-stack drives over the network.
    with FakeMonerod() as m:
        control = requests.post(
            m.url + "/control",
            json={"mode": "syncing", "height": 10, "target_height": 100},
            timeout=5,
        )
        # Fail on the control call itself, not on a confusing get_info mismatch afterwards.
        control.raise_for_status()
        info = requests.get(m.url + "/get_info", timeout=5).json()
        assert info["synchronized"] is False and info["height"] == 10 and info["target_height"] == 100


# --- Tari (gRPC BaseNode) ---------------------------------------------------
# Driven via asyncio.run so they don't depend on pytest-asyncio being active (the dashboard's
# asyncio_mode=auto only applies when pytest's rootdir is build/dashboard).
@contextlib.asynccontextmanager
async def _tari_session(state):
    """Start the fake on an ephemeral port, yield a real TariClient aimed at it, then clean up."""
    server, bound = await start_server(0, state)
    client = TariClient(MagicMock())
    client.grpc_address = f"127.0.0.1:{bound}"
    try:
        yield client
    finally:
        await client.close()
        await server.stop(None)


async def _tari_get_status(state):
    """Return one get_sync_status() reading taken against a fake in the given state."""
    async with _tari_session(state) as client:
        return await client.get_sync_status()


def test_tari_synced_reads_done():
    st = asyncio.run(_tari_get_status({"mode": "synced", "height": 2000, "target_height": 2000}))
    assert st["is_syncing"] is False and st["reachable"] is True and st["percent"] == 100


def test_tari_syncing_reports_percent():
    st = asyncio.run(_tari_get_status({"mode": "syncing", "height": 500, "target_height": 2000}))
    assert st["is_syncing"] is True and st["percent"] == 25 and st["reachable"] is True


def test_tari_down_is_unreachable_with_no_cache():
    # No prior good reading to cache, so a down node is reported unreachable immediately.
    st = asyncio.run(_tari_get_status({"mode": "down", "height": 0, "target_height": 0}))
    assert st["reachable"] is False


def test_tari_syncing_without_reliable_target_avoids_false_100():
    # Early sync: the node can't give a target above local height yet → report syncing at 0%,
    # never a premature ✔ (target 0, not a bogus 100%).
    st = asyncio.run(_tari_get_status({"mode": "syncing", "height": 1000, "target_height": 1000}))
    assert st["is_syncing"] is True and st["target"] == 0 and st["percent"] == 0


def test_tari_serves_cached_reading_when_briefly_unreachable():
    # A busy-but-alive node (gRPC blips) should keep showing its last good reading, flagged
    # reachable=False so node-down detection still sees the outage.
    async def _impl():
        state = {"mode": "synced", "height": 2000, "target_height": 2000}
        async with _tari_session(state) as client:
            first = await client.get_sync_status()   # live: synced + reachable
            state["mode"] = "down"
            second = await client.get_sync_status()  # cached: last reading, reachable False
            return first, second

    first, second = asyncio.run(_impl())
    assert first["reachable"] is True and first["is_syncing"] is False
    assert second["reachable"] is False and second["is_syncing"] is False
# Pithead reference build & test server (`gouda`)

A dedicated **dev + AI-agent test platform** that runs the **live Pithead stack** (Monero node +
P2Pool + Tari merge-mining + dashboard) against real, synced chains, and serves as the **Tier-4
release gate** — changes are validated end-to-end here before release. Read this first.

See **`docs/test-server-architecture.md`** in the repo for the full architecture + how to recreate
this box on another machine. `system-info.md` (next to this file) is a live hardware snapshot:
regenerate with `~/pithead-testbench/system-info.sh > ~/pithead-testbench/system-info.md`.

## ⚠️ Golden rules

This is a **test bench, not a production miner** — downtime and teardown/redeploy are fine. The
constraints that matter:

1. **Never lose the synced chains.** They are the only slow-to-acquire asset (days to re-sync) —
   reuse them. They live at `/srv/code/pithead-data/`, decoupled from the checkout, so you can
   refresh/redeploy the stack freely without touching them.
2. **Storage is the bottleneck (no NVMe yet).** `sdb` (the SATA "SSD") benchmarks at ~37–98 MB/s —
   HDD-class — so monerod, builds, and especially LMDB compaction are slow. Chains live on it at
   `/srv/code/pithead-data` (still better than the `/home` HDD for random I/O, which stays cold
   storage). A real **m.2 PCIe NVMe is the #1 upgrade** — see `docs/test-server-architecture.md`.
3. **Least privilege.** `sudo` is password-protected and interactive-only — don't expect or leave
   passwordless grants. Almost everything here needs **no sudo** (your user is in the `docker` group).
4. **Secrets stay put.** `.env` (RPC creds) and `config.json` (wallet addresses) are owner-only.
   Never print, copy, or commit them.

## Where things are

| Path | What |
|---|---|
| `~/code/pithead/` (`/srv/code/pithead`, SATA SSD) | the stack checkout: `docker-compose.yml`, the `pithead` CLI, your `config.json`/`.env` |
| `/srv/code/pithead-data/{monero,tari,p2pool,dashboard,tor}/` | the chains — **the asset**, on the SATA SSD (`sdb`), decoupled from the checkout |
| `~/pithead-testbench/` | **this dir** — build-server docs + tools |
| `~/pithead-testbench/bin/monero-blockchain-prune` | verified offline Monero tool (version matches monerod) |
| `~/pithead-testbench/{build-pruned-chain,compact-chain,system-info}.sh` | chain ops + system snapshot (also versioned in the repo `tests/integration/`) |
| `/home`, `/mnt/chains` | HDD — cold backups / archives only |

## The chains (this was the confusing part)

- **Monero is PRUNED** (`MONERO_PRUNE=1`) and compacted to its true ~95 GiB. If it ever reads
  ~250 GiB again, that is **LMDB free-page bloat** from an in-place prune — *not* a full chain.
  Compact it (below). Note: the generic `mdb_copy` **cannot** read Monero's patched LMDB
  (`MDB_VERSION_MISMATCH`); only `monero-blockchain-prune` works.
- **Tari is ARCHIVAL/full** (~132 GiB, no pruning configured). That size is genuine data, not
  bloat — there is nothing to compact. Shrinking it would mean *pruning* Tari (a config change +
  re-sync), which is a product decision, not housekeeping.

**Compacting the Monero chain** (reclaim bloat; hours, but no downtime until the swap):
```bash
~/pithead-testbench/compact-chain.sh /srv/code/pithead-data/monero   # builds lmdb-pruned/ (monerod stays up)
# when DONE, swap it in (brief downtime):
docker stop monerod
cd /srv/code/pithead-data/monero && mv lmdb lmdb.bloated && mv lmdb-pruned lmdb
docker start monerod          # re-syncs the few blocks added during the copy
# confirm `pithead status` healthy, then:  rm -rf lmdb.bloated
```

## Running the stack
```bash
cd ~/code/pithead
./pithead status     # health summary
./pithead doctor     # deeper diagnostics
./pithead up | down | apply | backup
```

## Running the test harness (the point of this box)

Tiers 1–3 run anywhere with no real chains; **Tier 4 (the live matrix) runs here.**
```bash
# Drive gouda over SSH from a dev checkout (start non-destructive):
tests/integration/run.sh --host vijit@gouda --dir code/pithead --check       # assert current live state
tests/integration/run.sh --host vijit@gouda --dir code/pithead --readiness   # is the box fit to gate a release?
# Full destructive config matrix, with a pithead backup + auto-rollback on failure:
tests/integration/run.sh --host vijit@gouda --dir code/pithead --safety-backup
# On the box itself:
cd ~/code/pithead && tests/integration/run.sh --local --dir "$PWD" --lifecycle
```
Always start with `--check`/`--readiness`. Use `--safety-backup` for the destructive matrix so a
failure rolls the box back (down → restore → up). See `docs/integration-testing.md` in the repo.

## End-to-end coverage: validated live vs. gaps

**Validated live on gouda (Tier 4):** the config matrix (remote/local node, dashboard secure/insecure,
Tari required/optional, RPC LAN access, XvB on/off) applied + asserted on real synced chains;
lifecycle (restart, secret-preserving `apply`, backup→restore round-trip); node-down failover →
recovery; release readiness; **pruned** monerod (the real prod config).
**Covered without a real chain:** client↔daemon contract tests, the fake daemon mini-stack
(incl. full-prune behavior), compose hardening, config rendering, dashboard unit/frontend tests.

| # | Gap (not tested live) | Worth filling before release? |
|---|---|---|
| 1 | **Full (unpruned) Monero** mode live — gouda is pruned-only | **Low.** Stack code paths don't differ by prune mode (monerod-internal); fakes/config cover it. A multi-day full sync isn't justified. |
| 2 | **Privacy / Tor egress** — no clearnet-leak assertions in the live harness (issue #160) | **High.** Privacy is a core promise. Add egress checks (no clearnet to XvB stats, p2pool, Tari DNS) to the live harness. |
| 3 | **Automated PR gate** — self-hosted runner exists but is manual/opt-in | **Medium-high, high-leverage.** Wire the live harness as a required check on `workflow_dispatch`/push-to-`main` only (never fork PRs). |
| 4 | **Upgrade / migration** across image versions with chain continuity | **Medium.** Real users upgrade. Add a scenario: pull new images → `apply` → assert chain continuity + no re-sync + secrets intact. |
| 5 | **XvB live routing** end-to-end (the raffle optimization) | **Medium.** Core value-prop, but unit/sim-tested today. A periodic live XvB smoke test would help; hard to assert deterministically. |
| 6 | **Multi-worker scale** — harness assumes ~2 workers | **Medium.** For perf confidence add a load-gen worker + assert proxy routing/hashrate. Not a blocker. |
| 7 | **Real Tari merge-mined block** acceptance | **Low.** Finding a block is probabilistic; rely on template/connectivity checks. |
| 8 | **Fault injection over SSH** (currently local-mode only) | **Low-Medium.** Extend SIGSTOP/remove fault cases to the `--host` path. |

**Recommended before release:** #2 (privacy egress) and #3 (automated PR gate); then #4 (upgrade)
and #5 (XvB smoke). The rest are nice-to-have.

## Notes for AI agents
- SSH from a sandboxed agent needs the LAN allowance (e.g. `dangerouslyDisableSandbox`); gouda is on the LAN.
- **Avoid literal `( )` in remote command strings** — they break the non-interactive remote shell.
- `pkill -f <pattern>` self-matches your own command line — kill by PID, or use the `[x]`-bracket trick.
- Don't stop monerod without reason; check `docker ps` health first and narrate any downtime.
- Long jobs: launch detached (`nohup … &`) and poll a status file; SSH sessions drop.
# shellcheck shell=bash
#
# Shared library for the Pithead integration test harness (tests/integration/).
#
# This file is *sourced*, never executed. It defines pure helpers (config rendering,
# expectation derivation, redaction) plus thin I/O wrappers (run a command on the target,
# poll for readiness) that the runner and the self-test build on. Keeping the pure logic
# here lets tests/integration/selftest.sh exercise it without a real server.
#
# Target model: every command runs *on the box* — either over SSH or, with --local, directly.
# Reads (dashboard JSON, pithead status) therefore behave identically in both modes, and we
# never depend on the runner being able to resolve the box's dashboard hostname.

# --- Output -----------------------------------------------------------------
# Colour only on a TTY with NO_COLOR unset (https://no-color.org), matching pithead.
if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
    IT_RESET='\033[0m'; IT_GREEN='\033[1;32m'; IT_YELLOW='\033[1;33m'; IT_RED='\033[1;31m'; IT_DIM='\033[2m'
else
    IT_RESET=''; IT_GREEN=''; IT_YELLOW=''; IT_RED=''; IT_DIM=''
fi

it_log() { echo -e "${IT_GREEN}[ITEST]${IT_RESET} $1"; }
it_warn() { echo -e "${IT_YELLOW}[ITEST]${IT_RESET} $1" >&2; }
it_err() { echo -e "${IT_RED}[ITEST]${IT_RESET} $1" >&2; }
it_step() { echo -e "${IT_DIM} → $1${IT_RESET}"; }

# --- Secrets hygiene --------------------------------------------------------
# The box holds real RPC creds, a proxy token, and onion addresses. Redact anything that
# looks secret before it reaches a log file or the terminal. Defence-in-depth: we also avoid
# printing these values in the first place. Patterns cover .env KEY=VALUE lines and .onion
# hostnames. Keep this conservative — over-redaction is safe, leaks are not.
# The replacement is an explicit <redacted> marker, so a redacted value can be told apart
# from a genuinely empty one (an empty MONERO_NODE_USERNAME means "no auth").
redact() {
    sed -E \
        -e 's/(PROXY_AUTH_TOKEN|MONERO_NODE_PASSWORD|MONERO_NODE_USERNAME|.*_PASSWORD|.*_TOKEN|.*_SECRET)=.*/\1=<redacted>/' \
        -e 's/[a-z2-7]{56}\.onion/<redacted>.onion/g'
}

# --- Assertions -------------------------------------------------------------
# Counters are global so the runner can total them across scenarios.
IT_PASS=0
IT_FAIL=0
IT_FAILED_NAMES=""

it_pass() { IT_PASS=$((IT_PASS + 1)); printf ' %b✓%b %s\n' "$IT_GREEN" "$IT_RESET" "$1"; }
it_fail() {
    IT_FAIL=$((IT_FAIL + 1))
    IT_FAILED_NAMES="${IT_FAILED_NAMES}\n - ${IT_CURRENT_SCENARIO:-?}: $1"
    printf ' %b✗%b %s\n %s\n' "$IT_RED" "$IT_RESET" "$1" "${2:-}"
}

# All assertions take: <name> <actual> <expected-or-needle>.
assert_eq() { if [ "$2" = "$3" ]; then it_pass "$1"; else it_fail "$1" "expected [$3], got [$2]"; fi; }
assert_ne() { if [ "$2" != "$3" ]; then it_pass "$1"; else it_fail "$1" "expected not [$3]"; fi; }
assert_rc() { if [ "$2" = "$3" ]; then it_pass "$1"; else it_fail "$1" "expected rc $3, got $2"; fi; }
assert_contains() { case "$2" in *"$3"*) it_pass "$1" ;; *) it_fail "$1" "[$2] missing [$3]" ;; esac; }
# Numeric "greater than / >=" with a graceful non-number guard.
assert_num_ge() {
    if [ -n "$2" ] && [ "$2" -ge "$3" ] 2>/dev/null; then it_pass "$1"; else it_fail "$1" "expected >= $3, got [$2]"; fi
}
assert_num_gt() {
    if [ -n "$2" ] && [ "$2" -gt "$3" ] 2>/dev/null; then it_pass "$1"; else it_fail "$1" "expected > $3, got [$2]"; fi
}

# --- Config rendering (pure) ------------------------------------------------
# Map a space-separated list of `dotted.path=value` overrides into a jq program that applies
# them to a config.json. Values are typed: true/false -> boolean, integers -> number,
# everything else -> string. Pure and deterministic so selftest.sh can verify it.
#
# Only a whole-value match of an optionally negative integer is emitted as a bare number:
# something like `1-2` or a lone `-` would otherwise be spliced into the program unquoted and
# be evaluated as arithmetic (or fail to parse). String values have `\` and `"` escaped so
# they cannot terminate the jq string literal early.
overrides_to_jq() {
    local program="." pair path value jsonval
    for pair in "$@"; do
        [ -z "$pair" ] && continue
        path="${pair%%=*}"
        value="${pair#*=}"
        if [ "$value" = "true" ] || [ "$value" = "false" ]; then
            jsonval="$value"
        elif [[ "$value" =~ ^-?[0-9]+$ ]]; then
            jsonval="$value"
        else
            value="${value//\\/\\\\}"
            value="${value//\"/\\\"}"
            jsonval="\"$value\""
        fi
        program="${program} | .${path}=${jsonval}"
    done
    printf '%s' "$program"
}

# Render a scenario's config.json to stdout: start from the box's baseline config (real
# wallets / data dirs / host preserved) and apply the scenario overrides. Requires jq.
render_scenario_config() {
    local baseline_json="$1"; shift
    local program; program="$(overrides_to_jq "$@")"
    printf '%s' "$baseline_json" | jq "$program"
}

# Decide whether a scenario can run on this box, augmenting its overrides where needed (an alt
# data dir for the prune axis, a remote endpoint for remote mode). On success sets RESOLVED to
# the final override string and returns 0; on a missing prerequisite sets SKIP_REASON and
# returns 1 — no silent drops, and never a prune flip on the canonical synced DB (which would
# invalidate it). Reads the globals BASELINE_PRUNE / PRUNED_DATA_DIR / FULL_DATA_DIR /
# REMOTE_MONERO_HOST (all optional). Pure given those globals, so the self-test exercises it.
RESOLVED=""
SKIP_REASON=""
# shellcheck disable=SC2034  # RESOLVED/SKIP_REASON are output globals consumed by run.sh & selftest.sh
resolve_overrides() {
    local overrides="$1" prune mode out="$1"
    RESOLVED=""; SKIP_REASON=""

    prune="$(printf '%s' "$overrides" | tr ' ' '\n' | sed -n 's/^monero\.prune=//p')"
    mode="$(printf '%s' "$overrides" | tr ' ' '\n' | sed -n 's/^monero\.mode=//p')"

    # Prune axis: only flip away from the baseline DB if a matching synced dir is provided —
    # flipping prune on the canonical dir would invalidate it (a DEST change).
    if [ "$prune" = "true" ] && [ "${BASELINE_PRUNE:-}" = "0" ]; then
        [ -n "${PRUNED_DATA_DIR:-}" ] || { SKIP_REASON="needs --pruned-data-dir (box baseline is full)"; return 1; }
        out="$out monero.data_dir=$PRUNED_DATA_DIR"
    fi
    if [ "$prune" = "false" ] && [ "${BASELINE_PRUNE:-}" = "1" ]; then
        [ -n "${FULL_DATA_DIR:-}" ] || { SKIP_REASON="needs --full-data-dir (box baseline is pruned)"; return 1; }
        out="$out monero.data_dir=$FULL_DATA_DIR"
    fi

    # Remote mode needs an external endpoint to point at.
    if [ "$mode" = "remote" ]; then
        [ -n "${REMOTE_MONERO_HOST:-}" ] || { SKIP_REASON="needs --remote-monero-host"; return 1; }
        out="$out monero.remote.host=$REMOTE_MONERO_HOST"
    fi

    RESOLVED="$out"
    return 0
}

# --- Expectation derivation (pure) ------------------------------------------
# Given a rendered config.json, list the services we expect to be running. The bundled
# monerod only runs in local mode (the local_node compose profile); in remote mode it must
# be ABSENT. Everything else is always expected. Mirrors stack_status()'s profile gating.
EXPECTED_ALWAYS="caddy dashboard docker-control docker-proxy p2pool tari tor xmrig-proxy"

expected_services() {
    local config_json="$1" mode
    mode="$(printf '%s' "$config_json" | jq -r '.monero.mode // "local"')"
    if [ "$mode" = "local" ]; then
        printf '%s\n' "monerod $EXPECTED_ALWAYS" | tr ' ' '\n' | sort
    else
        printf '%s\n' "$EXPECTED_ALWAYS" | tr ' ' '\n' | sort
    fi
}

# Services that must NOT exist for this config (remote mode -> no local monerod).
absent_services() {
    local config_json="$1" mode
    mode="$(printf '%s' "$config_json" | jq -r '.monero.mode // "local"')"
    [ "$mode" = "remote" ] && printf 'monerod\n'
}

# Human-readable pool label as the dashboard reports it, from the config pool key.
pool_label() {
    case "$1" in
        main) printf 'Main' ;;
        mini) printf 'Mini' ;;
        nano) printf 'Nano' ;;
        *) printf '%s' "$1" ;;
    esac
}

# --- Target I/O (SSH or local) ----------------------------------------------
# Globals set by the runner: IT_MODE (ssh|local), IT_SSH_DEST, IT_SSH_OPTS (array),
# IT_REMOTE_DIR, IT_PITHEAD (the pithead invocation, e.g. "./pithead" or "sudo ./pithead").

# Run a shell snippet on the target, in the stack directory. The snippet is our own trusted
# code; we never interpolate untrusted data into it. Returns the remote command's exit code.
rx() {
    local snippet="$1"
    if [ "$IT_MODE" = "local" ]; then
        ( cd "$IT_REMOTE_DIR" && bash -c "$snippet" )
    else
        local remote
        remote="cd $(quote_arg "$IT_REMOTE_DIR") && { $snippet; }"
        ssh "${IT_SSH_OPTS[@]}" "$IT_SSH_DEST" "$remote"
    fi
}

# Quote a single argument for safe expansion inside the remote shell string.
quote_arg() { printf '%q' "$1"; }

# Run pithead with a subcommand on the target, e.g. `pithead status` or `pithead apply -y`.
pithead() { rx "$IT_PITHEAD $*"; }

# Fetch the dashboard state JSON from the box (dashboard binds 127.0.0.1:8000 on the host
# network). Empty output on failure so callers can detect unreachable.
api_state() { rx "curl -fsS --max-time 10 http://127.0.0.1:8000/api/state" 2>/dev/null; }

# Split a "<state> <health>" string (from service_state) into its two fields. Pure helpers so
# the self-test can verify the fault-injection predicates classify correctly.
svc_state_of() { printf '%s' "${1%% *}"; }
svc_health_of() { printf '%s' "${1##* }"; }

# Pull a jq path out of a JSON blob, printing nothing for an absent/null value. The `?`
# swallows "cannot index null" on a missing parent, and `values` drops nulls — but NOT
# boolean false (so `.monero.prune == false` reads as "false", not ""; `// empty` would
# wrongly swallow it because false is falsy in jq).
jq_get() { printf '%s' "$1" | jq -r "($2)? | values" 2>/dev/null; }

# Authoritative "is Monero caught up?" — query monerod's own get_info on the box (creds stay
# on the box) and trust its `synchronized` flag / target_height 0, exactly like the sync gate.
# We do NOT use the dashboard's `.sync.monero.state`: a synced LOCAL node has no target height,
# so that field reads "loading", not "done" (a real-hardware gotcha). Returns 0 when synced.
monero_caught_up() {
    rx 'u=$(grep -E "^MONERO_NODE_USERNAME=" .env 2>/dev/null | cut -d= -f2-);
        p=$(grep -E "^MONERO_NODE_PASSWORD=" .env 2>/dev/null | cut -d= -f2-);
        url=$(grep -E "^MONERO_RPC_URL=" .env 2>/dev/null | cut -d= -f2-); [ -n "$url" ] || url="http://127.0.0.1:18081";
        if [ -n "$u" ]; then body=$(curl -fsS --max-time 8 --digest -u "$u:$p" "$url/get_info" 2>/dev/null);
        else body=$(curl -fsS --max-time 8 "$url/get_info" 2>/dev/null); fi;
        printf "%s" "$body" | jq -e "(.status==\"OK\") and ((.synchronized==true) or (.target_height==0))" >/dev/null 2>&1'
}

# --- Readiness waiters ------------------------------------------------------
# Poll a predicate until it succeeds or the timeout elapses. The interval is a *poll* cadence
# against a real readiness signal — not a fixed "sleep and hope" (issue #54). Returns 0 on
# success, 1 on timeout.
now_s() { date +%s; }

wait_for() { # wait_for <timeout_s> <interval_s> <description> <cmd...>
    local timeout="$1" interval="$2" desc="$3"; shift 3
    local deadline=$(( $(now_s) + timeout ))
    it_step "waiting for ${desc} (timeout ${timeout}s)…"
    while :; do
        if "$@"; then return 0; fi
        if [ "$(now_s)" -ge "$deadline" ]; then
            it_warn "timed out after ${timeout}s waiting for ${desc}"
            return 1
        fi
        sleep "$interval"
    done
}

# Predicate: pithead status exits 0 (all expected services healthy / intentional-stops aside).
_pred_status_ok() { pithead status >/dev/null 2>&1; }

# Predicate: monerod itself reports caught up (authoritative; see monero_caught_up).
_pred_monero_synced() { monero_caught_up; }

# Predicate: the sync gate has released the miner — at least one worker is online on the proxy.
# (proxy_workers is the reliable signal; stratum.conns can read 0 on a healthy, mining box.)
_pred_miner_running() {
    local st; st="$(api_state)"; [ -n "$st" ] || return 1
    local w; w="$(jq_get "$st" '.proxy_workers')"
    [ -n "$w" ] && [ "$w" -ge 1 ] 2>/dev/null
}

wait_status_ok() { wait_for "${1:-180}" 5 "pithead status OK" _pred_status_ok; }
wait_monero_synced() { wait_for "${1:-300}" 10 "Monero sync complete" _pred_monero_synced; }
wait_miner_running() { wait_for "${1:-180}" 5 "miner released" _pred_miner_running; }

# --- Artifact capture -------------------------------------------------------
# On a scenario failure, collect everything needed to debug it — redacted. Writes into
# <outdir>/<scenario>/. Best-effort: never let capture failures mask the test result.
capture_artifacts() {
    local scenario="$1" outdir="$2"
    local dir="${outdir}/${scenario}"
    mkdir -p "$dir"
    it_step "capturing artifacts to ${dir}"
    rx "docker compose ps" 2>&1 | redact > "${dir}/compose-ps.txt" || true
    rx "$IT_PITHEAD status" 2>&1 | redact > "${dir}/status.txt" || true
    rx "$IT_PITHEAD doctor" 2>&1 | redact > "${dir}/doctor.txt" || true
    rx "cat config.json" 2>&1 | redact > "${dir}/config.json" || true
    rx "cat .env" 2>&1 | redact > "${dir}/env.redacted.txt" || true
    api_state | redact > "${dir}/api-state.json" || true
    # Last 200 lines of each service's logs, redacted.
    rx "docker compose logs --tail=200 --no-color" 2>&1 | redact > "${dir}/logs.txt" || true
}
+#
+# Runs the REAL dashboard + the REAL docker-control/-proxy socket proxies against CONTROLLABLE
+# fake monerod/Tari, with lightweight p2pool/xmrig-proxy containers the dashboard can actually
+# stop/start. This reproduces the runtime control plane end-to-end — sync-hold/release (#35) and
+# node-down → reject → readmit (#31) — deterministically, in CI, with no real chain or test box.
+#
+# Driven by run-mini-stack.sh. The dashboard and the fakes share one image (the dashboard's,
+# which already has mining_dashboard + grpc installed, so fake_tari can use the vendored stubs).
+name: pithead-itest
+
+x-fake-image: &fake_image pithead-dashboard:itest
+
+networks:
+  itestnet:
+    driver: bridge
+
+volumes:
+  dashboard_data:
+  dashboard_stats:
+
+services:
+  # The real dashboard, pointed at the fakes and the socket proxies. Fast loop + short debounce
+  # so scenarios converge in seconds. Binds 127.0.0.1:8000 inside the container; the runner
+  # reads /api/state via `compose exec`, so no published port is needed.
+  dashboard:
+    build: ../../../build/dashboard
+    image: *fake_image
+    container_name: itest-dashboard
+    networks: [itestnet]
+    volumes:
+      - dashboard_data:/data
+      - dashboard_stats:/app/stats:ro
+    environment:
+      HOST_IP: "127.0.0.1"
+      TZ: "Etc/UTC"
+      MONERO_RPC_URL: "http://fake-monerod:18081"
+      MONERO_NODE_USERNAME: ""
+      MONERO_NODE_PASSWORD: ""
+      MONERO_NODE_HOST: "fake-monerod"
+      MONERO_PRUNE: "true"
+      TARI_GRPC_ADDRESS: "fake-tari:18142"
+      DOCKER_PROXY_URL: "tcp://docker-proxy:2375"
+      DOCKER_CONTROL_URL: "tcp://docker-control:2375"
+      # Namespaced container names so the mini-stack never collides with — or controls — a real
+      # deployment's p2pool/xmrig-proxy on the same host.
+      SYNC_GATE_CONTAINERS: "itest-p2pool,itest-xmrig-proxy"
+      REJECT_WORKERS_CONTAINER: "itest-xmrig-proxy"
+      TARI_REQUIRED: "true"
+      XVB_ENABLED: "false"
+      XVB_POOL_URL: ""
+      XVB_DONOR_ID: ""
+      P2POOL_URL: "itest-p2pool:3333"
+      MONERO_WALLET_ADDRESS: "49iTestWalletPlaceholderXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+      PROXY_HOST: "xmrig-proxy"
+      PROXY_API_PORT: "3344"
+      PROXY_AUTH_TOKEN: "itest"
+      UPDATE_INTERVAL: "2"
+      NODE_DOWN_AFTER_SEC: "4"
+      NODE_RECOVERY_AFTER_SEC: "3"
+    depends_on: [fake-monerod, fake-tari, docker-control, docker-proxy, p2pool, xmrig-proxy]
+
+  fake-monerod:
+    image: *fake_image
+    container_name: itest-fake-monerod
+    networks: [itestnet]
+    entrypoint: []
+    # Boot mid-sync so the dashboard holds the miner; the runner flips it to synced/down.
+    command: ["python3", "/fakes/fake_monerod.py", "--port", "18081", "--mode", "syncing"]
+    ports: ["127.0.0.1:28081:18081"]  # host loopback 28081 (avoids a real monerod's 18081; never LAN-exposed) → 18081 inside
+    volumes: ["../fakes:/fakes:ro"]
+
+  fake-tari:
+    image: *fake_image
+    container_name: itest-fake-tari
+    networks: [itestnet]
+    entrypoint: []
+    command: ["python3", "/fakes/fake_tari.py", "--grpc-port", "18142", "--control-port", "18152", "--mode", "syncing"]
+    ports: ["127.0.0.1:28152:18152"]  # HTTP control side-channel, host loopback 28152 only
+    volumes: ["../fakes:/fakes:ro"]
+
+  # Stand-ins for the miner containers: real, named containers the dashboard genuinely
+  # stops/starts via docker-control. They just idle.
+  p2pool:
+    image: busybox:1.36
+    container_name: itest-p2pool
+    networks: [itestnet]
+    command: ["sh", "-c", "while true; do sleep 30; done"]
+
+  xmrig-proxy:
+    image: busybox:1.36
+    container_name: itest-xmrig-proxy
+    networks: [itestnet]
+    command: ["sh", "-c", "while true; do sleep 30; done"]
+
+  # Read-only socket proxy (stats/logs) — mirrors the production docker-proxy.
+  docker-proxy:
+    image: tecnativa/docker-socket-proxy:v0.4.2
+    container_name: itest-docker-proxy
+    networks: [itestnet]
+    environment: ["CONTAINERS=1", "LOGS=1"]  # NOTE(review): LOGS does not look like a docker-socket-proxy switch (logs ride on CONTAINERS) — confirm
+    volumes: ["/var/run/docker.sock:/var/run/docker.sock:ro"]
+
+  # Write proxy scoped to start/stop only — mirrors the production docker-control.
+  docker-control:
+    image: tecnativa/docker-socket-proxy:v0.4.2
+    container_name: itest-docker-control
+    networks: [itestnet]
+    environment: ["POST=1", "ALLOW_START=1", "ALLOW_STOP=1"]
+    volumes: ["/var/run/docker.sock:/var/run/docker.sock:ro"]
diff --git a/tests/integration/mini-stack/run-mini-stack.sh b/tests/integration/mini-stack/run-mini-stack.sh
new file mode 100755
index 0000000..42a00c6
--- /dev/null
+++ b/tests/integration/mini-stack/run-mini-stack.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+#
+# Drive the integration mini-stack (issue #54, tier 3) through the control-plane state machine
+# and assert the REAL dashboard holds/releases and rejects/readmits the REAL miner containers,
+# driven by the controllable fakes. Needs docker (compose v2). Runs in CI; also `make
+# test-mini-stack`.
+#
+# Scenarios:
+#   1. boot syncing → dashboard HOLDS itest-p2pool + itest-xmrig-proxy (#35)
+#   2. both chains synced → dashboard RELEASES them
+#   3. monerod down → dashboard REJECTS workers (stops itest-xmrig-proxy) (#31)
+#   4. monerod back → dashboard READMITS workers
+#
+set -uo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPOSE_FILE="$HERE/docker-compose.fake.yml"
+PASS=0
+FAIL=0
+
+c_ok() { PASS=$((PASS + 1)); printf ' \033[1;32m✓\033[0m %s\n' "$1"; }
+c_bad() { FAIL=$((FAIL + 1)); printf ' \033[1;31m✗\033[0m %s\n %s\n' "$1" "${2:-}"; }
+log() { printf '\033[1;36m[mini-stack]\033[0m %s\n' "$1"; }
+
+if ! docker compose version >/dev/null 2>&1; then
+  echo "SKIP: docker compose not available"
+  exit 0
+fi
+
+compose() { docker compose -f "$COMPOSE_FILE" "$@"; }
+cstate() { docker inspect -f '{{.State.Status}}' "$1" 2>/dev/null || echo "missing"; }
+ctl() { curl -fsS --max-time 5 -H 'Content-Type: application/json' "$1" -d "$2" >/dev/null; } # POST JSON to a fake /control (curl -d alone would label it form-urlencoded)
+
+# Poll a container until it reaches an expected state, or time out.
+wait_state() { # wait_state <container> <state> [timeout_s]
+  local c="$1" want="$2" timeout="${3:-60}" end
+  end=$(( $(date +%s) + timeout ))
+  while :; do
+    [ "$(cstate "$c")" = "$want" ] && return 0
+    [ "$(date +%s)" -ge "$end" ] && return 1
+    sleep 1
+  done
+}
+
+assert_state() { # assert_state