diff --git a/tests/systemtests/audit_empty_active_set_bootstrap_test.go b/tests/systemtests/audit_empty_active_set_bootstrap_test.go index 52411eeb..1a059157 100644 --- a/tests/systemtests/audit_empty_active_set_bootstrap_test.go +++ b/tests/systemtests/audit_empty_active_set_bootstrap_test.go @@ -115,23 +115,29 @@ func TestAuditEmptyActiveSetBootstrap_HostOnlyReportsRecover(t *testing.T) { // TestAuditEmptyActiveSetBootstrap_NonCompliantHostStaysPostponed verifies // the bootstrap-recovery exception still gates on self-compliance. A -// POSTPONED supernode that submits a host report violating a min-free -// threshold MUST remain POSTPONED even when the active set is empty. +// POSTPONED supernode that submits a host report violating a non-storage +// min-free threshold (CPU here) MUST remain POSTPONED even when the active +// set is empty. // // This guards against the exception turning into a "free pass" for // misbehaving SNs and complements the unit-level tests in // x/audit/v1/keeper/enforcement_empty_active_set_test.go. +// +// Note: per LEP-6 §17 disk pressure is owned exclusively by the STORAGE_FULL +// transition path (audit SetReport) and is no longer a postpone reason, so +// this test exercises the non-storage CPU path. The disk-pressure bootstrap +// case is covered by TestAuditEmptyActiveSetBootstrap_DiskPressureGoesToStorageFull. func TestAuditEmptyActiveSetBootstrap_NonCompliantHostStaysPostponed(t *testing.T) { const ( epochLengthBlocks = uint64(10) originHeight = int64(1) ) - // Set a non-zero MinDiskFreePercent so non-compliant disk usage in the host + // Set a non-zero MinCpuFreePercent so non-compliant CPU usage in the host // report blocks self-compliance. 
sut.ModifyGenesisJSON(t, setSupernodeParamsForAuditTests(t), - setAuditParamsForFastEpochsWithMinDiskFree(t, epochLengthBlocks, 1, 1, 1, []uint32{4444}, 20), + setAuditParamsForFastEpochsWithMinCpuFree(t, epochLengthBlocks, 1, 1, 1, []uint32{4444}, 20), ) sut.StartChain(t) @@ -153,10 +159,10 @@ func TestAuditEmptyActiveSetBootstrap_NonCompliantHostStaysPostponed(t *testing. require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n0.valAddr)) require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr)) - // Epoch 1: empty active set. Both submit host reports with disk usage 95% - // (5% free, below the 20% MinDiskFreePercent). Self-compliance fails. + // Epoch 1: empty active set. Both submit host reports with CPU usage 95% + // (5% free, below the 20% MinCpuFreePercent). Self-compliance fails. epochID1 := uint64((epoch1Start - originHeight) / int64(epochLengthBlocks)) - hostNonCompliant := auditHostReportWithDiskUsageJSON([]string{"PORT_STATE_OPEN"}, 95.0) + hostNonCompliant := auditHostReportWithCpuUsageJSON([]string{"PORT_STATE_OPEN"}, 95.0) RequireTxSuccess(t, submitEpochReport(t, cli, n0.nodeName, epochID1, hostNonCompliant, nil)) RequireTxSuccess(t, submitEpochReport(t, cli, n1.nodeName, epochID1, hostNonCompliant, nil)) @@ -168,3 +174,73 @@ func TestAuditEmptyActiveSetBootstrap_NonCompliantHostStaysPostponed(t *testing. require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr), "node1 should remain POSTPONED — self-compliance gate blocks the bootstrap exception") } + +// TestAuditEmptyActiveSetBootstrap_DiskPressureGoesToStorageFull verifies the +// LEP-6 §17 invariant that disk pressure is owned exclusively by the +// STORAGE_FULL transition path, not by audit_host_requirements POSTPONE. +// +// Scenario: +// 1. Two SNs register and miss epoch 0 reports → both POSTPONED for +// audit_missing_reports. +// 2. Epoch 1: empty active set. 
Both submit host reports with disk usage +// above the supernode module's MaxStorageUsagePercent (default 90). +// 3. Epoch 1 end: the bootstrap-recovery exception fires because +// selfHostCompliant ignores disk (only cpu/mem gate the bootstrap +// exception). The new recoverSupernodeFromPostponed helper observes +// disk > MaxStorageUsagePercent in the same epoch's report and steers +// recovery to STORAGE_FULL instead of ACTIVE. +// +// Invariant locked in: disk pressure never produces ACTIVE in this branch, +// never produces POSTPONED via audit_host_requirements, and produces +// STORAGE_FULL exactly when disk > MaxStorageUsagePercent. +func TestAuditEmptyActiveSetBootstrap_DiskPressureGoesToStorageFull(t *testing.T) { + const ( + epochLengthBlocks = uint64(10) + originHeight = int64(1) + ) + + // No MinCpuFreePercent / MinMemFreePercent override → only disk pressure + // is in play. The supernode module's default MaxStorageUsagePercent (90) + // gates the STORAGE_FULL transition; we report 95% to cross it. + sut.ModifyGenesisJSON(t, + setSupernodeParamsForAuditTests(t), + setAuditParamsForFastEpochs(t, epochLengthBlocks, 1, 1, 1, []uint32{4444}), + ) + sut.StartChain(t) + + cli := NewLumeradCLI(t, sut, true) + n0 := getNodeIdentity(t, cli, "node0") + n1 := getNodeIdentity(t, cli, "node1") + + registerSupernode(t, cli, n0, "192.168.1.1") + registerSupernode(t, cli, n1, "192.168.1.2") + + // Epoch 0: no reports → both POSTPONED for audit_missing_reports. + currentHeight := sut.AwaitNextBlock(t) + _, epoch0Start := nextEpochAfterHeight(originHeight, epochLengthBlocks, currentHeight) + epoch1Start := epoch0Start + int64(epochLengthBlocks) + epoch2Start := epoch1Start + int64(epochLengthBlocks) + + awaitAtLeastHeightWithSlack(t, epoch1Start) + + require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n0.valAddr)) + require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr)) + + // Epoch 1: empty active set. 
Both submit host reports with disk usage + // 95% (> MaxStorageUsagePercent=90). Self-compliance passes (no + // cpu/mem floor configured), so the bootstrap exception fires; the + // recovery helper observes the high disk and steers to STORAGE_FULL. + epochID1 := uint64((epoch1Start - originHeight) / int64(epochLengthBlocks)) + hostHighDisk := auditHostReportWithDiskUsageJSON([]string{"PORT_STATE_OPEN"}, 95.0) + RequireTxSuccess(t, submitEpochReport(t, cli, n0.nodeName, epochID1, hostHighDisk, nil)) + RequireTxSuccess(t, submitEpochReport(t, cli, n1.nodeName, epochID1, hostHighDisk, nil)) + + awaitAtLeastHeightWithSlack(t, epoch2Start) + + // LEP-6 §17 invariant: disk pressure routes POSTPONED → STORAGE_FULL, + // never POSTPONED → ACTIVE, never stuck POSTPONED on audit_host_requirements. + require.Equal(t, "SUPERNODE_STATE_STORAGE_FULL", querySupernodeLatestState(t, cli, n0.valAddr), + "node0 should transition POSTPONED → STORAGE_FULL via the audit recovery helper (disk > MaxStorageUsagePercent)") + require.Equal(t, "SUPERNODE_STATE_STORAGE_FULL", querySupernodeLatestState(t, cli, n1.valAddr), + "node1 should transition POSTPONED → STORAGE_FULL via the audit recovery helper (disk > MaxStorageUsagePercent)") +} diff --git a/tests/systemtests/audit_test_helpers_test.go b/tests/systemtests/audit_test_helpers_test.go index 4f9de9b1..54bf17ae 100644 --- a/tests/systemtests/audit_test_helpers_test.go +++ b/tests/systemtests/audit_test_helpers_test.go @@ -66,18 +66,22 @@ func setAuditParamsForFastEpochs(t *testing.T, epochLengthBlocks uint64, peerQuo } } -// setAuditParamsForFastEpochsWithMinDiskFree is setAuditParamsForFastEpochs -// plus an explicit MinDiskFreePercent override. Used by tests that need to -// exercise the self-compliance gate against the host report's disk-usage -// field (e.g. the empty-active-set bootstrap exception's self-compliance -// guard). 
-func setAuditParamsForFastEpochsWithMinDiskFree(t *testing.T, epochLengthBlocks uint64, peerQuorumReports, minTargets, maxTargets uint32, requiredOpenPorts []uint32, minDiskFreePercent uint32) GenesisMutator { +// setAuditParamsForFastEpochsWithMinCpuFree is setAuditParamsForFastEpochs +// plus an explicit MinCpuFreePercent override. Used by tests that need to +// exercise the self-compliance gate against a non-storage host minimum +// (e.g. the empty-active-set bootstrap exception's self-compliance guard). +// +// Note: disk pressure is intentionally NOT used for postpone/self-compliance +// gating — per LEP-6 it is owned exclusively by the STORAGE_FULL transition +// path in audit SetReport. Tests that want to keep an SN out of the recovery +// path must drive non-compliance via a non-storage metric (cpu or mem). +func setAuditParamsForFastEpochsWithMinCpuFree(t *testing.T, epochLengthBlocks uint64, peerQuorumReports, minTargets, maxTargets uint32, requiredOpenPorts []uint32, minCpuFreePercent uint32) GenesisMutator { base := setAuditParamsForFastEpochs(t, epochLengthBlocks, peerQuorumReports, minTargets, maxTargets, requiredOpenPorts) return func(genesis []byte) []byte { t.Helper() state := base(genesis) var err error - state, err = sjson.SetRawBytes(state, "app_state.audit.params.min_disk_free_percent", []byte(strconv.FormatUint(uint64(minDiskFreePercent), 10))) + state, err = sjson.SetRawBytes(state, "app_state.audit.params.min_cpu_free_percent", []byte(strconv.FormatUint(uint64(minCpuFreePercent), 10))) require.NoError(t, err) return state } @@ -311,7 +315,7 @@ func auditHostReportJSON(inboundPortStates []string) string { // auditHostReportWithDiskUsageJSON is like auditHostReportJSON but lets the // caller pin disk_usage_percent. Used by tests that exercise the -// self-compliance gate (e.g. min-free thresholds). +// STORAGE_FULL transition via the SetReport disk-pressure path. 
func auditHostReportWithDiskUsageJSON(inboundPortStates []string, diskUsagePercent float64) string { bz, _ := json.Marshal(map[string]any{ "cpu_usage_percent": 1.0, @@ -323,6 +327,20 @@ func auditHostReportWithDiskUsageJSON(inboundPortStates []string, diskUsagePerce return string(bz) } +// auditHostReportWithCpuUsageJSON is like auditHostReportJSON but lets the +// caller pin cpu_usage_percent. Used by tests that exercise the +// self-compliance gate against a non-storage host minimum. +func auditHostReportWithCpuUsageJSON(inboundPortStates []string, cpuUsagePercent float64) string { + bz, _ := json.Marshal(map[string]any{ + "cpu_usage_percent": cpuUsagePercent, + "mem_usage_percent": 1.0, + "disk_usage_percent": 1.0, + "inbound_port_states": inboundPortStates, + "failed_actions_count": 0, + }) + return string(bz) +} + // storageChallengeObservationJSON builds the JSON payload for --storage-challenge-observations flag. func storageChallengeObservationJSON(targetSupernodeAccount string, portStates []string) string { bz, _ := json.Marshal(map[string]any{ diff --git a/x/audit/v1/POSTPONEMENT_RULES.md b/x/audit/v1/POSTPONEMENT_RULES.md index 5459f768..f3fa6f01 100644 --- a/x/audit/v1/POSTPONEMENT_RULES.md +++ b/x/audit/v1/POSTPONEMENT_RULES.md @@ -1,6 +1,6 @@ # Postponement and Recovery Rules (audit/v1) -This document describes the on-chain rules implemented by the audit module (v1) for switching a supernode between `ACTIVE` and `POSTPONED`, and for recovering back to `ACTIVE`. +This document describes the on-chain rules implemented by the audit module (v1) for switching a supernode between `ACTIVE` and `POSTPONED`, and for recovering out of `POSTPONED`. ## Definitions @@ -33,10 +33,11 @@ This is evaluated by checking for a stored report in each of the last `N` epochs ### 2) Host Report requirements -If a submitted host report violates any enabled minimum free% threshold, the supernode is set to `POSTPONED`. 
+If a submitted host report violates any enabled CPU or memory minimum free% threshold, the supernode is set to `POSTPONED`. -- Params: `min_cpu_free_percent`, `min_mem_free_percent`, `min_disk_free_percent` (`free% = 100 - usage%`). +- Params: `min_cpu_free_percent`, `min_mem_free_percent` (`free% = 100 - usage%`). - Special case: if `*_usage_percent == 0`, that metric is treated as **unknown** and does not trigger postponement. +- Disk pressure is not a postponement criterion; it is handled by the `STORAGE_FULL` state path. The following host-report fields are currently ignored by postponement logic: - `failed_actions_count` @@ -50,8 +51,12 @@ An epoch counts toward the consecutive requirement only if: - there is at least **1** peer reporter about the target in that epoch, and - the share of peer reporters about the target in that epoch that report `PORT_STATE_CLOSED` for port index `i` meets or exceeds `peer_port_postpone_threshold_percent`. -## Recovery rule (POSTPONED → ACTIVE) +## Recovery rule (POSTPONED → ACTIVE or STORAGE_FULL) -In a single epoch, a `POSTPONED` supernode becomes `ACTIVE` if: +In a single epoch, a `POSTPONED` supernode recovers if: - it submits one compliant host report (Host Report requirements), and - there exists at least **1** peer report about that supernode in the same epoch where **all** required ports are `PORT_STATE_OPEN`. + +The recovery target is determined from the same-epoch self HostReport: +- if `disk_usage_percent` is omitted/zero, or is a valid percentage in `[0, 100]` at or below `supernode.max_storage_usage_percent`, the supernode becomes `ACTIVE`; +- if `disk_usage_percent` is above `supernode.max_storage_usage_percent`, or is non-zero and not a finite value in `[0, 100]`, the supernode becomes `STORAGE_FULL`. 
diff --git a/x/audit/v1/README.md b/x/audit/v1/README.md index 286e52dd..8b395239 100644 --- a/x/audit/v1/README.md +++ b/x/audit/v1/README.md @@ -87,16 +87,18 @@ At epoch end, a supernode can be postponed for: - **Action-finalization evidence thresholds** (per-epoch counts meeting consecutive-epoch windows), - **Missing reports** for `consecutive_epochs_to_postpone` consecutive epochs, -- **Self Report minimum failures** (CPU/mem/disk free% thresholds), +- **Self Report minimum failures** (CPU/mem free% thresholds), - **Peer port thresholds**: a required port is treated as CLOSED if peer observations meet `peer_port_postpone_threshold_percent`, and this happens for `consecutive_epochs_to_postpone` consecutive epochs. -### Recovery (`POSTPONED -> ACTIVE`) +### Recovery (`POSTPONED -> ACTIVE` or `STORAGE_FULL`) At epoch end, a supernode can recover: - If postponed due to action-finalization evidence: by the action-finalization recovery window and total-bad-evidence constraint. - Otherwise: if it has a compliant self report and at least one peer observation in the epoch where all required ports are `OPEN`. +If the same-epoch self HostReport still has `disk_usage_percent` above `supernode.max_storage_usage_percent`, recovery routes to `STORAGE_FULL` instead of `ACTIVE`. + Detailed behavior is implemented in the module's epoch-end enforcement logic. 
## Evidence @@ -192,7 +194,7 @@ Params are initialized from genesis and may later be updated by governance via ` - Enforcement: - `min_cpu_free_percent`: `0` (disabled) - `min_mem_free_percent`: `0` (disabled) - - `min_disk_free_percent`: `0` (disabled) + - `min_disk_free_percent`: `0` (legacy/no-op for epoch-end postponement; disk pressure is handled as `STORAGE_FULL`) - `consecutive_epochs_to_postpone`: `1` - `peer_port_postpone_threshold_percent`: `100` - `keep_last_epoch_entries`: `200` diff --git a/x/audit/v1/keeper/enforcement.go b/x/audit/v1/keeper/enforcement.go index 08a89bec..5908a651 100644 --- a/x/audit/v1/keeper/enforcement.go +++ b/x/audit/v1/keeper/enforcement.go @@ -85,7 +85,7 @@ func (k Keeper) EnforceEpochEnd(ctx sdk.Context, epochID uint64, params types.Pa continue } - if err := k.recoverSupernodeActive(ctx, sn); err != nil { + if err := k.recoverSupernodeFromPostponed(ctx, sn, epochID); err != nil { return err } k.clearActionFinalizationPostponedAtEpochID(ctx, sn.SupernodeAccount) @@ -413,16 +413,14 @@ func (k Keeper) selfHostViolatesMinimums(ctx sdk.Context, supernodeAccount strin return false, nil } - // If any known metric is below minimum free%, postpone. + // If any known non-storage metric is below minimum free%, postpone. + // Disk pressure is modeled as STORAGE_FULL by audit SetReport, not POSTPONED. 
if violatesMinFree(r.HostReport.CpuUsagePercent, params.MinCpuFreePercent) { return true, nil } if violatesMinFree(r.HostReport.MemUsagePercent, params.MinMemFreePercent) { return true, nil } - if violatesMinFree(r.HostReport.DiskUsagePercent, params.MinDiskFreePercent) { - return true, nil - } return false, nil } @@ -439,9 +437,6 @@ func (k Keeper) selfHostCompliant(ctx sdk.Context, supernodeAccount string, epoc if !compliesMinFree(r.HostReport.MemUsagePercent, params.MinMemFreePercent) { return false, nil } - if !compliesMinFree(r.HostReport.DiskUsagePercent, params.MinDiskFreePercent) { - return false, nil - } return true, nil } @@ -567,6 +562,35 @@ func (k Keeper) recoverSupernodeActive(ctx sdk.Context, sn sntypes.SuperNode) er return k.supernodeKeeper.RecoverSuperNodeFromPostponed(ctx, valAddr) } +func (k Keeper) markSupernodeStorageFull(ctx sdk.Context, sn sntypes.SuperNode) error { + if sn.ValidatorAddress == "" { + return fmt.Errorf("missing validator address for supernode %q", sn.SupernodeAccount) + } + valAddr, err := sdk.ValAddressFromBech32(sn.ValidatorAddress) + if err != nil { + return err + } + return k.supernodeKeeper.MarkSuperNodeStorageFull(ctx, valAddr) +} + +func (k Keeper) recoverSupernodeFromPostponed(ctx sdk.Context, sn sntypes.SuperNode, epochID uint64) error { + r, found := k.GetReport(ctx, epochID, sn.SupernodeAccount) + if !found || r.HostReport.DiskUsagePercent == 0 { + return k.recoverSupernodeActive(ctx, sn) + } + + if !isValidHostUsagePercent(r.HostReport.DiskUsagePercent) { + return k.markSupernodeStorageFull(ctx, sn) + } + + maxStorage := float64(k.supernodeKeeper.GetParams(ctx).MaxStorageUsagePercent) + if r.HostReport.DiskUsagePercent <= maxStorage { + return k.recoverSupernodeActive(ctx, sn) + } + + return k.markSupernodeStorageFull(ctx, sn) +} + // storageTruthBand represents a node suspicion severity level. 
type storageTruthBand int diff --git a/x/audit/v1/keeper/enforcement_empty_active_set_test.go b/x/audit/v1/keeper/enforcement_empty_active_set_test.go index 24df6ca0..dbdc0d88 100644 --- a/x/audit/v1/keeper/enforcement_empty_active_set_test.go +++ b/x/audit/v1/keeper/enforcement_empty_active_set_test.go @@ -166,7 +166,7 @@ func TestEnforceEpochEnd_EmptyActiveSet_NoSelfReport_NoRecover(t *testing.T) { // TestEnforceEpochEnd_EmptyActiveSet_NonCompliantSelf_NoRecover verifies the // bootstrap exception does NOT bypass the self-compliance health checks. -// A POSTPONED SN that submits a report violating the disk-usage minimum +// A POSTPONED SN that submits a report violating a non-storage host minimum // stays POSTPONED even when the active set is empty. func TestEnforceEpochEnd_EmptyActiveSet_NonCompliantSelf_NoRecover(t *testing.T) { f := initFixture(t) @@ -181,28 +181,19 @@ func TestEnforceEpochEnd_EmptyActiveSet_NonCompliantSelf_NoRecover(t *testing.T) params := types.DefaultParams() params.RequiredOpenPorts = []uint32{4444} params.ConsecutiveEpochsToPostpone = 1 - // Require at least 20% disk free; sn0 reports 95% usage → 5% free → not compliant. - params.MinDiskFreePercent = 20 + // Require at least 20% CPU free; sn0 reports 95% usage → 5% free → not compliant. + params.MinCpuFreePercent = 20 epochID := uint64(1) writeEmptyActiveSetAnchor(t, f, epochID) - // SetReport with non-zero DiskUsagePercent invokes the STORAGE_FULL - // transition source path, which queries supernodeKeeper. Stub these - // dependencies so the call lands cleanly without triggering a - // transition (we return "not found" → SetReport short-circuits). - f.supernodeKeeper.EXPECT(). - GetSuperNodeByAccount(gomock.AssignableToTypeOf(f.ctx), sn0.SupernodeAccount). - Return(sntypes.SuperNode{}, false, nil). 
- Times(1) - if err := f.keeper.SetReport(f.ctx, types.EpochReport{ SupernodeAccount: sn0.SupernodeAccount, EpochId: epochID, ReportHeight: f.ctx.BlockHeight(), HostReport: types.HostReport{ - DiskUsagePercent: 95.0, // 5% free, below the 20% minimum + CpuUsagePercent: 95.0, // 5% free, below the 20% minimum }, }); err != nil { t.Fatalf("failed to set report: %v", err) diff --git a/x/audit/v1/keeper/enforcement_storagefull_transition_test.go b/x/audit/v1/keeper/enforcement_storagefull_transition_test.go index b4600575..e2cc1fc8 100644 --- a/x/audit/v1/keeper/enforcement_storagefull_transition_test.go +++ b/x/audit/v1/keeper/enforcement_storagefull_transition_test.go @@ -10,10 +10,6 @@ import ( "go.uber.org/mock/gomock" ) -// TestEnforceEpochEnd_RecoversPostponedNodeToActive verifies that a postponed node with -// a compliant peer port report is recovered to Active via RecoverSuperNodeFromPostponed. -// Per LEP-6 §17: recovery to StorageFull is no longer managed in the audit enforcement path; -// that transition is handled by the supernode module's own state machine. func TestEnforceEpochEnd_RecoversPostponedToStorageFullWhenDiskStillHigh(t *testing.T) { f := initFixture(t) f.ctx = f.ctx.WithBlockHeight(10) @@ -27,7 +23,7 @@ func TestEnforceEpochEnd_RecoversPostponedToStorageFullWhenDiskStillHigh(t *test // Persist a compliant report for epoch 1. 
f.supernodeKeeper.EXPECT().GetSuperNodeByAccount(gomock.Any(), reporter).Return(sn, true, nil).Times(1) - f.supernodeKeeper.EXPECT().GetParams(gomock.Any()).Return(sntypes.DefaultParams()).Times(1) + f.supernodeKeeper.EXPECT().GetParams(gomock.Any()).Return(sntypes.DefaultParams()).Times(2) err = f.keeper.SetReport(f.ctx, types.EpochReport{SupernodeAccount: reporter, EpochId: 1, ReportHeight: f.ctx.BlockHeight(), HostReport: types.HostReport{DiskUsagePercent: 95}}) require.NoError(t, err) @@ -60,7 +56,62 @@ func TestEnforceEpochEnd_RecoversPostponedToStorageFullWhenDiskStillHigh(t *test GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). Return([]sntypes.SuperNode{sn}, nil). Times(1) - f.supernodeKeeper.EXPECT().RecoverSuperNodeFromPostponed(gomock.AssignableToTypeOf(f.ctx), valAddr).Return(nil).Times(1) + f.supernodeKeeper.EXPECT(). + MarkSuperNodeStorageFull(gomock.AssignableToTypeOf(f.ctx), valAddr). + Return(nil). + Times(1) + + err = f.keeper.EnforceEpochEnd(f.ctx, 1, params) + require.NoError(t, err) + require.False(t, hasEventType(f.ctx.EventManager().Events(), sntypes.EventTypeSupernodeRecovered)) +} + +func TestEnforceEpochEnd_InvalidDiskReportDoesNotRecoverPostponedToActive(t *testing.T) { + f := initFixture(t) + f.ctx = f.ctx.WithBlockHeight(10) + + reporter := sdk.AccAddress([]byte("reporter_address_20g")).String() + reporterVal := sdk.ValAddress([]byte("reporter_val_addr_25")).String() + valAddr, err := sdk.ValAddressFromBech32(reporterVal) + require.NoError(t, err) + + sn := sntypes.SuperNode{ValidatorAddress: reporterVal, SupernodeAccount: reporter, States: []*sntypes.SuperNodeStateRecord{{State: sntypes.SuperNodeStatePostponed, Height: 9, Reason: "audit_missing_reports"}}} + + err = f.keeper.SetReportRaw(f.ctx, types.EpochReport{SupernodeAccount: reporter, EpochId: 1, ReportHeight: f.ctx.BlockHeight(), HostReport: types.HostReport{DiskUsagePercent: -1}}) + require.NoError(t, err) + + peer := 
sdk.AccAddress([]byte("peer_for_recovery_____")).String() + err = f.keeper.SetReport(f.ctx, types.EpochReport{ + SupernodeAccount: peer, + EpochId: 1, + ReportHeight: f.ctx.BlockHeight(), + HostReport: types.HostReport{}, + StorageChallengeObservations: []*types.StorageChallengeObservation{{ + TargetSupernodeAccount: reporter, + PortStates: []types.PortState{types.PortState_PORT_STATE_OPEN}, + }}, + }) + require.NoError(t, err) + f.keeper.SetStorageChallengeReportIndex(f.ctx, reporter, 1, peer) + + params := types.DefaultParams() + params.RequiredOpenPorts = []uint32{4444} + + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). + Return([]sntypes.SuperNode{}, nil). + Times(1) + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). + Return([]sntypes.SuperNode{sn}, nil). + Times(1) + f.supernodeKeeper.EXPECT(). + RecoverSuperNodeFromPostponed(gomock.Any(), gomock.Any()). + Times(0) + f.supernodeKeeper.EXPECT(). + MarkSuperNodeStorageFull(gomock.AssignableToTypeOf(f.ctx), valAddr). + Return(nil). 
+ Times(1) err = f.keeper.EnforceEpochEnd(f.ctx, 1, params) require.NoError(t, err) @@ -78,7 +129,7 @@ func TestEnforceEpochEnd_RecoversPostponedToActiveWhenDiskBelowThreshold(t *test sn := sntypes.SuperNode{ValidatorAddress: reporterVal, SupernodeAccount: reporter, States: []*sntypes.SuperNodeStateRecord{{State: sntypes.SuperNodeStatePostponed, Height: 9, Reason: "audit_missing_reports"}}} f.supernodeKeeper.EXPECT().GetSuperNodeByAccount(gomock.Any(), reporter).Return(sn, true, nil).Times(1) - f.supernodeKeeper.EXPECT().GetParams(gomock.Any()).Return(sntypes.DefaultParams()).Times(1) + f.supernodeKeeper.EXPECT().GetParams(gomock.Any()).Return(sntypes.DefaultParams()).Times(2) err = f.keeper.SetReport(f.ctx, types.EpochReport{SupernodeAccount: reporter, EpochId: 1, ReportHeight: f.ctx.BlockHeight(), HostReport: types.HostReport{DiskUsagePercent: 40}}) require.NoError(t, err) @@ -116,6 +167,15 @@ func TestEnforceEpochEnd_RecoversPostponedToActiveWhenDiskBelowThreshold(t *test require.NoError(t, err) } +func hasEventType(events sdk.Events, eventType string) bool { + for _, event := range events { + if event.Type == eventType { + return true + } + } + return false +} + // TestEnforceEpochEnd_DiskPressureDoesNotPostponeStorageFull verifies that StorageFull nodes // are not evaluated or postponed by the audit enforcement path (per LEP-6 §17 which limits // audit enforcement to Active nodes only). 
@@ -154,3 +214,45 @@ func TestEnforceEpochEnd_DiskPressureDoesNotPostponeStorageFull(t *testing.T) { err = f.keeper.EnforceEpochEnd(f.ctx, 1, params) require.NoError(t, err) } + +func TestEnforceEpochEnd_DiskPressureDoesNotPostponeActive(t *testing.T) { + f := initFixture(t) + f.ctx = f.ctx.WithBlockHeight(10) + + reporter := sdk.AccAddress([]byte("reporter_address_20f")).String() + reporterVal := sdk.ValAddress([]byte("reporter_val_addr_24")).String() + sn := sntypes.SuperNode{ + ValidatorAddress: reporterVal, + SupernodeAccount: reporter, + States: []*sntypes.SuperNodeStateRecord{{State: sntypes.SuperNodeStateActive, Height: 9}}, + } + + f.supernodeKeeper.EXPECT().GetSuperNodeByAccount(gomock.Any(), reporter).Return(sn, true, nil).Times(1) + f.supernodeKeeper.EXPECT().GetParams(gomock.Any()).Return(sntypes.DefaultParams()).Times(1) + err := f.keeper.SetReport(f.ctx, types.EpochReport{ + SupernodeAccount: reporter, + EpochId: 1, + ReportHeight: f.ctx.BlockHeight(), + HostReport: types.HostReport{DiskUsagePercent: 88}, + }) + require.NoError(t, err) + + params := types.DefaultParams() + params.RequiredOpenPorts = []uint32{4444} + params.MinCpuFreePercent = 0 + params.MinMemFreePercent = 0 + params.MinDiskFreePercent = 20 + + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). + Return([]sntypes.SuperNode{sn}, nil). + Times(1) + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). + Return([]sntypes.SuperNode{}, nil). 
+ Times(1) + f.supernodeKeeper.EXPECT().SetSuperNodePostponed(gomock.Any(), gomock.Any(), gomock.Any()).Times(0) + + err = f.keeper.EnforceEpochEnd(f.ctx, 1, params) + require.NoError(t, err) +} diff --git a/x/audit/v1/keeper/msg_submit_epoch_report.go b/x/audit/v1/keeper/msg_submit_epoch_report.go index 9af5083f..124ff47b 100644 --- a/x/audit/v1/keeper/msg_submit_epoch_report.go +++ b/x/audit/v1/keeper/msg_submit_epoch_report.go @@ -243,6 +243,15 @@ func (k Keeper) applyIncompleteReportPenalty(ctx sdk.Context, epochID uint64, re // This is the single enforcement point for HostReport host-metric invariants // (LEP-6 §12 — see proto/lumera/audit/v1/audit.proto::HostReport). func validateHostMetricFields(h types.HostReport) error { + if err := validateHostUsagePercent("cpu_usage_percent", h.CpuUsagePercent); err != nil { + return err + } + if err := validateHostUsagePercent("mem_usage_percent", h.MemUsagePercent); err != nil { + return err + } + if err := validateHostUsagePercent("disk_usage_percent", h.DiskUsagePercent); err != nil { + return err + } if math.IsNaN(h.CascadeKademliaDbBytes) || math.IsInf(h.CascadeKademliaDbBytes, 0) { return errorsmod.Wrap(types.ErrInvalidHostMetric, "cascade_kademlia_db_bytes must be a finite number") } @@ -252,6 +261,17 @@ func validateHostMetricFields(h types.HostReport) error { return nil } +func validateHostUsagePercent(name string, value float64) error { + if !isValidHostUsagePercent(value) { + return errorsmod.Wrapf(types.ErrInvalidHostMetric, "%s must be a finite percentage in [0,100], got %v", name, value) + } + return nil +} + +func isValidHostUsagePercent(value float64) bool { + return !math.IsNaN(value) && !math.IsInf(value, 0) && value >= 0 && value <= 100 +} + // bridgeCascadeBytesToSupernodeMetrics writes the cascade_kademlia_db_bytes // metric reported on the audit HostReport into x/supernode SupernodeMetricsState. 
// Read-modify-write semantics: any other Metrics fields previously persisted diff --git a/x/audit/v1/keeper/msg_submit_epoch_report_cascade_bytes_test.go b/x/audit/v1/keeper/msg_submit_epoch_report_cascade_bytes_test.go index d2580396..4c1cd6ed 100644 --- a/x/audit/v1/keeper/msg_submit_epoch_report_cascade_bytes_test.go +++ b/x/audit/v1/keeper/msg_submit_epoch_report_cascade_bytes_test.go @@ -149,6 +149,49 @@ func TestSubmitEpochReport_CascadeBytes_InvalidValuesRejected(t *testing.T) { } } +func TestSubmitEpochReport_HostUsagePercent_InvalidValuesRejected(t *testing.T) { + cases := []struct { + name string + report types.HostReport + }{ + {"cpu_nan", types.HostReport{CpuUsagePercent: math.NaN()}}, + {"cpu_negative", types.HostReport{CpuUsagePercent: -1}}, + {"cpu_over_100", types.HostReport{CpuUsagePercent: 100.1}}, + {"mem_inf", types.HostReport{MemUsagePercent: math.Inf(1)}}, + {"mem_negative", types.HostReport{MemUsagePercent: -1}}, + {"mem_over_100", types.HostReport{MemUsagePercent: 100.1}}, + {"disk_inf", types.HostReport{DiskUsagePercent: math.Inf(-1)}}, + {"disk_negative", types.HostReport{DiskUsagePercent: -1}}, + {"disk_over_100", types.HostReport{DiskUsagePercent: 100.1}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + f := initFixture(t) + f.ctx = f.ctx.WithBlockHeight(1) + ms := keeper.NewMsgServerImpl(f.keeper) + + reporter := sdk.AccAddress([]byte("reporter_address_20b")).String() + valAddrStr := sdk.ValAddress([]byte("validator_addr__20b")).String() + + f.supernodeKeeper.EXPECT(). + GetSuperNodeByAccount(gomock.Any(), reporter). + Return(sntypes.SuperNode{SupernodeAccount: reporter, ValidatorAddress: valAddrStr}, true, nil). + AnyTimes() + f.supernodeKeeper.EXPECT(). + SetMetricsState(gomock.Any(), gomock.Any()). 
+ Times(0) + + _, err := ms.SubmitEpochReport(f.ctx, &types.MsgSubmitEpochReport{ + Creator: reporter, + EpochId: 0, + HostReport: tc.report, + }) + require.Error(t, err) + require.ErrorIs(t, err, types.ErrInvalidHostMetric) + }) + } +} + // TestSubmitEpochReport_Bridge_PreservesPriorNonCascadeMetrics covers I3: the // bridge is read-modify-write; pre-existing metrics fields owned by other // writers must not be clobbered to zero by the cascade-bytes write. diff --git a/x/audit/v1/keeper/msg_submit_epoch_report_storagefull_test.go b/x/audit/v1/keeper/msg_submit_epoch_report_storagefull_test.go index 490eac7c..0acc73f6 100644 --- a/x/audit/v1/keeper/msg_submit_epoch_report_storagefull_test.go +++ b/x/audit/v1/keeper/msg_submit_epoch_report_storagefull_test.go @@ -18,6 +18,8 @@ func TestSubmitEpochReport_TransitionsReporterToStorageFullFromHostReport(t *tes reporter := sdk.AccAddress([]byte("reporter_address_20b")).String() reporterVal := sdk.ValAddress([]byte("reporter_val_addr_20")).String() + valAddr, err := sdk.ValAddressFromBech32(reporterVal) + require.NoError(t, err) reporterSN := sntypes.SuperNode{ ValidatorAddress: reporterVal, @@ -36,12 +38,8 @@ func TestSubmitEpochReport_TransitionsReporterToStorageFullFromHostReport(t *tes Return(sntypes.DefaultParams()). Times(1) f.supernodeKeeper.EXPECT(). - SetSuperNode(gomock.Any(), gomock.Any()). - DoAndReturn(func(_ sdk.Context, sn sntypes.SuperNode) error { - require.NotEmpty(t, sn.States) - require.Equal(t, sntypes.SuperNodeStateStorageFull, sn.States[len(sn.States)-1].State) - return nil - }). + MarkSuperNodeStorageFull(gomock.Any(), valAddr). + Return(nil). Times(1) f.supernodeKeeper.EXPECT(). GetMetricsState(gomock.Any(), gomock.Any()). @@ -52,7 +50,7 @@ func TestSubmitEpochReport_TransitionsReporterToStorageFullFromHostReport(t *tes Return(nil). 
AnyTimes() - err := f.keeper.SetEpochAnchor(f.ctx, types.EpochAnchor{ + err = f.keeper.SetEpochAnchor(f.ctx, types.EpochAnchor{ EpochId: 0, EpochStartHeight: 1, EpochEndHeight: 400, @@ -104,6 +102,9 @@ func TestSubmitEpochReport_DoesNotTransitionPostponedReporterToStorageFull(t *te f.supernodeKeeper.EXPECT(). SetSuperNode(gomock.Any(), gomock.Any()). Times(0) + f.supernodeKeeper.EXPECT(). + MarkSuperNodeStorageFull(gomock.Any(), gomock.Any()). + Times(0) f.supernodeKeeper.EXPECT(). GetMetricsState(gomock.Any(), gomock.Any()). Return(sntypes.SupernodeMetricsState{}, false). diff --git a/x/audit/v1/keeper/state.go b/x/audit/v1/keeper/state.go index 68a7564a..c86277a6 100644 --- a/x/audit/v1/keeper/state.go +++ b/x/audit/v1/keeper/state.go @@ -40,6 +40,10 @@ func (k Keeper) SetReport(ctx sdk.Context, r types.EpochReport) error { ctx.EventManager().EmitEvent(sdk.NewEvent("audit_set_report_transition", sdk.NewAttribute("disk_usage_percent", "0"), sdk.NewAttribute("transition_skipped", "true"))) return nil } + if !isValidHostUsagePercent(r.HostReport.DiskUsagePercent) { + ctx.EventManager().EmitEvent(sdk.NewEvent("audit_set_report_transition", sdk.NewAttribute("transition_skipped", "true"), sdk.NewAttribute("reason", "invalid_disk_usage_percent"))) + return nil + } reporterSN, found, err := k.supernodeKeeper.GetSuperNodeByAccount(ctx, r.SupernodeAccount) if err != nil { return err @@ -58,9 +62,8 @@ func (k Keeper) SetReport(ctx sdk.Context, r types.EpochReport) error { } if isStorageFull && latest != supernodetypes.SuperNodeStateStorageFull { - reporterSN.States = append(reporterSN.States, &supernodetypes.SuperNodeStateRecord{State: supernodetypes.SuperNodeStateStorageFull, Height: ctx.BlockHeight()}) ctx.EventManager().EmitEvent(sdk.NewEvent("audit_set_report_transition", sdk.NewAttribute("to_state", "storage_full"))) - return k.supernodeKeeper.SetSuperNode(ctx, reporterSN) + return k.markSupernodeStorageFull(ctx, reporterSN) } if !isStorageFull && latest == 
supernodetypes.SuperNodeStateStorageFull { reporterSN.States = append(reporterSN.States, &supernodetypes.SuperNodeStateRecord{State: supernodetypes.SuperNodeStateActive, Height: ctx.BlockHeight()}) diff --git a/x/audit/v1/simulation/submit_epoch_report_variance.go b/x/audit/v1/simulation/submit_epoch_report_variance.go index 37f2b9bc..f5d9be83 100644 --- a/x/audit/v1/simulation/submit_epoch_report_variance.go +++ b/x/audit/v1/simulation/submit_epoch_report_variance.go @@ -32,7 +32,7 @@ func SimulateMsgSubmitEpochReportVariance(k keeper.Keeper) simtypes.Operation { host := types.HostReport{ CpuUsagePercent: 10 + r.Float64()*20, MemUsagePercent: 10 + r.Float64()*20, - DiskUsagePercent: 70 + r.Float64()*35, // exercises both sides of 90% threshold + DiskUsagePercent: 70 + r.Float64()*30, // exercises both sides of 90% threshold FailedActionsCount: uint32(r.Intn(3)), } msg := &types.MsgSubmitEpochReport{ diff --git a/x/supernode/v1/keeper/metrics_state.go b/x/supernode/v1/keeper/metrics_state.go index f75a814f..eb27e56c 100644 --- a/x/supernode/v1/keeper/metrics_state.go +++ b/x/supernode/v1/keeper/metrics_state.go @@ -56,8 +56,6 @@ func recoverFromPostponed(ctx sdk.Context, keeper types.SupernodeKeeper, sn *typ // markStorageFull transitions a supernode into STORAGE_FULL and emits the associated event. // Reserved for future use by audit enforcement; currently retained to lock in the state // machine contract while the triggering call sites land. -// -//nolint:unused // wired up in a follow-up audit/supernode integration. 
func markStorageFull(ctx sdk.Context, keeper types.SupernodeKeeper, sn *types.SuperNode) error { if len(sn.States) == 0 { return errorsmod.Wrap(sdkerrors.ErrInvalidRequest, "supernode state missing") diff --git a/x/supernode/v1/keeper/supernode.go b/x/supernode/v1/keeper/supernode.go index 8d38ab9d..989a1c8a 100644 --- a/x/supernode/v1/keeper/supernode.go +++ b/x/supernode/v1/keeper/supernode.go @@ -346,6 +346,16 @@ func (k Keeper) RecoverSuperNodeFromPostponed(ctx sdk.Context, valAddr sdk.ValAd return recoverFromPostponed(ctx, k, &supernode, types.SuperNodeStateActive) } +// MarkSuperNodeStorageFull transitions a validator into STORAGE_FULL and emits +// the canonical storage-full event. +func (k Keeper) MarkSuperNodeStorageFull(ctx sdk.Context, valAddr sdk.ValAddress) error { + supernode, found := k.QuerySuperNode(ctx, valAddr) + if !found { + return errorsmod.Wrapf(sdkerrors.ErrNotFound, "no supernode found for validator") + } + return markStorageFull(ctx, k, &supernode) +} + func (k Keeper) IsSuperNodeActive(ctx sdk.Context, valAddr sdk.ValAddress) bool { valOperAddr := valAddr diff --git a/x/supernode/v1/keeper/supernode_test.go b/x/supernode/v1/keeper/supernode_test.go index 60eeea7e..e576a132 100644 --- a/x/supernode/v1/keeper/supernode_test.go +++ b/x/supernode/v1/keeper/supernode_test.go @@ -118,6 +118,55 @@ func TestKeeper_SetAndQuerySuperNode(t *testing.T) { } } +func TestKeeper_MarkSuperNodeStorageFull(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + stakingKeeper := supernodemocks.NewMockStakingKeeper(ctrl) + slashingKeeper := supernodemocks.NewMockSlashingKeeper(ctrl) + bankKeeper := supernodemocks.NewMockBankKeeper(ctrl) + + k, ctx := setupKeeperForTest(t, stakingKeeper, slashingKeeper, bankKeeper) + ctx = ctx.WithBlockHeight(42) + + valAddr := sdk.ValAddress([]byte("validator_addr__20x")) + sn := types.SuperNode{ + ValidatorAddress: valAddr.String(), + SupernodeAccount: 
sdk.AccAddress([]byte("supernode_account_20")).String(), + PrevIpAddresses: []*types.IPAddressHistory{{Address: "127.0.0.1", Height: 1}}, + P2PPort: "4444", + States: []*types.SuperNodeStateRecord{ + {State: types.SuperNodeStatePostponed, Height: 10, Reason: "audit_missing_reports"}, + }, + } + require.NoError(t, k.SetSuperNode(ctx, sn)) + + require.NoError(t, k.MarkSuperNodeStorageFull(ctx, valAddr)) + + got, found := k.QuerySuperNode(ctx, valAddr) + require.True(t, found) + require.NotEmpty(t, got.States) + require.Equal(t, types.SuperNodeStateStorageFull, got.States[len(got.States)-1].State) + require.Equal(t, int64(42), got.States[len(got.States)-1].Height) + + var event sdk.Event + for _, e := range ctx.EventManager().Events() { + if e.Type == types.EventTypeSupernodeStorageFull { + event = e + break + } + } + require.Equal(t, types.EventTypeSupernodeStorageFull, event.Type) + foundOldState := false + for _, attr := range event.Attributes { + if string(attr.Key) == types.AttributeKeyOldState && string(attr.Value) == types.SuperNodeStatePostponed.String() { + foundOldState = true + break + } + } + require.True(t, foundOldState) +} + func TestKeeper_GetAllSuperNodes(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/x/supernode/v1/mocks/expected_keepers_mock.go b/x/supernode/v1/mocks/expected_keepers_mock.go index 0eddd811..cc6c3f5e 100644 --- a/x/supernode/v1/mocks/expected_keepers_mock.go +++ b/x/supernode/v1/mocks/expected_keepers_mock.go @@ -360,6 +360,20 @@ func (mr *MockSupernodeKeeperMockRecorder) IsSuperNodeActive(ctx, valAddr any) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsSuperNodeActive", reflect.TypeOf((*MockSupernodeKeeper)(nil).IsSuperNodeActive), ctx, valAddr) } +// MarkSuperNodeStorageFull mocks base method. 
+func (m *MockSupernodeKeeper) MarkSuperNodeStorageFull(ctx types1.Context, valAddr types1.ValAddress) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "MarkSuperNodeStorageFull", ctx, valAddr) + ret0, _ := ret[0].(error) + return ret0 +} + +// MarkSuperNodeStorageFull indicates an expected call of MarkSuperNodeStorageFull. +func (mr *MockSupernodeKeeperMockRecorder) MarkSuperNodeStorageFull(ctx, valAddr any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MarkSuperNodeStorageFull", reflect.TypeOf((*MockSupernodeKeeper)(nil).MarkSuperNodeStorageFull), ctx, valAddr) +} + // Logger mocks base method. func (m *MockSupernodeKeeper) Logger() log.Logger { m.ctrl.T.Helper() diff --git a/x/supernode/v1/types/expected_keepers.go b/x/supernode/v1/types/expected_keepers.go index b440ec84..6ca8b10f 100644 --- a/x/supernode/v1/types/expected_keepers.go +++ b/x/supernode/v1/types/expected_keepers.go @@ -28,6 +28,7 @@ type SupernodeKeeper interface { SetSuperNodeActive(ctx sdk.Context, valAddr sdk.ValAddress, reason string) error SetSuperNodePostponed(ctx sdk.Context, valAddr sdk.ValAddress, reason string) error RecoverSuperNodeFromPostponed(ctx sdk.Context, valAddr sdk.ValAddress) error + MarkSuperNodeStorageFull(ctx sdk.Context, valAddr sdk.ValAddress) error SetMetricsState(ctx sdk.Context, state SupernodeMetricsState) error GetMetricsState(ctx sdk.Context, valAddr sdk.ValAddress) (SupernodeMetricsState, bool) DeleteMetricsState(ctx sdk.Context, valAddr sdk.ValAddress)