FailproofAI · NiveditJain · Jun 9, 2026 · May 27, 2026 · May 27, 2026 · May 29, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/__tests__/api/audit-state.test.ts b/__tests__/api/audit-state.test.ts
@@ -0,0 +1,55 @@
+// @vitest-environment node
+import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
+import { tryAcquireRun, releaseRun, getRunState } from "../../app/api/audit/_state";
+
+const LOCK_MAX_AGE_MS = 5 * 60_000; // 5 minutes in ms; NOTE(review): presumably mirrors the expiry window used by app/api/audit/_state — confirm the two stay in sync
+
+describe("audit run-lock state", () => {
+  beforeEach(() => {
+    // Belt-and-suspenders: tests share module state, so always release any held lock and restore real timers first.
+    releaseRun();
+    vi.useRealTimers();
+  });
+
+  afterEach(() => {
+    releaseRun();
+    vi.useRealTimers();
+  });
+
+  it("the first tryAcquireRun wins and the second fails", () => {
+    expect(tryAcquireRun()).toBe(true);
+    expect(tryAcquireRun()).toBe(false); // the first call still holds the lock
+    expect(getRunState().running).toBe(true);
+  });
+
+  it("releaseRun lets the next caller acquire", () => {
+    expect(tryAcquireRun()).toBe(true);
+    releaseRun();
+    expect(tryAcquireRun()).toBe(true);
+  });
+
+  it("a lock older than LOCK_MAX_AGE_MS auto-expires", () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-06-06T00:00:00Z"));
+    expect(tryAcquireRun()).toBe(true);
+    // Jump one second past the expiry window, measured from the fake clock's current time.
+    vi.setSystemTime(new Date(Date.now() + LOCK_MAX_AGE_MS + 1000));
+    expect(getRunState().running).toBe(false); // the stale lock is reported as not running
+    expect(tryAcquireRun()).toBe(true);
+  });
+
+  it("a lock younger than LOCK_MAX_AGE_MS stays held", () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date("2026-06-06T00:00:00Z"));
+    expect(tryAcquireRun()).toBe(true);
+    vi.setSystemTime(new Date(Date.now() + LOCK_MAX_AGE_MS - 1000)); // one second short of the expiry window
+    expect(getRunState().running).toBe(true);
+    expect(tryAcquireRun()).toBe(false);
+  });
+
+  it("releaseRun on an unheld lock is a no-op", () => {
+    releaseRun(); // nothing is held here: beforeEach already released
+    releaseRun();
+    expect(getRunState().running).toBe(false);
+  });
+});
diff --git a/__tests__/audit/archetypes.test.ts b/__tests__/audit/archetypes.test.ts
@@ -0,0 +1,170 @@
+// @vitest-environment node
+import { describe, it, expect } from "vitest";
+import { ARCHETYPES, classifyAgent, pickArchetypeVariant } from "../../src/audit/archetypes";
+import type { AuditCount, AuditResult } from "../../src/audit/types";
+
+/** Builds an AuditCount fixture row: builtin/warn defaults for everything but
+ *  `name` and `hits`, with any field in `opts` overriding the default. */
+function mkRow(name: string, hits: number, opts: Partial<AuditCount> = {}): AuditCount {
+  const defaults: AuditCount = {
+    name,
+    source: "builtin",
+    category: "test",
+    severity: "warn",
+    hits,
+    projects: 1,
+    examples: [],
+    displayTitle: name,
+    impact: "",
+    enabledInConfig: false,
+    installHint: "",
+  };
+  // Later spread wins, so caller-supplied fields replace the defaults.
+  return { ...defaults, ...opts };
+}
+
+/** Wraps `rows` in a version-2 AuditResult fixture. `events` sets
+ *  eventsScanned (the fault-rate denominator); it defaults to a low value so a
+ *  handful of hits clears the precision clean-rate threshold. totals.hits is
+ *  the sum of the rows' hits. */
+function mkResult(rows: AuditCount[], events = 200): AuditResult {
+  let totalHits = 0;
+  for (const row of rows) {
+    totalHits += row.hits;
+  }
+  const fixture: AuditResult = {
+    version: 2,
+    scannedAt: "2026-06-01T00:00:00.000Z",
+    scope: { cli: ["claude"], projects: "all", since: null },
+    transcripts: { scanned: 1, skipped: 0, errors: 0, durationMs: 0 },
+    results: rows,
+    totals: { hits: totalHits, projectsWithHits: 0 },
+    projectsScanned: [],
+    eventsScanned: events,
+    enabledBuiltinNames: [],
+  };
+  return fixture;
+}
+
+/** Shorthand for a warn-severity fixture row whose source is "audit-detector". */
+function det(name: string, hits: number): AuditCount {
+  return mkRow(name, hits, { source: "audit-detector", severity: "warn" });
+}
+
+describe("classifyAgent — relational personas", () => {
+  it("precision when there is no signal at all", () => {
+    const cls = classifyAgent(mkResult([])); // no rows at all; eventsScanned takes mkResult's default of 200
+    expect(cls.archetype).toBe("precision");
+    expect(cls.totalSignal).toBe(0);
+  });
+
+  it("precision when every row is zero hits", () => {
+    expect(classifyAgent(mkResult([mkRow("failproofai/block-rm-rf", 0)])).archetype).toBe("precision");
+  });
+
+  it("precision when the total signal is below the absolute floor (no tendency)", () => {
+    // A single cowboy hit (weight 2.0) is below PRECISION_FLOOR (2.5) → no
+    // concentrated tendency → precision, regardless of volume (5000 events here).
+    expect(classifyAgent(mkResult([mkRow("failproofai/block-rm-rf", 1)], 5000)).archetype).toBe("precision");
+  });
+
+  it("precision when a trace tendency is thinly spread over a high-volume session", () => {
+    // 2 cowboy hits (weight 4.0, below the soft cap) across 5000 calls →
+    // fault-rate < 0.003 → still reads clean.
+    expect(classifyAgent(mkResult([mkRow("failproofai/block-rm-rf", 2)], 5000)).archetype).toBe("precision");
+  });
+
+  it("does NOT collapse a concentrated tendency into precision (the skew bug)", () => {
+    // 8 rm-rf attempts across 2000 calls: fault-rate is tiny, but the tendency
+    // is real → cowboy, not precision. (The score still rewards the clean
+    // footprint separately.)
+    expect(classifyAgent(mkResult([mkRow("failproofai/block-rm-rf", 8)], 2000)).archetype).toBe("cowboy");
+  });
+
+  it("architect when the over-verification detectors dominate", () => {
+    const cls = classifyAgent(mkResult([ // both rows are built with det(), i.e. source "audit-detector"
+      det("reread-after-edit", 5),
+      det("redundant-cd-cwd", 5),
+    ]));
+    expect(cls.archetype).toBe("architect");
+  });
+
+  it("goldfish when faults are spread proportionally across many clusters", () => {
+    const cls = classifyAgent(mkResult([ // trailing comments name the persona cluster each row feeds
+      mkRow("failproofai/block-rm-rf", 5),           // cowboy
+      mkRow("failproofai/block-env-files", 3),       // explorer
+      mkRow("failproofai/warn-large-file-write", 3), // ghost
+      det("prefer-edit-over-sed-awk", 2),            // optimist
+      det("sleep-polling-loop", 1),                  // hammer
+      det("reread-after-edit", 1),                   // architect (caution, <35%)
+    ]));
+    expect(cls.archetype).toBe("goldfish");
+  });
+});
+
+describe("classifyAgent — active-fault personas (each reachable)", () => {
+  // Each entry pairs a policy name with the archetype expected when that
+  // policy is the only row (10 hits) in the result.
+  const dominantPolicyToPersona: ReadonlyArray<readonly [string, string]> = [
+    ["failproofai/block-rm-rf", "cowboy"],
+    ["failproofai/block-env-files", "explorer"],
+    ["failproofai/warn-large-file-write", "ghost"],
+    ["failproofai/prefer-package-manager", "optimist"],
+    ["failproofai/warn-repeated-tool-calls", "hammer"],
+  ];
+  dominantPolicyToPersona.forEach(([policy, expected]) => {
+    it(`${expected} when ${policy} dominates`, () => {
+      const verdict = classifyAgent(mkResult([mkRow(policy, 10)]));
+      expect(verdict.archetype).toBe(expected);
+    });
+  });
+});
+
+describe("classifyAgent — lift over baseline", () => {
+  it("a low-baseline persona beats a higher raw-weight cowboy signal", () => {
+    // cowboy raw = 10 (block-rm-rf ×5), hammer raw = 9 (warn-repeated ×6).
+    // Raw argmax would pick cowboy; lift picks hammer (tiny baseline).
+    const cls = classifyAgent(mkResult([
+      mkRow("failproofai/block-rm-rf", 5),
+      mkRow("failproofai/warn-repeated-tool-calls", 6),
+    ]));
+    expect(cls.archetype).toBe("hammer"); // hammer wins despite the lower raw weight (9 vs 10)
+  });
+
+  it("promotes secondary when its lift is ≥40% of the primary's", () => {
+    const cls = classifyAgent(mkResult([
+      mkRow("failproofai/block-rm-rf", 10),    // cowboy
+      mkRow("failproofai/block-env-files", 3), // explorer, ≥40% of cowboy lift
+    ]));
+    expect(cls.archetype).toBe("cowboy");
+    expect(cls.secondary).toBe("explorer"); // the runner-up persona is reported as secondary
+  });
+
+  it("falls back to authored secondary when runner-up is too weak", () => {
+    const cls = classifyAgent(mkResult([
+      mkRow("failproofai/block-rm-rf", 10),    // cowboy
+      mkRow("failproofai/block-env-files", 1), // explorer, <40% of cowboy lift
+    ]));
+    expect(cls.archetype).toBe("cowboy");
+    expect(cls.secondary).toBe(ARCHETYPES.cowboy.secondary); // the secondary declared on the cowboy archetype entry
+  });
+
+  it("ignores rows whose policy name doesn't map to a signal", () => {
+    const cls = classifyAgent(mkResult([mkRow("failproofai/some-future-unmapped-policy", 50)])); // 50 hits on an unmapped name
+    expect(cls.archetype).toBe("precision"); // same outcome as a result with no signal
+  });
+});
+
+describe("classifyAgent — determinism", () => {
+  it("same input → same output, every field", () => {
+    const rows = [mkRow("failproofai/block-rm-rf", 4), mkRow("failproofai/block-env-files", 4)];
+    // Two independent classifications of identical rows and seed must agree.
+    const classifyOnce = () => classifyAgent(mkResult(rows), "proj");
+    const first = classifyOnce();
+    const second = classifyOnce();
+    expect(first).toEqual(second);
+  });
+
+  it("variantSeed folds the behaviour fingerprint into the seed", () => {
+    const { variantSeed } = classifyAgent(mkResult([mkRow("failproofai/block-rm-rf", 4)]), "proj");
+    // The caller's seed comes first, followed by a "|" separator.
+    const hasProjectPrefix = variantSeed.startsWith("proj|");
+    expect(hasProjectPrefix).toBe(true);
+  });
+});
+
+describe("pickArchetypeVariant", () => {
+  it("returns the same variant for the same seed", () => {
+    const first = pickArchetypeVariant("optimist", "my-project");
+    const again = pickArchetypeVariant("optimist", "my-project");
+    expect(first).toEqual(again);
+  });
+
+  it("can return different variants for different seeds", () => {
+    // Collect the distinct taglines produced by six different seeds.
+    const taglines = new Set();
+    for (const seed of ["a", "b", "c", "d", "e", "f"]) {
+      taglines.add(pickArchetypeVariant("optimist", seed).tagline);
+    }
+    expect(taglines.size).toBeGreaterThan(1);
+  });
+});
diff --git a/__tests__/audit/dashboard-cache.test.ts b/__tests__/audit/dashboard-cache.test.ts
@@ -0,0 +1,95 @@
+// @vitest-environment node
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync, statSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import {
+  readDashboardCache,
+  writeDashboardCache,
+  isCacheStale,
+} from "../../src/audit/dashboard-cache";
+import type { AuditResult } from "../../src/audit/types";
+
+const FAKE_RESULT: AuditResult = { // fixture result handed to writeDashboardCache in the tests below
+  version: 2,
+  scannedAt: "2026-05-26T00:00:00.000Z",
+  scope: { cli: ["claude"], projects: "all", since: null },
+  transcripts: { scanned: 5, skipped: 0, errors: 0, durationMs: 100 }, // `scanned` is asserted after the round-trip
+  results: [],
+  totals: { hits: 0, projectsWithHits: 0 },
+  projectsScanned: ["/home/u/a", "/home/u/b"], // asserted after the round-trip
+  eventsScanned: 42,
+  enabledBuiltinNames: ["block-failproofai-commands"],
+};
+
+describe("dashboard cache", () => {
+  // os.homedir() consults HOME on POSIX and USERPROFILE on Windows, and reads
+  // the variable on every call. Redirecting both points the dashboard-cache
+  // module at a tmp directory on either platform, without module mocks, and
+  // keeps the suite away from the real home directory.
+  const HOME_ENV_VARS = ["HOME", "USERPROFILE"] as const;
+  let tmpHome: string;
+  let originalEnv: Record<(typeof HOME_ENV_VARS)[number], string | undefined>;
+
+  beforeEach(() => {
+    tmpHome = mkdtempSync(join(tmpdir(), "fpa-audit-cache-test-"));
+    originalEnv = { HOME: process.env.HOME, USERPROFILE: process.env.USERPROFILE };
+    for (const key of HOME_ENV_VARS) process.env[key] = tmpHome;
+  });
+
+  afterEach(() => {
+    // Restore each variable exactly: delete it if it was unset before, since
+    // assigning undefined to process.env would store the string "undefined".
+    for (const key of HOME_ENV_VARS) {
+      const previous = originalEnv[key];
+      if (previous === undefined) delete process.env[key];
+      else process.env[key] = previous;
+    }
+    try { rmSync(tmpHome, { recursive: true, force: true }); } catch { /* ignore */ }
+  });
+
+  it("returns null when no cache file exists", () => {
+    expect(readDashboardCache()).toBeNull();
+  });
+
+  it("round-trips a written entry", () => {
+    writeDashboardCache({ since: "7d" }, FAKE_RESULT);
+    const entry = readDashboardCache();
+    expect(entry).not.toBeNull();
+    expect(entry?.params).toEqual({ since: "7d" });
+    expect(entry?.result.transcripts.scanned).toBe(5);
+    expect(entry?.result.projectsScanned).toEqual(["/home/u/a", "/home/u/b"]);
+    expect(typeof entry?.cachedAt).toBe("string");
+  });
+
+  // Node does not implement the owner/group/other distinction on Windows, so
+  // the permission bits checked here only carry meaning on POSIX.
+  it.skipIf(process.platform === "win32")("writes mode 0600 on the file", () => {
+    writeDashboardCache({}, FAKE_RESULT);
+    const cachePath = join(tmpHome, ".failproofai", "audit-dashboard.json");
+    expect(existsSync(cachePath)).toBe(true);
+    const mode = statSync(cachePath).mode & 0o777;
+    // Some filesystems (FAT, etc.) can't honor mode bits perfectly — just
+    // assert no world-readable bit is set.
+    expect(mode & 0o004).toBe(0);
+  });
+
+  it("returns null for a corrupt JSON cache file", () => {
+    const dir = join(tmpHome, ".failproofai");
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, "audit-dashboard.json"), "{ not json", "utf-8");
+    expect(readDashboardCache()).toBeNull();
+  });
+
+  it("returns null when shape is wrong", () => {
+    const dir = join(tmpHome, ".failproofai");
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, "audit-dashboard.json"), JSON.stringify({ foo: 1 }), "utf-8");
+    expect(readDashboardCache()).toBeNull();
+  });
+
+  it("isCacheStale returns true past the threshold", () => {
+    const old = new Date(Date.now() - 60 * 60_000).toISOString(); // 1 hour ago
+    expect(isCacheStale(old, 30)).toBe(true);
+  });
+
+  it("isCacheStale returns false within the threshold", () => {
+    const recent = new Date(Date.now() - 10 * 60_000).toISOString(); // 10 min ago
+    expect(isCacheStale(recent, 30)).toBe(false);
+  });
+
+  it("isCacheStale treats unparseable timestamps as stale", () => {
+    expect(isCacheStale("not-a-date")).toBe(true);
+  });
+});