Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f41089e
feat(audit): /audit dashboard with archetype classifier, scoring, and…
SiddarthAA May 27, 2026
ba6bcab
docs(audit): CHANGELOG entry + /audit dashboard section in dashboard.mdx
SiddarthAA May 27, 2026
9a0b22b
feat(audit): persona variant catalog + scroll/poster/install-CTA polish
SiddarthAA May 29, 2026
1b38daf
feat(auth): email-OTP login for CLI + dashboard, wired to failproof-a…
SiddarthAA May 31, 2026
934080b
docs(auth): add docs/cli/auth.mdx and env-vars entry for FAILPROOF_AP…
SiddarthAA May 31, 2026
34bf971
feat(ui): unify dashboard around audit pixel-craft system; fix nav st…
SiddarthAA May 31, 2026
500e97a
update global css
SiddarthAA May 31, 2026
4e0f805
feat(ui): bigger type, score+share card, persistent reminder, re-audi…
SiddarthAA May 31, 2026
1e6ccff
docs(auth): document ~/.failproofai/next-audit.json + reminder endpoint
SiddarthAA May 31, 2026
187ee90
ui fixes
SiddarthAA Jun 1, 2026
31e17cf
ui fixes
SiddarthAA Jun 1, 2026
356bd17
ui fixes
SiddarthAA Jun 1, 2026
1884dda
feat(cli): update auth cli, rename commands
SiddarthAA Jun 1, 2026
bd623b2
feat(posthog) : add posthog telemetry, update cli docs, cleanup old t…
SiddarthAA Jun 1, 2026
75eefec
feat(telemetry): instrument auth/reminder routes and wire reminder sc…
SiddarthAA Jun 3, 2026
91df2e9
docs(changelog): note auth/reminder telemetry + scheduler wiring
SiddarthAA Jun 3, 2026
4bcd6eb
- app/globals.css — added .section-h-dot + pulse keyframes (green dot…
SiddarthAA Jun 4, 2026
507f3c9
fix(audit+auth): hardening sweep across dashboard + CLI
SiddarthAA Jun 4, 2026
7c2b961
test(audit+auth): cover archetypes classifier, findings, strengths, a…
SiddarthAA Jun 4, 2026
7fa1de9
feat(telemetry): identity-link on CLI auth + policy add/remove failur…
SiddarthAA Jun 4, 2026
685937b
docs(changelog): note identity-link CLI emit + policy add/remove fail…
SiddarthAA Jun 4, 2026
78c326e
feat(telemetry): close five funnel gaps in audit-page events
SiddarthAA Jun 5, 2026
d93392e
feat(auth): default api-server base URL to https://api.befailproof.ai
SiddarthAA Jun 5, 2026
34b6c99
docs(cli): note new https://api.befailproof.ai default in env-vars index
SiddarthAA Jun 5, 2026
d8703b7
docs(changelog): credit environment-variables.mdx in api-server URL e…
SiddarthAA Jun 5, 2026
45ad9ee
Merge remote-tracking branch 'origin/main' into stable
NiveditJain Jun 7, 2026
162b414
fix(audit+auth): apply CodeRabbit suggestions across auth + audit das…
NiveditJain Jun 7, 2026
11bf8e7
docs(changelog): document CodeRabbit hardening pass
NiveditJain Jun 7, 2026
57d1092
fix(audit+auth): max-effort review + new CodeRabbit pass
NiveditJain Jun 7, 2026
71a7ad3
fix(audit+auth): tier A correctness fixes from the deferred-review plan
NiveditJain Jun 7, 2026
639856e
refactor(audit+auth): tier B — share fetchWithTimeout, atomicWriteJson,
NiveditJain Jun 7, 2026
4c79ee0
perf+polish(audit): tier C — memoize derived view state, debounce ref…
NiveditJain Jun 7, 2026
d677852
feat(audit): migrate display font to Bitcount Prop Single + pin empty…
SiddarthAA Jun 8, 2026
97672a7
feat(navbar): drop icon, fetch logo from remote URL with local fallback
SiddarthAA Jun 8, 2026
356de0e
Merge remote-tracking branch 'origin/main' into stable
NiveditJain Jun 9, 2026
0206e31
feat(audit): lift-based persona classifier + dynamic saturating score
SiddarthAA Jun 9, 2026
290421c
fix(audit): make precision tendency-based, not rate-based (de-skew pe…
SiddarthAA Jun 9, 2026
5068588
docs(changelog): align audit entry with tendency-based precision gate
SiddarthAA Jun 9, 2026
c2644bd
feat(audit): personalised X/LinkedIn share templates + card-attached …
SiddarthAA Jun 9, 2026
1e2ee4e
polish: subtle refinement pass across all 5 pages (#1)
NiveditJain Jun 9, 2026
4c061ad
feat(audit): copy audit card to clipboard on share so it attaches to …
SiddarthAA Jun 9, 2026
354be4e
Merge feat/update-audit into stable: audit engine + personalised shar…
SiddarthAA Jun 9, 2026
05d63c0
feat(telemetry): send raw email + strengthen verified-account→device …
SiddarthAA Jun 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions CHANGELOG.md

Large diffs are not rendered by default.

55 changes: 55 additions & 0 deletions __tests__/api/audit-state.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// @vitest-environment node
import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
import { tryAcquireRun, releaseRun, getRunState } from "../../app/api/audit/_state";

// Expiry window the tests step around.
// NOTE(review): presumably mirrors the window used by app/api/audit/_state — confirm.
const LOCK_MAX_AGE_MS = 5 * 60_000;

describe("audit run-lock state", () => {
  /**
   * Drops the lock and restores the real clock. Tests share module state, so
   * this runs both before and after every case as a belt-and-suspenders reset.
   */
  const resetLockAndClock = (): void => {
    releaseRun();
    vi.useRealTimers();
  };

  /** Switches to fake timers, pins "now" to a fixed instant, and takes the lock. */
  const acquireOnFrozenClock = (): void => {
    vi.useFakeTimers();
    vi.setSystemTime(new Date("2026-06-06T00:00:00Z"));
    expect(tryAcquireRun()).toBe(true);
  };

  /** Moves the faked system time forward by `ms` milliseconds. */
  const advanceClockBy = (ms: number): void => {
    vi.setSystemTime(new Date(Date.now() + ms));
  };

  beforeEach(() => {
    resetLockAndClock();
  });

  afterEach(() => {
    resetLockAndClock();
  });

  it("the first tryAcquireRun wins and the second fails", () => {
    expect(tryAcquireRun()).toBe(true);
    expect(tryAcquireRun()).toBe(false);
    const { running } = getRunState();
    expect(running).toBe(true);
  });

  it("releaseRun lets the next caller acquire", () => {
    expect(tryAcquireRun()).toBe(true);
    releaseRun();
    expect(tryAcquireRun()).toBe(true);
  });

  it("a lock older than LOCK_MAX_AGE_MS auto-expires", () => {
    acquireOnFrozenClock();
    // One second beyond the expiry window.
    advanceClockBy(LOCK_MAX_AGE_MS + 1000);
    expect(getRunState().running).toBe(false);
    expect(tryAcquireRun()).toBe(true);
  });

  it("a lock younger than LOCK_MAX_AGE_MS stays held", () => {
    acquireOnFrozenClock();
    // One second short of the expiry window.
    advanceClockBy(LOCK_MAX_AGE_MS - 1000);
    expect(getRunState().running).toBe(true);
    expect(tryAcquireRun()).toBe(false);
  });

  it("releaseRun on an unheld lock is a no-op", () => {
    // Two releases in a row with nothing held.
    releaseRun();
    releaseRun();
    expect(getRunState().running).toBe(false);
  });
});
170 changes: 170 additions & 0 deletions __tests__/audit/archetypes.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// @vitest-environment node
import { describe, it, expect } from "vitest";
import { ARCHETYPES, classifyAgent, pickArchetypeVariant } from "../../src/audit/archetypes";
import type { AuditCount, AuditResult } from "../../src/audit/types";

/**
 * Builds an `AuditCount` fixture row.
 *
 * @param name - Policy/detector name; also used as the row's `displayTitle`.
 * @param hits - Hit count recorded on the row.
 * @param opts - Field overrides applied on top of the defaults.
 * @returns The default row with `opts` merged over it.
 */
function mkRow(name: string, hits: number, opts: Partial<AuditCount> = {}): AuditCount {
  const defaults: AuditCount = {
    name,
    source: "builtin",
    category: "test",
    severity: "warn",
    hits,
    projects: 1,
    examples: [],
    displayTitle: name,
    impact: "",
    enabledInConfig: false,
    installHint: "",
  };
  // Overrides are spread last so they win over every default.
  return { ...defaults, ...opts };
}

/**
 * Wraps fixture rows in an `AuditResult` envelope.
 *
 * `events` becomes `eventsScanned` (the fault-rate denominator). It defaults to
 * a low value so a handful of hits clears the precision clean-rate threshold.
 */
function mkResult(rows: AuditCount[], events = 200): AuditResult {
  // Sum of `hits` over every supplied row; 0 for an empty list.
  let totalHits = 0;
  for (const row of rows) {
    totalHits += row.hits;
  }

  return {
    version: 2,
    scannedAt: "2026-06-01T00:00:00.000Z",
    scope: { cli: ["claude"], projects: "all", since: null },
    transcripts: { scanned: 1, skipped: 0, errors: 0, durationMs: 0 },
    results: rows,
    totals: { hits: totalHits, projectsWithHits: 0 },
    projectsScanned: [],
    eventsScanned: events,
    enabledBuiltinNames: [],
  };
}

/** Shorthand for a fixture row whose source is "audit-detector" at "warn" severity. */
function det(name: string, hits: number): AuditCount {
  return mkRow(name, hits, { source: "audit-detector", severity: "warn" });
}

describe("classifyAgent — relational personas", () => {
  /** Classifies `rows` (optionally over `events` scanned events) and returns the archetype only. */
  const archetypeFor = (rows: AuditCount[], events?: number) =>
    classifyAgent(mkResult(rows, events)).archetype;

  it("precision when there is no signal at all", () => {
    const verdict = classifyAgent(mkResult([]));
    expect(verdict.archetype).toBe("precision");
    expect(verdict.totalSignal).toBe(0);
  });

  it("precision when every row is zero hits", () => {
    const zeroHitRows = [mkRow("failproofai/block-rm-rf", 0)];
    expect(archetypeFor(zeroHitRows)).toBe("precision");
  });

  it("precision when the total signal is below the absolute floor (no tendency)", () => {
    // One cowboy hit carries weight 2.0, under PRECISION_FLOOR (2.5): there is
    // no concentrated tendency, so the result is precision whatever the volume.
    const singleHit = [mkRow("failproofai/block-rm-rf", 1)];
    expect(archetypeFor(singleHit, 5000)).toBe("precision");
  });

  it("precision when a trace tendency is thinly spread over a high-volume session", () => {
    // Two cowboy hits (weight 4.0, under the soft cap) over 5000 calls give a
    // fault-rate < 0.003, which still reads clean.
    const twoHits = [mkRow("failproofai/block-rm-rf", 2)];
    expect(archetypeFor(twoHits, 5000)).toBe("precision");
  });

  it("does NOT collapse a concentrated tendency into precision (the skew bug)", () => {
    // Eight rm-rf attempts over 2000 calls: the fault-rate is tiny but the
    // tendency is real, so this is cowboy rather than precision. (The score
    // rewards the clean footprint separately.)
    const concentrated = [mkRow("failproofai/block-rm-rf", 8)];
    expect(archetypeFor(concentrated, 2000)).toBe("cowboy");
  });

  it("architect when the over-verification detectors dominate", () => {
    const overVerification = [
      det("reread-after-edit", 5),
      det("redundant-cd-cwd", 5),
    ];
    expect(archetypeFor(overVerification)).toBe("architect");
  });

  it("goldfish when faults are spread proportionally across many clusters", () => {
    const spreadAcrossClusters = [
      mkRow("failproofai/block-rm-rf", 5), // cowboy
      mkRow("failproofai/block-env-files", 3), // explorer
      mkRow("failproofai/warn-large-file-write", 3), // ghost
      det("prefer-edit-over-sed-awk", 2), // optimist
      det("sleep-polling-loop", 1), // hammer
      det("reread-after-edit", 1), // architect (caution, <35%)
    ];
    expect(archetypeFor(spreadAcrossClusters)).toBe("goldfish");
  });
});

describe("classifyAgent — active-fault personas (each reachable)", () => {
  // Each pair is [policy that dominates the session, persona it should yield].
  const dominantPolicyToPersona: [string, string][] = [
    ["failproofai/block-rm-rf", "cowboy"],
    ["failproofai/block-env-files", "explorer"],
    ["failproofai/warn-large-file-write", "ghost"],
    ["failproofai/prefer-package-manager", "optimist"],
    ["failproofai/warn-repeated-tool-calls", "hammer"],
  ];

  // One generated test per pair, registered in table order.
  dominantPolicyToPersona.forEach(([policy, expected]) => {
    it(`${expected} when ${policy} dominates`, () => {
      const onlyThisPolicy = mkResult([mkRow(policy, 10)]);
      expect(classifyAgent(onlyThisPolicy).archetype).toBe(expected);
    });
  });
});

describe("classifyAgent — lift over baseline", () => {
  /** Classifies fixture rows using the default event count. */
  const classifyRows = (rows: AuditCount[]) => classifyAgent(mkResult(rows));

  it("a low-baseline persona beats a higher raw-weight cowboy signal", () => {
    // Raw weights: cowboy = 10 (block-rm-rf ×5), hammer = 9 (warn-repeated ×6).
    // A raw argmax would choose cowboy; lift chooses hammer (tiny baseline).
    const verdict = classifyRows([
      mkRow("failproofai/block-rm-rf", 5),
      mkRow("failproofai/warn-repeated-tool-calls", 6),
    ]);
    expect(verdict.archetype).toBe("hammer");
  });

  it("promotes secondary when its lift is ≥40% of the primary's", () => {
    const verdict = classifyRows([
      mkRow("failproofai/block-rm-rf", 10), // cowboy
      mkRow("failproofai/block-env-files", 3), // explorer, ≥40% of cowboy lift
    ]);
    expect(verdict.archetype).toBe("cowboy");
    expect(verdict.secondary).toBe("explorer");
  });

  it("falls back to authored secondary when runner-up is too weak", () => {
    const verdict = classifyRows([
      mkRow("failproofai/block-rm-rf", 10), // cowboy
      mkRow("failproofai/block-env-files", 1), // explorer, <40% of cowboy lift
    ]);
    expect(verdict.archetype).toBe("cowboy");
    expect(verdict.secondary).toBe(ARCHETYPES.cowboy.secondary);
  });

  it("ignores rows whose policy name doesn't map to a signal", () => {
    const unmappedOnly = [mkRow("failproofai/some-future-unmapped-policy", 50)];
    expect(classifyRows(unmappedOnly).archetype).toBe("precision");
  });
});

describe("classifyAgent — determinism", () => {
  it("same input → same output, every field", () => {
    const sharedRows = [
      mkRow("failproofai/block-rm-rf", 4),
      mkRow("failproofai/block-env-files", 4),
    ];
    // Two independent classifications of the same rows under the same seed.
    const firstRun = classifyAgent(mkResult(sharedRows), "proj");
    const secondRun = classifyAgent(mkResult(sharedRows), "proj");
    expect(firstRun).toEqual(secondRun);
  });

  it("variantSeed folds the behaviour fingerprint into the seed", () => {
    const result = mkResult([mkRow("failproofai/block-rm-rf", 4)]);
    const { variantSeed } = classifyAgent(result, "proj");
    // The supplied seed must lead the string, followed by a "|" separator.
    expect(variantSeed.startsWith("proj|")).toBe(true);
  });
});

describe("pickArchetypeVariant", () => {
  it("returns the same variant for the same seed", () => {
    const firstPick = pickArchetypeVariant("optimist", "my-project");
    const secondPick = pickArchetypeVariant("optimist", "my-project");
    expect(firstPick).toEqual(secondPick);
  });

  it("can return different variants for different seeds", () => {
    // Collect the distinct taglines produced by six different seeds.
    const distinctTaglines = new Set<unknown>();
    for (const seed of ["a", "b", "c", "d", "e", "f"]) {
      distinctTaglines.add(pickArchetypeVariant("optimist", seed).tagline);
    }
    expect(distinctTaglines.size).toBeGreaterThan(1);
  });
});
95 changes: 95 additions & 0 deletions __tests__/audit/dashboard-cache.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// @vitest-environment node
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync, statSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import {
readDashboardCache,
writeDashboardCache,
isCacheStale,
} from "../../src/audit/dashboard-cache";
import type { AuditResult } from "../../src/audit/types";

// Minimal audit result written to and read back from the dashboard cache by
// the tests in this file: no findings, five scanned transcripts, two projects.
const FAKE_RESULT: AuditResult = {
  version: 2,
  scannedAt: "2026-05-26T00:00:00.000Z",
  scope: {
    cli: ["claude"],
    projects: "all",
    since: null,
  },
  transcripts: {
    scanned: 5,
    skipped: 0,
    errors: 0,
    durationMs: 100,
  },
  results: [],
  totals: {
    hits: 0,
    projectsWithHits: 0,
  },
  projectsScanned: [
    "/home/u/a",
    "/home/u/b",
  ],
  eventsScanned: 42,
  enabledBuiltinNames: [
    "block-failproofai-commands",
  ],
};

describe("dashboard cache", () => {
  let tmpHome: string;
  let originalHome: string | undefined;

  /** Creates `.failproofai` under the temp home and writes `contents` verbatim as the cache file. */
  const seedCacheFile = (contents: string): void => {
    const cacheDir = join(tmpHome, ".failproofai");
    mkdirSync(cacheDir, { recursive: true });
    writeFileSync(join(cacheDir, "audit-dashboard.json"), contents, "utf-8");
  };

  /** ISO timestamp for the instant `minutes` minutes before now. */
  const isoMinutesAgo = (minutes: number): string =>
    new Date(Date.now() - minutes * 60_000).toISOString();

  beforeEach(() => {
    // Point homedir() at a throwaway directory by overriding HOME. On POSIX,
    // os.homedir() reads HOME on every call, so the dashboard-cache module
    // picks up the temp path without any module mocking.
    tmpHome = mkdtempSync(join(tmpdir(), "fpa-audit-cache-test-"));
    originalHome = process.env.HOME;
    process.env.HOME = tmpHome;
  });

  afterEach(() => {
    // Put HOME back exactly as it was, including "not set at all".
    if (originalHome !== undefined) {
      process.env.HOME = originalHome;
    } else {
      delete process.env.HOME;
    }
    try {
      rmSync(tmpHome, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  });

  it("returns null when no cache file exists", () => {
    const entry = readDashboardCache();
    expect(entry).toBeNull();
  });

  it("round-trips a written entry", () => {
    writeDashboardCache({ since: "7d" }, FAKE_RESULT);
    const cached = readDashboardCache();
    expect(cached).not.toBeNull();
    expect(cached?.params).toEqual({ since: "7d" });
    expect(cached?.result.transcripts.scanned).toBe(5);
    expect(cached?.result.projectsScanned).toEqual(["/home/u/a", "/home/u/b"]);
    expect(typeof cached?.cachedAt).toBe("string");
  });

  it("writes mode 0600 on the file", () => {
    writeDashboardCache({}, FAKE_RESULT);
    const cachePath = join(tmpHome, ".failproofai", "audit-dashboard.json");
    expect(existsSync(cachePath)).toBe(true);
    const permissionBits = statSync(cachePath).mode & 0o777;
    // Some filesystems (FAT, etc.) cannot honor mode bits exactly, so the
    // check is limited to the world-readable bit being clear.
    expect(permissionBits & 0o004).toBe(0);
  });

  it("returns null for a corrupt JSON cache file", () => {
    seedCacheFile("{ not json");
    expect(readDashboardCache()).toBeNull();
  });

  it("returns null when shape is wrong", () => {
    seedCacheFile(JSON.stringify({ foo: 1 }));
    expect(readDashboardCache()).toBeNull();
  });

  it("isCacheStale returns true past the threshold", () => {
    const anHourAgo = isoMinutesAgo(60); // 1 hour ago
    expect(isCacheStale(anHourAgo, 30)).toBe(true);
  });

  it("isCacheStale returns false within the threshold", () => {
    const tenMinutesAgo = isoMinutesAgo(10); // 10 min ago
    expect(isCacheStale(tenMinutesAgo, 30)).toBe(false);
  });

  it("isCacheStale treats unparseable timestamps as stale", () => {
    expect(isCacheStale("not-a-date")).toBe(true);
  });
});
Loading