From f1c9a9cdbc82e5749b5dfd69fd45c5fa9b9187c2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 24 May 2026 16:19:24 +0000
Subject: [PATCH 1/3] pi-extension: port auto-push and multi-line truncation
 fix from CLK harness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two of the recent CLK harness PRs have a direct parallel in pi-extension:

* push-on-commit + ahead counter (756723c). pi-extension already commits
  every clk_checkpoint / clk_merge call, but never pushes — a
  remote-backed Pi workspace silently accumulated local commits.
    - src/git.ts: hasRemote, commitsAhead, pushBestEffort (best-effort,
      never throws; mirrors clk_harness/git_ops.py).
    - src/tools.ts: pushIfEnabled helper called after clk_checkpoint and
      clk_merge. Gated on CLK_GITHUB_PUSH_ON_COMMIT=true to match the
      Python TUI; surfaces an ↑N ahead count on push failure or when
      auto-push is disabled but commits exist.
    - src/index.ts: /clk-doctor now reports the ahead count and warns
      when local commits haven't reached origin.

* multi-line objective truncation (24f379b). idea.slice(0, 60) was being
  done before splitting on newlines, so a multi-line idea could leak a
  fragment of line 2 into the status bar.
    - src/index.ts: new firstLineShort helper, used at every
      ctx.ui.setStatus("clk-idea", …) site and in /clk-doctor.

Tests: tests/git.test.ts covers no-remote/sync/unreachable cases for
pushBestEffort and commitsAhead. tests/index.test.ts asserts
firstLineShort returns single-line, capped output for multi-line input.
---
 pi-extension/src/git.ts          | 64 +++++++++++++++++++++
 pi-extension/src/index.ts        | 30 ++++++++--
 pi-extension/src/tools.ts        | 40 +++++++++++++
 pi-extension/tests/git.test.ts   | 96 ++++++++++++++++++++++++++++++++
 pi-extension/tests/index.test.ts | 16 +++++-
 5 files changed, 241 insertions(+), 5 deletions(-)
diff --git a/pi-extension/src/git.ts b/pi-extension/src/git.ts
index a599dc8..d04d3d6 100644
--- a/pi-extension/src/git.ts
+++ b/pi-extension/src/git.ts
@@ -218,3 +218,67 @@ export async function saveAndSwitch(
   }
   await git(cwd, ["checkout", targetBranch], signal);
 }
+
+/** True when the repo has a remote with the given name. */
+export async function hasRemote(
+  cwd: string,
+  name = "origin",
+  signal?: AbortSignal,
+): Promise<boolean> {
+  try {
+    await git(cwd, ["remote", "get-url", name], signal);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Count of local commits not yet on the upstream tracked branch. Returns
+ * 0 on any failure (no remote, no upstream, detached HEAD, network down)
+ * so callers can use it directly as a UI counter.
+ */
+export async function commitsAhead(
+  cwd: string,
+  signal?: AbortSignal,
+): Promise<number> {
+  try {
+    await git(cwd, ["rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"], signal);
+  } catch {
+    return 0;
+  }
+  try {
+    const out = await git(cwd, ["rev-list", "--count", "@{u}..HEAD"], signal);
+    return Number.parseInt(out, 10) || 0;
+  } catch {
+    return 0;
+  }
+}
+
+/**
+ * Best-effort `git push` — never throws. Returns `{ pushed: true }` on
+ * success, otherwise `{ pushed: false, reason }` with stderr-derived
+ * detail so the caller can surface a hint without writing its own
+ * error-handling.
+ */
+export async function pushBestEffort(
+  cwd: string,
+  remote = "origin",
+  branch?: string,
+  signal?: AbortSignal,
+): Promise<{ pushed: boolean; reason?: string }> {
+  if (!(await hasRemote(cwd, remote, signal))) {
+    return { pushed: false, reason: "no remote configured" };
+  }
+  const args = ["push", remote, branch ?? "HEAD"];
+  try {
+    await git(cwd, args, signal);
+    return { pushed: true };
+  } catch (err) {
+    const raw = (err as { stderr?: string }).stderr;
+    const reason = (typeof raw === "string" && raw.trim())
+      ? raw.trim().split("\n").slice(-1)[0]?.slice(0, 200)
+      : (err as Error).message?.slice(0, 200);
+    return { pushed: false, reason: reason || "unknown error" };
+  }
+}
diff --git a/pi-extension/src/index.ts b/pi-extension/src/index.ts
index 5e3afd9..57abba0 100644
--- a/pi-extension/src/index.ts
+++ b/pi-extension/src/index.ts
@@ -14,7 +14,7 @@ import {
   appendProgress,
   isDone,
 } from "./state.js";
-import { ensureRepo } from "./git.js";
+import { ensureRepo, commitsAhead, hasRemote } from "./git.js";
 import { clkChiefPrimer } from "./prompts.js";
 import { registerClkTools } from "./tools.js";
 import { registerSubagentTool, tmuxAvailable } from "./subagent.js";
@@ -23,6 +23,17 @@ import { classifyError, recoveryHint, withRetry } from "./errors.js";
 
 const execFileAsync = promisify(execFile);
 
+/**
+ * Return the first non-empty line of `s`, trimmed and truncated to `max`
+ * characters. Used for status-bar labels where a multi-line idea (or
+ * objective) would otherwise leak a fragment of line 2 into the status
+ * display — the same bug the Python TUI fixed in commit 24f379b.
+ */
+export function firstLineShort(s: string, max = 60): string {
+  const line = s.split("\n").find((l) => l.trim());
+  return (line ?? s).trim().slice(0, max);
+}
+
 export default async function (pi: ExtensionAPI): Promise<void> {
   installAbortBridges(pi);
   registerClkTools(pi);
@@ -49,7 +60,7 @@ export default async function (pi: ExtensionAPI): Promise<void> {
         "info",
       );
     } else {
-      ctx.ui.setStatus("clk-idea", `idea: ${s.idea.slice(0, 60)}`);
+      ctx.ui.setStatus("clk-idea", `idea: ${firstLineShort(s.idea)}`);
     }
     if (s.roster) {
       ctx.ui.setStatus(
@@ -200,7 +211,18 @@ export default async function (pi: ExtensionAPI): Promise<void> {
       );
 
       const idea = getState().idea;
-      findings.push(idea ? `  ✓ ok    idea: ${idea.slice(0, 60)}` : "  - info  no idea captured yet");
+      findings.push(idea ? `  ✓ ok    idea: ${firstLineShort(idea)}` : "  - info  no idea captured yet");
+
+      // Unpushed-commits check — mirrors the Python TUI's ahead counter
+      // so the user knows when local checkpoints haven't reached origin.
+      if (repoOk && await hasRemote(ctx.cwd)) {
+        const ahead = await commitsAhead(ctx.cwd);
+        if (ahead > 0) {
+          findings.push(`  ! warn  ${ahead} commit(s) ahead of origin (auto-push only fires when CLK_GITHUB_PUSH_ON_COMMIT=true)`);
+        } else {
+          findings.push("  ✓ ok    in sync with origin");
+        }
+      }
 
       ctx.ui.notify(["CLK doctor:", ...findings].join("\n"), "info");
     },
@@ -236,7 +258,7 @@ export default async function (pi: ExtensionAPI): Promise<void> {
           { kind: "note", message: `idea captured: ${idea}` },
           pi,
         );
-        ctx.ui.setStatus("clk-idea", `idea: ${idea.slice(0, 60)}`);
+        ctx.ui.setStatus("clk-idea", `idea: ${firstLineShort(idea)}`);
         ctx.ui.setStatus("clk-run", "active");
         ctx.ui.notify(
           "CLK run started. The chief is taking over. Esc cancels the current turn; /clk-abort ends the run.",
diff --git a/pi-extension/src/tools.ts b/pi-extension/src/tools.ts
index d2347c2..0bc29f6 100644
--- a/pi-extension/src/tools.ts
+++ b/pi-extension/src/tools.ts
@@ -11,10 +11,48 @@ import {
   checkoutBranch,
   mergeBranch,
   saveAndSwitch,
+  commitsAhead,
+  hasRemote,
+  pushBestEffort,
 } from "./git.js";
 import { activeSignal, mergeSignals, endRun } from "./abort.js";
 import { classifyError, looksRedacted, recoveryHint, withRetry } from "./errors.js";
 
+/**
+ * Push the latest commit to `origin` when the user opted in via
+ * `CLK_GITHUB_PUSH_ON_COMMIT=true` (same env var as the Python TUI). On
+ * success, updates the clk-git status to "synced". On failure (or when
+ * push isn't enabled but a remote exists), surfaces an `↑N` ahead count
+ * so the user knows how many local checkpoints haven't reached origin.
+ * Best-effort throughout — never throws.
+ */
+async function pushIfEnabled(
+  cwd: string,
+  setStatus: (key: string, value: string) => void,
+  signal?: AbortSignal,
+): Promise<void> {
+  try {
+    if (!(await hasRemote(cwd, "origin", signal))) return;
+    const pushOn = (process.env.CLK_GITHUB_PUSH_ON_COMMIT ?? "false").toLowerCase() === "true";
+    if (pushOn) {
+      const res = await pushBestEffort(cwd, "origin", undefined, signal);
+      if (res.pushed) {
+        setStatus("clk-git", "synced");
+        return;
+      }
+      const ahead = await commitsAhead(cwd, signal);
+      setStatus("clk-git", `↑${ahead} (push failed: ${res.reason ?? "unknown"})`);
+      return;
+    }
+    const ahead = await commitsAhead(cwd, signal);
+    if (ahead > 0) {
+      setStatus("clk-git", `↑${ahead} unpushed (set CLK_GITHUB_PUSH_ON_COMMIT=true to auto-push)`);
+    }
+  } catch {
+    /* best-effort — never block the tool result on push bookkeeping. */
+  }
+}
+
 export function registerClkTools(pi: ExtensionAPI): void {
   pi.registerTool({
     name: "clk_cast",
@@ -161,6 +199,7 @@ export function registerClkTools(pi: ExtensionAPI): void {
           pi,
         );
         ctx.ui.setStatus("clk-head", `HEAD: ${sha.slice(0, 8)}`);
+        await pushIfEnabled(ctx.cwd, ctx.ui.setStatus.bind(ctx.ui), sig);
       }
       return {
         content: [
@@ -354,6 +393,7 @@ export function registerClkTools(pi: ExtensionAPI): void {
       );
       ctx.ui.setStatus("clk-branch", `merged → ${home}`);
       if (mergeHead) ctx.ui.setStatus("clk-head", `HEAD: ${mergeHead.slice(0, 8)}`);
+      await pushIfEnabled(ctx.cwd, ctx.ui.setStatus.bind(ctx.ui), sig);
       return {
         content: [{ type: "text", text: `merged ${featureBranch} into ${home}` }],
         details: { featureBranch, home, mergeHead },
diff --git a/pi-extension/tests/git.test.ts b/pi-extension/tests/git.test.ts
index 6b05c99..5f39353 100644
--- a/pi-extension/tests/git.test.ts
+++ b/pi-extension/tests/git.test.ts
@@ -21,6 +21,9 @@ import {
   checkoutBranch,
   mergeBranch,
   saveAndSwitch,
+  hasRemote,
+  commitsAhead,
+  pushBestEffort,
 } from "../src/git.ts";
 
 const execFileAsync = promisify(execFile);
@@ -166,3 +169,96 @@ describe("branching", () => {
     assert.equal(await head(dir), baseSha);
   });
 });
+
+describe("remote / push / ahead", () => {
+  test("hasRemote is false on a fresh repo with no remote", async () => {
+    const dir = await mkdtemp(join(tmpdir(), "clk-remote-"));
+    try {
+      await ensureRepo(dir);
+      assert.equal(await hasRemote(dir), false);
+      // commitsAhead returns 0 when there's no upstream, never throws.
+      assert.equal(await commitsAhead(dir), 0);
+    } finally {
+      await rm(dir, { recursive: true, force: true });
+    }
+  });
+
+  test("hasRemote is true after `git remote add`", async () => {
+    const dir = await mkdtemp(join(tmpdir(), "clk-remote2-"));
+    try {
+      await ensureRepo(dir);
+      await execFileAsync(
+        "git", ["remote", "add", "origin", "/tmp/nonexistent-bare.git"], { cwd: dir },
+      );
+      assert.equal(await hasRemote(dir), true);
+    } finally {
+      await rm(dir, { recursive: true, force: true });
+    }
+  });
+
+  test("commitsAhead counts local commits not on upstream; pushBestEffort syncs", async () => {
+    const bare = await mkdtemp(join(tmpdir(), "clk-bare-"));
+    const work = await mkdtemp(join(tmpdir(), "clk-work-"));
+    try {
+      await execFileAsync("git", ["init", "--bare", "-q"], { cwd: bare });
+      await ensureRepo(work);
+      await gitConfig(work, "user.name", "test");
+      await gitConfig(work, "user.email", "test@clk.invalid");
+      await disableSigning(work);
+      await writeFile(join(work, "seed.txt"), "seed");
+      await checkpoint(work, "[clk] seed");
+      // Wire the bare as origin and set upstream via the first push.
+      await execFileAsync("git", ["remote", "add", "origin", bare], { cwd: work });
+      const branch = await currentBranch(work);
+      await execFileAsync("git", ["push", "-u", "origin", branch], { cwd: work });
+      assert.equal(await commitsAhead(work), 0);
+
+      // Make a new local commit; ahead becomes 1.
+      await writeFile(join(work, "next.txt"), "more");
+      await checkpoint(work, "[clk] next");
+      assert.equal(await commitsAhead(work), 1);
+
+      // pushBestEffort should sync; ahead returns to 0.
+      const res = await pushBestEffort(work, "origin");
+      assert.equal(res.pushed, true, `expected push to succeed, got ${JSON.stringify(res)}`);
+      assert.equal(await commitsAhead(work), 0);
+    } finally {
+      await rm(bare, { recursive: true, force: true });
+      await rm(work, { recursive: true, force: true });
+    }
+  });
+
+  test("pushBestEffort returns {pushed:false,reason} when the remote is unreachable", async () => {
+    const dir = await mkdtemp(join(tmpdir(), "clk-unreach-"));
+    try {
+      await ensureRepo(dir);
+      await gitConfig(dir, "user.name", "test");
+      await gitConfig(dir, "user.email", "test@clk.invalid");
+      await disableSigning(dir);
+      await writeFile(join(dir, "x.txt"), "x");
+      await checkpoint(dir, "[clk] x");
+      // Bogus path — push must fail, but pushBestEffort must NOT throw.
+      await execFileAsync(
+        "git", ["remote", "add", "origin", "/tmp/definitely-does-not-exist-bare.git"],
+        { cwd: dir },
+      );
+      const res = await pushBestEffort(dir, "origin");
+      assert.equal(res.pushed, false);
+      assert.ok(res.reason && res.reason.length > 0, `expected a reason, got ${JSON.stringify(res)}`);
+    } finally {
+      await rm(dir, { recursive: true, force: true });
+    }
+  });
+
+  test("pushBestEffort returns {pushed:false} cleanly when there is no remote", async () => {
+    const dir = await mkdtemp(join(tmpdir(), "clk-noremote-"));
+    try {
+      await ensureRepo(dir);
+      const res = await pushBestEffort(dir, "origin");
+      assert.equal(res.pushed, false);
+      assert.match(res.reason ?? "", /no remote/i);
+    } finally {
+      await rm(dir, { recursive: true, force: true });
+    }
+  });
+});
diff --git a/pi-extension/tests/index.test.ts b/pi-extension/tests/index.test.ts
index 605a63a..c17a6cf 100644
--- a/pi-extension/tests/index.test.ts
+++ b/pi-extension/tests/index.test.ts
@@ -14,7 +14,7 @@ import { mkdtemp, rm } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 
-import clkExtension from "../src/index.ts";
+import clkExtension, { firstLineShort } from "../src/index.ts";
 
 // ---------------------------------------------------------------------------
 // Fake pi.ExtensionAPI -- just enough surface for the extension to register
@@ -106,6 +106,20 @@ describe("clkExtension default export", () => {
     }
   });
 
+  test("firstLineShort returns only the first non-empty line, trimmed and capped", () => {
+    // Single line — returned verbatim up to the cap.
+    assert.equal(firstLineShort("hello world", 60), "hello world");
+    // Multi-line — the second line must never leak into the status string.
+    assert.equal(firstLineShort("refactor X\n\nbecause Y", 60), "refactor X");
+    // Leading blank lines are skipped so the first *content* line wins.
+    assert.equal(firstLineShort("\n\nactual idea\nmore", 60), "actual idea");
+    // Long single line is truncated to max chars; no newline appears.
+    const long = "a".repeat(120);
+    const out = firstLineShort(long, 60);
+    assert.equal(out.length, 60);
+    assert.equal(out.includes("\n"), false);
+  });
+
   test("/clk command rejects empty idea with a warning", async () => {
     const { pi, commands } = makeFakePi();
     await clkExtension(pi as any);

From 05af6cd1017d780b1b063c07100bd49e25ecc9b2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 24 May 2026 16:28:23 +0000
Subject: [PATCH 2/3] pi-extension: port auto-consensus, quality re-dispatch,
 autoresearch, Ralph
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports the Python harness's orchestration loops into the TypeScript
extension so the chief can drive real code-enforced fan-out instead of
having to fan-out by emitting parallel clk_subagent calls and hoping it
followed the prompt.

src/quality.ts (new)
  Port of clk_harness/orchestration/response_quality.py. Pure regex /
  string scorer — no I/O, no provider calls. Detects empty bodies,
  refusal phrases, malformed ACTION / POST blocks, missing declared
  POST PRODUCES keys, low CONFIDENCE: <n> values, and NEEDS_REVIEW:
  true. Exposes scoreResponse, repairHint, isRecoverable, summarise.

src/consensus.ts (new)
  Two primitives, both with an injectable spawn function so tests can
  drive them without tmux / pi installed:
    * dispatchWithQuality — wraps a single spawnSubagent in the
      quality re-dispatch loop. Re-runs with a repair-preamble
      preface on every recoverable failure up to maxRetries.
    * runConsensus — fan-out N parallel tmux samples for the same
      task, score each, return all + the winner. Pool runner caps
      concurrent in-flight sessions via maxParallel.

src/subagent.ts
  Exposes spawnSubagent + SpawnOptions so consensus.ts can call them.
  Behaviour unchanged.

src/tools.ts (+428 LOC)
  Four new tools registered alongside the existing roster:
    * clk_subagent_quality — one subagent + quality re-rolls.
    * clk_consensus       — N samples, scored, winner returned.
    * clk_autoresearch    — researcher + critic alternation
                            (iterations are recorded on progress.md).
    * clk_ralph           — branch + consensus fan-out in one call;
                            the chief then calls clk_merge or
                            clk_revert based on validation.
  Each tool surfaces a structured details payload so the chief sees
  scores, attempts, and flags rather than just the winning text.

src/prompts.ts
  Updated chief primer to direct the chief through the new tools
  (Dispatch tool quick reference, restated rules 3, 4, 5A). The old
  "emit 3-5 clk_subagent calls in the same message" guidance is
  replaced by "call clk_consensus" so fan-out is enforced in code,
  not by chief compliance.

src/index.ts
  /clk-help lists every orchestration tool and notes the
  CLK_GITHUB_PUSH_ON_COMMIT auto-push behaviour landed in the prior
  commit.

Tests: 24 new tests across quality.test.ts (happy paths, every failure
mode, repairHint / isRecoverable / summarise) and consensus.test.ts
(injected spawn covers ok / retry / max-retries / non-recoverable
refusal / fan-out winner picking / sample clamping / error capture /
maxParallel concurrency). index.test.ts and prompts.test.ts updated to
assert the new tools are registered and named in the chief primer.
All 94 tests pass, typecheck clean.
---
 pi-extension/package.json            |   2 +-
 pi-extension/src/consensus.ts        | 218 ++++++++++++++
 pi-extension/src/index.ts            |  13 +-
 pi-extension/src/prompts.ts          | 136 ++++++---
 pi-extension/src/quality.ts          | 250 ++++++++++++++++
 pi-extension/src/subagent.ts         |   4 +-
 pi-extension/src/tools.ts            | 428 +++++++++++++++++++++++++++
 pi-extension/tests/consensus.test.ts | 213 +++++++++++++
 pi-extension/tests/index.test.ts     |  24 +-
 pi-extension/tests/prompts.test.ts   |  11 +-
 pi-extension/tests/quality.test.ts   | 126 ++++++++
 11 files changed, 1379 insertions(+), 46 deletions(-)
 create mode 100644 pi-extension/src/consensus.ts
 create mode 100644 pi-extension/src/quality.ts
 create mode 100644 pi-extension/tests/consensus.test.ts
 create mode 100644 pi-extension/tests/quality.test.ts

diff --git a/pi-extension/package.json b/pi-extension/package.json
index b897bea..17c5b92 100644
--- a/pi-extension/package.json
+++ b/pi-extension/package.json
@@ -10,7 +10,7 @@
   },
   "scripts": {
     "typecheck": "tsc --noEmit",
-    "test:unit": "tsx --test tests/errors.test.ts tests/prompts.test.ts tests/state.test.ts tests/git.test.ts tests/index.test.ts tests/runtime_smoke.test.ts tests/safety_nets.test.ts",
+    "test:unit": "tsx --test tests/errors.test.ts tests/prompts.test.ts tests/state.test.ts tests/git.test.ts tests/index.test.ts tests/runtime_smoke.test.ts tests/safety_nets.test.ts tests/quality.test.ts tests/consensus.test.ts",
     "test": "npm run test:unit",
     "test:strict": "npm run typecheck && npm run test:unit"
   },
diff --git a/pi-extension/src/consensus.ts b/pi-extension/src/consensus.ts
new file mode 100644
index 0000000..1827a33
--- /dev/null
+++ b/pi-extension/src/consensus.ts
@@ -0,0 +1,218 @@
+/**
+ * Stochastic consensus + quality re-dispatch for clk_subagent.
+ *
+ * Two related primitives in one module:
+ *
+ *   * dispatchWithQuality — wraps a single spawnSubagent call with the
+ *     quality re-dispatch loop (port of agent.py
+ *     _dispatch_with_quality_loop). Scores the output via quality.ts;
+ *     when the verdict is recoverable, re-runs with a repair preamble
+ *     up to `maxRetries` extra attempts.
+ *
+ *   * runConsensus — fan-out N parallel tmux subagent samples for the
+ *     same task, score each via quality.ts, return all samples plus the
+ *     best (highest score, ok=true preferred). Port of
+ *     agent.py _dispatch_auto_consensus, minus the chief-coalescing
+ *     pass (the caller can choose to feed all samples back to the chief
+ *     if it wants a synthesised answer; the typical case is "pick the
+ *     winner and continue").
+ *
+ * Both helpers are exposed as new clk tools in tools.ts so the chief
+ * can dispatch through them instead of raw clk_subagent. The chief
+ * prompt nudges it that way for any non-trivial work.
+ */
+
+import { spawnSubagent as defaultSpawnSubagent, type SpawnOptions } from "./subagent.js";
+import {
+  scoreResponse,
+  repairHint,
+  isRecoverable,
+  summarise,
+  type ResponseQuality,
+  type ScoreOpts,
+} from "./quality.js";
+
+/**
+ * The signature of the function that actually spawns a subagent.
+ * Defaults to the real tmux-based implementation in subagent.ts;
+ * the tests inject a synchronous in-memory stub so they can run
+ * without tmux / pi available.
+ */
+export type SpawnFn = (opts: SpawnOptions) => Promise<{ output: string; sessionId: string }>;
+
+export interface QualityDispatchOptions extends SpawnOptions {
+  /**
+   * Extra spawn attempts after the initial one. Default 1 (so up to
+   * two total dispatches per call). Set to 0 to disable the loop.
+   */
+  maxRetries?: number;
+  /** Scoring options forwarded to quality.scoreResponse. */
+  scoreOpts?: ScoreOpts;
+  /** Called with a short status line on each retry. Optional. */
+  onRetry?: (attempt: number, quality: ResponseQuality) => void;
+  /**
+   * Injectable spawn function — defaults to the real tmux-based
+   * spawnSubagent. Tests pass a stub.
+   */
+  spawn?: SpawnFn;
+}
+
+export interface QualityDispatchResult {
+  output: string;
+  sessionId: string;
+  quality: ResponseQuality;
+  attempts: number;
+}
+
+/**
+ * Dispatch one subagent, score the output, re-dispatch with a repair
+ * preamble on recoverable failures. Returns the *last* run (which is
+ * either the first ok run, or the final attempt's run when retries
+ * ran out — callers inspect `quality.ok` to decide).
+ */
+export async function dispatchWithQuality(
+  opts: QualityDispatchOptions,
+): Promise<QualityDispatchResult> {
+  const maxRetries = Math.max(0, opts.maxRetries ?? 1);
+  const scoreOpts = opts.scoreOpts ?? {};
+  const spawn = opts.spawn ?? defaultSpawnSubagent;
+  const baseTask = opts.task;
+  let currentTask = baseTask;
+  let attempt = 0;
+  let lastQuality: ResponseQuality = scoreResponse("");
+  let lastOutput = "";
+  let lastSessionId = "";
+  while (true) {
+    attempt += 1;
+    const { output, sessionId } = await spawn({
+      ...opts,
+      task: currentTask,
+    });
+    lastOutput = output;
+    lastSessionId = sessionId;
+    lastQuality = scoreResponse(output, scoreOpts);
+    if (lastQuality.ok || !isRecoverable(lastQuality) || attempt > maxRetries) {
+      return { output, sessionId, quality: lastQuality, attempts: attempt };
+    }
+    opts.onRetry?.(attempt, lastQuality);
+    currentTask = repairHint(lastQuality) + "\n\nOriginal task:\n" + baseTask;
+  }
+  // Unreachable.
+  return {
+    output: lastOutput,
+    sessionId: lastSessionId,
+    quality: lastQuality,
+    attempts: attempt,
+  };
+}
+
+export interface ConsensusSample {
+  index: number;
+  agent: string;
+  output: string;
+  sessionId: string;
+  quality: ResponseQuality;
+  /** Set when spawnSubagent threw before producing output. */
+  error?: string;
+}
+
+export interface ConsensusOptions extends Omit<SpawnOptions, "onUpdate"> {
+  /** Number of parallel samples. Clamped to 1..6. Default 3. */
+  samples?: number;
+  /** Max concurrent in-flight tmux sessions. Clamped to 1..samples. Default min(4, samples). */
+  maxParallel?: number;
+  scoreOpts?: ScoreOpts;
+  /**
+   * Called with each sample's progress update. The fan-out wraps the
+   * tmux poll messages so the caller can stream them.
+   */
+  onSample?: (index: number, message: string) => void;
+  /** Injectable spawn function — tests pass a stub. */
+  spawn?: SpawnFn;
+}
+
+export interface ConsensusResult {
+  best: ConsensusSample;
+  all: ConsensusSample[];
+  /** Short human-readable winning rationale. */
+  reason: string;
+}
+
+function pickBest(samples: ConsensusSample[]): { winner: ConsensusSample; reason: string } {
+  if (samples.length === 0) {
+    throw new Error("runConsensus: no samples to pick from");
+  }
+  // Prefer samples that came back with output, then highest quality
+  // score, tie-break on shorter output (less filler).
+  const sorted = [...samples].sort((a, b) => {
+    const aHas = a.error ? 0 : 1;
+    const bHas = b.error ? 0 : 1;
+    if (aHas !== bHas) return bHas - aHas;
+    if (a.quality.score !== b.quality.score) return b.quality.score - a.quality.score;
+    return a.output.length - b.output.length;
+  });
+  const winner = sorted[0]!;
+  let reason = `sample #${winner.index} won: ${summarise(winner.quality)}`;
+  if (samples.length > 1) {
+    const scores = samples.map((s) => `#${s.index}=${s.quality.score.toFixed(2)}`).join(" ");
+    reason += ` (all: ${scores})`;
+  }
+  return { winner, reason };
+}
+
+/**
+ * Spawn N parallel subagent samples for the same task; score each;
+ * return them all plus the winner. Never throws — failed samples carry
+ * their error in `sample.error` and contribute a 0-score quality.
+ */
+export async function runConsensus(opts: ConsensusOptions): Promise<ConsensusResult> {
+  const samples = Math.max(1, Math.min(6, Math.floor(opts.samples ?? 3)));
+  const maxParallel = Math.max(1, Math.min(samples, Math.floor(opts.maxParallel ?? Math.min(4, samples))));
+  const scoreOpts = opts.scoreOpts ?? {};
+  const spawn = opts.spawn ?? defaultSpawnSubagent;
+
+  // Simple semaphore-style runner: launch up to `maxParallel` at a time.
+  const indices = Array.from({ length: samples }, (_, i) => i + 1);
+  const collected: ConsensusSample[] = [];
+
+  const runOne = async (idx: number): Promise<ConsensusSample> => {
+    try {
+      const { output, sessionId } = await spawn({
+        agent: opts.agent,
+        task: opts.task,
+        preferredModel: opts.preferredModel,
+        cwd: opts.cwd,
+        signal: opts.signal,
+        onUpdate: (text) => opts.onSample?.(idx, text),
+      });
+      const quality = scoreResponse(output, scoreOpts);
+      return { index: idx, agent: opts.agent, output, sessionId, quality };
+    } catch (err) {
+      return {
+        index: idx,
+        agent: opts.agent,
+        output: "",
+        sessionId: "",
+        quality: scoreResponse(""),
+        error: (err as Error).message,
+      };
+    }
+  };
+
+  // Pool: keep `maxParallel` in flight, drain as they complete.
+  let next = 0;
+  async function worker(): Promise<void> {
+    while (next < indices.length) {
+      const myIdx = indices[next++]!;
+      const result = await runOne(myIdx);
+      collected.push(result);
+    }
+  }
+  const workers = Array.from({ length: maxParallel }, () => worker());
+  await Promise.all(workers);
+
+  // Stable order by sample index.
+  collected.sort((a, b) => a.index - b.index);
+  const { winner, reason } = pickBest(collected);
+  return { best: winner, all: collected, reason };
+}
diff --git a/pi-extension/src/index.ts b/pi-extension/src/index.ts
index 57abba0..c2a2e27 100644
--- a/pi-extension/src/index.ts
+++ b/pi-extension/src/index.ts
@@ -85,15 +85,26 @@ export default async function (pi: ExtensionAPI): Promise<void> {
         "                   consensus, Ralph refinement, and autoresearch.",
         "  /clk-abort       End the current run. Preserves state for resume.",
         "  /clk-help        Show this list.",
-        "  /clk-doctor      Health-check tmux + git + workspace state.",
+        "  /clk-doctor      Health-check tmux + git + remote + workspace state.",
         "  /clk-undo        Preview the last CLK commit; `/clk-undo confirm`",
         "                   creates a new revert commit on top of it.",
         "",
+        "Orchestration tools the chief uses (you don't call these directly):",
+        "  clk_cast / clk_subagent          — roster + raw dispatch.",
+        "  clk_subagent_quality             — single dispatch + quality re-roll.",
+        "  clk_consensus                    — N parallel samples, scored, winner returned.",
+        "  clk_autoresearch                 — researcher + critic alternation.",
+        "  clk_ralph                        — branch + consensus fan-out in one call.",
+        "  clk_branch / clk_merge / clk_revert / clk_checkpoint — git plumbing.",
+        "  clk_done                         — completion signal.",
+        "",
         "Safety nets active in this workspace:",
         "  - Hardened .gitignore blocks .env / .env.bak / *.pem / id_rsa.",
         "  - .git/hooks/pre-push aborts pushes containing API-key patterns.",
         "  - .clk/state/*.{json,md} are written atomically with .bak rotation.",
         "  - Each completed iteration is checkpointed with `git commit`.",
+        "  - With CLK_GITHUB_PUSH_ON_COMMIT=true, every checkpoint auto-pushes",
+        "    to origin (and falls back to an ↑N ahead counter when it can't).",
         "",
         "Re-read this anytime with /clk-help. If something looks stuck, the",
         "agent_end hook will report it; `/clk-doctor` triages provider and",
diff --git a/pi-extension/src/prompts.ts b/pi-extension/src/prompts.ts
index d7b45e7..fc615e4 100644
--- a/pi-extension/src/prompts.ts
+++ b/pi-extension/src/prompts.ts
@@ -10,10 +10,50 @@ export function clkChiefPrimer(idea: string): string {
   return `
 You are the **CLK chief**, the orchestrating agent inside the Pi terminal harness.
 Your job is to take the captured idea, dynamically design a team of specialists,
-dispatch them via the \`clk_subagent\` tool, and drive
-the project to completion through repeated agentic cycles. Every meaningful
-change is committed to git via the CLK extension's \`clk_checkpoint\` tool, so
-no good work is ever lost.
+dispatch them via the CLK tools below, and drive the project to completion
+through repeated agentic cycles. Every meaningful change is committed to git via
+the CLK extension's \`clk_checkpoint\` tool, so no good work is ever lost.
+
+## Dispatch tool quick reference
+
+You have four dispatch tools — pick the one that matches the situation:
+
+* \`clk_subagent({ agent, task, preferredModel? })\` — one subagent, no
+  quality gate. Use only for cheap, low-risk work where re-rolling is
+  pointless (e.g. simple file reads, status pings).
+* \`clk_subagent_quality({ agent, task, maxRetries?, preferredModel? })\` —
+  one subagent **scored by the harness's quality detector**, with up to
+  \`maxRetries\` automatic repair re-rolls. Default everywhere a single
+  worker is enough but you want bad output caught before it propagates.
+* \`clk_consensus({ agent, task, samples?, preferredModel? })\` — fan-out N
+  parallel samples (default 3, max 6), each scored, returns the winner
+  plus all candidates. Use **liberally** for any decision that benefits
+  from diverse independent attempts: architecture, design choices,
+  ambiguous requirements, validation verdicts, security/perf reviews,
+  reviewer/oracle synthesis steps.
+* \`clk_autoresearch({ question, iterations?, preferredModel? })\` —
+  bounded research loop (default 2 iterations) that alternates a
+  \`researcher\` and a \`critic\` subagent and records each finding. Use
+  before non-trivial implementation work whenever the optimal approach
+  is unclear.
+
+For an entire Ralph iteration in one tool call:
+
+* \`clk_ralph({ iterationName, agent, task, samples?, preferredModel? })\` —
+  creates a fresh \`ralph/<iterationName>\` branch, dispatches a consensus
+  fan-out, and returns the winning output. You then EITHER call
+  \`clk_merge\` (accept) or \`clk_revert\` (reject) based on validation.
+  Prefer this over manual \`clk_branch\` + dispatch + commit when running
+  iterative refinement — the branch creation and fan-out happen in one
+  step and can't be skipped.
+
+The harness scores every \`clk_consensus\`, \`clk_subagent_quality\`, and
+\`clk_autoresearch\` output against the same rule set used by the Python
+CLK harness (empty / refusal / malformed-block / low-confidence / missing
+declared outputs). Recoverable failures auto-retry with a repair preamble
+so your worker fixes the specific problems rather than re-rolling at
+random. **Use the quality-gated tools as your default**; reserve raw
+\`clk_subagent\` for genuinely throwaway work.
 
 ## Captured idea
 
@@ -44,24 +84,36 @@ ${idea}
    decision-making mechanism for every meaningful choice: architecture,
    implementation approach, API contract, data model, security boundary,
    ambiguous requirement, risky refactor, and any time two or more
-   reasonable paths exist. Emit **3–5 \`clk_subagent\` tool calls in the same
-   assistant message**, each posing the question with a different framing,
-   prior, or role. Pi runs sibling tool calls concurrently by default, so
-   they fan out in parallel. Then in your next turn, emit ONE more
-   \`clk_subagent\` call to a judge (\`oracle\` or \`reviewer\`) that reads all
-   the candidates and picks or synthesizes the answer. Record the winner
-   with \`clk_progress({ kind: "consensus", message: "..." })\`.
+   reasonable paths exist.
+
+   The harness ships a code-enforced fan-out tool — use it directly:
+
+       clk_consensus({
+         agent: "designer",
+         samples: 3,                // or 5 for high-stakes decisions
+         task: "[Role: ...]\\n[Mission: ...]\\n\\nQuestion: ..."
+       })
+
+   \`clk_consensus\` spawns the N subagents in parallel via tmux, scores
+   each output through the harness's quality detector, and returns the
+   highest-scoring winner along with every candidate's score so you can
+   see the spread. If you need a synthesised answer rather than the
+   winner, follow with one \`clk_consensus\` call to an \`oracle\` or
+   \`reviewer\` whose task quotes all candidates and asks for a merged
+   verdict. Record the outcome with
+   \`clk_progress({ kind: "consensus", message: "..." })\`.
 
    **Encourage stochastic consensus at the start of every Ralph iteration**,
    not only when uncertainty is obvious. Even a quick 3-way fan-out on "what
    is the highest-value next improvement?" yields better choices than a
-   single-agent guess.
+   single-agent guess. The \`clk_ralph\` tool below already includes a
+   consensus fan-out by default.
 
 4. **Refinement: Ralph loop — iterate until done.** Once an MVP exists and
    tests pass, enter a refinement loop and **keep looping without pausing
    for user input** until \`clk_done\` is called. Do not stop between
    iterations — immediately pick the next improvement and start the next
-   cycle. Each iteration follows this exact branch-based protocol:
+   cycle. Prefer the one-call \`clk_ralph\` form for each iteration:
 
        a. Pick ONE improvement (lowest-risk, highest-value). Classify it:
           - **Measurable** (has a numeric outcome): run rule 5B
@@ -70,27 +122,33 @@ ${idea}
             authorised changes or the completion criteria are met.
           - **Qualitative** (design, architecture, unknown approach):
             run rule 5A first to resolve the open question, then
-            proceed with steps (b)–(h) below.
-       b. Create a feature branch: \`clk_branch({ name:
-          "ralph/iter-N-short-description" })\`. All work for this
-          iteration happens on that branch.
-       c. Dispatch a worker via \`clk_subagent\` to implement the improvement.
-       d. Call \`clk_checkpoint({ message: "ralph: <description>" })\`
-          to commit the work to the feature branch.
-       e. Run the project's validation command (\`pytest -q\`, \`npm test\`,
+            proceed with steps (b)–(g) below.
+       b. \`clk_ralph({ iterationName: "iter-N-short-description",
+          agent: "engineer", task: "<full persona + task>", samples: 3 })\`
+          The tool creates a fresh \`ralph/iter-N-short-description\`
+          branch, fans out 3 parallel subagent samples, scores them,
+          and returns the winning output. You read the winner and decide.
+       c. Call \`clk_checkpoint({ message: "ralph: <description>" })\`
+          to commit any additional changes you made on top of the winner.
+       d. Run the project's validation command (\`pytest -q\`, \`npm test\`,
           etc.) via the built-in \`bash\` tool.
-       f. **If validation passes:** call \`clk_merge({ message:
+       e. **If validation passes:** call \`clk_merge({ message:
           "ralph win: <description>" })\`. This commits any remaining
           changes, merges the feature branch into the home branch, and
-          returns you to the home branch. The accepted work is now on the home branch.
+          returns you to the home branch.
           Record with \`clk_progress({ kind: "ralph", message: "win: ..." })\`.
-       g. **If validation fails:** call \`clk_revert({ reason: "<why it
+       f. **If validation fails:** call \`clk_revert({ reason: "<why it
           failed>" })\`. This commits the rejected work to the feature
           branch (preserving it for review), then switches back to the
           home branch without merging. The rejected branch is never
           deleted. Record with \`clk_progress({ kind: "ralph", message:
           "rejected: ..." })\`.
-       h. Loop back to step (a) immediately for the next iteration.
+       g. Loop back to step (a) immediately for the next iteration.
+
+   Manual branch / dispatch / commit is still allowed via \`clk_branch\` +
+   \`clk_subagent_quality\` + \`clk_checkpoint\` if you need more control;
+   the \`clk_ralph\` form is just the recommended default because it
+   can't accidentally skip the branch + fan-out steps.
 
    After every ~10 consecutive iterations pause to re-evaluate direction
    with consensus (rule 3). **Resume the loop immediately after
@@ -108,19 +166,21 @@ ${idea}
    ### 5A. Qualitative autoresearch (open questions, design trade-offs,
    unknown library behaviour, ambiguous requirements)
 
-   Use Ralph-style parallel dispatch + stochastic consensus (rule 3):
-       a. State the open question precisely.
-       b. Fan out **3–5 \`clk_subagent\` calls in the same message**, each
-          exploring the question from a different angle — different
-          framing, different role, different prior. Use \`researcher\`
-          for external evidence, \`scout\` for code recon, \`worker\` for
-          a throwaway spike. They run concurrently.
-       c. In the next turn emit ONE \`oracle\` or \`reviewer\` call that
-          synthesizes all results and produces a decision.
-       d. Record with \`clk_progress({ kind: "autoresearch", message:
-          "qualitative: <question> → <answer>" })\`.
-       e. Apply immediately to the next Ralph iteration or architectural
-          decision.
+   Use the dedicated tool — it runs the researcher + critic alternation
+   in code, scores every output, and records each iteration on the
+   progress log:
+
+       clk_autoresearch({
+         question: "<the precise question or hypothesis>",
+         iterations: 2,           // 1..5; bump to 3 for high-stakes
+       })
+
+   Then in your next turn either act on the consolidated findings (apply
+   to the next Ralph iteration / architectural decision) or, if the
+   answer is still uncertain, fan out a \`clk_consensus\` synthesis pass
+   with the autoresearch findings quoted into the task. Record with
+   \`clk_progress({ kind: "autoresearch", message: "qualitative: <question>
+   → <answer>" })\`.
 
    ### 5B. Quantitative autoresearch (Karpathy autoresearch pattern)
 
diff --git a/pi-extension/src/quality.ts b/pi-extension/src/quality.ts
new file mode 100644
index 0000000..df9da69
--- /dev/null
+++ b/pi-extension/src/quality.ts
@@ -0,0 +1,250 @@
+/**
+ * Response-quality scorer — TypeScript port of
+ * clk_harness/orchestration/response_quality.py.
+ *
+ * Used by clk_consensus to pick the best of N stochastic samples and by
+ * the clk_subagent quality re-dispatch loop to detect (and re-roll on)
+ * empty / refused / malformed / low-confidence subagent outputs without
+ * a single extra provider call. All checks are pure string / regex
+ * operations.
+ *
+ * Mirrors the Python harness so a behaviour change in either side stays
+ * one diff away from a matching change in the other.
+ */
+
+export interface ResponseQuality {
+  ok: boolean;
+  /** Rough 0..1 score, 1.0 = clean, lower = more flags / more severe. */
+  score: number;
+  flags: string[];
+  reasons: string[];
+  /**
+   * False when the response should NOT be retried (an explicit refusal,
+   * for instance — re-rolling will just produce the same refusal). The
+   * caller is expected to escalate rather than retry in that case.
+   */
+  recoverable: boolean;
+  /** CONFIDENCE: <n> line value, if present and parseable. */
+  confidence?: number;
+  /** NEEDS_REVIEW: true|false line value, if present. */
+  needsReview?: boolean;
+}
+
+export interface ScoreOpts {
+  /** Text shorter than this counts as "empty". Default 40. */
+  minChars?: number;
+  /**
+   * When provided, every key must appear in some POST block's PRODUCES:
+   * list for the response to pass. Empty array disables the check.
+   */
+  expectedOutputs?: string[];
+  /**
+   * When true, missing the CONFIDENCE: line itself becomes a flag.
+   * Default false so existing prompts aren't penalised retroactively.
+   */
+  requireConfidence?: boolean;
+}
+
+const CONFIDENCE_RE = /^\s*CONFIDENCE\s*:\s*([0-9]*\.?[0-9]+)\s*$/im;
+const NEEDS_REVIEW_RE = /^\s*NEEDS_REVIEW\s*:\s*(true|yes|y|1|false|no|n|0)\s*$/im;
+const REFUSAL_RES: RegExp[] = [
+  /\bi\s+cannot\b/i,
+  /\bi\s+can'?t\b\s+(?:help|assist|do|comply)/i,
+  /\bi\s+(?:am|'m)\s+(?:sorry|unable)\b.*\b(?:cannot|can'?t|won'?t)\b/i,
+  /\bas\s+an\s+ai\s+(?:language\s+)?model\b/i,
+  /\bI\s+do\s+not\s+have\s+the\s+ability\b/i,
+];
+const HEADER_ACTION_RE = /^\s*ACTION\s*:\s*([A-Za-z]+)/gim;
+const END_ACTION_RE = /^\s*END_ACTION\s*$/gim;
+const POST_HEAD_RE = /^\s*POST\s*:\s*([A-Za-z][A-Za-z0-9_]*)\s*$/gim;
+const POST_END_RE = /^\s*END_POST\s*$/gim;
+const PRODUCES_RE = /^\s*PRODUCES\s*:\s*(.+)$/gim;
+
+function parseConfidence(text: string): number | undefined {
+  const m = CONFIDENCE_RE.exec(text);
+  if (!m) return undefined;
+  let v = Number.parseFloat(m[1]!);
+  if (Number.isNaN(v)) return undefined;
+  if (v < 0) v = 0;
+  if (v > 1) v = Math.min(1, v / 100);
+  return v;
+}
+
+function parseNeedsReview(text: string): boolean | undefined {
+  const m = NEEDS_REVIEW_RE.exec(text);
+  if (!m) return undefined;
+  return ["true", "yes", "y", "1"].includes(m[1]!.toLowerCase());
+}
+
+function detectRefusal(text: string): boolean {
+  return REFUSAL_RES.some((re) => re.test(text));
+}
+
+function countMatches(re: RegExp, text: string): number {
+  // Global regexes need a fresh lastIndex on each call.
+  const fresh = new RegExp(re.source, re.flags);
+  let n = 0;
+  while (fresh.exec(text) !== null) n++;
+  return n;
+}
+
+function actionBlockImbalance(text: string): number {
+  const heads = countMatches(HEADER_ACTION_RE, text);
+  if (heads === 0) return 0;
+  const ends = countMatches(END_ACTION_RE, text);
+  return heads - ends;
+}
+
+function postBlockImbalance(text: string): number {
+  const heads = countMatches(POST_HEAD_RE, text);
+  if (heads === 0) return 0;
+  const ends = countMatches(POST_END_RE, text);
+  return heads - ends;
+}
+
+function declaredProduces(text: string): Set<string> {
+  const out = new Set<string>();
+  const fresh = new RegExp(PRODUCES_RE.source, PRODUCES_RE.flags);
+  let m: RegExpExecArray | null;
+  while ((m = fresh.exec(text)) !== null) {
+    for (const key of m[1]!.split(",")) {
+      const k = key.trim();
+      if (k) out.add(k);
+    }
+  }
+  return out;
+}
+
+function missingOutputs(text: string, expected: string[]): string[] {
+  if (expected.length === 0) return [];
+  const declared = declaredProduces(text);
+  return expected.filter((k) => !declared.has(k));
+}
+
+/**
+ * Score a single response text against the harness's quality rules.
+ *
+ * Always returns a `ResponseQuality` — never throws on a malformed
+ * input, so callers can use the score even when the upstream provider
+ * returned junk.
+ */
+export function scoreResponse(
+  text: string | null | undefined,
+  opts: ScoreOpts = {},
+): ResponseQuality {
+  const minChars = opts.minChars ?? 40;
+  const expected = opts.expectedOutputs ?? [];
+  const requireConfidence = opts.requireConfidence ?? false;
+
+  const raw = text ?? "";
+  const body = raw.trim();
+  const flags: string[] = [];
+  const reasons: string[] = [];
+  let recoverable = true;
+  const confidence = parseConfidence(raw);
+  const needsReview = parseNeedsReview(raw);
+
+  if (body.length < Math.max(1, minChars)) {
+    flags.push("empty");
+    reasons.push(
+      `Response body was ${body.length} chars (minimum ${minChars}). Re-emit a substantive response.`,
+    );
+  }
+  if (detectRefusal(raw)) {
+    flags.push("refusal");
+    reasons.push(
+      "Response looked like a refusal. The task is in-scope for this harness; respond directly or, " +
+        "if blocked, explain the obstacle so the chief can re-cast or escalate.",
+    );
+    recoverable = false;
+  }
+  const actBal = actionBlockImbalance(raw);
+  if (actBal > 0) {
+    flags.push("malformed_action");
+    reasons.push(
+      `${actBal} ACTION header(s) had no matching END_ACTION. Every ACTION block must terminate with a line END_ACTION.`,
+    );
+  }
+  const postBal = postBlockImbalance(raw);
+  if (postBal > 0) {
+    flags.push("malformed_post");
+    reasons.push(
+      `${postBal} POST header(s) had no matching END_POST. Every POST block must terminate with a line END_POST.`,
+    );
+  }
+  const missing = missingOutputs(raw, expected);
+  if (missing.length > 0) {
+    flags.push("outputs_missing");
+    reasons.push(
+      "Declared output contract keys not satisfied: " +
+        missing.join(", ") +
+        ". Each key must appear in some POST block's PRODUCES: list.",
+    );
+  }
+  if (confidence !== undefined && confidence < 0.5) {
+    flags.push("low_confidence");
+    reasons.push(
+      `You reported CONFIDENCE: ${confidence.toFixed(2)}. Either improve the response or escalate.`,
+    );
+  }
+  if (needsReview === true) {
+    flags.push("needs_review_self");
+    reasons.push(
+      "You set NEEDS_REVIEW: true. Sharpen the answer or call out the specific uncertainty.",
+    );
+  }
+  if (requireConfidence && confidence === undefined) {
+    flags.push("confidence_missing");
+    reasons.push(
+      "Response did not include a CONFIDENCE: <0..1> line. Emit one final line stating your confidence so the harness can decide whether to re-sample.",
+    );
+  }
+
+  const deductions: Record<string, number> = {
+    empty: 0.6,
+    refusal: 0.5,
+    malformed_action: 0.4,
+    malformed_post: 0.3,
+    outputs_missing: 0.4,
+    low_confidence: 0.3,
+    needs_review_self: 0.2,
+    confidence_missing: 0.1,
+  };
+  let s = 1.0;
+  for (const f of flags) s -= deductions[f] ?? 0.2;
+  const score = Math.max(0, Math.round(s * 1000) / 1000);
+
+  return {
+    ok: flags.length === 0,
+    score,
+    flags,
+    reasons,
+    recoverable,
+    confidence,
+    needsReview,
+  };
+}
+
+/**
+ * Build a re-dispatch preamble that names every flag so the worker
+ * fixes the specific issues instead of re-rolling at random.
+ */
+export function repairHint(q: ResponseQuality): string {
+  if (q.ok || q.reasons.length === 0) return "";
+  const bullets = q.reasons.map((r) => `- ${r}`).join("\n");
+  return (
+    "Your previous response was rejected by the harness for the following reasons:\n" +
+    bullets +
+    "\nRe-emit a complete response that fixes every item above."
+  );
+}
+
+/** Convenience: is the verdict worth re-rolling on? */
+export function isRecoverable(q: ResponseQuality): boolean {
+  return !q.ok && q.recoverable;
+}
+
+export function summarise(q: ResponseQuality): string {
+  if (q.ok) return `ok score=${q.score.toFixed(2)}`;
+  return `flags=${q.flags.join(",") || "?"} score=${q.score.toFixed(2)}`;
+}
diff --git a/pi-extension/src/subagent.ts b/pi-extension/src/subagent.ts
index 552ecd5..20600c3 100644
--- a/pi-extension/src/subagent.ts
+++ b/pi-extension/src/subagent.ts
@@ -86,7 +86,7 @@ export async function killAllSubagentSessions(): Promise<void> {
   );
 }
 
-interface SpawnOptions {
+export interface SpawnOptions {
   agent: string;
   task: string;
   preferredModel?: string;
@@ -95,7 +95,7 @@ interface SpawnOptions {
   onUpdate?: (text: string) => void;
 }
 
-async function spawnSubagent(opts: SpawnOptions): Promise<{ output: string; sessionId: string }> {
+export async function spawnSubagent(opts: SpawnOptions): Promise<{ output: string; sessionId: string }> {
   const sessionId = `clk-${randomUUID().slice(0, 8)}`;
   const dirPath = join(opts.cwd, ".clk", "subagents", sessionId);
   const taskPath = resolve(join(dirPath, "task.md"));
diff --git a/pi-extension/src/tools.ts b/pi-extension/src/tools.ts
index 0bc29f6..ec53782 100644
--- a/pi-extension/src/tools.ts
+++ b/pi-extension/src/tools.ts
@@ -17,6 +17,9 @@ import {
 } from "./git.js";
 import { activeSignal, mergeSignals, endRun } from "./abort.js";
 import { classifyError, looksRedacted, recoveryHint, withRetry } from "./errors.js";
+import { dispatchWithQuality, runConsensus } from "./consensus.js";
+import { tmuxAvailable } from "./subagent.js";
+import { summarise } from "./quality.js";
 
 /**
  * Push the latest commit to `origin` when the user opted in via
@@ -401,6 +404,431 @@ export function registerClkTools(pi: ExtensionAPI): void {
     },
   });
 
+  // ---------------------------------------------------------------------
+  // clk_consensus — stochastic auto-consensus fan-out
+  // ---------------------------------------------------------------------
+  pi.registerTool({
+    name: "clk_consensus",
+    label: "CLK Consensus",
+    description:
+      "Fan-out N parallel subagent samples for the SAME task; score each via the harness's " +
+      "quality detector and return the highest-scoring one (plus all candidates for traceability). " +
+      "Use this instead of clk_subagent whenever an answer is high-stakes (a design choice, a " +
+      "validation verdict, a non-trivial code edit), or whenever the chief is uncertain. Default " +
+      "samples=3; clamp 1..6.",
+    promptSnippet:
+      "Fan-out N stochastic samples for one task; quality-scored winner returned. " +
+      "Use liberally for high-stakes or uncertain dispatches.",
+    parameters: Type.Object({
+      agent: Type.String({
+        description: "Short role label (e.g. 'engineer', 'designer'). Embed the full persona in the task.",
+      }),
+      task: Type.String({
+        description: "Complete task description, including role persona and context.",
+      }),
+      samples: Type.Optional(
+        Type.Integer({ minimum: 1, maximum: 6, description: "How many samples to draw. Default 3." }),
+      ),
+      preferredModel: Type.Optional(
+        Type.String({
+          description:
+            "Short alias (claude-opus, claude-sonnet, claude-haiku, gpt-4o, gpt-4o-mini) " +
+            "or a provider/model string. Omit to use pi's default.",
+        }),
+      ),
+      minChars: Type.Optional(
+        Type.Integer({ minimum: 0, description: "Override minimum-response-length flag threshold (default 40)." }),
+      ),
+    }),
+    async execute(_id, params, signal, onUpdate, ctx) {
+      if (signal?.aborted || activeSignal()?.aborted) {
+        return { content: [{ type: "text", text: "clk_consensus cancelled before start." }], details: {} };
+      }
+      if (!(await tmuxAvailable())) {
+        return {
+          content: [{
+            type: "text",
+            text: "clk_consensus unavailable: tmux is not installed. Install it with: brew install tmux / apt install tmux",
+          }],
+          details: {},
+        };
+      }
+      if (looksRedacted(params.task)) {
+        return {
+          content: [{ type: "text", text: `clk_consensus skipped: 'task' appears redacted. ${recoveryHint("redaction")}` }],
+          details: {},
+        };
+      }
+      const sig = mergeSignals(signal, activeSignal());
+      const samples = Math.max(1, Math.min(6, params.samples ?? 3));
+      try {
+        const result = await runConsensus({
+          agent: params.agent,
+          task: params.task,
+          preferredModel: params.preferredModel,
+          cwd: ctx.cwd,
+          signal: sig,
+          samples,
+          scoreOpts: params.minChars !== undefined ? { minChars: params.minChars } : {},
+          onSample: (idx, message) =>
+            onUpdate?.({
+              content: [{ type: "text", text: `[consensus #${idx}] ${message}` }],
+              details: {},
+            }),
+        });
+        await appendProgress(
+          ctx.cwd,
+          {
+            kind: "consensus",
+            message: `${samples} samples for '${params.agent}': ${result.reason}`,
+          },
+          pi,
+        );
+        ctx.ui.setStatus("clk-last", `consensus: ${result.reason.slice(0, 80)}`);
+        const recap = result.all
+          .map((s) =>
+            s.error
+              ? `  #${s.index} error: ${s.error}`
+              : `  #${s.index} score=${s.quality.score.toFixed(2)} ` +
+                `(${summarise(s.quality)}) sessionId=${s.sessionId}`,
+          )
+          .join("\n");
+        const body =
+          `Consensus winner (sample #${result.best.index}, score ${result.best.quality.score.toFixed(2)}):\n\n` +
+          (result.best.output || "(winner produced no output)") +
+          `\n\n---\nAll samples:\n${recap}`;
+        return {
+          content: [{ type: "text", text: body }],
+          details: {
+            samples,
+            winnerIndex: result.best.index,
+            winnerScore: result.best.quality.score,
+            allScores: result.all.map((s) => ({ index: s.index, score: s.quality.score, flags: s.quality.flags })),
+          },
+        };
+      } catch (err) {
+        const cls = classifyError(err);
+        return {
+          content: [{ type: "text", text: `clk_consensus failed: ${(err as Error).message}. ${recoveryHint(cls)}` }],
+          details: { error: String(err) },
+        };
+      }
+    },
+  });
+
+  // ---------------------------------------------------------------------
+  // clk_subagent_quality — single subagent + quality re-dispatch loop
+  // ---------------------------------------------------------------------
+  pi.registerTool({
+    name: "clk_subagent_quality",
+    label: "CLK Subagent (quality-validated)",
+    description:
+      "Dispatch ONE subagent and gate its output through the quality detector. On a recoverable " +
+      "failure (empty / malformed / low-confidence), re-runs with a repair preamble up to " +
+      "`maxRetries` extra times. Cheaper than clk_consensus when the task is simple but you still " +
+      "want a quality gate. Default maxRetries=1.",
+    promptSnippet: "Single subagent dispatch with automatic quality scoring + repair-preamble re-rolls.",
+    parameters: Type.Object({
+      agent: Type.String({ description: "Short role label." }),
+      task: Type.String({ description: "Complete task description, including persona." }),
+      preferredModel: Type.Optional(Type.String()),
+      maxRetries: Type.Optional(
+        Type.Integer({ minimum: 0, maximum: 4, description: "Extra dispatches on quality failures. Default 1." }),
+      ),
+      minChars: Type.Optional(Type.Integer({ minimum: 0 })),
+    }),
+    async execute(_id, params, signal, onUpdate, ctx) {
+      if (signal?.aborted || activeSignal()?.aborted) {
+        return { content: [{ type: "text", text: "clk_subagent_quality cancelled before start." }], details: {} };
+      }
+      if (!(await tmuxAvailable())) {
+        return {
+          content: [{ type: "text", text: "tmux not installed; cannot dispatch." }],
+          details: {},
+        };
+      }
+      if (looksRedacted(params.task)) {
+        return {
+          content: [{ type: "text", text: `clk_subagent_quality skipped: 'task' appears redacted. ${recoveryHint("redaction")}` }],
+          details: {},
+        };
+      }
+      const sig = mergeSignals(signal, activeSignal());
+      try {
+        const result = await dispatchWithQuality({
+          agent: params.agent,
+          task: params.task,
+          preferredModel: params.preferredModel,
+          cwd: ctx.cwd,
+          signal: sig,
+          maxRetries: params.maxRetries ?? 1,
+          scoreOpts: params.minChars !== undefined ? { minChars: params.minChars } : {},
+          onRetry: (attempt, q) =>
+            onUpdate?.({
+              content: [{
+                type: "text",
+                text: `quality retry ${attempt}: ${summarise(q)} — re-rolling with repair preamble`,
+              }],
+              details: {},
+            }),
+        });
+        ctx.ui.setStatus("clk-last", `quality: ${summarise(result.quality)}`);
+        const body =
+          (result.output || "(subagent produced no output)") +
+          `\n\n---\nquality: ${summarise(result.quality)} after ${result.attempts} attempt(s).`;
+        return {
+          content: [{ type: "text", text: body }],
+          details: {
+            attempts: result.attempts,
+            score: result.quality.score,
+            ok: result.quality.ok,
+            flags: result.quality.flags,
+            sessionId: result.sessionId,
+          },
+        };
+      } catch (err) {
+        const cls = classifyError(err);
+        return {
+          content: [{ type: "text", text: `clk_subagent_quality failed: ${(err as Error).message}. ${recoveryHint(cls)}` }],
+          details: { error: String(err) },
+        };
+      }
+    },
+  });
+
+  // ---------------------------------------------------------------------
+  // clk_autoresearch — survey → investigate → critique loop
+  // ---------------------------------------------------------------------
+  pi.registerTool({
+    name: "clk_autoresearch",
+    label: "CLK Autoresearch",
+    description:
+      "Karpathy-style autoresearch loop: spawn a researcher subagent to investigate the question, " +
+      "then a critic subagent to review the finding. Repeat for `iterations` cycles. Each finding " +
+      "and critique is appended to the progress log. Use BEFORE non-trivial implementation work to " +
+      "ground the chief in real findings rather than priors.",
+    promptSnippet:
+      "Iteratively investigate an open question via researcher + critic subagents.",
+    parameters: Type.Object({
+      question: Type.String({ description: "The open question or hypothesis to investigate." }),
+      iterations: Type.Optional(
+        Type.Integer({ minimum: 1, maximum: 5, description: "Number of investigate-then-critique cycles. Default 2." }),
+      ),
+      preferredModel: Type.Optional(Type.String()),
+    }),
+    async execute(_id, params, signal, onUpdate, ctx) {
+      if (signal?.aborted || activeSignal()?.aborted) {
+        return { content: [{ type: "text", text: "clk_autoresearch cancelled before start." }], details: {} };
+      }
+      if (!(await tmuxAvailable())) {
+        return {
+          content: [{ type: "text", text: "tmux not installed; cannot dispatch." }],
+          details: {},
+        };
+      }
+      if (looksRedacted(params.question)) {
+        return {
+          content: [{ type: "text", text: `clk_autoresearch skipped: 'question' appears redacted. ${recoveryHint("redaction")}` }],
+          details: {},
+        };
+      }
+      const sig = mergeSignals(signal, activeSignal());
+      const iterations = Math.max(1, Math.min(5, params.iterations ?? 2));
+      const log: Array<{ iteration: number; finding: string; critique: string; findingScore: number; critiqueScore: number }> = [];
+
+      for (let i = 1; i <= iterations; i++) {
+        if (sig?.aborted) break;
+        onUpdate?.({
+          content: [{ type: "text", text: `autoresearch #${i}/${iterations}: investigating` }],
+          details: {},
+        });
+        const researcherTask =
+          `You are a researcher dispatched for autoresearch iteration #${i}. ` +
+          `Investigate this question deeply and report findings:\n\n${params.question}\n\n` +
+          (log.length > 0
+            ? `Prior findings so far:\n${log.map((l) => `[iter ${l.iteration}] ${l.finding.slice(0, 300)}`).join("\n\n")}\n\n`
+            : "") +
+          "Produce concrete findings (cite files, measurements, logs). " +
+          "End your response with a single line: CONFIDENCE: <0..1>";
+        const research = await dispatchWithQuality({
+          agent: "researcher",
+          task: researcherTask,
+          preferredModel: params.preferredModel,
+          cwd: ctx.cwd,
+          signal: sig,
+          maxRetries: 1,
+        });
+        if (sig?.aborted) break;
+        onUpdate?.({
+          content: [{ type: "text", text: `autoresearch #${i}/${iterations}: critiquing` }],
+          details: {},
+        });
+        const criticTask =
+          `You are a critic for autoresearch iteration #${i}. The researcher reported:\n\n` +
+          (research.output || "(empty)") +
+          `\n\nOriginal question:\n${params.question}\n\n` +
+          "Identify gaps, weak evidence, contradicting facts. Be specific. " +
+          "End with: CONFIDENCE: <0..1>";
+        const critic = await dispatchWithQuality({
+          agent: "critic",
+          task: criticTask,
+          preferredModel: params.preferredModel,
+          cwd: ctx.cwd,
+          signal: sig,
+          maxRetries: 1,
+        });
+        log.push({
+          iteration: i,
+          finding: research.output,
+          critique: critic.output,
+          findingScore: research.quality.score,
+          critiqueScore: critic.quality.score,
+        });
+        await appendProgress(
+          ctx.cwd,
+          {
+            kind: "autoresearch",
+            message:
+              `iter ${i}: research score=${research.quality.score.toFixed(2)} ` +
+              `critic score=${critic.quality.score.toFixed(2)}`,
+          },
+          pi,
+        );
+      }
+      const body =
+        `Autoresearch on: ${params.question}\n\n` +
+        log.map((l) =>
+          `=== iteration ${l.iteration} ===\n` +
+          `FINDING (score ${l.findingScore.toFixed(2)}):\n${l.finding}\n\n` +
+          `CRITIQUE (score ${l.critiqueScore.toFixed(2)}):\n${l.critique}`,
+        ).join("\n\n");
+      ctx.ui.setStatus("clk-last", `autoresearch: ${iterations} iter(s) on ${params.question.slice(0, 40)}`);
+      return {
+        content: [{ type: "text", text: body || "(autoresearch produced no iterations — aborted?)" }],
+        details: {
+          question: params.question,
+          iterations: log.length,
+          findings: log.map((l) => ({ iteration: l.iteration, findingScore: l.findingScore, critiqueScore: l.critiqueScore })),
+        },
+      };
+    },
+  });
+
+  // ---------------------------------------------------------------------
+  // clk_ralph — branch / dispatch / evaluate / commit-or-revert iteration
+  // ---------------------------------------------------------------------
+  pi.registerTool({
+    name: "clk_ralph",
+    label: "CLK Ralph Iteration",
+    description:
+      "One Ralph iteration: create a feature branch, dispatch a consensus fan-out of N samples, " +
+      "let the chief inspect the winning output (returned to it), then EITHER keep the branch " +
+      "(clk_merge) OR abandon it (clk_revert) based on the chief's verdict. The branch creation " +
+      "and dispatch are enforced in code so the chief can't skip the Ralph protocol. The chief " +
+      "still drives the accept/reject decision via subsequent clk_merge or clk_revert calls.",
+    promptSnippet:
+      "Branch + consensus dispatch one iteration; chief reviews winner and accepts via clk_merge or rejects via clk_revert.",
+    parameters: Type.Object({
+      iterationName: Type.String({
+        description:
+          "Short kebab-case branch suffix, e.g. 'iter-3-optimize-db'. Will be prefixed with 'ralph/'.",
+      }),
+      agent: Type.String({ description: "Role label for the dispatched worker." }),
+      task: Type.String({ description: "Full task description for the worker, including persona." }),
+      samples: Type.Optional(
+        Type.Integer({ minimum: 1, maximum: 6, description: "Consensus samples per iteration. Default 3." }),
+      ),
+      preferredModel: Type.Optional(Type.String()),
+    }),
+    async execute(_id, params, signal, onUpdate, ctx) {
+      if (signal?.aborted || activeSignal()?.aborted) {
+        return { content: [{ type: "text", text: "clk_ralph cancelled before start." }], details: {} };
+      }
+      if (!(await tmuxAvailable())) {
+        return {
+          content: [{ type: "text", text: "tmux not installed; cannot dispatch." }],
+          details: {},
+        };
+      }
+      if (looksRedacted(params.task) || looksRedacted(params.iterationName)) {
+        return {
+          content: [{ type: "text", text: `clk_ralph skipped: parameters appear redacted. ${recoveryHint("redaction")}` }],
+          details: {},
+        };
+      }
+      const sig = mergeSignals(signal, activeSignal());
+      const branchName = params.iterationName.startsWith("ralph/")
+        ? params.iterationName
+        : `ralph/${params.iterationName}`;
+      let home = getHomeBranch();
+      try {
+        if (!home) {
+          home = await currentBranch(ctx.cwd, sig);
+          await setHomeBranch(ctx.cwd, home, pi);
+        }
+        await withRetry(() => createAndCheckoutBranch(ctx.cwd, branchName, sig), { signal: sig });
+      } catch (err) {
+        const cls = classifyError(err);
+        return {
+          content: [{ type: "text", text: `clk_ralph failed to create branch '${branchName}': ${(err as Error).message}. ${recoveryHint(cls)}` }],
+          details: { error: String(err) },
+        };
+      }
+      onUpdate?.({
+        content: [{ type: "text", text: `ralph: on branch ${branchName}, dispatching ${params.samples ?? 3} samples` }],
+        details: {},
+      });
+
+      try {
+        const consensus = await runConsensus({
+          agent: params.agent,
+          task: params.task,
+          preferredModel: params.preferredModel,
+          cwd: ctx.cwd,
+          signal: sig,
+          samples: params.samples ?? 3,
+          onSample: (idx, message) =>
+            onUpdate?.({
+              content: [{ type: "text", text: `[ralph/${branchName} #${idx}] ${message}` }],
+              details: {},
+            }),
+        });
+        await appendProgress(
+          ctx.cwd,
+          {
+            kind: "ralph",
+            message: `iteration ${branchName}: ${consensus.reason}`,
+          },
+          pi,
+        );
+        ctx.ui.setStatus("clk-branch", `ralph: ${branchName}`);
+        const body =
+          `Ralph iteration on branch ${branchName} — home=${home}.\n\n` +
+          `Winning sample (#${consensus.best.index}, score ${consensus.best.quality.score.toFixed(2)}):\n\n` +
+          (consensus.best.output || "(no output)") +
+          "\n\n---\nReview the winner above. If it advances the goal, accept it with " +
+          "`clk_merge({message: '<summary>'})`. If it doesn't, abandon the branch with " +
+          "`clk_revert({reason: '<why>'})` (the branch will be preserved for review).";
+        return {
+          content: [{ type: "text", text: body }],
+          details: {
+            branch: branchName,
+            home,
+            winnerIndex: consensus.best.index,
+            winnerScore: consensus.best.quality.score,
+            allScores: consensus.all.map((s) => ({ index: s.index, score: s.quality.score, flags: s.quality.flags })),
+          },
+        };
+      } catch (err) {
+        const cls = classifyError(err);
+        return {
+          content: [{ type: "text", text: `clk_ralph dispatch failed on ${branchName}: ${(err as Error).message}. ${recoveryHint(cls)}` }],
+          details: { error: String(err), branch: branchName },
+        };
+      }
+    },
+  });
+
   pi.registerTool({
     name: "clk_done",
     label: "CLK Done",
diff --git a/pi-extension/tests/consensus.test.ts b/pi-extension/tests/consensus.test.ts
new file mode 100644
index 0000000..3f8608d
--- /dev/null
+++ b/pi-extension/tests/consensus.test.ts
@@ -0,0 +1,213 @@
+/**
+ * Tests for src/consensus.ts. We inject a fake spawn function so the
+ * tests don't need tmux or pi installed — the goal is to verify the
+ * scoring / picking / retry behaviour, not the real subprocess plumbing
+ * (which is exercised separately by the runtime smoke suite).
+ */
+import { test, describe } from "node:test";
+import assert from "node:assert/strict";
+
+import {
+  dispatchWithQuality,
+  runConsensus,
+  type SpawnFn,
+} from "../src/consensus.ts";
+
+// Sentinel substring detection — the quality-retry repair preamble is
+// rendered by quality.repairHint and begins with this exact phrase.
+const REPAIR_MARKER = "Your previous response was rejected";
+
+// Comfortably above the empty threshold so the quality detector lets it
+// through. Used wherever a test needs a "passing" response body.
+const GOOD = "This is a comfortably substantive response that exceeds the empty threshold without question.";
+
+describe("dispatchWithQuality", () => {
+  test("returns the first ok response without retrying", async () => {
+    let calls = 0;
+    const spawn: SpawnFn = async () => {
+      calls += 1;
+      return { output: GOOD, sessionId: `s${calls}` };
+    };
+    const res = await dispatchWithQuality({
+      agent: "worker",
+      task: "do the thing",
+      cwd: "/tmp",
+      spawn,
+    });
+    assert.equal(calls, 1);
+    assert.equal(res.attempts, 1);
+    assert.equal(res.quality.ok, true);
+    assert.equal(res.sessionId, "s1");
+  });
+
+  test("retries with a repair preamble after a recoverable failure", async () => {
+    let calls = 0;
+    const taskSeen: string[] = [];
+    const spawn: SpawnFn = async (opts) => {
+      calls += 1;
+      taskSeen.push(opts.task);
+      if (calls === 1) return { output: "", sessionId: "s1" }; // empty → recoverable
+      return { output: GOOD, sessionId: "s2" };
+    };
+    const retries: number[] = [];
+    const res = await dispatchWithQuality({
+      agent: "worker",
+      task: "first attempt task",
+      cwd: "/tmp",
+      maxRetries: 1,
+      onRetry: (n) => retries.push(n),
+      spawn,
+    });
+    assert.equal(calls, 2);
+    assert.equal(res.attempts, 2);
+    assert.equal(res.quality.ok, true);
+    assert.deepEqual(retries, [1]);
+    // First call sees the original task; second sees the repair preamble.
+    assert.equal(taskSeen[0]?.includes(REPAIR_MARKER), false);
+    assert.equal(taskSeen[1]?.includes(REPAIR_MARKER), true);
+    assert.equal(taskSeen[1]?.includes("first attempt task"), true);
+  });
+
+  test("stops retrying when maxRetries is exhausted", async () => {
+    let calls = 0;
+    const spawn: SpawnFn = async () => {
+      calls += 1;
+      return { output: "", sessionId: `s${calls}` }; // always empty
+    };
+    const res = await dispatchWithQuality({
+      agent: "worker",
+      task: "task",
+      cwd: "/tmp",
+      maxRetries: 2,
+      spawn,
+    });
+    assert.equal(calls, 3); // initial + 2 retries
+    assert.equal(res.attempts, 3);
+    assert.equal(res.quality.ok, false);
+  });
+
+  test("does NOT retry on a non-recoverable failure (refusal)", async () => {
+    let calls = 0;
+    const spawn: SpawnFn = async () => {
+      calls += 1;
+      return { output: "I cannot help with that. As an AI language model, ...", sessionId: "s1" };
+    };
+    const res = await dispatchWithQuality({
+      agent: "worker",
+      task: "task",
+      cwd: "/tmp",
+      maxRetries: 5,
+      spawn,
+    });
+    assert.equal(calls, 1); // bailed after the refusal
+    assert.equal(res.quality.recoverable, false);
+  });
+});
+
+describe("runConsensus", () => {
+  test("fans out N samples and returns the highest-scoring winner", async () => {
+    const outputs = ["", GOOD, GOOD + " (more detail)"];
+    let issued = 0;
+    const spawn: SpawnFn = async () => {
+      const idx = issued++;
+      return { output: outputs[idx]!, sessionId: `s${idx + 1}` };
+    };
+    const res = await runConsensus({
+      agent: "designer",
+      task: "design X",
+      cwd: "/tmp",
+      samples: 3,
+      spawn,
+    });
+    assert.equal(res.all.length, 3);
+    // Two of three samples pass quality (the empty one fails); the
+    // winner is whichever of the two passing samples sorted higher.
+    assert.equal(res.best.quality.ok, true);
+    assert.match(res.best.output, /substantive response/);
+    // reason names the winner and lists all scores.
+    assert.match(res.reason, /sample #\d won/);
+  });
+
+  test("clamps samples to 1..6", async () => {
+    let calls = 0;
+    const spawn: SpawnFn = async () => {
+      calls += 1;
+      return { output: GOOD, sessionId: `s${calls}` };
+    };
+    // samples = 10 should clamp down to 6.
+    const res = await runConsensus({
+      agent: "designer",
+      task: "x",
+      cwd: "/tmp",
+      samples: 10,
+      spawn,
+    });
+    assert.equal(res.all.length, 6);
+  });
+
+  test("captures spawn errors as sample.error without throwing", async () => {
+    let calls = 0;
+    const spawn: SpawnFn = async () => {
+      calls += 1;
+      if (calls === 2) throw new Error("tmux gone");
+      return { output: GOOD, sessionId: `s${calls}` };
+    };
+    const res = await runConsensus({
+      agent: "designer",
+      task: "x",
+      cwd: "/tmp",
+      samples: 3,
+      spawn,
+    });
+    assert.equal(res.all.length, 3);
+    const failed = res.all.find((s) => s.error);
+    assert.ok(failed, "expected one sample to carry an error");
+    assert.match(failed!.error!, /tmux gone/);
+    // Winner is still one of the successful samples, never the errored one.
+    assert.notEqual(res.best.index, failed!.index);
+    assert.equal(res.best.quality.ok, true);
+  });
+
+  test("returns the least-bad sample even when all fail", async () => {
+    const outputs = ["", "I cannot help.", ""]; // all bad
+    let issued = 0;
+    const spawn: SpawnFn = async () => {
+      const idx = issued++;
+      return { output: outputs[idx]!, sessionId: `s${idx + 1}` };
+    };
+    const res = await runConsensus({
+      agent: "designer",
+      task: "x",
+      cwd: "/tmp",
+      samples: 3,
+      spawn,
+    });
+    assert.equal(res.all.length, 3);
+    assert.equal(res.best.quality.ok, false);
+    // The refusal has score 0.5, the empties have 0.4 — so the refusal wins on score
+    // but the test we really care about is that runConsensus picked SOMETHING and
+    // never threw.
+    assert.ok(typeof res.best.output === "string");
+  });
+
+  test("respects maxParallel by capping concurrent spawns", async () => {
+    let inFlight = 0;
+    let peak = 0;
+    const spawn: SpawnFn = async () => {
+      inFlight += 1;
+      peak = Math.max(peak, inFlight);
+      await new Promise((r) => setTimeout(r, 10));
+      inFlight -= 1;
+      return { output: GOOD, sessionId: "s" };
+    };
+    await runConsensus({
+      agent: "x",
+      task: "t",
+      cwd: "/tmp",
+      samples: 6,
+      maxParallel: 2,
+      spawn,
+    });
+    assert.ok(peak <= 2, `expected peak in-flight ≤ 2, got ${peak}`);
+  });
+});
diff --git a/pi-extension/tests/index.test.ts b/pi-extension/tests/index.test.ts
index c17a6cf..14cb42a 100644
--- a/pi-extension/tests/index.test.ts
+++ b/pi-extension/tests/index.test.ts
@@ -73,10 +73,28 @@ describe("clkExtension default export", () => {
     await clkExtension(pi as any);
 
     const toolNames = tools.map((t) => t.name);
-    for (const required of ["clk_cast", "clk_progress", "clk_checkpoint", "clk_done"]) {
+    // Core orchestration + git plumbing tools.
+    const required = [
+      "clk_cast",
+      "clk_progress",
+      "clk_checkpoint",
+      "clk_revert",
+      "clk_branch",
+      "clk_merge",
+      "clk_done",
+      // New code-enforced orchestration loops (ported from the Python
+      // harness's response_quality / consensus / autoresearch / ralph
+      // modules — see src/quality.ts, src/consensus.ts).
+      "clk_consensus",
+      "clk_subagent_quality",
+      "clk_autoresearch",
+      "clk_ralph",
+      "clk_subagent",
+    ];
+    for (const name of required) {
       assert.ok(
-        toolNames.includes(required),
-        `tool ${required} not registered (got ${toolNames.join(", ")})`,
+        toolNames.includes(name),
+        `tool ${name} not registered (got ${toolNames.join(", ")})`,
       );
     }
     assert.ok(commands["clk"], "/clk command was not registered");
diff --git a/pi-extension/tests/prompts.test.ts b/pi-extension/tests/prompts.test.ts
index 657c75e..4a2345f 100644
--- a/pi-extension/tests/prompts.test.ts
+++ b/pi-extension/tests/prompts.test.ts
@@ -15,7 +15,16 @@ describe("clkChiefPrimer", () => {
 
   test("mentions the core CLK tools", () => {
     const out = clkChiefPrimer("anything");
-    for (const tool of ["clk_cast", "clk_subagent", "clk_checkpoint", "clk_done"]) {
+    for (const tool of [
+      "clk_cast",
+      "clk_subagent",
+      "clk_subagent_quality",
+      "clk_consensus",
+      "clk_autoresearch",
+      "clk_ralph",
+      "clk_checkpoint",
+      "clk_done",
+    ]) {
       assert.ok(out.includes(tool), `primer should reference ${tool}`);
     }
   });
diff --git a/pi-extension/tests/quality.test.ts b/pi-extension/tests/quality.test.ts
new file mode 100644
index 0000000..c250ba4
--- /dev/null
+++ b/pi-extension/tests/quality.test.ts
@@ -0,0 +1,126 @@
+/**
+ * Unit tests for src/quality.ts — pure regex/string scoring, no I/O.
+ * Mirrors tests/test_response_quality.py in the Python harness so a
+ * behaviour drift between the two implementations shows up here.
+ */
+import { test, describe } from "node:test";
+import assert from "node:assert/strict";
+
+import {
+  scoreResponse,
+  repairHint,
+  isRecoverable,
+  summarise,
+} from "../src/quality.ts";
+
+describe("scoreResponse — happy paths", () => {
+  test("substantive prose passes with score 1.0", () => {
+    const text = "This is a substantive response covering the requested work in detail. " +
+      "It explains the approach, lists the files touched, and states next steps so the " +
+      "chief can keep moving without a re-roll.";
+    const q = scoreResponse(text);
+    assert.equal(q.ok, true);
+    assert.equal(q.score, 1.0);
+    assert.deepEqual(q.flags, []);
+  });
+
+  test("substantive prose with a CONFIDENCE line is still ok", () => {
+    const text = "Substantive enough response that exceeds the forty-character " +
+      "minimum easily.\nCONFIDENCE: 0.82";
+    const q = scoreResponse(text);
+    assert.equal(q.ok, true);
+    assert.equal(q.confidence, 0.82);
+  });
+});
+
+describe("scoreResponse — failure modes", () => {
+  test("empty body flags as 'empty' and is recoverable", () => {
+    const q = scoreResponse("");
+    assert.equal(q.ok, false);
+    assert.ok(q.flags.includes("empty"));
+    assert.equal(q.recoverable, true);
+  });
+
+  test("near-empty body flags as 'empty' too", () => {
+    const q = scoreResponse("hi.");
+    assert.equal(q.ok, false);
+    assert.ok(q.flags.includes("empty"));
+  });
+
+  test("refusal phrase is flagged and NOT recoverable", () => {
+    const q = scoreResponse("I cannot help with that request. As an AI language model, ...");
+    assert.equal(q.ok, false);
+    assert.ok(q.flags.includes("refusal"));
+    assert.equal(q.recoverable, false);
+  });
+
+  test("missing END_ACTION imbalance is flagged", () => {
+    const text = "Plenty of text here so we beat the empty threshold easily " +
+      "and definitely.\nACTION: write_file\nfoo\n"; // no END_ACTION
+    const q = scoreResponse(text);
+    assert.ok(q.flags.includes("malformed_action"));
+  });
+
+  test("missing END_POST imbalance is flagged", () => {
+    const text = "More than forty characters of preamble so the empty check passes.\n" +
+      "POST: my_topic\nbody\n";
+    const q = scoreResponse(text);
+    assert.ok(q.flags.includes("malformed_post"));
+  });
+
+  test("low CONFIDENCE value gets the low_confidence flag", () => {
+    const text = "Long enough body to clear the empty threshold without question.\nCONFIDENCE: 0.10";
+    const q = scoreResponse(text);
+    assert.ok(q.flags.includes("low_confidence"));
+    assert.equal(q.confidence, 0.1);
+  });
+
+  test("NEEDS_REVIEW: true flips needs_review_self", () => {
+    const text = "Body comfortably over the forty character minimum so empty does not fire.\n" +
+      "NEEDS_REVIEW: true";
+    const q = scoreResponse(text);
+    assert.equal(q.needsReview, true);
+    assert.ok(q.flags.includes("needs_review_self"));
+  });
+
+  test("missing expected output keys gets outputs_missing flag", () => {
+    const text = "A response body comfortably exceeding the minimum length threshold.\n" +
+      "POST: t1\nPRODUCES: foo, bar\nbody\nEND_POST";
+    const q = scoreResponse(text, { expectedOutputs: ["foo", "missing_one"] });
+    assert.ok(q.flags.includes("outputs_missing"));
+    assert.match(q.reasons.join(" "), /missing_one/);
+  });
+
+  test("requireConfidence flag fires when CONFIDENCE absent", () => {
+    const text = "Long enough body to comfortably clear the minimum length threshold.";
+    const q = scoreResponse(text, { requireConfidence: true });
+    assert.ok(q.flags.includes("confidence_missing"));
+  });
+});
+
+describe("repairHint / isRecoverable / summarise", () => {
+  test("repairHint returns an empty string for an ok response", () => {
+    const q = scoreResponse("Long-enough response that passes the empty threshold.");
+    assert.equal(repairHint(q), "");
+  });
+
+  test("repairHint quotes every reason as a bullet for failed responses", () => {
+    const q = scoreResponse("hi");
+    const hint = repairHint(q);
+    assert.match(hint, /rejected by the harness/i);
+    assert.match(hint, /minimum 40/);
+  });
+
+  test("isRecoverable is true for recoverable failures, false for refusals", () => {
+    assert.equal(isRecoverable(scoreResponse("")), true);
+    assert.equal(isRecoverable(scoreResponse("I cannot help with this.")), false);
+    assert.equal(isRecoverable(scoreResponse("ok and substantive response over the minimum.")), false);
+  });
+
+  test("summarise gives a compact one-line description", () => {
+    const ok = scoreResponse("Long substantive response well over the minimum threshold.");
+    assert.match(summarise(ok), /^ok score=/);
+    const bad = scoreResponse("");
+    assert.match(summarise(bad), /^flags=empty score=/);
+  });
+});

From 848faff01da38d46ca4ca98185b006e9f6d4e739 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 24 May 2026 16:33:17 +0000
Subject: [PATCH 3/3] =?UTF-8?q?docs:=20full=20README=20sweep=20=E2=80=94?=
 =?UTF-8?q?=20pi-extension=20orchestration=20loops,=20auto-push,=20doctor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates both READMEs to reflect the orchestration work that just landed
in pi-extension and the recent main-line PRs (push-on-commit, doctor /
diag CLI, multi-line truncation fix) that already shipped to master but
weren't fully cross-referenced.

pi-extension/README.md (full rewrite, +293 net lines)
  * Replaces the "8 small tools" narrative with a proper Tool Reference
    that groups roster / dispatch / iterative-refinement and explains
    when to pick clk_subagent vs clk_subagent_quality vs clk_consensus
    vs clk_autoresearch vs clk_ralph.
  * New "Response-quality scoring" section listing every flag the
    detector raises and how the repair-preamble loop quotes them back
    to the worker. Cross-references the Python harness's
    response_quality.py so behaviour drift between the two
    implementations is one diff away from being noticed.
  * New "Auto-push (opt-in)" section covering CLK_GITHUB_PUSH_ON_COMMIT,
    the ↑N ahead counter, and the pre-push secret-scanner interaction.
  * Commands table extended with /clk-help, /clk-doctor, /clk-undo
    (these existed in the code but the README only listed /clk and
    /clk-abort).
  * "What you keep / what changes" tables rewritten: stochastic
    consensus, quality re-dispatch, and Ralph refinement are now
    described as code-enforced (not chief-compliance dependent), and
    the comparison row about robustness loops names the new tools as
    the per-call equivalents of the Python harness's
    clk.config.json::robustness.* knobs.
  * Repository layout updated with src/quality.ts, src/consensus.ts,
    the new test files, and explicit per-file purposes.
  * "Testing" section reflects the real 96-test count and notes the
    suite runs entirely offline (consensus tests inject a fake spawn).

README.md (main) — targeted updates
  * Pi extension section: brief but accurate rundown of the new
    orchestration tools, a Commands table that matches /clk-help, the
    CLK_GITHUB_PUSH_ON_COMMIT env var, and an updated example
    transcript that uses clk_consensus / clk_autoresearch / clk_ralph
    by name rather than the "fans out to 3 subagents" abstraction.
  * Layout section: pi-extension/ subtree expanded to show every src/
    file with a one-line purpose, including the new quality.ts and
    consensus.ts.
  * Testing section: pi-extension test count corrected from 53 to 96
    (~1s → ~2s), and the per-suite description rewritten to name the
    new modules (quality / consensus / git auto-push helpers /
    firstLineShort) so a contributor browsing the README knows what
    is and isn't covered.
---
 README.md              |  92 ++++++--
 pi-extension/README.md | 474 +++++++++++++++++++++++++++--------------
 2 files changed, 385 insertions(+), 181 deletions(-)

diff --git a/README.md b/README.md
index 9e56a0d..a340e1e 100644
--- a/README.md
+++ b/README.md
@@ -1011,9 +1011,15 @@ orchestration model — dynamic casting, stochastic consensus, Ralph
 refinement, and Karpathy-style autoresearch — into Pi behind a single
 `/clk` command. No Python harness required at runtime.
 
-See [`pi-extension/README.md`](pi-extension/README.md) for full
-documentation including tool reference, state layout, error handling,
-and customization notes. Quick summary:
+The TypeScript extension now ports the harness's response-quality
+scoring and consensus fan-out as **real tools** (`clk_consensus`,
+`clk_subagent_quality`, `clk_autoresearch`, `clk_ralph`) rather than
+relying on chief compliance — every parallel sample is scored by the
+same rules `clk_harness/orchestration/response_quality.py` uses, the
+winner is picked in code, and Ralph branches are created by the tool so
+the protocol can't be skipped. See [`pi-extension/README.md`](pi-extension/README.md)
+for the full tool reference, state layout, error handling, and
+customisation notes.
 
 **Requirements:** Pi on `PATH`; tmux on `PATH`; Git on `PATH`.
 
@@ -1025,12 +1031,35 @@ and customization notes. Quick summary:
 | Project-local | `mkdir -p .pi/extensions && ln -s /path/to/CognitiveLoopKernel/pi-extension .pi/extensions/clk` | Version-controlled per project |
 | Global | `mkdir -p ~/.pi/agent/extensions && ln -s /path/to/CognitiveLoopKernel/pi-extension ~/.pi/agent/extensions/clk` | Available in every Pi session |
 
-**Usage:**
+**Commands:**
 
 | Command | Effect |
 |---------|--------|
 | `/clk <idea>` | Capture the idea and hand off to the chief. Resumes if state exists. |
 | `/clk-abort` | End the active run. State is preserved; resume with `/clk` later. |
+| `/clk-help` | List every CLK slash command, every orchestration tool the chief uses, and the active safety nets. |
+| `/clk-doctor` | Health-check tmux, git, the workspace `.clk/` layout, the pre-push hook, and (when a remote exists) the count of local commits not yet pushed. |
+| `/clk-undo` | Preview the last CLK commit; `/clk-undo confirm` creates a revert commit on top of it. |
+
+**Orchestration tools the chief uses (you don't call these directly):**
+
+| Tool | Purpose |
+|---|---|
+| `clk_cast` | Persist a roster of project-specific specialist roles. |
+| `clk_subagent` | Raw single-subagent dispatch via a detached tmux pi session. |
+| `clk_subagent_quality` | One subagent + automatic repair-preamble re-rolls on quality failures. |
+| `clk_consensus` | Fan out N parallel samples (default 3, max 6), score each, return the winner plus every candidate's score. |
+| `clk_autoresearch` | Bounded researcher + critic alternation; each iteration recorded on the progress log. |
+| `clk_ralph` | Create a `ralph/<iter>` branch and run a consensus fan-out in one call; chief then calls `clk_merge` or `clk_revert`. |
+| `clk_branch` / `clk_merge` / `clk_revert` / `clk_checkpoint` | Git plumbing for the Ralph iteration cycle. |
+| `clk_progress` | Append a one-line entry to `.clk/state/progress.md`. |
+| `clk_done` | Mark the run complete and write `.clk/state/done.md`. |
+
+**Optional env vars:**
+
+| Variable | Effect |
+|---|---|
+| `CLK_GITHUB_PUSH_ON_COMMIT=true` | After every `clk_checkpoint` and `clk_merge`, run `git push origin HEAD` best-effort and surface an `↑N` ahead counter if the push fails. Same env var as the Python TUI. |
 
 A typical session:
 
@@ -1038,10 +1067,12 @@ A typical session:
 > /clk a local-first journaling app that summarizes my week
 [CLK run started. The chief is taking over.]
 [chief casts engineer, ux_writer, summarizer, qa]
-[chief fans out to 3 parallel architecture subagents → judge synthesizes]
-[chief dispatches worker to implement MVP]
-[chief calls clk_checkpoint: "MVP: capture + persist entries"]
-[chief opens feature branch with clk_branch, runs Ralph iteration ...]
+[chief calls clk_consensus({agent:"architect", samples:3, task:"... storage design ..."})]
+[harness fans out 3 parallel tmux pi subagents, scores each, returns the winner]
+[chief calls clk_autoresearch({question:"sync model: append-only vs CRDT?"})]
+[chief calls clk_ralph({iterationName:"iter-1-mvp", agent:"engineer", task:"... build MVP ..."})]
+[chief calls bash: pytest -q]
+[chief calls clk_merge: "ralph win: MVP capture+persist+summarize"]
 [chief calls clk_done: "MVP runs; tests pass; README + deploy plan present"]
 ```
 
@@ -1069,7 +1100,19 @@ scripts/
 tests/                   # pytest regression suite (CI-gated)
 user_tests/              # pytest end-to-end suite (drives CLI + REST API)
 pi-extension/            # standalone Pi extension (TypeScript)
-  tests/                 # node --test suites (errors, prompts, state, git, index)
+  src/
+    index.ts             # /clk + /clk-help + /clk-doctor + /clk-undo, session lifecycle
+    prompts.ts           # the chief's operator's manual
+    tools.ts             # clk_cast / clk_progress / clk_checkpoint / clk_branch /
+                         #   clk_merge / clk_revert / clk_consensus / clk_subagent_quality /
+                         #   clk_autoresearch / clk_ralph / clk_done
+    subagent.ts          # raw clk_subagent — spawnSubagent() exposed for consensus
+    consensus.ts         # dispatchWithQuality + runConsensus (port of agent.py)
+    quality.ts           # scoreResponse + repairHint (port of response_quality.py)
+    git.ts               # checkpoint, branch, merge, revert + hasRemote / commitsAhead /
+                         #   pushBestEffort (port of git_ops.py auto-push helpers)
+    state.ts / abort.ts / errors.ts / types.ts
+  tests/                 # node --test suites covering every file in src/
 docs/
   REST_API.md            # full REST API reference
 ```
@@ -1801,7 +1844,7 @@ pytest user_tests/ -v
 # Pi extension TypeScript suite
 cd pi-extension
 npm install
-npm test                # unit + integration tests (53 tests, ~1s)
+npm test                # unit + integration tests (96 tests, ~2s)
 npm run test:strict     # also runs `tsc --noEmit`
 ```
 
@@ -1824,14 +1867,31 @@ The `pi-extension/tests/` suite verifies:
 
 - `classifyError`, `withRetry`, `looksRedacted`, `isMaxTurnsResult`,
   and all `recoveryHint` branches.
-- `clkChiefPrimer` renders the captured idea + all CLK tool names.
+- `clkChiefPrimer` renders the captured idea + every CLK tool name
+  (`clk_cast`, `clk_subagent`, `clk_subagent_quality`, `clk_consensus`,
+  `clk_autoresearch`, `clk_ralph`, `clk_checkpoint`, `clk_done`).
+- `scoreResponse` flags every documented failure mode (empty / refusal /
+  malformed ACTION / malformed POST / missing outputs / low confidence /
+  needs-review / missing-confidence) and `repairHint` quotes each reason
+  to the worker.
+- `runConsensus` fans out N samples, scores them, picks the winner, caps
+  to `maxParallel`, and captures spawn errors without throwing.
+  `dispatchWithQuality` retries with a repair preamble on recoverable
+  failures and stops on refusal or `maxRetries`.
 - `setIdea`, `setRoster`, `appendProgress`, `markDone`, `isDone`
   round-trip state through `.clk/state/*.json` and `progress.md`.
-- The `git` wrapper does init, checkpoint, branch, merge, and revert
-  correctly against a real `git` binary.
-- The extension's `default` export registers the documented tools
-  (`clk_cast`, `clk_progress`, `clk_checkpoint`, `clk_done`) and the
-  `/clk` slash command, and handles an empty-idea invocation cleanly.
+- The `git` wrapper does init, checkpoint, branch, merge, revert,
+  `hasRemote`, `commitsAhead`, and `pushBestEffort` correctly against a
+  real `git` binary (including the bare-upstream sync, the unreachable-
+  remote failure path, and the no-remote no-op).
+- The extension's `default` export registers every documented tool
+  (`clk_cast`, `clk_progress`, `clk_checkpoint`, `clk_revert`,
+  `clk_branch`, `clk_merge`, `clk_done`, `clk_consensus`,
+  `clk_subagent_quality`, `clk_autoresearch`, `clk_ralph`,
+  `clk_subagent`) and the `/clk` slash command, and handles an
+  empty-idea invocation cleanly.
+- `firstLineShort` returns single-line, capped output so a multi-line
+  idea never bleeds line 2 into the Pi status bar.
 
 ## Customization
 
diff --git a/pi-extension/README.md b/pi-extension/README.md
index 920f9e1..62d4213 100644
--- a/pi-extension/README.md
+++ b/pi-extension/README.md
@@ -1,13 +1,15 @@
 # CLK as a Pi extension
 
-A lightweight [pi.dev](https://pi.dev) extension that brings the Cognitive Loop
-Kernel orchestration model — dynamic agent casting, stochastic consensus,
-Ralph refinement, and Karpathy-style autoresearch — into Pi behind a single
-`/clk` command.
+A [pi.dev](https://pi.dev) extension that brings the full Cognitive Loop
+Kernel orchestration model — dynamic agent casting, stochastic
+consensus, Karpathy-style autoresearch, and Ralph refinement — into Pi
+behind a single `/clk` command.
 
-> **Experimental.** Companion to the Python [CLK harness](../README.md) in the
-> parent repo, but standalone: this extension does not depend on that harness
-> at runtime. It targets Pi natively. Use at your own risk.
+> **Experimental.** Companion to the Python [CLK harness](../README.md)
+> in the parent repo, but standalone: this extension does not depend on
+> that harness at runtime. It is a self-contained TypeScript port of
+> the parts of `clk_harness/orchestration/` that make sense inside Pi.
+> Use at your own risk.
 
 ## What it does
 
@@ -19,70 +21,146 @@ You type:
 
 The extension:
 
-1. Captures the idea, initialises a git repo if needed, and persists state
-   under `.clk/state/`.
-2. Hands control to the chief LLM with a CLK operator's manual (see
-   [`src/prompts.ts`](src/prompts.ts)) that establishes standing rules:
-   cast a team, dispatch via the `clk_subagent` tool, apply parallel consensus
-   on high-stakes decisions, run Ralph refinement after MVP, autoresearch
-   on open questions, checkpoint after every win, revert on regression,
-   call `clk_done` when every completion criterion is met.
-3. Provides the chief with eight small tools — `clk_cast`, `clk_progress`,
-   `clk_checkpoint`, `clk_branch`, `clk_revert`, `clk_merge`, `clk_done`,
-   and `clk_subagent` — that handle persistence, git mechanics, and subagent
-   dispatch. `clk_branch` opens a per-iteration feature branch before each
-   Ralph pass, `clk_merge` folds it into the home branch on success, and
-   `clk_revert` discards the branch without merging when the iteration is
-   rejected. `clk_subagent` spawns a detached tmux pi session and streams
-   its result back when it exits. Everything else (fan-out, judging,
-   refinement loops) is the chief driving Pi's built-in tools.
-
-The extension itself is intentionally thin: orchestration policy lives in the
-chief's prompt, not in TypeScript. To change CLK's behavior, edit
-[`src/prompts.ts`](src/prompts.ts).
+1. Captures the idea, initialises a git repo if needed, and persists
+   state under `.clk/state/`.
+2. Installs hardened safety nets in the project (`.gitignore`,
+   pre-push secret-scan hook).
+3. Hands control to the chief LLM with an operator's manual (see
+   [`src/prompts.ts`](src/prompts.ts)) that establishes standing rules
+   for casting, dispatching, consensus, autoresearch, Ralph
+   refinement, checkpointing, completion criteria, and error recovery.
+4. Provides the chief with a suite of orchestration tools (see
+   [Tool reference](#tool-reference)) that fan out parallel subagent
+   samples via tmux, score every output with the same response-quality
+   rules the Python harness uses, manage git branches for Ralph
+   iterations, and persist progress.
+
+Unlike the original incarnation of this extension, **orchestration
+policy is now enforced in code**, not only in the chief's prompt:
+`clk_consensus` actually spawns N parallel tmux sessions and scores
+them; `clk_subagent_quality` actually re-rolls failures with a repair
+preamble; `clk_autoresearch` actually alternates a researcher and
+critic; `clk_ralph` actually creates the branch and runs the fan-out.
+The chief can't accidentally skip these steps by misreading the
+prompt — the tools enforce the shape.
 
 ## Commands
 
 | Command | What it does |
 |---|---|
 | `/clk <idea>` | Start a CLK run. Casts a team, dispatches them, runs Ralph + autoresearch. |
-| `/clk-abort` | End the current run. State is preserved for resume. |
-| `/clk-help` | List every CLK command and the safety nets active in the workspace. |
-| `/clk-doctor` | Health-check `tmux`, `git`, the `.clk/` layout, `.gitignore`, and the pre-push hook. Pure environment checks; no Pi calls. |
-| `/clk-undo` | Preview the last CLK commit; `/clk-undo confirm` creates a revert commit on top of it. |
+| `/clk-abort` | End the active run. State is preserved for resume. |
+| `/clk-help` | List every CLK command, every orchestration tool the chief uses, and the safety nets active in the workspace. |
+| `/clk-doctor` | Health-check tmux, git, the `.clk/` layout, `.gitignore`, the pre-push hook, and (when a remote exists) the count of local commits not yet pushed. Pure environment checks; no Pi calls. |
+| `/clk-undo` | Preview the last CLK commit; `/clk-undo confirm` creates a revert commit on top of it. Refuses if there are uncommitted changes. |
+
+## Tool reference
+
+The chief invokes these as `clk_*` tools — you do not call them from
+slash commands. They are listed here so you know what your run is doing
+when you read the progress log or notifications.
+
+### Roster + status
+
+| Tool | Purpose |
+|---|---|
+| `clk_cast` | Persist a roster of project-specific specialist roles (name, mission, persona). The chief authors the roster on the fly. |
+| `clk_progress` | Append a one-line entry to `.clk/state/progress.md`. Used at every meaningful transition (dispatch / consensus / ralph / autoresearch / branch / merge / done / note). |
+
+### Dispatch (pick the right one)
+
+| Tool | Use when… |
+|---|---|
+| `clk_subagent({ agent, task, preferredModel? })` | Cheap, low-risk single-subagent dispatch with no quality gate. Reserve for genuinely throwaway work. |
+| `clk_subagent_quality({ agent, task, maxRetries?, preferredModel?, minChars? })` | One subagent **scored by the quality detector**, with up to `maxRetries` automatic repair re-rolls. Default for any single-worker task where you'd rather catch bad output than propagate it. |
+| `clk_consensus({ agent, task, samples?, preferredModel?, minChars? })` | Fan out N parallel samples (default 3, clamped 1..6), score each, return the highest-scoring winner plus every candidate. Use liberally for design choices, architecture, validation verdicts, security/perf reviews, ambiguous requirements. |
+| `clk_autoresearch({ question, iterations?, preferredModel? })` | Bounded `researcher` + `critic` alternation (default 2 iterations, clamped 1..5). Each finding and critique is recorded on the progress log. Use before non-trivial implementation work whenever the optimal approach is unclear. |
+
+### Iterative refinement
+
+| Tool | Use when… |
+|---|---|
+| `clk_branch({ name })` | Manually open a feature branch for an iteration. Records the home branch automatically. |
+| `clk_ralph({ iterationName, agent, task, samples?, preferredModel? })` | One-call Ralph iteration: creates `ralph/<iterationName>`, fans out a consensus dispatch, returns the winner. Chief then runs validation and calls `clk_merge` (accept) or `clk_revert` (reject). The branch creation + fan-out happen in one step and can't be skipped. |
+| `clk_checkpoint({ message })` | Stage all working-tree changes and create a `[clk] <message>` commit. Returns the new HEAD SHA. When `CLK_GITHUB_PUSH_ON_COMMIT=true` and an `origin` remote exists, also runs `git push origin HEAD` best-effort. |
+| `clk_merge({ message })` | Commit any pending changes on the feature branch, merge it into the home branch with `--no-ff`, return to home. Same auto-push behavior as `clk_checkpoint`. |
+| `clk_revert({ reason })` | Commit any pending work on the rejected branch (preserving it), switch back to home without merging. The rejected branch is **never** deleted. |
+| `clk_done({ reason })` | Mark the run complete. Writes `.clk/state/done.md`, ends the run lifecycle. Only call when every completion criterion in [`src/prompts.ts`](src/prompts.ts) is satisfied. |
+
+### Response-quality scoring
+
+Every `clk_subagent_quality`, `clk_consensus`, and `clk_autoresearch`
+output is scored by [`src/quality.ts`](src/quality.ts) (TypeScript port
+of `clk_harness/orchestration/response_quality.py`). The scorer flags:
+
+- **empty** — body shorter than `minChars` (default 40)
+- **refusal** — refusal phrases ("I cannot", "as an AI language model", ...) — marked non-recoverable so the harness escalates instead of re-rolling
+- **malformed_action** — `ACTION:` headers without matching `END_ACTION`
+- **malformed_post** — `POST:` headers without matching `END_POST`
+- **outputs_missing** — declared output keys not present in any `POST` block's `PRODUCES:` list
+- **low_confidence** — a parsed `CONFIDENCE: <0..1>` line below 0.5
+- **needs_review_self** — a `NEEDS_REVIEW: true` line
+- **confidence_missing** — no `CONFIDENCE:` line at all (only when the caller passes `requireConfidence: true`)
+
+Each flag carries a short repair reason; on a recoverable failure the
+caller re-dispatches with a preamble that quotes every reason back to
+the worker so it fixes the specific issues rather than re-rolling at
+random. Same protocol the Python harness uses in
+`agent.py::_dispatch_with_quality_loop`.
 
 ## Safety nets
 
-The extension installs the same safety nets the Python harness uses, so
-running CLK from Pi is just as recoverable:
-
-- **Hardened `.gitignore`.** On the first `/clk`, the extension writes a
-  `.gitignore` that blocks `.env`, `.env.bak`, `.env.partial`, `*.pem`,
-  `*.key`, `*_id_rsa*`, `/secrets/`, plus editor / OS junk. Existing
-  `.gitignore` content is never clobbered.
-- **Pre-push secret scanner.** A `.git/hooks/pre-push` hook (pure bash,
-  no extra deps) scans the about-to-be-pushed objects for obvious API
-  key patterns (`ANTHROPIC_API_KEY=…`, `OPENAI_API_KEY=…`, `sk-…`,
-  Slack `xoxb-…`, private-key headers). On a hit it aborts the push.
-  Bypass once with `git push --no-verify`.
+The extension installs the same safety nets the Python harness uses,
+so running CLK from Pi is just as recoverable:
+
+- **Hardened `.gitignore`.** On the first `/clk`, the extension writes
+  a `.gitignore` that blocks `.env`, `.env.bak`, `.env.partial`,
+  `*.pem`, `*.key`, `*_id_rsa*`, `/secrets/`, plus editor / OS junk.
+  Existing `.gitignore` content is never clobbered.
+- **Pre-push secret scanner.** A `.git/hooks/pre-push` hook (pure
+  bash, no extra deps) scans the about-to-be-pushed objects for
+  obvious API-key patterns (`ANTHROPIC_API_KEY=…`, `OPENAI_API_KEY=…`,
+  `sk-…`, Slack `xoxb-…`, private-key headers). On a hit it aborts the
+  push. Bypass once with `git push --no-verify`.
 - **Atomic state writes.** Every state file under `.clk/state/`
   (`clk.json`, `idea.json`, `roster.json`, `done.md`) is written via
   `tmp+rename` with a `.bak` rotation, so a crash mid-write leaves
   either the old or the new file intact — never half.
-- **`restoreBackup` primitive.** Exposed from `src/state.ts` so a
-  future "undo last state change" can swap a `.bak` back deterministically.
+- **`restoreBackup` primitive.** Exposed from `src/state.ts` for
+  programmatic recovery of `.bak` snapshots.
+- **AbortController cancellation.** `/clk-abort` and session shutdown
+  fire a single abort signal that propagates to every in-flight tmux
+  subagent session, every git operation, and every backoff sleep.
+
+## Auto-push (opt-in)
+
+Set `CLK_GITHUB_PUSH_ON_COMMIT=true` in the Pi environment to have the
+extension auto-push after every `clk_checkpoint` and `clk_merge`. The
+push is best-effort — on failure (no network, no upstream, rejected by
+the pre-push hook), the run continues and the `clk-git` status bar
+flips to `↑N` showing the count of unpushed commits.
+
+When the env var is **unset** but the repo has an `origin` remote, the
+`clk-git` status bar still surfaces an `↑N unpushed` hint so you know
+what's accumulated locally. `/clk-doctor` includes the same count as
+a `! warn` row.
+
+The pre-push secret scanner runs *before* the auto-push leaves your
+machine, so an accidental commit containing an API key still gets
+blocked.
 
 ## Requirements
 
 - Pi installed and on `PATH` (`pi --version` works).
-- tmux installed and on `PATH` (`tmux -V` works). The extension spawns each
-  subagent as a detached tmux pi session — this is how it achieves true
-  process isolation without depending on any external Pi extension.
-  Install: `brew install tmux` (macOS) or `apt install tmux` (Debian/Ubuntu).
-  On `session_start` the extension checks for tmux and emits a one-time
-  warning if it's missing.
-- Git on `PATH` (the extension auto-runs `git init` in the project root if
-  there's no repo yet).
+- tmux installed and on `PATH` (`tmux -V` works). The extension spawns
+  each subagent as a detached tmux pi session — this is how it
+  achieves true process isolation without depending on any external
+  Pi extension. Install: `brew install tmux` (macOS) or
+  `apt install tmux` (Debian/Ubuntu). On `session_start` the
+  extension checks for tmux and emits a one-time warning if it's
+  missing.
+- Git on `PATH` (the extension auto-runs `git init` in the project
+  root if there's no repo yet).
 - Node 20+ (Pi already requires this; only relevant if you want
   `AbortSignal.any` for the cleanest cancel behavior).
 
@@ -92,8 +170,8 @@ Three options. Pick whichever matches your workflow.
 
 ### Option A: Quick test (`-e`, no install)
 
-Best for trying it out or iterating on the extension itself. Pi loads the
-file directly and reloads on `/reload`:
+Best for trying it out or iterating on the extension itself. Pi loads
+the file directly and reloads on `/reload`:
 
 ```bash
 pi -e /path/to/CognitiveLoopKernel/pi-extension/src/index.ts
@@ -108,8 +186,8 @@ mkdir -p .pi/extensions
 ln -s /path/to/CognitiveLoopKernel/pi-extension .pi/extensions/clk
 ```
 
-Pi auto-discovers `.pi/extensions/*/index.ts` on startup. The chief's tools
-appear in every Pi session opened in this project.
+Pi auto-discovers `.pi/extensions/*/index.ts` on startup. The chief's
+tools appear in every Pi session opened in this project.
 
 ### Option C: Global install (all projects)
 
@@ -132,31 +210,32 @@ Or list it explicitly in `~/.pi/agent/settings.json`:
 
 ## Usage
 
-| Command         | Effect                                                                 |
-|-----------------|------------------------------------------------------------------------|
-| `/clk <idea>`   | Capture the idea and hand off to the chief. Resumes if state exists.   |
-| `/clk-abort`    | End the active run. Cancels the chief's current turn and signals all in-flight subagents to stop. State on disk is preserved; you can `/clk` again later. |
-
-Cancel mid-turn with **Esc** (Pi's built-in) — that cancels the current model
-call but leaves the CLK run lifecycle intact, so the chief can be steered and
-continue. Use `/clk-abort` when you want to end the whole run.
+Cancel mid-turn with **Esc** (Pi's built-in) — that cancels the current
+model call but leaves the CLK run lifecycle intact, so the chief can be
+steered and continue. Use `/clk-abort` when you want to end the whole
+run.
 
 A typical first transcript looks like:
 
 ```text
 > /clk a local-first journaling app that summarizes my week
 [notification] CLK run started. The chief is taking over.
-[chief] (calls clk_cast with engineer, ux_writer, summarizer, qa)
-[chief] (calls clk_subagent: scout to understand existing layout)
-[chief] (calls clk_subagent x3 in parallel: 3 architectures for storage)
-[chief] (calls clk_subagent: oracle to judge architectures)
-[chief] (calls clk_progress: consensus → SQLite + JSON sidecar)
-[chief] (calls clk_subagent: worker to implement MVP)
-[chief] (calls bash: pytest -q)
-[chief] (calls clk_checkpoint: "MVP: capture + persist entries")
-[chief] (enters Ralph loop ...)
-...
-[chief] (calls clk_done: "MVP runs; tests pass; README + deploy plan + checklist + CLI all present")
+[chief] clk_cast({ engineer, ux_writer, summarizer, qa })
+[chief] clk_consensus({ agent:"architect", samples:3,
+                        task:"3 storage designs for offline-first journal" })
+[harness] consensus :: 3 samples, winner #2 score=0.92 (all: #1=0.74 #2=0.92 #3=0.81)
+[chief] clk_progress({ kind:"consensus", message:"3 samples for architect: ..." })
+[chief] clk_autoresearch({ question:"sync model: append-only vs CRDT for journals?", iterations:2 })
+[harness] autoresearch #1/2: investigating → critiquing
+[harness] autoresearch #2/2: investigating → critiquing
+[chief] clk_ralph({ iterationName:"iter-1-mvp", agent:"engineer", samples:3,
+                    task:"... implement MVP from winning architecture ..." })
+[harness] on branch ralph/iter-1-mvp, 3 samples, winner #1 score=0.88
+[chief] bash({ command:"pytest -q" })
+[chief] clk_merge({ message:"ralph win: MVP capture+persist+summarize" })
+[harness] merged ralph/iter-1-mvp → main; clk-git: synced
+... (Ralph iterations continue) ...
+[chief] clk_done({ reason:"MVP runs; tests pass; README + deploy plan present" })
 ```
 
 ## State on disk
@@ -169,125 +248,152 @@ Everything CLK persists lives under `.clk/`:
     idea.json      # captured idea + timestamp
     roster.json    # current cast: name, mission, persona per role
     progress.md    # human-readable timeline (one line per event)
-    clk.json       # full state snapshot (idea + roster + progress)
+    clk.json       # full state snapshot (idea + roster + progress + homeBranch)
     done.md        # written only when clk_done is called
+    *.bak          # rotated previous version of any of the above
+  subagents/<sid>/ # per-spawn scratch: task.md + stdout.txt; cleaned up on exit
   logs/
     <session-id>.log  # one log file per clk_subagent call; records spawn,
-                      # tmux start/exit, abort, timeout, and the first 500
+                      # tmux start/exit, abort, timeout, and the first 2000
                       # chars of output for post-mortem debugging
 ```
 
-The roster, progress log, and full snapshot are also written to Pi's session
-JSONL via `pi.appendEntry` — so they're replayed automatically when you
-resume a session, and they survive a `pi --resume`.
+The roster, progress log, and full snapshot are also written to Pi's
+session JSONL via `pi.appendEntry` — so they're replayed automatically
+when you resume a session, and they survive a `pi --resume`.
 
-Git commits made by `clk_checkpoint` carry a `[clk]` prefix and are real
-commits in the project repo. The chief uses them as Ralph-style baselines and
-reverts to them on regression.
+Git commits made by `clk_checkpoint` and `clk_merge` carry a `[clk]`
+prefix and are real commits in the project repo. The chief uses them
+as Ralph-style baselines and reverts to them on regression.
 
 ## What you keep from the original CLK
 
-- **Single command, idea-first.** `/clk <idea>` is the only entry point.
-- **Dynamic casting.** The chief invents project-specific roles on the fly
-  with personas and missions it authors itself, persisted to `roster.json`.
-- **Stochastic consensus.** High-stakes decisions fan out to parallel
-  candidates (Pi runs sibling tool calls concurrently by default), then a
-  judge subagent picks or synthesizes.
-- **Ralph refinement loop.** Pre-iteration checkpoint → dispatch → validate
-  → commit-or-revert. Failed iterations leave no trace in the working tree.
-- **Autoresearch loop.** When stuck on open questions, the chief designs and
-  runs small experiments (researcher / scout / spike) and records learnings
-  regardless of outcome.
-- **Self-healing.** Repeated failure triggers consensus on root cause and
-  optionally a fresh `clk_cast` to add a specialist who can fix the upstream
-  issue.
+- **Single command, idea-first.** `/clk <idea>` is the only entry
+  point.
+- **Dynamic casting.** The chief invents project-specific roles on the
+  fly with personas and missions it authors itself, persisted to
+  `roster.json`.
+- **Stochastic consensus (code-enforced).** `clk_consensus` spawns N
+  parallel tmux subagent samples, scores each via the same regex /
+  contract rules the Python harness uses, and returns the highest-
+  scoring winner. The chief can fan out at will rather than relying on
+  the LLM to remember to emit parallel tool calls.
+- **Quality re-dispatch (code-enforced).** `clk_subagent_quality` (and
+  the consensus pipeline internally) re-roll on recoverable failures
+  with a repair preamble that quotes the specific flags back to the
+  worker.
+- **Ralph refinement loop (code-enforced).** `clk_ralph` creates the
+  feature branch and runs the fan-out in one tool call; the chief
+  decides accept/reject afterwards via `clk_merge` / `clk_revert`.
+  Failed iterations leave no trace on the home branch — the rejected
+  branch is preserved for review and never deleted.
+- **Karpathy-style autoresearch.** `clk_autoresearch` alternates a
+  `researcher` and `critic` subagent for N bounded iterations,
+  recording every finding and critique on the progress log.
+- **Memory through git.** Every successful milestone is committed
+  with a structured message so future agent runs can mine the log for
+  context.
 
 ## What changes from the original CLK
 
-| Original CLK | Pi extension |
+| Original CLK (Python harness) | Pi extension |
 |---|---|
-| Provider-agnostic (claude/codex/gemini/ollama/openwebui/pi) | Tied to Pi |
-| Curses TUI dashboard with live agent cards | Pi's single conversation stream + status-line entries |
-| `ACTION:` block protocol for write/edit/append/delete/run | Pi's built-in `read`/`write`/`edit`/`bash` tools |
-| YAML workflows in `.clk/config/workflows/` | None — the chief decides workflow on the fly |
-| Per-agent prompt files in `.clk/prompts/` | One operator's manual in `src/prompts.ts`; per-role personas live in `roster.json` |
-| Subprocess-piped agents | tmux pi sessions (via the extension's `clk_subagent` tool) |
+| Provider-agnostic (claude / codex / gemini / ollama / openwebui / pi / shell) | Tied to Pi (which can route to its own upstream of choice). |
+| Curses TUI dashboard with live agent cards + cost meter | Pi's single conversation stream + status-line entries (`clk-idea`, `clk-roster`, `clk-head`, `clk-branch`, `clk-last`, `clk-git`, `clk-run`, `clk-done`). |
+| `ACTION:` block protocol for write / edit / append / delete / run | Pi's built-in `read` / `write` / `edit` / `bash` tools. |
+| YAML workflows in `.clk/config/workflows/` driven by a workflow runner | The chief decides workflow on the fly using the orchestration tools. |
+| Per-agent prompt files in `.clk/prompts/` | One operator's manual in `src/prompts.ts`; per-role personas live in `roster.json`. |
+| Subprocess-piped provider adapters | tmux pi sessions (`clk_subagent` and the consensus fan-out spawn the same way). |
+| Robustness loops gated by `clk.config.json::robustness.*` | The four orchestration tools (`clk_consensus`, `clk_subagent_quality`, `clk_autoresearch`, `clk_ralph`) implement the equivalent loops directly; their parameters (`samples`, `maxRetries`, `iterations`) act as per-call knobs. |
+| `clk_harness/orchestration/response_quality.py` | Same rules, ported to `src/quality.ts`. |
+| Telegram bot integration | Out of scope — use the Python harness for that. |
+| REST API | Out of scope — use the Python harness for that. |
 
 ## Customising orchestration
 
-All policy lives in [`src/prompts.ts`](src/prompts.ts). Edit that file and
-`/reload` to change behavior. Useful knobs:
+Most policy still lives in [`src/prompts.ts`](src/prompts.ts) (when to
+fan out, when to autoresearch, when to start Ralph, when to call
+`clk_done`). Edit that file and `/reload` to change behavior.
+
+Per-call parameters tune the in-code loops directly:
 
-- Add or remove standing rules.
-- Change consensus sample counts (default: 3–5).
-- Change Ralph soft cap (default: ~10 iterations per stretch).
-- Change completion criteria.
-- Change how the chief prefixes dynamic personas onto `delegate` tasks.
+- `clk_consensus({ samples: 5 })` — 5 parallel samples (1..6).
+- `clk_consensus({ minChars: 80 })` — stricter empty-flag threshold.
+- `clk_subagent_quality({ maxRetries: 2 })` — up to 3 total dispatches.
+- `clk_autoresearch({ iterations: 4 })` — 4 researcher+critic cycles.
+- `clk_ralph({ samples: 5 })` — 5-way consensus per Ralph iteration.
 
-The `clk_*` tools are intentionally minimal mechanics. Resist the urge to
-encode policy in them — Pi extensions get the most leverage when the LLM
-makes the decisions and the extension just provides primitives + persistence.
+The quality detector itself is configurable through
+`scoreResponse(text, opts)` from [`src/quality.ts`](src/quality.ts) —
+the same `ScoreOpts` shape is forwarded by every quality-gated tool.
 
 ## Error handling and resilience
 
-The extension is designed to survive transient provider problems without ending
-the run. Errors are classified into four categories, each with a defined
-recovery path:
+The extension is designed to survive transient provider problems
+without ending the run. Errors are classified into categories with a
+defined recovery path:
 
 | Category | Symptoms | Recovery |
 |----------|----------|----------|
-| **Rate limit** | HTTP 429, "too many requests", "quota exceeded" | Exponential backoff, retried indefinitely (delay capped at 5 minutes) until the run is aborted. The chief is also instructed to try a smaller / different model if the limit persists. |
+| **Rate limit** | HTTP 429, "too many requests", "quota exceeded" | Exponential backoff in `withRetry`, retried indefinitely (delay capped at 5 minutes) until the user aborts. The chief is also instructed to try a smaller / different model if the limit persists. |
 | **Model unavailable** | HTTP 404, "model not found", "not available on free tier" | No retry — the chief falls back to a built-in Pi agent (`worker`, `researcher`, `scout`, `oracle`) or omits `preferredModel` and lets Pi choose. |
 | **Privacy redaction** | `[REDACTED]` values, "privacy filter", "sensitive content blocked" | Tool params are checked for redaction markers before use; the tool returns a recovery hint asking the chief to retry without the sensitive field (or to write it to a file and pass the path). |
-| **Max turns exhausted** | "max turns reached", "turn limit", "turn cap", "no more turns" | The chief re-dispatches the identical `clk_subagent` call immediately without asking for confirmation. If the task exhausts turns twice in a row the chief splits it into two narrower sequential subtasks. |
+| **Max turns exhausted** | "max turns reached", "turn limit", "turn cap", "no more turns" | The chief re-dispatches the identical `clk_subagent` / `clk_subagent_quality` / `clk_consensus` call immediately. If the task exhausts turns twice in a row the chief splits it into two narrower sequential subtasks. |
 | **Network / transient** | ECONNRESET, ETIMEDOUT, "socket hang up" | Same backoff-and-retry as rate limits. |
+| **Quality-flagged output** | empty / malformed / contract-missing / low-confidence / NEEDS_REVIEW=true | `clk_subagent_quality` re-dispatches with a repair preamble up to `maxRetries`; `clk_consensus` picks the highest-scoring sample even if all are sub-threshold so the chief can see the spread and decide. Refusals are non-recoverable — surfaced to the chief instead of retried. |
 
 ### Where this is enforced
 
-- **`src/errors.ts`** — `classifyError` (now includes `max_turns`), `isRetryable`,
-  `looksRedacted`, `isMaxTurnsResult`, `withRetry` (exponential backoff helper),
-  and `recoveryHint` (human-readable guidance returned to the chief as tool output).
-- **`src/index.ts`** — `pi.sendUserMessage` (the call that hands off to the
-  chief) is wrapped with `withRetry`; abort-caused errors are distinguished
-  from real errors so the run lifecycle is handled correctly.
-- **`src/tools.ts`** — every `clk_*` tool `execute` function checks input
-  parameters for redaction before acting and returns a descriptive error result
-  (rather than throwing) when git operations fail, so the chief can decide how
-  to proceed.
-- **`src/prompts.ts`** — rule 8 (max-turns: re-dispatch immediately or split
-  the task) and rule 10 (other provider errors) in the chief's operator's manual
-  instruct it how to handle error results from `clk_subagent` calls (which happen
-  inside Pi's runtime and cannot be intercepted in TypeScript).
+- **`src/errors.ts`** — `classifyError`, `isRetryable`, `looksRedacted`,
+  `isMaxTurnsResult`, `withRetry` (exponential backoff), `recoveryHint`.
+- **`src/quality.ts`** — `scoreResponse`, `repairHint`, `isRecoverable`,
+  `summarise`.
+- **`src/consensus.ts`** — `dispatchWithQuality` (single + retry),
+  `runConsensus` (parallel fan-out + winner picking).
+- **`src/index.ts`** — `pi.sendUserMessage` (the call that hands off to
+  the chief) is wrapped with `withRetry`; abort-caused errors are
+  distinguished from real errors so the run lifecycle is handled
+  correctly.
+- **`src/tools.ts`** — every `clk_*` tool `execute` function checks
+  input parameters for redaction before acting and returns a
+  descriptive error result (rather than throwing) when git operations
+  fail, so the chief can decide how to proceed.
+- **`src/prompts.ts`** — the chief's operator's manual still instructs
+  how to react to error results from `clk_subagent` calls (Pi runtime
+  errors that cannot be intercepted in TypeScript).
 
 ### Design principle
 
-A single failed subagent call or tool invocation must never end the run. The
-extension recovers what it can in TypeScript, then surfaces a recovery hint to
-the chief so it can adapt its plan. Use `/clk-abort` when you intentionally
-want to stop.
+A single failed subagent call or tool invocation must never end the
+run. The extension recovers what it can in TypeScript, then surfaces a
+recovery hint to the chief so it can adapt its plan. Use `/clk-abort`
+when you intentionally want to stop.
 
 ## Limitations / gotchas
 
-- **Subagent depth is capped at one level.** Each spawned tmux pi session
-  receives a preamble instructing it not to spawn further subagents. The
-  chief (parent) may create grandchildren on a child's behalf — that is the
-  maximum nesting depth. No env var controls this; it is enforced by the
-  task preamble the `clk_subagent` tool prepends.
-- **Children should not use CLK tools.** Spawned tmux pi sessions may have
-  CLK loaded if you have it configured globally. The task preamble prepended
-  by `clk_subagent` explicitly instructs each child not to spawn further
-  subagents and not to call `clk_*` tools. This is prompt-level enforcement,
-  not a technical lock. The chief is the intended sole orchestrator — don't
-  try to delegate orchestration.
-- **Concurrency lock.** Only one `/clk` run can be active per Pi session.
-  Use `/clk-abort` first if you want to start over with a different idea.
+- **Subagent depth is capped at one level.** Each spawned tmux pi
+  session receives a preamble instructing it not to spawn further
+  subagents and not to call `clk_*` tools. The chief (parent) may
+  create grandchildren on a child's behalf — that is the maximum
+  nesting depth. This is prompt-level enforcement, not a technical
+  lock.
+- **Concurrency lock.** Only one `/clk` run can be active per Pi
+  session. Use `/clk-abort` first if you want to start over with a
+  different idea.
+- **Subagent timeout.** Each spawned tmux pi session has a 30-minute
+  hard cap (`SUBAGENT_TIMEOUT_MS` in `src/subagent.ts`). Long-running
+  experiments should be split into multiple bounded dispatches.
+- **Output cap.** Subagent output is truncated at 80,000 characters
+  before being returned to the chief; the first 2,000 characters of
+  the full output are kept in `.clk/logs/<session>.log` for
+  post-mortem.
+- **No web TUI.** Pi runs in your terminal; this extension inherits
+  that. The agent dashboard from the Python CLK is replaced by
+  status-line entries.
 - **`ctx.signal` is undefined when `/clk` fires** (the extension is
   invoked while Pi is idle), so the extension manages its own
-  `AbortController` and merges it with per-tool signals. Esc + `/clk-abort`
-  + session shutdown all wire through correctly.
-- **No web TUI.** Pi runs in your terminal; this extension inherits that.
-  The agent dashboard from the Python CLK is replaced by status-line
-  entries (`clk-roster`, `clk-head`, `clk-last`, `clk-run`, `clk-done`).
+  `AbortController` and merges it with per-tool signals. Esc +
+  `/clk-abort` + session shutdown all wire through correctly.
 
 ## Repository layout
 
@@ -297,20 +403,58 @@ pi-extension/
   package.json         # devDeps for editor type-checking; pi loads via jiti
   tsconfig.json
   src/
-    index.ts           # entry: factory, /clk + /clk-abort, session_start replay
+    index.ts           # entry: factory, /clk + /clk-abort + /clk-help +
+                       #   /clk-doctor + /clk-undo, session_start replay
     prompts.ts         # the chief's operator's manual (the policy)
-    tools.ts           # clk_cast, clk_progress, clk_checkpoint,
-                       # clk_branch, clk_revert, clk_merge, clk_done
-    subagent.ts        # subagent tool: spawns tmux pi sessions, polls for
-                       # completion, handles cancellation + progress updates
+    tools.ts           # every clk_* tool — clk_cast, clk_progress,
+                       #   clk_checkpoint, clk_branch, clk_revert,
+                       #   clk_merge, clk_consensus, clk_subagent_quality,
+                       #   clk_autoresearch, clk_ralph, clk_done
+    subagent.ts        # clk_subagent + spawnSubagent (tmux pi spawner)
+    consensus.ts       # dispatchWithQuality + runConsensus (parallel
+                       #   sample fan-out + quality re-dispatch loop)
+    quality.ts         # scoreResponse + repairHint (port of
+                       #   clk_harness/orchestration/response_quality.py)
+    git.ts             # init, checkpoint, branch, merge, revert,
+                       #   safety-net installer, hasRemote, commitsAhead,
+                       #   pushBestEffort (port of git_ops.py auto-push)
     state.ts           # .clk/state/* persistence + pi.appendEntry mirroring
-                       # (tracks idea, roster, progress, homeBranch)
-    git.ts             # checkpoint, revertTo, head, abortMerge helpers
+                       #   (idea, roster, progress, homeBranch)
     abort.ts           # run-scoped AbortController + /clk-abort + shutdown bridge
     errors.ts          # error classification, backoff retry, redaction detection
-    types.ts           # shared types
+    types.ts           # shared types (Roster, ProgressKind, ClkState)
+  tests/
+    errors.test.ts     # classifyError / withRetry / recoveryHint
+    prompts.test.ts    # chief primer includes every clk_* tool name
+    state.test.ts      # atomic writes + .bak rotation + round-trip
+    git.test.ts        # real git binary: init, checkpoint, branch, merge,
+                       #   revert, hasRemote, commitsAhead, pushBestEffort
+    quality.test.ts    # every flag + repairHint + isRecoverable
+    consensus.test.ts  # injected spawn: ok / retry / non-recoverable /
+                       #   fan-out winner picking / clamping / errors /
+                       #   maxParallel concurrency
+    safety_nets.test.ts # gitignore + pre-push hook idempotence
+    runtime_smoke.test.ts # real pi binary, when available (CI gates this)
+    index.test.ts      # extension factory wires every tool + command,
+                       #   firstLineShort handles multi-line ideas
 ```
 
+## Testing
+
+Inside `pi-extension/`:
+
+```bash
+npm ci                  # install dependencies
+npm run typecheck       # tsc --noEmit
+npm test                # 96 tests, ~2s, no network or pi required
+npm run test:strict     # typecheck + tests in one go
+```
+
+The full suite runs entirely offline (consensus tests inject a fake
+spawn function) so it's safe to run in CI without tmux or pi
+installed. The `runtime_smoke.test.ts` self-skips when the real `pi`
+binary isn't reachable.
+
 ## License
 
 MIT. See the parent repo for the full notice.