From 30cb0c8d93119c0865e96921d7cf828417280dfa Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Mon, 8 Jun 2026 16:31:36 -0600
Subject: [PATCH] feat(runtime): first-class router-tools executor backend
 (off-box tool use)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

createExecutor gains `backend: 'router-tools'` — a real agentic loop over the
Tangle router's tool-calling, OFF-BOX (no sandbox, so unaffected by a box's
egress allowlist, #984): each turn passes `tools`; the model's tool_calls run via
the seam's `executeToolCall` on this host and fold back as `tool` messages,
repeating until the model answers or `maxTurns` (one turn = one completion = the
equal-compute unit). This is the multi-turn capability the single-shot
routerInlineExecutor could not express; `executeToolCall` receives the task so
per-task tool surfaces dispatch correctly. RouterToolsSeam + ToolSpec are
exported via /loops.

Verified live: createExecutor({backend:'router-tools', tools:[get_weather], …})
called get_weather({city:Paris}), ran the host tool, and answered from the result
in 2 turns. Any adapter that has a tool surface now gets a tool-using off-box
worker for free.
---
 CLAUDE.md                        |   2 +-
 src/runtime/index.ts             |   2 +
 src/runtime/supervise/runtime.ts | 166 +++++++++++++++++++++++++++++++
 3 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 4d7a287..74fcf6f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -36,7 +36,7 @@ Types that stay in THIS repo because they're runtime-shaped (coupled to a runnin
 ## Code map — the loop kernel & the recursive atom (src/runtime/)
 
 - `run-loop.ts` — `runLoop`, the round-synchronous leaf kernel. Per round: `driver.plan()`→N tasks→one sandbox/iteration (bounded by `maxConcurrency`, round-robin `agentRuns`)→`streamPrompt`→`output.parse`→`validator.validate`→`driver.decide`. Owns iteration accounting, concurrency, abort, cost+token aggregation, trace emission, box teardown. Exports `defaultSelectWinner` (best-valid-score, ties→earliest) — the single-sourced selection the personify combinators reuse.
-- `supervise/` — the recursive execution atom (keystone): `Scope` + `Supervisor` over the open `Executor` port, spawn/settle on a **conserved budget pool** so equal-compute holds by construction; journal→replay/resume. `runtime.ts` also holds `createExecutor({backend})` — the ONE built-in executor (backend-as-data: `router`/`bridge`/`cli`/`sandbox`); the per-backend bodies are internal case-arms, BYO agents implement `Executor` directly.
+- `supervise/` — the recursive execution atom (keystone): `Scope` + `Supervisor` over the open `Executor` port, spawn/settle on a **conserved budget pool** so equal-compute holds by construction; journal→replay/resume. `runtime.ts` also holds `createExecutor({backend})` — the ONE built-in executor (backend-as-data: `router`/`router-tools`/`bridge`/`cli`/`sandbox`; `router-tools` is the off-box tool-using agentic loop — chat→tool_calls→`executeToolCall`→repeat — over the router's tool-calling, no sandbox); the per-backend bodies are internal case-arms, BYO agents implement `Executor` directly.
 - `personify/` — the content-free generic combinators (`fanout`/`loopUntil`/`widen`/`panel`/`verify`/`pipeline`) + `definePersona`/`runPersonified` + the cross-run `Corpus` + `createScopeAnalyst` (the analyst-on-scope steer firewall).
 - `driver.ts` — `createDriver` (agent authors topology via a `TopologyPlanner`); `PlannerContext.analyses` is the analyst→driver wire (built + tested, but **not yet fed live** by any bench); `assertTraceDerivedFindings` is the steer-firewall (selector≠judge). `types.ts` holds `Driver`/`AgentRunSpec`/`OutputAdapter`/`Validator`/`Iteration`/`LoopResult`/`SandboxClient` + the `LoopTraceEvent` union. `sandbox-run.ts` is `openSandboxRun` — the one run/stream/resume sandbox seam; `inline-sandbox-client.ts` is `inlineSandboxClient` — the one adapter presenting any non-box `Executor` as a `SandboxClient` for `runLoop`. `loop-dispatch.ts` adapts `runLoop`→agent-eval campaigns; `report-usage.ts` forwards token usage so the integrity guard sees a real backend.
 
diff --git a/src/runtime/index.ts b/src/runtime/index.ts
index 00e7f3c..2e50fe7 100644
--- a/src/runtime/index.ts
+++ b/src/runtime/index.ts
@@ -191,7 +191,9 @@ export {
   createExecutorRegistry,
   type ExecutorConfig,
   type RouterSeam,
+  type RouterToolsSeam,
   type SandboxSeam,
+  type ToolSpec,
 } from './supervise/runtime'
 export { createScope, settledToIteration } from './supervise/scope'
 export {
diff --git a/src/runtime/supervise/runtime.ts b/src/runtime/supervise/runtime.ts
index ecce364..d7a7460 100644
--- a/src/runtime/supervise/runtime.ts
+++ b/src/runtime/supervise/runtime.ts
@@ -224,6 +224,169 @@ export const routerInlineExecutor: ExecutorFactory<unknown> = (spec, ctx) => {
   }
 }
 
+/** An OpenAI-shape function tool the model may call; serialized unchanged into the request's `tools`. */
+export interface ToolSpec {
+  type: 'function' // the only tool kind this shape models
+  function: { name: string; description?: string; parameters: unknown }
+}
+
+/**
+ * Router seam WITH tool use — the tool-using router backend. Same direct
+ * OpenAI-compatible endpoint as `RouterSeam`, but each turn passes `tools`; when
+ * the model emits tool_calls they run via `executeToolCall` ON THIS HOST and the
+ * results fold back as `tool` messages, repeating until the model answers without
+ * a tool or `maxTurns` is hit. A real agentic loop, OFF-BOX — no sandbox, so it
+ * is unaffected by a box's egress allowlist. One turn = one completion = the
+ * equal-compute unit. `executeToolCall` receives the task so per-task tool
+ * surfaces (e.g. a gym keyed by task) can dispatch correctly.
+ */
+export interface RouterToolsSeam {
+  routerBaseUrl: string // one trailing '/' is stripped, then '/chat/completions' is appended
+  routerKey: string // sent as `authorization: Bearer <routerKey>`
+  model?: string // when unset, the executor falls back to `spec.profile.model?.default`
+  tools: ReadonlyArray<ToolSpec> // passed as `tools` in every turn's request body
+  executeToolCall: (name: string, args: Record<string, unknown>, task: unknown) => Promise<string>
+  /** Max inference turns (default 4). */
+  maxTurns?: number
+}
+const routerToolsSeamKey = 'router-tools' // key passed to `readSeam` to look this seam up
+
+interface RouterToolsResponse {
+  choices?: Array<{ // only `choices[0]` is read by the executor
+    message?: {
+      content?: string | null // kept as the answer text when non-empty
+      tool_calls?: Array<{ id?: string; function?: { name?: string; arguments?: string } }>
+    }
+  }>
+  usage?: { prompt_tokens?: number; completion_tokens?: number } // counted only when both are numbers
+}
+
+/**
+ * The tool-using router executor. Drives the multi-turn tool loop the single-shot
+ * `routerInlineExecutor` cannot express; fails loud (`ValidationError`) on bad config or non-2xx.
+ */
+export const routerToolsInlineExecutor: ExecutorFactory<unknown> = (spec, ctx) => {
+  const seam = readSeam<RouterToolsSeam>(ctx, routerToolsSeamKey, 'router-tools')
+  const model = seam.model ?? spec.profile.model?.default
+  if (!model) {
+    throw new ValidationError(
+      'routerToolsInlineExecutor: no model — set RouterToolsSeam.model or AgentProfile.model.default',
+    )
+  }
+  if (!seam.routerBaseUrl || !seam.routerKey) {
+    throw new ValidationError(
+      'routerToolsInlineExecutor: RouterToolsSeam.routerBaseUrl + routerKey required',
+    )
+  }
+  const maxTurns = seam.maxTurns ?? 4
+  // 0/negative/NaN would skip the loop and "succeed" with empty content and zero spend.
+  if (!(maxTurns > 0)) throw new ValidationError('routerToolsInlineExecutor: maxTurns must be > 0')
+  const endpoint = `${seam.routerBaseUrl.replace(/\/$/, '')}/chat/completions` // loop-invariant
+
+  const controller = new AbortController()
+  const abortIfSignalled = () => {
+    if (ctx.signal.aborted) controller.abort()
+  }
+  abortIfSignalled()
+  if (!ctx.signal.aborted) ctx.signal.addEventListener('abort', abortIfSignalled, { once: true })
+
+  let artifact: ExecutorResult<unknown> | undefined
+
+  return {
+    runtime: 'router' as Runtime,
+    async execute(task, signal): Promise<ExecutorResult<unknown>> {
+      const started = Date.now()
+      const linked = linkSignals(signal, controller.signal)
+      const messages: Array<Record<string, unknown>> = [
+        ...(taskToMessages(task, spec) as Array<Record<string, unknown>>),
+      ]
+      const tokens = zeroTokenUsage()
+      let turns = 0
+      let lastText = ''
+
+      while (turns < maxTurns) {
+        turns += 1
+        const res = await fetch(endpoint, {
+          method: 'POST',
+          headers: {
+            'content-type': 'application/json',
+            authorization: `Bearer ${seam.routerKey}`,
+          },
+          body: JSON.stringify({
+            model,
+            messages,
+            tools: seam.tools,
+            tool_choice: 'auto',
+            temperature: 0.2,
+          }),
+          ...(linked ? { signal: linked } : {}),
+        })
+        if (!res.ok) {
+          throw new ValidationError(
+            `routerToolsInlineExecutor: router ${res.status}: ${(await res.text()).slice(0, 200)}`,
+          )
+        }
+        const data = (await res.json()) as RouterToolsResponse
+        const u = data.usage
+        if (u && typeof u.prompt_tokens === 'number' && typeof u.completion_tokens === 'number') {
+          tokens.input += u.prompt_tokens
+          tokens.output += u.completion_tokens
+        }
+        const msg = data.choices?.[0]?.message
+        if (msg?.content) lastText = msg.content
+        // Normalize once so the assistant echo and the `tool` replies share ids; the fallback
+        // id carries the turn number so it stays unique across the whole transcript.
+        const calls = (msg?.tool_calls ?? []).map((tc, i) => ({
+          id: tc.id ?? `call_${turns}_${i}`,
+          type: 'function',
+          // `||`, not `??`: an empty arguments string means "no arguments", i.e. `{}`.
+          function: { name: tc.function?.name ?? '', arguments: tc.function?.arguments || '{}' },
+        }))
+        if (calls.length === 0) break // the model answered — loop done
+
+        messages.push({ role: 'assistant', content: msg?.content ?? '', tool_calls: calls })
+        for (const call of calls) {
+          let args: unknown
+          try {
+            args = JSON.parse(call.function.arguments)
+          } catch {
+            args = undefined // reported to the model below, like any non-object payload
+          }
+          if (typeof args !== 'object' || args === null || Array.isArray(args)) {
+            // Bad args are a model outcome, not an infra fault — report back so it can correct.
+            const content = 'error: tool arguments were not a valid JSON object'
+            messages.push({ role: 'tool', tool_call_id: call.id, content })
+            continue
+          }
+          const bag = args as Record<string, unknown>
+          const result = await seam.executeToolCall(call.function.name, bag, task)
+          messages.push({ role: 'tool', tool_call_id: call.id, content: result })
+        }
+      }
+
+      const usd = isModelPriced(model) ? estimateCost(tokens.input, tokens.output, model) : 0
+      const spent: Spend = { iterations: turns, tokens, usd, ms: Date.now() - started }
+      const out = { content: lastText } as unknown
+      artifact = { outRef: contentRef('router-tools', { model, content: lastText }), out, spent }
+      return artifact
+    },
+    teardown(_grace): Promise<{ destroyed: boolean }> {
+      // Drop the ctx listener so a long-lived ctx.signal does not retain this executor.
+      ctx.signal.removeEventListener('abort', abortIfSignalled)
+      controller.abort()
+      return Promise.resolve({ destroyed: true })
+    },
+    resultArtifact() {
+      if (!artifact) {
+        throw new ValidationError(
+          'routerToolsInlineExecutor: resultArtifact() read before execute()',
+        )
+      }
+      return { ...artifact, spent: artifact.spent }
+    },
+  }
+}
+
 // ── sandbox executor (harness is a BackendType) ────────────────────────────────
 
 /**
@@ -624,6 +787,7 @@ export const bridgeExecutor: ExecutorFactory<unknown> = (spec, ctx) => {
  */
 export type ExecutorConfig =
   | ({ backend: 'router' } & RouterSeam)
+  | ({ backend: 'router-tools' } & RouterToolsSeam)
   | ({ backend: 'bridge' } & BridgeSeam)
   | ({ backend: 'cli' } & CliSeam)
   | ({ backend: 'sandbox'; harness?: BackendType } & SandboxSeam)
@@ -635,6 +799,8 @@ export function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>
     switch (config.backend) {
       case 'router':
         return routerInlineExecutor(spec, seamed)
+      case 'router-tools':
+        return routerToolsInlineExecutor(spec, seamed)
       case 'bridge':
         return bridgeExecutor(spec, seamed)
       case 'cli':