From 30cb0c8d93119c0865e96921d7cf828417280dfa Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Mon, 8 Jun 2026 16:31:36 -0600 Subject: [PATCH] feat(runtime): first-class router-tools executor backend (off-box tool use) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit createExecutor gains `backend: 'router-tools'` — a real agentic loop over the Tangle router's tool-calling, OFF-BOX (no sandbox, so unaffected by a box's egress allowlist, #984): each turn passes `tools`; the model's tool_calls run via the seam's `executeToolCall` on this host and fold back as `tool` messages, repeating until the model answers or `maxTurns` (one turn = one completion = the equal-compute unit). The multi-turn capability the single-shot routerInlineExecutor could not express; `executeToolCall` receives the task so per-task tool surfaces dispatch correctly. RouterToolsSeam + ToolSpec exported via /loops. Verified live: createExecutor({backend:'router-tools', tools:[get_weather], …}) called get_weather({city:Paris}), ran the host tool, and answered from the result in 2 turns. Any adapter that has a tool surface now gets a tool-using off-box worker for free. --- CLAUDE.md | 2 +- src/runtime/index.ts | 2 + src/runtime/supervise/runtime.ts | 166 +++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 4d7a287..74fcf6f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,7 +36,7 @@ Types that stay in THIS repo because they're runtime-shaped (coupled to a runnin ## Code map — the loop kernel & the recursive atom (src/runtime/) - `run-loop.ts` — `runLoop`, the round-synchronous leaf kernel. Per round: `driver.plan()`→N tasks→one sandbox/iteration (bounded by `maxConcurrency`, round-robin `agentRuns`)→`streamPrompt`→`output.parse`→`validator.validate`→`driver.decide`. Owns iteration accounting, concurrency, abort, cost+token aggregation, trace emission, box teardown. Exports `defaultSelectWinner` (best-valid-score, ties→earliest) — the single-sourced selection the personify combinators reuse. -- `supervise/` — the recursive execution atom (keystone): `Scope` + `Supervisor` over the open `Executor` port, spawn/settle on a **conserved budget pool** so equal-compute holds by construction; journal→replay/resume. `runtime.ts` also holds `createExecutor({backend})` — the ONE built-in executor (backend-as-data: `router`/`bridge`/`cli`/`sandbox`); the per-backend bodies are internal case-arms, BYO agents implement `Executor` directly. +- `supervise/` — the recursive execution atom (keystone): `Scope` + `Supervisor` over the open `Executor` port, spawn/settle on a **conserved budget pool** so equal-compute holds by construction; journal→replay/resume. `runtime.ts` also holds `createExecutor({backend})` — the ONE built-in executor (backend-as-data: `router`/`router-tools`/`bridge`/`cli`/`sandbox`; `router-tools` is the off-box tool-using agentic loop — chat→tool_calls→`executeToolCall`→repeat — over the router's tool-calling, no sandbox); the per-backend bodies are internal case-arms, BYO agents implement `Executor` directly. - `personify/` — the content-free generic combinators (`fanout`/`loopUntil`/`widen`/`panel`/`verify`/`pipeline`) + `definePersona`/`runPersonified` + the cross-run `Corpus` + `createScopeAnalyst` (the analyst-on-scope steer firewall). - `driver.ts` — `createDriver` (agent authors topology via a `TopologyPlanner`); `PlannerContext.analyses` is the analyst→driver wire (built + tested, but **not yet fed live** by any bench); `assertTraceDerivedFindings` is the steer-firewall (selector≠judge). `types.ts` holds `Driver`/`AgentRunSpec`/`OutputAdapter`/`Validator`/`Iteration`/`LoopResult`/`SandboxClient` + the `LoopTraceEvent` union. `sandbox-run.ts` is `openSandboxRun` — the one run/stream/resume sandbox seam; `inline-sandbox-client.ts` is `inlineSandboxClient` — the one adapter presenting any non-box `Executor` as a `SandboxClient` for `runLoop`. `loop-dispatch.ts` adapts `runLoop`→agent-eval campaigns; `report-usage.ts` forwards token usage so the integrity guard sees a real backend. diff --git a/src/runtime/index.ts b/src/runtime/index.ts index 00e7f3c..2e50fe7 100644 --- a/src/runtime/index.ts +++ b/src/runtime/index.ts @@ -191,7 +191,9 @@ export { createExecutorRegistry, type ExecutorConfig, type RouterSeam, + type RouterToolsSeam, type SandboxSeam, + type ToolSpec, } from './supervise/runtime' export { createScope, settledToIteration } from './supervise/scope' export { diff --git a/src/runtime/supervise/runtime.ts b/src/runtime/supervise/runtime.ts index ecce364..d7a7460 100644 --- a/src/runtime/supervise/runtime.ts +++ b/src/runtime/supervise/runtime.ts @@ -224,6 +224,169 @@ export const routerInlineExecutor: ExecutorFactory = (spec, ctx) => { } } +/** An OpenAI-shape function tool the model may call. */ +export interface ToolSpec { + type: 'function' + function: { name: string; description?: string; parameters: unknown } +} + +/** + * Router seam WITH tool use — the tool-using router backend. Same direct + * OpenAI-compatible endpoint as `RouterSeam`, but each turn passes `tools`; when + * the model emits tool_calls they run via `executeToolCall` ON THIS HOST and the + * results fold back as `tool` messages, repeating until the model answers without + * a tool or `maxTurns` is hit. A real agentic loop, OFF-BOX — no sandbox, so it + * is unaffected by a box's egress allowlist. One turn = one completion = the + * equal-compute unit. `executeToolCall` receives the task so per-task tool + * surfaces (e.g. a gym keyed by task) can dispatch correctly. + */ +export interface RouterToolsSeam { + routerBaseUrl: string + routerKey: string + model?: string + tools: ReadonlyArray + executeToolCall: (name: string, args: Record, task: unknown) => Promise + /** Max inference turns (default 4). */ + maxTurns?: number +} +const routerToolsSeamKey = 'router-tools' + +interface RouterToolsResponse { + choices?: Array<{ + message?: { + content?: string | null + tool_calls?: Array<{ id?: string; function?: { name?: string; arguments?: string } }> + } + }> + usage?: { prompt_tokens?: number; completion_tokens?: number } +} + +/** + * The tool-using router executor. Drives the multi-turn tool loop the single-shot + * `routerInlineExecutor` cannot express; same fail-loud + real-usage discipline. + */ +export const routerToolsInlineExecutor: ExecutorFactory = (spec, ctx) => { + const seam = readSeam(ctx, routerToolsSeamKey, 'router-tools') + const model = seam.model ?? spec.profile.model?.default + if (!model) { + throw new ValidationError( + 'routerToolsInlineExecutor: no model — set RouterToolsSeam.model or AgentProfile.model.default', + ) + } + if (!seam.routerBaseUrl || !seam.routerKey) { + throw new ValidationError( + 'routerToolsInlineExecutor: RouterToolsSeam.routerBaseUrl + routerKey required', + ) + } + const maxTurns = seam.maxTurns ?? 4 + + const controller = new AbortController() + const abortIfSignalled = () => { + if (ctx.signal.aborted) controller.abort() + } + abortIfSignalled() + if (!ctx.signal.aborted) ctx.signal.addEventListener('abort', abortIfSignalled, { once: true }) + + let artifact: ExecutorResult | undefined + + return { + runtime: 'router' as Runtime, + async execute(task, signal): Promise> { + const started = Date.now() + const linked = linkSignals(signal, controller.signal) + const messages: Array> = [ + ...(taskToMessages(task, spec) as Array>), + ] + const tokens = zeroTokenUsage() + let turns = 0 + let lastText = '' + + for (let t = 0; t < maxTurns; t += 1) { + turns += 1 + const res = await fetch(`${seam.routerBaseUrl.replace(/\/$/, '')}/chat/completions`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + authorization: `Bearer ${seam.routerKey}`, + }, + body: JSON.stringify({ + model, + messages, + tools: seam.tools, + tool_choice: 'auto', + temperature: 0.2, + }), + ...(linked ? { signal: linked } : {}), + }) + if (!res.ok) { + throw new ValidationError( + `routerToolsInlineExecutor: router ${res.status}: ${(await res.text()).slice(0, 200)}`, + ) + } + const data = (await res.json()) as RouterToolsResponse + const u = data.usage + if (u && typeof u.prompt_tokens === 'number' && typeof u.completion_tokens === 'number') { + tokens.input += u.prompt_tokens + tokens.output += u.completion_tokens + } + const msg = data.choices?.[0]?.message + if (msg?.content) lastText = msg.content + const toolCalls = msg?.tool_calls ?? [] + if (toolCalls.length === 0) break // the model answered — loop done + + // Record the assistant turn verbatim, then run each call on the host and + // fold the result back as a `tool` message for the next turn. + messages.push({ + role: 'assistant', + content: msg?.content ?? '', + tool_calls: toolCalls.map((tc, i) => ({ + id: tc.id ?? `call_${i}`, + type: 'function', + function: { name: tc.function?.name ?? '', arguments: tc.function?.arguments ?? '{}' }, + })), + }) + for (let i = 0; i < toolCalls.length; i += 1) { + const tc = toolCalls[i] + const id = tc?.id ?? `call_${i}` + let args: Record = {} + try { + args = JSON.parse(tc?.function?.arguments ?? '{}') as Record + } catch { + // Malformed args are a real outcome, not an infra fault — feed the error + // back so the model can correct, rather than aborting the whole loop. + messages.push({ + role: 'tool', + tool_call_id: id, + content: 'error: tool arguments were not valid JSON', + }) + continue + } + const result = await seam.executeToolCall(tc?.function?.name ?? '', args, task) + messages.push({ role: 'tool', tool_call_id: id, content: result }) + } + } + + const usd = isModelPriced(model) ? estimateCost(tokens.input, tokens.output, model) : 0 + const spent: Spend = { iterations: turns, tokens, usd, ms: Date.now() - started } + const out = { content: lastText } as unknown + artifact = { outRef: contentRef('router-tools', { model, content: lastText }), out, spent } + return artifact + }, + teardown(_grace): Promise<{ destroyed: boolean }> { + controller.abort() + return Promise.resolve({ destroyed: true }) + }, + resultArtifact() { + if (!artifact) { + throw new ValidationError( + 'routerToolsInlineExecutor: resultArtifact() read before execute()', + ) + } + return { ...artifact, spent: artifact.spent } + }, + } +} + // ── sandbox executor (harness is a BackendType) ──────────────────────────────── /** @@ -624,6 +787,7 @@ export const bridgeExecutor: ExecutorFactory = (spec, ctx) => { */ export type ExecutorConfig = | ({ backend: 'router' } & RouterSeam) + | ({ backend: 'router-tools' } & RouterToolsSeam) | ({ backend: 'bridge' } & BridgeSeam) | ({ backend: 'cli' } & CliSeam) | ({ backend: 'sandbox'; harness?: BackendType } & SandboxSeam) @@ -635,6 +799,8 @@ export function createExecutor(config: ExecutorConfig): ExecutorFactory switch (config.backend) { case 'router': return routerInlineExecutor(spec, seamed) + case 'router-tools': + return routerToolsInlineExecutor(spec, seamed) case 'bridge': return bridgeExecutor(spec, seamed) case 'cli':