diff --git a/apps/planning-demo/index.html b/apps/planning-demo/index.html new file mode 100644 index 0000000..d78f9b8 --- /dev/null +++ b/apps/planning-demo/index.html @@ -0,0 +1,107 @@ + + + + + + Forgewisp Planning Agent + + + +
+ + +
+
+
+ + + +
+
+
+
+ + + + + + + + diff --git a/apps/planning-demo/package.json b/apps/planning-demo/package.json new file mode 100644 index 0000000..357c273 --- /dev/null +++ b/apps/planning-demo/package.json @@ -0,0 +1,28 @@ +{ + "name": "forgewisp-planning-demo", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview", + "typecheck": "tsc --noEmit", + "lint": "eslint --config ../../eslint.config.mjs src tests", + "test": "vitest run", + "test:watch": "vitest" + }, + "dependencies": { + "@forgewisp/core": "workspace:*", + "@forgewisp/bundled-tools": "workspace:*", + "dompurify": "^3.2.0", + "marked": "^18.0.5" + }, + "devDependencies": { + "@testing-library/dom": "^10.0.0", + "jsdom": "^25.0.0", + "typescript": "^5.4.0", + "vite": "^5.4.0", + "vitest": "^1.6.0" + } +} diff --git a/apps/planning-demo/src/main.ts b/apps/planning-demo/src/main.ts new file mode 100644 index 0000000..90d8b67 --- /dev/null +++ b/apps/planning-demo/src/main.ts @@ -0,0 +1,546 @@ +import { createAgent, defineToolSet } from '@forgewisp/core'; +import type { + AgentResult, + AuditEvent, + ChatMessage, + PendingCall, + ForgewispConfig, +} from '@forgewisp/core'; +import { + getCurrentTime, + generateUuid, + downloadFile, + PLANNING_TOOLS, +} from '@forgewisp/bundled-tools'; +import { + renderArgsHtml, + renderArtifact, + renderAuditDetail, + renderMarkdown, + renderToolsList, + escapeHtml, +} from './render.js'; +import { PlanBoard } from './plan-board.js'; + +// ─── Sanitization note ──────────────────────────────────────────────────────── +// Every sink that turns model- or user-adjacent text into HTML goes through +// DOMPurify (via the helpers in render.ts). The model can return arbitrary +// markdown (including raw HTML); without sanitization, +// `` would execute in the page. 
+
+// ─── Cached DOM refs ──────────────────────────────────────────────────────────
+
+// Every element main.ts touches, looked up once at module load so a missing id
+// fails immediately instead of surfacing later as a null dereference.
+interface Elements {
+  toolsList: HTMLDivElement;
+  artifactsList: HTMLUListElement;
+  clearArtifactsBtn: HTMLButtonElement;
+  chatMessages: HTMLDivElement;
+  chatForm: HTMLFormElement;
+  chatInput: HTMLInputElement;
+  sendButton: HTMLButtonElement;
+  examplePrompts: HTMLDivElement;
+  reasoningSection: HTMLElement;
+  reasoningOutput: HTMLDivElement;
+  auditLog: HTMLUListElement;
+  clearAuditBtn: HTMLButtonElement;
+  configOverlay: HTMLDivElement;
+  configForm: HTMLFormElement;
+  configEndpoint: HTMLInputElement;
+  configModel: HTMLInputElement;
+  configApikey: HTMLInputElement;
+  confirmOverlay: HTMLDivElement;
+  confirmTitle: HTMLHeadingElement;
+  confirmDescription: HTMLParagraphElement;
+  confirmArgs: HTMLDivElement;
+  confirmAccept: HTMLButtonElement;
+  confirmReject: HTMLButtonElement;
+}
+
+// Look up an element by id and return it typed as `T`.
+// Throws if no element has that id. The cast to `T` is not checked at runtime:
+// the caller is trusted to name the element type the markup actually uses.
+function getEl<T extends HTMLElement>(id: string): T {
+  const el = document.getElementById(id);
+  if (!el) throw new Error(`[planning-demo] Missing element #${id}`);
+  return el as T;
+}
+
+// Resolve the chat form's submit button, failing as loudly as getEl does when
+// the markup lacks one (rather than caching a null behind a type assertion).
+function getSubmitButton(form: HTMLFormElement): HTMLButtonElement {
+  const button = form.querySelector<HTMLButtonElement>('button[type="submit"]');
+  if (!button) throw new Error('[planning-demo] Missing submit button in #chat-form');
+  return button;
+}
+
+const els: Elements = {
+  toolsList: getEl<HTMLDivElement>('tools-list'),
+  artifactsList: getEl<HTMLUListElement>('artifacts-list'),
+  clearArtifactsBtn: getEl<HTMLButtonElement>('clear-artifacts-btn'),
+  chatMessages: getEl<HTMLDivElement>('chat-messages'),
+  chatForm: getEl<HTMLFormElement>('chat-form'),
+  chatInput: getEl<HTMLInputElement>('chat-input'),
+  sendButton: getSubmitButton(getEl<HTMLFormElement>('chat-form')),
+  examplePrompts: getEl<HTMLDivElement>('example-prompts'),
+  reasoningSection: getEl<HTMLElement>('reasoning-section'),
+  reasoningOutput: getEl<HTMLDivElement>('reasoning-output'),
+  auditLog: getEl<HTMLUListElement>('audit-log'),
+  clearAuditBtn: getEl<HTMLButtonElement>('clear-audit-btn'),
+  configOverlay: getEl<HTMLDivElement>('config-overlay'),
+  configForm: getEl<HTMLFormElement>('config-form'),
+  configEndpoint: getEl<HTMLInputElement>('config-endpoint'),
+  configModel: getEl<HTMLInputElement>('config-model'),
+  configApikey: getEl<HTMLInputElement>('config-apikey'),
+  confirmOverlay: getEl<HTMLDivElement>('confirm-overlay'),
+  confirmTitle: getEl<HTMLHeadingElement>('confirm-title'),
+  confirmDescription: getEl<HTMLParagraphElement>('confirm-description'),
+  confirmArgs: getEl<HTMLDivElement>('confirm-args'),
+  confirmAccept: getEl<HTMLButtonElement>('confirm-accept'),
+  confirmReject: getEl<HTMLButtonElement>('confirm-reject'),
+};
+
+// Derived, in-place view of the plans the agent tracks. Fed by audit events
+// (see plan-board.ts); the agent owns the authoritative state in localStorage.
+const board = new PlanBoard(els.artifactsList);
+
+// ─── Streaming output helpers ─────────────────────────────────────────────────
+
+// Return the bubble that streamed text is rendered into, creating it (and
+// appending it to the chat log) on first use within a turn.
+function getOrCreateStreamingMessage(): HTMLDivElement {
+  let el = document.getElementById('streaming-message') as HTMLDivElement | null;
+  if (!el) {
+    // The first text token swaps the "Thinking…" placeholder out for the real
+    // streaming bubble.
+    removeThinkingPlaceholder();
+    el = document.createElement('div');
+    el.id = 'streaming-message';
+    el.className = 'message message-assistant streaming';
+    els.chatMessages.appendChild(el);
+  }
+  return el;
+}
+
+// "Thinking…" placeholder shown in the chat area between submit and the first
+// streamed text token. Lives only for the current turn.
+let currentTurnThinkingEl: HTMLDivElement | null = null;
+
+// Append a fresh three-dot indicator to the chat log, first removing any
+// indicator still on screen, and remember it so it can be taken down later.
+function showThinkingPlaceholder(): void {
+  removeThinkingPlaceholder();
+  const indicator = document.createElement('div');
+  indicator.className = 'message message-assistant thinking-indicator';
+  indicator.setAttribute('aria-label', 'Thinking');
+  for (let i = 0; i < 3; i += 1) {
+    const dot = document.createElement('span');
+    indicator.appendChild(dot);
+    dot.className = 'dot';
+  }
+  els.chatMessages.appendChild(indicator);
+  currentTurnThinkingEl = indicator;
+}
+
+// Detach the indicator if one is showing; otherwise do nothing.
+function removeThinkingPlaceholder(): void {
+  if (currentTurnThinkingEl === null) return;
+  currentTurnThinkingEl.remove();
+  currentTurnThinkingEl = null;
+}
+
+// Turn the live streaming bubble into an ordinary message: blank its id (so the
+// next lookup by that id finds nothing) and drop the `streaming` class.
+// Returns the bubble, or null when no streaming bubble exists.
+function finalizeStreamingMessage(): HTMLDivElement | null {
+  const bubble = document.getElementById('streaming-message') as HTMLDivElement | null;
+  if (bubble === null) return null;
+  bubble.id = '';
+  bubble.classList.remove('streaming');
+  return bubble;
+}
+
+// ─── Confirmation dialog ──────────────────────────────────────────────────────
+
+// The core executor calls onConfirmRequired once per write/destructive tool
+// call, concurrently (Promise.allSettled over every call in a round). The UI
+// can only show one modal at a time, so we serialize the prompts with a FIFO
+// queue: each enqueued call resolves its own promise when the user answers its
+// dialog, then the next queued call is shown. This keeps the core's per-call
+// contract intact while ensuring no confirmation is silently auto-rejected.
+interface QueuedConfirm {
+  pendingCall: PendingCall;
+  resolve: (result: boolean) => void;
+}
+
+const confirmQueue: QueuedConfirm[] = [];
+let activeConfirm: QueuedConfirm | null = null;
+
+// Queue a confirmation for `pendingCall`. The returned promise resolves to true
+// when the user accepts its dialog and false when they reject it.
+function showConfirmDialog(pendingCall: PendingCall): Promise<boolean> {
+  return new Promise<boolean>((resolve) => {
+    confirmQueue.push({ pendingCall, resolve });
+    processNextConfirm();
+  });
+}
+
+// Open the dialog for the oldest queued confirmation unless one is already
+// open. Answering a dialog re-enters this function to show the next one.
+function processNextConfirm(): void {
+  if (activeConfirm) return; // a dialog is already open; it'll drain the queue
+  const next = confirmQueue.shift();
+  if (!next) return;
+  activeConfirm = next;
+  renderConfirmDialog(next.pendingCall, (result) => {
+    activeConfirm = null;
+    next.resolve(result);
+    processNextConfirm();
+  });
+}
+
+// Populate and show the confirm overlay for one call, wire Accept / Reject /
+// keyboard handling, and invoke `done` exactly once with the user's answer.
+function renderConfirmDialog(pendingCall: PendingCall, done: (result: boolean) => void): void {
+  // Capture the focused element BEFORE focus moves into the dialog; reading it
+  // afterwards would always yield the Accept button and focus could never be
+  // handed back to where the user was.
+  const previouslyFocused = document.activeElement as HTMLElement | null;
+
+  els.confirmTitle.textContent =
+    pendingCall.riskTier === 'destructive' ? '⚠️ Destructive Action' : 'Action Required';
+  els.confirmDescription.textContent = `Function: ${pendingCall.functionName}`;
+  // Args came through AJV validation, but escape defensively — never raw.
+  els.confirmArgs.innerHTML = renderArgsHtml(pendingCall.args);
+
+  els.confirmOverlay.classList.remove('hidden');
+  els.confirmAccept.focus();
+
+  const cleanup = (result: boolean): void => {
+    els.confirmOverlay.classList.add('hidden');
+    els.confirmAccept.removeEventListener('click', onAccept);
+    els.confirmReject.removeEventListener('click', onReject);
+    document.removeEventListener('keydown', onKeydown);
+    previouslyFocused?.focus?.();
+    done(result);
+  };
+  const onAccept = (): void => cleanup(true);
+  const onReject = (): void => cleanup(false);
+  const onKeydown = (e: KeyboardEvent): void => {
+    if (e.key === 'Escape') {
+      e.preventDefault();
+      cleanup(false);
+      return;
+    }
+    if (e.key !== 'Enter') return;
+    const target = e.target as Element | null;
+    if (target instanceof HTMLTextAreaElement) return;
+    e.preventDefault();
+    // Enter while the Reject button has focus must reject. Without this check
+    // the document-level shortcut would accept the action the user tabbed away from.
+    cleanup(target !== els.confirmReject);
+  };
+  els.confirmAccept.addEventListener('click', onAccept);
+  els.confirmReject.addEventListener('click', onReject);
+  document.addEventListener('keydown', onKeydown);
+}
+
+// ─── Audit log + artifacts ────────────────────────────────────────────────────
+
+// Short display label per audit event type; types without an entry fall back
+// to the raw type string in appendAuditEntry.
+const EVENT_LABELS: Record<string, string> = {
+  function_requested: 'requested',
+  validation_passed: 'validation passed',
+  validation_failed: 'validation failed',
+  confirmation_requested: 'confirm?',
+  confirmation_accepted: 'confirmed',
+  confirmation_rejected: 'rejected',
+  function_executed: 'executed',
+  function_errored: 'errored',
+  audit_callback_errored: 'audit callback errored',
+  max_tool_rounds_reached: 'max rounds',
+  stream_malformed: 'stream malformed',
+};
+
+// Prepend one row for `event` to the audit log (newest first).
+function appendAuditEntry(event: AuditEvent): void {
+  const li = document.createElement('li');
+  li.className = `audit-event audit-${event.type.replace(/_/g, '-')}`;
+  const label = EVENT_LABELS[event.type] ?? event.type;
+  // Name and label are escaped here; the detail fragment is inserted as returned.
+  // NOTE(review): relies on renderAuditDetail returning already-escaped HTML — confirm in render.ts.
+  li.innerHTML =
+    `${escapeHtml(event.functionName)}` +
+    `${escapeHtml(label)}` +
+    `${renderAuditDetail(event)}`;
+  els.auditLog.prepend(li);
+}
+
+// Agent audit callback: log the event, feed it to the plan board, and add an
+// error card to the artifacts list when renderArtifact produces one.
+function onAuditEvent(event: AuditEvent): void {
+  appendAuditEntry(event);
+  // Plan `function_executed` events update the live, in-place plan cards.
+  board.applyEvent(event);
+  // `function_errored` (and only it) renders an append-only error card.
+  const errHtml = renderArtifact(event);
+  if (errHtml) {
+    const li = document.createElement('li');
+    li.className = 'artifact artifact-error';
+    li.innerHTML = errHtml;
+    els.artifactsList.prepend(li);
+  }
+}
+
+// Empty the on-screen audit list (the agent's own log is cleared separately).
+function clearAuditUI(): void {
+  els.auditLog.innerHTML = '';
+}
+
+els.clearAuditBtn.addEventListener('click', () => {
+  if (!agent) return;
+  agent.clearAuditLog();
+  clearAuditUI();
+});
+
+els.clearArtifactsBtn.addEventListener('click', () => {
+  board.clear();
+});
+
+// ─── Chat UI ──────────────────────────────────────────────────────────────────
+
+// Append the user's message as plain text (textContent, so no HTML is parsed).
+function appendUserMessage(text: string): void {
+  const el = document.createElement('div');
+  el.className = 'message message-user';
+  el.textContent = text;
+  els.chatMessages.appendChild(el);
+}
+
+// Append an assistant message rendered from markdown via renderMarkdown.
+function appendAssistantMessage(text: string): void {
+  const el = document.createElement('div');
+  el.className = 'message message-assistant';
+  el.innerHTML = renderMarkdown(text);
+  els.chatMessages.appendChild(el);
+}
+
+// ─── Agent setup ──────────────────────────────────────────────────────────────
+
+type Agent = ReturnType<typeof createAgent>;
+let agent: Agent | null = null;
+let inFlightController: AbortController | null = null;
+
+// Conversation history threaded back into agent.run so the model sees prior
+// user/assistant turns. Cleared whenever the agent is rebuilt (new config).
+const conversation: ChatMessage[] = [];
+
+// Connection settings collected by the config form.
+interface AgentConfig {
+  endpoint: string;
+  apiKey: string;
+  model: string;
+}
+
+// Replace the module-level agent with one built from `cfg`: abort any run still
+// in flight, reset the conversation history and the plan board, then create the
+// agent and register its tools.
+function buildAgent(cfg: AgentConfig): void {
+  // Abort any in-flight run from the previous agent before swapping it out.
+  if (inFlightController) {
+    inFlightController.abort();
+    inFlightController = null;
+  }
+
+  // A new agent means a fresh conversation — don't leak prior turns (which may
+  // have been produced by a different model/endpoint) into the new session.
+  conversation.length = 0;
+  // The live plan cards are a derived view of the prior agent's session; a new
+  // agent rehydrates them from listPlans/getPlan as it resumes, so drop stale ones.
+  board.clear();
+
+  const config: ForgewispConfig = {
+    llmEndpoint: cfg.endpoint,
+    // An empty key is sent as "no key" rather than as an empty string.
+    apiKey: cfg.apiKey || undefined,
+    model: cfg.model,
+    systemPrompt:
+      'You are a planning agent that breaks large requests into concrete steps and tracks them ' +
+      'to completion using plan tools. For any request with 2+ steps, call createPlan up front ' +
+      'with a short title and the 3-8 steps you foresee — one item per distinct step, merging ' +
+      'trivial substeps. Keep one active plan per task; do not create a second plan for the same ' +
+      'task. Work the plan in order: set the item to "in_progress" via updatePlanItem when you ' +
+      'start it and "done" when you complete it, adding a short notes line about what you found ' +
+      'or decided, and prefer one item in_progress at a time. If scope changes, re-plan with ' +
+      'addPlanItem/removePlanItem rather than starting over. Call getPlan to re-read the full ' +
+      'plan before editing if unsure of current state, and listPlans at the start of a turn to ' +
+      'resume an in-progress plan. Prefer removePlanItem over deleting a whole plan; once every ' +
+      'item is "done" and the task is complete, call deletePlan to tear down the finished plan ' +
+      'before giving your final summary. Use getCurrentTime when scheduling or deadlines matter. ' +
+      'Narrate briefly in chat as you complete each step, and give a one-line summary when the ' +
+      'plan is done.',
+    // Planning turns fan out across many tool calls (createPlan + per-item
+    // in_progress/done updates + listPlans/getPlan reads), so lift the cap well
+    // above the default 10 — an 8-step plan already needs ~20 rounds.
+    maxToolRounds: 40,
+    // Called for write/destructive-tier tool calls; read-tier calls never reach
+    // it. The extras registered below include downloadFile, which the tool
+    // registration notes describe as write-tier, so the example task is expected
+    // to open this dialog. NOTE(review): confirm downloadFile's tier in bundled-tools.
+    onConfirmRequired: showConfirmDialog,
+    onAuditEvent,
+    streaming: {
+      reasoning: { mode: 'native' },
+      onTextChunk: (chunk: string) => {
+        const acc = currentTurnStreamingText;
+        if (acc === null) return;
+        acc.text += chunk;
+        const el = getOrCreateStreamingMessage();
+        // Re-render the whole accumulated text so partial markdown resolves
+        // as more of it arrives.
+        el.innerHTML = renderMarkdown(acc.text);
+      },
+      onReasoningChunk: (chunk: string) => {
+        // Same per-turn guard as onTextChunk: a chunk that arrives after the
+        // turn has ended (e.g. from an aborted run) must not reopen the panel
+        // or bleed into the next turn's reasoning output.
+        if (currentTurnStreamingText === null) return;
+        els.reasoningSection.classList.remove('hidden');
+        els.reasoningOutput.textContent += chunk;
+      },
+    },
+  };
+
+  agent = createAgent(config);
+  registerTools(agent);
+}
+
+// Text accumulated from streamed chunks during the current turn. The binding
+// is null whenever no turn is running.
+interface TurnStreamingState {
+  text: string;
+}
+let currentTurnStreamingText: TurnStreamingState | null = null;
+
+// ─── Tool registration ────────────────────────────────────────────────────────
+
+// Extras registered alongside the planning set: getCurrentTime/generateUuid for
+// general use, downloadFile (write-tier) as the example task's final step —
+// triggers the confirm flow rendered from schema-validated args. Grouped as a
+// ToolSet so registration is a single call and the heterogeneous-args tuple
+// needs no `as unknown as` cast (defineToolSet erases via FunctionDefinition).
+const EXTRA_TOOLS = defineToolSet({
+  name: 'planning-extras',
+  description: 'Time/UUID helpers plus file download for the example task.',
+  tools: [getCurrentTime, generateUuid, downloadFile],
+});
+
+// The full toolkit surfaced in the sidebar: the planning set plus the extras.
+const SIDEBAR_TOOLS = [...PLANNING_TOOLS.tools, ...EXTRA_TOOLS.tools];
+
+// Register every tool the demo exposes on agent `a`.
+function registerTools(a: Agent): void {
+  // Register the 7 plan-management tools plus the extras, each set in one call.
+  a.registerToolSet(PLANNING_TOOLS);
+  a.registerToolSet(EXTRA_TOOLS);
+}
+
+// ─── Config overlay ───────────────────────────────────────────────────────────
+
+// Show the config overlay and put focus in the endpoint field.
+function showConfigForm(): void {
+  els.configOverlay.classList.remove('hidden');
+  els.configEndpoint.focus();
+}
+
+// Hide the config overlay.
+function hideConfigForm(): void {
+  els.configOverlay.classList.add('hidden');
+}
+
+// ─── Chat form ────────────────────────────────────────────────────────────────
+
+// Enable or disable every control that can start a turn: the input, the send
+// button, and the example-prompt chips.
+function setFormDisabled(disabled: boolean): void {
+  els.chatInput.disabled = disabled;
+  els.sendButton.disabled = disabled;
+  els.examplePrompts
+    .querySelectorAll<HTMLButtonElement>('button.example-prompt')
+    .forEach((b) => {
+      b.disabled = disabled;
+    });
+}
+
+// Submit handler for the chat form: runs one agent turn for the trimmed input.
+// Ignored when the input is empty, no agent is configured, or a run is already
+// in flight. Errors from the run are shown in the chat as an `[error]` message
+// rather than rethrown.
+async function handleChatSubmit(e: SubmitEvent): Promise<void> {
+  e.preventDefault();
+  const text = els.chatInput.value.trim();
+  if (!text || !agent) return;
+  if (inFlightController) return; // race guard — already a run in flight
+
+  appendUserMessage(text);
+  els.chatInput.value = '';
+  setFormDisabled(true);
+  showThinkingPlaceholder();
+
+  // Fresh streaming buffer for this turn. The binding itself is module-level;
+  // each turn installs its own object and the `finally` below nulls it, so a
+  // later turn (or a rebuilt agent) never inherits stale text.
+  currentTurnStreamingText = { text: '' };
+  els.reasoningOutput.textContent = '';
+  els.reasoningSection.classList.add('hidden');
+
+  const controller = new AbortController();
+  inFlightController = controller;
+
+  try {
+    // `history` is the prior turns only — the current `text` is passed as the
+    // userMessage arg. We append both turns to `conversation` only after the
+    // run succeeds, so a failed/aborted exchange never pollutes future history.
+    const result: AgentResult = await agent.run(text, {
+      signal: controller.signal,
+      history: conversation,
+    });
+    const streamingEl = finalizeStreamingMessage();
+    if (result.response) {
+      conversation.push({ role: 'user', content: text });
+      conversation.push({ role: 'assistant', content: result.response });
+    }
+    // If the response was already rendered via streaming chunks, don't duplicate it.
+    if (result.response && !streamingEl) {
+      appendAssistantMessage(result.response);
+    }
+  } catch (err) {
+    finalizeStreamingMessage();
+    const msg = err instanceof Error ? err.message : String(err);
+    appendAssistantMessage(`[error] ${msg}`);
+  } finally {
+    inFlightController = null;
+    currentTurnStreamingText = null;
+    removeThinkingPlaceholder();
+    setFormDisabled(false);
+    els.chatInput.focus();
+  }
+}
+
+els.chatForm.addEventListener('submit', (e: SubmitEvent) => void handleChatSubmit(e));
+
+// ─── Config form handler ──────────────────────────────────────────────────────
+
+// localStorage key under which the connection settings are persisted.
+const CONFIG_STORAGE_KEY = 'forgewisp.planning-demo.config';
+
+els.configForm.addEventListener('submit', (e: SubmitEvent) => {
+  e.preventDefault();
+  const cfg: AgentConfig = {
+    endpoint: els.configEndpoint.value.trim(),
+    model: els.configModel.value.trim(),
+    apiKey: els.configApikey.value.trim(),
+  };
+  // Endpoint and model are required; the API key may be blank.
+  if (!cfg.endpoint || !cfg.model) return;
+  localStorage.setItem(CONFIG_STORAGE_KEY, JSON.stringify(cfg));
+  buildAgent(cfg);
+  hideConfigForm();
+});
+
+// ─── Safe localStorage config ─────────────────────────────────────────────────
+
+// Shape accepted from storage: `apiKey` may be absent there, so it is optional
+// here and filled in by loadStoredConfig before the value is used as AgentConfig.
+type StoredAgentConfig = Omit<AgentConfig, 'apiKey'> & { apiKey?: string };
+
+// True when `v` is an object with string `endpoint` and `model`, and an
+// `apiKey` that is either absent or a string.
+function isAgentConfig(v: unknown): v is StoredAgentConfig {
+  if (typeof v !== 'object' || v === null) return false;
+  const o = v as Record<string, unknown>;
+  return (
+    typeof o.endpoint === 'string' &&
+    typeof o.model === 'string' &&
+    (o.apiKey === undefined || typeof o.apiKey === 'string')
+  );
+}
+
+// Read the persisted config. Returns null when nothing is stored or when the
+// stored value is unusable; an unusable value is also removed from storage.
+function loadStoredConfig(): AgentConfig | null {
+  const stored = localStorage.getItem(CONFIG_STORAGE_KEY);
+  if (!stored) return null;
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(stored);
+  } catch {
+    // Unparsable JSON is handled like a shape mismatch below, so it is cleared
+    // too instead of lingering in storage.
+    parsed = undefined;
+  }
+  if (!isAgentConfig(parsed)) {
+    // Corrupt or shape-mismatched — clear it so the user gets a clean form.
+    localStorage.removeItem(CONFIG_STORAGE_KEY);
+    return null;
+  }
+  // Normalize a missing key to '' so the result really satisfies AgentConfig.
+  return { endpoint: parsed.endpoint, model: parsed.model, apiKey: parsed.apiKey ?? '' };
+}
+
+// ─── Example prompt chips ──────────────────────────────────────────────────────
+
+// One-click "large task" prompts so visitors immediately see the agent
+// decompose a concrete multi-step request. Text is set via textContent (no
+// parsing), so the prompts are safe even if edited to include markup.
+const EXAMPLE_PROMPTS = [
+  'Write a product launch announcement document with an intro, key features, pricing, and a call to action. Download it as a Markdown file as the last step.',
+];
+
+// Rebuild the example-prompt chips; clicking one fills the input and submits.
+function renderExamplePrompts(): void {
+  els.examplePrompts.innerHTML = '';
+  for (const prompt of EXAMPLE_PROMPTS) {
+    const btn = document.createElement('button');
+    btn.type = 'button';
+    btn.className = 'example-prompt';
+    btn.textContent = prompt;
+    btn.addEventListener('click', () => {
+      // The race guard in handleChatSubmit covers a click during an in-flight run.
+      if (inFlightController) return;
+      els.chatInput.value = prompt;
+      els.chatForm.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true }));
+    });
+    els.examplePrompts.appendChild(btn);
+  }
+}
+
+// ─── Boot ─────────────────────────────────────────────────────────────────────
+
+els.toolsList.innerHTML = renderToolsList(SIDEBAR_TOOLS);
+renderExamplePrompts();
+
+// Resume with the saved config when there is a usable one; otherwise ask for it.
+const stored = loadStoredConfig();
+if (stored) {
+  buildAgent(stored);
+} else {
+  showConfigForm();
+}
diff --git a/apps/planning-demo/src/plan-board.ts b/apps/planning-demo/src/plan-board.ts
new file mode 100644
index 0000000..e4d298f
--- /dev/null
+++ b/apps/planning-demo/src/plan-board.ts
@@ -0,0 +1,183 @@
+import type { AuditEvent } from '@forgewisp/core';
+import type { Plan, PlanItem } from '@forgewisp/bundled-tools';
+import { renderLivePlanCard } from './render.js';
+
+// ─── PlanBoard ──────────────────────────────────────────────────────────────
+//
+// A derived, in-place view of the plans the agent is tracking. The agent owns
+// the authoritative state in localStorage (`forgewisp.plans` via plan-store); the
+// board does NOT read localStorage. Instead it reconstructs each plan from the
+// `function_executed` audit events, which carry the tool `result` (and `args`
+// for the planId/itemId the tool acted on):
+//
+//   createPlan / getPlan  → result.plan is the full Plan  → replace the plan.
+//   addPlanItem           → result.item + args.planId     → append the item.
+//   updatePlanItem        → result.item + args.planId     → patch the item by id.
+//   removePlanItem        → args.planId + args.itemId     → drop the item.
+//   deletePlan            → args.planId                   → remove the plan.
+//   listPlans / others    → no item data                  → no change.
+//
+// One persistent
  • card per plan, keyed by planId, is re-rendered in place +// on each event (same DOM node) and moved to the top — so a multi-step task +// shows a single evolving checklist (◻ → ◑ → ✓) rather than a stack of cards. +// On reload the board is empty until the agent calls listPlans/getPlan to resume, +// at which point the live card rehydrates from those events. + +function str(v: unknown): string { + return typeof v === 'string' ? v : ''; +} + +/** Coerce an unknown value to a `PlanItem` (field-by-field safe). */ +function asItem(v: unknown): PlanItem | null { + if (typeof v !== 'object' || v === null) return null; + const o = v as Record; + if (typeof o.id !== 'string' || typeof o.title !== 'string' || typeof o.status !== 'string') { + return null; + } + return o as unknown as PlanItem; +} + +/** Coerce an unknown value to a `Plan` (field-by-field safe). */ +function asPlan(v: unknown): Plan | null { + if (typeof v !== 'object' || v === null) return null; + const o = v as Record; + if (typeof o.id !== 'string' || typeof o.title !== 'string' || !Array.isArray(o.items)) { + return null; + } + return o as unknown as Plan; +} + +export class PlanBoard { + private readonly container: HTMLUListElement; + private readonly plans = new Map(); + // Card element per planId, so updates re-render the SAME node in place. + private readonly cards = new Map(); + + constructor(container: HTMLUListElement) { + this.container = container; + } + + /** Apply an audit event; only `function_executed` plan events update the board. */ + applyEvent(event: AuditEvent): void { + if (event.type !== 'function_executed') return; + // `event.args` is already `Record | undefined`, so `?? {}` + // collapses to `Record` — no assertion needed. `result` is + // `unknown`, so it still needs the cast to be indexable. + const args: Record = event.args ?? {}; + const result = (event.result ?? 
{}) as Record; + + switch (event.functionName) { + case 'createPlan': + case 'getPlan': { + const plan = asPlan(result.plan); + if (plan) this.setPlan(plan); + break; + } + case 'addPlanItem': { + const planId = str(args.planId); + const item = asItem(result.item); + if (planId && item) this.addItem(planId, item); + break; + } + case 'updatePlanItem': { + const planId = str(args.planId); + const item = asItem(result.item); + if (planId && item) this.patchItem(planId, item); + break; + } + case 'removePlanItem': { + const planId = str(args.planId); + const itemId = str(args.itemId); + if (planId && itemId) this.removeItem(planId, itemId); + break; + } + case 'deletePlan': { + const planId = str(args.planId); + if (planId) this.removePlan(planId); + break; + } + default: + break; // listPlans, getCurrentTime, generateUuid — no board change + } + } + + /** Drop all known plans and clear the panel (including any error cards). */ + clear(): void { + this.plans.clear(); + this.cards.clear(); + this.container.innerHTML = ''; + } + + // ── internal ──────────────────────────────────────────────────────────── + + /** Replace (or seed) a plan and move its card to the top. */ + private setPlan(plan: Plan): void { + this.plans.set(plan.id, plan); + this.renderCard(plan.id); + } + + /** Append an item to a plan, seeding a stub if the plan wasn't seen in full. */ + private addItem(planId: string, item: PlanItem): void { + const plan = this.getOrSeed(planId, item); + plan.items = plan.items.filter((i) => i.id !== item.id); + plan.items.push(item); + this.renderCard(planId); + } + + /** Patch an item by id (or push it if new), seeding a stub plan if needed. 
*/ + private patchItem(planId: string, item: PlanItem): void { + const plan = this.getOrSeed(planId, item); + const idx = plan.items.findIndex((i) => i.id === item.id); + if (idx === -1) plan.items.push(item); + else plan.items[idx] = item; + this.renderCard(planId); + } + + /** Remove an item from a plan (no-op if the plan is unknown). */ + private removeItem(planId: string, itemId: string): void { + const plan = this.plans.get(planId); + if (!plan) return; + plan.items = plan.items.filter((i) => i.id !== itemId); + this.renderCard(planId); + } + + /** Remove a plan and its card. */ + private removePlan(planId: string): void { + this.plans.delete(planId); + const li = this.cards.get(planId); + if (li) { + li.remove(); + this.cards.delete(planId); + } + } + + /** + * Return the plan for planId, seeding a minimal stub if the board hasn't seen + * it in full yet (rare: the agent used listPlans → addPlanItem without a prior + * createPlan/getPlan). The stub self-corrects on the next getPlan/createPlan. + */ + private getOrSeed(planId: string, item: PlanItem): Plan { + let plan = this.plans.get(planId); + if (!plan) { + plan = { id: planId, title: '(unsaved plan)', createdAt: item.createdAt, items: [] }; + this.plans.set(planId, plan); + } + return plan; + } + + /** Re-render a plan's card in place, creating it and moving it to top. */ + private renderCard(planId: string): void { + const plan = this.plans.get(planId); + if (!plan) return; + let li = this.cards.get(planId); + if (!li) { + li = document.createElement('li'); + li.className = 'artifact artifact-plan'; + li.dataset.planId = planId; + this.cards.set(planId, li); + } + li.innerHTML = renderLivePlanCard(plan); + // Most recently touched plan on top. 
+ this.container.prepend(li); + } +} diff --git a/apps/planning-demo/src/render.ts b/apps/planning-demo/src/render.ts new file mode 100644 index 0000000..82a9f76 --- /dev/null +++ b/apps/planning-demo/src/render.ts @@ -0,0 +1,201 @@ +import DOMPurify from 'dompurify'; +import { marked } from 'marked'; +import type { AuditEvent, RiskTier } from '@forgewisp/core'; +import type { Plan, PlanItem, PlanStatus } from '@forgewisp/bundled-tools'; + +marked.setOptions({ breaks: true, gfm: true }); + +// Model-controlled markdown is untrusted: it may contain raw HTML. This allowlist +// is deliberately tight (no , no '; + +function hasExecutablePayload(html: string): boolean { + const tpl = document.createElement('template'); + tpl.innerHTML = html; + const hasOnerror = Array.from(tpl.content.querySelectorAll('*')).some((el) => + el.hasAttribute('onerror'), + ); + const hasScript = tpl.content.querySelector('script') !== null; + return hasOnerror || hasScript; +} + +describe('sanitize — renderMarkdown (assistant message + streaming sinks)', () => { + it('strips onerror handlers from img tags', () => { + const out = renderMarkdown(XSS_IMG); + expect(out).not.toContain('onerror'); + expect(hasExecutablePayload(out)).toBe(false); + }); + + it('strips script tags', () => { + const out = renderMarkdown(`${XSS_SCRIPT}\n\nHello`); + expect(out).not.toContain(' { + const out = renderMarkdown('**bold** and [link](https://example.com)'); + expect(out).toContain('bold'); + expect(out).toContain('href="https://example.com"'); + }); +}); + +describe('sanitize — renderArgsHtml (confirm dialog args sink)', () => { + it('escapes attacker-controlled arg values', () => { + const out = renderArgsHtml({ title: XSS_IMG }); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).not.toContain(' { + const out = renderArgsHtml({ '">': 'v' }); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).not.toContain(' { + const baseEvent: AuditEvent = { + id: 'e1', + timestamp: new 
Date().toISOString(), + type: 'function_errored', + functionName: 'fn', + }; + + it('escapes error strings', () => { + const out = renderAuditDetail({ ...baseEvent, error: XSS_IMG }); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).toContain('<img'); + }); + + it('escapes result JSON', () => { + const out = renderAuditDetail({ ...baseEvent, type: 'function_executed', result: XSS_IMG }); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).toContain('<img'); + }); +}); + +describe('sanitize — renderToolsList (toolkit sidebar sink)', () => { + const tools: Array<{ name: string; description: string; riskTier: RiskTier }> = [ + { name: 'createPlan', description: 'a safe tool', riskTier: 'read' }, + { name: XSS_IMG, description: XSS_SCRIPT, riskTier: 'destructive' }, + ]; + + it('escapes tool names and descriptions', () => { + const out = renderToolsList(tools); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).not.toContain(' { + const xssPlan = { + id: 'p1', + title: XSS_IMG, + createdAt: '2026-01-01T00:00:00.000Z', + items: [ + { + id: 'i1', + title: XSS_SCRIPT, + status: 'in_progress', + priority: 'high', + notes: XSS_IMG, + createdAt: '2026-01-01T00:00:00.000Z', + }, + ], + } as unknown as Plan; + + it('escapes plan title, item title, and notes', () => { + const out = renderLivePlanCard(xssPlan); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).toContain('<img'); + expect(out).not.toContain(' { + const out = renderLivePlanCard({ + id: 'p2', + title: 'Empty', + createdAt: '2026-01-01T00:00:00.000Z', + items: [], + }); + expect(hasExecutablePayload(out)).toBe(false); + expect(out).toContain('No items yet'); + }); +}); + +describe('sanitize — renderArtifact (error card sink)', () => { + const baseEvent: AuditEvent = { + id: 'e1', + timestamp: new Date().toISOString(), + type: 'function_executed', + functionName: 'createPlan', + }; + + it('escapes function_errored messages', () => { + const out = renderArtifact({ + 
...baseEvent, + type: 'function_errored', + error: XSS_IMG, + }); + expect(out).not.toBeNull(); + expect(hasExecutablePayload(out!)).toBe(false); + expect(out).toContain('<img'); + }); + + it('returns null for function_executed events (plan cards are rendered by PlanBoard)', () => { + const out = renderArtifact({ + ...baseEvent, + args: { title: 'x' }, + result: { plan: { id: 'p1', title: 'x', createdAt: '', items: [] } }, + }); + expect(out).toBeNull(); + }); + + it('returns null for unrendered event types', () => { + expect(renderArtifact({ ...baseEvent, type: 'function_requested' })).toBeNull(); + }); + + it('returns null for unrecognized tool names', () => { + expect(renderArtifact({ ...baseEvent, functionName: 'unknownTool' })).toBeNull(); + }); +}); + +describe('sanitize — escapeHtml', () => { + it('escapes all five HTML metacharacters', () => { + expect(escapeHtml(`<>"'&`)).toBe('<>"'&'); + }); +}); diff --git a/apps/planning-demo/tests/submit.test.ts b/apps/planning-demo/tests/submit.test.ts new file mode 100644 index 0000000..4ddc7ce --- /dev/null +++ b/apps/planning-demo/tests/submit.test.ts @@ -0,0 +1,385 @@ +import { describe, it, expect, beforeAll, vi, afterAll, afterEach } from 'vitest'; + +// Builds a non-streaming-style SSE Response whose `content` deltas concatenate +// to `text`. The demo agent is configured with streaming, so fetch must return +// an SSE stream (not JSON) for the run to complete. +function sseResponse(text: string): Response { + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue( + encoder.encode(`data: ${JSON.stringify({ choices: [{ delta: { content: text } }] })}\n`), + ); + controller.enqueue(encoder.encode('data: [DONE]\n')); + controller.close(); + }, + }); + return new Response(stream, { status: 200 }); +} + +// Builds an SSE Response whose deltas carry one tool call each (assembled by the +// streaming parser by `index`). 
The `arguments` strings are raw JSON, as the +// OpenAI API sends them. +function sseToolCallsResponse(calls: Array<{ name: string; arguments: string }>): Response { + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + calls.forEach((c, i) => { + controller.enqueue( + encoder.encode( + `data: ${JSON.stringify({ + choices: [ + { + delta: { + tool_calls: [ + { + index: i, + id: `call_${i + 1}`, + function: { name: c.name, arguments: c.arguments }, + }, + ], + }, + }, + ], + })}\n`, + ), + ); + }); + controller.enqueue(encoder.encode('data: [DONE]\n')); + controller.close(); + }, + }); + return new Response(stream, { status: 200 }); +} + +// Loads the full planning-demo body so main.ts's getEl() lookups succeed at import. +const DEMO_BODY = ` +
    + +
    +
    +
    + + + +
    +
    +
    +
    + + +`; + +const CONFIG_KEY = 'forgewisp.planning-demo.config'; +const PLANS_KEY = 'forgewisp.plans'; +const VALID_CONFIG = { endpoint: 'https://llm.example/v1/chat', model: 'gpt-4o', apiKey: 'k' }; + +afterEach(() => { + localStorage.removeItem(PLANS_KEY); +}); + +describe('submit race guard', () => { + beforeAll(async () => { + vi.resetModules(); + localStorage.setItem(CONFIG_KEY, JSON.stringify(VALID_CONFIG)); + document.body.innerHTML = DEMO_BODY; + + // Stub fetch with a never-resolving promise so agent.run stays in flight. + globalThis.fetch = vi.fn().mockReturnValue(new Promise(() => {})) as typeof fetch; + + await import('../src/main.js'); + }); + + afterAll(() => { + vi.restoreAllMocks(); + }); + + it('blocks a second submit while a run is in flight', async () => { + const form = document.getElementById('chat-form') as HTMLFormElement; + const input = document.getElementById('chat-input') as HTMLInputElement; + const chat = document.getElementById('chat-messages'); + + input.value = 'first message'; + form.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true })); + + // Let the synchronous portion of the handler run (sets inFlightController). + await Promise.resolve(); + + expect(chat!.querySelectorAll('.message-user').length).toBe(1); + + // Second submit while the first is still in flight. + input.value = 'second message'; + form.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true })); + await Promise.resolve(); + + // Race guard prevented the second message from being appended. 
+ expect(chat!.querySelectorAll('.message-user').length).toBe(1); + expect(input.disabled).toBe(true); + }); +}); + +describe('chat history', () => { + const recordedBodies: Array<{ messages: Array<{ role: string; content: string }> }> = []; + + beforeAll(async () => { + vi.resetModules(); + localStorage.setItem(CONFIG_KEY, JSON.stringify(VALID_CONFIG)); + document.body.innerHTML = DEMO_BODY; + + const fetchMock = vi.fn().mockImplementation((_url, init: RequestInit) => { + const body = JSON.parse(init.body as string) as { + messages: Array<{ role: string; content: string }>; + }; + recordedBodies.push(body); + const lastUser = body.messages[body.messages.length - 1]; + const reply = lastUser ? `reply to: ${lastUser.content}` : 'reply'; + return Promise.resolve(sseResponse(reply)); + }); + globalThis.fetch = fetchMock as typeof fetch; + + await import('../src/main.js'); + }); + + afterAll(() => { + vi.restoreAllMocks(); + }); + + it('threads prior turns into the next request as history', async () => { + const form = document.getElementById('chat-form') as HTMLFormElement; + const input = document.getElementById('chat-input') as HTMLInputElement; + + input.value = 'turn one'; + form.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true })); + await vi.waitFor(() => { + expect(document.querySelectorAll('.message-assistant:not(.thinking-indicator)').length).toBe( + 1, + ); + }); + + input.value = 'turn two'; + form.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true })); + await vi.waitFor(() => { + expect(recordedBodies.length).toBeGreaterThanOrEqual(2); + expect(document.querySelectorAll('.message-user').length).toBe(2); + }); + + const second = recordedBodies[recordedBodies.length - 1]!.messages; + expect(second[0]!.role).toBe('system'); + expect(second).toContainEqual({ role: 'user', content: 'turn one' }); + expect(second).toContainEqual({ role: 'assistant', content: 'reply to: turn one' }); + expect(second[second.length - 
1]).toEqual({ role: 'user', content: 'turn two' }); + }); +}); + +describe('createPlan runs directly (read-tier, no confirmation)', () => { + beforeAll(async () => { + vi.resetModules(); + localStorage.setItem(CONFIG_KEY, JSON.stringify(VALID_CONFIG)); + document.body.innerHTML = DEMO_BODY; + + // First request returns two read-tier tools (listPlans + createPlan) in one + // assistant turn; subsequent requests return a plain final reply so the + // tool loop terminates. Both tools are read-tier, so neither triggers a + // confirmation — the agent self-manages its scratchpad without prompts. + let callCount = 0; + const fetchMock = vi.fn().mockImplementation(() => { + callCount += 1; + if (callCount === 1) { + return Promise.resolve( + sseToolCallsResponse([ + { name: 'listPlans', arguments: JSON.stringify({}) }, + { + name: 'createPlan', + arguments: JSON.stringify({ + title: 'Saturday', + items: [{ title: 'gym' }, { title: 'groceries', priority: 'high' }], + }), + }, + ]), + ); + } + return Promise.resolve(sseResponse('Plan created and tracked.')); + }); + globalThis.fetch = fetchMock as typeof fetch; + + await import('../src/main.js'); + }); + + afterAll(() => { + vi.restoreAllMocks(); + }); + + it('runs createPlan with no confirm dialog and persists the plan', async () => { + const form = document.getElementById('chat-form') as HTMLFormElement; + const input = document.getElementById('chat-input') as HTMLInputElement; + const overlay = document.getElementById('confirm-overlay') as HTMLDivElement; + const artifacts = document.getElementById('artifacts-list') as HTMLUListElement; + + input.value = 'plan my Saturday'; + form.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true })); + + // The confirm overlay must stay hidden — createPlan is read-tier, so the + // agent runs it directly with no user prompt. + await vi.waitFor(() => { + // The live plan card renders in place (one
  • per plan, keyed by planId), + // not a per-event prepend. + const card = artifacts.querySelector('.artifact-plan'); + expect(card).not.toBeNull(); + expect(card?.textContent).toContain('Saturday'); + expect(card?.textContent).toContain('gym'); + expect(card?.textContent).toContain('groceries'); + expect(artifacts.querySelectorAll('.artifact-plan').length).toBe(1); + }); + expect(overlay.classList.contains('hidden')).toBe(true); + + // The plan was persisted under the namespaced localStorage key. + expect(localStorage.getItem(PLANS_KEY)).not.toBeNull(); + }); +}); + +describe('deletePlan runs directly (read-tier, no confirmation)', () => { + beforeAll(async () => { + vi.resetModules(); + localStorage.setItem(CONFIG_KEY, JSON.stringify(VALID_CONFIG)); + document.body.innerHTML = DEMO_BODY; + + let callCount = 0; + const fetchMock = vi.fn().mockImplementation(() => { + callCount += 1; + if (callCount === 1) { + return Promise.resolve( + sseToolCallsResponse([ + { name: 'deletePlan', arguments: JSON.stringify({ planId: 'nope' }) }, + ]), + ); + } + return Promise.resolve(sseResponse('okay')); + }); + globalThis.fetch = fetchMock as typeof fetch; + + await import('../src/main.js'); + }); + + afterAll(() => { + vi.restoreAllMocks(); + }); + + it('runs the deletePlan handler immediately with no confirm dialog', async () => { + const form = document.getElementById('chat-form') as HTMLFormElement; + const input = document.getElementById('chat-input') as HTMLInputElement; + const overlay = document.getElementById('confirm-overlay') as HTMLDivElement; + const auditLog = document.getElementById('audit-log') as HTMLUListElement; + + input.value = 'delete a plan'; + form.dispatchEvent(new SubmitEvent('submit', { cancelable: true, bubbles: true })); + + // The handler runs directly (read-tier). deletePlan on an unknown id is a + // no-op that returns existed:false without throwing and persists nothing. 
+ await vi.waitFor(() => { + const types = Array.from(auditLog.querySelectorAll('.audit-type')).map( + (el) => el.textContent ?? '', + ); + expect(types).toContain('executed'); + }); + expect(overlay.classList.contains('hidden')).toBe(true); + expect(localStorage.getItem(PLANS_KEY)).toBeNull(); + + // No rejection ever occurs — there was no confirmation to reject. + const types = Array.from(auditLog.querySelectorAll('.audit-type')).map( + (el) => el.textContent ?? '', + ); + expect(types).not.toContain('rejected'); + }); +}); + +describe('example prompt chips', () => { + const recordedBodies: Array<{ messages: Array<{ role: string; content: string }> }> = []; + + beforeAll(async () => { + vi.resetModules(); + localStorage.setItem(CONFIG_KEY, JSON.stringify(VALID_CONFIG)); + document.body.innerHTML = DEMO_BODY; + + const fetchMock = vi.fn().mockImplementation((_url, init: RequestInit) => { + const body = JSON.parse(init.body as string) as { + messages: Array<{ role: string; content: string }>; + }; + recordedBodies.push(body); + const lastUser = body.messages[body.messages.length - 1]; + const reply = lastUser ? `reply to: ${lastUser.content}` : 'reply'; + return Promise.resolve(sseResponse(reply)); + }); + globalThis.fetch = fetchMock as typeof fetch; + + await import('../src/main.js'); + }); + + afterAll(() => { + vi.restoreAllMocks(); + }); + + it('clicking a chip fills the input and submits the prompt', async () => { + const input = document.getElementById('chat-input') as HTMLInputElement; + const chat = document.getElementById('chat-messages'); + const chips = document.querySelectorAll('#example-prompts .example-prompt'); + + expect(chips.length).toBeGreaterThan(0); + const prompt = chips[0]!.textContent ?? ''; + expect(prompt.length).toBeGreaterThan(0); + + chips[0]!.click(); + + await vi.waitFor(() => { + expect(recordedBodies.length).toBeGreaterThanOrEqual(1); + }); + + // The chip's prompt became the user message of the request. 
+ const last = recordedBodies[recordedBodies.length - 1]!.messages; + expect(last[last.length - 1]).toEqual({ role: 'user', content: prompt }); + + // A user bubble with the prompt was appended, and the input was cleared. + expect(chat!.querySelector('.message-user')?.textContent).toBe(prompt); + expect(input.value).toBe(''); + }); + + it('disables chips while a run is in flight', async () => { + vi.resetModules(); + localStorage.setItem(CONFIG_KEY, JSON.stringify(VALID_CONFIG)); + document.body.innerHTML = DEMO_BODY; + + // Never-resolving fetch so the run stays in flight. + globalThis.fetch = vi.fn().mockReturnValue(new Promise(() => {})) as typeof fetch; + await import('../src/main.js'); + + const chips = document.querySelectorAll('#example-prompts .example-prompt'); + chips[0]!.click(); + + // Let the submit handler run (it disables input + chips). + await Promise.resolve(); + + expect(chips[0]!.disabled).toBe(true); + vi.restoreAllMocks(); + }); +}); diff --git a/apps/planning-demo/tsconfig.json b/apps/planning-demo/tsconfig.json new file mode 100644 index 0000000..9d80718 --- /dev/null +++ b/apps/planning-demo/tsconfig.json @@ -0,0 +1,20 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "lib": ["ES2021", "DOM", "DOM.Iterable"], + "types": ["vite/client"], + // Resolve the workspace packages to their source so type-checking and + // type-checked ESLint work without a prior `build` of @forgewisp/core or + // @forgewisp/bundled-tools (whose package.json `types` point at dist/, + // which is gitignored and absent in a fresh CI checkout). Vite still + // bundles from dist via the workspace symlinks for the actual dev/build + // output. 
+ "baseUrl": ".", + "paths": { + "@forgewisp/core": ["../../packages/core/src/index.ts"], + "@forgewisp/bundled-tools": ["../../packages/bundled-tools/src/index.ts"] + } + }, + "include": ["src", "tests", "vite.config.ts", "vitest.config.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/apps/planning-demo/vite.config.ts b/apps/planning-demo/vite.config.ts new file mode 100644 index 0000000..ba767e0 --- /dev/null +++ b/apps/planning-demo/vite.config.ts @@ -0,0 +1,12 @@ +import { defineConfig } from 'vite'; + +export default defineConfig({ + root: '.', + build: { + outDir: 'dist', + sourcemap: true, + }, + server: { + port: 5173, + }, +}); diff --git a/apps/planning-demo/vitest.config.ts b/apps/planning-demo/vitest.config.ts new file mode 100644 index 0000000..882b980 --- /dev/null +++ b/apps/planning-demo/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + environment: 'jsdom', + include: ['tests/**/*.test.ts'], + globals: true, + }, +}); diff --git a/packages/bundled-tools/package.json b/packages/bundled-tools/package.json index 489eb3a..ab4d39e 100644 --- a/packages/bundled-tools/package.json +++ b/packages/bundled-tools/package.json @@ -1,6 +1,6 @@ { "name": "@forgewisp/bundled-tools", - "version": "0.2.0", + "version": "0.3.0", "description": "Browser-safe, ready-to-register FunctionDefinition tools for Forgewisp agents", "license": "MIT", "type": "module", diff --git a/packages/bundled-tools/src/index.ts b/packages/bundled-tools/src/index.ts index 5b4e833..a014e0f 100644 --- a/packages/bundled-tools/src/index.ts +++ b/packages/bundled-tools/src/index.ts @@ -1,6 +1,20 @@ // Re-export the core types this package is built around so consumers can import // everything they need from one place. Type-only re-exports don't affect treeshaking. 
-export type { FunctionDefinition, RiskTier, JSONSchema, JSONSchemaProperty } from '@forgewisp/core'; +export type { + FunctionDefinition, + ToolSet, + RiskTier, + JSONSchema, + JSONSchemaProperty, +} from '@forgewisp/core'; + +// Type-only import for the local `ToolSet` annotation below. This MUST stay +// type-only: a runtime value import from `@forgewisp/core` would force the +// IIFE/global build (which inlines all deps) to resolve core's `dist`, racing +// with core's own `clean: true` watch under `turbo dev --parallel`. Keeping the +// relationship types-only preserves the original "no runtime import of core" +// property so the IIFE build stays self-contained. +import type { ToolSet } from '@forgewisp/core'; export { defineTool } from './define-tool.js'; @@ -52,6 +66,32 @@ export type { GetGeolocationArgs, GetGeolocationResult } from './tools/index.js' export { removeLocalStorageItem } from './tools/index.js'; export type { RemoveLocalStorageItemArgs, RemoveLocalStorageItemResult } from './tools/index.js'; +// Planning tools (agent job-tracking scratchpad persisted in localStorage). 
+export { listPlans } from './tools/index.js'; +export type { ListPlansArgs, ListPlansResult } from './tools/index.js'; + +export { getPlan } from './tools/index.js'; +export type { GetPlanArgs, GetPlanResult } from './tools/index.js'; + +export { createPlan } from './tools/index.js'; +export type { CreatePlanArgs, CreatePlanResult, CreatePlanItemInput } from './tools/index.js'; + +export { addPlanItem } from './tools/index.js'; +export type { AddPlanItemArgs, AddPlanItemResult } from './tools/index.js'; + +export { updatePlanItem } from './tools/index.js'; +export type { UpdatePlanItemArgs, UpdatePlanItemResult } from './tools/index.js'; + +export { removePlanItem } from './tools/index.js'; +export type { RemovePlanItemArgs, RemovePlanItemResult } from './tools/index.js'; + +export { deletePlan } from './tools/index.js'; +export type { DeletePlanArgs, DeletePlanResult } from './tools/index.js'; + +// Shared plan domain types (the store module itself is not re-exported as a value — +// it is an internal helper, like `eval-math.ts`). +export type { Plan, PlanItem, PlanStatus, PlanPriority, PlanSummary } from './plan-store.js'; + import { getCurrentTime } from './tools/index.js'; import { generateUuid } from './tools/index.js'; import { evaluateMath } from './tools/index.js'; @@ -68,6 +108,13 @@ import { downloadFile } from './tools/index.js'; import { setLocalStorageItem } from './tools/index.js'; import { getGeolocation } from './tools/index.js'; import { removeLocalStorageItem } from './tools/index.js'; +import { listPlans } from './tools/index.js'; +import { getPlan } from './tools/index.js'; +import { createPlan } from './tools/index.js'; +import { addPlanItem } from './tools/index.js'; +import { updatePlanItem } from './tools/index.js'; +import { removePlanItem } from './tools/index.js'; +import { deletePlan } from './tools/index.js'; /** * Every bundled tool, ready to register. 
Use: @@ -92,6 +139,16 @@ export const BUNDLED_TOOLS = [ getBatteryInfo, listLocalStorageKeys, getLocalStorageItem, + // read — agent job-tracking scratchpad (forgewisp.plans); see plan-store.ts. + // read-tier by exception: agent-owned, bounded, schema-validated scratchpad, + // so the agent self-manages its job without onConfirmRequired prompts. + listPlans, + getPlan, + createPlan, + addPlanItem, + updatePlanItem, + removePlanItem, + deletePlan, // write copyToClipboard, speakText, @@ -101,3 +158,24 @@ export const BUNDLED_TOOLS = [ // destructive removeLocalStorageItem, ] as const; + +/** + * The 7 plan-management tools as a ready-to-register `ToolSet`: + * `listPlans`, `getPlan`, `createPlan`, `addPlanItem`, `updatePlanItem`, + * `removePlanItem`, `deletePlan`. All read-tier by exception (agent-owned scratchpad; + * see plan-store.ts header), so the agent self-manages them with no `onConfirmRequired` + * prompts. Register in one call: + * + * agent.registerToolSet(PLANNING_TOOLS); + * + * Built as a plain `ToolSet`-typed literal (not via `defineToolSet`) so this package + * keeps a types-only relationship with `@forgewisp/core` — see the import note above. + * The heterogeneous tuple is assignable to `readonly FunctionDefinition[]` + * without a cast (handler contravariance + `never` as the covariant read type). + * Compose with other tools by spreading `.tools` into a `defineToolSet` call. + */ +export const PLANNING_TOOLS: ToolSet = { + name: 'planning', + description: 'Agent job-tracking scratchpad persisted in localStorage.', + tools: [listPlans, getPlan, createPlan, addPlanItem, updatePlanItem, removePlanItem, deletePlan], +}; diff --git a/packages/bundled-tools/src/plan-store.ts b/packages/bundled-tools/src/plan-store.ts new file mode 100644 index 0000000..9e2fb37 --- /dev/null +++ b/packages/bundled-tools/src/plan-store.ts @@ -0,0 +1,272 @@ +/** + * Persistent plan store for the agent-facing planning tools. 
+ * + * This is an internal helper module — it is intentionally **not** re-exported from + * the package barrel (`src/index.ts`) as a value, mirroring the `eval-math.ts` + * pattern. The tool files in `src/tools/` are thin `defineTool` wrappers over these + * functions; consumers reach the capability through the tools, not the store. + * + * Plans are persisted in `localStorage` under a single namespaced key + * (`forgewisp.plans`) as a serialized JSON blob: `Record`. A single + * blob (rather than one key per plan) keeps listing atomic and avoids key pollution + * alongside the existing `setLocalStorageItem` / `removeLocalStorageItem` tools. + * + * Reads never throw on corrupt state — a missing or unparseable blob is treated as + * an empty store so the agent can recover by recreating plans. Mutations throw + * clear `Error`s (missing plan/item, empty patch, quota) so the core can surface + * them as `function_errored` audit events. + * + * ── Risk-tier rationale ────────────────────────────────────────────────────── + * The planning tools are the agent's own job-tracking scratchpad: the agent uses + * them to lay out and track the steps of the task it is working on. Accordingly + * every plan tool is `read`-tier, so the agent self-manages its scratchpad without + * `onConfirmRequired` prompts. This is a **deliberate, narrow exception** to the + * rule that localStorage-mutating tools are gated (`setLocalStorageItem` is `write`, + * `removeLocalStorageItem` is `destructive`). It is justified because + * `forgewisp.plans` is an agent-owned, bounded (MAX_PLAN_ITEMS), single-key, + * schema-validated scratchpad — analogous to the agent's reasoning buffer, not + * arbitrary user-controlled storage. It is an exception, not a precedent; it does + * not license demoting the generic localStorage tools. 
+ * + * ── Concurrency invariant ──────────────────────────────────────────────────── + * The core executor runs validated tool calls concurrently via `Promise.allSettled` + * within a round, but it evaluates each handler as `await Promise.resolve(handler(args))` + * — i.e. the handler (a sync function) runs to completion *before* the await yields. + * Because every mutation below is fully synchronous with no `await` between + * `loadAll()` and `saveAll()`, each read-modify-write is atomic and two mutations + * issued in the same round cannot interleave. **Keep the mutation functions + * synchronous and never insert an `await` between `loadAll()` and `saveAll()`**, + * or that atomicity is lost and a real race appears. + */ + +/** The localStorage key holding the serialized `Record` blob. */ +export const PLAN_STORE_KEY = 'forgewisp.plans'; + +/** Maximum number of items a single plan may hold. */ +export const MAX_PLAN_ITEMS = 200; + +export type PlanStatus = 'todo' | 'in_progress' | 'done'; + +export type PlanPriority = 'low' | 'medium' | 'high'; + +export interface PlanItem { + /** RFC 4122 v4 UUID. */ + id: string; + title: string; + status: PlanStatus; + priority?: PlanPriority; + notes?: string; + /** ISO 8601 creation timestamp. */ + createdAt: string; +} + +export interface Plan { + id: string; + title: string; + createdAt: string; + items: PlanItem[]; +} + +/** Compact projection returned by `listPlans` — cheap for the model to read. */ +export interface PlanSummary { + id: string; + title: string; + itemCount: number; + doneCount: number; + createdAt: string; +} + +/** Input shape for a new item (no id/status/createdAt — those are assigned). */ +export interface NewPlanItemInput { + title: string; + priority?: PlanPriority; + notes?: string; +} + +/** Patch shape for `updatePlanItem` — every field is optional. 
*/
+export interface PlanItemPatch {
+  title?: string;
+  status?: PlanStatus;
+  priority?: PlanPriority;
+  notes?: string;
+}
+
+/** Result shape for removal operations — the literal `true` is an ack marker. */
+export interface RemovalResult {
+  removed: true;
+  existed: boolean;
+}
+
+// ─── Storage access ──────────────────────────────────────────────────────────
+
+/**
+ * Return the global `localStorage` for callers that cannot proceed without it.
+ *
+ * @throws Error when `globalThis.localStorage` is falsy.
+ */
+function requireStorage(): Storage {
+  const storage = globalThis.localStorage;
+  if (!storage) {
+    throw new Error('localStorage is unavailable in this environment.');
+  }
+  return storage;
+}
+
+/**
+ * Structural check that a parsed store entry carries the fields of a `Plan`:
+ * string `id`, `title` and `createdAt`, plus an `items` array whose elements are
+ * all non-null objects. Item fields are not inspected further.
+ */
+function isPlanShaped(value: unknown): value is Plan {
+  if (value === null || typeof value !== 'object') return false;
+  const candidate = value as Partial<Record<keyof Plan, unknown>>;
+  return (
+    typeof candidate.id === 'string' &&
+    typeof candidate.title === 'string' &&
+    typeof candidate.createdAt === 'string' &&
+    Array.isArray(candidate.items) &&
+    candidate.items.every((item) => item !== null && typeof item === 'object')
+  );
+}
+
+/**
+ * Read and parse the plan store. Returns an empty record when the key is absent
+ * or the blob fails to parse — never throws, so a corrupt store is recoverable.
+ *
+ * Entries that are not plan-shaped are left out of the returned record, so the
+ * operations that dereference `plan.items` do not crash on a partially corrupt
+ * blob. Because `saveAll` writes back whatever record it is given, such entries
+ * are gone from storage after the next successful mutation.
+ *
+ * @returns The stored plans keyed by plan id; `{}` when nothing usable is stored.
+ */
+export function loadAll(): Record<string, Plan> {
+  const storage = globalThis.localStorage;
+  if (!storage) return {};
+  const raw = storage.getItem(PLAN_STORE_KEY);
+  if (!raw) return {};
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    return {};
+  }
+  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
+    return {};
+  }
+  // Object.fromEntries defines own data properties, so a literal "__proto__" key
+  // in the blob becomes an ordinary entry rather than replacing the prototype.
+  return Object.fromEntries(
+    Object.entries(parsed).filter((entry): entry is [string, Plan] => isPlanShaped(entry[1])),
+  );
+}
+
+/**
+ * Serialize and write the whole store. Throws on quota errors.
+ *
+ * @param plans The complete record to persist under `PLAN_STORE_KEY`.
+ * @throws Error when `localStorage` is unavailable (via `requireStorage`).
+ */
+export function saveAll(plans: Record<string, Plan>): void {
+  const storage = requireStorage();
+  storage.setItem(PLAN_STORE_KEY, JSON.stringify(plans));
+}
+
+// ─── Utilities ───────────────────────────────────────────────────────────────
+
+/** Generate an RFC 4122 v4 UUID, throwing if the Web Crypto API is missing. */
+export function genId(): string {
+  const cryptoObj = globalThis.crypto;
+  if (!cryptoObj || typeof cryptoObj.randomUUID !== 'function') {
+    throw new Error('crypto.randomUUID is unavailable in this environment.');
+  }
+  return cryptoObj.randomUUID();
+}
+
+/** Current instant as an ISO 8601 string.
*/
+export function nowIso(): string {
+  return new Date().toISOString();
+}
+
+/**
+ * Look up a plan by id in an already-loaded store.
+ *
+ * @throws Error when `plans` has no own entry for `planId`.
+ */
+function findPlan(plans: Record<string, Plan>, planId: string): Plan {
+  // Own-property lookup only: `planId` is caller-supplied text, and a bare
+  // `plans[planId]` would resolve names such as "constructor" or "toString"
+  // through Object.prototype and hand back a non-plan value instead of failing.
+  const plan = Object.prototype.hasOwnProperty.call(plans, planId) ? plans[planId] : undefined;
+  if (!plan) {
+    throw new Error(`No plan with id "${planId}".`);
+  }
+  return plan;
+}
+
+/**
+ * Look up an item by id within a plan.
+ *
+ * @throws Error when no item of `plan` has the given id.
+ */
+function findItem(plan: Plan, itemId: string): PlanItem {
+  const item = plan.items.find((i) => i.id === itemId);
+  if (!item) {
+    throw new Error(`No item with id "${itemId}" in plan "${plan.id}".`);
+  }
+  return item;
+}
+
+/**
+ * Build a new item from caller input: fresh id, status "todo", current timestamp.
+ * `priority` and `notes` are only set when provided, so absent optionals stay
+ * absent in the serialized blob.
+ */
+function makeItem(input: NewPlanItemInput): PlanItem {
+  const item: PlanItem = {
+    id: genId(),
+    title: input.title,
+    status: 'todo',
+    createdAt: nowIso(),
+  };
+  if (input.priority !== undefined) item.priority = input.priority;
+  if (input.notes !== undefined) item.notes = input.notes;
+  return item;
+}
+
+// ─── Domain operations ───────────────────────────────────────────────────────
+
+/**
+ * Create and persist a new plan, optionally seeded with items. Returns the plan.
+ *
+ * @throws Error when `title` is empty or more than `MAX_PLAN_ITEMS` seed items are given.
+ */
+export function createPlan(title: string, items?: NewPlanItemInput[]): Plan {
+  if (title.length === 0) {
+    throw new Error('Plan title must not be empty.');
+  }
+  const seedItems = items ?? [];
+  // Same cap that addPlanItem enforces, so a seeded plan cannot start over the limit.
+  if (seedItems.length > MAX_PLAN_ITEMS) {
+    throw new Error(`A plan may hold at most ${MAX_PLAN_ITEMS} items.`);
+  }
+  const plans = loadAll();
+  const plan: Plan = {
+    id: genId(),
+    title,
+    createdAt: nowIso(),
+    items: seedItems.map(makeItem),
+  };
+  plans[plan.id] = plan;
+  saveAll(plans);
+  return plan;
+}
+
+/** List all plans as compact summaries (no item bodies). */
+export function listPlans(): PlanSummary[] {
+  const plans = loadAll();
+  return Object.values(plans).map((plan) => ({
+    id: plan.id,
+    title: plan.title,
+    itemCount: plan.items.length,
+    doneCount: plan.items.filter((i) => i.status === 'done').length,
+    createdAt: plan.createdAt,
+  }));
+}
+
+/** Get a full plan (with all items). Throws if the plan id is unknown. */
+export function getPlan(planId: string): Plan {
+  const plans = loadAll();
+  return findPlan(plans, planId);
+}
+
+/** Add an item to a plan and persist it. Returns the created item.
*/
+export function addPlanItem(planId: string, input: NewPlanItemInput): PlanItem {
+  const plans = loadAll();
+  const plan = findPlan(plans, planId);
+  // Enforce the per-plan cap before creating the item.
+  if (plan.items.length >= MAX_PLAN_ITEMS) {
+    throw new Error(`Plan "${plan.title}" is full (max ${MAX_PLAN_ITEMS} items).`);
+  }
+  const item = makeItem(input);
+  plan.items.push(item);
+  saveAll(plans);
+  return item;
+}
+
+/**
+ * Update fields of an item. Throws if the plan/item id is unknown or the patch is empty.
+ *
+ * Only fields present in `patch` (not `undefined`) are overwritten; the rest of
+ * the item is left as stored. Returns the updated item.
+ */
+export function updatePlanItem(planId: string, itemId: string, patch: PlanItemPatch): PlanItem {
+  const hasPatch =
+    patch.title !== undefined ||
+    patch.status !== undefined ||
+    patch.priority !== undefined ||
+    patch.notes !== undefined;
+  // Reject an empty patch before touching storage.
+  if (!hasPatch) {
+    throw new Error('Provide at least one field to update (title, status, priority, or notes).');
+  }
+  const plans = loadAll();
+  const plan = findPlan(plans, planId);
+  const item = findItem(plan, itemId);
+  if (patch.title !== undefined) item.title = patch.title;
+  if (patch.status !== undefined) item.status = patch.status;
+  if (patch.priority !== undefined) item.priority = patch.priority;
+  if (patch.notes !== undefined) item.notes = patch.notes;
+  saveAll(plans);
+  return item;
+}
+
+/**
+ * Remove an item from a plan. Returns whether it existed.
+ *
+ * Throws if the plan id is unknown. An unknown item id is not an error: the call
+ * returns `existed: false` and leaves storage untouched.
+ */
+export function removePlanItem(planId: string, itemId: string): RemovalResult {
+  const plans = loadAll();
+  const plan = findPlan(plans, planId);
+  const remaining = plan.items.filter((i) => i.id !== itemId);
+  const existed = remaining.length < plan.items.length;
+  // Write only when something was actually removed, as deletePlan does; a no-op
+  // removal should not rewrite the whole blob.
+  if (existed) {
+    plan.items = remaining;
+    saveAll(plans);
+  }
+  return { removed: true, existed };
+}
+
+/** Delete an entire plan. Returns whether it existed.
*/
+export function deletePlan(planId: string): RemovalResult {
+  const plans = loadAll();
+  // Own-property check rather than `in`: `in` also matches names inherited from
+  // Object.prototype (e.g. "toString"), which would report `existed: true` and
+  // rewrite the store for a plan that was never there.
+  const existed = Object.prototype.hasOwnProperty.call(plans, planId);
+  if (existed) {
+    delete plans[planId];
+    saveAll(plans);
+  }
+  return { removed: true, existed };
+}
diff --git a/packages/bundled-tools/src/tools/add-plan-item.ts b/packages/bundled-tools/src/tools/add-plan-item.ts
new file mode 100644
index 0000000..85149f3
--- /dev/null
+++ b/packages/bundled-tools/src/tools/add-plan-item.ts
@@ -0,0 +1,80 @@
+import type { FunctionDefinition, JSONSchema } from '@forgewisp/core';
+import { defineTool } from '../define-tool.js';
+
+import {
+  addPlanItem as addPlanItemStore,
+  type PlanItem,
+  type PlanPriority,
+} from '../plan-store.js';
+
+// ─── Args & result ──────────────────────────────────────────────────────────
+
+export interface AddPlanItemArgs {
+  /** The id of the plan to add the item to (obtain it from listPlans). */
+  planId: string;
+  /** Title of the new item, e.g. "Book flight". */
+  title: string;
+  /** Optional priority. */
+  priority?: PlanPriority;
+  /** Optional longer notes describing the item. */
+  notes?: string;
+}
+
+export interface AddPlanItemResult {
+  /** The created item, with its generated id and starting status "todo". */
+  item: PlanItem;
+}
+
+// ─── Schema ──────────────────────────────────────────────────────────────────
+
+const parameters: JSONSchema = {
+  type: 'object',
+  properties: {
+    planId: {
+      type: 'string',
+      description: 'The id of the plan to add the item to. 
Use listPlans to find plan ids.', + minLength: 1, + maxLength: 100, + }, + title: { + type: 'string', + description: 'A short title for the new item.', + minLength: 1, + maxLength: 200, + }, + priority: { + type: 'string', + enum: ['low', 'medium', 'high'], + description: 'Optional priority for the new item.', + }, + notes: { + type: 'string', + description: 'Optional longer notes describing the item.', + minLength: 0, + maxLength: 2000, + }, + }, + required: ['planId', 'title'], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const addPlanItem: FunctionDefinition = defineTool({ + name: 'addPlanItem', + description: + 'Add a step to a plan you are tracking. Use this to break a task into concrete steps before ' + + 'starting work. The step starts with status "todo". Throws if the plan does not exist or is ' + + 'full (max 200 items). Use listPlans to find the plan id.', + // read-tier: agent-owned scratchpad — see plan-store.ts header. + riskTier: 'read', + parameters, + handler: (args: AddPlanItemArgs): AddPlanItemResult => { + const item = addPlanItemStore(args.planId, { + title: args.title, + ...(args.priority !== undefined && { priority: args.priority }), + ...(args.notes !== undefined && { notes: args.notes }), + }); + return { item }; + }, +}); diff --git a/packages/bundled-tools/src/tools/create-plan.ts b/packages/bundled-tools/src/tools/create-plan.ts new file mode 100644 index 0000000..c0dcdd7 --- /dev/null +++ b/packages/bundled-tools/src/tools/create-plan.ts @@ -0,0 +1,83 @@ +import type { FunctionDefinition, JSONSchema } from '@forgewisp/core'; +import { defineTool } from '../define-tool.js'; + +import { createPlan as createPlanStore, type Plan, type PlanPriority } from '../plan-store.js'; + +// ─── Args & result ────────────────────────────────────────────────────────── + +export interface CreatePlanItemInput { + /** Title of the item, e.g. "Book flight". 
*/ + title: string; + /** Optional priority. Defaults to unset (treated as normal). */ + priority?: PlanPriority; +} + +export interface CreatePlanArgs { + /** Name of the plan, e.g. "Weekend trip" or "Q3 launch". */ + title: string; + /** Optional initial items to seed the plan with. */ + items?: CreatePlanItemInput[]; +} + +export interface CreatePlanResult { + /** The created plan, including generated id and items. */ + plan: Plan; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const parameters: JSONSchema = { + type: 'object', + properties: { + title: { + type: 'string', + description: 'A short, human-readable name for the plan.', + minLength: 1, + maxLength: 200, + }, + items: { + type: 'array', + description: 'Optional initial items to seed the plan with.', + maxItems: 50, + items: { + type: 'object', + properties: { + title: { + type: 'string', + description: 'Title of the item.', + minLength: 1, + maxLength: 200, + }, + priority: { + type: 'string', + enum: ['low', 'medium', 'high'], + description: 'Optional priority for the item.', + }, + }, + required: ['title'], + additionalProperties: false, + }, + }, + }, + required: ['title'], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const createPlan: FunctionDefinition = defineTool({ + name: 'createPlan', + description: + 'Create a plan to track a multi-step task you are working on. Call this up front for any ' + + 'non-trivial request with 2+ steps, then add items for each step. The plan is your job ' + + 'scratchpad, persisted in localStorage under "forgewisp.plans" so you stay oriented across ' + + 'rounds. Returns the new plan with its generated id and items. Use listPlans to discover ' + + 'existing plans and getPlan to read one in full.', + // read-tier: agent-owned scratchpad — see plan-store.ts header. 
+ riskTier: 'read', + parameters, + handler: (args: CreatePlanArgs): CreatePlanResult => { + const plan = createPlanStore(args.title, args.items); + return { plan }; + }, +}); diff --git a/packages/bundled-tools/src/tools/delete-plan.ts b/packages/bundled-tools/src/tools/delete-plan.ts new file mode 100644 index 0000000..58cb44a --- /dev/null +++ b/packages/bundled-tools/src/tools/delete-plan.ts @@ -0,0 +1,49 @@ +import type { FunctionDefinition, JSONSchema } from '@forgewisp/core'; +import { defineTool } from '../define-tool.js'; + +import { deletePlan as deletePlanStore } from '../plan-store.js'; + +// ─── Args & result ────────────────────────────────────────────────────────── + +export interface DeletePlanArgs { + /** The id of the plan to delete (obtain it from listPlans). */ + planId: string; +} + +export interface DeletePlanResult { + /** Whether the plan existed before deletion. */ + existed: boolean; + removed: true; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const parameters: JSONSchema = { + type: 'object', + properties: { + planId: { + type: 'string', + description: 'The id of the plan to delete. Use listPlans to find plan ids.', + minLength: 1, + maxLength: 100, + }, + }, + required: ['planId'], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const deletePlan: FunctionDefinition = defineTool({ + name: 'deletePlan', + description: + 'Delete a plan you finished with or abandoned. Call this to clean up your scratchpad once ' + + 'all steps are done or the task is no longer relevant. Use listPlans to find the plan id ' + + 'first. Prefer removePlanItem to prune individual steps rather than deleting the whole plan.', + // read-tier: agent-owned scratchpad — see plan-store.ts header. 
+ riskTier: 'read', + parameters, + handler: (args: DeletePlanArgs): DeletePlanResult => { + return deletePlanStore(args.planId); + }, +}); diff --git a/packages/bundled-tools/src/tools/get-plan.ts b/packages/bundled-tools/src/tools/get-plan.ts new file mode 100644 index 0000000..eacf8a7 --- /dev/null +++ b/packages/bundled-tools/src/tools/get-plan.ts @@ -0,0 +1,47 @@ +import type { FunctionDefinition, JSONSchema } from '@forgewisp/core'; +import { defineTool } from '../define-tool.js'; + +import { getPlan as getPlanStore, type Plan } from '../plan-store.js'; + +// ─── Args & result ────────────────────────────────────────────────────────── + +export interface GetPlanArgs { + /** The id of the plan to read (obtain it from listPlans). */ + planId: string; +} + +export interface GetPlanResult { + /** The full plan, including every item. */ + plan: Plan; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const parameters: JSONSchema = { + type: 'object', + properties: { + planId: { + type: 'string', + description: 'The id of the plan to read. Use listPlans to find plan ids.', + minLength: 1, + maxLength: 100, + }, + }, + required: ['planId'], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const getPlan: FunctionDefinition = defineTool({ + name: 'getPlan', + description: + 'Read a plan in full (all items with their ids, statuses, priorities, notes, and creation ' + + 'times) before editing it, so you act on current state rather than a stale snapshot. Use ' + + 'listPlans first to obtain the plan id. 
Throws if the plan does not exist.', + riskTier: 'read', + parameters, + handler: (args: GetPlanArgs): GetPlanResult => { + return { plan: getPlanStore(args.planId) }; + }, +}); diff --git a/packages/bundled-tools/src/tools/index.ts b/packages/bundled-tools/src/tools/index.ts index 1a81982..9c65328 100644 --- a/packages/bundled-tools/src/tools/index.ts +++ b/packages/bundled-tools/src/tools/index.ts @@ -60,3 +60,25 @@ export type { RemoveLocalStorageItemArgs, RemoveLocalStorageItemResult, } from './remove-local-storage-item.js'; + +// Planning tools (agent job-tracking scratchpad persisted in localStorage). +export { listPlans } from './list-plans.js'; +export type { ListPlansArgs, ListPlansResult } from './list-plans.js'; + +export { getPlan } from './get-plan.js'; +export type { GetPlanArgs, GetPlanResult } from './get-plan.js'; + +export { createPlan } from './create-plan.js'; +export type { CreatePlanArgs, CreatePlanResult, CreatePlanItemInput } from './create-plan.js'; + +export { addPlanItem } from './add-plan-item.js'; +export type { AddPlanItemArgs, AddPlanItemResult } from './add-plan-item.js'; + +export { updatePlanItem } from './update-plan-item.js'; +export type { UpdatePlanItemArgs, UpdatePlanItemResult } from './update-plan-item.js'; + +export { removePlanItem } from './remove-plan-item.js'; +export type { RemovePlanItemArgs, RemovePlanItemResult } from './remove-plan-item.js'; + +export { deletePlan } from './delete-plan.js'; +export type { DeletePlanArgs, DeletePlanResult } from './delete-plan.js'; diff --git a/packages/bundled-tools/src/tools/list-plans.ts b/packages/bundled-tools/src/tools/list-plans.ts new file mode 100644 index 0000000..d09f0c8 --- /dev/null +++ b/packages/bundled-tools/src/tools/list-plans.ts @@ -0,0 +1,39 @@ +import type { FunctionDefinition, JSONSchema } from '@forgewisp/core'; +import { defineTool } from '../define-tool.js'; + +import { listPlans as listPlansStore, type PlanSummary } from '../plan-store.js'; + +// ─── Args & 
result ────────────────────────────────────────────────────────── + +// eslint-disable-next-line @typescript-eslint/no-empty-object-type -- no-arg tool +export interface ListPlansArgs {} + +export interface ListPlansResult { + /** Compact summaries of every stored plan. Empty array when none exist. */ + plans: PlanSummary[]; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const parameters: JSONSchema = { + type: 'object', + properties: {}, + required: [], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const listPlans: FunctionDefinition = defineTool({ + name: 'listPlans', + description: + 'List your plans as compact summaries (id, title, item counts by status, creation time) to ' + + 'see what you are currently tracking. Call this at the start of a turn if you may have an ' + + 'in-progress plan to resume, and before getPlan/addPlanItem/updatePlanItem/removePlanItem/' + + 'deletePlan to discover the plan id to operate on. Returns an empty array when none exist.', + riskTier: 'read', + parameters, + handler: (): ListPlansResult => { + return { plans: listPlansStore() }; + }, +}); diff --git a/packages/bundled-tools/src/tools/remove-plan-item.ts b/packages/bundled-tools/src/tools/remove-plan-item.ts new file mode 100644 index 0000000..1c93295 --- /dev/null +++ b/packages/bundled-tools/src/tools/remove-plan-item.ts @@ -0,0 +1,57 @@ +import type { FunctionDefinition, JSONSchema } from '@forgewisp/core'; +import { defineTool } from '../define-tool.js'; + +import { removePlanItem as removePlanItemStore } from '../plan-store.js'; + +// ─── Args & result ────────────────────────────────────────────────────────── + +export interface RemovePlanItemArgs { + /** The id of the plan that contains the item. */ + planId: string; + /** The id of the item to remove (obtain it from getPlan). 
*/ + itemId: string; +} + +export interface RemovePlanItemResult { + /** Whether the item existed before removal. */ + existed: boolean; + removed: true; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const parameters: JSONSchema = { + type: 'object', + properties: { + planId: { + type: 'string', + description: 'The id of the plan that contains the item.', + minLength: 1, + maxLength: 100, + }, + itemId: { + type: 'string', + description: 'The id of the item to remove. Use getPlan to find item ids.', + minLength: 1, + maxLength: 100, + }, + }, + required: ['planId', 'itemId'], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const removePlanItem: FunctionDefinition = defineTool({ + name: 'removePlanItem', + description: + 'Remove a step from your plan. Use this to drop steps that turned out to be unnecessary; ' + + 'prefer it over deleting the whole plan. Use getPlan to find the item id first. Throws if ' + + 'the plan does not exist.', + // read-tier: agent-owned scratchpad — see plan-store.ts header. + riskTier: 'read', + parameters, + handler: (args: RemovePlanItemArgs): RemovePlanItemResult => { + return removePlanItemStore(args.planId, args.itemId); + }, +}); diff --git a/packages/bundled-tools/src/tools/update-plan-item.ts b/packages/bundled-tools/src/tools/update-plan-item.ts new file mode 100644 index 0000000..4dabc49 --- /dev/null +++ b/packages/bundled-tools/src/tools/update-plan-item.ts @@ -0,0 +1,98 @@ +import type { FunctionDefinition, JSONSchema } from '@forgewisp/core'; +import { defineTool } from '../define-tool.js'; + +import { + updatePlanItem as updatePlanItemStore, + type PlanItem, + type PlanPriority, + type PlanStatus, +} from '../plan-store.js'; + +// ─── Args & result ────────────────────────────────────────────────────────── + +export interface UpdatePlanItemArgs { + /** The id of the plan that contains the item. 
*/ + planId: string; + /** The id of the item to update (obtain it from getPlan). */ + itemId: string; + /** New title. */ + title?: string; + /** New status. */ + status?: PlanStatus; + /** New priority. */ + priority?: PlanPriority; + /** New notes. Pass an empty string to clear notes. */ + notes?: string; +} + +export interface UpdatePlanItemResult { + /** The updated item. */ + item: PlanItem; +} + +// ─── Schema ────────────────────────────────────────────────────────────────── + +const parameters: JSONSchema = { + type: 'object', + properties: { + planId: { + type: 'string', + description: 'The id of the plan that contains the item.', + minLength: 1, + maxLength: 100, + }, + itemId: { + type: 'string', + description: 'The id of the item to update. Use getPlan to find item ids.', + minLength: 1, + maxLength: 100, + }, + title: { + type: 'string', + description: 'New title for the item.', + minLength: 1, + maxLength: 200, + }, + status: { + type: 'string', + enum: ['todo', 'in_progress', 'done'], + description: 'New status for the item.', + }, + priority: { + type: 'string', + enum: ['low', 'medium', 'high'], + description: 'New priority for the item.', + }, + notes: { + type: 'string', + description: 'New notes for the item. Pass an empty string to clear notes.', + minLength: 0, + maxLength: 2000, + }, + }, + required: ['planId', 'itemId'], + additionalProperties: false, +}; + +// ─── Tool definition ───────────────────────────────────────────────────────── + +export const updatePlanItem: FunctionDefinition = defineTool({ + name: 'updatePlanItem', + description: + 'Update a step in your plan: set status to "in_progress" when you start it and "done" when ' + + 'you complete it, and add notes about progress or findings. At least one optional field must ' + + 'be provided. Call getPlan first if you need the current item state, then mark progress, ' + + 'reprioritize, rename, or annotate. 
Throws if the plan or item does not exist.', + // read-tier: agent-owned scratchpad — see plan-store.ts header. + riskTier: 'read', + parameters, + handler: (args: UpdatePlanItemArgs): UpdatePlanItemResult => { + const item = updatePlanItemStore(args.planId, args.itemId, { + ...(args.title !== undefined && { title: args.title }), + ...(args.status !== undefined && { status: args.status }), + ...(args.priority !== undefined && { priority: args.priority }), + ...(args.notes !== undefined && { notes: args.notes }), + }); + return { item }; + }, +}); diff --git a/packages/bundled-tools/tests/add-plan-item.test.ts b/packages/bundled-tools/tests/add-plan-item.test.ts new file mode 100644 index 0000000..f3abe72 --- /dev/null +++ b/packages/bundled-tools/tests/add-plan-item.test.ts @@ -0,0 +1,46 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { addPlanItem } from '../src/tools/add-plan-item.js'; +import type { AddPlanItemResult } from '../src/tools/add-plan-item.js'; +import { createPlan } from '../src/tools/create-plan.js'; +import type { CreatePlanResult } from '../src/tools/create-plan.js'; + +describe('addPlanItem', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + expect(addPlanItem.name).toBe('addPlanItem'); + expect(addPlanItem.riskTier).toBe('read'); + expect(typeof addPlanItem.handler).toBe('function'); + + const { parameters } = addPlanItem; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual(['planId', 'title']); + expect(parameters.additionalProperties).toBe(false); + expect(parameters.properties.priority?.enum).toEqual(['low', 'medium', 'high']); + }); + + it('adds an item with optional priority and notes', () => { + const created = createPlan.handler({ title: 'P' }) as CreatePlanResult; + const result = addPlanItem.handler({ + planId: created.plan.id, + title: 'Call mom', + priority: 'high', + notes: 
'afternoon', + }) as AddPlanItemResult; + + expect(result.item).toMatchObject({ + title: 'Call mom', + status: 'todo', + priority: 'high', + notes: 'afternoon', + }); + }); + + it('throws on an unknown plan', () => { + expect(() => addPlanItem.handler({ planId: 'nope', title: 'x' })).toThrow(/No plan with id/); + }); +}); diff --git a/packages/bundled-tools/tests/create-plan.test.ts b/packages/bundled-tools/tests/create-plan.test.ts new file mode 100644 index 0000000..b6ff9cb --- /dev/null +++ b/packages/bundled-tools/tests/create-plan.test.ts @@ -0,0 +1,51 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { createPlan } from '../src/tools/create-plan.js'; +import type { CreatePlanResult } from '../src/tools/create-plan.js'; + +describe('createPlan', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + expect(createPlan.name).toBe('createPlan'); + expect(createPlan.riskTier).toBe('read'); + expect(typeof createPlan.handler).toBe('function'); + + const { parameters } = createPlan; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual(['title']); + expect(parameters.additionalProperties).toBe(false); + + const title = parameters.properties.title; + expect(title?.type).toBe('string'); + expect(title?.minLength).toBe(1); + expect(title?.maxLength).toBe(200); + + const items = parameters.properties.items; + expect(items?.type).toBe('array'); + expect(items?.maxItems).toBe(50); + // The element object schema rejects extra properties. 
+ expect(items?.items?.additionalProperties).toBe(false); + expect(items?.items?.required).toEqual(['title']); + }); + + it('creates a plan with seeded items', () => { + const result = createPlan.handler({ + title: 'Weekend trip', + items: [{ title: 'Book flight', priority: 'high' }, { title: 'Pack bags' }], + }) as CreatePlanResult; + + expect(result.plan.title).toBe('Weekend trip'); + expect(result.plan.id).toMatch(/^[0-9a-f-]{36}$/); + expect(result.plan.items).toHaveLength(2); + expect(result.plan.items[0]).toMatchObject({ title: 'Book flight', priority: 'high' }); + }); + + it('creates an empty plan when items are omitted', () => { + const result = createPlan.handler({ title: 'Empty plan' }) as CreatePlanResult; + expect(result.plan.items).toEqual([]); + }); +}); diff --git a/packages/bundled-tools/tests/delete-plan.test.ts b/packages/bundled-tools/tests/delete-plan.test.ts new file mode 100644 index 0000000..2b3a896 --- /dev/null +++ b/packages/bundled-tools/tests/delete-plan.test.ts @@ -0,0 +1,38 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { deletePlan } from '../src/tools/delete-plan.js'; +import type { DeletePlanResult } from '../src/tools/delete-plan.js'; +import { createPlan } from '../src/tools/create-plan.js'; +import type { CreatePlanResult } from '../src/tools/create-plan.js'; +import { listPlans } from '../src/tools/list-plans.js'; +import type { ListPlansResult } from '../src/tools/list-plans.js'; + +describe('deletePlan', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + expect(deletePlan.name).toBe('deletePlan'); + expect(deletePlan.riskTier).toBe('read'); + expect(typeof deletePlan.handler).toBe('function'); + + const { parameters } = deletePlan; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual(['planId']); + expect(parameters.additionalProperties).toBe(false); + }); + + it('deletes an 
existing plan and reports existence', () => { + const created = createPlan.handler({ title: 'P', items: [{ title: 'A' }] }) as CreatePlanResult; + const result = deletePlan.handler({ planId: created.plan.id }) as DeletePlanResult; + expect(result).toEqual({ removed: true, existed: true }); + expect((listPlans.handler({}) as ListPlansResult).plans).toHaveLength(0); + }); + + it('reports existed: false for an unknown plan without throwing', () => { + const result = deletePlan.handler({ planId: 'nope' }) as DeletePlanResult; + expect(result).toEqual({ removed: true, existed: false }); + }); +}); diff --git a/packages/bundled-tools/tests/get-plan.test.ts b/packages/bundled-tools/tests/get-plan.test.ts new file mode 100644 index 0000000..d01ae96 --- /dev/null +++ b/packages/bundled-tools/tests/get-plan.test.ts @@ -0,0 +1,42 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { getPlan } from '../src/tools/get-plan.js'; +import type { GetPlanResult } from '../src/tools/get-plan.js'; +import { createPlan } from '../src/tools/create-plan.js'; +import type { CreatePlanResult } from '../src/tools/create-plan.js'; + +describe('getPlan', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + expect(getPlan.name).toBe('getPlan'); + expect(getPlan.riskTier).toBe('read'); + expect(typeof getPlan.handler).toBe('function'); + + const { parameters } = getPlan; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual(['planId']); + expect(parameters.additionalProperties).toBe(false); + expect(parameters.properties.planId?.type).toBe('string'); + expect(parameters.properties.planId?.maxLength).toBe(100); + }); + + it('returns the full plan with items', () => { + const created = createPlan.handler({ + title: 'Launch', + items: [{ title: 'A' }, { title: 'B' }], + }) as CreatePlanResult; + + const result = getPlan.handler({ planId: created.plan.id }) as 
GetPlanResult; + expect(result.plan.id).toBe(created.plan.id); + expect(result.plan.title).toBe('Launch'); + expect(result.plan.items.map((i) => i.title)).toEqual(['A', 'B']); + }); + + it('throws on an unknown plan id', () => { + expect(() => getPlan.handler({ planId: 'nope' })).toThrow(/No plan with id "nope"/); + }); +}); diff --git a/packages/bundled-tools/tests/list-plans.test.ts b/packages/bundled-tools/tests/list-plans.test.ts new file mode 100644 index 0000000..dcfda1b --- /dev/null +++ b/packages/bundled-tools/tests/list-plans.test.ts @@ -0,0 +1,39 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { listPlans } from '../src/tools/list-plans.js'; +import type { ListPlansResult } from '../src/tools/list-plans.js'; +import { createPlan } from '../src/tools/create-plan.js'; + +describe('listPlans', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + expect(listPlans.name).toBe('listPlans'); + expect(listPlans.riskTier).toBe('read'); + expect(typeof listPlans.handler).toBe('function'); + + const { parameters } = listPlans; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual([]); + expect(parameters.additionalProperties).toBe(false); + }); + + it('returns an empty array when no plans exist', () => { + const result = listPlans.handler({}) as ListPlansResult; + expect(result.plans).toEqual([]); + }); + + it('lists stored plans with item and done counts', () => { + createPlan.handler({ title: 'A', items: [{ title: 'x' }, { title: 'y' }] }); + createPlan.handler({ title: 'B' }); + + const result = listPlans.handler({}) as ListPlansResult; + const titles = result.plans.map((p) => p.title).sort(); + expect(titles).toEqual(['A', 'B']); + const a = result.plans.find((p) => p.title === 'A'); + expect(a).toMatchObject({ itemCount: 2, doneCount: 0 }); + }); +}); diff --git a/packages/bundled-tools/tests/plan-store.test.ts 
b/packages/bundled-tools/tests/plan-store.test.ts new file mode 100644 index 0000000..627b214 --- /dev/null +++ b/packages/bundled-tools/tests/plan-store.test.ts @@ -0,0 +1,158 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { + PLAN_STORE_KEY, + addPlanItem, + createPlan, + deletePlan, + genId, + getPlan, + listPlans, + loadAll, + nowIso, + removePlanItem, + updatePlanItem, +} from '../src/plan-store.js'; + +describe('plan-store utilities', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('genId returns distinct UUID-shaped strings', () => { + const a = genId(); + const b = genId(); + expect(a).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/); + expect(b).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/); + expect(a).not.toBe(b); + }); + + it('nowIso returns a parseable ISO timestamp', () => { + expect(new Date(nowIso()).getTime()).not.toBeNaN(); + }); +}); + +describe('plan-store load/save', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('loadAll returns an empty record when the key is absent', () => { + expect(loadAll()).toEqual({}); + }); + + it('loadAll returns an empty record on corrupt JSON', () => { + localStorage.setItem(PLAN_STORE_KEY, 'not valid json {{{'); + expect(loadAll()).toEqual({}); + }); + + it('loadAll returns an empty record on a non-object blob', () => { + localStorage.setItem(PLAN_STORE_KEY, JSON.stringify([1, 2, 3])); + expect(loadAll()).toEqual({}); + }); +}); + +describe('plan-store domain operations', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('createPlan persists a plan with generated id and seeded items', () => { + const plan = createPlan('Weekend trip', [ + { title: 'Book flight', priority: 'high' }, + { title: 'Pack bags' }, + ]); + expect(plan.title).toBe('Weekend trip'); + expect(plan.id).toMatch(/^[0-9a-f-]{36}$/); + expect(plan.items).toHaveLength(2); + 
expect(plan.items[0]).toMatchObject({ title: 'Book flight', status: 'todo', priority: 'high' }); + expect(plan.items[1]).toMatchObject({ title: 'Pack bags', status: 'todo' }); + expect(plan.items[1]?.priority).toBeUndefined(); + + const reloaded = loadAll(); + expect(reloaded[plan.id]?.title).toBe('Weekend trip'); + }); + + it('createPlan throws on an empty title', () => { + expect(() => createPlan('')).toThrow(/title must not be empty/); + }); + + it('listPlans returns compact summaries with item and done counts', () => { + const plan = createPlan('Launch', [{ title: 'A' }, { title: 'B' }]); + addPlanItem(plan.id, { title: 'C' }); + updatePlanItem(plan.id, plan.items[0]!.id, { status: 'done' }); + + const summaries = listPlans(); + expect(summaries).toHaveLength(1); + expect(summaries[0]).toMatchObject({ + id: plan.id, + title: 'Launch', + itemCount: 3, + doneCount: 1, + }); + }); + + it('getPlan throws on an unknown plan id', () => { + expect(() => getPlan('nope')).toThrow(/No plan with id "nope"/); + }); + + it('addPlanItem appends an item with starting status todo and persists it', () => { + const plan = createPlan('P', [{ title: 'first' }]); + const item = addPlanItem(plan.id, { title: 'second', priority: 'medium', notes: 'n' }); + expect(item).toMatchObject({ title: 'second', status: 'todo', priority: 'medium', notes: 'n' }); + expect(item.id).toMatch(/^[0-9a-f-]{36}$/); + + const reloaded = getPlan(plan.id); + expect(reloaded.items.map((i) => i.title)).toEqual(['first', 'second']); + }); + + it('addPlanItem throws on an unknown plan', () => { + expect(() => addPlanItem('nope', { title: 'x' })).toThrow(/No plan with id/); + }); + + it('updatePlanItem applies provided fields only', () => { + const plan = createPlan('P', [{ title: 'A', priority: 'low' }]); + const itemId = plan.items[0]!.id; + const updated = updatePlanItem(plan.id, itemId, { status: 'done', priority: 'high' }); + expect(updated).toMatchObject({ title: 'A', status: 'done', priority: 'high' }); 
+ + const again = updatePlanItem(plan.id, itemId, { notes: 'finished early' }); + // Untouched fields are preserved across separate updates. + expect(again).toMatchObject({ status: 'done', priority: 'high', notes: 'finished early' }); + }); + + it('updatePlanItem throws on an empty patch', () => { + const plan = createPlan('P', [{ title: 'A' }]); + expect(() => updatePlanItem(plan.id, plan.items[0]!.id, {})).toThrow( + /at least one field to update/, + ); + }); + + it('updatePlanItem throws on an unknown item', () => { + const plan = createPlan('P', [{ title: 'A' }]); + expect(() => updatePlanItem(plan.id, 'missing', { status: 'done' })).toThrow( + /No item with id "missing"/, + ); + }); + + it('removePlanItem removes the item and reports whether it existed', () => { + const plan = createPlan('P', [{ title: 'A' }, { title: 'B' }]); + const aId = plan.items[0]!.id; + expect(removePlanItem(plan.id, aId)).toEqual({ removed: true, existed: true }); + expect(getPlan(plan.id).items.map((i) => i.title)).toEqual(['B']); + // Removing again reports existed: false but does not throw. 
+ expect(removePlanItem(plan.id, aId)).toEqual({ removed: true, existed: false }); + }); + + it('removePlanItem throws on an unknown plan', () => { + expect(() => removePlanItem('nope', 'whatever')).toThrow(/No plan with id/); + }); + + it('deletePlan removes the whole plan and reports whether it existed', () => { + const plan = createPlan('P', [{ title: 'A' }]); + expect(deletePlan(plan.id)).toEqual({ removed: true, existed: true }); + expect(listPlans()).toHaveLength(0); + expect(deletePlan(plan.id)).toEqual({ removed: true, existed: false }); + }); +}); diff --git a/packages/bundled-tools/tests/planning-tools.test.ts b/packages/bundled-tools/tests/planning-tools.test.ts new file mode 100644 index 0000000..758cbc4 --- /dev/null +++ b/packages/bundled-tools/tests/planning-tools.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from 'vitest'; + +import { PLANNING_TOOLS } from '../src/index.js'; + +describe('PLANNING_TOOLS', () => { + it('is a named ToolSet', () => { + expect(PLANNING_TOOLS.name).toBe('planning'); + expect(typeof PLANNING_TOOLS.description).toBe('string'); + }); + + it('contains exactly the 7 plan tools in order, all read-tier', () => { + expect(PLANNING_TOOLS.tools.map((t) => t.name)).toEqual([ + 'listPlans', + 'getPlan', + 'createPlan', + 'addPlanItem', + 'updatePlanItem', + 'removePlanItem', + 'deletePlan', + ]); + expect(PLANNING_TOOLS.tools).toHaveLength(7); + expect(PLANNING_TOOLS.tools.every((t) => t.riskTier === 'read')).toBe(true); + }); +}); diff --git a/packages/bundled-tools/tests/remove-plan-item.test.ts b/packages/bundled-tools/tests/remove-plan-item.test.ts new file mode 100644 index 0000000..20862ce --- /dev/null +++ b/packages/bundled-tools/tests/remove-plan-item.test.ts @@ -0,0 +1,44 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { removePlanItem } from '../src/tools/remove-plan-item.js'; +import type { RemovePlanItemResult } from '../src/tools/remove-plan-item.js'; 
+import { createPlan } from '../src/tools/create-plan.js'; +import type { CreatePlanResult } from '../src/tools/create-plan.js'; + +describe('removePlanItem', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + expect(removePlanItem.name).toBe('removePlanItem'); + expect(removePlanItem.riskTier).toBe('read'); + expect(typeof removePlanItem.handler).toBe('function'); + + const { parameters } = removePlanItem; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual(['planId', 'itemId']); + expect(parameters.additionalProperties).toBe(false); + }); + + it('removes an item and reports existence', () => { + const created = createPlan.handler({ + title: 'P', + items: [{ title: 'A' }, { title: 'B' }], + }) as CreatePlanResult; + const aId = created.plan.items[0]!.id; + + const result = removePlanItem.handler({ + planId: created.plan.id, + itemId: aId, + }) as RemovePlanItemResult; + expect(result).toEqual({ removed: true, existed: true }); + }); + + it('throws on an unknown plan', () => { + expect(() => removePlanItem.handler({ planId: 'nope', itemId: 'x' })).toThrow( + /No plan with id/, + ); + }); +}); diff --git a/packages/bundled-tools/tests/update-plan-item.test.ts b/packages/bundled-tools/tests/update-plan-item.test.ts new file mode 100644 index 0000000..7445e7a --- /dev/null +++ b/packages/bundled-tools/tests/update-plan-item.test.ts @@ -0,0 +1,65 @@ +// @vitest-environment jsdom +import { afterEach, describe, expect, it } from 'vitest'; + +import { updatePlanItem } from '../src/tools/update-plan-item.js'; +import type { UpdatePlanItemResult } from '../src/tools/update-plan-item.js'; +import { createPlan } from '../src/tools/create-plan.js'; +import type { CreatePlanResult } from '../src/tools/create-plan.js'; + +describe('updatePlanItem', () => { + afterEach(() => { + localStorage.clear(); + }); + + it('has the correct FunctionDefinition shape', () => { + 
expect(updatePlanItem.name).toBe('updatePlanItem'); + expect(updatePlanItem.riskTier).toBe('read'); + expect(typeof updatePlanItem.handler).toBe('function'); + + const { parameters } = updatePlanItem; + expect(parameters.type).toBe('object'); + expect(parameters.required).toEqual(['planId', 'itemId']); + expect(parameters.additionalProperties).toBe(false); + expect(parameters.properties.status?.enum).toEqual(['todo', 'in_progress', 'done']); + }); + + it('updates status and preserves untouched fields', () => { + const created = createPlan.handler({ + title: 'P', + items: [{ title: 'A', priority: 'low' }], + }) as CreatePlanResult; + const itemId = created.plan.items[0]!.id; + + const result = updatePlanItem.handler({ + planId: created.plan.id, + itemId, + status: 'done', + }) as UpdatePlanItemResult; + + expect(result.item).toMatchObject({ title: 'A', status: 'done', priority: 'low' }); + }); + + it('clears notes when passed an empty string', () => { + const created = createPlan.handler({ + title: 'P', + items: [{ title: 'A' }], + }) as CreatePlanResult; + const itemId = created.plan.items[0]!.id; + + updatePlanItem.handler({ planId: created.plan.id, itemId, notes: 'temp' }); + const result = updatePlanItem.handler({ + planId: created.plan.id, + itemId, + notes: '', + }) as UpdatePlanItemResult; + + expect(result.item.notes).toBe(''); + }); + + it('throws on an empty patch', () => { + const created = createPlan.handler({ title: 'P', items: [{ title: 'A' }] }) as CreatePlanResult; + expect(() => + updatePlanItem.handler({ planId: created.plan.id, itemId: created.plan.items[0]!.id }), + ).toThrow(/at least one field to update/); + }); +}); diff --git a/packages/core/package.json b/packages/core/package.json index 0b59ae4..07971b5 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,6 +1,6 @@ { "name": "@forgewisp/core", - "version": "0.2.0", + "version": "0.3.0", "description": "Safe, function-calling AI agents for the browser", "license": 
"MIT", "type": "module", diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts index f0026ad..d122668 100644 --- a/packages/core/src/agent.ts +++ b/packages/core/src/agent.ts @@ -9,6 +9,7 @@ import { ChatMessage, FunctionDefinition, ForgewispConfig, + ToolSet, } from './types.js'; import type { LLMMessage } from './wire.js'; @@ -52,6 +53,15 @@ export class ForgewispAgent { this.registry.register(def as FunctionDefinition); } + /** + * Register every tool in a ToolSet in one call. Delegates to `registerFunction`, so the + * write/destructive tier invariant (onConfirmRequired must be configured) is enforced + * per-tool, in input order — the first write/destructive tool without a handler throws. + */ + registerToolSet(set: ToolSet): void { + for (const def of set.tools) this.registerFunction(def); + } + deregisterFunction(name: string): void { this.registry.deregister(name); } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 9123f91..619b858 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -10,11 +10,14 @@ export function createAgent(config: ForgewispConfig): ForgewispAgent { return new ForgewispAgent(config); } +export { defineToolSet } from './tool-set.js'; + export type { ForgewispConfig, AuditConfig, ChatMessage, FunctionDefinition, + ToolSet, RiskTier, JSONSchema, JSONSchemaProperty, diff --git a/packages/core/src/tool-set.ts b/packages/core/src/tool-set.ts new file mode 100644 index 0000000..8751fb8 --- /dev/null +++ b/packages/core/src/tool-set.ts @@ -0,0 +1,28 @@ +import type { FunctionDefinition, ToolSet } from './types.js'; + +/** + * Identity helper for authoring a `ToolSet` from a literal tool tuple, giving you + * `name`/`description` authoring DX and centralizing the covariant erasure. 
+ * + * `TTools` is inferred as the readonly tuple of whatever tools you pass (each a + * `FunctionDefinition`), which is assignable to + * `readonly FunctionDefinition[]` without a cast — the handler is contravariant in + * `TArgs` and `never` is the bottom type, so every specific tool is assignable to + * `FunctionDefinition`. This is why a heterogeneous tool tuple needs no `as unknown as` + * workaround here, unlike a plain `readonly FunctionDefinition[]`. Erased at runtime. + * + * Compose sets from existing tools or other sets by spreading `.tools`: + * + * defineToolSet({ name: 'mixed', tools: [...PLANNING_TOOLS.tools, getCurrentTime] }) + */ +export function defineToolSet[]>(def: { + readonly name: string; + readonly description?: string; + readonly tools: TTools; +}): ToolSet { + return { + name: def.name, + ...(def.description !== undefined ? { description: def.description } : {}), + tools: def.tools, + }; +} diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 13ec5c8..e182008 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -15,9 +15,12 @@ export interface JSONSchemaProperty { maximum?: number; minLength?: number; maxLength?: number; + minItems?: number; + maxItems?: number; items?: JSONSchemaProperty; properties?: Record; required?: string[]; + additionalProperties?: boolean; } // ─── Risk Tiers ───────────────────────────────────────────────────────────── @@ -40,6 +43,27 @@ export interface FunctionDefinition> { handler: (args: TArgs) => unknown; } +// ─── Tool Sets ─────────────────────────────────────────────────────────────── + +/** + * A named, ordered group of tools registered together in a single call + * (`agent.registerToolSet(set)`). 
`tools` is typed with the covariant read type + * `FunctionDefinition` so a heterogeneous array of `FunctionDefinition` + * tools is accepted without a per-call cast: the handler is contravariant in `TArgs`, and + * `never` is the bottom type, so every specific tool is assignable to `FunctionDefinition`. + * Registration erases to `FunctionDefinition` internally, exactly as `registerFunction` + * already does — runtime behavior is unchanged. + * + * Build one with the `defineToolSet` helper, or compose new sets from existing tools and other + * sets by spreading their `.tools`: + * defineToolSet({ name: 'mixed', tools: [...SOME_SET.tools, anotherTool] }) + */ +export interface ToolSet { + readonly name: string; + readonly description?: string; + readonly tools: readonly FunctionDefinition[]; +} + // ─── Pending Call ──────────────────────────────────────────────────────────── export interface PendingCall { diff --git a/packages/core/tests/tool-set.test.ts b/packages/core/tests/tool-set.test.ts new file mode 100644 index 0000000..3bf1f84 --- /dev/null +++ b/packages/core/tests/tool-set.test.ts @@ -0,0 +1,133 @@ +import { describe, it, expect, vi, afterEach } from 'vitest'; +import { createAgent, defineToolSet } from '../src/index.js'; +import type { FunctionDefinition, ForgewispConfig, ToolSet } from '../src/types.js'; +import type { LLMMessage } from '../src/wire.js'; + +const baseAgentConfig: ForgewispConfig = { + llmEndpoint: 'https://example.com/v1/chat/completions', + model: 'gpt-4o', + requestTimeoutMs: 0, +}; + +function nonStreamResponse(message: LLMMessage): Response { + return new Response(JSON.stringify({ choices: [{ message }] }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); +} + +function finalMessage(text: string): LLMMessage { + return { role: 'assistant', content: text }; +} + +function readTool(name: string, result: unknown): FunctionDefinition { + return { + name, + description: name, + riskTier: 'read', + parameters: { 
type: 'object', properties: {}, required: [] }, + handler: () => result, + }; +} + +describe('defineToolSet', () => { + it('round-trips name, description, and tools', () => { + const a = readTool('a', 1); + const b = readTool('b', 2); + const set = defineToolSet({ name: 'demo', description: 'd', tools: [a, b] }); + expect(set.name).toBe('demo'); + expect(set.description).toBe('d'); + expect(set.tools).toEqual([a, b]); + }); + + it('makes description optional', () => { + const set = defineToolSet({ name: 'bare', tools: [readTool('a', 1)] }); + expect(set.description).toBeUndefined(); + }); + + it('accepts a heterogeneous tool tuple without a cast', () => { + // A ToolSet built from tools with differing TArgs shapes — this compiles with no + // `as unknown as` thanks to the `FunctionDefinition` covariant read type. + const stringTool: FunctionDefinition<{ prefix?: string }> = { + name: 'stringTool', + description: 'string arg', + riskTier: 'read', + parameters: { type: 'object', properties: {}, required: [] }, + handler: (args) => args.prefix ?? 
'', + }; + const numberTool: FunctionDefinition<{ count: number }> = { + name: 'numberTool', + description: 'number arg', + riskTier: 'read', + parameters: { type: 'object', properties: {}, required: [] }, + handler: (args) => args.count, + }; + const set: ToolSet = defineToolSet({ name: 'mixed', tools: [stringTool, numberTool] }); + expect(set.tools.map((t) => t.name)).toEqual(['stringTool', 'numberTool']); + }); + + it('composes from another set by spreading .tools', () => { + const inner = defineToolSet({ name: 'inner', tools: [readTool('a', 1), readTool('b', 2)] }); + const outer = defineToolSet({ name: 'outer', tools: [...inner.tools, readTool('c', 3)] }); + expect(outer.tools.map((t) => t.name)).toEqual(['a', 'b', 'c']); + }); +}); + +describe('ForgewispAgent.registerToolSet', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it('registers every tool in the set so they are exposed to the LLM', async () => { + const fetchMock = vi + .fn() + .mockImplementation(() => Promise.resolve(nonStreamResponse(finalMessage('done')))); + vi.stubGlobal('fetch', fetchMock); + + const agent = createAgent(baseAgentConfig); + agent.registerToolSet( + defineToolSet({ name: 'pair', tools: [readTool('alpha', 1), readTool('beta', 2)] }), + ); + + await agent.run('hi'); + + const calls = fetchMock.mock.calls as Array<[string, RequestInit]>; + const body = JSON.parse(calls[0]![1].body as string) as Record; + const tools = body['tools'] as Array<{ function: { name: string } }>; + expect(tools.map((t) => t.function.name)).toEqual(['alpha', 'beta']); + expect(body['tool_choice']).toBe('auto'); + }); + + it('throws at registration time when a set contains a write tool without onConfirmRequired', () => { + const agent = createAgent(baseAgentConfig); + const writeTool: FunctionDefinition = { + name: 'w', + description: 'W', + riskTier: 'write', + parameters: { type: 'object', properties: {}, required: [] }, + handler: () => 1, + }; + expect(() => + agent.registerToolSet( + 
defineToolSet({ name: 'has-write', tools: [readTool('r', 1), writeTool] }), + ), + ).toThrow('onConfirmRequired'); + }); + + it('registers write tools when onConfirmRequired is configured', () => { + const agent = createAgent({ + ...baseAgentConfig, + onConfirmRequired: () => Promise.resolve(true), + }); + const writeTool: FunctionDefinition = { + name: 'w', + description: 'W', + riskTier: 'write', + parameters: { type: 'object', properties: {}, required: [] }, + handler: () => 1, + }; + expect(() => + agent.registerToolSet(defineToolSet({ name: 'has-write', tools: [writeTool] })), + ).not.toThrow(); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f17c5bc..d322b34 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -95,6 +95,37 @@ importers: specifier: ^1.6.0 version: 1.6.1(jsdom@25.0.1) + apps/planning-demo: + dependencies: + '@forgewisp/bundled-tools': + specifier: workspace:* + version: link:../../packages/bundled-tools + '@forgewisp/core': + specifier: workspace:* + version: link:../../packages/core + dompurify: + specifier: ^3.2.0 + version: 3.4.11 + marked: + specifier: ^18.0.5 + version: 18.0.5 + devDependencies: + '@testing-library/dom': + specifier: ^10.0.0 + version: 10.4.1 + jsdom: + specifier: ^25.0.0 + version: 25.0.1 + typescript: + specifier: ^5.4.0 + version: 5.9.3 + vite: + specifier: ^5.4.0 + version: 5.4.21 + vitest: + specifier: ^1.6.0 + version: 1.6.1(jsdom@25.0.1) + packages/bundled-tools: dependencies: '@forgewisp/core':