From 64f7779b554915913013d2673cb72a9fc79f7959 Mon Sep 17 00:00:00 2001 From: ThoughtProof Date: Sat, 30 May 2026 10:48:53 +0200 Subject: [PATCH] feat: add ThoughtProof verification plugin (Sentinel + RV) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds epistemic verification for AI agents as a new plugin: Actions: - thoughtproof.sentinel — Pre-execution triage (ALLOW/BLOCK/UNCERTAIN) - thoughtproof.verify — Adversarial reality verification (standard/deep tiers) - thoughtproof.attest — On-chain attestation (EAS for Sentinel, TP-VC for RV) - thoughtproof.status — Health check for both backends Features: - x402 pay-per-call support (zero-config payments via HTTP 402) - maxPaymentAmount safety cap against compromised servers - Internal ERC-8004 reputation hook (not agent-callable) - Configurable registry addresses for reputation feedback Also available as standalone npm package: @thoughtproof/goat-plugin 31 tests, 0 TS errors. --- examples/thoughtproof-verification/index.ts | 256 +++++++ plugins/index.ts | 1 + plugins/thoughtproof/README.md | 205 ++++++ plugins/thoughtproof/actions/attest.ts | 40 ++ plugins/thoughtproof/actions/sentinel.ts | 37 ++ plugins/thoughtproof/actions/status.ts | 24 + plugins/thoughtproof/actions/verify.ts | 49 ++ .../adapters/http-thoughtproof.ts | 320 +++++++++ plugins/thoughtproof/adapters/types.ts | 158 +++++ plugins/thoughtproof/hooks/reputation.ts | 94 +++ plugins/thoughtproof/index.ts | 28 + tests/unit/thoughtproof.test.ts | 628 ++++++++++++++++++ 12 files changed, 1840 insertions(+) create mode 100644 examples/thoughtproof-verification/index.ts create mode 100644 plugins/thoughtproof/README.md create mode 100644 plugins/thoughtproof/actions/attest.ts create mode 100644 plugins/thoughtproof/actions/sentinel.ts create mode 100644 plugins/thoughtproof/actions/status.ts create mode 100644 plugins/thoughtproof/actions/verify.ts create mode 100644 plugins/thoughtproof/adapters/http-thoughtproof.ts 
create mode 100644 plugins/thoughtproof/adapters/types.ts create mode 100644 plugins/thoughtproof/hooks/reputation.ts create mode 100644 plugins/thoughtproof/index.ts create mode 100644 tests/unit/thoughtproof.test.ts diff --git a/examples/thoughtproof-verification/index.ts b/examples/thoughtproof-verification/index.ts new file mode 100644 index 0000000..af1dba4 --- /dev/null +++ b/examples/thoughtproof-verification/index.ts @@ -0,0 +1,256 @@ +/** + * ThoughtProof Verification Agent — Demo for GOAT AgentKit + * + * Demonstrates the full Sentinel → RV escalation flow: + * + * Agent decides → Sentinel (fast triage, every decision) + * ├── ALLOW → Execute action + * ├── BLOCK → Stop, log reason + * └── UNCERTAIN → Escalate to RV (deep verification) + * ├── ALLOW → Execute + * └── BLOCK → Stop + * + * After verification, optionally attest the result on-chain. + * + * Run: npx tsx examples/thoughtproof-verification/index.ts + * Env: THOUGHTPROOF_API_KEY (optional — public endpoints work without key) + */ + +// NOTE: These imports use monorepo-relative paths. If you copy this example +// into your own project, replace with package imports: +// import { ActionProvider } from '@goatnetwork/agentkit/providers'; +// import { PolicyEngine } from '@goatnetwork/agentkit/core'; +// import { ... 
} from '@goatnetwork/agentkit/plugins'; +import { ActionProvider } from '../../providers/action-provider'; +import { PolicyEngine } from '../../core/policy/policy-engine'; +import { ExecutionRuntime } from '../../core/runtime/execution-runtime'; +import { + thoughtproofSentinelAction, + thoughtproofVerifyAction, + thoughtproofAttestAction, + thoughtproofStatusAction, + HttpThoughtProofAdapter, +} from '../../plugins/thoughtproof/index'; +import type { + SentinelVerifyOutput, + RVVerifyOutput, + StatusOutput, +} from '../../plugins/thoughtproof/adapters/types'; + +// ── Configuration ─────────────────────────────────────────── + +const adapter = new HttpThoughtProofAdapter({ + apiKey: process.env.THOUGHTPROOF_API_KEY, +}); + +const provider = new ActionProvider(); +provider.register(thoughtproofSentinelAction(adapter)); +provider.register(thoughtproofVerifyAction(adapter)); +provider.register(thoughtproofAttestAction(adapter)); +provider.register(thoughtproofStatusAction(adapter)); + +const policy = new PolicyEngine({ + allowedNetworks: ['goat-mainnet', 'goat-testnet'], + maxRiskWithoutConfirm: 'medium', + writeEnabled: true, +}); + +const runtime = new ExecutionRuntime(policy, { + maxRetries: 1, + retryDelayMs: 500, +}); + +// ── Helpers ───────────────────────────────────────────────── + +function makeContext(traceId: string) { + return { + traceId, + network: 'goat-mainnet', + now: Date.now(), + caller: 'thoughtproof-demo-agent', + }; +} + +function logSection(title: string) { + console.log(`\n${'═'.repeat(60)}`); + console.log(` ${title}`); + console.log('═'.repeat(60)); +} + +function logVerdict(source: string, verdict: string, confidence: number, latencyMs: number) { + const icon = verdict === 'ALLOW' ? '✅' : verdict === 'BLOCK' ? 
'🛑' : '⚠️'; + console.log(` ${icon} [${source}] ${verdict} (confidence: ${confidence}, ${latencyMs}ms)`); +} + +// ── Core: Sentinel → RV Escalation ───────────────────────── + +interface VerificationResult { + finalVerdict: 'ALLOW' | 'BLOCK'; + source: 'sentinel' | 'rv'; + requestId: string; + details: SentinelVerifyOutput | RVVerifyOutput; +} + +async function verifyAgentDecision( + claim: string, + context: string, + traceId: string, +): Promise { + const ctx = makeContext(traceId); + + // Step 1: Sentinel pre-check (~$0.003, ~2s) + console.log('\n → Running Sentinel pre-check...'); + const sentinelResult = await runtime.run( + provider.get('thoughtproof.sentinel'), + ctx, + { claim, context }, + ); + + if (!sentinelResult.ok || !sentinelResult.output) { + console.log(` ❌ Sentinel error: ${sentinelResult.error}`); + return { finalVerdict: 'BLOCK', source: 'sentinel', requestId: '', details: {} as SentinelVerifyOutput }; + } + + const sentinel = sentinelResult.output as SentinelVerifyOutput; + logVerdict('Sentinel', sentinel.verdict, sentinel.confidence, sentinel.latencyMs); + + if (sentinel.reasons.length) { + sentinel.reasons.forEach((r) => console.log(` └─ ${r}`)); + } + + if (sentinel.verdict === 'ALLOW') { + return { finalVerdict: 'ALLOW', source: 'sentinel', requestId: sentinel.requestId, details: sentinel }; + } + + if (sentinel.verdict === 'BLOCK') { + return { finalVerdict: 'BLOCK', source: 'sentinel', requestId: sentinel.requestId, details: sentinel }; + } + + // Step 2: UNCERTAIN → escalate to RV (~$0.02, ~5-15s) + console.log('\n → Sentinel UNCERTAIN — escalating to RV deep verification...'); + const rvResult = await runtime.run( + provider.get('thoughtproof.verify'), + ctx, + { claim, context, tier: 'standard' as const }, + ); + + if (!rvResult.ok || !rvResult.output) { + console.log(` ❌ RV error: ${rvResult.error}`); + return { finalVerdict: 'BLOCK', source: 'rv', requestId: '', details: {} as RVVerifyOutput }; + } + + const rv = rvResult.output as 
RVVerifyOutput; + logVerdict('RV', rv.verdict, rv.confidence, rv.latencyMs); + + if (rv.synthesis) { + console.log(` └─ Synthesis: ${rv.synthesis.slice(0, 200)}${rv.synthesis.length > 200 ? '...' : ''}`); + } + + return { + finalVerdict: rv.verdict === 'ALLOW' ? 'ALLOW' : 'BLOCK', + source: 'rv', + requestId: rv.requestId, + details: rv, + }; +} + +// ── Demo Scenarios ────────────────────────────────────────── + +interface Scenario { + name: string; + claim: string; + context: string; + expectedBehavior: string; +} + +const SCENARIOS: Scenario[] = [ + { + name: 'Safe trade — clear signal', + claim: 'Based on the 4h MACD crossover confirmed by volume increase of 340%, and price breaking above the 200-day moving average, execute a limit buy of 0.3 ETH at $3,450. This is within our 2% position size limit.', + context: 'Conservative trading agent. Max position: 1 ETH. Max drawdown: 5%. Only trades on confirmed multi-indicator signals.', + expectedBehavior: 'Should ALLOW — clear reasoning, within risk parameters, multiple confirming indicators.', + }, + { + name: 'Risky trade — reasoning gaps', + claim: 'ETH will pump because of the upcoming merge. All in. Buy 50 ETH at market price immediately.', + context: 'Conservative trading agent. Max position: 1 ETH. Max drawdown: 5%. Only trades on confirmed multi-indicator signals.', + expectedBehavior: 'Should BLOCK — violates position limits, no technical analysis, uses "will pump" (speculative), "all in" (emotional).', + }, + { + name: 'Ambiguous — needs deeper analysis', + claim: 'Correlation between BTC dominance declining and ETH outperformance suggests a rotation. Place a 0.5 ETH buy order 2% below current price as a swing trade.', + context: 'Moderate risk trading agent. Position limits respected. 
However the correlation analysis is based on a 30-day window which may be insufficient.', + expectedBehavior: 'May trigger UNCERTAIN → RV escalation — claim is plausible but methodology could be questioned.', + }, +]; + +// ── Main ──────────────────────────────────────────────────── + +async function main() { + console.log('🐐 ThoughtProof × GOAT AgentKit — Verification Demo'); + console.log(' Sentinel (triage) → RV (deep verification) escalation flow\n'); + + // Step 0: Health check + logSection('Health Check'); + const statusResult = await runtime.run( + provider.get('thoughtproof.status'), + makeContext('health-check'), + {}, + ); + + if (statusResult.ok && statusResult.output) { + const status = statusResult.output as StatusOutput; + console.log(` Sentinel: ${status.sentinel.healthy ? '✅ healthy' : '❌ down'} (${status.sentinel.latencyMs}ms)`); + console.log(` RV: ${status.rv.healthy ? '✅ healthy' : '❌ down'} (${status.rv.latencyMs}ms)`); + + if (!status.sentinel.healthy && !status.rv.healthy) { + console.log('\n ⚠️ Both APIs unreachable. 
Running in demo mode (expect errors).'); + } + } else { + console.log(` ⚠️ Status check failed: ${statusResult.error}`); + console.log(' Running scenarios anyway...'); + } + + // Run scenarios + for (let i = 0; i < SCENARIOS.length; i++) { + const scenario = SCENARIOS[i]; + logSection(`Scenario ${i + 1}: ${scenario.name}`); + console.log(` Expected: ${scenario.expectedBehavior}`); + console.log(` Claim: "${scenario.claim.slice(0, 100)}..."`); + + try { + const result = await verifyAgentDecision( + scenario.claim, + scenario.context, + `demo-scenario-${i + 1}`, + ); + + console.log(`\n 📋 Final Decision:`); + console.log(` Verdict: ${result.finalVerdict}`); + console.log(` Decided by: ${result.source}`); + console.log(` Request ID: ${result.requestId}`); + + if (result.finalVerdict === 'ALLOW') { + console.log(' → Agent would EXECUTE the trade'); + } else { + console.log(' → Agent would SKIP the trade'); + } + } catch (err) { + console.log(` ❌ Scenario failed: ${err instanceof Error ? err.message : String(err)}`); + } + } + + // Summary + logSection('Demo Complete'); + console.log(' The ThoughtProof plugin provides:'); + console.log(' • Sentinel: Fast pre-execution triage (~$0.003, ~2s)'); + console.log(' • RV: Adversarial deep verification (~$0.02-0.08, 5-45s)'); + console.log(' • Attest: On-chain attestation (EAS / TP-VC)'); + console.log(' • Automatic ERC-8004 reputation feedback (internal hook)'); + console.log(''); + console.log(' Sentinel catches obvious issues fast and cheap.'); + console.log(' RV handles the edge cases with multi-model adversarial analysis.'); + console.log(' Together: cost-efficient verification for every agent decision.\n'); +} + +main().catch(console.error); diff --git a/plugins/index.ts b/plugins/index.ts index 205d0b9..1c77c05 100644 --- a/plugins/index.ts +++ b/plugins/index.ts @@ -27,3 +27,4 @@ export * from './erc8004/index'; export * from './x402-merchant/index'; export * from './giftcard/index'; export * from './gns/index'; +export 
* from './thoughtproof/index'; diff --git a/plugins/thoughtproof/README.md b/plugins/thoughtproof/README.md new file mode 100644 index 0000000..4c6ab41 --- /dev/null +++ b/plugins/thoughtproof/README.md @@ -0,0 +1,205 @@ +# ThoughtProof Plugin + +Epistemic verification for AI agents — catch bad reasoning before it costs money. + +## Why + +Autonomous agents hallucinate, drift from instructions, and make unsupported logical leaps. When agents control wallets, one bad output costs more than thousands of verification checks. ThoughtProof adds a verification layer between "agent decided" and "transaction sent." + +**Two products, one flow:** +- **Sentinel** — fast pre-execution triage (~$0.003, ~2s). Call on every decision. +- **RV** — adversarial deep verification (~$0.02–0.08, 5–45s). Call when Sentinel is uncertain or stakes are high. + +## Actions + +| Action | Purpose | Risk | Cost | Latency | +|--------|---------|------|------|---------| +| `thoughtproof.sentinel` | Pre-execution check (ALLOW / BLOCK / UNCERTAIN) | read | ~$0.003 | ~2s | +| `thoughtproof.verify` | Adversarial reality verification (evaluate → critique → synthesize) | read | $0.02–0.08 | 5–45s | +| `thoughtproof.attest` | On-chain attestation (EAS for Sentinel, TP-VC for RV) | medium | gas | ~10s | +| `thoughtproof.status` | Health check for both APIs | read | free | <1s | + +## Agent Decision Flow + +``` +Agent decides action + │ + ├─► thoughtproof.sentinel (every decision, ~$0.003) + │ ├── ALLOW → Execute + │ ├── BLOCK → Stop + │ └── UNCERTAIN → Escalate ─┐ + │ │ + └───────────────────────────────►│ + │ + thoughtproof.verify (tier: standard|deep) + ├── ALLOW → Execute + └── BLOCK → Stop +``` + +## Setup + +Two auth modes (API key or x402 pay-per-call), with three ways to configure them — choose one: + +### Option A: API Key (simple) + +```typescript +import { HttpThoughtProofAdapter } from '@goatnetwork/agentkit/plugins'; + +const adapter = new HttpThoughtProofAdapter({ + apiKey: process.env.THOUGHTPROOF_API_KEY, +}); +``` + +### Option B: x402 Pay-per-call (no API 
key, agent wallet pays) + +```typescript +import { privateKeyToAccount } from 'viem/accounts'; +import { HttpThoughtProofAdapter } from '@goatnetwork/agentkit/plugins'; + +const adapter = new HttpThoughtProofAdapter({ + x402Signer: privateKeyToAccount(process.env.WALLET_PRIVATE_KEY as `0x${string}`), +}); +``` + +The server dictates the price via the `PAYMENT-REQUIRED` header. The adapter signs and pays automatically. No hardcoded prices, no subscription — pure pay-per-use. + +### Option C: Pre-configured x402 fetch (advanced) + +If you've already wired up `@x402/fetch`: + +```typescript +import { wrapFetchWithPayment } from '@x402/fetch'; +import { x402Client } from '@x402/core/client'; +import { ExactEvmScheme } from '@x402/evm/exact/client'; + +const client = new x402Client(); +client.register('eip155:*', new ExactEvmScheme(signer)); + +const adapter = new HttpThoughtProofAdapter({ + x402Fetch: wrapFetchWithPayment(fetch, client), +}); +``` + +### Register Actions + +```typescript +import { ActionProvider } from '@goatnetwork/agentkit/providers'; +import { + thoughtproofSentinelAction, + thoughtproofVerifyAction, + thoughtproofAttestAction, + thoughtproofStatusAction, +} from '@goatnetwork/agentkit/plugins'; + +const provider = new ActionProvider(); +provider.register(thoughtproofSentinelAction(adapter)); +provider.register(thoughtproofVerifyAction(adapter)); +provider.register(thoughtproofAttestAction(adapter)); +provider.register(thoughtproofStatusAction(adapter)); +``` + +## Demo + +Run the included demo agent that tests three scenarios (safe trade, risky trade, ambiguous signal): + +```bash +npx tsx examples/thoughtproof-verification/index.ts +``` + +Expected output: + +``` +🐐 ThoughtProof × GOAT AgentKit — Verification Demo + Sentinel (triage) → RV (deep verification) escalation flow + +═══════════════════════════════════════════════════════════════ + Health Check +═══════════════════════════════════════════════════════════════ + Sentinel: ✅ healthy (142ms) 
+ RV: ✅ healthy (305ms) + +═══════════════════════════════════════════════════════════════ + Scenario 1: Safe trade — clear signal +═══════════════════════════════════════════════════════════════ + Expected: Should ALLOW — clear reasoning, within risk parameters + + → Running Sentinel pre-check... + ✅ [Sentinel] ALLOW (confidence: 0.94, 1847ms) + └─ Reasoning is sound and within stated parameters + + 📋 Final Decision: + Verdict: ALLOW + Decided by: sentinel + → Agent would EXECUTE the trade + +═══════════════════════════════════════════════════════════════ + Scenario 2: Risky trade — reasoning gaps +═══════════════════════════════════════════════════════════════ + Expected: Should BLOCK — violates position limits, speculative + + → Running Sentinel pre-check... + 🛑 [Sentinel] BLOCK (confidence: 0.97, 1203ms) + └─ Violates position size limit (50 ETH > 1 ETH max) + └─ No technical analysis provided + + 📋 Final Decision: + Verdict: BLOCK + Decided by: sentinel + → Agent would SKIP the trade + +═══════════════════════════════════════════════════════════════ + Scenario 3: Ambiguous — needs deeper analysis +═══════════════════════════════════════════════════════════════ + Expected: May trigger UNCERTAIN → RV escalation + + → Running Sentinel pre-check... + ⚠️ [Sentinel] UNCERTAIN (confidence: 0.52, 1654ms) + └─ Correlation analysis methodology unclear + + → Sentinel UNCERTAIN — escalating to RV deep verification... + ✅ [RV] ALLOW (confidence: 0.71, 9234ms) + └─ Synthesis: Correlation is plausible but weak. Position size conservative... + + 📋 Final Decision: + Verdict: ALLOW + Decided by: rv + → Agent would EXECUTE the trade +``` + +## ERC-8004 Reputation + +Verification results are submitted as ERC-8004 reputation feedback **automatically** via an internal hook — not as an agent-callable action. This prevents agents from self-scoring. + +Score mapping: ALLOW → 100, UNCERTAIN → 50, BLOCK → 0. 
+ +## Tests + +```bash +npx vitest run tests/unit/thoughtproof.test.ts +``` + +30+ tests covering all 4 actions, x402 payment flow, and reputation hook (metadata, execute, signal propagation, error handling, verdict mapping). + +## Architecture + +``` +plugins/thoughtproof/ +├── actions/ +│ ├── sentinel.ts # Pre-execution triage +│ ├── verify.ts # Adversarial RV verification (tier: standard|deep) +│ ├── attest.ts # On-chain attestation (source: sentinel|rv) +│ └── status.ts # Health check +├── adapters/ +│ ├── types.ts # Shared types + adapter interface +│ └── http-thoughtproof.ts # HTTP client for Sentinel + RV APIs +├── hooks/ +│ └── reputation.ts # Internal ERC-8004 reputation hook +└── index.ts # Plugin exports +``` + +## Links + +- [ThoughtProof](https://thoughtproof.ai) +- [Sentinel API](https://sentinel.thoughtproof.ai) +- [ERC-8004](https://eips.ethereum.org/EIPS/eip-8004) +- [x402](https://docs.x402.org) diff --git a/plugins/thoughtproof/actions/attest.ts b/plugins/thoughtproof/actions/attest.ts new file mode 100644 index 0000000..5ec89fa --- /dev/null +++ b/plugins/thoughtproof/actions/attest.ts @@ -0,0 +1,40 @@ +import { z } from 'zod'; +import type { ActionDefinition } from '../../../core/schema/action'; +import type { ThoughtProofAdapter, AttestInput, AttestOutput } from '../adapters/types'; + +const inputSchema = z.object({ + source: z.enum(['sentinel', 'rv']).describe( + 'Which verification result to attest: "sentinel" → EAS on-chain attestation, "rv" → TP-VC attestation', + ), + requestId: z.string().min(1, 'requestId from the verification response is required'), + recipient: z.string().regex(/^0x[0-9a-fA-F]{40}$/, 'recipient must be a valid EVM address'), +}); + +/** + * On-chain attestation of a ThoughtProof verification result. 
+ * + * Creates a permanent, verifiable record of the verification: + * - source="sentinel" → EAS attestation on Base mainnet + * - source="rv" → ThoughtProof Verifiable Credential (TP-VC) + * + * Requires the requestId from a prior sentinel or verify call. + */ +export function thoughtproofAttestAction( + adapter: ThoughtProofAdapter, +): ActionDefinition { + return { + name: 'thoughtproof.attest', + description: + 'Create an on-chain attestation of a verification result. ' + + 'Pass source="sentinel" for EAS attestation or source="rv" for TP-VC. ' + + 'Requires the requestId from a prior thoughtproof.sentinel or thoughtproof.verify call. ' + + 'The recipient address receives the attestation.', + riskLevel: 'medium', + requiresConfirmation: true, + networks: ['goat-mainnet', 'goat-testnet'], + zodInputSchema: inputSchema, + async execute(ctx, input) { + return adapter.attest(input, ctx.signal); + }, + }; +} diff --git a/plugins/thoughtproof/actions/sentinel.ts b/plugins/thoughtproof/actions/sentinel.ts new file mode 100644 index 0000000..0c7ab02 --- /dev/null +++ b/plugins/thoughtproof/actions/sentinel.ts @@ -0,0 +1,37 @@ +import { z } from 'zod'; +import type { ActionDefinition } from '../../../core/schema/action'; +import type { ThoughtProofAdapter, SentinelVerifyInput, SentinelVerifyOutput } from '../adapters/types'; + +const inputSchema = z.object({ + claim: z.string().min(1, 'claim must not be empty — the agent reasoning or planned action to verify'), + context: z.string().optional().describe('Context or instructions the agent was given'), + task: z.string().optional().describe('The agent task or goal'), +}); + +/** + * Pre-execution verification via ThoughtProof Sentinel. + * + * Checks agent reasoning and planned actions BEFORE execution. + * Returns ALLOW (safe to proceed), BLOCK (stop), or UNCERTAIN (escalate to RV). + * + * Cost: ~$0.003 per call. Designed for every agent decision cycle. 
+ */ +export function thoughtproofSentinelAction( + adapter: ThoughtProofAdapter, +): ActionDefinition { + return { + name: 'thoughtproof.sentinel', + description: + 'Pre-execution verification: check agent reasoning before executing an action. ' + + 'Send the agent\'s planned action and reasoning as "claim", get back ALLOW, BLOCK, or UNCERTAIN. ' + + 'Use before every economic decision. Fast (~2s) and cheap (~$0.003). ' + + 'If UNCERTAIN, escalate to thoughtproof.verify for deeper analysis.', + riskLevel: 'read', + requiresConfirmation: false, + networks: ['goat-mainnet', 'goat-testnet'], + zodInputSchema: inputSchema, + async execute(ctx, input) { + return adapter.sentinelVerify(input, ctx.signal); + }, + }; +} diff --git a/plugins/thoughtproof/actions/status.ts b/plugins/thoughtproof/actions/status.ts new file mode 100644 index 0000000..db4f254 --- /dev/null +++ b/plugins/thoughtproof/actions/status.ts @@ -0,0 +1,24 @@ +import type { ActionDefinition } from '../../../core/schema/action'; +import type { ThoughtProofAdapter, StatusOutput } from '../adapters/types'; + +/** + * Health check for both ThoughtProof APIs (Sentinel + RV). + * No input required. Returns health status and latency for each backend. + */ +export function thoughtproofStatusAction( + adapter: ThoughtProofAdapter, +): ActionDefinition, StatusOutput> { + return { + name: 'thoughtproof.status', + description: + 'Check health of ThoughtProof verification APIs. ' + + 'Returns availability and latency for both Sentinel and RV backends. 
' + + 'Call before critical verification flows to ensure services are reachable.', + riskLevel: 'read', + requiresConfirmation: false, + networks: ['goat-mainnet', 'goat-testnet'], + async execute(ctx) { + return adapter.status(ctx.signal); + }, + }; +} diff --git a/plugins/thoughtproof/actions/verify.ts b/plugins/thoughtproof/actions/verify.ts new file mode 100644 index 0000000..eca075f --- /dev/null +++ b/plugins/thoughtproof/actions/verify.ts @@ -0,0 +1,49 @@ +import { z } from 'zod'; +import type { ActionDefinition } from '../../../core/schema/action'; +import type { ThoughtProofAdapter, RVVerifyInput, RVVerifyOutput } from '../adapters/types'; + +const inputSchema = z.object({ + claim: z.string().min(1, 'claim must not be empty — the agent output or decision to verify'), + context: z.string().optional().describe('Supporting context, evidence, or source material'), + tier: z.enum(['standard', 'deep']).default('standard').describe( + 'Verification depth: "standard" (~$0.02, 5-15s) for most decisions, "deep" (~$0.08, 15-45s) for high-stakes', + ), + domain: z.string().optional().describe( + 'Domain hint for domain-specific verification profiles (e.g., "finance", "medical", "legal")', + ), +}); + +/** + * Adversarial reality verification via ThoughtProof RV. + * + * Three-stage pipeline: evaluate → critique (red-team) → synthesize. + * Checks whether an agent's output is substantively correct — not just + * process compliance, but actual factual/logical correctness. + * + * Use when: + * - Sentinel returns UNCERTAIN and you need deeper analysis + * - High-stakes decisions where correctness matters (trading, compliance) + * - Post-execution audit of agent reasoning quality + * + * Cost: ~$0.02 (standard) or ~$0.08 (deep) per call. 
+ */ +export function thoughtproofVerifyAction( + adapter: ThoughtProofAdapter, +): ActionDefinition { + return { + name: 'thoughtproof.verify', + description: + 'Adversarial reality verification: check whether an agent output is substantively correct. ' + + 'Uses a multi-model evaluate → red-team critique → synthesize pipeline. ' + + 'Set tier="standard" for most checks (~$0.02, 5-15s), tier="deep" for high-stakes (~$0.08, 15-45s). ' + + 'Use after Sentinel returns UNCERTAIN, or directly for critical decisions. ' + + 'Returns verdict (ALLOW/BLOCK/UNCERTAIN), confidence score, and full reasoning chain.', + riskLevel: 'read', + requiresConfirmation: false, + networks: ['goat-mainnet', 'goat-testnet'], + zodInputSchema: inputSchema, + async execute(ctx, input) { + return adapter.rvVerify(input, ctx.signal); + }, + }; +} diff --git a/plugins/thoughtproof/adapters/http-thoughtproof.ts b/plugins/thoughtproof/adapters/http-thoughtproof.ts new file mode 100644 index 0000000..1819834 --- /dev/null +++ b/plugins/thoughtproof/adapters/http-thoughtproof.ts @@ -0,0 +1,320 @@ +import type { + ThoughtProofAdapter, + ThoughtProofConfig, + SentinelVerifyInput, + SentinelVerifyOutput, + RVVerifyInput, + RVVerifyOutput, + AttestInput, + AttestOutput, + StatusOutput, + X402Signer, +} from './types'; + +const DEFAULT_SENTINEL_URL = 'https://sentinel.thoughtproof.ai'; +const DEFAULT_RV_URL = 'https://api.thoughtproof.ai'; +const DEFAULT_SENTINEL_TIMEOUT = 30_000; +const DEFAULT_RV_TIMEOUT = 120_000; + +/** + * Parse the base64-encoded PAYMENT-REQUIRED header from a 402 response. + * Returns the decoded payment requirements object. + */ +function parsePaymentRequired(headerValue: string): Record { + // Use Buffer for Node compatibility (atob is Browser + Node 16+ only) + const json = typeof Buffer !== 'undefined' + ? 
Buffer.from(headerValue, 'base64').toString('utf-8') + : atob(headerValue); + return JSON.parse(json); +} + +/** + * Build a PAYMENT-SIGNATURE header value from a signed payment payload. + */ +function encodePaymentSignature(payload: Record): string { + const json = JSON.stringify(payload); + return typeof Buffer !== 'undefined' + ? Buffer.from(json, 'utf-8').toString('base64') + : btoa(json); +} + +/** + * HTTP adapter for ThoughtProof Sentinel + RV APIs. + * + * Supports two auth modes: + * 1. **API Key** (default): Bearer token auth. Simple, no wallet needed. + * 2. **x402 Pay-per-call**: Agent wallet pays per verification via HTTP 402. + * Server dictates price. No API key needed. Zero subscription. + * + * Priority: x402Fetch > x402Signer > apiKey + */ +export class HttpThoughtProofAdapter implements ThoughtProofAdapter { + private readonly sentinelBase: string; + private readonly rvBase: string; + private readonly apiKey?: string; + private readonly x402Signer?: X402Signer; + private readonly maxPaymentAmount?: bigint; + private readonly fetchFn: typeof fetch; + private readonly sentinelTimeout: number; + private readonly rvTimeout: number; + + constructor(config: ThoughtProofConfig = {}) { + this.sentinelBase = (config.sentinelBaseUrl ?? DEFAULT_SENTINEL_URL).replace(/\/$/, ''); + this.rvBase = (config.rvBaseUrl ?? DEFAULT_RV_URL).replace(/\/$/, ''); + this.apiKey = config.apiKey; + this.x402Signer = config.x402Signer; + this.maxPaymentAmount = config.maxPaymentAmount ? BigInt(config.maxPaymentAmount) : undefined; + this.sentinelTimeout = config.sentinelTimeoutMs ?? DEFAULT_SENTINEL_TIMEOUT; + this.rvTimeout = config.rvTimeoutMs ?? DEFAULT_RV_TIMEOUT; + + // Priority: x402Fetch (user-managed) > native fetch (we handle 402 ourselves) + this.fetchFn = config.x402Fetch ?? 
fetch; + } + + /** Whether this adapter uses x402 pay-per-call */ + get isX402Enabled(): boolean { + return !!this.x402Signer || this.fetchFn !== fetch; + } + + private headers(): Record<string, string> { + const h: Record<string, string> = { 'Content-Type': 'application/json' }; + // API key auth only when NOT using x402 + if (this.apiKey && !this.x402Signer) { + h['Authorization'] = `Bearer ${this.apiKey}`; + } + return h; + } + + /** + * Handle x402 payment flow: + * 1. Server responds 402 + PAYMENT-REQUIRED header + * 2. Parse payment requirements (amount, asset, network, scheme) + * 3. Enforce maxPaymentAmount (when configured) against every stated amount + * 4. Sign payment authorization with wallet + * 5. Retry original request with PAYMENT-SIGNATURE header + * + * @param res - The 402 response whose PAYMENT-REQUIRED header is parsed + * @param url - URL of the original request, reused for the paid retry + * @param init - Request init of the original request; its headers are extended on retry + * @param signal - Abort signal applied to the paid retry + * @returns The response of the retried, paid request + * @throws Error when no signer is configured, the PAYMENT-REQUIRED header is missing, + * or a configured maxPaymentAmount is exceeded or cannot be checked + */ + private async handleX402( + res: Response, + url: string, + init: RequestInit, + signal?: AbortSignal, + ): Promise<Response> { + if (!this.x402Signer?.signTypedData) { + throw new Error( + 'ThoughtProof API requires payment (HTTP 402) but no x402Signer is configured. ' + + 'Provide an x402Signer in ThoughtProofConfig or use an apiKey instead.' + ); + } + + const paymentRequiredHeader = res.headers.get('payment-required'); + if (!paymentRequiredHeader) { + throw new Error( + 'ThoughtProof API returned 402 but no PAYMENT-REQUIRED header. ' + + 'The server may not support x402 yet.' + ); + } + + const requirements = parsePaymentRequired(paymentRequiredHeader); + + // Safety cap: refuse to sign when the server asks for more than the configured max. + // Compared against undefined (not truthiness) so that a configured cap of 0 is still enforced. + if (this.maxPaymentAmount !== undefined) { + // Amounts can appear both in the advertised requirements and in the payload that is + // actually signed. Check every one of them so a server cannot advertise a small amount + // while embedding a larger one in the signed message. + // NOTE(review): `value` / `amount` are assumed to be the amount fields of the signed + // message — confirm against the x402 scheme in use. + const embedded = requirements.typedData as { message?: Record<string, unknown> } | undefined; + const signedMessage = + embedded?.message ?? (requirements.message as Record<string, unknown> | undefined) ?? requirements; + const statedAmounts = [ + requirements.amount, + requirements.maxAmountRequired, + signedMessage.value, + signedMessage.amount, + ].filter((candidate) => candidate !== undefined && candidate !== null); + + // Fail closed: without a stated amount the cap cannot be enforced at all. + if (statedAmounts.length === 0) { + throw new Error( + 'ThoughtProof x402 payment requirements do not state an amount, so maxPaymentAmount cannot be enforced. ' + + 'Refusing to sign.' + ); + } + + for (const requestedAmount of statedAmounts) { + // BigInt() throws on non-integer input, which also aborts before anything is signed. + if (BigInt(String(requestedAmount)) > this.maxPaymentAmount) { + throw new Error( + `ThoughtProof x402 payment amount ${requestedAmount} exceeds configured maxPaymentAmount ${this.maxPaymentAmount}. ` + + 'Refusing to sign. Increase maxPaymentAmount in config if this is expected.' + ); + } + } + } + + // Extract typed data for signing from the payment requirements. + // x402 v2 embeds the EIP-712 signing payload in the requirements. 
+ const typedData = requirements.typedData as { + domain: Record; + types: Record>; + primaryType: string; + message: Record; + } | undefined; + + if (!typedData) { + // Fallback: construct minimal payment authorization + // This handles servers that use a simpler payment-required format + const signature = await this.x402Signer.signTypedData({ + domain: (requirements.domain as Record) ?? {}, + types: (requirements.types as Record>) ?? {}, + primaryType: (requirements.primaryType as string) ?? 'Payment', + message: (requirements.message as Record) ?? requirements, + }); + + const paymentPayload = { + signature, + signer: this.x402Signer.address, + scheme: requirements.scheme ?? 'exact', + }; + + return this.fetchFn(url, { + ...init, + headers: { + ...init.headers as Record, + 'payment-signature': encodePaymentSignature(paymentPayload), + }, + signal, + }); + } + + // Standard x402 v2 path: sign the embedded typed data + const signature = await this.x402Signer.signTypedData(typedData); + + const paymentPayload = { + signature, + signer: this.x402Signer.address, + scheme: requirements.scheme ?? 'exact', + }; + + return this.fetchFn(url, { + ...init, + headers: { + ...init.headers as Record, + 'payment-signature': encodePaymentSignature(paymentPayload), + }, + signal, + }); + } + + private async fetchWithTimeout( + url: string, + body: unknown, + timeoutMs: number, + signal?: AbortSignal, + ): Promise { + const start = Date.now(); + + const makeSignal = (): AbortSignal => { + const elapsed = Date.now() - start; + const remaining = Math.max(timeoutMs - elapsed, 1_000); + const timeoutSignal = AbortSignal.timeout(remaining); + return signal ? 
AbortSignal.any([signal, timeoutSignal]) : timeoutSignal; + }; + + const init: RequestInit = { + method: 'POST', + headers: this.headers(), + body: JSON.stringify(body), + signal: makeSignal(), + }; + + let res = await this.fetchFn(url, init); + + // x402: server requires payment — retry with fresh timeout + if (res.status === 402 && this.x402Signer) { + const retryInit: RequestInit = { + ...init, + signal: makeSignal(), // fresh timeout for the payment retry + }; + res = await this.handleX402(res, url, retryInit, makeSignal()); + } + + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`ThoughtProof API error ${res.status}: ${text}`); + } + + const data = await res.json(); + return { ...data, _latencyMs: Date.now() - start }; + } + + async sentinelVerify(input: SentinelVerifyInput, signal?: AbortSignal): Promise { + const body = { + claim: input.claim, + ...(input.context && { context: input.context }), + ...(input.task && { task: input.task }), + }; + + const raw = await this.fetchWithTimeout( + `${this.sentinelBase}/sentinel/verify`, + body, + this.sentinelTimeout, + signal, + ) as Record; + + return { + verdict: raw.verdict as SentinelVerifyOutput['verdict'], + confidence: (raw.confidence as number) ?? 0, + reasons: (raw.reasons as string[]) ?? [], + requestId: (raw.request_id as string) ?? (raw.requestId as string) ?? '', + latencyMs: raw._latencyMs as number, + }; + } + + async rvVerify(input: RVVerifyInput, signal?: AbortSignal): Promise { + const body = { + claim: input.claim, + ...(input.context && { context: input.context }), + tier: input.tier, + ...(input.domain && { domain: input.domain }), + }; + + const raw = await this.fetchWithTimeout( + `${this.rvBase}/v1/verify`, + body, + this.rvTimeout, + signal, + ) as Record; + + return { + verdict: raw.verdict as RVVerifyOutput['verdict'], + confidence: (raw.confidence as number) ?? 0, + evaluation: (raw.evaluation as string) ?? '', + critique: (raw.critique as string) ?? 
'', + synthesis: (raw.synthesis as string) ?? '', + requestId: (raw.request_id as string) ?? (raw.requestId as string) ?? '', + latencyMs: raw._latencyMs as number, + }; + } + + async attest(input: AttestInput, signal?: AbortSignal): Promise { + const baseUrl = input.source === 'sentinel' ? this.sentinelBase : this.rvBase; + const path = input.source === 'sentinel' + ? '/sentinel/attest' + : '/v1/attest'; + const timeout = input.source === 'sentinel' ? this.sentinelTimeout : this.rvTimeout; + + const body = { + request_id: input.requestId, + recipient: input.recipient, + }; + + const raw = await this.fetchWithTimeout( + `${baseUrl}${path}`, + body, + timeout, + signal, + ) as Record; + + return { + attestationId: (raw.attestation_id as string) ?? (raw.attestationId as string) ?? '', + txHash: (raw.tx_hash as string) ?? (raw.txHash as string), + }; + } + + async status(signal?: AbortSignal): Promise { + const check = async (url: string, timeout: number): Promise<{ healthy: boolean; latencyMs: number }> => { + const start = Date.now(); + try { + const res = await this.fetchFn(url, { + method: 'GET', + signal: signal ? AbortSignal.any([signal, AbortSignal.timeout(timeout)]) : AbortSignal.timeout(timeout), + }); + return { healthy: res.ok, latencyMs: Date.now() - start }; + } catch { + return { healthy: false, latencyMs: Date.now() - start }; + } + }; + + const [sentinel, rv] = await Promise.all([ + check(`${this.sentinelBase}/sentinel/health`, 10_000), + check(`${this.rvBase}/v1/health`, 10_000), + ]); + + return { sentinel, rv }; + } +} diff --git a/plugins/thoughtproof/adapters/types.ts b/plugins/thoughtproof/adapters/types.ts new file mode 100644 index 0000000..3b3ed82 --- /dev/null +++ b/plugins/thoughtproof/adapters/types.ts @@ -0,0 +1,158 @@ +/** + * ThoughtProof API adapter types. 
+ * + * Two backend APIs: + * - Sentinel: sentinel.thoughtproof.ai/sentinel/verify + * - RV: api.thoughtproof.ai/v1/verify + */ + +// ── Shared ────────────────────────────────────────────────── + +export type Verdict = 'ALLOW' | 'BLOCK' | 'UNCERTAIN'; + +/** + * x402 signer interface — any object that can sign EIP-712 typed data. + * Compatible with viem's `privateKeyToAccount()` and ethers' `Wallet`. + */ +export interface X402Signer { + /** Sign EIP-712 typed data. Compatible with viem Account or ethers Signer. */ + signTypedData?(args: { + domain: Record; + types: Record>; + primaryType: string; + message: Record; + }): Promise; + /** EVM address of the signer */ + address?: string; +} + +export interface ThoughtProofConfig { + /** Sentinel API base URL (default: https://sentinel.thoughtproof.ai) */ + sentinelBaseUrl?: string; + /** RV API base URL (default: https://api.thoughtproof.ai) */ + rvBaseUrl?: string; + /** API key for authenticated requests. Ignored when x402Signer is provided. */ + apiKey?: string; + /** + * Maximum payment amount (in smallest unit, e.g. USDC micro-units) the adapter + * will sign per request. Prevents a compromised server from draining the wallet. + * Default: no limit. Recommended: set to 10x your expected max per-call cost. + */ + maxPaymentAmount?: string; + /** + * x402 wallet signer for pay-per-call verification. + * When provided, the adapter pays via x402 (HTTP 402) instead of using an API key. + * The server dictates the price — the adapter just signs and pays. + * + * Pass a viem account: `privateKeyToAccount('0x...')` + * Or any object implementing X402Signer. + */ + x402Signer?: X402Signer; + /** + * Custom fetch function with x402 payment handling already wired in. + * Use this if you've already set up `@x402/fetch` `wrapFetchWithPayment()`. + * When provided, x402Signer is ignored (you handle payment externally). 
+ */ + x402Fetch?: typeof fetch; + /** Timeout in ms for API calls (default: 30_000 for Sentinel, 120_000 for RV) */ + sentinelTimeoutMs?: number; + rvTimeoutMs?: number; +} + +// ── API Response Shapes ───────────────────────────────────── +// +// Sentinel API (sentinel.thoughtproof.ai) returns snake_case: +// { verdict, confidence, reasons, request_id, ... } +// +// RV API (api.thoughtproof.ai) returns snake_case: +// { verdict, confidence, evaluation, critique, synthesis, request_id, ... } +// +// The adapter normalizes both to camelCase TypeScript interfaces below. +// Defensive fallback to camelCase keys exists in case API changes. + +// ── Sentinel ──────────────────────────────────────────────── + +export interface SentinelVerifyInput { + /** The agent's reasoning or planned action to verify */ + claim: string; + /** Context / instructions the agent was given */ + context?: string; + /** The agent's task or goal */ + task?: string; +} + +export interface SentinelVerifyOutput { + verdict: Verdict; + confidence: number; + reasons: string[]; + /** Request ID for tracing */ + requestId: string; + /** Latency in ms */ + latencyMs: number; +} + +// ── RV (Reality Verification) ─────────────────────────────── + +export type RVTier = 'standard' | 'deep'; + +export interface RVVerifyInput { + /** The claim or agent output to verify */ + claim: string; + /** Supporting context or evidence */ + context?: string; + /** Verification depth */ + tier: RVTier; + /** Domain hint for domain-specific verification profiles */ + domain?: string; +} + +export interface RVVerifyOutput { + verdict: Verdict; + confidence: number; + /** Adversarial evaluation result */ + evaluation: string; + /** Red-team critique */ + critique: string; + /** Final synthesized reasoning */ + synthesis: string; + /** Request ID for tracing */ + requestId: string; + /** Latency in ms */ + latencyMs: number; +} + +// ── Attestation ───────────────────────────────────────────── + +export type 
AttestationSource = 'sentinel' | 'rv'; + +export interface AttestInput { + /** Which verification result to attest */ + source: AttestationSource; + /** The request ID from the verification response */ + requestId: string; + /** Recipient address for the attestation */ + recipient: string; +} + +export interface AttestOutput { + /** EAS attestation UID (for Sentinel) or TP-VC ID (for RV) */ + attestationId: string; + /** Transaction hash if on-chain */ + txHash?: string; +} + +// ── Status ────────────────────────────────────────────────── + +export interface StatusOutput { + sentinel: { healthy: boolean; latencyMs: number }; + rv: { healthy: boolean; latencyMs: number }; +} + +// ── Adapter Interface ─────────────────────────────────────── + +export interface ThoughtProofAdapter { + sentinelVerify(input: SentinelVerifyInput, signal?: AbortSignal): Promise; + rvVerify(input: RVVerifyInput, signal?: AbortSignal): Promise; + attest(input: AttestInput, signal?: AbortSignal): Promise; + status(signal?: AbortSignal): Promise; +} diff --git a/plugins/thoughtproof/hooks/reputation.ts b/plugins/thoughtproof/hooks/reputation.ts new file mode 100644 index 0000000..858fc99 --- /dev/null +++ b/plugins/thoughtproof/hooks/reputation.ts @@ -0,0 +1,94 @@ +import type { WalletProvider } from '../../../core/wallet/wallet-provider'; +import type { SentinelVerifyOutput, RVVerifyOutput, Verdict } from '../adapters/types'; + +/** + * Internal post-verification hook that submits ERC-8004 reputation + * feedback based on verification results. + * + * NOT exposed as an agent action — this runs automatically after + * verification to prevent agents from gaming their own reputation. + * + * Called by the plugin internally after sentinel/verify actions complete. 
+ */ + +const REPUTATION_REGISTRY_ABI = [ + 'function giveFeedback(uint256 agentId, int128 value, uint8 valueDecimals, string tag1, string tag2, string endpoint, string feedbackURI, bytes32 feedbackHash)', +]; + +// Default addresses — override via ReputationHookConfig.registryAddresses +const DEFAULT_REPUTATION_ADDRESSES: Record = { + 'goat-mainnet': '0x8004BAa1000000000000000000000000000000a1', + 'goat-testnet': '0xd914000000000000000000000000000000a964', +}; + +function verdictToScore(verdict: Verdict): number { + switch (verdict) { + case 'ALLOW': return 100; + case 'UNCERTAIN': return 50; + case 'BLOCK': return 0; + } +} + +export interface ReputationHookConfig { + /** ERC-8004 agent ID to submit feedback for */ + agentId: string; + /** Whether to submit reputation feedback (default: true) */ + enabled?: boolean; + /** Override reputation registry addresses per network */ + registryAddresses?: Record; +} + +/** + * Submit verification result as ERC-8004 reputation feedback. + * + * Tags: + * - tag1: "thoughtproof" (verifier identity) + * - tag2: "sentinel" | "rv" (verification type) + * + * Score mapping: + * - ALLOW → 100 (decimals=0) + * - UNCERTAIN → 50 + * - BLOCK → 0 + */ +export async function submitReputationFeedback( + wallet: WalletProvider, + network: string, + config: ReputationHookConfig, + source: 'sentinel' | 'rv', + result: SentinelVerifyOutput | RVVerifyOutput, +): Promise<{ txHash: string } | null> { + if (config.enabled === false) return null; + + const registryAddress = config.registryAddresses?.[network] ?? 
DEFAULT_REPUTATION_ADDRESSES[network]; + if (!registryAddress) return null; + + const score = verdictToScore(result.verdict); + + // Build a deterministic feedback hash from requestId + const encoder = new TextEncoder(); + const data = encoder.encode(`thoughtproof:${source}:${result.requestId}`); + const hashBuffer = await crypto.subtle.digest('SHA-256', data); + const hashArray = Array.from(new Uint8Array(hashBuffer)); + const feedbackHash = '0x' + hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); + + try { + return await wallet.writeContract( + registryAddress, + REPUTATION_REGISTRY_ABI, + 'giveFeedback', + [ + BigInt(config.agentId), + score, + 0, // decimals + 'thoughtproof', + source, + `thoughtproof.${source}`, + `thoughtproof://${result.requestId}`, + feedbackHash, + ], + ); + } catch { + // Reputation submission is best-effort — don't block verification + return null; + } +} diff --git a/plugins/thoughtproof/index.ts b/plugins/thoughtproof/index.ts new file mode 100644 index 0000000..b4386b2 --- /dev/null +++ b/plugins/thoughtproof/index.ts @@ -0,0 +1,28 @@ +// ThoughtProof Plugin for GOAT AgentKit +// Epistemic verification layer: Sentinel (triage) + RV (deep verification) + +export { thoughtproofSentinelAction } from './actions/sentinel'; +export { thoughtproofVerifyAction } from './actions/verify'; +export { thoughtproofAttestAction } from './actions/attest'; +export { thoughtproofStatusAction } from './actions/status'; + +export { HttpThoughtProofAdapter } from './adapters/http-thoughtproof'; + +export type { + ThoughtProofAdapter, + ThoughtProofConfig, + X402Signer, + Verdict, + SentinelVerifyInput, + SentinelVerifyOutput, + RVTier, + RVVerifyInput, + RVVerifyOutput, + AttestationSource, + AttestInput, + AttestOutput, + StatusOutput, +} from './adapters/types'; + +export type { ReputationHookConfig } from './hooks/reputation'; +export { submitReputationFeedback } from './hooks/reputation'; diff --git a/tests/unit/thoughtproof.test.ts 
b/tests/unit/thoughtproof.test.ts
new file mode 100644
index 0000000..2318569
--- /dev/null
+++ b/tests/unit/thoughtproof.test.ts
@@ -0,0 +1,628 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import {
  thoughtproofSentinelAction,
  thoughtproofVerifyAction,
  thoughtproofAttestAction,
  thoughtproofStatusAction,
  HttpThoughtProofAdapter,
} from '../../plugins/thoughtproof/index';
import type {
  ThoughtProofAdapter,
  SentinelVerifyOutput,
  RVVerifyOutput,
  AttestOutput,
  StatusOutput,
} from '../../plugins/thoughtproof/adapters/types';
import { submitReputationFeedback } from '../../plugins/thoughtproof/hooks/reputation';
import type { WalletProvider } from '../../core/wallet/wallet-provider';

// ── Mock Adapter ────────────────────────────────────────────

/** Adapter double whose four methods resolve to fixed, healthy/ALLOW results. */
function mockAdapter(overrides: Partial<ThoughtProofAdapter> = {}): ThoughtProofAdapter {
  return {
    sentinelVerify: vi.fn().mockResolvedValue({
      verdict: 'ALLOW',
      confidence: 0.95,
      reasons: ['Reasoning is sound'],
      requestId: 'req_sentinel_001',
      latencyMs: 1200,
    } satisfies SentinelVerifyOutput),
    rvVerify: vi.fn().mockResolvedValue({
      verdict: 'ALLOW',
      confidence: 0.88,
      evaluation: 'Claim is well-supported',
      critique: 'Minor gap in evidence chain',
      synthesis: 'Overall the claim holds after adversarial review',
      requestId: 'req_rv_001',
      latencyMs: 8500,
    } satisfies RVVerifyOutput),
    attest: vi.fn().mockResolvedValue({
      attestationId: 'eas_0xabc123',
      txHash: '0xtx_attest',
    } satisfies AttestOutput),
    status: vi.fn().mockResolvedValue({
      sentinel: { healthy: true, latencyMs: 150 },
      rv: { healthy: true, latencyMs: 320 },
    } satisfies StatusOutput),
    ...overrides,
  };
}

/** Wallet double; every method is a vi.fn() resolving to a canned value. */
function mockWallet(overrides: Partial<WalletProvider> = {}): WalletProvider {
  return {
    getAddress: vi.fn().mockResolvedValue('0xABCD'),
    getNetwork: vi.fn().mockResolvedValue('goat-mainnet'),
    getChainId: vi.fn().mockResolvedValue(2345),
    getBalance: vi.fn().mockResolvedValue('1000'),
    getErc20Balance: vi.fn().mockResolvedValue('500'),
    transferNative: vi.fn().mockResolvedValue({ txHash: '0xtx' }),
    transferErc20: vi.fn().mockResolvedValue({ txHash: '0xtx' }),
    approveErc20: vi.fn().mockResolvedValue({ txHash: '0xtx' }),
    signTypedData: vi.fn().mockResolvedValue('0xsig'),
    callContract: vi.fn().mockResolvedValue('0x'),
    writeContract: vi.fn().mockResolvedValue({ txHash: '0xtx_rep' }),
    deployContract: vi.fn().mockResolvedValue({ txHash: '0xtx_deploy', contractAddress: '0xNEW' }),
    ...overrides,
  };
}

const ctx = { traceId: 't1', network: 'goat-mainnet', now: Date.now() };

// ── thoughtproof.sentinel ───────────────────────────────────

describe('thoughtproof.sentinel', () => {
  it('has correct metadata', () => {
    const adapter = mockAdapter();
    const action = thoughtproofSentinelAction(adapter);
    expect(action.name).toBe('thoughtproof.sentinel');
    expect(action.riskLevel).toBe('read');
    expect(action.requiresConfirmation).toBe(false);
    expect(action.networks).toContain('goat-mainnet');
    expect(action.networks).toContain('goat-testnet');
  });

  it('calls adapter.sentinelVerify with correct input', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofSentinelAction(adapter);
    const input = { claim: 'Buy 0.5 ETH', context: 'Trading agent', task: 'Execute trade' };

    const result = await action.execute(ctx, input);

    expect(adapter.sentinelVerify).toHaveBeenCalledWith(input, undefined);
    expect(result.verdict).toBe('ALLOW');
    expect(result.confidence).toBe(0.95);
    expect(result.requestId).toBe('req_sentinel_001');
  });

  it('passes signal from context', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofSentinelAction(adapter);
    const controller = new AbortController();
    const ctxWithSignal = { ...ctx, signal: controller.signal };

    await action.execute(ctxWithSignal, { claim: 'Test' });

    expect(adapter.sentinelVerify).toHaveBeenCalledWith(
      { claim: 'Test' },
      controller.signal,
    );
  });

  it('propagates BLOCK verdict', async () => {
    const adapter = mockAdapter({
      sentinelVerify: vi.fn().mockResolvedValue({
        verdict: 'BLOCK',
        confidence: 0.92,
        reasons: ['Violates position limits', 'No technical analysis'],
        requestId: 'req_block_001',
        latencyMs: 800,
      }),
    });
    const action = thoughtproofSentinelAction(adapter);

    const result = await action.execute(ctx, { claim: 'All in on ETH' });

    expect(result.verdict).toBe('BLOCK');
    expect(result.reasons).toHaveLength(2);
  });

  it('propagates UNCERTAIN verdict', async () => {
    const adapter = mockAdapter({
      sentinelVerify: vi.fn().mockResolvedValue({
        verdict: 'UNCERTAIN',
        confidence: 0.55,
        reasons: ['Ambiguous reasoning'],
        requestId: 'req_unc_001',
        latencyMs: 1500,
      }),
    });
    const action = thoughtproofSentinelAction(adapter);

    const result = await action.execute(ctx, { claim: 'Maybe buy ETH' });

    expect(result.verdict).toBe('UNCERTAIN');
  });

  it('propagates adapter errors', async () => {
    const adapter = mockAdapter({
      sentinelVerify: vi.fn().mockRejectedValue(new Error('ThoughtProof API error 503: Service Unavailable')),
    });
    const action = thoughtproofSentinelAction(adapter);

    await expect(
      action.execute(ctx, { claim: 'Test' }),
    ).rejects.toThrow('ThoughtProof API error 503');
  });
});

// ── thoughtproof.verify ─────────────────────────────────────

describe('thoughtproof.verify', () => {
  it('has correct metadata', () => {
    const adapter = mockAdapter();
    const action = thoughtproofVerifyAction(adapter);
    expect(action.name).toBe('thoughtproof.verify');
    expect(action.riskLevel).toBe('read');
    expect(action.requiresConfirmation).toBe(false);
  });

  it('calls adapter.rvVerify with tier=standard by default', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofVerifyAction(adapter);

    // NOTE(review): tier is passed explicitly here, so this case does not
    // exercise a schema default — add a case that omits tier if the default
    // itself should be covered.
    const input = { claim: 'ETH is undervalued', context: 'Market analysis', tier: 'standard' as const };
    const result = await action.execute(ctx, input);

    expect(adapter.rvVerify).toHaveBeenCalledWith(input, undefined);
    expect(result.verdict).toBe('ALLOW');
    expect(result.evaluation).toBe('Claim is well-supported');
    expect(result.critique).toBe('Minor gap in evidence chain');
    expect(result.synthesis).toBe('Overall the claim holds after adversarial review');
  });

  it('supports tier=deep for high-stakes verification', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofVerifyAction(adapter);

    await action.execute(ctx, { claim: 'Major trade decision', tier: 'deep' });

    expect(adapter.rvVerify).toHaveBeenCalledWith(
      { claim: 'Major trade decision', tier: 'deep' },
      undefined,
    );
  });

  it('supports optional domain parameter', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofVerifyAction(adapter);

    await action.execute(ctx, { claim: 'Compliance check', tier: 'standard', domain: 'finance' });

    expect(adapter.rvVerify).toHaveBeenCalledWith(
      { claim: 'Compliance check', tier: 'standard', domain: 'finance' },
      undefined,
    );
  });

  it('propagates adapter errors', async () => {
    const adapter = mockAdapter({
      rvVerify: vi.fn().mockRejectedValue(new Error('ThoughtProof API error 429: Rate limited')),
    });
    const action = thoughtproofVerifyAction(adapter);

    await expect(
      action.execute(ctx, { claim: 'Test', tier: 'standard' }),
    ).rejects.toThrow('429');
  });
});

// ── thoughtproof.attest ─────────────────────────────────────

describe('thoughtproof.attest', () => {
  it('has correct metadata', () => {
    const adapter = mockAdapter();
    const action = thoughtproofAttestAction(adapter);
    expect(action.name).toBe('thoughtproof.attest');
    expect(action.riskLevel).toBe('medium');
    expect(action.requiresConfirmation).toBe(true);
  });

  it('calls adapter.attest for sentinel source', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofAttestAction(adapter);
    const input = {
      source: 'sentinel' as const,
      requestId: 'req_sentinel_001',
      recipient: '0x1234567890abcdef1234567890abcdef12345678',
    };

    const result = await action.execute(ctx, input);

    expect(adapter.attest).toHaveBeenCalledWith(input, undefined);
    expect(result.attestationId).toBe('eas_0xabc123');
    expect(result.txHash).toBe('0xtx_attest');
  });

  it('calls adapter.attest for rv source', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofAttestAction(adapter);

    await action.execute(ctx, {
      source: 'rv',
      requestId: 'req_rv_001',
      recipient: '0x1234567890abcdef1234567890abcdef12345678',
    });

    expect(adapter.attest).toHaveBeenCalledWith(
      expect.objectContaining({ source: 'rv', requestId: 'req_rv_001' }),
      undefined,
    );
  });

  it('propagates adapter errors', async () => {
    const adapter = mockAdapter({
      attest: vi.fn().mockRejectedValue(new Error('Attestation failed: insufficient gas')),
    });
    const action = thoughtproofAttestAction(adapter);

    await expect(
      action.execute(ctx, {
        source: 'sentinel',
        requestId: 'req_001',
        recipient: '0x1234567890abcdef1234567890abcdef12345678',
      }),
    ).rejects.toThrow('insufficient gas');
  });
});

// ── thoughtproof.status ─────────────────────────────────────

describe('thoughtproof.status', () => {
  it('has correct metadata', () => {
    const adapter = mockAdapter();
    const action = thoughtproofStatusAction(adapter);
    expect(action.name).toBe('thoughtproof.status');
    expect(action.riskLevel).toBe('read');
    expect(action.requiresConfirmation).toBe(false);
  });

  it('returns health status for both backends', async () => {
    const adapter = mockAdapter();
    const action = thoughtproofStatusAction(adapter);

    const result = await action.execute(ctx, {} as Record<string, never>);

    expect(result.sentinel.healthy).toBe(true);
    expect(result.rv.healthy).toBe(true);
    expect(result.sentinel.latencyMs).toBe(150);
    expect(result.rv.latencyMs).toBe(320);
  });

  it('reports unhealthy backends', async () => {
    const adapter = mockAdapter({
      status: vi.fn().mockResolvedValue({
        sentinel: { healthy: false, latencyMs: 10000 },
        rv: { healthy: true, latencyMs: 200 },
      }),
    });
    const action = thoughtproofStatusAction(adapter);

    const result = await action.execute(ctx, {} as Record<string, never>);

    expect(result.sentinel.healthy).toBe(false);
    expect(result.rv.healthy).toBe(true);
  });
});

// ── x402 Payment Flow ───────────────────────────────────────

describe('x402 payment handling', () => {
  it('handles 402 → sign → retry flow', async () => {
    const paymentRequirements = {
      scheme: 'exact',
      amount: '3000',
      asset: 'USDC',
      network: 'eip155:2345',
      domain: { name: 'x402', version: '1', chainId: 2345 },
      types: { Payment: [{ name: 'amount', type: 'uint256' }] },
      primaryType: 'Payment',
      message: { amount: '3000' },
    };

    // Mock fetch: first call returns 402, second call succeeds
    let callCount = 0;
    const mockFetch = vi.fn(async (url: string | URL | Request, init?: RequestInit) => {
      callCount++;
      if (callCount === 1) {
        // First call: 402 Payment Required
        return new Response('Payment Required', {
          status: 402,
          headers: { 'payment-required': btoa(JSON.stringify(paymentRequirements)) },
        });
      }
      // Second call: success (with payment signature)
      return new Response(JSON.stringify({
        verdict: 'ALLOW',
        confidence: 0.95,
        reasons: ['Verified'],
        request_id: 'req_paid_001',
      }), { status: 200, headers: { 'Content-Type': 'application/json' } });
    }) as unknown as typeof fetch;

    const mockSigner = {
      signTypedData: vi.fn().mockResolvedValue('0xsig_payment_proof'),
      address: '0x1234567890abcdef1234567890abcdef12345678',
    };

    const adapter = new HttpThoughtProofAdapter({
      x402Signer: mockSigner,
      x402Fetch: mockFetch,
    });

    const result = await adapter.sentinelVerify({ claim: 'Test claim' });

    expect(result.verdict).toBe('ALLOW');
    expect(result.requestId).toBe('req_paid_001');
    expect(mockSigner.signTypedData).toHaveBeenCalledOnce();
    expect(mockFetch).toHaveBeenCalledTimes(2);

    // Verify the second call has payment-signature header
    const secondCallInit = (mockFetch as any).mock.calls[1][1];
    expect(secondCallInit.headers['payment-signature']).toBeDefined();
  });

  it('falls through to API error when 402 received but no signer configured', async () => {
    const mockFetch = vi.fn(async () =>
      new Response('Payment Required', {
        status: 402,
        headers: { 'payment-required': btoa(JSON.stringify({ amount: '3000' })) },
      }),
    ) as unknown as typeof fetch;

    const adapter = new HttpThoughtProofAdapter({
      x402Fetch: mockFetch,
      // No x402Signer!
    });

    await expect(
      adapter.sentinelVerify({ claim: 'Test' }),
    ).rejects.toThrow('ThoughtProof API error 402');
  });

  it('throws when 402 received without PAYMENT-REQUIRED header', async () => {
    const mockFetch = vi.fn(async () =>
      new Response('Payment Required', { status: 402 }),
    ) as unknown as typeof fetch;

    const mockSigner = {
      signTypedData: vi.fn().mockResolvedValue('0xsig'),
      address: '0xABCD',
    };

    const adapter = new HttpThoughtProofAdapter({
      x402Signer: mockSigner,
      x402Fetch: mockFetch,
    });

    await expect(
      adapter.sentinelVerify({ claim: 'Test' }),
    ).rejects.toThrow('no PAYMENT-REQUIRED header');
  });

  it('uses apiKey when no x402Signer is provided', async () => {
    const mockFetch = vi.fn(async (_url: string | URL | Request, init?: RequestInit) =>
      new Response(JSON.stringify({
        verdict: 'ALLOW', confidence: 0.9, reasons: [], request_id: 'req_key_001',
      }), { status: 200, headers: { 'Content-Type': 'application/json' } }),
    ) as unknown as typeof fetch;

    const adapter = new HttpThoughtProofAdapter({
      apiKey: 'sk-test-key',
      x402Fetch: mockFetch,
    });

    await adapter.sentinelVerify({ claim: 'Test' });

    const callInit = (mockFetch as any).mock.calls[0][1];
    expect(callInit.headers['Authorization']).toBe('Bearer sk-test-key');
  });

  it('skips apiKey header when x402Signer is provided', async () => {
    const mockFetch = vi.fn(async () =>
      new Response(JSON.stringify({
        verdict: 'ALLOW', confidence: 0.9, reasons: [], request_id: 'req_x402_001',
      }), { status: 200, headers: { 'Content-Type': 'application/json' } }),
    ) as unknown as typeof fetch;

    const adapter = new HttpThoughtProofAdapter({
      apiKey: 'sk-test-key',
      x402Signer: { signTypedData: vi.fn(), address: '0xABCD' },
      x402Fetch: mockFetch,
    });

    await adapter.sentinelVerify({ claim: 'Test' });

    const callInit = (mockFetch as any).mock.calls[0][1];
    expect(callInit.headers['Authorization']).toBeUndefined();
  });

  it('isX402Enabled reflects signer presence', () => {
    const withSigner = new HttpThoughtProofAdapter({
      x402Signer: { signTypedData: vi.fn(), address: '0xABCD' },
    });
    expect(withSigner.isX402Enabled).toBe(true);

    const withoutSigner = new HttpThoughtProofAdapter({
      apiKey: 'sk-test',
    });
    expect(withoutSigner.isX402Enabled).toBe(false);
  });

  it('rejects payment exceeding maxPaymentAmount', async () => {
    const paymentRequirements = {
      scheme: 'exact',
      amount: '999999999', // way over cap
      domain: {}, types: {}, primaryType: 'Payment', message: {},
    };

    let callCount = 0;
    const mockFetch = vi.fn(async () => {
      callCount++;
      if (callCount === 1) {
        return new Response('Payment Required', {
          status: 402,
          headers: { 'payment-required': btoa(JSON.stringify(paymentRequirements)) },
        });
      }
      return new Response('{}', { status: 200, headers: { 'Content-Type': 'application/json' } });
    }) as unknown as typeof fetch;

    const adapter = new HttpThoughtProofAdapter({
      x402Signer: { signTypedData: vi.fn(), address: '0xABCD' },
      x402Fetch: mockFetch,
      maxPaymentAmount: '100000', // cap at 100k
    });

    await expect(
      adapter.sentinelVerify({ claim: 'Test' }),
    ).rejects.toThrow('exceeds configured maxPaymentAmount');
  });
});

describe('reputation hook', () => {
  it('submits ALLOW verdict as score 100', async () => {
    const wallet = mockWallet();

    await submitReputationFeedback(
      wallet,
      'goat-mainnet',
      { agentId: '42' },
      'sentinel',
      {
        verdict: 'ALLOW',
        confidence: 0.95,
        reasons: [],
        requestId: 'req_001',
        latencyMs: 1000,
      },
    );

    expect(wallet.writeContract).toHaveBeenCalledWith(
      expect.any(String), // reputation registry address
      expect.any(Array), // ABI
      'giveFeedback',
      expect.arrayContaining([
        BigInt(42), // agentId
        100, // score for ALLOW
        0, // decimals
        'thoughtproof', // tag1
        'sentinel', // tag2
      ]),
    );
  });

  it('submits BLOCK verdict as score 0', async () => {
    const wallet = mockWallet();

    await submitReputationFeedback(
      wallet,
      'goat-mainnet',
      { agentId: '42' },
      'rv',
      {
        verdict: 'BLOCK',
        confidence: 0.88,
        evaluation: '',
        critique: '',
        synthesis: '',
        requestId: 'req_002',
        latencyMs: 5000,
      },
    );

    expect(wallet.writeContract).toHaveBeenCalledWith(
      expect.any(String),
      expect.any(Array),
      'giveFeedback',
      expect.arrayContaining([
        BigInt(42),
        0, // score for BLOCK
        0,
        'thoughtproof',
        'rv',
      ]),
    );
  });

  it('submits UNCERTAIN verdict as score 50', async () => {
    const wallet = mockWallet();

    await submitReputationFeedback(
      wallet,
      'goat-mainnet',
      { agentId: '42' },
      'sentinel',
      {
        verdict: 'UNCERTAIN',
        confidence: 0.55,
        reasons: [],
        requestId: 'req_003',
        latencyMs: 1500,
      },
    );

    expect(wallet.writeContract).toHaveBeenCalledWith(
      expect.any(String),
      expect.any(Array),
      'giveFeedback',
      expect.arrayContaining([BigInt(42), 50, 0, 'thoughtproof', 'sentinel']),
    );
  });

  it('returns null when disabled', async () => {
    const wallet = mockWallet();

    const result = await submitReputationFeedback(
      wallet,
      'goat-mainnet',
      { agentId: '42', enabled: false },
      'sentinel',
      { verdict: 'ALLOW', confidence: 0.95, reasons: [], requestId: 'req_004', latencyMs: 1000 },
    );

    expect(result).toBeNull();
    expect(wallet.writeContract).not.toHaveBeenCalled();
  });

  it('returns null for unsupported networks', async () => {
    const wallet = mockWallet();

    const result = await submitReputationFeedback(
      wallet,
      'ethereum-mainnet',
      { agentId: '42' },
      'sentinel',
      { verdict: 'ALLOW', confidence: 0.95, reasons: [], requestId: 'req_005', latencyMs: 1000 },
    );

    expect(result).toBeNull();
    expect(wallet.writeContract).not.toHaveBeenCalled();
  });

  it('silently catches wallet errors (best-effort)', async () => {
    const wallet = mockWallet({
      writeContract: vi.fn().mockRejectedValue(new Error('out of gas')),
    });

    const result = await submitReputationFeedback(
      wallet,
      'goat-mainnet',
      { agentId: '42' },
      'sentinel',
      { verdict: 'ALLOW', confidence: 0.95, reasons: [], requestId: 'req_006', latencyMs: 1000 },
    );

    expect(result).toBeNull(); // swallowed, not thrown
  });
});