testomatio · DavertMik · May 23, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## 2026-05-25
+
+### Changes
+- [Navigator] Can now stop on its own when the page reacts to a submit but the failure is caused by the submitted data, not the locator. A new `stop(reason)` tool is exposed to the Navigator's AI; the model is instructed to call it when an alert or validation message indicates that the user must fix something (wrong credentials, missing knowledge, captcha). Previously, the retry prompt told the model both "this is not a locator issue" and "propose new solutions" in the same turn, so Navigator burned its full retry budget mutating locators that were already correct. The retry prompt is now branched: app-side rejection → call `stop(reason)` or correct the submitted data; click that missed entirely → propose new locator strategies. When the model calls `stop()`, the reason is logged and surfaced in the interactive failure prompt so the user knows what to fix.
+
 ## 2026-05-24
 
 ### New CLI Options
@@ -9,8 +14,9 @@
   explorbot navigate /dashboard --session auth.json
   explorbot navigate /unreachable && echo ok       # exit code reflects reachability
   ```
+- [Navigator] When a click succeeds but the URL does not change to the expected target, the ARIA diff between the pre-click and post-click page states is now included in the next retry prompt. The AI is instructed to read the diff and decide whether the application rejected the submit (in which case it should fix the submitted data, not the locator) or the click simply missed its target. This breaks the "9-attempt syntactic-variant loop" that used to happen when a form submit was rejected by the server — the model now has the evidence to tell the two cases apart.
+
 
-## 2026-05-11
 
 ### New CLI Options
 - **`explorbot explore --configure <spec>`** — Reuse a saved plan, mix old picks with newly planned tests, filter by style/priority, and control sub-page behavior. Spec is a single string of `key=value` (or `key:value`) pairs joined by `;`. Keys: `new` (share of `--max-tests` reserved for new tests, also enables reuse), `from` (explicit plan file, also enables reuse), `style` (planning styles to use; also filters old picks tagged with that style), `priority` (filter both old picks and new tests to the listed priorities), `pick_by` (`priority`|`random`|`index` — order in which old tests are selected and executed), `subpages` (`none`|`same`|`new`|`both` — sub-page behavior in reuse mode). Without `new` or `from`, reuse is off and exploration runs as before.

diff --git a/src/ai/navigator.ts b/src/ai/navigator.ts
@@ -1,4 +1,6 @@
+import { tool } from 'ai';
 import dedent from 'dedent';
+import { z } from 'zod';
 import { ActionResult } from '../action-result.js';
 import type Action from '../action.ts';
 import { ExperienceTracker, renderExperienceToc } from '../experience-tracker.js';
@@ -238,21 +240,45 @@ class Navigator implements Agent {
     const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
     conversation.addUserText(prompt);
 
-    const tools = undefined;
+    let stopReason: string | null = null;
+    const tools = {
+      stop: tool({
+        description: dedent`
+          Stop the navigation because no locator change can resolve the goal.
+          Use this when the application rejected the submission (wrong credentials, missing CSRF,
+          captcha, validation failure you cannot satisfy from available data), required knowledge
+          is missing, or the page shows a blocking error you cannot dismiss.
+          Do NOT use this for locator or strategy problems — for those, emit new code blocks instead.
+        `,
+        inputSchema: z.object({
+          reason: z.string().describe('Short user-facing explanation. Quote the alert / validation text you saw and name what data or knowledge is missing.'),
+        }),
+        execute: async ({ reason }) => {
+          stopReason = reason;
+          return { success: true, message: 'Recorded. Navigator will stop and surface the reason.' };
+        },
+      }),
+    };
 
     let codeBlocks: string[] = [];
     let htmlContextAdded = false;
     let codeBlockIndex = 0;
     let totalAttempts = 0;
     const progressBlocks: string[] = [];
-    const batchFailures: Array<{ code: string; error: string }> = [];
+    const batchFailures: Array<{ code: string; error: string; ariaChanges?: string | null; urlAfter?: string }> = [];
 
     let resolved = false;
     await loop(
       async ({ stop }) => {
         if (codeBlocks.length === 0) {
           const result = await this.provider.invokeConversation(conversation, tools);
           if (!result) return;
+          if (stopReason) {
+            tag('error').log(`Navigator stopped: ${stopReason}`);
+            resolved = false;
+            stop();
+            return;
+          }
           const aiResponse = result?.response?.text;
           debugLog('AI:', aiResponse?.split('\n')[0]);
           debugLog('Received AI response:', aiResponse?.length ?? 0, 'characters');
@@ -274,14 +300,35 @@ class Navigator implements Agent {
           tag('substep').log('Feeding failures back to AI for a new batch...');
           let contextMsg = 'Previous solutions did not work. Analyze the failures and try DIFFERENT strategies (not syntactic variants of the same locator).\n\n';
           if (batchFailures.length > 0) {
-            const lines = batchFailures.map((f) => `- \`${f.code.split('\n')[0]}\` → ${f.error}`).join('\n');
+            const lines = batchFailures
+              .map((f) => {
+                const head = `- \`${f.code.split('\n')[0]}\` → ${f.error}`;
+                if (!f.ariaChanges) return head;
+                const trimmed = f.ariaChanges.split('\n').slice(0, 12).join('\n    ');
+                return `${head}\n  • ARIA changes after the action:\n    ${trimmed}`;
+              })
+              .join('\n');
             contextMsg += `<previous_failures>\n${lines}\n</previous_failures>\n\n`;
           }
           if (!htmlContextAdded) {
             htmlContextAdded = true;
             contextMsg += `Full HTML context:\n\n<page_html>\n${await actionResult.combinedHtml()}\n</page_html>\n\n`;
           }
-          contextMsg += 'Propose new solutions. If errors mention "intercepts pointer events" or timeouts on visible elements, an overlay is blocking — dismiss it first (Escape, click outside, Close button) before retrying the original action.';
+          const pageReacted = batchFailures.some((f) => f.ariaChanges);
+          if (pageReacted) {
+            contextMsg += dedent`
+              Some actions did not throw, but the URL did not change to the expected target and the page changed in other ways (see the ARIA changes listed above).
+              Read the ARIA diff above and judge what happened. Look for any new role that conveys a server response — e.g. an alert, alertdialog, status, validation message, banner, or text that names a problem ("invalid", "required", "expired", "incorrect", "denied", "captcha", "verify"). Different sites express rejection differently; do not look for a specific phrase, read what is there.
+
+              Decide between exactly two paths:
+              1. The diff shows the application rejected the action and the fix is something only the user can provide (wrong credentials, missing data, captcha, knowledge-file gap) — call the stop() tool and quote what you saw in the diff and what is needed.
+              2. The diff shows the application rejected the action but you can correct the SUBMITTED DATA using values present in the knowledge / hint context above — emit corrected code blocks. Do not change the locator.
+
+              Only change locators if the diff shows NOTHING relevant happened in response to your click — that is the only signal that the click missed its target.
+            `;
+          } else {
+            contextMsg += 'Propose new solutions. If errors mention "intercepts pointer events" or timeouts on visible elements, an overlay is blocking — dismiss it first (Escape, click outside, Close button) before retrying the original action.';
+          }
           conversation.addUserText(contextMsg);
           codeBlocks = [];
           batchFailures.length = 0;
@@ -292,7 +339,8 @@ class Navigator implements Agent {
 
         await this.explorer.switchToMainFrame();
 
-        const prevHash = action.actionResult?.getStateHash() ?? actionResult.getStateHash();
+        const prevActionResult = action.actionResult ?? actionResult;
+        const prevHash = prevActionResult.getStateHash();
 
         debugLog(`Attempting resolution: ${codeBlock}`);
         const attemptOk = await action.attempt(codeBlock, message);
@@ -328,6 +376,22 @@ class Navigator implements Agent {
           resolved = urlMatches && stateChanged;
 
           if (!resolved && attemptOk) {
+            let ariaChanges: string | null = null;
+            if (freshState.getStateHash() !== prevHash) {
+              try {
+                const diff = await freshState.diff(prevActionResult);
+                await diff.calculate();
+                ariaChanges = diff.ariaChanged;
+              } catch (err) {
+                debugLog('Failed to compute pageDiff for failed URL verification:', err);
+              }
+            }
+            batchFailures.push({
+              code: codeBlock,
+              error: `URL did not change (still ${freshState.url})`,
+              ariaChanges,
+              urlAfter: freshState.url,
+            });
             tag('warning').log(`URL verification failed: expected ${expectedUrl}, got ${freshState.url}`);
           }
           if (freshState.getStateHash() !== prevHash && (attemptOk || urlMatches)) {
@@ -380,12 +444,15 @@ class Navigator implements Agent {
       }
     }
 
-    if (!resolved && totalAttempts > 0) {
+    if (!resolved && stopReason) {
+      tag('error').log(`Navigator stopped: ${stopReason}`);
+    } else if (!resolved && totalAttempts > 0) {
       tag('error').log(`Navigation failed after ${totalAttempts} attempts`);
     }
 
     if (!resolved && isInteractive()) {
-      const userInput = await pause(`Navigator failed to resolve. Current: ${action.stateManager.getCurrentState()?.url}\n` + `Target: ${expectedUrl ?? '(none)'}\nEnter CodeceptJS commands (or press Enter to skip):`);
+      const stopLine = stopReason ? `Navigator stopped: ${stopReason}\n` : '';
+      const userInput = await pause(`${stopLine}Navigator failed to resolve. Current: ${action.stateManager.getCurrentState()?.url}\n` + `Target: ${expectedUrl ?? '(none)'}\nEnter CodeceptJS commands (or press Enter to skip):`);
 
       if (userInput?.trim()) {
         resolved = await action.attempt(userInput, message);