Skip to content

Commit 9f32d82

Browse files
author
Brendan Gray
committed
fix: MUSTFIXBUGS315 batch - 9 fixes covering 10 bugs
- BUG 2/19: Frontend reads result.fullContent for append_to_file code blocks - BUG 1/16: Multi-strategy auto-convert dedup with 3 overlap methods - BUG 11/17: Two-stage progressive disposal recovery - BUG 12: Dynamic context compaction thresholds based on context size - BUG 20: BLOCKED messages include file tail for model continuation - BUG 4: Line-level overlap fallback for near-duplicate detection - BUG 6: Smart HTML insert before closing tag - BUG 7/13/15: Runtime OS detection for shell descriptions - BUG 8/18: No-progress stall detection triggers context rotation
1 parent b6818ac commit 9f32d82

9 files changed

Lines changed: 639 additions & 112 deletions

File tree

main/agenticChat.js

Lines changed: 307 additions & 40 deletions
Large diffs are not rendered by default.

main/agenticChatHelpers.js

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,33 @@
77
*/
88
'use strict';
99

10+
/**
 * Check whether a file's content looks syntactically complete.
 *
 * Used after write_file/append_to_file to decide whether the model should be
 * prompted to keep appending. Heuristic only — it inspects the last code line
 * and a few structural markers, so it can misjudge unusual files.
 *
 * @param {string} content  - Full file content written so far.
 * @param {string} [filePath] - Path of the file; the extension selects the heuristic.
 * @returns {boolean} true if the file appears complete, false otherwise.
 */
function checkFileCompleteness(content, filePath) {
  // Tiny or empty files are never considered complete.
  if (!content || content.length < 20) return false;

  const trimmedEnd = content.trimEnd();
  const lastCodeLine = trimmedEnd.split('\n').pop().trim();
  const ext = (filePath?.match(/\.([^.]+)$/) || [])[1] || '';

  let looksComplete = false;
  if (/^html?$/i.test(ext)) {
    // Fix 42: Anchor to end of content — </html> must be near the end, not just
    // anywhere in the string. Without $, a </html> inside a JS string/template
    // in the middle of the file triggers a false positive.
    looksComplete = /<\/html\s*>\s*$/i.test(trimmedEnd);
  } else if (/^css$/i.test(ext)) {
    looksComplete = false; // lone } is unreliable for CSS
  } else {
    looksComplete = /^(module\.exports\s*=|export\s+(default\s+)?|\}\s*;?\s*$|\}\)\s*;?\s*$)/.test(lastCodeLine);
  }

  // Secondary check: an opened <style> or <script> without its own closing tag
  // means the file is incomplete. Each tag is checked independently — the old
  // combined /<\/(style|script)\s*>/ test let an unclosed <script> slip through
  // whenever a closed <style> was present (or vice versa), because the close
  // regex matched EITHER tag's closer.
  if (looksComplete) {
    for (const tag of ['style', 'script']) {
      const openRe = new RegExp(`<${tag}\\b`, 'i');
      const closeRe = new RegExp(`</${tag}\\s*>`, 'i');
      if (openRe.test(content) && !closeRe.test(content)) {
        looksComplete = false;
        break;
      }
    }
  }
  return looksComplete;
}
36+
1037
/**
1138
* Near-duplicate detection using word-level Jaccard overlap.
1239
* Two texts with >80% word overlap are considered near-duplicates.
@@ -345,8 +372,20 @@ function progressiveContextCompaction(options) {
345372
let pruned = 0;
346373
let newFullResponseText = fullResponseText;
347374

348-
// Phase 1: Compress old tool results (45-60%)
349-
if (pct > 0.45 && allToolResults.length > 4) {
375+
// Dynamic thresholds: small contexts need EARLIER compaction because each tool result
376+
// and chat turn consumes a proportionally larger fraction of available space.
377+
// For ctx ≤ 16K, shift all thresholds down by 15 percentage points.
378+
// For ctx ≤ 8K, shift down by 25 percentage points.
379+
const offset = totalContextTokens <= 8192 ? 0.25
380+
: totalContextTokens <= 16384 ? 0.15
381+
: 0;
382+
const phase1Threshold = 0.45 - offset;
383+
const phase2Threshold = 0.60 - offset;
384+
const phase3Threshold = 0.75 - offset;
385+
const rotateThreshold = 0.85 - offset;
386+
387+
// Phase 1: Compress old tool results
388+
if (pct > phase1Threshold && allToolResults.length > 4) {
350389
for (let i = 0; i < allToolResults.length - 4; i++) {
351390
const tr = allToolResults[i];
352391
if (tr.result?._pruned) continue;
@@ -359,13 +398,13 @@ function progressiveContextCompaction(options) {
359398
}
360399
}
361400

362-
// Phase 2: Prune verbose chat history (60-75%)
363-
if (pct > 0.60 && chatHistory) {
401+
// Phase 2: Prune verbose chat history
402+
if (pct > phase2Threshold && chatHistory) {
364403
pruned += pruneVerboseHistory(chatHistory, 6);
365404
}
366405

367-
// Phase 3: Aggressive compaction (75-85%)
368-
if (pct > 0.75) {
406+
// Phase 3: Aggressive compaction
407+
if (pct > phase3Threshold) {
369408
for (let i = 0; i < allToolResults.length - 2; i++) {
370409
const tr = allToolResults[i];
371410
if (!tr.result?._pruned) {
@@ -375,8 +414,6 @@ function progressiveContextCompaction(options) {
375414
}
376415
}
377416
if (newFullResponseText.length > 15000) {
378-
// Find a paragraph or line boundary near the truncation point instead of
379-
// slicing blindly through code blocks or sentences.
380417
const target = newFullResponseText.length - 15000;
381418
let cutPoint = newFullResponseText.indexOf('\n\n', target);
382419
if (cutPoint === -1 || cutPoint > target + 500) {
@@ -391,18 +428,14 @@ function progressiveContextCompaction(options) {
391428
if (chatHistory) pruned += pruneVerboseHistory(chatHistory, 2);
392429
}
393430

394-
// Proactive rotation — raised from 72% to 85% because rolling summary +
395-
// progressive compression now handle context growth more gracefully.
396-
// The old 72% threshold was too aggressive, causing premature rotations that
397-
// destroyed conversation context unnecessarily.
398-
const shouldRotate = pct > 0.85;
431+
const shouldRotate = pct > rotateThreshold;
399432

400433
if (pruned > 0) {
401-
console.log(`[Context Compaction] Phase ${pct > 0.75 ? 3 : pct > 0.60 ? 2 : 1}: compacted ${pruned} items at ${Math.round(pct * 100)}% usage`);
434+
console.log(`[Context Compaction] Phase ${pct > phase3Threshold ? 3 : pct > phase2Threshold ? 2 : 1}: compacted ${pruned} items at ${Math.round(pct * 100)}% usage (ctx=${totalContextTokens}, rotateAt=${Math.round(rotateThreshold * 100)}%)`);
402435
}
403436

404437
return {
405-
phase: pct > 0.80 ? 4 : pct > 0.75 ? 3 : pct > 0.60 ? 2 : pct > 0.45 ? 1 : 0,
438+
phase: pct > (phase3Threshold + 0.05) ? 4 : pct > phase3Threshold ? 3 : pct > phase2Threshold ? 2 : pct > phase1Threshold ? 1 : 0,
406439
pruned,
407440
newFullResponseText,
408441
shouldRotate,
@@ -473,7 +506,15 @@ function formatSuccessfulToolResult(tr, opts = {}) {
473506
}
474507
}
475508
} else {
476-
text += `*Content appended. If more content remains, call append_to_file again.*\n`;
509+
const appendFullContent = tr.result?.fullContent || '';
510+
const appendFilePath = tr.result?.path || tr.params?.filePath || '';
511+
if (appendFullContent && !checkFileCompleteness(appendFullContent, appendFilePath)) {
512+
const appendLines = appendFullContent.split('\n');
513+
const appendTail = appendLines.slice(-10).join('\n');
514+
text += `**WARNING: File "${appendFilePath}" is still NOT complete after this append (${appendLines.length} lines total).** The file is missing closing tags or content. You MUST call append_to_file again with actual code content. Do NOT send empty content. Here are the last 10 lines of the file:\n\`\`\`\n${appendTail}\n\`\`\`\nContinue from here.\n`;
515+
} else {
516+
text += `*Content appended successfully.*\n`;
517+
}
477518
}
478519
break;
479520
}
@@ -641,6 +682,7 @@ class ExecutionState {
641682

642683
module.exports = {
643684
isNearDuplicate,
685+
checkFileCompleteness,
644686
autoSnapshotAfterBrowserAction,
645687
sendToolExecutionEvents,
646688
capArray,

main/constants.js

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
*/
99
'use strict';
1010

11+
// OS-aware shell description for run_command tool.
// Resolved once at module load from the host platform.
const _shellDesc = (() => {
  switch (process.platform) {
    case 'win32':
      return 'Windows PowerShell — use Get-ChildItem, Select-String, Get-Content';
    case 'darwin':
      return 'macOS Terminal (zsh) — use ls, grep, cat';
    default:
      return 'Linux Terminal (bash) — use ls, grep, cat';
  }
})();
17+
1118
const DEFAULT_SYSTEM_PREAMBLE = `You are a helpful AI assistant running locally with coding tools. Use them to take real action — don't describe what you'd do, just do it.
1219
You can also answer general questions, help with writing, and have normal conversations — no tools needed for those.
1320
@@ -29,7 +36,7 @@ If your output is cut off mid-generation, the system will automatically continue
2936
- grep_search: search file contents for a string or regex pattern
3037
- get_project_structure: get a tree overview of the project layout
3138
- create_directory / delete_file / rename_file / copy_file: file management
32-
- run_command: run a shell command (Windows PowerShell — use Get-ChildItem, Select-String, Get-Content)
39+
- run_command: run a shell command (${_shellDesc})
3340
- web_search: search for live/current information — use only when you need real-time or external data
3441
- fetch_webpage: fetch content from a specific URL
3542
- http_request: make an HTTP request to test an API or endpoint
@@ -74,7 +81,8 @@ If your output is cut off mid-generation, the system will automatically continue
7481
- edit_file: call read_file first to get the exact current text, then supply precise oldText
7582
- Browser: browser_navigate → browser_snapshot → browser_click/type using refs from snapshot
7683
- When creating files (HTML, CSS, JS, etc.), write all content directly. Reference external resources (fonts, libraries, images) via CDN links — do not use fetch_webpage to download them.
77-
- Multi-step tasks (3+ steps): use write_todos to plan, update_todo as each step completes`;
84+
- Multi-step tasks (3+ steps): use write_todos to plan, then work through each step. **Call update_todo to mark each step "in-progress" when starting and "done" when finished.** Never skip update_todo.
85+
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping for permission. Work autonomously through every step.`;
7886

7987
const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant. You can answer questions on ANY topic — coding, history, science, math, creative writing, and more. You also have coding tools available. Call tools to take action — never just describe what you'd do. Never refuse a request because it is not about code. Always attempt to fulfill the user's request regardless of its length or complexity. When the user specifies output format or file structure, follow those requirements exactly.
8088
@@ -96,7 +104,7 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
96104
- **list_directory** — See what files exist in a folder.
97105
- **find_files** — Search for files by name pattern.
98106
- **grep_search** — Search file contents for text.
99-
- **run_command** — Execute terminal/shell commands.
107+
- **run_command** — Execute ${_shellDesc.split(' — ')[0]} commands.
100108
- **web_search** — Get live internet data (current info, docs, news).
101109
- **fetch_webpage** — Get full text content from a URL.
102110
- **browser_navigate** — Open a URL in browser.
@@ -107,6 +115,7 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
107115
## Rules
108116
- **Never output full file content as code blocks in chat** — always use write_file, edit_file, or append_to_file. Code blocks are only for brief snippets or explanations.
109117
- **For new files: call write_file immediately.** Do not describe what the file would contain — create it.
118+
- **For large files (HTML pages, CSS, JS, etc.): call write_file with the first section of REAL code — NEVER placeholder comments like \`<!-- ... -->\` or \`// content here\`. Then call append_to_file for each remaining section until the entire file is written. Every tool call must contain actual functional code.**
110119
- **When the user asks for confirmation or verification, ALWAYS call list_directory or read_file to verify.** NEVER say "I can confirm" without actually checking. NEVER refuse a verification request — you MUST call the tool. Even if previous operations failed, you MUST still verify when asked.
111120
- **Never claim a directory is empty without calling list_directory.** If list_directory returns items, report them exactly as returned.
112121
- **Path awareness:** All relative paths are relative to the project root. Use paths like "file.html" for root files, "subfolder/file.html" for nested files.
@@ -123,10 +132,12 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
123132
- To visit a URL: call browser_navigate. To read a page: browser_snapshot first.
124133
- If a tool fails, retry once with corrected parameters.
125134
- For edits: call read_file first, then edit_file with exact oldText and newText.
126-
- For large files: write_file first section, then append_to_file for each remaining section.
135+
- For large files: write_file with actual code from the beginning — NEVER placeholder stubs like \`<!-- ... -->\` or \`// TODO\`. Then append_to_file for each remaining section until the full file is written.
127136
- When creating files (HTML, CSS, JS, etc.), write all content directly. Reference external resources (fonts, libraries, images) via CDN links — do not use fetch_webpage to download them.
128137
- If the user asks for multiple files, create ALL of them. Call write_file for EACH file — do not stop after the first file. Do not claim a file was created unless you received a success result from write_file for that specific file. Do not summarize until every requested file exists.
129138
- Always use the exact filename the user specifies.
139+
- Multi-step tasks (3+ steps): call write_todos to create a plan, then work through it step by step. **Call update_todo to mark each step "in-progress" when you start it and "done" when you finish it.** Never skip update_todo — the system tracks your progress through it.
140+
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping to ask for permission. Work autonomously through every step.
130141
- Once ALL parts of the task are complete (every requested file written, every question answered), respond with a brief summary. Do not call more tools after the task is done.`;
131142

132143
const DEFAULT_CHAT_PREAMBLE = `Answer questions, help with code and concepts, and have normal conversations.

main/llmEngine.js

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -643,17 +643,30 @@ class LLMEngine extends EventEmitter {
643643

644644
// Stall watchdog — two-phase: longer timeout for prompt eval (first token),
645645
// shorter timeout for generation stalls (between tokens)
646-
const PROMPT_EVAL_TIMEOUT_MS = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_CPU_MS; // prompt eval always gets the long timeout
646+
const PROMPT_EVAL_TIMEOUT_MS = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
647647
const stallTimeoutMs = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
648648
let stallTimer = null;
649+
let _forceAbortTimer = null;
649650
let _firstTokenReceived = false;
650651
const resetStallTimer = () => {
651652
if (stallTimer) clearTimeout(stallTimer);
653+
if (_forceAbortTimer) { clearTimeout(_forceAbortTimer); _forceAbortTimer = null; }
652654
const timeout = _firstTokenReceived ? stallTimeoutMs : PROMPT_EVAL_TIMEOUT_MS;
653655
stallTimer = setTimeout(() => {
654656
if (_genCounter === genId && this.abortController) {
655657
console.log(`[LLM] Stall watchdog fired after ${timeout / 1000}s — aborting generation (phase=${_firstTokenReceived ? 'gen' : 'prompt-eval'})`);
656658
this.cancelGeneration('timeout');
659+
// node-llama-cpp doesn't check AbortSignal during prompt evaluation.
660+
// If stuck in prompt-eval, force-dispose the sequence after a grace period.
661+
if (!_firstTokenReceived) {
662+
_forceAbortTimer = setTimeout(() => {
663+
if (_genCounter === genId && this.sequence) {
664+
console.log('[LLM] Force-disposing sequence — prompt-eval did not respond to abort signal');
665+
try { this.sequence.dispose?.(); } catch (e) { console.error('[LLM] Sequence dispose error:', e.message); }
666+
this.sequence = null;
667+
}
668+
}, 10_000);
669+
}
657670
}
658671
}, timeout);
659672
};
@@ -814,6 +827,7 @@ class LLMEngine extends EventEmitter {
814827
return this._handleGenerationError(err, fullResponse, detectedToolBlock);
815828
} finally {
816829
if (stallTimer) clearTimeout(stallTimer);
830+
if (_forceAbortTimer) clearTimeout(_forceAbortTimer);
817831
if (genTimeoutTimer) clearTimeout(genTimeoutTimer);
818832
resolveGenDone();
819833
this._activeGenerationPromise = null;
@@ -882,17 +896,26 @@ class LLMEngine extends EventEmitter {
882896
};
883897
}
884898

885-
if (isAbort && this._abortReason === 'timeout') {
886-
const partial = fullResponse.trim() || '[Generation timed out — retrying]';
887-
this.chatHistory.push({ type: 'model', response: [partial] });
888-
return {
889-
text: partial,
890-
rawText: fullResponse,
891-
model: this.modelInfo?.name || 'unknown',
892-
tokensUsed: this.sequence?.nextTokenIndex || 0,
893-
contextUsed: this.context?.contextSize || 0,
894-
stopReason: 'timeout',
895-
};
899+
// Treat any error during a timeout abort as a timeout — covers both AbortError
900+
// and sequence-disposal errors from forced prompt-eval abort
901+
if (this._abortReason === 'timeout') {
902+
const msg = (err.message || '').toLowerCase();
903+
const isForceDispose = msg.includes('disposed') || msg.includes('sequence') || !this.sequence;
904+
if (isAbort || isForceDispose) {
905+
if (isForceDispose) {
906+
console.log('[LLM] Generation force-aborted via sequence disposal — treating as timeout');
907+
}
908+
const partial = fullResponse.trim() || '[Generation timed out — retrying]';
909+
this.chatHistory.push({ type: 'model', response: [partial] });
910+
return {
911+
text: partial,
912+
rawText: fullResponse,
913+
model: this.modelInfo?.name || 'unknown',
914+
tokensUsed: 0,
915+
contextUsed: this.context?.contextSize || 0,
916+
stopReason: 'timeout',
917+
};
918+
}
896919
}
897920

898921
if (isAbort) {
@@ -1015,17 +1038,29 @@ class LLMEngine extends EventEmitter {
10151038

10161039
// Stall watchdog — two-phase: longer timeout for prompt eval (first token),
10171040
// shorter timeout for generation stalls (between tokens)
1018-
const PROMPT_EVAL_TIMEOUT_MS_FN = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_CPU_MS;
1041+
const PROMPT_EVAL_TIMEOUT_MS_FN = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
10191042
const stallTimeoutMs = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
10201043
let stallTimer = null;
1044+
let _forceAbortTimer = null;
10211045
let _firstTokenReceived = false;
10221046
const resetStallTimer = () => {
10231047
if (stallTimer) clearTimeout(stallTimer);
1048+
if (_forceAbortTimer) { clearTimeout(_forceAbortTimer); _forceAbortTimer = null; }
10241049
const timeout = _firstTokenReceived ? stallTimeoutMs : PROMPT_EVAL_TIMEOUT_MS_FN;
10251050
stallTimer = setTimeout(() => {
10261051
if (_genCounter === genId && this.abortController) {
10271052
console.log(`[LLM] Stall watchdog fired after ${timeout / 1000}s — aborting generation (functions mode, phase=${_firstTokenReceived ? 'gen' : 'prompt-eval'})`);
10281053
this.cancelGeneration('timeout');
1054+
// Force-dispose sequence if prompt-eval doesn't respond to abort signal
1055+
if (!_firstTokenReceived) {
1056+
_forceAbortTimer = setTimeout(() => {
1057+
if (_genCounter === genId && this.sequence) {
1058+
console.log('[LLM] Force-disposing sequence — prompt-eval did not respond to abort signal (functions mode)');
1059+
try { this.sequence.dispose?.(); } catch (e) { console.error('[LLM] Sequence dispose error:', e.message); }
1060+
this.sequence = null;
1061+
}
1062+
}, 10_000);
1063+
}
10291064
}
10301065
}, timeout);
10311066
};
@@ -1143,6 +1178,7 @@ class LLMEngine extends EventEmitter {
11431178
throw err;
11441179
} finally {
11451180
if (stallTimer) clearTimeout(stallTimer);
1181+
if (_forceAbortTimer) clearTimeout(_forceAbortTimer);
11461182
}
11471183
}
11481184

0 commit comments

Comments
 (0)