Skip to content

Commit 9f32d82

Browse files
author
Brendan Gray
committed
fix: MUSTFIXBUGS315 batch - 9 fixes covering 10 bugs
- BUG 2/19: Frontend reads result.fullContent for append_to_file code blocks - BUG 1/16: Multi-strategy auto-convert dedup with 3 overlap methods - BUG 11/17: Two-stage progressive disposal recovery - BUG 12: Dynamic context compaction thresholds based on context size - BUG 20: BLOCKED messages include file tail for model continuation - BUG 4: Line-level overlap fallback for near-duplicate detection - BUG 6: Smart HTML insert before closing tag - BUG 7/13/15: Runtime OS detection for shell descriptions - BUG 8/18: No-progress stall detection triggers context rotation
1 parent b6818ac commit 9f32d82

9 files changed

Lines changed: 639 additions & 112 deletions

File tree

main/agenticChat.js

Lines changed: 307 additions & 40 deletions
Large diffs are not rendered by default.

main/agenticChatHelpers.js

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,33 @@
77
*/
88
'use strict';
99

10+
/**
 * Check whether a file's content looks syntactically complete.
 *
 * Used after write_file/append_to_file to decide whether the model should be
 * prompted to keep appending. Heuristic only — it inspects the last code line
 * and a few structural markers, so it can misjudge unusual files.
 *
 * @param {string} content  - Full file content written so far.
 * @param {string} [filePath] - Path of the file; the extension selects the heuristic.
 * @returns {boolean} true if the file appears complete, false otherwise.
 */
function checkFileCompleteness(content, filePath) {
  // Tiny or empty files are never considered complete.
  if (!content || content.length < 20) return false;

  const trimmedEnd = content.trimEnd();
  const lastCodeLine = trimmedEnd.split('\n').pop().trim();
  const ext = (filePath?.match(/\.([^.]+)$/) || [])[1] || '';

  let looksComplete = false;
  if (/^html?$/i.test(ext)) {
    // Fix 42: Anchor to end of content — </html> must be near the end, not just
    // anywhere in the string. Without $, a </html> inside a JS string/template
    // in the middle of the file triggers a false positive.
    looksComplete = /<\/html\s*>\s*$/i.test(trimmedEnd);
  } else if (/^css$/i.test(ext)) {
    looksComplete = false; // lone } is unreliable for CSS
  } else {
    looksComplete = /^(module\.exports\s*=|export\s+(default\s+)?|\}\s*;?\s*$|\}\)\s*;?\s*$)/.test(lastCodeLine);
  }

  // Secondary check: an opened <style> or <script> without its own closing tag
  // means the file is incomplete. Each tag is checked independently — the old
  // combined /<\/(style|script)\s*>/ test let an unclosed <script> slip through
  // whenever a closed <style> was present (or vice versa), because the close
  // regex matched EITHER tag's closer.
  if (looksComplete) {
    for (const tag of ['style', 'script']) {
      const openRe = new RegExp(`<${tag}\\b`, 'i');
      const closeRe = new RegExp(`</${tag}\\s*>`, 'i');
      if (openRe.test(content) && !closeRe.test(content)) {
        looksComplete = false;
        break;
      }
    }
  }
  return looksComplete;
}
36+
1037
/**
1138
* Near-duplicate detection using word-level Jaccard overlap.
1239
* Two texts with >80% word overlap are considered near-duplicates.
@@ -345,8 +372,20 @@ function progressiveContextCompaction(options) {
345372
let pruned = 0;
346373
let newFullResponseText = fullResponseText;
347374

348-
// Phase 1: Compress old tool results (45-60%)
349-
if (pct > 0.45 && allToolResults.length > 4) {
375+
// Dynamic thresholds: small contexts need EARLIER compaction because each tool result
376+
// and chat turn consumes a proportionally larger fraction of available space.
377+
// For ctx ≤ 16K, shift all thresholds down by 15 percentage points.
378+
// For ctx ≤ 8K, shift down by 25 percentage points.
379+
const offset = totalContextTokens <= 8192 ? 0.25
380+
: totalContextTokens <= 16384 ? 0.15
381+
: 0;
382+
const phase1Threshold = 0.45 - offset;
383+
const phase2Threshold = 0.60 - offset;
384+
const phase3Threshold = 0.75 - offset;
385+
const rotateThreshold = 0.85 - offset;
386+
387+
// Phase 1: Compress old tool results
388+
if (pct > phase1Threshold && allToolResults.length > 4) {
350389
for (let i = 0; i < allToolResults.length - 4; i++) {
351390
const tr = allToolResults[i];
352391
if (tr.result?._pruned) continue;
@@ -359,13 +398,13 @@ function progressiveContextCompaction(options) {
359398
}
360399
}
361400

362-
// Phase 2: Prune verbose chat history (60-75%)
363-
if (pct > 0.60 && chatHistory) {
401+
// Phase 2: Prune verbose chat history
402+
if (pct > phase2Threshold && chatHistory) {
364403
pruned += pruneVerboseHistory(chatHistory, 6);
365404
}
366405

367-
// Phase 3: Aggressive compaction (75-85%)
368-
if (pct > 0.75) {
406+
// Phase 3: Aggressive compaction
407+
if (pct > phase3Threshold) {
369408
for (let i = 0; i < allToolResults.length - 2; i++) {
370409
const tr = allToolResults[i];
371410
if (!tr.result?._pruned) {
@@ -375,8 +414,6 @@ function progressiveContextCompaction(options) {
375414
}
376415
}
377416
if (newFullResponseText.length > 15000) {
378-
// Find a paragraph or line boundary near the truncation point instead of
379-
// slicing blindly through code blocks or sentences.
380417
const target = newFullResponseText.length - 15000;
381418
let cutPoint = newFullResponseText.indexOf('\n\n', target);
382419
if (cutPoint === -1 || cutPoint > target + 500) {
@@ -391,18 +428,14 @@ function progressiveContextCompaction(options) {
391428
if (chatHistory) pruned += pruneVerboseHistory(chatHistory, 2);
392429
}
393430

394-
// Proactive rotation — raised from 72% to 85% because rolling summary +
395-
// progressive compression now handle context growth more gracefully.
396-
// The old 72% threshold was too aggressive, causing premature rotations that
397-
// destroyed conversation context unnecessarily.
398-
const shouldRotate = pct > 0.85;
431+
const shouldRotate = pct > rotateThreshold;
399432

400433
if (pruned > 0) {
401-
console.log(`[Context Compaction] Phase ${pct > 0.75 ? 3 : pct > 0.60 ? 2 : 1}: compacted ${pruned} items at ${Math.round(pct * 100)}% usage`);
434+
console.log(`[Context Compaction] Phase ${pct > phase3Threshold ? 3 : pct > phase2Threshold ? 2 : 1}: compacted ${pruned} items at ${Math.round(pct * 100)}% usage (ctx=${totalContextTokens}, rotateAt=${Math.round(rotateThreshold * 100)}%)`);
402435
}
403436

404437
return {
405-
phase: pct > 0.80 ? 4 : pct > 0.75 ? 3 : pct > 0.60 ? 2 : pct > 0.45 ? 1 : 0,
438+
phase: pct > (phase3Threshold + 0.05) ? 4 : pct > phase3Threshold ? 3 : pct > phase2Threshold ? 2 : pct > phase1Threshold ? 1 : 0,
406439
pruned,
407440
newFullResponseText,
408441
shouldRotate,
@@ -473,7 +506,15 @@ function formatSuccessfulToolResult(tr, opts = {}) {
473506
}
474507
}
475508
} else {
476-
text += `*Content appended. If more content remains, call append_to_file again.*\n`;
509+
const appendFullContent = tr.result?.fullContent || '';
510+
const appendFilePath = tr.result?.path || tr.params?.filePath || '';
511+
if (appendFullContent && !checkFileCompleteness(appendFullContent, appendFilePath)) {
512+
const appendLines = appendFullContent.split('\n');
513+
const appendTail = appendLines.slice(-10).join('\n');
514+
text += `**WARNING: File "${appendFilePath}" is still NOT complete after this append (${appendLines.length} lines total).** The file is missing closing tags or content. You MUST call append_to_file again with actual code content. Do NOT send empty content. Here are the last 10 lines of the file:\n\`\`\`\n${appendTail}\n\`\`\`\nContinue from here.\n`;
515+
} else {
516+
text += `*Content appended successfully.*\n`;
517+
}
477518
}
478519
break;
479520
}
@@ -641,6 +682,7 @@ class ExecutionState {
641682

642683
module.exports = {
643684
isNearDuplicate,
685+
checkFileCompleteness,
644686
autoSnapshotAfterBrowserAction,
645687
sendToolExecutionEvents,
646688
capArray,

main/constants.js

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
*/
99
'use strict';
1010

11+
// OS-aware shell description for run_command tool.
// Resolved once at module load from the host platform.
const _shellDesc = (() => {
  switch (process.platform) {
    case 'win32':
      return 'Windows PowerShell — use Get-ChildItem, Select-String, Get-Content';
    case 'darwin':
      return 'macOS Terminal (zsh) — use ls, grep, cat';
    default:
      return 'Linux Terminal (bash) — use ls, grep, cat';
  }
})();
17+
1118
const DEFAULT_SYSTEM_PREAMBLE = `You are a helpful AI assistant running locally with coding tools. Use them to take real action — don't describe what you'd do, just do it.
1219
You can also answer general questions, help with writing, and have normal conversations — no tools needed for those.
1320
@@ -29,7 +36,7 @@ If your output is cut off mid-generation, the system will automatically continue
2936
- grep_search: search file contents for a string or regex pattern
3037
- get_project_structure: get a tree overview of the project layout
3138
- create_directory / delete_file / rename_file / copy_file: file management
32-
- run_command: run a shell command (Windows PowerShell — use Get-ChildItem, Select-String, Get-Content)
39+
- run_command: run a shell command (${_shellDesc})
3340
- web_search: search for live/current information — use only when you need real-time or external data
3441
- fetch_webpage: fetch content from a specific URL
3542
- http_request: make an HTTP request to test an API or endpoint
@@ -74,7 +81,8 @@ If your output is cut off mid-generation, the system will automatically continue
7481
- edit_file: call read_file first to get the exact current text, then supply precise oldText
7582
- Browser: browser_navigate → browser_snapshot → browser_click/type using refs from snapshot
7683
- When creating files (HTML, CSS, JS, etc.), write all content directly. Reference external resources (fonts, libraries, images) via CDN links — do not use fetch_webpage to download them.
77-
- Multi-step tasks (3+ steps): use write_todos to plan, update_todo as each step completes`;
84+
- Multi-step tasks (3+ steps): use write_todos to plan, then work through each step. **Call update_todo to mark each step "in-progress" when starting and "done" when finished.** Never skip update_todo.
85+
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping for permission. Work autonomously through every step.`;
7886

7987
const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant. You can answer questions on ANY topic — coding, history, science, math, creative writing, and more. You also have coding tools available. Call tools to take action — never just describe what you'd do. Never refuse a request because it is not about code. Always attempt to fulfill the user's request regardless of its length or complexity. When the user specifies output format or file structure, follow those requirements exactly.
8088
@@ -96,7 +104,7 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
96104
- **list_directory** — See what files exist in a folder.
97105
- **find_files** — Search for files by name pattern.
98106
- **grep_search** — Search file contents for text.
99-
- **run_command** — Execute terminal/shell commands.
107+
- **run_command** — Execute ${_shellDesc.split(' — ')[0]} commands.
100108
- **web_search** — Get live internet data (current info, docs, news).
101109
- **fetch_webpage** — Get full text content from a URL.
102110
- **browser_navigate** — Open a URL in browser.
@@ -107,6 +115,7 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
107115
## Rules
108116
- **Never output full file content as code blocks in chat** — always use write_file, edit_file, or append_to_file. Code blocks are only for brief snippets or explanations.
109117
- **For new files: call write_file immediately.** Do not describe what the file would contain — create it.
118+
- **For large files (HTML pages, CSS, JS, etc.): call write_file with the first section of REAL code — NEVER placeholder comments like \`<!-- ... -->\` or \`// content here\`. Then call append_to_file for each remaining section until the entire file is written. Every tool call must contain actual functional code.**
110119
- **When the user asks for confirmation or verification, ALWAYS call list_directory or read_file to verify.** NEVER say "I can confirm" without actually checking. NEVER refuse a verification request — you MUST call the tool. Even if previous operations failed, you MUST still verify when asked.
111120
- **Never claim a directory is empty without calling list_directory.** If list_directory returns items, report them exactly as returned.
112121
- **Path awareness:** All relative paths are relative to the project root. Use paths like "file.html" for root files, "subfolder/file.html" for nested files.
@@ -123,10 +132,12 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful, knowledgeable AI assistant.
123132
- To visit a URL: call browser_navigate. To read a page: browser_snapshot first.
124133
- If a tool fails, retry once with corrected parameters.
125134
- For edits: call read_file first, then edit_file with exact oldText and newText.
126-
- For large files: write_file first section, then append_to_file for each remaining section.
135+
- For large files: write_file with actual code from the beginning — NEVER placeholder stubs like \`<!-- ... -->\` or \`// TODO\`. Then append_to_file for each remaining section until the full file is written.
127136
- When creating files (HTML, CSS, JS, etc.), write all content directly. Reference external resources (fonts, libraries, images) via CDN links — do not use fetch_webpage to download them.
128137
- If the user asks for multiple files, create ALL of them. Call write_file for EACH file — do not stop after the first file. Do not claim a file was created unless you received a success result from write_file for that specific file. Do not summarize until every requested file exists.
129138
- Always use the exact filename the user specifies.
139+
- Multi-step tasks (3+ steps): call write_todos to create a plan, then work through it step by step. **Call update_todo to mark each step "in-progress" when you start it and "done" when you finish it.** Never skip update_todo — the system tracks your progress through it.
140+
- **Never ask the user if you should proceed.** When given a task, execute it completely without stopping to ask for permission. Work autonomously through every step.
130141
- Once ALL parts of the task are complete (every requested file written, every question answered), respond with a brief summary. Do not call more tools after the task is done.`;
131142

132143
const DEFAULT_CHAT_PREAMBLE = `Answer questions, help with code and concepts, and have normal conversations.

main/llmEngine.js

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -643,17 +643,30 @@ class LLMEngine extends EventEmitter {
643643

644644
// Stall watchdog — two-phase: longer timeout for prompt eval (first token),
645645
// shorter timeout for generation stalls (between tokens)
646-
const PROMPT_EVAL_TIMEOUT_MS = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_CPU_MS; // prompt eval always gets the long timeout
646+
const PROMPT_EVAL_TIMEOUT_MS = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
647647
const stallTimeoutMs = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
648648
let stallTimer = null;
649+
let _forceAbortTimer = null;
649650
let _firstTokenReceived = false;
650651
const resetStallTimer = () => {
651652
if (stallTimer) clearTimeout(stallTimer);
653+
if (_forceAbortTimer) { clearTimeout(_forceAbortTimer); _forceAbortTimer = null; }
652654
const timeout = _firstTokenReceived ? stallTimeoutMs : PROMPT_EVAL_TIMEOUT_MS;
653655
stallTimer = setTimeout(() => {
654656
if (_genCounter === genId && this.abortController) {
655657
console.log(`[LLM] Stall watchdog fired after ${timeout / 1000}s — aborting generation (phase=${_firstTokenReceived ? 'gen' : 'prompt-eval'})`);
656658
this.cancelGeneration('timeout');
659+
// node-llama-cpp doesn't check AbortSignal during prompt evaluation.
660+
// If stuck in prompt-eval, force-dispose the sequence after a grace period.
661+
if (!_firstTokenReceived) {
662+
_forceAbortTimer = setTimeout(() => {
663+
if (_genCounter === genId && this.sequence) {
664+
console.log('[LLM] Force-disposing sequence — prompt-eval did not respond to abort signal');
665+
try { this.sequence.dispose?.(); } catch (e) { console.error('[LLM] Sequence dispose error:', e.message); }
666+
this.sequence = null;
667+
}
668+
}, 10_000);
669+
}
657670
}
658671
}, timeout);
659672
};
@@ -814,6 +827,7 @@ class LLMEngine extends EventEmitter {
814827
return this._handleGenerationError(err, fullResponse, detectedToolBlock);
815828
} finally {
816829
if (stallTimer) clearTimeout(stallTimer);
830+
if (_forceAbortTimer) clearTimeout(_forceAbortTimer);
817831
if (genTimeoutTimer) clearTimeout(genTimeoutTimer);
818832
resolveGenDone();
819833
this._activeGenerationPromise = null;
@@ -882,17 +896,26 @@ class LLMEngine extends EventEmitter {
882896
};
883897
}
884898

885-
if (isAbort && this._abortReason === 'timeout') {
886-
const partial = fullResponse.trim() || '[Generation timed out — retrying]';
887-
this.chatHistory.push({ type: 'model', response: [partial] });
888-
return {
889-
text: partial,
890-
rawText: fullResponse,
891-
model: this.modelInfo?.name || 'unknown',
892-
tokensUsed: this.sequence?.nextTokenIndex || 0,
893-
contextUsed: this.context?.contextSize || 0,
894-
stopReason: 'timeout',
895-
};
899+
// Treat any error during a timeout abort as a timeout — covers both AbortError
900+
// and sequence-disposal errors from forced prompt-eval abort
901+
if (this._abortReason === 'timeout') {
902+
const msg = (err.message || '').toLowerCase();
903+
const isForceDispose = msg.includes('disposed') || msg.includes('sequence') || !this.sequence;
904+
if (isAbort || isForceDispose) {
905+
if (isForceDispose) {
906+
console.log('[LLM] Generation force-aborted via sequence disposal — treating as timeout');
907+
}
908+
const partial = fullResponse.trim() || '[Generation timed out — retrying]';
909+
this.chatHistory.push({ type: 'model', response: [partial] });
910+
return {
911+
text: partial,
912+
rawText: fullResponse,
913+
model: this.modelInfo?.name || 'unknown',
914+
tokensUsed: 0,
915+
contextUsed: this.context?.contextSize || 0,
916+
stopReason: 'timeout',
917+
};
918+
}
896919
}
897920

898921
if (isAbort) {
@@ -1015,17 +1038,29 @@ class LLMEngine extends EventEmitter {
10151038

10161039
// Stall watchdog — two-phase: longer timeout for prompt eval (first token),
10171040
// shorter timeout for generation stalls (between tokens)
1018-
const PROMPT_EVAL_TIMEOUT_MS_FN = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_CPU_MS;
1041+
const PROMPT_EVAL_TIMEOUT_MS_FN = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
10191042
const stallTimeoutMs = (this.modelInfo?.gpuMode === false) ? STALL_TIMEOUT_CPU_MS : STALL_TIMEOUT_GPU_MS;
10201043
let stallTimer = null;
1044+
let _forceAbortTimer = null;
10211045
let _firstTokenReceived = false;
10221046
const resetStallTimer = () => {
10231047
if (stallTimer) clearTimeout(stallTimer);
1048+
if (_forceAbortTimer) { clearTimeout(_forceAbortTimer); _forceAbortTimer = null; }
10241049
const timeout = _firstTokenReceived ? stallTimeoutMs : PROMPT_EVAL_TIMEOUT_MS_FN;
10251050
stallTimer = setTimeout(() => {
10261051
if (_genCounter === genId && this.abortController) {
10271052
console.log(`[LLM] Stall watchdog fired after ${timeout / 1000}s — aborting generation (functions mode, phase=${_firstTokenReceived ? 'gen' : 'prompt-eval'})`);
10281053
this.cancelGeneration('timeout');
1054+
// Force-dispose sequence if prompt-eval doesn't respond to abort signal
1055+
if (!_firstTokenReceived) {
1056+
_forceAbortTimer = setTimeout(() => {
1057+
if (_genCounter === genId && this.sequence) {
1058+
console.log('[LLM] Force-disposing sequence — prompt-eval did not respond to abort signal (functions mode)');
1059+
try { this.sequence.dispose?.(); } catch (e) { console.error('[LLM] Sequence dispose error:', e.message); }
1060+
this.sequence = null;
1061+
}
1062+
}, 10_000);
1063+
}
10291064
}
10301065
}, timeout);
10311066
};
@@ -1143,6 +1178,7 @@ class LLMEngine extends EventEmitter {
11431178
throw err;
11441179
} finally {
11451180
if (stallTimer) clearTimeout(stallTimer);
1181+
if (_forceAbortTimer) clearTimeout(_forceAbortTimer);
11461182
}
11471183
}
11481184

0 commit comments

Comments
 (0)