Skip to content

Commit 3bfd31f

Browse files
author
Brendan Gray
committed
v1.8.44: Fix 61 - Band-aid removal audit + crash fixes
- Fix buildFileProgressHint scope bug (crash after first message)
- Remove code-dump nudge (regeneration loop fix)
- Remove guardFirstTurnOverflow
- Remove getProgressiveTools (keyword-based tool filtering)
- Remove cross-turn duplicate tool call blocker
- Remove count-based write blocker (keep Fix 57 disk-aware protection)
- Remove prose command detection + fallback file operation detection
- Remove write dedup in mcpToolServer formal tool path
- Simplify detectStuckCycle to single threshold
- Remove iteration gate from classifyResponseFailure
- UI: Collapse Keep/Undo by default
- UI: Stack consecutive identical tool calls
- UI: Update recommended models to Qwen3.5
- UI: Fix text overlap on model list
- UI: Fix See N more off-center
- UI: Modernize settings sliders
- Backend: Fix cycle detection false positives (paramsHash)
- Backend: Fix context compaction destroying tool results
- Backend: Add null sequence guard in llmEngine
1 parent af50be8 commit 3bfd31f

14 files changed

Lines changed: 516 additions & 644 deletions

main/agenticChat.js

Lines changed: 31 additions & 215 deletions
Large diffs are not rendered by default.

main/agenticChatHelpers.js

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -326,35 +326,14 @@ function evaluateResponse(responseText, functionCalls, taskType, iteration) {
326326
return { verdict: 'COMMIT', reason: 'default' };
327327
}
328328

329-
/**
330-
* Progressive tool disclosure — returns a filtered list of tool names
331-
* based on model tier limits.
332-
*/
333-
function getProgressiveTools(taskType, iteration, recentTools, maxTools) {
334-
if (!maxTools) return null;
335-
336-
const priorityTools = [
337-
'read_file', 'write_file', 'append_to_file', 'edit_file', 'list_directory', 'run_command',
338-
'web_search', 'search_codebase', 'grep_search', 'find_files',
339-
'browser_navigate', 'browser_snapshot', 'browser_click', 'browser_type',
340-
'browser_scroll', 'browser_press_key', 'browser_select_option',
341-
'browser_evaluate', 'browser_get_content', 'browser_screenshot',
342-
'browser_back', 'browser_hover', 'browser_tabs', 'fetch_webpage',
343-
'write_todos', 'update_todo', 'save_memory', 'get_memory',
344-
'git_status', 'git_diff', 'git_commit',
345-
'delete_file', 'rename_file', 'get_file_info', 'analyze_error',
346-
];
347-
return priorityTools.slice(0, maxTools);
348-
}
349-
350329
/**
351330
* Failure classification — only stops loop on genuine infinite repetition.
352331
*/
353332
function classifyResponseFailure(responseText, hasToolCalls, taskType, iteration, originalMessage, lastResponse, options = {}) {
354333
if (hasToolCalls) return null;
355334

356335
const text = (responseText || '').trim();
357-
if (lastResponse && text.length > 100 && iteration > 2) {
336+
if (lastResponse && text.length > 100) {
358337
if (isNearDuplicate(lastResponse, text, 0.80)) {
359338
return { type: 'repetition', severity: 'stop', recovery: { action: 'stop', prompt: '' } };
360339
}
@@ -403,13 +382,15 @@ function progressiveContextCompaction(options) {
403382
pruned += pruneVerboseHistory(chatHistory, 6);
404383
}
405384

406-
// Phase 3: Aggressive compaction
385+
// Phase 3: Aggressive compaction — protect last 4 results so model can see recent tool output
407386
if (pct > phase3Threshold) {
408-
for (let i = 0; i < allToolResults.length - 2; i++) {
387+
const protectCount = Math.min(4, allToolResults.length);
388+
for (let i = 0; i < allToolResults.length - protectCount; i++) {
409389
const tr = allToolResults[i];
410390
if (!tr.result?._pruned) {
411-
const status = tr.result?.success ? 'ok' : 'fail';
412-
tr.result = { _pruned: true, tool: tr.tool, status };
391+
const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result || '');
392+
const status = tr.result?.success !== false ? 'ok' : 'fail';
393+
tr.result = { _pruned: true, tool: tr.tool, status, snippet: resultStr.substring(0, 300) };
413394
pruned++;
414395
}
415396
}
@@ -831,7 +812,6 @@ module.exports = {
831812
pruneVerboseHistory,
832813
pruneCloudHistory,
833814
evaluateResponse,
834-
getProgressiveTools,
835815
classifyResponseFailure,
836816
progressiveContextCompaction,
837817
buildToolFeedback,

main/ipc/modelHandlers.js

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,16 @@ function register(ctx) {
5050
const maxModelGB = vramGB > 2 ? Math.max(vramGB - 1.5, 1) : totalRAM * 0.6;
5151

5252
const allModels = [
53+
{ name: 'Qwen3.5-0.6B', file: 'Qwen3.5-0.6B-Q8_0.gguf', size: 0.6, hfRepo: 'unsloth/Qwen3.5-0.6B-GGUF', desc: 'Ultra-lightweight chat model', category: 'general', vision: false },
5354
{ name: 'Qwen2.5-Coder-1.5B-Instruct', file: 'Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf', size: 1.0, hfRepo: 'lmstudio-community/Qwen2.5-Coder-1.5B-Instruct-GGUF', desc: 'Fast coding model, great for autocomplete', category: 'coding', vision: false },
54-
{ name: 'Qwen3-0.6B', file: 'Qwen3-0.6B-Q8_0.gguf', size: 0.6, hfRepo: 'unsloth/Qwen3-0.6B-GGUF', desc: 'Ultra-lightweight general chat model', category: 'general', vision: false },
55-
{ name: 'Qwen3-4B', file: 'Qwen3-4B-Q4_K_M.gguf', size: 2.5, hfRepo: 'lmstudio-community/Qwen3-4B-GGUF', desc: 'Fast reasoning model with thinking mode', category: 'general', vision: false },
55+
{ name: 'Qwen3.5-2B', file: 'Qwen3.5-2B-Q8_0.gguf', size: 1.9, hfRepo: 'unsloth/Qwen3.5-2B-GGUF', desc: 'Compact general model, excellent for its size', category: 'general', vision: false },
56+
{ name: 'Qwen3.5-4B', file: 'Qwen3.5-4B-Q4_K_M.gguf', size: 2.5, hfRepo: 'unsloth/Qwen3.5-4B-GGUF', desc: 'Fast reasoning model with thinking mode', category: 'general', vision: false },
5657
{ name: 'Qwen2.5-Coder-7B-Instruct', file: 'Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf', size: 4.7, hfRepo: 'lmstudio-community/Qwen2.5-Coder-7B-Instruct-GGUF', desc: 'Strong coding model, Q4 quantized', category: 'coding', vision: false },
57-
{ name: 'Llama-3.1-8B-Instruct', file: 'Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf', size: 4.9, hfRepo: 'bartowski/Meta-Llama-3.1-8B-Instruct-GGUF', desc: 'Excellent general-purpose model by Meta', category: 'general', vision: false },
58-
{ name: 'Qwen3-8B', file: 'Qwen3-8B-Q4_K_M.gguf', size: 5.0, hfRepo: 'lmstudio-community/Qwen3-8B-GGUF', desc: 'Strong reasoning model with thinking', category: 'general', vision: false },
59-
{ name: 'DeepSeek-R1-Distill-Qwen-14B', file: 'DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf', size: 8.7, hfRepo: 'bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF', desc: 'DeepSeek R1 reasoning distilled into 14B', category: 'reasoning', vision: false },
60-
{ name: 'Qwen3-14B', file: 'Qwen3-14B-Q4_K_M.gguf', size: 9.0, hfRepo: 'lmstudio-community/Qwen3-14B-GGUF', desc: 'High-quality reasoning model', category: 'general', vision: false },
58+
{ name: 'Qwen3.5-9B', file: 'Qwen3.5-9B-Q4_K_M.gguf', size: 5.2, hfRepo: 'unsloth/Qwen3.5-9B-GGUF', desc: 'Best quality-to-size ratio, strong reasoning', category: 'general', vision: false },
59+
{ name: 'Qwen3.5-14B', file: 'Qwen3.5-14B-Q4_K_M.gguf', size: 8.7, hfRepo: 'unsloth/Qwen3.5-14B-GGUF', desc: 'High-quality reasoning model', category: 'general', vision: false },
6160
{ name: 'Mistral-Small-3.1-24B', file: 'Mistral-Small-3.1-24B-Instruct-2503-Q4_K_M.gguf', size: 14.3, hfRepo: 'lmstudio-community/Mistral-Small-3.1-24B-Instruct-2503-GGUF', desc: 'Powerful multi-language coding + reasoning', category: 'general', vision: false },
6261
{ name: 'Qwen3-Coder-30B-A3B (MoE)', file: 'Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf', size: 18.6, hfRepo: 'lmstudio-community/Qwen3-Coder-30B-A3B-Instruct-GGUF', desc: 'Best coding model — only uses 3B active params (fast!)', category: 'coding', vision: false },
63-
{ name: 'Qwen3-30B-A3B (MoE)', file: 'Qwen3-30B-A3B-Q4_K_M.gguf', size: 18.6, hfRepo: 'lmstudio-community/Qwen3-30B-A3B-GGUF', desc: 'Best general model — MoE, fast + smart', category: 'general', vision: false },
64-
{ name: 'Qwen3-32B', file: 'Qwen3-32B-Q4_K_M.gguf', size: 19.8, hfRepo: 'lmstudio-community/Qwen3-32B-GGUF', desc: 'Top-tier reasoning, dense 32B', category: 'general', vision: false },
62+
{ name: 'Qwen3.5-32B', file: 'Qwen3.5-32B-Q4_K_M.gguf', size: 19.8, hfRepo: 'unsloth/Qwen3.5-32B-GGUF', desc: 'Top-tier reasoning, dense 32B', category: 'general', vision: false },
6563
];
6664

6765
const recommended = [];

main/llmEngine.js

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,25 @@ class LLMEngine extends EventEmitter {
844844
if (!useKvCache && this.sequence && this.sequence.nextTokenIndex > 0) {
845845
try { this.chat?.dispose?.(); } catch {}
846846
try { this.sequence.dispose?.(); } catch {}
847-
this.sequence = this.context.getSequence();
847+
this.sequence = null;
848+
try {
849+
this.sequence = this.context.getSequence();
850+
} catch (seqErr) {
851+
const log = require('./logger');
852+
log.warn(`[_runGeneration] getSequence failed: ${seqErr.message} — recreating context`);
853+
try { this.context.dispose?.(); } catch {}
854+
const gpuIsActive = this.modelInfo && this.modelInfo.gpuMode !== false;
855+
const ctxSize = gpuIsActive
856+
? this._computeGpuContextSize({ vramGB: this._cachedVramGB || 0, modelSizeGB: this.modelInfo?.modelSizeGB || 0 })
857+
: this._computeMaxContext(this.modelInfo?.modelSizeGB || 0);
858+
this.context = await this.model.createContext({
859+
contextSize: ctxSize,
860+
flashAttention: gpuIsActive,
861+
ignoreMemorySafetyChecks: true,
862+
failedCreationRemedy: { retries: 8, autoContextSizeShrink: 0.5 },
863+
});
864+
this.sequence = this.context.getSequence();
865+
}
848866
const llamaCppPath = this._getNodeLlamaCppPath();
849867
const { LlamaChat } = await import(pathToFileURL(llamaCppPath).href);
850868
this.chat = new LlamaChat({ contextSequence: this.sequence });

main/mcpToolServer.js

Lines changed: 4 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ const {
2020
parseToolCalls: standaloneParseToolCalls,
2121
repairToolCalls,
2222
_recoverWriteFileContent,
23-
_detectProseCommands,
24-
_detectFallbackFileOperations: standaloneFallbackDetect,
2523
TOOL_NAME_ALIASES,
2624
VALID_TOOLS,
2725
} = require('./tools/toolParser');
@@ -2615,83 +2613,11 @@ class MCPToolServer {
26152613
console.log(`[MCP] Capped tool calls: executing ${maxToolsPerResponse}, skipping ${skippedCount}`);
26162614
}
26172615

2618-
// Fallback detection if no formal tool calls
2616+
// No formal tool calls found — return without attempting fallback detection.
2617+
// The model should use proper tool call format (native functions or JSON fences).
2618+
// Removed: prose command detection and fallback file operation classification.
26192619
if (toolCalls.length === 0) {
2620-
console.log('[MCP] No formal tool calls found, trying fallback detection...');
2621-
2622-
const proseCommands = _detectProseCommands(responseText);
2623-
if (proseCommands.length > 0) {
2624-
console.log('[MCP] Found prose command fallback:', proseCommands.length);
2625-
toolCalls.push(...proseCommands);
2626-
}
2627-
2628-
const fallbackCalls = this._detectFallbackFileOperations(responseText, options.userMessage, [..._repairDropped, ...(options.lastDroppedFilePaths || [])]);
2629-
if (fallbackCalls.length > 0) {
2630-
console.log('[MCP] Found fallback tool calls:', fallbackCalls.length);
2631-
let effectiveFallbackCalls = fallbackCalls;
2632-
let fbCapped = false;
2633-
let fbSkipped = 0;
2634-
if (maxToolsPerResponse > 0 && fallbackCalls.length > maxToolsPerResponse) {
2635-
fbSkipped = fallbackCalls.length - maxToolsPerResponse;
2636-
effectiveFallbackCalls = fallbackCalls.slice(0, maxToolsPerResponse);
2637-
fbCapped = true;
2638-
}
2639-
const results = [];
2640-
for (const call of effectiveFallbackCalls) {
2641-
if (toolPaceMs > 0 && results.length > 0) {
2642-
await new Promise(r => setTimeout(r, toolPaceMs));
2643-
}
2644-
if (options.writeFileHistory && call.tool === 'write_file') {
2645-
const wfPath = call.params?.filePath || call.params?.path || call.params?.file_path;
2646-
const wfLimit = (options.continuationCount || 0) > 0 ? 5 : 6;
2647-
if (wfPath && options.writeFileHistory[wfPath] && options.writeFileHistory[wfPath].count >= wfLimit) {
2648-
console.log(`[MCP] Write dedup: blocking ${call.tool} to "${wfPath}" (already written ${options.writeFileHistory[wfPath].count}x)`);
2649-
let autoConverted = false;
2650-
const newContent = call.params?.content || '';
2651-
if (newContent.length > 50) {
2652-
try {
2653-
const _fs = require('fs'), _path = require('path');
2654-
const fullPath = _path.resolve(this.projectPath || '.', wfPath);
2655-
const existing = _fs.existsSync(fullPath) ? _fs.readFileSync(fullPath, 'utf-8') : '';
2656-
if (existing.length > 0) {
2657-
const extracted = this._extractNewContentForAutoConvert(existing, newContent);
2658-
if (extracted) {
2659-
console.log(`[MCP] Write dedup auto-convert (${extracted.method}): "${wfPath}" (${extracted.overlapLines} overlap lines)`);
2660-
const ar = await this.executeTool('append_to_file', { filePath: wfPath, content: extracted.newContent });
2661-
results.push({ tool: 'append_to_file', params: { filePath: wfPath, content: '...(auto-converted)' }, result: ar });
2662-
autoConverted = true;
2663-
}
2664-
if (!autoConverted) {
2665-
// No extractable new content — file content is a subset or duplicate
2666-
results.push({ tool: call.tool, params: call.params, result: { success: true, message: `File "${wfPath}" already has this content (${existing.split('\n').length} lines). Use append_to_file to add new content, or move on to the next task.` } });
2667-
autoConverted = true;
2668-
}
2669-
}
2670-
} catch (e) { console.warn(`[MCP] Write dedup auto-convert failed: ${e.message}`); }
2671-
}
2672-
if (!autoConverted) {
2673-
// Include file tail so model knows where to append from
2674-
let fileTailHint = '';
2675-
try {
2676-
const _fs2 = require('fs'), _path2 = require('path');
2677-
const fp = _path2.resolve(this.projectPath || '.', wfPath);
2678-
if (_fs2.existsSync(fp)) {
2679-
const lines = _fs2.readFileSync(fp, 'utf-8').split('\n');
2680-
const tail = lines.slice(-10).join('\n');
2681-
fileTailHint = ` The file currently has ${lines.length} lines. Last 10 lines:\n${tail}\nUse append_to_file with filePath="${wfPath}" to continue from here.`;
2682-
}
2683-
} catch (_) {}
2684-
results.push({ tool: call.tool, params: call.params, result: { success: false, error: `BLOCKED: "${wfPath}" already written ${options.writeFileHistory[wfPath].count} times.${fileTailHint || ' Use append_to_file or edit_file instead.'}` } });
2685-
}
2686-
continue;
2687-
}
2688-
}
2689-
const result = await this.executeTool(call.tool, call.params || {});
2690-
results.push({ tool: call.tool, params: call.params, result });
2691-
}
2692-
return { hasToolCalls: true, results, capped: fbCapped, skippedToolCalls: fbSkipped, formalCallCount: 0, droppedFilePaths: [] };
2693-
}
2694-
console.log('[MCP] No fallback tool calls either');
2620+
console.log('[MCP] No formal tool calls found');
26952621
return { hasToolCalls: false, results: [], formalCallCount: 0, droppedFilePaths: _repairDropped };
26962622
}
26972623

@@ -2750,49 +2676,6 @@ class MCPToolServer {
27502676
if (call.tool.startsWith('browser_')) call.params = this._normalizeBrowserParams(call.tool, call.params || {});
27512677
else call.params = this._normalizeFsParams(call.tool, call.params || {});
27522678
}
2753-
if (options.writeFileHistory && call.tool === 'write_file') {
2754-
const wfPath = call.params?.filePath || call.params?.path || call.params?.file_path;
2755-
const wfLimit = (options.continuationCount || 0) > 0 ? 5 : 6;
2756-
if (wfPath && options.writeFileHistory[wfPath] && options.writeFileHistory[wfPath].count >= wfLimit) {
2757-
console.log(`[MCP] Write dedup: blocking ${call.tool} to "${wfPath}" (already written ${options.writeFileHistory[wfPath].count}x)`);
2758-
let autoConverted = false;
2759-
const newContent = call.params?.content || '';
2760-
if (newContent.length > 50) {
2761-
try {
2762-
const _fs = require('fs'), _path = require('path');
2763-
const fullPath = _path.resolve(this.projectPath || '.', wfPath);
2764-
const existing = _fs.existsSync(fullPath) ? _fs.readFileSync(fullPath, 'utf-8') : '';
2765-
if (existing.length > 0) {
2766-
const extracted = this._extractNewContentForAutoConvert(existing, newContent);
2767-
if (extracted) {
2768-
console.log(`[MCP] Write dedup auto-convert (${extracted.method}): "${wfPath}" (${extracted.overlapLines} overlap lines)`);
2769-
const ar = await this.executeTool('append_to_file', { filePath: wfPath, content: extracted.newContent });
2770-
results.push({ tool: 'append_to_file', params: { filePath: wfPath, content: '...(auto-converted)' }, result: ar });
2771-
autoConverted = true;
2772-
}
2773-
if (!autoConverted) {
2774-
results.push({ tool: call.tool, params: call.params, result: { success: true, message: `File "${wfPath}" already has this content (${existing.split('\n').length} lines). Use append_to_file to add new content, or move on to the next task.` } });
2775-
autoConverted = true;
2776-
}
2777-
}
2778-
} catch (e) { console.warn(`[MCP] Write dedup auto-convert failed: ${e.message}`); }
2779-
}
2780-
if (!autoConverted) {
2781-
let fileTailHint = '';
2782-
try {
2783-
const _fs2 = require('fs'), _path2 = require('path');
2784-
const fp = _path2.resolve(this.projectPath || '.', wfPath);
2785-
if (_fs2.existsSync(fp)) {
2786-
const lines = _fs2.readFileSync(fp, 'utf-8').split('\n');
2787-
const tail = lines.slice(-10).join('\n');
2788-
fileTailHint = ` The file currently has ${lines.length} lines. Last 10 lines:\n${tail}\nUse append_to_file with filePath="${wfPath}" to continue from here.`;
2789-
}
2790-
} catch (_) {}
2791-
results.push({ tool: call.tool, params: call.params, result: { success: false, error: `BLOCKED: "${wfPath}" already written ${options.writeFileHistory[wfPath].count} times.${fileTailHint || ' Use append_to_file or edit_file instead.'}` } });
2792-
}
2793-
continue;
2794-
}
2795-
}
27962679
const result = await this.executeTool(call.tool, call.params || {});
27972680
console.log('[MCP] Executed tool:', call.tool, 'result:', result.success ? 'success' : 'failed');
27982681
results.push({ tool: call.tool, params: call.params, result });
@@ -2809,10 +2692,6 @@ class MCPToolServer {
28092692
return { hasToolCalls: true, results, capped: capped || browserCapped, skippedToolCalls: skippedCount + browserSkipped, formalCallCount: toolCalls.length, droppedFilePaths: _repairDropped };
28102693
}
28112694

2812-
_detectFallbackFileOperations(responseText, userMessage, lastDroppedFilePaths = []) {
2813-
return standaloneFallbackDetect(responseText, userMessage, lastDroppedFilePaths);
2814-
}
2815-
28162695
// ─── Tool Prompt Building ────────────────────────────────────────────────
28172696

28182697
getToolPrompt() {

0 commit comments

Comments
 (0)