Commit 37e3449 (1 parent: a5cd191)

fix: Gemma4 max_new_tokens 512 → 2048 — responses were truncating mid-answer

2 files changed: 3 additions & 1 deletion

New file (1 addition & 0 deletions):

@@ -0,0 +1 @@
+# Fix: Gemma 4 response truncation — max_new_tokens 512 → 2048

public/ai-worker-gemma4.js (2 additions & 1 deletion):

@@ -213,7 +213,8 @@ async function generate({ userPrompt, prompt, attachments = [], context, chatHis
 
   await model.generate({
     ...inputs,
-    max_new_tokens: options.maxTokens || 512,
+    max_new_tokens: options.maxTokens || 2048,
+
     do_sample: true,
     temperature: options.temperature || 0.7,
     top_p: options.topP || 0.9,
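One subtlety in the fallback pattern this diff keeps (`options.maxTokens || 2048`): `||` substitutes the default for any falsy value, so an explicit `maxTokens: 0` would also be replaced, whereas `??` only replaces `null`/`undefined`. A minimal sketch of the difference; the `pickMaxTokens` helpers below are hypothetical illustrations, not code from this commit:

```javascript
// Mirrors the committed fallback: any falsy maxTokens (0, null,
// undefined, "") falls back to the 2048 default.
function pickMaxTokens(options) {
  return options.maxTokens || 2048;
}

// Alternative using nullish coalescing: only null/undefined fall
// back, so an explicit 0 is preserved.
function pickMaxTokensNullish(options) {
  return options.maxTokens ?? 2048;
}

console.log(pickMaxTokens({ maxTokens: 0 }));        // 2048
console.log(pickMaxTokensNullish({ maxTokens: 0 })); // 0
console.log(pickMaxTokens({}));                      // 2048
console.log(pickMaxTokens({ maxTokens: 512 }));      // 512
```

For a token-count option, `||` is usually the intended behavior anyway, since 0 is not a useful generation budget; the distinction matters mainly for options where 0 or an empty string is a legitimate value.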
