From 31cd555856aef96278c28d36f4a07a53ec90ba6d Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Thu, 4 Jun 2026 22:24:52 -0700
Subject: [PATCH 1/3] docs: fix Gemma 4 Colab notebook and README badge placement

---
 README.md                     | 4 +++-
 examples/colab/notebook.ipynb | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 3f801285e..ff1b02d95 100644
--- a/README.md
+++ b/README.md
@@ -535,9 +535,11 @@ Below are the supported multi-modal models and their respective chat handlers (P
 | [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` |
 | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6` |
 | [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
-| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb) | `Gemma4ChatHandler` | `gemma4` |
+| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` |
 | GGUF models with an mtmd projector and embedded chat template | `MTMDChatHandler` | `mtmd` |
 
+Gemma 4 Colab example: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb)
+
 Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images.
 
 ```python
diff --git a/examples/colab/notebook.ipynb b/examples/colab/notebook.ipynb
index c9b8d8dcb..8eaf3a140 100644
--- a/examples/colab/notebook.ipynb
+++ b/examples/colab/notebook.ipynb
@@ -51,7 +51,7 @@
         "from llama_cpp.llama_chat_format import Gemma4ChatHandler\n",
         "\n",
         "MODEL_REPO = \"ggml-org/gemma-4-12B-it-GGUF\"\n",
-        "MODEL_FILE = \"gemma-4-12B-it-Q4_K_M.gguf\"\n",
+        "MODEL_FILE = \"gemma-4-12B-it-Q8_0.gguf\"\n",
         "MMPROJ_FILE = \"mmproj-gemma-4-12B-it-Q8_0.gguf\"\n",
         "\n",
         "chat_handler = Gemma4ChatHandler.from_pretrained(\n",
@@ -81,7 +81,7 @@
         "    messages=[\n",
         "        {\n",
         "            \"role\": \"user\",\n",
-        "            \"content\": \"Write the exact string `<stdio.h>` and nothing else.\",\n",
+        "            \"content\": \"What is the capital of France? Answer in one sentence.\",\n",
         "        }\n",
         "    ],\n",
         "    max_tokens=32,\n",
@@ -99,7 +99,7 @@
       "source": [
         "from IPython.display import Image, display\n",
         "\n",
-        "IMAGE_URL = \"https://raw.githubusercontent.com/abetlen/llama-cpp-python/main/vendor/llama.cpp/tools/mtmd/test-1.jpeg\"\n",
+        "IMAGE_URL = \"https://raw.githubusercontent.com/ggml-org/llama.cpp/master/tools/mtmd/test-1.jpeg\"\n",
         "\n",
         "display(Image(url=IMAGE_URL, width=320))\n"
       ]

From bc728b6556a0d2ec77aa54c454b8c760a5c3393c Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Thu, 4 Jun 2026 22:38:08 -0700
Subject: [PATCH 2/3] docs: use smaller Gemma 4 Colab model

---
 examples/colab/notebook.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/colab/notebook.ipynb b/examples/colab/notebook.ipynb
index 8eaf3a140..8e258b9c0 100644
--- a/examples/colab/notebook.ipynb
+++ b/examples/colab/notebook.ipynb
@@ -51,7 +51,7 @@
         "from llama_cpp.llama_chat_format import Gemma4ChatHandler\n",
         "\n",
         "MODEL_REPO = \"ggml-org/gemma-4-12B-it-GGUF\"\n",
-        "MODEL_FILE = \"gemma-4-12B-it-Q8_0.gguf\"\n",
+        "MODEL_FILE = \"gemma-4-12B-it-Q4_K_M.gguf\"\n",
         "MMPROJ_FILE = \"mmproj-gemma-4-12B-it-Q8_0.gguf\"\n",
         "\n",
         "chat_handler = Gemma4ChatHandler.from_pretrained(\n",

From 8e2f80edf4b0c5d4b10da3a31cd9aa35ad14fb4c Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Thu, 4 Jun 2026 22:38:48 -0700
Subject: [PATCH 3/3] docs: update Gemma 4 Colab CTA

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ff1b02d95..8f7b65e83 100644
--- a/README.md
+++ b/README.md
@@ -538,7 +538,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
 | [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` |
 | GGUF models with an mtmd projector and embedded chat template | `MTMDChatHandler` | `mtmd` |
 
-Gemma 4 Colab example: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb)
+Try Gemma 4 12B in Google Colab -> [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb)
 
 Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images.