From 31cd555856aef96278c28d36f4a07a53ec90ba6d Mon Sep 17 00:00:00 2001 From: abetlen Date: Thu, 4 Jun 2026 22:24:52 -0700 Subject: [PATCH 1/3] docs: fix Gemma 4 Colab notebook --- README.md | 4 +++- examples/colab/notebook.ipynb | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3f801285e..ff1b02d95 100644 --- a/README.md +++ b/README.md @@ -535,9 +535,11 @@ Below are the supported multi-modal models and their respective chat handlers (P | [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` | | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6` | | [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` | -| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb) | `Gemma4ChatHandler` | `gemma4` | +| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` | | GGUF models with an mtmd projector and embedded chat template | `MTMDChatHandler` | `mtmd` | +Gemma 4 Colab example: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb) + Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images. ```python diff --git a/examples/colab/notebook.ipynb b/examples/colab/notebook.ipynb index c9b8d8dcb..8eaf3a140 100644 --- a/examples/colab/notebook.ipynb +++ b/examples/colab/notebook.ipynb @@ -51,7 +51,7 @@ "from llama_cpp.llama_chat_format import Gemma4ChatHandler\n", "\n", "MODEL_REPO = \"ggml-org/gemma-4-12B-it-GGUF\"\n", - "MODEL_FILE = \"gemma-4-12B-it-Q4_K_M.gguf\"\n", + "MODEL_FILE = \"gemma-4-12B-it-Q8_0.gguf\"\n", "MMPROJ_FILE = \"mmproj-gemma-4-12B-it-Q8_0.gguf\"\n", "\n", "chat_handler = Gemma4ChatHandler.from_pretrained(\n", @@ -81,7 +81,7 @@ " messages=[\n", " {\n", " \"role\": \"user\",\n", - " \"content\": \"Write the exact string `` and nothing else.\",\n", + " \"content\": \"What is the capital of France? Answer in one sentence.\",\n", " }\n", " ],\n", " max_tokens=32,\n", @@ -99,7 +99,7 @@ "source": [ "from IPython.display import Image, display\n", "\n", - "IMAGE_URL = \"https://raw.githubusercontent.com/abetlen/llama-cpp-python/main/vendor/llama.cpp/tools/mtmd/test-1.jpeg\"\n", + "IMAGE_URL = \"https://raw.githubusercontent.com/ggml-org/llama.cpp/master/tools/mtmd/test-1.jpeg\"\n", "\n", "display(Image(url=IMAGE_URL, width=320))\n" ] From bc728b6556a0d2ec77aa54c454b8c760a5c3393c Mon Sep 17 00:00:00 2001 From: abetlen Date: Thu, 4 Jun 2026 22:38:08 -0700 Subject: [PATCH 2/3] docs: use smaller Gemma 4 Colab model --- examples/colab/notebook.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/colab/notebook.ipynb b/examples/colab/notebook.ipynb index 8eaf3a140..8e258b9c0 100644 --- a/examples/colab/notebook.ipynb +++ b/examples/colab/notebook.ipynb @@ -51,7 +51,7 @@ "from llama_cpp.llama_chat_format import Gemma4ChatHandler\n", "\n", "MODEL_REPO = \"ggml-org/gemma-4-12B-it-GGUF\"\n", - "MODEL_FILE = \"gemma-4-12B-it-Q8_0.gguf\"\n", + "MODEL_FILE = \"gemma-4-12B-it-Q4_K_M.gguf\"\n", "MMPROJ_FILE = \"mmproj-gemma-4-12B-it-Q8_0.gguf\"\n", "\n", "chat_handler = Gemma4ChatHandler.from_pretrained(\n", From 8e2f80edf4b0c5d4b10da3a31cd9aa35ad14fb4c Mon Sep 17 00:00:00 2001 From: abetlen Date: Thu, 4 Jun 2026 22:38:48 -0700 Subject: [PATCH 3/3] docs: update Gemma 4 Colab CTA --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ff1b02d95..8f7b65e83 100644 --- a/README.md +++ b/README.md @@ -538,7 +538,7 @@ Below are the supported multi-modal models and their respective chat handlers (P | [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` | | GGUF models with an mtmd projector and embedded chat template | `MTMDChatHandler` | `mtmd` | -Gemma 4 Colab example: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb) +Try Gemma 4 12B in Google Colab -> [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abetlen/llama-cpp-python/blob/main/examples/colab/notebook.ipynb) Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images.