From 76e8098887269f4642938bdb73e747e04dde1e14 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Sun, 7 Jun 2026 15:13:31 -0700
Subject: [PATCH] feat: update llama.cpp to ggml-org/llama.cpp@9e3b928fd

---
 CHANGELOG.md           | 1 +
 llama_cpp/llama_cpp.py | 7 +++++++
 vendor/llama.cpp       | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4912e00e4..e20ed73c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- feat: update llama.cpp to ggml-org/llama.cpp@9e3b928fd
 - feat(example): add OpenAI-compatible embeddings endpoint by @abetlen in #2281
 
 ## [0.3.27]
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 44c25a519..21f85c81c 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -949,6 +949,10 @@ class llama_sampler_seq_config(ctypes.Structure):
 #                       // ref: https://github.com/ggml-org/llama.cpp/pull/14363
 #     struct llama_sampler_seq_config * samplers;
 #     size_t                            n_samplers;
+#
+#     // a source/target/parent context
+#     // can be utilized in various ways, for example by sharing results or llama_memory between 2 contexts
+#     struct llama_context * ctx_other;
 # };
 class llama_context_params(ctypes.Structure):
     """Parameters for llama_context
@@ -989,6 +993,7 @@ class llama_context_params(ctypes.Structure):
         kv_unified (bool): use a unified buffer across the input sequences when computing the attention
         samplers (ctypes.POINTER(llama_sampler_seq_config)): backend sampler chain configuration
         n_samplers (int): number of backend sampler chain configurations
+        ctx_other (llama_context_p): another context (source, target, or parent), e.g. for sharing results or llama_memory between two contexts
     """
 
     if TYPE_CHECKING:
@@ -1027,6 +1032,7 @@ class llama_context_params(ctypes.Structure):
         kv_unified: bool
         samplers: ctypes.POINTER(llama_sampler_seq_config)
         n_samplers: int
+        ctx_other: llama_context_p
 
     _fields_ = [
         ("n_ctx", ctypes.c_uint32),
@@ -1064,6 +1070,7 @@ class llama_context_params(ctypes.Structure):
         ("kv_unified", ctypes.c_bool),
         ("samplers", ctypes.POINTER(llama_sampler_seq_config)),
         ("n_samplers", ctypes.c_size_t),
+        ("ctx_other", llama_context_p_ctypes),
     ]
 
 
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 465b1f0e7..9e3b928fd 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 465b1f0e75c590426cff3ca998bcd25297071a5b
+Subproject commit 9e3b928fd8c9d14dbf15a8768b9fdd7e5c721d66