From 76e8098887269f4642938bdb73e747e04dde1e14 Mon Sep 17 00:00:00 2001 From: abetlen Date: Sun, 7 Jun 2026 15:13:31 -0700 Subject: [PATCH] feat: update llama.cpp to ggml-org/llama.cpp@9e3b928fd --- CHANGELOG.md | 1 + llama_cpp/llama_cpp.py | 7 +++++++ vendor/llama.cpp | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4912e00e4..e20ed73c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat: update llama.cpp to ggml-org/llama.cpp@9e3b928fd - feat(example): add OpenAI-compatible embeddings endpoint by @abetlen in #2281 ## [0.3.27] diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index 44c25a519..21f85c81c 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -949,6 +949,10 @@ class llama_sampler_seq_config(ctypes.Structure): # // ref: https://github.com/ggml-org/llama.cpp/pull/14363 # struct llama_sampler_seq_config * samplers; # size_t n_samplers; +# +# // a source/target/parent context +# // can be utilized in various ways, for example by sharing results or llama_memory between 2 contexts +# struct llama_context * ctx_other; # }; class llama_context_params(ctypes.Structure): """Parameters for llama_context @@ -989,6 +993,7 @@ class llama_context_params(ctypes.Structure): kv_unified (bool): use a unified buffer across the input sequences when computing the attention samplers (ctypes.POINTER(llama_sampler_seq_config)): backend sampler chain configuration n_samplers (int): number of backend sampler chain configurations + ctx_other (llama_context_p): source, target, or parent context """ if TYPE_CHECKING: @@ -1027,6 +1032,7 @@ class llama_context_params(ctypes.Structure): kv_unified: bool samplers: ctypes.POINTER(llama_sampler_seq_config) n_samplers: int + ctx_other: llama_context_p _fields_ = [ ("n_ctx", ctypes.c_uint32), @@ -1064,6 +1070,7 @@ class llama_context_params(ctypes.Structure): ("kv_unified", ctypes.c_bool), ("samplers", ctypes.POINTER(llama_sampler_seq_config)), ("n_samplers", ctypes.c_size_t), + ("ctx_other", llama_context_p_ctypes), ] diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 465b1f0e7..9e3b928fd 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 465b1f0e75c590426cff3ca998bcd25297071a5b +Subproject commit 9e3b928fd8c9d14dbf15a8768b9fdd7e5c721d66