From a449adcd439b23339b96f30f7547e6ffb2957aff Mon Sep 17 00:00:00 2001 From: "tattn (Tatsuya Tanaka)" Date: Sat, 25 Apr 2026 12:32:44 +0900 Subject: [PATCH 1/3] Support Gemma 4 - Added symlinks for new headers: unicode.h, mtmd-debug.h, jinja, nlohmann, and module.modulemap. - Updated llama.cpp submodule to a new commit. - Enhanced utils.h and utils.cpp with new functions for handling chat parameters and tool inputs. - Removed obsolete minja symlink. - Introduced new mtmd-image files for improved image handling. - Updated Context.swift and Utility.swift for better integration with MLX. - Modified glob patterns in Globs.swift to include new jinja files. - Removed outdated LlamaToolCallParserTests and refactored LlamaToolTests for improved functionality. - Added new tests for message processing and tool call parsing. - Updated update_dependencies.sh to verify symlink integrity after submodule updates. --- .github/workflows/docc.yml | 8 +- .github/workflows/test.yml | 8 +- .github/workflows/update-dependencies.yml | 4 +- Example/LocalLLMClientExample/AI.swift | 29 +- .../LocalLLMClientExample/ChatViewModel.swift | 7 + .../LocalLLMClientExample/Downloader.swift | 4 +- Package.resolved | 17 +- Package.swift | 40 +- Sources/LocalLLMClientLlama/LlamaClient.swift | 31 +- .../LlamaToolCallParser.swift | 44 +- .../MessageProcessing/MessageProcessor.swift | 16 +- Sources/LocalLLMClientLlama/Model.swift | 47 +- Sources/LocalLLMClientLlama/Multimodal.swift | 3 +- Sources/LocalLLMClientLlamaC/clip-graph.h | 1 + Sources/LocalLLMClientLlamaC/clip-model.h | 1 + .../common/build-info.cpp | 14 + .../LocalLLMClientLlamaC/common/build-info.h | 1 + .../common/chat-auto-parser-generator.cpp | 1 + .../common/chat-auto-parser-helpers.cpp | 1 + .../common/chat-auto-parser-helpers.h | 1 + .../common/chat-auto-parser.h | 1 + .../common/chat-diff-analyzer.cpp | 1 + .../common/chat-parser.cpp | 1 - .../LocalLLMClientLlamaC/common/chat-parser.h | 1 - .../common/chat-peg-parser.cpp | 1 + .../common/chat-peg-parser.h | 1 + Sources/LocalLLMClientLlamaC/common/log.cpp | 1 + Sources/LocalLLMClientLlamaC/common/log.h | 1 + .../common/peg-parser.cpp | 1 + .../LocalLLMClientLlamaC/common/peg-parser.h | 1 + .../common/reasoning-budget.cpp | 1 + .../common/reasoning-budget.h | 1 + .../LocalLLMClientLlamaC/common/sampling.cpp | 1 + .../LocalLLMClientLlamaC/common/sampling.h | 1 + .../LocalLLMClientLlamaC/common/unicode.cpp | 1 + Sources/LocalLLMClientLlamaC/common/unicode.h | 1 + .../LocalLLMClientLlamaC/debug/mtmd-debug.h | 1 + .../LocalLLMClientLlamaC/exclude/llama.cpp | 2 +- Sources/LocalLLMClientLlamaC/include/jinja | 1 + .../include/module.modulemap | 15 + Sources/LocalLLMClientLlamaC/include/nlohmann | 1 + Sources/LocalLLMClientLlamaC/include/utils.h | 20 + Sources/LocalLLMClientLlamaC/minja | 1 - Sources/LocalLLMClientLlamaC/models | 1 + Sources/LocalLLMClientLlamaC/mtmd-image.cpp | 1 + Sources/LocalLLMClientLlamaC/mtmd-image.h | 1 + Sources/LocalLLMClientLlamaC/utils.cpp | 52 ++ Sources/LocalLLMClientMLX/Context.swift | 22 +- Sources/LocalLLMClientMLX/Utility.swift | 2 +- Sources/LocalLLMClientUtility/Globs.swift | 6 +- .../LlamaToolCallParserTests.swift | 146 ------ .../LocalLLMClientLlamaToolTests.swift | 474 ++++-------------- .../MessageProcessorTests.swift | 16 + .../LocalLLMClientLlamaTests/ModelTests.swift | 6 +- .../FilesMetadataTests.swift | 3 +- scripts/update_dependencies.sh | 27 +- 56 files changed, 432 insertions(+), 660 deletions(-) create mode 120000 Sources/LocalLLMClientLlamaC/clip-graph.h create mode 120000 Sources/LocalLLMClientLlamaC/clip-model.h create mode 100644 Sources/LocalLLMClientLlamaC/common/build-info.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/build-info.h create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-auto-parser-generator.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.h create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-auto-parser.h create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-diff-analyzer.cpp delete mode 120000 Sources/LocalLLMClientLlamaC/common/chat-parser.cpp delete mode 120000 Sources/LocalLLMClientLlamaC/common/chat-parser.h create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-peg-parser.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/chat-peg-parser.h create mode 120000 Sources/LocalLLMClientLlamaC/common/peg-parser.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/peg-parser.h create mode 120000 Sources/LocalLLMClientLlamaC/common/reasoning-budget.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/reasoning-budget.h create mode 120000 Sources/LocalLLMClientLlamaC/common/sampling.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/sampling.h create mode 120000 Sources/LocalLLMClientLlamaC/common/unicode.cpp create mode 120000 Sources/LocalLLMClientLlamaC/common/unicode.h create mode 120000 Sources/LocalLLMClientLlamaC/debug/mtmd-debug.h create mode 120000 Sources/LocalLLMClientLlamaC/include/jinja create mode 100644 Sources/LocalLLMClientLlamaC/include/module.modulemap create mode 120000 Sources/LocalLLMClientLlamaC/include/nlohmann delete mode 120000 Sources/LocalLLMClientLlamaC/minja create mode 120000 Sources/LocalLLMClientLlamaC/models create mode 120000 Sources/LocalLLMClientLlamaC/mtmd-image.cpp create mode 120000 Sources/LocalLLMClientLlamaC/mtmd-image.h delete mode 100644 Tests/LocalLLMClientLlamaTests/LlamaToolCallParserTests.swift diff --git a/.github/workflows/docc.yml b/.github/workflows/docc.yml index 4775469..c47e6c5 100644 --- a/.github/workflows/docc.yml +++ b/.github/workflows/docc.yml @@ -16,9 +16,9 @@ concurrency: jobs: generate-docc: - runs-on: macos-15 + runs-on: macos-26 env: - DEVELOPER_DIR: "/Applications/Xcode_16.4.app/Contents/Developer" + DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer" steps: - uses: actions/checkout@v4 with: @@ -27,6 +27,10 @@ jobs: - name: Setup Pages uses: actions/configure-pages@v4 + - name: Download Metal Toolchain + continue-on-error: true + run: xcodebuild -downloadComponent MetalToolchain + - name: Build DocC # NOTE: LocalLLMClientMLX documentation is excluded because mlx-swift # symbol extraction requires Metal GPU support which is not available diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cd2725b..2e11da1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,7 +35,7 @@ jobs: # To enable MLX tests, use self-hosted runners: runs-on: [self-hosted, macos] test-type: [Llama, FoundationModels] env: - DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer" + DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer" TEST_RUNNER_GITHUB_MODEL_CACHE: "${{ github.workspace }}/model_cache" steps: - &checkout @@ -74,7 +74,7 @@ jobs: runs-on: macos-26 needs: test-macos env: - DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer" + DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer" steps: - *checkout @@ -90,7 +90,7 @@ jobs: runs-on: macos-26 needs: test-macos env: - DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer" + DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer" steps: - *checkout @@ -100,7 +100,7 @@ jobs: working-directory: Example run: | xcodebuild -downloadPlatform iOS - xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.2' CODE_SIGN_IDENTITY="-" + xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.4' CODE_SIGN_IDENTITY="-" test-ubuntu-x86_64: runs-on: ubuntu-latest diff --git a/.github/workflows/update-dependencies.yml b/.github/workflows/update-dependencies.yml index 62c200a..4380f1f 100644 --- a/.github/workflows/update-dependencies.yml +++ b/.github/workflows/update-dependencies.yml @@ -7,9 +7,9 @@ on: jobs: update-dependencies: - runs-on: macos-15 + runs-on: macos-26 env: - DEVELOPER_DIR: "/Applications/Xcode_16.4.app/Contents/Developer" + DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer" permissions: contents: write pull-requests: write diff --git a/Example/LocalLLMClientExample/AI.swift b/Example/LocalLLMClientExample/AI.swift index 114ffca..7f5b6d6 100644 --- a/Example/LocalLLMClientExample/AI.swift +++ b/Example/LocalLLMClientExample/AI.swift @@ -12,9 +12,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable { case qwen3_4b case qwen2_5VL_3b case gemma3_4b_mlx + case gemma4_e2b_mlx case phi4mini case gemma3 case gemma3_4b + case gemma4_E2B case mobileVLM_3b static let `default` = qwen3 @@ -25,9 +27,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable { case .qwen3_4b: "MLX / Qwen3 4B" case .qwen2_5VL_3b: "MLX / Qwen2.5VL 3B" case .gemma3_4b_mlx: "MLX / Gemma3 4B" + case .gemma4_e2b_mlx: "MLX / Gemma4 E2B (4bit)" case .phi4mini: "llama.cpp / Phi-4 Mini 3.8B" case .gemma3: "llama.cpp / Gemma3 1B" case .gemma3_4b: "llama.cpp / Gemma3 4B" + case .gemma4_E2B: "llama.cpp / Gemma4 E2B" case .mobileVLM_3b: "llama.cpp / MobileVLM 3B" } } @@ -38,30 +42,35 @@ enum LLMModel: Sendable, CaseIterable, Identifiable { case .qwen3_4b: "mlx-community/Qwen3-4B-4bit" case .qwen2_5VL_3b: "mlx-community/Qwen2.5-VL-3B-Instruct-abliterated-4bit" case .gemma3_4b_mlx: "mlx-community/gemma-3-4b-it-qat-4bit" + case .gemma4_e2b_mlx: "mlx-community/gemma-4-e2b-it-4bit" case .phi4mini: "unsloth/Phi-4-mini-instruct-GGUF" case .gemma3: "lmstudio-community/gemma-3-1B-it-qat-GGUF" case .gemma3_4b: "lmstudio-community/gemma-3-4B-it-qat-GGUF" + case .gemma4_E2B: "lmstudio-community/gemma-4-E2B-it-GGUF" case .mobileVLM_3b: "Blombert/MobileVLM-3B-GGUF" } } var filename: String? { switch self { - case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx: nil + case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx: nil case .phi4mini: "Phi-4-mini-instruct-Q4_K_M.gguf" case .gemma3: "gemma-3-1B-it-QAT-Q4_0.gguf" case .gemma3_4b: "gemma-3-4B-it-QAT-Q4_0.gguf" + case .gemma4_E2B: "gemma-4-E2B-it-Q4_K_M.gguf" case .mobileVLM_3b: "ggml-MobileVLM-3B-q5_k_s.gguf" } } var mmprojFilename: String? { switch self { - case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .phi4mini, .gemma3: nil + case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx, .phi4mini, .gemma3: nil #if os(macOS) case .gemma3_4b: "mmproj-model-f16.gguf" + case .gemma4_E2B: "mmproj-gemma-4-E4B-it-BF16.gguf" #elseif os(iOS) - case .gemma3_4b: nil + // Total footprint (model + mmproj ≈ 6 GB) exceeds what most iPhones can map; text-only on iOS. + case .gemma3_4b, .gemma4_E2B: nil #endif case .mobileVLM_3b: "mmproj-model-f16.gguf" } @@ -75,11 +84,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable { switch self { case .qwen3, .qwen3_4b, .phi4mini, .gemma3: false #if os(macOS) - case .gemma3_4b: true + case .gemma3_4b, .gemma4_E2B: true #elseif os(iOS) - case .gemma3_4b: false + case .gemma3_4b, .gemma4_E2B: false #endif - case .qwen2_5VL_3b, .gemma3_4b_mlx, .mobileVLM_3b: true + case .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx, .mobileVLM_3b: true } } @@ -87,14 +96,16 @@ enum LLMModel: Sendable, CaseIterable, Identifiable { switch self { case .gemma3_4b_mlx: return [""] - case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .phi4mini, .gemma3, .gemma3_4b, .mobileVLM_3b: + case .gemma4_e2b_mlx: + return [""] + case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .mobileVLM_3b: return [] } } - + var supportsTools: Bool { switch self { - case .qwen3, .qwen3_4b, .phi4mini, .gemma3, .gemma3_4b: + case .qwen3, .qwen3_4b, .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .gemma4_e2b_mlx: return true case .qwen2_5VL_3b, .gemma3_4b_mlx, .mobileVLM_3b: return false diff --git a/Example/LocalLLMClientExample/ChatViewModel.swift b/Example/LocalLLMClientExample/ChatViewModel.swift index ab7924c..e2685b0 100644 --- a/Example/LocalLLMClientExample/ChatViewModel.swift +++ b/Example/LocalLLMClientExample/ChatViewModel.swift @@ -14,9 +14,14 @@ final class ChatViewModel { private var ai: AI private var generateTask: Task? private var generatingText = "" + /// Optimistically displayed user message until it lands in `ai.messages`. + private var pendingUserMessage: LLMInput.Message? var messages: [LLMInput.Message] { var messages = ai.messages + if let pendingUserMessage, messages.last?.role != .user { + messages.append(pendingUserMessage) + } if !generatingText.isEmpty, messages.last?.role != .assistant { messages.append(.assistant(generatingText)) } @@ -33,6 +38,7 @@ final class ChatViewModel { let currentInput = (text: inputText, images: inputAttachments) inputText = "" inputAttachments = [] + pendingUserMessage = .user(currentInput.text, attachments: currentInput.images) generateTask = Task { generatingText = "" @@ -46,6 +52,7 @@ final class ChatViewModel { (inputText, inputAttachments) = currentInput } + pendingUserMessage = nil generateTask = nil generatingText = "" } diff --git a/Example/LocalLLMClientExample/Downloader.swift b/Example/LocalLLMClientExample/Downloader.swift index cd367ff..8ac15d6 100644 --- a/Example/LocalLLMClientExample/Downloader.swift +++ b/Example/LocalLLMClientExample/Downloader.swift @@ -5,8 +5,8 @@ struct Downloader: Sendable { init(model: LLMModel) { self.model = model let globs: Globs = switch model { - case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx: .mlx - case .phi4mini, .gemma3, .gemma3_4b, .mobileVLM_3b: .init( + case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx: .mlx + case .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .mobileVLM_3b: .init( (model.filename.map { [$0] } ?? []) + (model.mmprojFilename.map { [$0] } ?? []) )} #if os(macOS) diff --git a/Package.resolved b/Package.resolved index 917377d..1fe8ed1 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "c708fe6da241f5f654a397439002b63f3d60f654ef69d1075851dde3f967586f", + "originHash" : "f2adb03887aa68beac074e1a360a1252567315c0efcac21bf08bf605120abb14", "pins" : [ { "identity" : "eventsource", @@ -24,7 +24,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/ml-explore/mlx-swift-lm", "state" : { - "revision" : "2a296f145c3129fea4290bb6e4a0a5fb458efa06" + "revision" : "1c05248bb0899e2a7a4962b84d319cf12f4e12aa", + "version" : "3.31.3" } }, { @@ -104,8 +105,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/huggingface/swift-jinja", "state" : { - "revision" : "62b91283572c80a9d79fe77e2fa344cfd9233cfa", - "version" : "2.0.2" + "revision" : "0aeefadec459ce8e11a333769950fb86183aca43", + "version" : "2.3.5" } }, { @@ -129,7 +130,7 @@ { "identity" : "swift-syntax", "kind" : "remoteSourceControl", - "location" : "https://github.com/swiftlang/swift-syntax.git", + "location" : "https://github.com/swiftlang/swift-syntax", "state" : { "revision" : "0687f71944021d616d34d922343dcef086855920", "version" : "600.0.1" @@ -147,10 +148,10 @@ { "identity" : "swift-transformers", "kind" : "remoteSourceControl", - "location" : "https://github.com/huggingface/swift-transformers.git", + "location" : "https://github.com/huggingface/swift-transformers", "state" : { - "revision" : "58c4bc11963a140358d791f678a60a2745a23146", - "version" : "1.2.1" + "revision" : "b38443e44d93eca770f2eb68e2a4d0fa100f9aa2", + "version" : "1.3.0" } }, { diff --git a/Package.swift b/Package.swift index aca59cf..e58e16f 100644 --- a/Package.swift +++ b/Package.swift @@ -3,36 +3,21 @@ import PackageDescription import CompilerPluginSupport -let llamaVersion = "b6871" +let llamaVersion = "b8851" +let llamaBuildNumber = String(llamaVersion.dropFirst()) // MARK: - Package Dependencies var packageDependencies: [Package.Dependency] = [ .package(url: "https://github.com/apple/swift-argument-parser.git", .upToNextMinor(from: "1.4.0")), - .package(url: "https://github.com/huggingface/swift-jinja", .upToNextMinor(from: "2.0.0")), + .package(url: "https://github.com/huggingface/swift-jinja", from: "2.3.5"), .package(url: "https://github.com/swiftlang/swift-syntax", from: "600.0.0") ] #if os(iOS) || os(macOS) packageDependencies.append(contentsOf: [ - // mlx-swift-lm v3 (PR #118 merged 2026-04-01) removed - // `loadTokenizer(configuration:hub:)` and reshaped the Hub/Downloader - // API; `LocalLLMClientMLX/Context.swift` still uses the old API. Until - // the MLX backend is migrated to v3 (`AutoTokenizer.from(directory:)` + - // `Downloader`), pin to the last pre-v3 commit so consumers can build. - // Tracked in LocalLLMClient#93 — switch back to `branch: "main"` once - // Context.swift is migrated. - .package( - url: "https://github.com/ml-explore/mlx-swift-lm", - revision: "2a296f145c3129fea4290bb6e4a0a5fb458efa06" // 2026-03-27, last pre-v3 - ), - // `Tokenizers` (from swift-transformers) is what `LocalLLMClientMLX` - // imports for `any Tokenizer`. Pre-v3 mlx-swift-lm transitively pulled - // swift-transformers in, but its Package.swift didn't declare it as a - // public re-export, so consumers still need to depend on it directly. - // Range matches the pre-v3 mlx-swift-lm transitive pin so SPM resolves. - // Bump to `from: "1.3.0"` once Context.swift is migrated to mlx-swift-lm v3. - .package(url: "https://github.com/huggingface/swift-transformers.git", "1.2.0"..<"1.3.0"), + .package(url: "https://github.com/ml-explore/mlx-swift-lm", from: "3.31.3"), + .package(url: "https://github.com/huggingface/swift-transformers", from: "1.3.0"), .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.4.0") ]) #endif @@ -152,6 +137,7 @@ packageTargets.append(contentsOf: [ "LocalLLMClientCore", .product(name: "MLXLLM", package: "mlx-swift-lm"), .product(name: "MLXVLM", package: "mlx-swift-lm"), + .product(name: "MLXHuggingFace", package: "mlx-swift-lm"), .product(name: "Tokenizers", package: "swift-transformers"), ], ), @@ -172,7 +158,7 @@ packageTargets.append(contentsOf: [ name: "LocalLLMClientLlamaFramework", url: "https://github.com/ggml-org/llama.cpp/releases/download/\(llamaVersion)/llama-\(llamaVersion)-xcframework.zip", - checksum: "ac657d70112efadbf5cd1db5c4f67eea94ca38556ada9e7442d5a5a461010d6f" + checksum: "f5eb26820b9890ae026aee4963cd4f43af1c567d39534012f2685601a59c2519" ), .target( name: "LocalLLMClientLlamaC", @@ -180,10 +166,15 @@ packageTargets.append(contentsOf: [ exclude: ["exclude"], cSettings: [ .unsafeFlags(["-w"]), - .headerSearchPath(".") + .define("LLAMA_BUILD_NUMBER", to: llamaBuildNumber), + .headerSearchPath("."), + .headerSearchPath("common") ], cxxSettings: [ - .headerSearchPath(".") + .unsafeFlags(["-UDEBUG"]), + .define("LLAMA_BUILD_NUMBER", to: llamaBuildNumber), + .headerSearchPath("."), + .headerSearchPath("common") ], swiftSettings: [ .interoperabilityMode(.Cxx) @@ -194,7 +185,8 @@ packageTargets.append(contentsOf: [ name: "LocalLLMClientUtilityTests", dependencies: [ "LocalLLMClientUtility", - .product(name: "MLXLMCommon", package: "mlx-swift-lm") + .product(name: "MLXLMCommon", package: "mlx-swift-lm"), + .product(name: "Hub", package: "swift-transformers"), ] ) ]) diff --git a/Sources/LocalLLMClientLlama/LlamaClient.swift b/Sources/LocalLLMClientLlama/LlamaClient.swift index 0ddd82e..14e0320 100644 --- a/Sources/LocalLLMClientLlama/LlamaClient.swift +++ b/Sources/LocalLLMClientLlama/LlamaClient.swift @@ -10,9 +10,11 @@ public final class LlamaClient: LLMClient { private let multimodal: MultimodalContext? private let messageProcessor: MessageProcessor let tools: [AnyLLMTool] + /// Owned by this client; freed in `deinit`. + nonisolated(unsafe) private let chatParamsPtr: UnsafeMutablePointer? var chatFormat: common_chat_format { - context.model.chatFormat() + get_chat_params_format(chatParamsPtr) } /// Initializes a new Llama client. @@ -38,7 +40,15 @@ public final class LlamaClient: LLMClient { multimodal = nil } self.messageProcessor = messageProcessor ?? MessageProcessorFactory.createAutoProcessor(chatTemplate: context.model.chatTemplate) - self.tools = tools.map { AnyLLMTool($0) } + let wrappedTools = tools.map { AnyLLMTool($0) } + self.tools = wrappedTools + self.chatParamsPtr = context.model.buildChatParams(tools: wrappedTools) + } + + deinit { + if let chatParamsPtr { + free_chat_params(chatParamsPtr) + } } /// Generates a text stream from the given input. @@ -82,8 +92,7 @@ public final class LlamaClient: LLMClient { public func responseStream(from input: LLMInput) async throws -> AsyncThrowingStream { // Create the stream first (this can throw) let textStreamGenerator = try textStream(from: input) - let chatFormat = self.chatFormat - + return AsyncThrowingStream { continuation in let processor = StreamingToolCallProcessor( startTag: getToolCallStartTag(), @@ -103,7 +112,11 @@ public final class LlamaClient: LLMClient { } } - var toolCalls = processor.toolCalls + (LlamaToolCallParser.parseToolCalls(from: fullText, format: chatFormat) ?? []) + let parserToolCalls = LlamaToolCallParser.parseToolCalls( + from: fullText, + chatParams: chatParamsPtr + ) ?? [] + var toolCalls = processor.toolCalls + parserToolCalls toolCalls = toolCalls.reduce(into: []) { result, toolCall in if !result.contains(where: { $0.name == toolCall.name }) { result.append(toolCall) @@ -124,17 +137,19 @@ public final class LlamaClient: LLMClient { /// Get the tool call start tag based on chat format private func getToolCallStartTag() -> String { - // Different chat formats may use different tags switch chatFormat { + case COMMON_CHAT_FORMAT_PEG_GEMMA4: + return "<|tool_call>" default: return "" } } - + /// Get the tool call end tag based on chat format private func getToolCallEndTag() -> String { - // Different chat formats may use different tags switch chatFormat { + case COMMON_CHAT_FORMAT_PEG_GEMMA4: + return "" default: return "" } diff --git a/Sources/LocalLLMClientLlama/LlamaToolCallParser.swift b/Sources/LocalLLMClientLlama/LlamaToolCallParser.swift index af29f68..c0b3b94 100644 --- a/Sources/LocalLLMClientLlama/LlamaToolCallParser.swift +++ b/Sources/LocalLLMClientLlama/LlamaToolCallParser.swift @@ -11,49 +11,33 @@ import LocalLLMClientCore /// A utility to parse tool calls from a response generated by a model using llama.cpp's common_chat_parse struct LlamaToolCallParser { - /// Parses a string for tool calls with a specific chat format + /// Parses a string for tool calls using the parser context derived from the model's chat template. /// /// - Parameters: /// - response: The string to parse for tool calls - /// - format: The specific chat format to use for parsing + /// - chatParams: Pointer to the model-owned `llm_chat_params` (obtained via `Model.chatParams()`). /// - Returns: An array of LLMToolCall objects if any were found, otherwise nil - public static func parseToolCalls(from response: String, format: common_chat_format) -> [LLMToolCall]? { - var syntax = common_chat_syntax() - syntax.format = format - syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE - syntax.reasoning_in_content = false - syntax.thinking_forced_open = false - syntax.parse_tool_calls = true - - let parsedMessage = common_chat_parse(std.string(response), false, syntax) + public static func parseToolCalls(from response: String, chatParams: UnsafeMutablePointer?) -> [LLMToolCall]? { + guard let chatParams else { return nil } + + let parsedMessage = response.withCString { cstr in + parse_chat_response(chatParams, cstr, false) + } guard !parsedMessage.tool_calls.empty() else { return nil } - + var toolCalls: [LLMToolCall] = [] - for i in 0..") ) } - + + /// Create a processor for Gemma4 models + public static func gemma4Processor() -> MessageProcessor { + MessageProcessor( + transformer: StandardMessageTransformer(), + renderer: JinjaChatTemplateRenderer(), + chunkExtractor: RegexChunkExtractor(imageTokenPattern: "<\\|image\\|>") + ) + } + /// Create a processor for SmolVLM models public static func smolVLMProcessor() -> MessageProcessor { MessageProcessor( @@ -199,7 +208,10 @@ public struct MessageProcessorFactory { /// Create an auto-detecting processor based on chat template public static func createAutoProcessor(chatTemplate: String) -> MessageProcessor { // Check for specific template patterns - if chatTemplate.contains("<|im_start|>") && chatTemplate.contains("") { + if chatTemplate.contains("<|turn>") { + // Gemma4 format (checked first because the template also contains content-type checks similar to Qwen2VL) + return gemma4Processor() + } else if chatTemplate.contains("<|im_start|>") && chatTemplate.contains("") { // SmolVLM format return smolVLMProcessor() } else if chatTemplate.contains("content[i].type == 'image'") || chatTemplate.contains("") { diff --git a/Sources/LocalLLMClientLlama/Model.swift b/Sources/LocalLLMClientLlama/Model.swift index 5a8714d..a6493db 100644 --- a/Sources/LocalLLMClientLlama/Model.swift +++ b/Sources/LocalLLMClientLlama/Model.swift @@ -24,7 +24,7 @@ final class Model { self.model = model - let chatTemplate = getString(capacity: 8192) { buffer, length in + let chatTemplate = getString { buffer, length in // LLM_KV_TOKENIZER_CHAT_TEMPLATE llama_model_meta_val_str(model, "tokenizer.chat_template", buffer, length) } @@ -47,33 +47,56 @@ final class Model { func tokenizerConfigs() -> [String: Any] { let numberOfConfigs = llama_model_meta_count(model) return (0.. common_chat_format { + /// Build a chat parser context for this model using the provided tools. + /// + /// In the PEG-grammar era of llama.cpp, the generated parser depends on both + /// the chat template and the tool list, so ownership belongs to whoever has + /// the tool list (i.e. `LlamaClient`), not the `Model` itself. + /// + /// The returned pointer must be freed with `free_chat_params`. + func buildChatParams(tools: [AnyLLMTool]) -> UnsafeMutablePointer? { let inputs = create_chat_templates_inputs() - defer { + defer { free_chat_templates_inputs(inputs) } - - add_message_to_inputs(inputs, "user", "test") - let params = apply_chat_templates_with_model(model, inputs) - return params.format + add_message_to_inputs(inputs, "user", "probe") + for tool in tools { + let oaiJSON = tool.toOAICompatJSON() + guard let function = oaiJSON["function"] as? [String: Any], + let name = function["name"] as? String else { continue } + let description = function["description"] as? String ?? "" + let parametersJSON: String + if let parameters = function["parameters"], + let data = try? JSONSerialization.data(withJSONObject: parameters), + let str = String(data: data, encoding: .utf8) { + parametersJSON = str + } else { + parametersJSON = "{}" + } + add_tool_to_inputs(inputs, name, description, parametersJSON) + } + return create_chat_params(model, inputs) } } -private func getString(capacity: Int = 1024, getter: (UnsafeMutablePointer?, Int) -> Int32) -> String { - String(unsafeUninitializedCapacity: capacity) { buffer in +private func getString(minimumCapacity: Int = 1024, getter: (UnsafeMutablePointer?, Int) -> Int32) -> String { + var probe: CChar = 0 + let required = Int(getter(&probe, 1)) + let capacity = max(minimumCapacity, required + 1) + return String(unsafeUninitializedCapacity: capacity) { buffer in buffer.withMemoryRebound(to: CChar.self) { buffer in let length = Int(getter(buffer.baseAddress, capacity)) - return max(0, length) + return max(0, min(length, capacity)) } } } diff --git a/Sources/LocalLLMClientLlama/Multimodal.swift b/Sources/LocalLLMClientLlama/Multimodal.swift index 4bc31e4..50246a6 100644 --- a/Sources/LocalLLMClientLlama/Multimodal.swift +++ b/Sources/LocalLLMClientLlama/Multimodal.swift @@ -13,7 +13,6 @@ public class MultimodalContext: @unchecked Sendable { if let numberOfThreads = parameter.numberOfThreads { mparams.n_threads = Int32(numberOfThreads) } - mparams.verbosity = parameter.options.verbose ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_CONT; guard let multimodalContext = mtmd_init_from_file(url.path(percentEncoded: false), context.model.model, mparams) else { throw .failedToLoad(reason: "Failed to load the mmproj file") } @@ -40,7 +39,7 @@ public class MultimodalContext: @unchecked Sendable { let chunks = mtmd_input_chunks_init()! - let textStorage = " \(MTMD_DEFAULT_IMAGE_MARKER) " // spaces for the workaround of tokenizer + let textStorage = " \(String(cString: mtmd_default_marker())) " // spaces for the workaround of tokenizer var text = textStorage.withCString { mtmd_input_text(text: $0, add_special: false, parse_special: true) } diff --git a/Sources/LocalLLMClientLlamaC/clip-graph.h b/Sources/LocalLLMClientLlamaC/clip-graph.h new file mode 120000 index 0000000..0027171 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/clip-graph.h @@ -0,0 +1 @@ +exclude/llama.cpp/tools/mtmd/clip-graph.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/clip-model.h b/Sources/LocalLLMClientLlamaC/clip-model.h new file mode 120000 index 0000000..4b450cb --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/clip-model.h @@ -0,0 +1 @@ +exclude/llama.cpp/tools/mtmd/clip-model.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/build-info.cpp b/Sources/LocalLLMClientLlamaC/common/build-info.cpp new file mode 100644 index 0000000..44bc8be --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/build-info.cpp @@ -0,0 +1,14 @@ +#include "build-info.h" + +#ifndef LLAMA_BUILD_NUMBER +#define LLAMA_BUILD_NUMBER 0 +#endif + +#define LLAMA_BUILD_INFO_STRINGIFY_(x) #x +#define LLAMA_BUILD_INFO_STRINGIFY(x) LLAMA_BUILD_INFO_STRINGIFY_(x) + +int llama_build_number(void) { return LLAMA_BUILD_NUMBER; } +const char * llama_commit(void) { return ""; } +const char * llama_compiler(void) { return ""; } +const char * llama_build_target(void) { return ""; } +const char * llama_build_info(void) { return "b" LLAMA_BUILD_INFO_STRINGIFY(LLAMA_BUILD_NUMBER); } diff --git a/Sources/LocalLLMClientLlamaC/common/build-info.h b/Sources/LocalLLMClientLlamaC/common/build-info.h new file mode 120000 index 0000000..b4f9fbe --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/build-info.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/build-info.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-generator.cpp b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-generator.cpp new file mode 120000 index 0000000..c1664cb --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-generator.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-auto-parser-generator.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.cpp b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.cpp new file mode 120000 index 0000000..a9da30b --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-auto-parser-helpers.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.h b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.h new file mode 120000 index 0000000..0f9c92b --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser-helpers.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-auto-parser-helpers.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-auto-parser.h b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser.h new file mode 120000 index 0000000..d1c0da5 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-auto-parser.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-auto-parser.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-diff-analyzer.cpp b/Sources/LocalLLMClientLlamaC/common/chat-diff-analyzer.cpp new file mode 120000 index 0000000..0af8837 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-diff-analyzer.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-diff-analyzer.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-parser.cpp b/Sources/LocalLLMClientLlamaC/common/chat-parser.cpp deleted file mode 120000 index 1304458..0000000 --- a/Sources/LocalLLMClientLlamaC/common/chat-parser.cpp +++ /dev/null @@ -1 +0,0 @@ -../exclude/llama.cpp/common/chat-parser.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-parser.h b/Sources/LocalLLMClientLlamaC/common/chat-parser.h deleted file mode 120000 index 0ffce12..0000000 --- a/Sources/LocalLLMClientLlamaC/common/chat-parser.h +++ /dev/null @@ -1 +0,0 @@ -../exclude/llama.cpp/common/chat-parser.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-peg-parser.cpp b/Sources/LocalLLMClientLlamaC/common/chat-peg-parser.cpp new file mode 120000 index 0000000..6ffe432 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-peg-parser.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-peg-parser.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/chat-peg-parser.h b/Sources/LocalLLMClientLlamaC/common/chat-peg-parser.h new file mode 120000 index 0000000..3c8dd29 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/chat-peg-parser.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/chat-peg-parser.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/log.cpp b/Sources/LocalLLMClientLlamaC/common/log.cpp index ec3b36e..ca48376 100644 --- a/Sources/LocalLLMClientLlamaC/common/log.cpp +++ b/Sources/LocalLLMClientLlamaC/common/log.cpp @@ -4,3 +4,4 @@ int common_log_verbosity_thold = 0; struct common_log * common_log_main() { return nullptr; } void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...) {} +void common_log_default_callback(enum ggml_log_level level, const char * text, void * user_data) {} diff --git a/Sources/LocalLLMClientLlamaC/common/log.h b/Sources/LocalLLMClientLlamaC/common/log.h index 69c2ad7..9b4f1e7 100644 --- a/Sources/LocalLLMClientLlamaC/common/log.h +++ b/Sources/LocalLLMClientLlamaC/common/log.h @@ -15,3 +15,4 @@ struct common_log; struct common_log * common_log_init(); struct common_log * common_log_main(); void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...); +void common_log_default_callback(enum ggml_log_level level, const char * text, void * user_data); diff --git a/Sources/LocalLLMClientLlamaC/common/peg-parser.cpp b/Sources/LocalLLMClientLlamaC/common/peg-parser.cpp new file mode 120000 index 0000000..3ce9c4e --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/peg-parser.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/peg-parser.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/peg-parser.h b/Sources/LocalLLMClientLlamaC/common/peg-parser.h new file mode 120000 index 0000000..be21892 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/peg-parser.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/peg-parser.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/reasoning-budget.cpp b/Sources/LocalLLMClientLlamaC/common/reasoning-budget.cpp new file mode 120000 index 0000000..b2d4a33 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/reasoning-budget.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/reasoning-budget.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/reasoning-budget.h b/Sources/LocalLLMClientLlamaC/common/reasoning-budget.h new file mode 120000 index 0000000..dc1642e --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/reasoning-budget.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/reasoning-budget.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/sampling.cpp b/Sources/LocalLLMClientLlamaC/common/sampling.cpp new file mode 120000 index 0000000..1e54e29 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/sampling.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/sampling.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/sampling.h b/Sources/LocalLLMClientLlamaC/common/sampling.h new file mode 120000 index 0000000..fa7f215 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/sampling.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/sampling.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/unicode.cpp b/Sources/LocalLLMClientLlamaC/common/unicode.cpp new file mode 120000 index 0000000..114dea4 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/unicode.cpp @@ -0,0 +1 @@ +../exclude/llama.cpp/common/unicode.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/common/unicode.h b/Sources/LocalLLMClientLlamaC/common/unicode.h new file mode 120000 index 0000000..cac46ea --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/common/unicode.h @@ -0,0 +1 @@ +../exclude/llama.cpp/common/unicode.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/debug/mtmd-debug.h b/Sources/LocalLLMClientLlamaC/debug/mtmd-debug.h new file mode 120000 index 0000000..ffe5f85 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/debug/mtmd-debug.h @@ -0,0 +1 @@ +../exclude/llama.cpp/tools/mtmd/debug/mtmd-debug.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/exclude/llama.cpp b/Sources/LocalLLMClientLlamaC/exclude/llama.cpp index 9a3ea68..e365e65 160000 --- a/Sources/LocalLLMClientLlamaC/exclude/llama.cpp +++ b/Sources/LocalLLMClientLlamaC/exclude/llama.cpp @@ -1 +1 @@ -Subproject commit 9a3ea685b937c0f0cbfda2e50004ea54bf187512 +Subproject commit e365e658f07b63371489570dfde597f199b26c23 diff --git a/Sources/LocalLLMClientLlamaC/include/jinja b/Sources/LocalLLMClientLlamaC/include/jinja new file mode 120000 index 0000000..14e028e --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/include/jinja @@ -0,0 +1 @@ +../exclude/llama.cpp/common/jinja \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/include/module.modulemap b/Sources/LocalLLMClientLlamaC/include/module.modulemap new file mode 100644 index 0000000..bdea50a --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/include/module.modulemap @@ -0,0 +1,15 @@ +module LocalLLMClientLlamaC { + header "LocalLLMClientLlamaC.h" + header "clip.h" + header "ggml-alloc.h" + header "ggml-backend.h" + header "ggml-cpu.h" + header "ggml-opt.h" + header "ggml.h" + header "gguf.h" + header "llama.h" + header "mtmd-helper.h" + header "mtmd.h" + header "utils.h" + export * +} diff --git a/Sources/LocalLLMClientLlamaC/include/nlohmann b/Sources/LocalLLMClientLlamaC/include/nlohmann new file mode 120000 index 0000000..8b02d58 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/include/nlohmann @@ -0,0 +1 @@ +../exclude/llama.cpp/vendor/nlohmann \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/include/utils.h b/Sources/LocalLLMClientLlamaC/include/utils.h index 4ed6570..90124d8 100644 --- a/Sources/LocalLLMClientLlamaC/include/utils.h +++ b/Sources/LocalLLMClientLlamaC/include/utils.h @@ -11,6 +11,26 @@ common_chat_templates* get_common_chat_templates(const common_chat_templates_ptr // Wrapper functions for Swift C++ interop common_chat_templates_inputs* create_chat_templates_inputs(); void add_message_to_inputs(common_chat_templates_inputs* inputs, const char* role, const char* content); +void add_tool_to_inputs(common_chat_templates_inputs* inputs, const char* name, const char* description, const char* parameters_json); common_chat_params apply_chat_templates_safe(const common_chat_templates* tmpls, common_chat_templates_inputs* inputs); common_chat_params apply_chat_templates_with_model(const struct llama_model* model, common_chat_templates_inputs* inputs); void free_chat_templates_inputs(common_chat_templates_inputs* inputs); + +// Heap-allocated common_chat_params plus pre-built PEG arena. +// The `parser` string on common_chat_params is the serialized PEG grammar; +// callers need the deserialized common_peg_arena to run common_chat_parse. +// Keeping both together lets the model own a stable pointer Swift can retain. +struct llm_chat_params { + common_chat_params chat_params; + common_chat_parser_params parser_params; +}; + +// Build a llm_chat_params from the model and a probing message set. +// Returns nullptr if the template cannot be applied. +llm_chat_params* create_chat_params(const struct llama_model* model, common_chat_templates_inputs* inputs); +void free_chat_params(llm_chat_params* params); +common_chat_format get_chat_params_format(const llm_chat_params* params); + +// Parse a response using the pre-built parser params. Returns a message whose +// tool_calls vector is populated when the response matches the grammar. +common_chat_msg parse_chat_response(const llm_chat_params* params, const char* response, bool is_partial); diff --git a/Sources/LocalLLMClientLlamaC/minja b/Sources/LocalLLMClientLlamaC/minja deleted file mode 120000 index 03ae71a..0000000 --- a/Sources/LocalLLMClientLlamaC/minja +++ /dev/null @@ -1 +0,0 @@ -exclude/llama.cpp/vendor/minja \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/models b/Sources/LocalLLMClientLlamaC/models new file mode 120000 index 0000000..aaaed15 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/models @@ -0,0 +1 @@ +exclude/llama.cpp/tools/mtmd/models \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/mtmd-image.cpp b/Sources/LocalLLMClientLlamaC/mtmd-image.cpp new file mode 120000 index 0000000..6c0c387 --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/mtmd-image.cpp @@ -0,0 +1 @@ +exclude/llama.cpp/tools/mtmd/mtmd-image.cpp \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/mtmd-image.h b/Sources/LocalLLMClientLlamaC/mtmd-image.h new file mode 120000 index 0000000..5db91cd --- /dev/null +++ b/Sources/LocalLLMClientLlamaC/mtmd-image.h @@ -0,0 +1 @@ +exclude/llama.cpp/tools/mtmd/mtmd-image.h \ No newline at end of file diff --git a/Sources/LocalLLMClientLlamaC/utils.cpp b/Sources/LocalLLMClientLlamaC/utils.cpp index 0e0c108..9fb4923 100644 --- a/Sources/LocalLLMClientLlamaC/utils.cpp +++ b/Sources/LocalLLMClientLlamaC/utils.cpp @@ -1,5 +1,7 @@ #include "include/utils.h" +#include + template void* get_raw_pointer_from_unique_ptr(const std::unique_ptr& ptr) { return static_cast(ptr.get()); @@ -26,6 +28,16 @@ void add_message_to_inputs(common_chat_templates_inputs* inputs, const char* rol } } +void add_tool_to_inputs(common_chat_templates_inputs* inputs, const char* name, const char* description, const char* parameters_json) { + if (inputs && name) { + common_chat_tool tool; + tool.name = std::string(name); + tool.description = description ? std::string(description) : std::string(); + tool.parameters = parameters_json ? std::string(parameters_json) : std::string("{}"); + inputs->tools.push_back(std::move(tool)); + } +} + common_chat_params apply_chat_templates_safe(const common_chat_templates* tmpls, common_chat_templates_inputs* inputs) { if (tmpls && inputs) { return common_chat_templates_apply(tmpls, *inputs); @@ -46,3 +58,43 @@ common_chat_params apply_chat_templates_with_model(const struct llama_model* mod void free_chat_templates_inputs(common_chat_templates_inputs* inputs) { delete inputs; } + +llm_chat_params* create_chat_params(const struct llama_model* model, common_chat_templates_inputs* inputs) { + if (!model || !inputs) { + return nullptr; + } + auto templates = common_chat_templates_init(model, "", "", ""); + if (!templates) { + return nullptr; + } + auto* out = new llm_chat_params{}; + out->chat_params = common_chat_templates_apply(templates.get(), *inputs); + out->parser_params = common_chat_parser_params(out->chat_params); + if (!out->chat_params.parser.empty()) { + out->parser_params.parser.load(out->chat_params.parser); + } + return out; +} + +void free_chat_params(llm_chat_params* params) { + delete params; +} + +common_chat_format get_chat_params_format(const llm_chat_params* params) { + return params ? params->chat_params.format : COMMON_CHAT_FORMAT_CONTENT_ONLY; +} + +common_chat_msg parse_chat_response(const llm_chat_params* params, const char* response, bool is_partial) { + if (!params || !response) { + return {}; + } + try { + return common_chat_parse(response, is_partial, params->parser_params); + } catch (const std::exception & e) { + // Grammar-mismatched input throws from the PEG parser. Treat it as "no + // tool calls" but surface the reason on stderr so genuine errors + // (allocation failure, invariant violations, ...) remain diagnosable. + fprintf(stderr, "[LocalLLMClient] parse_chat_response: %s\n", e.what()); + return {}; + } +} diff --git a/Sources/LocalLLMClientMLX/Context.swift b/Sources/LocalLLMClientMLX/Context.swift index 0a3ae75..30c2308 100644 --- a/Sources/LocalLLMClientMLX/Context.swift +++ b/Sources/LocalLLMClientMLX/Context.swift @@ -4,6 +4,7 @@ import LocalLLMClientCore import MLX import MLXLLM import MLXLMCommon +import MLXHuggingFace import Tokenizers public final class Context: Sendable { @@ -36,7 +37,7 @@ public final class Context: Sendable { private static func loadModel( url: URL, configuration: ModelConfiguration - ) async throws(LLMError) -> (any LanguageModel, any Tokenizer) { + ) async throws(LLMError) -> (any LanguageModel, any MLXLMCommon.Tokenizer) { do { let configurationURL = url.appending(component: "config.json") let configurationData = try Data(contentsOf: configurationURL) @@ -58,7 +59,8 @@ public final class Context: Sendable { try loadWeights(modelDirectory: url, model: model, perLayerQuantization: baseConfiguration.perLayerQuantization) - let tokenizer = try await loadTokenizer(configuration: configuration, hub: .shared) + let tokenizerLoader: any MLXLMCommon.TokenizerLoader = #huggingFaceTokenizerLoader() + let tokenizer = try await tokenizerLoader.load(from: url) return (model, tokenizer) } catch { throw .failedToLoad(reason: error.localizedDescription) @@ -66,13 +68,15 @@ public final class Context: Sendable { } private static func makeProcessor( - url: URL, configuration: ModelConfiguration, tokenizer: any Tokenizer, + url: URL, configuration: ModelConfiguration, tokenizer: any MLXLMCommon.Tokenizer, ) async -> (any UserInputProcessor, Bool) { do { - let processorConfiguration = url.appending( - component: "preprocessor_config.json" - ) - let configurationData = try Data(contentsOf: processorConfiguration) + let preprocessorURL = url.appending(component: "preprocessor_config.json") + let processorURL = url.appending(component: "processor_config.json") + let configURL = FileManager.default.fileExists(atPath: preprocessorURL.path) + ? preprocessorURL + : processorURL + let configurationData = try Data(contentsOf: configURL) let baseProcessorConfig = try JSONDecoder().decode( BaseProcessorConfiguration.self, from: configurationData @@ -94,12 +98,12 @@ public final class Context: Sendable { } private struct LLMUserInputProcessor: UserInputProcessor { - let tokenizer: Tokenizer + let tokenizer: MLXLMCommon.Tokenizer let configuration: ModelConfiguration let messageGenerator: MessageGenerator init( - tokenizer: any Tokenizer, configuration: ModelConfiguration, + tokenizer: any MLXLMCommon.Tokenizer, configuration: ModelConfiguration, messageGenerator: MessageGenerator ) { self.tokenizer = tokenizer diff --git a/Sources/LocalLLMClientMLX/Utility.swift b/Sources/LocalLLMClientMLX/Utility.swift index e202c97..9b835c4 100644 --- a/Sources/LocalLLMClientMLX/Utility.swift +++ b/Sources/LocalLLMClientMLX/Utility.swift @@ -9,5 +9,5 @@ nonisolated(unsafe) private var isMLXInitialized = false public func initializeMLX() { guard !isMLXInitialized else { return } isMLXInitialized = true - MLX.GPU.set(cacheLimit: 20 * 1024 * 1024) + MLX.Memory.cacheLimit = 20 * 1024 * 1024 } diff --git a/Sources/LocalLLMClientUtility/Globs.swift b/Sources/LocalLLMClientUtility/Globs.swift index 08b4e2f..3fb86f1 100644 --- a/Sources/LocalLLMClientUtility/Globs.swift +++ b/Sources/LocalLLMClientUtility/Globs.swift @@ -9,8 +9,10 @@ public struct Globs: Sendable, Equatable { self.rawValue = globs } - /// Default glob patterns for MLX models, typically including "*.safetensors" and "*.json". - public static let mlx = Globs(["*.safetensors", "*.json"]) + /// Default glob patterns for MLX models, covering weights, config JSON and + /// the chat template (newer models such as Gemma 4 ship it as a separate + /// `chat_template.jinja` file instead of inlining it in `tokenizer_config.json`). + public static let mlx = Globs(["*.safetensors", "*.json", "*.jinja"]) /// Appends a new glob pattern to the set. /// - Parameter glob: A string representing a glob pattern to be added. diff --git a/Tests/LocalLLMClientLlamaTests/LlamaToolCallParserTests.swift b/Tests/LocalLLMClientLlamaTests/LlamaToolCallParserTests.swift deleted file mode 100644 index 2980f86..0000000 --- a/Tests/LocalLLMClientLlamaTests/LlamaToolCallParserTests.swift +++ /dev/null @@ -1,146 +0,0 @@ -import Testing -import Foundation -import LocalLLMClientCore -@testable import LocalLLMClientLlama - -@Suite -struct LlamaToolCallParserTests { - - @Test - func parseEmptyResponse() async throws { - let result = LlamaToolCallParser.parseToolCalls(from: "", format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result == nil) - } - - @Test - func parseNoToolCalls() async throws { - let response = "This is just a regular response without any tool calls." - let result = LlamaToolCallParser.parseToolCalls(from: response, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result == nil) - } - - @Test - func parseGenericJSONToolCall() async throws { - // Generic format expects {"tool_call": {"name": "...", "arguments": "..."}} - let response = """ - I'll help you with that. Let me call a function to get the weather. - - {"tool_call": {"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}} - - Here's the weather information. - """ - - let result = LlamaToolCallParser.parseToolCalls(from: response, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result != nil) - #expect(result?.count == 1) - - if let toolCall = result?.first { - #expect(toolCall.name == "get_weather") - #expect(toolCall.arguments.contains("New York")) - #expect(toolCall.arguments.contains("celsius")) - } - } - - @Test - func parseHermesFormatToolCall() async throws { - let response = """ - I'll search for that information. - - - {"name": "search", "arguments": {"query": "Swift programming"}} - - - Found some results for you. - """ - - let result = LlamaToolCallParser.parseToolCalls(from: response, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result != nil) - #expect(result?.count == 1) - - if let toolCall = result?.first { - #expect(toolCall.name == "search") - #expect(toolCall.arguments.contains("Swift programming")) - } - } - - @Test - func parseMultipleToolCalls() async throws { - // Currently the parser only captures the first tool call when multiple are present - // This is a limitation of the current llama.cpp integration - let response = """ - I'll need to call several functions to help you. - - - {"name": "function1", "arguments": {"param": "value1"}} - - - - {"name": "function2", "arguments": {"param": "value2"}} - - - Both functions have been called. - """ - - let result = LlamaToolCallParser.parseToolCalls(from: response, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result != nil, "Parser should return a result for valid tool calls") - #expect(result?.count == 2, "Currently only first tool call is parsed (known limitation)") - - if let toolCall = result?.first { - #expect(toolCall.name == "function1", "First tool call should be function1") - #expect(toolCall.arguments.contains("value1"), "Arguments should contain value1") - - // Verify the arguments can be parsed as JSON - let data = toolCall.arguments.data(using: .utf8)! - let parsed = try JSONSerialization.jsonObject(with: data) as? [String: Any] - #expect(parsed?["param"] as? String == "value1", "Parsed arguments should have correct value") - } else { - Issue.record("Expected at least 1 tool call but got \(result?.count ?? 0)") - } - } - - @Test - func parseToolCallWithID() async throws { - let response = """ - - {"name": "my_function", "id": "call_123", "arguments": {"test": "data"}} - - """ - - let result = LlamaToolCallParser.parseToolCalls(from: response, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result != nil) - #expect(result?.count == 1) - - if let toolCall = result?.first { - #expect(toolCall.id == "call_123") - #expect(toolCall.name == "my_function") - #expect(toolCall.arguments.contains("test")) - } - } - - @Test - func parseToolCallWithoutID() async throws { - let response = """ - - {"name": "my_function", "arguments": {"test": "data"}} - - """ - - let result = LlamaToolCallParser.parseToolCalls(from: response, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - #expect(result != nil, "Parser should handle tool calls without ID") - #expect(result?.count == 1, "Should parse exactly one tool call") - - if let toolCall = result?.first { - // Should generate a UUID when no ID is provided - #expect(!toolCall.id.isEmpty, "Auto-generated ID should not be empty") - #expect(toolCall.id.count >= 36, "Auto-generated ID should be UUID-like") // UUID format - #expect(toolCall.name == "my_function", "Tool name should match") - #expect(toolCall.arguments.contains("test"), "Arguments should contain test data") - - // Verify UUID format (basic check) - let uuidPattern = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" - let uuidRegex = try? NSRegularExpression(pattern: uuidPattern) - let matches = uuidRegex?.matches(in: toolCall.id, range: NSRange(location: 0, length: toolCall.id.count)) - #expect((matches?.count ?? 0) > 0 || toolCall.id.count >= 8, "ID should be in valid format") - } - } -} diff --git a/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift b/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift index edb4f3f..3b48856 100644 --- a/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift +++ b/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift @@ -17,416 +17,114 @@ extension ModelTests { } extension ModelTests.LocalLLMClientLlamaToolTests { - // Note: LlamaClient tool calling tests are focused on Llama-specific features - // Full integration tests would require model download which is skipped in CI - - @Test - func llamaSpecificChatFormatSupport() async throws { - // Test Llama-specific chat formats that support tool calling - let toolSupportingFormats = [ - (COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, "llama3_builtin"), - (COMMON_CHAT_FORMAT_FIREFUNCTION_V2, "firefunction_v2"), - (COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, "functionary_v3.2"), - (COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, "functionary_v3.1_llama3.1"), - (COMMON_CHAT_FORMAT_HERMES_2_PRO, "hermes_2_pro") - ] - - for (format, name) in toolSupportingFormats { - // Each format has different tool call syntax - let testResponse = "{\"name\": \"test_tool\", \"arguments\": {}}" - let calls = LlamaToolCallParser.parseToolCalls(from: testResponse, format: format) - - // Some formats may not support this syntax - if calls != nil && !calls!.isEmpty { - #expect(calls?.first?.name == "test_tool", "Format \(name) should parse tool calls") - } - } - } - - @Test - func llamaSpecificToolParsing() async throws { - // Test Llama-specific tool parsing logic - // Test parsing tool calls from Llama format - let llamaResponse = """ - I'll help you with that. - {"name": "get_weather", "arguments": {"location": "Tokyo"}} - - """ - - // Try different formats that support tool calling - var toolCalls: [LLMToolCall]? = nil - - // Try formats that might support tool calling - let formatsToTry = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1 - ] - - for format in formatsToTry { - toolCalls = LlamaToolCallParser.parseToolCalls(from: llamaResponse, format: format) - if toolCalls != nil && !toolCalls!.isEmpty { - break - } - } - - // If no format worked, skip the test - if toolCalls == nil || toolCalls!.isEmpty { - // This test requires a specific chat format that supports tool calling - // Skip if the format is not available - return - } - #expect(toolCalls?.count == 1) - #expect(toolCalls?.first?.name == "get_weather") + private func makeToolClient() async throws -> LlamaClient { + try await LocalLLMClient.llama( + testType: .tool, + // Qwen2.5 uses tags; tools must be declared so the PEG parser + // includes the tool-call grammar branches. + // These tools come from LocalLLMClientTestUtilities (WeatherTool, CalculatorTool). + ) } - - @Test - func llamaChunkedToolCallParsing() async throws { - // Test parsing chunked tool calls - // Test parsing chunked tool calls - let chunks = [ - "I'll check the weather. \n{\"name\": \"get", - "_weather\", \"arguments\": {", - "\"location\": \"Tokyo\"}}", - "\n" - ] - - var fullResponse = "" - var parsedCalls: [LLMToolCall] = [] - - // Try formats that support tool calling - let formatsToTry = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1 - ] - - for chunk in chunks { - fullResponse += chunk - - // Try different formats - for format in formatsToTry { - parsedCalls = LlamaToolCallParser.parseToolCalls(from: fullResponse, format: format) ?? [] - if !parsedCalls.isEmpty { - break - } - } - } - - // If no format worked, skip the test - if parsedCalls.isEmpty { - // This test requires a specific chat format that supports tool calling - // Skip if the format is not available - return - } - - #expect(parsedCalls.count == 1) - #expect(parsedCalls.first?.name == "get_weather") - } - - @Test - func llamaModelCapabilityCheck() async throws { - // Test model capability detection based on model name patterns - let toolSupportingModels = [ - "qwen2.5-1.5b-instruct-q5_k_m.gguf", - "hermes-2-pro", - "functionary", - "firefunction" - ] - - let nonToolModels = [ - "llama-2-7b.gguf", - "mistral-7b.gguf" - ] - - // Check tool-supporting models - for modelName in toolSupportingModels { - let supportsTools = modelName.contains("qwen2") || - modelName.contains("hermes") || - modelName.contains("functionary") || - modelName.contains("firefunction") - #expect(supportsTools == true) - } - - // Check non-tool models - for modelName in nonToolModels { - let supportsTools = modelName.contains("qwen2") || - modelName.contains("hermes") || - modelName.contains("functionary") || - modelName.contains("firefunction") - #expect(supportsTools == false) - } + + private func buildChatParams(tools: [any LLMTool]) async throws -> (LlamaClient, UnsafeMutablePointer?) { + let client = try await LocalLLMClient.llama(tools: tools, testType: .tool) + let wrapped = tools.map { AnyLLMTool($0) } + let params = client._context.model.buildChatParams(tools: wrapped) + return (client, params) } - + @Test - func llamaToolCallFormat() async throws { - // Test multiple tool calls - let multiToolResponse = """ - I'll help you with both tasks. + func parseToolCallFromHermesStyleResponse() async throws { + let (_, chatParams) = try await buildChatParams(tools: [WeatherTool()]) + defer { if let chatParams { free_chat_params(chatParams) } } + + let response = """ {"name": "get_weather", "arguments": {"location": "Tokyo", "unit": "celsius"}} - - {"name": "calculator", "arguments": {"expression": "2 + 2"}} - """ - - // Try formats that support tool calling - let formatsToTry = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1 - ] - - var toolCalls: [LLMToolCall] = [] - for format in formatsToTry { - toolCalls = LlamaToolCallParser.parseToolCalls(from: multiToolResponse, format: format) ?? [] - if !toolCalls.isEmpty { - break - } - } - - // If no format worked, skip the test - if toolCalls.isEmpty { - return - } - - #expect(toolCalls.count == 2) - #expect(toolCalls[0].name == "get_weather") - #expect(toolCalls[1].name == "calculator") + + let calls = LlamaToolCallParser.parseToolCalls(from: response, chatParams: chatParams) + try #require(calls != nil, "Expected tool calls to be extracted from a well-formed response") + #expect(calls?.count == 1) + #expect(calls?.first?.name == "get_weather") + #expect(calls?.first?.arguments.contains("Tokyo") == true) } - + @Test - func llamaInvalidToolCallParsing() async throws { - // Test invalid JSON in tool call - let invalidResponse = """ + func parseToolCallWithCalculatorTool() async throws { + let (_, chatParams) = try await buildChatParams(tools: [WeatherTool(), CalculatorTool()]) + defer { if let chatParams { free_chat_params(chatParams) } } + + let response = """ - {"name": "test", invalid json here} + {"name": "calculate", "arguments": {"expression": "15 * 4"}} """ - - // For invalid JSON, all formats should return empty - let formatsToTry = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, - COMMON_CHAT_FORMAT_GENERIC - ] - - for format in formatsToTry { - let toolCalls = LlamaToolCallParser.parseToolCalls(from: invalidResponse, format: format) ?? [] - #expect(toolCalls.isEmpty) // Should gracefully handle invalid JSON - } + + let calls = LlamaToolCallParser.parseToolCalls(from: response, chatParams: chatParams) + try #require(calls != nil) + #expect(calls?.first?.name == "calculate") + #expect(calls?.first?.arguments.contains("15 * 4") == true) + } + + @Test + func parseResponseWithoutToolCalls() async throws { + let (_, chatParams) = try await buildChatParams(tools: [WeatherTool()]) + defer { if let chatParams { free_chat_params(chatParams) } } + + let calls = LlamaToolCallParser.parseToolCalls( + from: "This is a plain response without any tool calls.", + chatParams: chatParams + ) + #expect(calls == nil) } - + @Test - func llamaToolArgumentExtraction() async throws { - let response = """ - - { - "name": "complex_tool", - "arguments": { - "string_arg": "test", - "number_arg": 42, - "bool_arg": true, - "array_arg": ["a", "b", "c"], - "nested_arg": { - "field1": "value1", - "field2": 123 - } - } - } - - """ - - // Try formats that support tool calling - let formatsToTry = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1 - ] - - var toolCalls: [LLMToolCall] = [] - for format in formatsToTry { - toolCalls = LlamaToolCallParser.parseToolCalls(from: response, format: format) ?? [] - if !toolCalls.isEmpty { - break - } - } - - // If no format worked, skip the test - if toolCalls.isEmpty { - return - } - - #expect(toolCalls.count == 1) - - let call = toolCalls[0] - #expect(call.name == "complex_tool") - - // Verify JSON string contains all arguments - #expect(call.arguments.contains("string_arg")) - #expect(call.arguments.contains("number_arg")) - #expect(call.arguments.contains("bool_arg")) - #expect(call.arguments.contains("array_arg")) - #expect(call.arguments.contains("nested_arg")) + func parseEmptyResponse() async throws { + let (_, chatParams) = try await buildChatParams(tools: [WeatherTool()]) + defer { if let chatParams { free_chat_params(chatParams) } } + + let calls = LlamaToolCallParser.parseToolCalls(from: "", chatParams: chatParams) + #expect(calls == nil) } - + @Test - func llamaToolResponseCleaning() async throws { - // Test that parser extracts clean text without tool calls - let responseWithTools = """ - Here's the weather information: + func parseToolCallAssignsIDWhenMissing() async throws { + let (_, chatParams) = try await buildChatParams(tools: [WeatherTool()]) + defer { if let chatParams { free_chat_params(chatParams) } } + + let response = """ - {"name": "get_weather", "arguments": {"location": "Tokyo"}} + {"name": "get_weather", "arguments": {"location": "Tokyo", "unit": "celsius"}} - The weather in Tokyo is sunny. """ - - // Try formats that support tool calling - let formatsToTry = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1 - ] - - var toolCalls: [LLMToolCall] = [] - for format in formatsToTry { - toolCalls = LlamaToolCallParser.parseToolCalls(from: responseWithTools, format: format) ?? [] - if !toolCalls.isEmpty { - break - } - } - - // If no format worked, skip the test - if toolCalls.isEmpty { - return - } - - #expect(toolCalls.count == 1) - - // The parser should extract tool calls but not modify the original text - // That's handled by other components + + let calls = LlamaToolCallParser.parseToolCalls(from: response, chatParams: chatParams) + try #require(calls?.first != nil) + #expect(!calls!.first!.id.isEmpty, "An auto-generated UUID should be assigned when the model omits an id") } - + @Test - func llamaStreamingToolCallParsing() async throws { - // Test parsing tool calls from streaming responses - actor StreamingParser { - private var buffer = "" - private var lastParsedCalls: [LLMToolCall] = [] - - func appendChunk(_ chunk: String) -> [LLMToolCall]? { - buffer += chunk - - // Try to parse with different formats - let formats = [ - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_HERMES_2_PRO - ] - - for format in formats { - if let calls = LlamaToolCallParser.parseToolCalls(from: buffer, format: format), - !calls.isEmpty { - // Check if we have new complete tool calls - if calls.count > lastParsedCalls.count { - lastParsedCalls = calls - return calls - } - } - } - - return nil - } - } - - let parser = StreamingParser() - - // Simulate streaming chunks - let chunks = [ - "I'll help you check the weather. ", - "\n{", - "\"name\": \"get_weather\",", - " \"arguments\": {", - "\"location\": \"Paris\",", - " \"unit\": \"celsius\"", - "}}\n" - ] - - var foundToolCall = false - for chunk in chunks { - if let calls = await parser.appendChunk(chunk), !calls.isEmpty { - foundToolCall = true - #expect(calls.first?.name == "get_weather") - #expect(calls.first?.arguments.contains("Paris") == true) - break - } - } - - #expect(foundToolCall, "Should have found tool call in streaming chunks") + func parseToolCallsReturnsNilWhenChatParamsIsNil() { + // Guard clause: a missing chatParams pointer must not crash and must return nil. + let calls = LlamaToolCallParser.parseToolCalls( + from: "{\"name\":\"x\",\"arguments\":{}}", + chatParams: nil + ) + #expect(calls == nil) } - + @Test - func llamaMultipleSequentialToolCalls() async throws { - // Test handling multiple tool calls in sequence - // Note: Current llama.cpp implementation only supports parsing the first tool call - - // Test with HERMES format which is known to support tags - let hermesResponse = """ - Let me help you with multiple tasks. - - - {"name": "get_weather", "arguments": {"location": "London"}} - - - - {"name": "calculate", "arguments": {"expression": "15 * 4"}} - - - - {"name": "search", "arguments": {"query": "Swift programming"}} - - """ - - // Parse with HERMES format - let hermesCalls = LlamaToolCallParser.parseToolCalls(from: hermesResponse, format: COMMON_CHAT_FORMAT_HERMES_2_PRO) - - // Verify at least one tool call is parsed (current limitation: only first is parsed) - #expect(hermesCalls != nil, "HERMES format should return parsed tool calls") - #expect(hermesCalls?.count ?? 0 >= 1, "Should parse at least one tool call") - - if let firstCall = hermesCalls?.first { - #expect(firstCall.name == "get_weather", "First tool call should be get_weather") - #expect(firstCall.arguments.contains("London"), "Arguments should contain London") - } - - // Test with generic JSON format for comparison - let genericResponse = """ - I'll help you with that. - - {"tool_call": {"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}} - """ - - // Note: Generic format expects different structure - let genericCalls = LlamaToolCallParser.parseToolCalls(from: genericResponse, format: COMMON_CHAT_FORMAT_GENERIC) - - // Count successful formats - var successfulFormats = 0 - if (hermesCalls?.count ?? 0) > 0 { successfulFormats += 1 } - if (genericCalls?.count ?? 0) > 0 { successfulFormats += 1 } - - // At least one format should work - #expect(successfulFormats >= 1, "At least one format should successfully parse tool calls") - - // Document current limitation - print("Note: Current implementation only parses the first tool call. Found \(hermesCalls?.count ?? 0) tool calls with HERMES format.") + func chatFormatIsReportedForToolClient() async throws { + let client = try await LocalLLMClient.llama(tools: [WeatherTool()], testType: .tool) + let format = client.chatFormat + let validFormats: [common_chat_format] = [ + COMMON_CHAT_FORMAT_CONTENT_ONLY, + COMMON_CHAT_FORMAT_PEG_SIMPLE, + COMMON_CHAT_FORMAT_PEG_NATIVE, + COMMON_CHAT_FORMAT_PEG_GEMMA4, + ] + #expect(validFormats.contains(format), "Unexpected chat format: \(format)") } } diff --git a/Tests/LocalLLMClientLlamaTests/MessageProcessorTests.swift b/Tests/LocalLLMClientLlamaTests/MessageProcessorTests.swift index ab046c4..ff4033a 100644 --- a/Tests/LocalLLMClientLlamaTests/MessageProcessorTests.swift +++ b/Tests/LocalLLMClientLlamaTests/MessageProcessorTests.swift @@ -96,6 +96,22 @@ struct MessageProcessorTests { #expect(chunks == [.text("[INST] \(userMarker) [/INST]\(assistantMarker)eos_token")]) } + @Test + func gemma_4() async throws { + // Gemma 4 uses <|turn>... framing with <|image|> for multimodal content. + // Real template: https://huggingface.co/google/gemma-4-E4B-it/raw/main/chat_template.jinja (≈16 KB). + // Use a minimal variant here so the test does not depend on that specific file. + let template = #"{% for message in messages %}<|turn>{{ message['role'] }} {%- if message['content'] is string -%}{{ message['content'] }}{%- else -%}{%- for item in message['content'] -%}{%- if item['type'] == 'image' -%}<|image|>{%- elif item['type'] == 'text' -%}{{ item['text'] }}{%- endif -%}{%- endfor -%}{%- endif -%}{% endfor %}{% if add_generation_prompt %}<|turn>model {% endif %}"# + let autoProcessor = MessageProcessorFactory.createAutoProcessor(chatTemplate: template) + let (rendered, chunks) = try validate(processor: autoProcessor, chatTemplate: template) + #expect(rendered.contains("<|turn>") && rendered.contains("")) + // Image content must be split out as a dedicated `.image` chunk via the `<|image|>` pattern. + let containsImageChunk = chunks.contains { chunk in + if case .image = chunk { return true } else { return false } + } + #expect(containsImageChunk, "Gemma 4 auto-detection should pick the processor whose chunk extractor matches <|image|>") + } + @Test func autoDetection() async throws { // Test that auto-detection works correctly for different templates diff --git a/Tests/LocalLLMClientLlamaTests/ModelTests.swift b/Tests/LocalLLMClientLlamaTests/ModelTests.swift index a3f6f10..20552cd 100644 --- a/Tests/LocalLLMClientLlamaTests/ModelTests.swift +++ b/Tests/LocalLLMClientLlamaTests/ModelTests.swift @@ -53,9 +53,9 @@ extension LocalLLMClient { ) case .normal: return ( - id: "lmstudio-community/gemma-3-4B-it-qat-GGUF", - model: "gemma-3-4B-it-QAT-Q4_0.gguf", - clip: "mmproj-model-f16.gguf" + id: "lmstudio-community/gemma-4-E4B-it-GGUF", + model: "gemma-4-E4B-it-Q4_K_M.gguf", + clip: "mmproj-gemma-4-E4B-it-BF16.gguf" ) } } diff --git a/Tests/LocalLLMClientUtilityTests/FilesMetadataTests.swift b/Tests/LocalLLMClientUtilityTests/FilesMetadataTests.swift index f4bbdff..92f3452 100644 --- a/Tests/LocalLLMClientUtilityTests/FilesMetadataTests.swift +++ b/Tests/LocalLLMClientUtilityTests/FilesMetadataTests.swift @@ -39,8 +39,9 @@ struct FilesMetadataTests { func testHuggingFaceGlobsMLXDefault() { let mlxGlobs = Globs.mlx - #expect(mlxGlobs.rawValue.count == 2) + #expect(mlxGlobs.rawValue.count == 3) #expect(mlxGlobs.rawValue.contains("*.safetensors")) #expect(mlxGlobs.rawValue.contains("*.json")) + #expect(mlxGlobs.rawValue.contains("*.jinja")) } } diff --git a/scripts/update_dependencies.sh b/scripts/update_dependencies.sh index dfca73d..2330dfd 100755 --- a/scripts/update_dependencies.sh +++ b/scripts/update_dependencies.sh @@ -66,4 +66,29 @@ echo "Package.swift has been updated to use llama.cpp version $TARGET_TAG" echo "Updating git submodules..." git fetch --tags git -C "$PROJECT_ROOT/Sources/LocalLLMClientLlamaC/exclude/llama.cpp" checkout tags/$TARGET_TAG -echo "All submodules have been updated." \ No newline at end of file +echo "All submodules have been updated." + +# Verify that every symlink in LocalLLMClientLlamaC still resolves against the new submodule. +echo +echo "Verifying symlinks under LocalLLMClientLlamaC/..." +LLAMAC_DIR="$PROJECT_ROOT/Sources/LocalLLMClientLlamaC" +BROKEN_SYMLINKS_FILE="$(mktemp)" +find "$LLAMAC_DIR" -path "$LLAMAC_DIR/exclude" -prune -o -type l -print | while read -r link; do + if [ ! -e "$link" ]; then + target=$(readlink "$link") + printf ' %s -> %s\n' "${link#"$PROJECT_ROOT/"}" "$target" >> "$BROKEN_SYMLINKS_FILE" + fi +done + +if [ -s "$BROKEN_SYMLINKS_FILE" ]; then + echo "WARNING: the following symlinks no longer resolve after updating to $TARGET_TAG:" + cat "$BROKEN_SYMLINKS_FILE" + rm -f "$BROKEN_SYMLINKS_FILE" + echo + echo "Upstream probably renamed or moved these files. Inspect the new tree under" + echo " Sources/LocalLLMClientLlamaC/exclude/llama.cpp/" + echo "and update the symlinks (and any compile/header references) before committing." + exit 1 +fi +rm -f "$BROKEN_SYMLINKS_FILE" +echo "All symlinks resolve." \ No newline at end of file From 4f16951736259a5d0f19ea4f17d8942de19572ed Mon Sep 17 00:00:00 2001 From: "tattn (Tatsuya Tanaka)" Date: Mon, 20 Apr 2026 20:33:01 +0900 Subject: [PATCH 2/3] Fix CI --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2e11da1..ad9e37e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -58,7 +58,7 @@ jobs: xcodebuild -downloadComponent MetalToolchain - name: Run ${{ matrix.test-type }} tests with Xcode 26 - run: TEST_RUNNER_GITHUB_ACTIONS_TEST="${{ matrix.test-type }}" xcodebuild test -scheme LocalLLMClient-Package -destination 'platform=macOS' + run: TEST_RUNNER_GITHUB_ACTIONS_TEST="${{ matrix.test-type }}" xcodebuild test -skipMacroValidation -scheme LocalLLMClient-Package -destination 'platform=macOS' - name: Upload test results if: failure() @@ -84,7 +84,7 @@ jobs: working-directory: Example run: | xcodebuild -downloadPlatform iOS - xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=macOS' CODE_SIGN_IDENTITY="-" + xcodebuild build -skipMacroValidation -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=macOS' CODE_SIGN_IDENTITY="-" build-example-ios: runs-on: macos-26 @@ -100,7 +100,7 @@ jobs: working-directory: Example run: | xcodebuild -downloadPlatform iOS - xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.4' CODE_SIGN_IDENTITY="-" + xcodebuild build -skipMacroValidation -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.4' CODE_SIGN_IDENTITY="-" test-ubuntu-x86_64: runs-on: ubuntu-latest From 758a62e9403eb123b47278d3e435dac3d5bca703 Mon Sep 17 00:00:00 2001 From: "tattn (Tatsuya Tanaka)" Date: Tue, 21 Apr 2026 00:18:13 +0900 Subject: [PATCH 3/3] Fix CI --- .../LLMSessionLlamaTests.swift | 2 +- .../LocalLLMClientLlamaToolTests.swift | 160 +++++++++++++----- .../LocalLLMClientLlamaTests/ModelTests.swift | 8 +- 3 files changed, 126 insertions(+), 44 deletions(-) diff --git a/Tests/LocalLLMClientLlamaTests/LLMSessionLlamaTests.swift b/Tests/LocalLLMClientLlamaTests/LLMSessionLlamaTests.swift index 467a701..a667835 100644 --- a/Tests/LocalLLMClientLlamaTests/LLMSessionLlamaTests.swift +++ b/Tests/LocalLLMClientLlamaTests/LLMSessionLlamaTests.swift @@ -16,7 +16,7 @@ extension ModelTests { private static func makeToolModel(size: LocalLLMClient.ModelSize = .default) -> LLMSession.DownloadModel { let info = LocalLLMClient.modelInfo(for: .tool, modelSize: size) - return .llama(id: info.id, model: info.model, mmproj: info.clip, parameter: .init(context: 1800)) + return .llama(id: info.id, model: info.model, mmproj: info.clip, parameter: .init(context: 2500)) } @Test diff --git a/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift b/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift index 3b48856..4bf43e4 100644 --- a/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift +++ b/Tests/LocalLLMClientLlamaTests/LocalLLMClientLlamaToolTests.swift @@ -17,15 +17,6 @@ extension ModelTests { } extension ModelTests.LocalLLMClientLlamaToolTests { - private func makeToolClient() async throws -> LlamaClient { - try await LocalLLMClient.llama( - testType: .tool, - // Qwen2.5 uses tags; tools must be declared so the PEG parser - // includes the tool-call grammar branches. - // These tools come from LocalLLMClientTestUtilities (WeatherTool, CalculatorTool). - ) - } - private func buildChatParams(tools: [any LLMTool]) async throws -> (LlamaClient, UnsafeMutablePointer?) { let client = try await LocalLLMClient.llama(tools: tools, testType: .tool) let wrapped = tools.map { AnyLLMTool($0) } @@ -33,34 +24,141 @@ extension ModelTests.LocalLLMClientLlamaToolTests { return (client, params) } + private func makeToolCallResponse( + name: String, + argumentsJSON: String, + client: LlamaClient + ) throws -> String { + if usesQwen35XMLToolCallSyntax(chatTemplate: client._context.model.chatTemplate) { + let argumentsData = try #require(argumentsJSON.data(using: .utf8)) + let argumentsObject = try JSONSerialization.jsonObject(with: argumentsData) + let qwen35Parameters = try #require(renderQwen35Parameters(argumentsObject)) + return """ + + + \(qwen35Parameters) + + + """ + } + + switch client.chatFormat { + case COMMON_CHAT_FORMAT_PEG_GEMMA4: + let argumentsData = try #require(argumentsJSON.data(using: .utf8)) + let argumentsObject = try JSONSerialization.jsonObject(with: argumentsData) + let gemmaArguments = try #require(renderGemma4Arguments(argumentsObject)) + return "<|tool_call>call:\(name)\(gemmaArguments)" + default: + return """ + + {"name": "\(name)", "arguments": \(argumentsJSON)} + + """ + } + } + + private func usesQwen35XMLToolCallSyntax(chatTemplate: String) -> Bool { + chatTemplate.contains(" + \(renderQwen35Value(value)) + + """ + } + .joined(separator: "\n") + } + + private func renderQwen35Value(_ value: Any) -> String { + switch value { + case let string as String: + return string + case let number as NSNumber: + if CFGetTypeID(number) == CFBooleanGetTypeID() { + return number.boolValue ? "true" : "false" + } + return number.stringValue + case _ as NSNull: + return "null" + default: + let data = try? JSONSerialization.data(withJSONObject: value) + return data.flatMap { String(data: $0, encoding: .utf8) } ?? String(describing: value) + } + } + + private func renderGemma4Arguments(_ value: Any) -> String? { + guard let object = value as? [String: Any] else { + return nil + } + + let renderedPairs = object.keys.sorted().map { key in + let value = object[key]! + return "\(key):\(renderGemma4Value(value))" + } + return "{\(renderedPairs.joined(separator: ","))}" + } + + private func renderGemma4Value(_ value: Any) -> String { + switch value { + case let string as String: + return #"<|\"|>\#(string)<|\"|>"# + case let number as NSNumber: + if CFGetTypeID(number) == CFBooleanGetTypeID() { + return number.boolValue ? "true" : "false" + } + return number.stringValue + case _ as NSNull: + return "null" + case let array as [Any]: + return "[\(array.map(renderGemma4Value).joined(separator: ","))]" + case let dictionary as [String: Any]: + let pairs = dictionary.keys.sorted().map { key in + let value = dictionary[key]! + return "\(key):\(renderGemma4Value(value))" + } + return "{\(pairs.joined(separator: ","))}" + default: + fatalError("Unsupported Gemma 4 argument type: \(type(of: value))") + } + } + @Test - func parseToolCallFromHermesStyleResponse() async throws { - let (_, chatParams) = try await buildChatParams(tools: [WeatherTool()]) + func parseToolCallFromModelNativeResponse() async throws { + let (client, chatParams) = try await buildChatParams(tools: [WeatherTool()]) defer { if let chatParams { free_chat_params(chatParams) } } - let response = """ - - {"name": "get_weather", "arguments": {"location": "Tokyo", "unit": "celsius"}} - - """ + let response = try makeToolCallResponse( + name: "get_weather", + argumentsJSON: #"{"location":"Tokyo","unit":"celsius"}"#, + client: client + ) let calls = LlamaToolCallParser.parseToolCalls(from: response, chatParams: chatParams) try #require(calls != nil, "Expected tool calls to be extracted from a well-formed response") #expect(calls?.count == 1) #expect(calls?.first?.name == "get_weather") + #expect(calls?.first?.id.isEmpty == false, "An auto-generated UUID should be assigned when the model omits an id") #expect(calls?.first?.arguments.contains("Tokyo") == true) } @Test - func parseToolCallWithCalculatorTool() async throws { - let (_, chatParams) = try await buildChatParams(tools: [WeatherTool(), CalculatorTool()]) + func parseToolCallWithDeclaredToolSet() async throws { + let (client, chatParams) = try await buildChatParams(tools: [WeatherTool(), CalculatorTool()]) defer { if let chatParams { free_chat_params(chatParams) } } - let response = """ - - {"name": "calculate", "arguments": {"expression": "15 * 4"}} - - """ + let response = try makeToolCallResponse( + name: "calculate", + argumentsJSON: #"{"expression":"15 * 4"}"#, + client: client + ) let calls = LlamaToolCallParser.parseToolCalls(from: response, chatParams: chatParams) try #require(calls != nil) @@ -89,22 +187,6 @@ extension ModelTests.LocalLLMClientLlamaToolTests { #expect(calls == nil) } - @Test - func parseToolCallAssignsIDWhenMissing() async throws { - let (_, chatParams) = try await buildChatParams(tools: [WeatherTool()]) - defer { if let chatParams { free_chat_params(chatParams) } } - - let response = """ - - {"name": "get_weather", "arguments": {"location": "Tokyo", "unit": "celsius"}} - - """ - - let calls = LlamaToolCallParser.parseToolCalls(from: response, chatParams: chatParams) - try #require(calls?.first != nil) - #expect(!calls!.first!.id.isEmpty, "An auto-generated UUID should be assigned when the model omits an id") - } - @Test func parseToolCallsReturnsNilWhenChatParamsIsNil() { // Guard clause: a missing chatParams pointer must not crash and must return nil. diff --git a/Tests/LocalLLMClientLlamaTests/ModelTests.swift b/Tests/LocalLLMClientLlamaTests/ModelTests.swift index 20552cd..60bb1f3 100644 --- a/Tests/LocalLLMClientLlamaTests/ModelTests.swift +++ b/Tests/LocalLLMClientLlamaTests/ModelTests.swift @@ -32,14 +32,14 @@ extension LocalLLMClient { switch size { case .light: return ( - id: "Qwen/Qwen2.5-0.5B-Instruct-GGUF", - model: "qwen2.5-0.5b-instruct-q8_0.gguf", + id: "unsloth/Qwen3.5-2B-GGUF", + model: "Qwen3.5-2B-Q4_K_M.gguf", clip: nil ) case .normal: return ( - id: "Qwen/Qwen2.5-1.5B-Instruct-GGUF", - model: "qwen2.5-1.5b-instruct-q4_k_m.gguf", + id: "unsloth/Qwen3.5-2B-GGUF", + model: "Qwen3.5-2B-Q4_K_M.gguf", clip: nil ) }