From 52f4d6aa901979e9c86b5cc8f9a18c6297ac1d9d Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 09:10:55 +0200 Subject: [PATCH 01/20] use parser --- src/llm/BUILD | 10 ++++++++++ src/llm/io_processing/output_parser.cpp | 3 +++ 2 files changed, 13 insertions(+) diff --git a/src/llm/BUILD b/src/llm/BUILD index a91a38e575..7d7fc2deee 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -226,6 +226,15 @@ ovms_cc_library( visibility = ["//visibility:public"], ) +ovms_cc_library( + name = "io_processing_lfm2_reasoning_parser", + hdrs = ["io_processing/lfm2/lfm2_reasoning_parser.hpp"], + deps = [ + "io_processing_qwen3_reasoning_parser" + ], + visibility = ["//visibility:public"], +) + ovms_cc_library( # TODO split further so we don't have to recompile everything when changing one parser ... name = "output_parsers", hdrs = [ @@ -262,6 +271,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w ":io_processing_lfm2_tool_parser", ":io_processing_gemma4_tool_parser", ":io_processing_qwen3_reasoning_parser", + ":io_processing_lfm2_reasoning_parser", ":io_processing_utils", ":apis_tool_schema_wrapper", ], diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp index 490af53512..0a6a888ddd 100644 --- a/src/llm/io_processing/output_parser.cpp +++ b/src/llm/io_processing/output_parser.cpp @@ -31,6 +31,7 @@ #include "gemma4/gemma4_reasoning_parser.hpp" #include "gptoss/reasoning_parser.hpp" #include "lfm2/lfm2_tool_parser.hpp" +#include "lfm2/lfm2_reasoning_parser.hpp" #include "gemma4/gemma4_tool_parser.hpp" namespace ovms { @@ -205,6 +206,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to reasoningParser = std::make_unique(tokenizer); } else if (reasoningParserName == "gptoss") { reasoningParser = std::make_unique(tokenizer); + } else if (reasoningParserName == "lfm2") { + reasoningParser = std::make_unique(tokenizer); } else if (!reasoningParserName.empty()) { throw std::runtime_error("Unsupported reasoning parser: " + reasoningParserName); } From 677ca74212c29343fa077cce55cfaaeb13162889 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 09:15:44 +0200 Subject: [PATCH 02/20] adding reasoning parser --- .../io_processing/lfm2/lfm2_reasoning_parser.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp new file mode 100644 index 0000000000..ea2a502018 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -0,0 +1,14 @@ +#include "../qwen3/reasoning_parser.hpp" + +namespace ovms { +class Lfm2ReasoningParser : public Qwen3ReasoningParser { +public: + + Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : Qwen3ReasoningParser(tokenizer) {} + + bool requiresStreamingWithSpecialTokens() const override { + return true; + } + +}; +} \ No newline at end of file From e46fc6c26282ef1bf9de270006ad1a9c1056e125 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 10:41:24 +0200 Subject: [PATCH 03/20] style --- .../lfm2/lfm2_reasoning_parser.hpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp index ea2a502018..ddd1cdfa65 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -1,3 +1,19 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + #include "../qwen3/reasoning_parser.hpp" namespace ovms { @@ -11,4 +27,4 @@ class Lfm2ReasoningParser : public Qwen3ReasoningParser { } }; -} \ No newline at end of file +} From d1c5c7b1aab84ac1918b4e3825272b0ce0d0be0a Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 10:42:15 +0200 Subject: [PATCH 04/20] year --- src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp index ddd1cdfa65..d7c5451c1b 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright 2025 Intel Corporation +// Copyright 2026 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. From 87f4bd0b06cbc61045fd557fef229f4a96b0192b Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 10:44:13 +0200 Subject: [PATCH 05/20] fixes --- src/llm/BUILD | 2 +- src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llm/BUILD b/src/llm/BUILD index 7d7fc2deee..647f4e53b9 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -230,7 +230,7 @@ ovms_cc_library( name = "io_processing_lfm2_reasoning_parser", hdrs = ["io_processing/lfm2/lfm2_reasoning_parser.hpp"], deps = [ - "io_processing_qwen3_reasoning_parser" + ":io_processing_qwen3_reasoning_parser" ], visibility = ["//visibility:public"], ) diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp index d7c5451c1b..92a3bf7fb7 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -19,7 +19,7 @@ namespace ovms { class Lfm2ReasoningParser : public Qwen3ReasoningParser { public: - + Lfm2ReasoningParser() = delete; Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : Qwen3ReasoningParser(tokenizer) {} bool requiresStreamingWithSpecialTokens() const override { From 628497edae16c09cf928f8eb8992f7f7661e5d9a Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 10:47:53 +0200 Subject: [PATCH 06/20] explicit constructor --- src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp index 92a3bf7fb7..6bb614ea90 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -20,7 +20,7 @@ namespace ovms { class Lfm2ReasoningParser : public Qwen3ReasoningParser { public: Lfm2ReasoningParser() = delete; - Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : Qwen3ReasoningParser(tokenizer) {} + explicit Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : Qwen3ReasoningParser(tokenizer) {} bool requiresStreamingWithSpecialTokens() const override { return true; From df2756d0070b832338a063395035bf4b42182155 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 11:16:48 +0200 Subject: [PATCH 07/20] docs updated~ --- demos/common/export_models/export_model.py | 4 ++-- docs/llm/reference.md | 5 +++++ docs/parameters.md | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 137804cd20..9192b91449 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -54,8 +54,8 @@ def add_common_arguments(parser): parser_text.add_argument('--max_prompt_len', required=False, type=int, default=None, help='Sets NPU specific property for maximum number of tokens in the prompt. ' 'Not effective if target device is not NPU', dest='max_prompt_len') parser_text.add_argument('--prompt_lookup_decoding', action='store_true', help='Set pipeline to use prompt lookup decoding', dest='prompt_lookup_decoding') -parser_text.add_argument('--reasoning_parser', choices=["qwen3", "gptoss"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') -parser_text.add_argument('--tool_parser', choices=["llama3", "phi4", "hermes3", "mistral", "qwen3coder", "gptoss", "devstral", "lfm2"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') +parser_text.add_argument('--reasoning_parser', choices=["qwen3", "gptoss", "lfm2", "gemma4"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') +parser_text.add_argument('--tool_parser', choices=["llama3", "phi4", "hermes3", "mistral", "qwen3coder", "gptoss", "devstral", "lfm2", "gemma4"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') parser_text.add_argument('--enable_tool_guided_generation', action='store_true', help='Enables enforcing tool schema during generation. Requires setting tool_parser', dest='enable_tool_guided_generation') parser_embeddings_ov = subparsers.add_parser('embeddings_ov', help='export model for embeddings endpoint with directory structure aligned with OpenVINO tools') diff --git a/docs/llm/reference.md b/docs/llm/reference.md index 698d05031b..6317096aa7 100644 --- a/docs/llm/reference.md +++ b/docs/llm/reference.md @@ -284,9 +284,14 @@ __Tool parsers:__ - `devstral` - `gptoss` - `qwen3coder` +- `lfm2` +- `gemma4` __Reasoning parsers:__ - `qwen3` +- `gptoss` +- `lfm2` +- `gemma4` Note that using `tools` might require a chat template other than the original. We recommend using templates from the [vLLM repository](https://github.com/vllm-project/vllm/tree/main/examples) for `hermes3`, `llama3`, `phi4`, `mistral`, `devstral`, `gptoss`, and `qwen3coder` models (if available). Save the selected template as `chat_template.jinja` in the model directory and it will be used instead of the default one. If a template is not available for your model, please refer to the model's documentation or use the default template provided by the model server. diff --git a/docs/parameters.md b/docs/parameters.md index edfddafa7e..b023672b89 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -134,7 +134,7 @@ Task specific parameters for different tasks (text generation/image generation/e | `--max_prompt_len` | `integer` | Sets NPU specific property for maximum number of tokens in the prompt. | | `--kv_cache_precision` | `string` | Reduced kv cache precision to `u8` lowers the cache size consumption. Accepted values: `u8` or empty (default). | | `--model_distribution_policy` | `string` | TENSOR_PARALLEL distributes tensor to multiple sockets/devices and processes it in parallel. PIPELINE_PARALLEL distributes different tensors to process by each device. Accepted values: `TENSOR_PARALLEL`, `PIPELINE_PARALLEL` or empty (default). | -| `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3, gptoss, gemma4] | +| `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3, gptoss, lfm2, gemma4] | | `--tool_parser` | `string` | Type of parser to use for tool calls extraction from model output. Currently supported: [llama3, phi4, hermes3, mistral, qwen3coder, gptoss, devstral, lfm2, gemma4] | | `--enable_tool_guided_generation` | `bool` | Enables enforcing tool schema during generation. Requires setting response parser. Default: false. | From 0096992ffe45db779704f2def953ada725d1fc9d Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 11:49:44 +0200 Subject: [PATCH 08/20] save --- src/llm/BUILD | 9 ++++++- .../lfm2/lfm2_reasoning_parser.cpp | 0 .../lfm2/lfm2_reasoning_parser.hpp | 26 ++++++++++++++----- 3 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp diff --git a/src/llm/BUILD b/src/llm/BUILD index 647f4e53b9..9e544773f7 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -229,8 +229,15 @@ ovms_cc_library( ovms_cc_library( name = "io_processing_lfm2_reasoning_parser", hdrs = ["io_processing/lfm2/lfm2_reasoning_parser.hpp"], + srcs = ["io_processing/lfm2/lfm2_reasoning_parser.cpp"], deps = [ - ":io_processing_qwen3_reasoning_parser" + "@com_github_tencent_rapidjson//:rapidjson", + "//src/port:rapidjson_document", + "//src:libovmslogging", + "//src:libovmsstring_utils", + ":io_processing_utils", + ":io_processing_base_output_parser", + "//third_party:genai", ], visibility = ["//visibility:public"], ) diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp index 6bb614ea90..dd6a8bc072 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -14,17 +14,31 @@ // limitations under the License. //***************************************************************************** -#include "../qwen3/reasoning_parser.hpp" +#include "../base_output_parser.hpp" namespace ovms { -class Lfm2ReasoningParser : public Qwen3ReasoningParser { +class Lfm2ReasoningParser : public BaseOutputParser { +protected: + const std::string parsingStartTag = ""; + const std::string parsingEndTag = ""; + public: Lfm2ReasoningParser() = delete; - explicit Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : Qwen3ReasoningParser(tokenizer) {} + explicit Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : + BaseOutputParser(tokenizer) {} - bool requiresStreamingWithSpecialTokens() const override { - return true; + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; + std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags{this->parsingStartTag}; + return parsingStartTags; + } + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags{}; + return specialParsingStartTags; + } + const std::string& getParsingEndTag() const override { + return parsingEndTag; } - }; } From a06abec0d547efd2b860e30e47c5303ee774e782 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Mon, 22 Jun 2026 12:02:18 +0200 Subject: [PATCH 09/20] usual reasoning parser approach --- .../lfm2/lfm2_reasoning_parser.cpp | 70 +++++++++++++++++++ .../lfm2/lfm2_reasoning_parser.hpp | 5 +- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp index e69de29bb2..2962182df4 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp @@ -0,0 +1,70 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include +#include +#include + +#include "src/port/rapidjson_document.hpp" + +#include "../../../logging.hpp" +#include "lfm2_reasoning_parser.hpp" +#include "../utils.hpp" + +namespace ovms { +void Lfm2ReasoningParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + auto startReasoningIt = std::find(generatedTokens.begin(), generatedTokens.end(), reasoningStartTokenId); + auto endReasoningIt = std::find(generatedTokens.begin(), generatedTokens.end(), reasoningEndTokenId); + + if (startReasoningIt == generatedTokens.end() || endReasoningIt == generatedTokens.end() || startReasoningIt >= endReasoningIt) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Lfm2ReasoningParser: Reasoning start or end token not found in the generated tokens, or in wrong order. Start token found: {}, End token found: {}, Start position: {}, End position: {}", + startReasoningIt != generatedTokens.end(), endReasoningIt != generatedTokens.end(), std::distance(generatedTokens.begin(), startReasoningIt), std::distance(generatedTokens.begin(), endReasoningIt)); + return; + } + + std::string reasoningContent = tokenizer.decode(std::vector(startReasoningIt + 1, endReasoningIt)); + std::string remainingContent = tokenizer.decode(std::vector(endReasoningIt + 1, generatedTokens.end())); + + parsedOutput.reasoning = reasoningContent; + parsedOutput.content = remainingContent; +} + +std::optional Lfm2ReasoningParser::parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) { + if (tokens.empty()) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty tokens for Lfm2ReasoningParser"); + return std::nullopt; + } + + if (std::find(tokens.begin(), tokens.end(), reasoningStartTokenId) == tokens.end() && + std::find(tokens.begin(), tokens.end(), reasoningEndTokenId) == tokens.end()) { + return std::nullopt; + } else { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + writer.StartObject(); + writer.String("delta"); + writer.StartObject(); + writer.String("reasoning_content"); + writer.String(chunk.c_str()); + writer.EndObject(); + writer.EndObject(); + rapidjson::Document doc; + doc.Parse(buffer.GetString()); + return doc; + } + return std::nullopt; +} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp index dd6a8bc072..540cd857af 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp @@ -22,13 +22,16 @@ class Lfm2ReasoningParser : public BaseOutputParser { const std::string parsingStartTag = ""; const std::string parsingEndTag = ""; + const int64_t reasoningStartTokenId = 50280; + const int64_t reasoningEndTokenId = 50281; + public: Lfm2ReasoningParser() = delete; explicit Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : BaseOutputParser(tokenizer) {} void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; - std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; + std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; const std::vector& getParsingStartTags() const override { static const std::vector parsingStartTags{this->parsingStartTag}; return parsingStartTags; From d6f1911214ad2441dc4a31845f9fd961ccd79007 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Tue, 23 Jun 2026 10:25:46 +0200 Subject: [PATCH 10/20] new parser for lfm25, potential solution --- src/llm/BUILD | 19 ++++++-- ..._parser.cpp => lfm25_reasoning_parser.cpp} | 21 +++++---- ..._parser.hpp => lfm25_reasoning_parser.hpp} | 15 ++++-- .../io_processing/lfm2/lfm25_tool_parser.hpp | 46 +++++++++++++++++++ .../io_processing/lfm2/lfm2_tool_parser.cpp | 2 - .../io_processing/lfm2/lfm2_tool_parser.hpp | 9 ++-- src/llm/io_processing/output_parser.cpp | 9 ++-- 7 files changed, 93 insertions(+), 28 deletions(-) rename src/llm/io_processing/lfm2/{lfm2_reasoning_parser.cpp => lfm25_reasoning_parser.cpp} (72%) rename src/llm/io_processing/lfm2/{lfm2_reasoning_parser.hpp => lfm25_reasoning_parser.hpp} (75%) create mode 100644 src/llm/io_processing/lfm2/lfm25_tool_parser.hpp diff --git a/src/llm/BUILD b/src/llm/BUILD index 9e544773f7..e58fe46fef 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -193,6 +193,16 @@ ovms_cc_library( ], visibility = ["//visibility:public"], ) + +ovms_cc_library( + name = "io_processing_lfm25_tool_parser", + hdrs = ["io_processing/lfm2/lfm25_tool_parser.hpp"], + deps = [ + ":io_processing_lfm2_tool_parser", + ], + visibility = ["//visibility:public"], +) + ovms_cc_library( name = "io_processing_gemma4_tool_parser", hdrs = ["io_processing/gemma4/gemma4_tool_parser.hpp", "io_processing/gemma4/gemma4_reasoning_parser.hpp"], @@ -227,9 +237,9 @@ ovms_cc_library( ) ovms_cc_library( - name = "io_processing_lfm2_reasoning_parser", - hdrs = ["io_processing/lfm2/lfm2_reasoning_parser.hpp"], - srcs = ["io_processing/lfm2/lfm2_reasoning_parser.cpp"], + name = "io_processing_lfm25_reasoning_parser", + hdrs = ["io_processing/lfm2/lfm25_reasoning_parser.hpp"], + srcs = ["io_processing/lfm2/lfm25_reasoning_parser.cpp"], deps = [ "@com_github_tencent_rapidjson//:rapidjson", "//src/port:rapidjson_document", @@ -276,9 +286,10 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w ":io_processing_base_output_parser", ":io_processing_qwen3coder_tool_parser", ":io_processing_lfm2_tool_parser", + ":io_processing_lfm25_tool_parser", ":io_processing_gemma4_tool_parser", ":io_processing_qwen3_reasoning_parser", - ":io_processing_lfm2_reasoning_parser", + ":io_processing_lfm25_reasoning_parser", ":io_processing_utils", ":apis_tool_schema_wrapper", ], diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp similarity index 72% rename from src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp rename to src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp index 2962182df4..c60244a272 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp @@ -21,35 +21,36 @@ #include "src/port/rapidjson_document.hpp" #include "../../../logging.hpp" -#include "lfm2_reasoning_parser.hpp" +#include "lfm25_reasoning_parser.hpp" #include "../utils.hpp" namespace ovms { -void Lfm2ReasoningParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { +void Lfm25ReasoningParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { auto startReasoningIt = std::find(generatedTokens.begin(), generatedTokens.end(), reasoningStartTokenId); auto endReasoningIt = std::find(generatedTokens.begin(), generatedTokens.end(), reasoningEndTokenId); if (startReasoningIt == generatedTokens.end() || endReasoningIt == generatedTokens.end() || startReasoningIt >= endReasoningIt) { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Lfm2ReasoningParser: Reasoning start or end token not found in the generated tokens, or in wrong order. Start token found: {}, End token found: {}, Start position: {}, End position: {}", + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Lfm25ReasoningParser: Reasoning start or end token not found in the generated tokens, or in wrong order. Start token found: {}, End token found: {}, Start position: {}, End position: {}", startReasoningIt != generatedTokens.end(), endReasoningIt != generatedTokens.end(), std::distance(generatedTokens.begin(), startReasoningIt), std::distance(generatedTokens.begin(), endReasoningIt)); return; } - std::string reasoningContent = tokenizer.decode(std::vector(startReasoningIt + 1, endReasoningIt)); - std::string remainingContent = tokenizer.decode(std::vector(endReasoningIt + 1, generatedTokens.end())); + auto startPos = std::distance(generatedTokens.begin(), startReasoningIt); + auto endPos = std::distance(generatedTokens.begin(), endReasoningIt); + std::string reasoningContent = tokenizer.decode(std::vector(startPos + generatedTokens.begin() + 1, endPos + generatedTokens.begin()), ov::genai::skip_special_tokens(true)); + parsedOutput.reasoning = reasoningContent; - parsedOutput.content = remainingContent; } -std::optional Lfm2ReasoningParser::parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) { +std::optional Lfm25ReasoningParser::parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) { if (tokens.empty()) { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty tokens for Lfm2ReasoningParser"); + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Received empty tokens for Lfm25ReasoningParser"); return std::nullopt; } - if (std::find(tokens.begin(), tokens.end(), reasoningStartTokenId) == tokens.end() && - std::find(tokens.begin(), tokens.end(), reasoningEndTokenId) == tokens.end()) { + if (std::find(tokens.begin(), tokens.end(), reasoningStartTokenId) != tokens.end() || + std::find(tokens.begin(), tokens.end(), reasoningEndTokenId) != tokens.end()) { return std::nullopt; } else { rapidjson::StringBuffer buffer; diff --git a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp similarity index 75% rename from src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp rename to src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp index 540cd857af..b832d6cd12 100644 --- a/src/llm/io_processing/lfm2/lfm2_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp @@ -17,17 +17,17 @@ #include "../base_output_parser.hpp" namespace ovms { -class Lfm2ReasoningParser : public BaseOutputParser { +class Lfm25ReasoningParser : public BaseOutputParser { protected: const std::string parsingStartTag = ""; const std::string parsingEndTag = ""; - const int64_t reasoningStartTokenId = 50280; - const int64_t reasoningEndTokenId = 50281; + const int64_t reasoningStartTokenId = 124901; // + const int64_t reasoningEndTokenId = 124902; // public: - Lfm2ReasoningParser() = delete; - explicit Lfm2ReasoningParser(ov::genai::Tokenizer& tokenizer) : + Lfm25ReasoningParser() = delete; + explicit Lfm25ReasoningParser(ov::genai::Tokenizer& tokenizer) : BaseOutputParser(tokenizer) {} void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; @@ -43,5 +43,10 @@ class Lfm2ReasoningParser : public BaseOutputParser { const std::string& getParsingEndTag() const override { return parsingEndTag; } + + // It may be removed after changing logic in Lfm2ToolParser to use tokens in streaming instead of chunk content, both tool parser and reasoning parser need to have the same value for this function + bool requiresStreamingWithSpecialTokens() const override { + return true; + } }; } diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp new file mode 100644 index 0000000000..bb0d86084a --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include "./lfm2_tool_parser.hpp" + +namespace ovms { +class Lfm25ToolParser : public Lfm2ToolParser { +protected: + static const int64_t toolCallStartTokenId = 124905; // <|tool_call_start|> + static const int64_t toolCallEndTokenId = 124906; // <|tool_call_end|> + static const int64_t reasoningEndTokenId = 124902; // +public: + Lfm25ToolParser() = delete; + explicit Lfm25ToolParser(ov::genai::Tokenizer& tokenizer) : + Lfm2ToolParser(tokenizer, toolCallStartTokenId, toolCallEndTokenId) {} + + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override { + Lfm2ToolParser::parse(parsedOutput, generatedTokens); + + auto contentTokens = std::vector(generatedTokens.begin(), generatedTokens.end()); + auto reasoningEnd = std::find(contentTokens.begin(), contentTokens.end(), reasoningEndTokenId); + if (reasoningEnd != contentTokens.end()) { + contentTokens.erase(contentTokens.begin(), reasoningEnd + 1); + } + auto toolCallStart = std::find(contentTokens.begin(), contentTokens.end(), toolCallStartTokenId); + auto toolCallEnd = std::find(contentTokens.begin(), contentTokens.end(), toolCallEndTokenId); + if (toolCallStart != contentTokens.end() && toolCallEnd != contentTokens.end() && toolCallStart < toolCallEnd) { + contentTokens.erase(toolCallStart, toolCallEnd + 1); + } + parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); + } +}; +} diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp index 056a5bead9..640cefa726 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp @@ -34,8 +34,6 @@ const std::string Lfm2ToolParser::TOOL_ARGS_START_INDICATOR = "("; const std::string Lfm2ToolParser::TOOL_ARGS_END_INDICATOR = ")"; const std::string Lfm2ToolParser::TOOL_SEPARATOR_STR = ", "; -const int64_t Lfm2ToolParser::botTokenId = 10; -const int64_t Lfm2ToolParser::eotTokenId = 11; std::string Lfm2ToolParser::parseArrayParameter(std::string argumentStr) { int quoteDepth = 0; diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp index cd56634eb4..c3b52e5ef7 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp @@ -31,8 +31,9 @@ class Lfm2ToolParser : public BaseOutputParser { static const std::string TOOL_ARGS_END_INDICATOR; static const std::string TOOL_SEPARATOR_STR; - static const int64_t botTokenId; - static const int64_t eotTokenId; + const int64_t botTokenId; + const int64_t eotTokenId; + static const int64_t reasoningEndTokenId = 124902; // static constexpr size_t MAX_TOOL_CALLS = 100; static constexpr size_t MAX_TOOLS_PER_CALL = 100; @@ -51,8 +52,8 @@ class Lfm2ToolParser : public BaseOutputParser { std::string value; }; Lfm2ToolParser() = delete; - explicit Lfm2ToolParser(ov::genai::Tokenizer& tokenizer) : - BaseOutputParser(tokenizer) {} + explicit Lfm2ToolParser(ov::genai::Tokenizer& tokenizer, int64_t botTokenId = 10, int64_t eotTokenId = 11) : + BaseOutputParser(tokenizer), botTokenId(botTokenId), eotTokenId(eotTokenId) {} void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp index 0a6a888ddd..6cf07e8fb2 100644 --- a/src/llm/io_processing/output_parser.cpp +++ b/src/llm/io_processing/output_parser.cpp @@ -31,7 +31,8 @@ #include "gemma4/gemma4_reasoning_parser.hpp" #include "gptoss/reasoning_parser.hpp" #include "lfm2/lfm2_tool_parser.hpp" -#include "lfm2/lfm2_reasoning_parser.hpp" +#include "lfm2/lfm25_tool_parser.hpp" +#include "lfm2/lfm25_reasoning_parser.hpp" #include "gemma4/gemma4_tool_parser.hpp" namespace ovms { @@ -194,6 +195,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to toolParser = std::make_unique(tokenizer, toolNameSchemaMap); } else if (toolParserName == "lfm2") { toolParser = std::make_unique(tokenizer); + } else if (toolParserName == "lfm2.5") { + toolParser = std::make_unique(tokenizer); } else if (toolParserName == "gemma4") { toolParser = std::make_unique(tokenizer); } else if (!toolParserName.empty()) { @@ -206,8 +209,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to reasoningParser = std::make_unique(tokenizer); } else if (reasoningParserName == "gptoss") { reasoningParser = std::make_unique(tokenizer); - } else if (reasoningParserName == "lfm2") { - reasoningParser = std::make_unique(tokenizer); + } else if (reasoningParserName == "lfm2.5") { + reasoningParser = std::make_unique(tokenizer); } else if (!reasoningParserName.empty()) { throw std::runtime_error("Unsupported reasoning parser: " + reasoningParserName); } From 26ce24feda88445067f3615f2ede7201376d310d Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Wed, 24 Jun 2026 07:09:25 +0200 Subject: [PATCH 11/20] save --- prepare_llm_models.sh | 30 +- src/llm/BUILD | 21 +- .../lfm2/lfm25_reasoning_parser.hpp | 4 +- .../io_processing/lfm2/lfm25_tool_parser.cpp | 107 +++++ .../io_processing/lfm2/lfm25_tool_parser.hpp | 64 ++- .../io_processing/lfm2/lfm2_tool_parser.cpp | 401 +---------------- .../io_processing/lfm2/lfm2_tool_parser.hpp | 55 +-- src/llm/io_processing/lfm2/lfm2_utils.cpp | 404 ++++++++++++++++++ src/llm/io_processing/lfm2/lfm2_utils.hpp | 60 +++ .../lfm2_output_parser_test.cpp | 5 +- 10 files changed, 667 insertions(+), 484 deletions(-) create mode 100644 src/llm/io_processing/lfm2/lfm25_tool_parser.cpp create mode 100644 src/llm/io_processing/lfm2/lfm2_utils.cpp create mode 100644 src/llm/io_processing/lfm2/lfm2_utils.hpp diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 813326ba9e..1186ad1919 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -39,6 +39,7 @@ MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" GPT_OSS_MODEL="openai/gpt-oss-20b" DEVSTRAL_MODEL="unsloth/Devstral-Small-2507" LFM2_MODEL="LiquidAI/LFM2-2.6B" +LFM25_MODEL="LiquidAI/LFM2.5-8B-A1B" GEMMA4_MODEL="OpenVINO/gemma-4-E4B-it-int4-ov" if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then echo "Prepare models with python > 3.8."; exit 1 ; fi @@ -82,25 +83,6 @@ if [ ! -f "$1/$FACEBOOK_MODEL/chat_template.jinja" ]; then cp src/test/llm/dummy_facebook_template.jinja "$1/$FACEBOOK_MODEL/chat_template.jinja" fi -if [ -f "$1/$TTS_MODEL/$TOKENIZER_FILE" ]; then - echo "Model file $1/$TTS_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." -else - python3 demos/common/export_models/export_model.py text2speech --source_model "$TTS_MODEL" --weight-format int4 --model_repository_path $1 --vocoder microsoft/speecht5_hifigan -fi -if [ ! -f "$1/$TTS_MODEL/$TOKENIZER_FILE" ]; then - echo "[ERROR] Model file $1/$TTS_MODEL/$TOKENIZER_FILE does not exist." - exit 1 -fi - -if [ -f "$1/$STT_MODEL/$TOKENIZER_FILE" ]; then - echo "Model file $1/$STT_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." -else - python3 demos/common/export_models/export_model.py speech2text --source_model "$STT_MODEL" --weight-format int4 --model_repository_path $1 -fi -if [ ! -f "$1/$STT_MODEL/$TOKENIZER_FILE" ]; then - echo "[ERROR] Model file $1/$STT_MODEL/$TOKENIZER_FILE does not exist." - exit 1 -fi if [ -f "$1/$VLM_MODEL/$TOKENIZER_FILE" ]; then echo "Model file $1/$VLM_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." @@ -220,6 +202,16 @@ if [ ! -f "$1/$LFM2_MODEL/$TOKENIZER_FILE" ]; then echo "[ERROR] Models file $1/$LFM2_MODEL/$TOKENIZER_FILE does not exist." exit 1 fi +if [ -f "$1/$LFM25_MODEL/$TOKENIZER_FILE" ]; then + echo "Models file $1/$LFM25_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +else + mkdir -p $1/$LFM25_MODEL + convert_tokenizer $LFM25_MODEL --with_detokenizer -o $1/$LFM25_MODEL +fi +if [ ! -f "$1/$LFM25_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Models file $1/$LFM25_MODEL/$TOKENIZER_FILE does not exist." + exit 1 +fi if [ -f "$1/$GEMMA4_MODEL/$TOKENIZER_FILE" ]; then echo "Models file $1/$GEMMA4_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." else diff --git a/src/llm/BUILD b/src/llm/BUILD index 8a42228688..1509b8a5e7 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -189,9 +189,9 @@ ovms_cc_library( ) ovms_cc_library( - name = "io_processing_lfm2_tool_parser", - hdrs = ["io_processing/lfm2/lfm2_tool_parser.hpp"], - srcs = ["io_processing/lfm2/lfm2_tool_parser.cpp"], + name = "io_processing_lfm2_utils", + hdrs = ["io_processing/lfm2/lfm2_utils.hpp"], + srcs = ["io_processing/lfm2/lfm2_utils.cpp"], deps = [ "@com_github_tencent_rapidjson//:rapidjson", "//src/port:rapidjson_document", @@ -204,11 +204,24 @@ ovms_cc_library( visibility = ["//visibility:public"], ) +ovms_cc_library( + name = "io_processing_lfm2_tool_parser", + hdrs = ["io_processing/lfm2/lfm2_tool_parser.hpp"], + srcs = ["io_processing/lfm2/lfm2_tool_parser.cpp"], + deps = [ + ":io_processing_lfm2_utils", + ], + visibility = ["//visibility:public"], +) + + ovms_cc_library( name = "io_processing_lfm25_tool_parser", hdrs = ["io_processing/lfm2/lfm25_tool_parser.hpp"], + srcs = ["io_processing/lfm2/lfm25_tool_parser.cpp"], deps = [ - ":io_processing_lfm2_tool_parser", + ":io_processing_lfm2_utils", + "//src:libovmslogging", ], visibility = ["//visibility:public"], ) diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp index b832d6cd12..bc35eb07f9 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp @@ -22,8 +22,8 @@ class Lfm25ReasoningParser : public BaseOutputParser { const std::string parsingStartTag = ""; const std::string parsingEndTag = ""; - const int64_t reasoningStartTokenId = 124901; // - const int64_t reasoningEndTokenId = 124902; // + const int64_t reasoningStartTokenId = 124901; // + const int64_t reasoningEndTokenId = 124902; // public: Lfm25ReasoningParser() = delete; diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp new file mode 100644 index 0000000000..ad73c18e0c --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp @@ -0,0 +1,107 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "./lfm25_tool_parser.hpp" + +namespace ovms { + +const std::string Lfm25ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; +const std::string Lfm25ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; + +const int64_t Lfm25ToolParser::toolCallStartTokenId = 124905; // <|tool_call_start|> +const int64_t Lfm25ToolParser::toolCallEndTokenId = 124906; // <|tool_call_end|> +const int64_t Lfm25ToolParser::reasoningEndTokenId = 124902; // + +bool Lfm25ToolParser::parseNewContent() { + switch (currentState) { + case State::Content: { + return parseInContentState(this->streamingContent, this->streamingPosition, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); + } + case State::ToolCallStarted: { + auto wasParsedCorrectly = parseInToolCallState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); + if (wasParsedCorrectly) { + this->toolCallIndex++; + } + return wasParsedCorrectly; + } + case State::ToolCallParameters: { + return parseToolCallParametersState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); + } + case State::ToolCallEnded: { + return parseInToolCallEndedState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_END_TAG); + } + case State::AfterToolCall: + break; + } + return false; +} + +std::optional Lfm25ToolParser::parseChunk(const std::string& chunk, const std::vector& /*tokens*/, ov::genai::GenerationFinishReason finishReason) { + if (chunk.empty()) { + return std::nullopt; + } + + this->streamingContent += chunk; + + if (parseNewContent()) { + if (this->currentState == State::ToolCallParameters) { + return BaseOutputParser::wrapFirstDelta(this->toolCall.name, this->toolCallIndex); + } + if (this->currentState == State::ToolCallEnded) { + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); + } + if (this->currentState == State::Content) { + size_t contentEnd = this->streamingContent.find(TOOL_CALL_START_TAG, this->streamingPosition); + std::string content; + if (contentEnd != std::string::npos) { + content = this->streamingContent.substr(this->streamingPosition, contentEnd - this->streamingPosition); + } else { + content = this->streamingContent.substr(this->streamingPosition); + } + this->streamingPosition += content.size(); + cutEOSFromContent(content); + + if (!content.empty()) { + return wrapDeltaContent(content); + } + } + if (this->currentState == State::AfterToolCall) { + this->currentState = State::Content; + } + } + + if (finishReason != ov::genai::GenerationFinishReason::NONE) { + if ((this->currentState == State::ToolCallParameters || this->currentState == State::ToolCallEnded) && !this->toolCall.arguments.empty()) { + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); + } + + if (this->currentState == State::Content && this->streamingPosition < this->streamingContent.size()) { + auto content = this->streamingContent.substr(this->streamingPosition); + this->streamingPosition += content.size(); + cutEOSFromContent(content); + + if (!content.empty()) { + return wrapDeltaContent(content); + } + } + } + + return std::nullopt; +} + +void Lfm25ToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + parseUnaryResponse(parsedOutput, generatedTokens, tokenizer, toolCallStartTokenId, toolCallEndTokenId, reasoningEndTokenId); +} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp index bb0d86084a..8dac71dadf 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp @@ -14,33 +14,55 @@ // limitations under the License. //***************************************************************************** -#include "./lfm2_tool_parser.hpp" +#include "./lfm2_utils.hpp" namespace ovms { -class Lfm25ToolParser : public Lfm2ToolParser { +class Lfm25ToolParser : public BaseOutputParser { protected: - static const int64_t toolCallStartTokenId = 124905; // <|tool_call_start|> - static const int64_t toolCallEndTokenId = 124906; // <|tool_call_end|> - static const int64_t reasoningEndTokenId = 124902; // + static const std::string TOOL_CALL_START_TAG; + static const std::string TOOL_CALL_END_TAG; + + static const int64_t toolCallStartTokenId; + static const int64_t toolCallEndTokenId; + static const int64_t reasoningEndTokenId; public: Lfm25ToolParser() = delete; explicit Lfm25ToolParser(ov::genai::Tokenizer& tokenizer) : - Lfm2ToolParser(tokenizer, toolCallStartTokenId, toolCallEndTokenId) {} - - void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override { - Lfm2ToolParser::parse(parsedOutput, generatedTokens); - - auto contentTokens = std::vector(generatedTokens.begin(), generatedTokens.end()); - auto reasoningEnd = std::find(contentTokens.begin(), contentTokens.end(), reasoningEndTokenId); - if (reasoningEnd != contentTokens.end()) { - contentTokens.erase(contentTokens.begin(), reasoningEnd + 1); - } - auto toolCallStart = std::find(contentTokens.begin(), contentTokens.end(), toolCallStartTokenId); - auto toolCallEnd = std::find(contentTokens.begin(), contentTokens.end(), toolCallEndTokenId); - if (toolCallStart != contentTokens.end() && toolCallEnd != contentTokens.end() && toolCallStart < toolCallEnd) { - contentTokens.erase(toolCallStart, toolCallEnd + 1); - } - parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); + BaseOutputParser(tokenizer) {} + + bool parseNewContent(); + + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; + std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; + const std::vector& getParsingStartTags() const override { + static const std::vector parsingStartTags = {TOOL_CALL_START_TAG}; + return parsingStartTags; + } + + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector beginningOnlyTags = {}; + return beginningOnlyTags; + } + + const std::vector& getSpecialTagsToErase() const override { + static const std::vector tagsToErase = {EOS_TOKEN_STR}; + return tagsToErase; } + + const std::string& getParsingEndTag() const override { + return TOOL_CALL_END_TAG; + } + + bool requiresStreamingWithSpecialTokens() const override { + return true; + } + +private: + std::string streamingContent; + size_t streamingPosition{0}; + State currentState{State::Content}; + ToolCall toolCall; + + int toolCallIndex{TOOL_CALL_INDEX_START}; }; } diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp index 640cefa726..fc5825f6be 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp @@ -14,291 +14,32 @@ // limitations under the License. //***************************************************************************** #include "lfm2_tool_parser.hpp" -#include "../utils.hpp" -#include "../../../logging.hpp" -#include "../../../stringutils.hpp" -#include "rapidjson/error/en.h" -#include -#include -#include namespace ovms { const std::string Lfm2ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; const std::string Lfm2ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; -const std::string Lfm2ToolParser::EOS_TOKEN_STR = "<|im_end|>"; -const std::string Lfm2ToolParser::TOOL_LIST_START_INDICATOR = "["; -const std::string Lfm2ToolParser::TOOL_LIST_END_INDICATOR = "]"; -const std::string Lfm2ToolParser::TOOL_ARGS_START_INDICATOR = "("; -const std::string Lfm2ToolParser::TOOL_ARGS_END_INDICATOR = ")"; -const std::string Lfm2ToolParser::TOOL_SEPARATOR_STR = ", "; - - -std::string Lfm2ToolParser::parseArrayParameter(std::string argumentStr) { - int quoteDepth = 0; - - for (size_t i = 1; i < argumentStr.size() - 1; ++i) { - if (argumentStr[i] != '\'') { - continue; - } - - bool isLastElement = (i == argumentStr.size() - 2); - bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; - - if (quoteDepth == 0) { - argumentStr[i] = '"'; - quoteDepth++; - } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement)) { - argumentStr[i] = '"'; - quoteDepth--; - } - } - - return argumentStr; -} - -std::string Lfm2ToolParser::parseObjectParameter(std::string argumentStr) { - int quoteDepth = 0; - - for (size_t i = 1; i < argumentStr.size() - 1; ++i) { - if (argumentStr[i] != '\'') { - continue; - } - - bool isLastElement = (i == argumentStr.size() - 2); - bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; - bool isFollowedByColon = !isLastElement && argumentStr[i + 1] == ':'; - - if (quoteDepth == 0) { - argumentStr[i] = '"'; - quoteDepth++; - } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement || isFollowedByColon)) { - argumentStr[i] = '"'; - quoteDepth--; - } - } - - return argumentStr; -} - -std::string Lfm2ToolParser::normalizeArgStr(const std::string& arg) { - if (arg.empty()) { - return arg; - } - - std::string normalized = arg; - trim(normalized); - std::string lower = normalized; - std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); - - if (lower == "true" || lower == "false" || lower == "null") { - return lower; - } - - const char first = normalized.front(); - const char last = normalized.back(); - if (first == '{' && last == '}') { - normalized = parseObjectParameter(normalized); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument contains is an object, replaced single quotes with double quotes for JSON parsing. Modified string: {}", normalized); - } - - if (first == '[' && last == ']') { - normalized = parseArrayParameter(normalized); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is an array, normalized quotes for JSON parsing. Modified string: {}", normalized); - } - - if ((first == '\'' && last == '\'')) { - normalized[0] = '"'; - normalized[normalized.size() - 1] = '"'; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is enclosed in quotes, replaced outer quotes with double quotes for JSON parsing. Modified string: {}", normalized); - } - - rapidjson::Document tempDoc; - rapidjson::Value finalValue; - tempDoc.Parse(normalized.c_str()); - if (tempDoc.HasParseError()) { - auto errorCode = tempDoc.GetParseError(); - auto errorMessage = rapidjson::GetParseError_En(errorCode); - size_t errorOffset = tempDoc.GetErrorOffset(); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse argument string as JSON. Argument string: {}, Error: {} Offset: {}", normalized, errorMessage, errorOffset); - - if (first == '\"' && last == '\"') { - normalized = normalized.substr(1, normalized.size() - 2); - } - finalValue.SetString(normalized.c_str(), static_cast(normalized.size()), tempDoc.GetAllocator()); - } else { - finalValue.CopyFrom(tempDoc, tempDoc.GetAllocator()); - } - - { - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - finalValue.Accept(writer); - normalized = buffer.GetString(); - } - - return normalized; -} - -void Lfm2ToolParser::writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer) { - std::string normalized = normalizeArgStr(arg); - - rapidjson::Document doc; - doc.Parse(normalized.c_str()); - - rapidjson::Value& argumentDoc = doc; - writeArgumentOfAnyType(argumentDoc, writer); -} - -Lfm2ToolParser::Argument Lfm2ToolParser::parseSingleArgument(const std::string& argumentStr) { - Lfm2ToolParser::Argument argument; - - size_t equalPos = argumentStr.find('='); - if (equalPos != std::string::npos) { - argument.name = argumentStr.substr(0, equalPos); - argument.value = argumentStr.substr(equalPos + 1); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument - name: {}, value: {}", argument.name, argument.value); - } else { - argument.name = argumentStr; - argument.value = ""; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument string: {} does not contain '=', setting name as entire string and value as empty", argumentStr); - } - return argument; -} - -std::vector Lfm2ToolParser::parseArguments(const std::string& argumentsStr) { - std::vector args; - std::vector parsedArgs; - - size_t argPos = 0; - while (argPos < argumentsStr.length()) { - size_t commaPos = findInStringRespectingSpecialChars(argumentsStr, TOOL_SEPARATOR_STR, argPos); - if (commaPos == std::string::npos) { - auto remainingStr = argumentsStr.substr(argPos); - args.push_back(remainingStr); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "No more commas found, adding remaining argument string: {}", remainingStr); - break; - } - auto argStr = argumentsStr.substr(argPos, commaPos - argPos); - args.push_back(argStr); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument string: {}", argStr); - argPos = commaPos + TOOL_SEPARATOR_STR.length(); - } - - for (const std::string& arg : args) { - parsedArgs.push_back(parseSingleArgument(arg)); - } - return parsedArgs; -} - -bool Lfm2ToolParser::parseInContentState() { - size_t toolCallStartTagPos = this->streamingContent.find(TOOL_CALL_START_TAG, this->streamingPosition); - size_t toolCallEndTagPos = this->streamingContent.find(TOOL_CALL_END_TAG, this->streamingPosition); - if (toolCallEndTagPos != std::string::npos && toolCallStartTagPos == std::string::npos) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); - this->streamingPosition = toolCallEndTagPos + TOOL_CALL_END_TAG.length(); - return false; - } - if (toolCallStartTagPos != std::string::npos) { - if (toolCallStartTagPos > this->streamingPosition) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Content found before tool call start tag at position: {}", toolCallStartTagPos); - return true; - } - this->streamingPosition = toolCallStartTagPos + TOOL_CALL_START_TAG.length(); - this->currentState = State::ToolCallStarted; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected start of tool call at position: {}", toolCallStartTagPos); - return false; - } - - return true; -} - -bool Lfm2ToolParser::parseInToolCallState() { - size_t toolListStartPos = this->streamingContent.find(TOOL_LIST_START_INDICATOR, this->streamingPosition); - size_t argsPos = this->streamingContent.find(TOOL_ARGS_START_INDICATOR, this->streamingPosition); - - if (toolListStartPos != std::string::npos) { - this->streamingPosition = toolListStartPos + TOOL_LIST_START_INDICATOR.length(); - } - - if (argsPos == std::string::npos) { - return false; - } - - std::string toolName = this->streamingContent.substr(this->streamingPosition, argsPos - this->streamingPosition); - trim(toolName); - this->toolCall = ToolCall{generateRandomId(), toolName, ""}; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); - this->streamingPosition = argsPos + TOOL_ARGS_START_INDICATOR.length(); - this->currentState = State::ToolCallParameters; - this->toolCallIndex++; - return true; -} - -bool Lfm2ToolParser::parseToolCallParametersState() { - size_t pos = findInStringRespectingSpecialChars(this->streamingContent, TOOL_ARGS_END_INDICATOR, this->streamingPosition); - if (pos == std::string::npos) { - return false; - } - std::string argumentsStr = this->streamingContent.substr(this->streamingPosition, pos - this->streamingPosition); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed arguments string: {}", argumentsStr); - std::vector arguments = parseArguments(argumentsStr); - - rapidjson::Document argsDoc(rapidjson::kObjectType); - rapidjson::StringBuffer sb; - rapidjson::Writer argsWriter(sb); - argsWriter.StartObject(); - - for (const Argument& argument : arguments) { - argsWriter.Key(argument.name.c_str()); - writeArgumentToWriter(argument.value, argsWriter); - } - - argsWriter.EndObject(); - this->toolCall.arguments = sb.GetString(); - this->currentState = State::ToolCallEnded; - this->streamingPosition = pos + TOOL_ARGS_END_INDICATOR.length(); - - return true; -} - -bool Lfm2ToolParser::parseInToolCallEndedState() { - size_t pos = this->streamingContent.find(TOOL_LIST_END_INDICATOR, this->streamingPosition); - size_t toolSeparatorPos = this->streamingContent.find(TOOL_SEPARATOR_STR, this->streamingPosition); - size_t toolCallEndTagPos = this->streamingContent.find(TOOL_CALL_END_TAG, this->streamingPosition); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Current state: ToolCallEnded. Streaming content from current position: {}", this->streamingContent.substr(this->streamingPosition)); - if (pos == std::string::npos && toolSeparatorPos == std::string::npos && toolCallEndTagPos == std::string::npos) { - return false; - } else if (toolSeparatorPos != std::string::npos && toolSeparatorPos < pos) { - this->streamingPosition = toolSeparatorPos + TOOL_SEPARATOR_STR.length(); - this->currentState = State::ToolCallStarted; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected separator between tool calls at position: {}, expecting another tool call to start", toolSeparatorPos); - } else if (toolCallEndTagPos != std::string::npos) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); - this->streamingPosition = toolCallEndTagPos + TOOL_CALL_END_TAG.length(); - this->currentState = State::AfterToolCall; - } else { - this->streamingPosition = pos + TOOL_LIST_END_INDICATOR.length(); - this->currentState = State::AfterToolCall; - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool list at position: {}, returning to content state", pos); - } - return true; -} +const int64_t Lfm2ToolParser::botTokenId = 10; +const int64_t Lfm2ToolParser::eotTokenId = 11; bool Lfm2ToolParser::parseNewContent() { - switch (this->currentState) { + switch (currentState) { case State::Content: { - return parseInContentState(); + return parseInContentState(this->streamingContent, this->streamingPosition, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); } case State::ToolCallStarted: { - return parseInToolCallState(); + auto wasParsedCorrectly = parseInToolCallState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); + if (wasParsedCorrectly) { + this->toolCallIndex++; + } + return wasParsedCorrectly; } case State::ToolCallParameters: { - return parseToolCallParametersState(); + return parseToolCallParametersState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); } case State::ToolCallEnded: { - return parseInToolCallEndedState(); + return parseInToolCallEndedState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_END_TAG); } case State::AfterToolCall: break; @@ -306,28 +47,6 @@ bool Lfm2ToolParser::parseNewContent() { return false; } -rapidjson::Document Lfm2ToolParser::wrapDeltaContent(const std::string& content) { - rapidjson::Document doc(rapidjson::kObjectType); - rapidjson::Value deltaObj(rapidjson::kObjectType); - deltaObj.AddMember("content", rapidjson::Value(content.c_str(), doc.GetAllocator()), doc.GetAllocator()); - doc.AddMember("delta", deltaObj, doc.GetAllocator()); - return doc; -} - -rapidjson::Document Lfm2ToolParser::wrapDeltaArgs(const std::string& argsStr) { - rapidjson::Document doc(rapidjson::kObjectType); - doc.AddMember("arguments", rapidjson::Value(argsStr.c_str(), doc.GetAllocator()), doc.GetAllocator()); - - return BaseOutputParser::wrapDelta(doc, this->toolCallIndex); -} - -void Lfm2ToolParser::cutEOSFromContent(std::string& content) { - size_t eosPos = content.find(EOS_TOKEN_STR); - if (eosPos != std::string::npos) { - content = content.substr(0, eosPos); - } -} - std::optional Lfm2ToolParser::parseChunk(const std::string& chunk, const std::vector& /*tokens*/, ov::genai::GenerationFinishReason finishReason) { if (chunk.empty()) { return std::nullopt; @@ -340,7 +59,7 @@ std::optional Lfm2ToolParser::parseChunk(const std::string& return BaseOutputParser::wrapFirstDelta(this->toolCall.name, this->toolCallIndex); } if (this->currentState == State::ToolCallEnded) { - return wrapDeltaArgs(this->toolCall.arguments); + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); } if (this->currentState == State::Content) { size_t contentEnd = this->streamingContent.find(TOOL_CALL_START_TAG, this->streamingPosition); @@ -364,7 +83,7 @@ std::optional Lfm2ToolParser::parseChunk(const std::string& if (finishReason != ov::genai::GenerationFinishReason::NONE) { if ((this->currentState == State::ToolCallParameters || this->currentState == State::ToolCallEnded) && !this->toolCall.arguments.empty()) { - return wrapDeltaArgs(this->toolCall.arguments); + return wrapDeltaArgs(this->toolCall.arguments, this->toolCallIndex); } if (this->currentState == State::Content && this->streamingPosition < this->streamingContent.size()) { @@ -381,99 +100,7 @@ std::optional Lfm2ToolParser::parseChunk(const std::string& return std::nullopt; } -bool Lfm2ToolParser::parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall) { - size_t argsPos = toolStr.find(TOOL_ARGS_START_INDICATOR); - if (argsPos != std::string::npos) { - std::string toolName = toolStr.substr(0, argsPos); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); - - int argsStrLen = toolStr.length() - argsPos - TOOL_ARGS_START_INDICATOR.length() - TOOL_ARGS_END_INDICATOR.length(); - std::string argsStr = toolStr.substr(argsPos + TOOL_ARGS_START_INDICATOR.length(), argsStrLen); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed args string: {}", argsStr); - std::vector arguments = parseArguments(argsStr); - - toolCall.name = toolName; - rapidjson::Document argsDoc(rapidjson::kObjectType); - rapidjson::StringBuffer sb; - rapidjson::Writer argsWriter(sb); - argsWriter.StartObject(); - for (const Lfm2ToolParser::Argument& argument : arguments) { - argsWriter.Key(argument.name.c_str()); - writeArgumentToWriter(argument.value, argsWriter); - } - argsWriter.EndObject(); - toolCall.arguments = sb.GetString(); - toolCall.id = generateRandomId(); - return true; - } - return false; -} - void Lfm2ToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { - std::vector tools; - std::vector> toolCallPositions; - size_t pos = 0; - int mainGuard = 0; - - while (pos != std::string::npos && mainGuard < MAX_TOOL_CALLS) { - size_t start, end; - auto it = std::find(generatedTokens.begin() + pos, generatedTokens.end(), botTokenId); - if (it != generatedTokens.end()) { - start = std::distance(generatedTokens.begin(), it); - } else { - break; - } - auto itArgs = std::find(generatedTokens.begin() + start, generatedTokens.end(), eotTokenId); - if (itArgs != generatedTokens.end()) { - end = std::distance(generatedTokens.begin(), itArgs); - } else { - break; - } - - std::string toolListStr = tokenizer.decode(std::vector(generatedTokens.begin() + start + 1, generatedTokens.begin() + end), ov::AnyMap{ov::genai::skip_special_tokens(false)}); - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool list string: {}", toolListStr); - int toolGuard = 0; - toolListStr = toolListStr.substr(TOOL_LIST_START_INDICATOR.length(), toolListStr.length() - TOOL_LIST_START_INDICATOR.length() - TOOL_LIST_END_INDICATOR.length()); - - while (!toolListStr.empty() && toolGuard < MAX_TOOLS_PER_CALL) { - size_t toolEndPos = findInStringRespectingSpecialChars(toolListStr, TOOL_ARGS_END_INDICATOR, 0); - std::string singleTool; - if (toolEndPos != std::string::npos) { - singleTool = toolListStr.substr(0, toolEndPos + TOOL_ARGS_END_INDICATOR.length()); - if (toolEndPos + TOOL_ARGS_END_INDICATOR.length() < toolListStr.length()) { - toolListStr = toolListStr.substr(toolEndPos + TOOL_ARGS_END_INDICATOR.length() + TOOL_SEPARATOR_STR.length()); - } else { - toolListStr.clear(); - } - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed single tool string {}", singleTool); - } - - if (!singleTool.empty()) { - tools.push_back(singleTool); - } - toolGuard++; - } - mainGuard++; - - pos = end; - toolCallPositions.emplace_back(start, end); - } - - for (const std::string& tool : tools) { - ToolCall toolCall; - auto wasToolCallParsed = parseSingleToolCall(tool, toolCall); - if (wasToolCallParsed) { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool call - name: {}, args: {}", toolCall.name, toolCall.arguments); - parsedOutput.toolCalls.push_back(toolCall); - } else { - SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse tool call from string: {}", tool); - } - } - - std::vector contentWithoutToolCalls = generatedTokens; - for (auto it = toolCallPositions.rbegin(); it != toolCallPositions.rend(); ++it) { - contentWithoutToolCalls.erase(contentWithoutToolCalls.begin() + it->first, contentWithoutToolCalls.begin() + it->second + 1); - } - parsedOutput.content = tokenizer.decode(contentWithoutToolCalls, ov::AnyMap{ov::genai::skip_special_tokens(true)}); + parseUnaryResponse(parsedOutput, generatedTokens, tokenizer, botTokenId, eotTokenId); } } // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp index c3b52e5ef7..2f291ee371 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp @@ -17,43 +17,24 @@ #include #include #include "src/llm/io_processing/base_output_parser.hpp" +#include "../../../logging.hpp" +#include "./lfm2_utils.hpp" namespace ovms { class Lfm2ToolParser : public BaseOutputParser { protected: static const std::string TOOL_CALL_START_TAG; static const std::string TOOL_CALL_END_TAG; - static const std::string EOS_TOKEN_STR; - static const std::string TOOL_LIST_START_INDICATOR; - static const std::string TOOL_LIST_END_INDICATOR; - static const std::string TOOL_ARGS_START_INDICATOR; - static const std::string TOOL_ARGS_END_INDICATOR; - static const std::string TOOL_SEPARATOR_STR; - - const int64_t botTokenId; - const int64_t eotTokenId; - static const int64_t reasoningEndTokenId = 124902; // - - static constexpr size_t MAX_TOOL_CALLS = 100; - static constexpr size_t MAX_TOOLS_PER_CALL = 100; - static constexpr int TOOL_CALL_INDEX_START = -1; - enum class State { - Content, // Content -> ToolCallStarted (on TOOL_CALL_START_TAG) - ToolCallStarted, // ToolCallStarted -> ToolCallParameters (on TOOL_ARGS_START_INDICATOR, emits name) - ToolCallParameters, // ToolCallParameters -> ToolCallEnded (on TOOL_ARGS_END_INDICATOR, emits args) - ToolCallEnded, // ToolCallEnded -> ToolCallStarted (on separator) | AfterToolCall (on end tag/list end) - AfterToolCall // AfterToolCall -> Content - }; + static const int64_t botTokenId; + static const int64_t eotTokenId; public: - struct Argument { - std::string name; - std::string value; - }; Lfm2ToolParser() = delete; - explicit Lfm2ToolParser(ov::genai::Tokenizer& tokenizer, int64_t botTokenId = 10, int64_t eotTokenId = 11) : - BaseOutputParser(tokenizer), botTokenId(botTokenId), eotTokenId(eotTokenId) {} + explicit Lfm2ToolParser(ov::genai::Tokenizer& tokenizer) : + BaseOutputParser(tokenizer) {} + + bool parseNewContent(); void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; @@ -80,27 +61,7 @@ class Lfm2ToolParser : public BaseOutputParser { return true; } - static std::string normalizeArgStr(const std::string& arg); - static std::string parseArrayParameter(std::string argumentStr); - static std::string parseObjectParameter(std::string argumentStr); - private: - void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer); - - Argument parseSingleArgument(const std::string& argumentStr); - std::vector parseArguments(const std::string& argumentsStr); - void cutEOSFromContent(std::string& content); - - bool parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall); - bool parseNewContent(); - bool parseInContentState(); - bool parseInToolCallState(); - bool parseToolCallParametersState(); - bool parseInToolCallEndedState(); - - rapidjson::Document wrapDeltaContent(const std::string& content); - rapidjson::Document wrapDeltaArgs(const std::string& argsStr); - std::string streamingContent; size_t streamingPosition{0}; State currentState{State::Content}; diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp new file mode 100644 index 0000000000..1d8edd6249 --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -0,0 +1,404 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "lfm2_utils.hpp" +#include "../utils.hpp" +#include "../../../logging.hpp" +#include "../../../stringutils.hpp" +#include "rapidjson/error/en.h" +#include +#include +#include + +namespace ovms { +const std::string TOOL_LIST_START_INDICATOR = "["; +const std::string TOOL_LIST_END_INDICATOR = "]"; +const std::string TOOL_ARGS_START_INDICATOR = "("; +const std::string TOOL_ARGS_END_INDICATOR = ")"; +const std::string TOOL_SEPARATOR_STR = ", "; +const std::string EOS_TOKEN_STR = "<|im_end|>"; + +const int TOOL_CALL_INDEX_START = -1; + + + +std::string parseArrayParameter(std::string argumentStr) { + int quoteDepth = 0; + + for (size_t i = 1; i < argumentStr.size() - 1; ++i) { + if (argumentStr[i] != '\'') { + continue; + } + + bool isLastElement = (i == argumentStr.size() - 2); + bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; + + if (quoteDepth == 0) { + argumentStr[i] = '"'; + quoteDepth++; + } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement)) { + argumentStr[i] = '"'; + quoteDepth--; + } + } + + return argumentStr; +} + +std::string parseObjectParameter(std::string argumentStr) { + int quoteDepth = 0; + + for (size_t i = 1; i < argumentStr.size() - 1; ++i) { + if (argumentStr[i] != '\'') { + continue; + } + + bool isLastElement = (i == argumentStr.size() - 2); + bool isFollowedByComma = !isLastElement && argumentStr[i + 1] == ','; + bool isFollowedByColon = !isLastElement && argumentStr[i + 1] == ':'; + + if (quoteDepth == 0) { + argumentStr[i] = '"'; + quoteDepth++; + } else if (quoteDepth > 0 && (isFollowedByComma || isLastElement || isFollowedByColon)) { + argumentStr[i] = '"'; + quoteDepth--; + } + } + + return argumentStr; +} + +std::string normalizeArgStr(const std::string& arg) { + if (arg.empty()) { + return arg; + } + + std::string normalized = arg; + trim(normalized); + std::string lower = normalized; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + + if (lower == "true" || lower == "false" || lower == "null") { + return lower; + } + + const char first = normalized.front(); + const char last = normalized.back(); + if (first == '{' && last == '}') { + normalized = parseObjectParameter(normalized); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument contains is an object, replaced single quotes with double quotes for JSON parsing. Modified string: {}", normalized); + } + + if (first == '[' && last == ']') { + normalized = parseArrayParameter(normalized); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is an array, normalized quotes for JSON parsing. Modified string: {}", normalized); + } + + if ((first == '\'' && last == '\'')) { + normalized[0] = '"'; + normalized[normalized.size() - 1] = '"'; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument is enclosed in quotes, replaced outer quotes with double quotes for JSON parsing. Modified string: {}", normalized); + } + + rapidjson::Document tempDoc; + rapidjson::Value finalValue; + tempDoc.Parse(normalized.c_str()); + if (tempDoc.HasParseError()) { + auto errorCode = tempDoc.GetParseError(); + auto errorMessage = rapidjson::GetParseError_En(errorCode); + size_t errorOffset = tempDoc.GetErrorOffset(); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse argument string as JSON. Argument string: {}, Error: {} Offset: {}", normalized, errorMessage, errorOffset); + + if (first == '\"' && last == '\"') { + normalized = normalized.substr(1, normalized.size() - 2); + } + finalValue.SetString(normalized.c_str(), static_cast(normalized.size()), tempDoc.GetAllocator()); + } else { + finalValue.CopyFrom(tempDoc, tempDoc.GetAllocator()); + } + + { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + finalValue.Accept(writer); + normalized = buffer.GetString(); + } + + return normalized; +} + +void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer) { + std::string normalized = normalizeArgStr(arg); + + rapidjson::Document doc; + doc.Parse(normalized.c_str()); + + rapidjson::Value& argumentDoc = doc; + writeArgumentOfAnyType(argumentDoc, writer); +} + +Argument parseSingleArgument(const std::string& argumentStr) { + Argument argument; + + size_t equalPos = argumentStr.find('='); + if (equalPos != std::string::npos) { + argument.name = argumentStr.substr(0, equalPos); + argument.value = argumentStr.substr(equalPos + 1); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument - name: {}, value: {}", argument.name, argument.value); + } else { + argument.name = argumentStr; + argument.value = ""; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Argument string: {} does not contain '=', setting name as entire string and value as empty", argumentStr); + } + return argument; +} + +std::vector parseArguments(const std::string& argumentsStr) { + std::vector args; + std::vector parsedArgs; + + size_t argPos = 0; + while (argPos < argumentsStr.length()) { + size_t commaPos = findInStringRespectingSpecialChars(argumentsStr, TOOL_SEPARATOR_STR, argPos); + if (commaPos == std::string::npos) { + auto remainingStr = argumentsStr.substr(argPos); + args.push_back(remainingStr); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "No more commas found, adding remaining argument string: {}", remainingStr); + break; + } + auto argStr = argumentsStr.substr(argPos, commaPos - argPos); + args.push_back(argStr); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed argument string: {}", argStr); + argPos = commaPos + TOOL_SEPARATOR_STR.length(); + } + + for (const std::string& arg : args) { + parsedArgs.push_back(parseSingleArgument(arg)); + } + return parsedArgs; +} + +bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, const std::string& toolCallStartTag, const std::string& toolCallEndTag) { + size_t toolCallStartTagPos = streamingContent.find(toolCallStartTag, streamingPosition); + size_t toolCallEndTagPos = streamingContent.find(toolCallEndTag, streamingPosition); + if (toolCallEndTagPos != std::string::npos && toolCallStartTagPos == std::string::npos) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); + streamingPosition = toolCallEndTagPos + toolCallEndTag.length(); + return false; + } + if (toolCallStartTagPos != std::string::npos) { + if (toolCallStartTagPos > streamingPosition) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Content found before tool call start tag at position: {}", toolCallStartTagPos); + return true; + } + streamingPosition = toolCallStartTagPos + toolCallStartTag.length(); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected start of tool call at position: {}", toolCallStartTagPos); + return false; + } + + return true; +} +bool parseInToolCallState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState) { + size_t toolListStartPos = streamingContent.find(TOOL_LIST_START_INDICATOR, streamingPosition); + size_t argsPos = streamingContent.find(TOOL_ARGS_START_INDICATOR, streamingPosition); + + if (toolListStartPos != std::string::npos) { + streamingPosition = toolListStartPos + TOOL_LIST_START_INDICATOR.length(); + } + + if (argsPos == std::string::npos) { + return false; + } + + std::string toolName = streamingContent.substr(streamingPosition, argsPos - streamingPosition); + trim(toolName); + toolCall = ToolCall{generateRandomId(), toolName, ""}; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); + streamingPosition = argsPos + TOOL_ARGS_START_INDICATOR.length(); + currentState = State::ToolCallParameters; + return true; +} + +bool parseToolCallParametersState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState) { + size_t pos = findInStringRespectingSpecialChars(streamingContent, TOOL_ARGS_END_INDICATOR, streamingPosition); + if (pos == std::string::npos) { + return false; + } + std::string argumentsStr = streamingContent.substr(streamingPosition, pos - streamingPosition); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed arguments string: {}", argumentsStr); + std::vector arguments = parseArguments(argumentsStr); + + rapidjson::Document argsDoc(rapidjson::kObjectType); + rapidjson::StringBuffer sb; + rapidjson::Writer argsWriter(sb); + argsWriter.StartObject(); + + for (const Argument& argument : arguments) { + argsWriter.Key(argument.name.c_str()); + writeArgumentToWriter(argument.value, argsWriter); + } + + argsWriter.EndObject(); + toolCall.arguments = sb.GetString(); + currentState = State::ToolCallEnded; + streamingPosition = pos + TOOL_ARGS_END_INDICATOR.length(); + + return true; +} + +bool parseInToolCallEndedState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallEndTag) { + size_t pos = streamingContent.find(TOOL_LIST_END_INDICATOR, streamingPosition); + size_t toolSeparatorPos = streamingContent.find(TOOL_SEPARATOR_STR, streamingPosition); + size_t toolCallEndTagPos = streamingContent.find(toolCallEndTag, streamingPosition); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Current state: ToolCallEnded. Streaming content from current position: {}", streamingContent.substr(streamingPosition)); + if (pos == std::string::npos && toolSeparatorPos == std::string::npos && toolCallEndTagPos == std::string::npos) { + return false; + } else if (toolSeparatorPos != std::string::npos && toolSeparatorPos < pos) { + streamingPosition = toolSeparatorPos + TOOL_SEPARATOR_STR.length(); + currentState = State::ToolCallStarted; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected separator between tool calls at position: {}, expecting another tool call to start", toolSeparatorPos); + } else if (toolCallEndTagPos != std::string::npos) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool call at position: {}", toolCallEndTagPos); + streamingPosition = toolCallEndTagPos + toolCallEndTag.length(); + currentState = State::AfterToolCall; + } else { + streamingPosition = pos + TOOL_LIST_END_INDICATOR.length(); + currentState = State::AfterToolCall; + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected end of tool list at position: {}, returning to content state", pos); + } + return true; +} + +rapidjson::Document wrapDeltaContent(const std::string& content) { + rapidjson::Document doc(rapidjson::kObjectType); + rapidjson::Value deltaObj(rapidjson::kObjectType); + deltaObj.AddMember("content", rapidjson::Value(content.c_str(), doc.GetAllocator()), doc.GetAllocator()); + doc.AddMember("delta", deltaObj, doc.GetAllocator()); + return doc; +} + +rapidjson::Document wrapDeltaArgs(const std::string& argsStr, int toolCallIndex) { + rapidjson::Document doc(rapidjson::kObjectType); + doc.AddMember("arguments", rapidjson::Value(argsStr.c_str(), doc.GetAllocator()), doc.GetAllocator()); + + return BaseOutputParser::wrapDelta(doc, toolCallIndex); +} + +void cutEOSFromContent(std::string& content) { + size_t eosPos = content.find(EOS_TOKEN_STR); + if (eosPos != std::string::npos) { + content = content.substr(0, eosPos); + } +} + +bool parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall) { + size_t argsPos = toolStr.find(TOOL_ARGS_START_INDICATOR); + if (argsPos != std::string::npos) { + std::string toolName = toolStr.substr(0, argsPos); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool name: {}", toolName); + + int argsStrLen = toolStr.length() - argsPos - TOOL_ARGS_START_INDICATOR.length() - TOOL_ARGS_END_INDICATOR.length(); + std::string argsStr = toolStr.substr(argsPos + TOOL_ARGS_START_INDICATOR.length(), argsStrLen); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed args string: {}", argsStr); + std::vector arguments = parseArguments(argsStr); + + toolCall.name = toolName; + rapidjson::Document argsDoc(rapidjson::kObjectType); + rapidjson::StringBuffer sb; + rapidjson::Writer argsWriter(sb); + argsWriter.StartObject(); + for (const Argument& argument : arguments) { + argsWriter.Key(argument.name.c_str()); + writeArgumentToWriter(argument.value, argsWriter); + } + argsWriter.EndObject(); + toolCall.arguments = sb.GetString(); + toolCall.id = generateRandomId(); + return true; + } + return false; +} + +void parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& generatedTokens, ov::genai::Tokenizer& tokenizer, const int64_t botTokenId, const int64_t eotTokenId, const std::optional reasoningEndTokenId) { + std::vector tools; + std::vector> toolCallPositions; + size_t pos = 0; + + while (pos != std::string::npos) { + size_t start, end; + auto it = std::find(generatedTokens.begin() + pos, generatedTokens.end(), botTokenId); + if (it != generatedTokens.end()) { + start = std::distance(generatedTokens.begin(), it); + } else { + break; + } + auto itArgs = std::find(generatedTokens.begin() + start, generatedTokens.end(), eotTokenId); + if (itArgs != generatedTokens.end()) { + end = std::distance(generatedTokens.begin(), itArgs); + } else { + break; + } + + std::string toolListStr = tokenizer.decode(std::vector(generatedTokens.begin() + start + 1, generatedTokens.begin() + end), ov::AnyMap{ov::genai::skip_special_tokens(false)}); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool list string: {}", toolListStr); + toolListStr = toolListStr.substr(TOOL_LIST_START_INDICATOR.length(), toolListStr.length() - TOOL_LIST_START_INDICATOR.length() - TOOL_LIST_END_INDICATOR.length()); + + while (!toolListStr.empty()) { + size_t toolEndPos = findInStringRespectingSpecialChars(toolListStr, TOOL_ARGS_END_INDICATOR, 0); + std::string singleTool; + if (toolEndPos != std::string::npos) { + singleTool = toolListStr.substr(0, toolEndPos + TOOL_ARGS_END_INDICATOR.length()); + if (toolEndPos + TOOL_ARGS_END_INDICATOR.length() < toolListStr.length()) { + toolListStr = toolListStr.substr(toolEndPos + TOOL_ARGS_END_INDICATOR.length() + TOOL_SEPARATOR_STR.length()); + } else { + toolListStr.clear(); + } + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed single tool string {}", singleTool); + } + + if (!singleTool.empty()) { + tools.push_back(singleTool); + } + } + pos = end; + toolCallPositions.emplace_back(start, end); + } + + for (const std::string& tool : tools) { + ToolCall toolCall; + auto wasToolCallParsed = parseSingleToolCall(tool, toolCall); + if (wasToolCallParsed) { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed tool call - name: {}, args: {}", toolCall.name, toolCall.arguments); + parsedOutput.toolCalls.push_back(toolCall); + } else { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Failed to parse tool call from string: {}", tool); + } + } + + std::vector contentWithoutToolCalls = generatedTokens; + for (auto it = toolCallPositions.rbegin(); it != toolCallPositions.rend(); ++it) { + contentWithoutToolCalls.erase(contentWithoutToolCalls.begin() + it->first, contentWithoutToolCalls.begin() + it->second + 1); + } + if (reasoningEndTokenId.has_value()) { + auto reasoningEndIt = std::find(contentWithoutToolCalls.begin(), contentWithoutToolCalls.end(), reasoningEndTokenId.value()); + if (reasoningEndIt != contentWithoutToolCalls.end()) { + contentWithoutToolCalls.erase(reasoningEndIt, contentWithoutToolCalls.end()); + } + } + + parsedOutput.content = tokenizer.decode(contentWithoutToolCalls, ov::AnyMap{ov::genai::skip_special_tokens(true)}); +} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_utils.hpp b/src/llm/io_processing/lfm2/lfm2_utils.hpp new file mode 100644 index 0000000000..09a53465bd --- /dev/null +++ b/src/llm/io_processing/lfm2/lfm2_utils.hpp @@ -0,0 +1,60 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once +#include +#include +#include "src/llm/io_processing/base_output_parser.hpp" + +namespace ovms { + +extern const std::string TOOL_LIST_START_INDICATOR; +extern const std::string TOOL_LIST_END_INDICATOR; +extern const std::string TOOL_ARGS_START_INDICATOR; +extern const std::string TOOL_ARGS_END_INDICATOR; +extern const std::string TOOL_SEPARATOR_STR; +extern const std::string EOS_TOKEN_STR; +extern const int TOOL_CALL_INDEX_START; + +struct Argument { + std::string name; + std::string value; +}; + +enum class State { + Content, + ToolCallStarted, + ToolCallParameters, + ToolCallEnded, + AfterToolCall +}; + +std::string parseArrayParameter(std::string argumentStr); +std::string parseObjectParameter(std::string argumentStr); +std::string normalizeArgStr(const std::string& arg); +void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer); +Argument parseSingleArgument(const std::string& argumentStr); +std::vector parseArguments(const std::string& argumentsStr); +bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, const std::string& toolCallStartTag, const std::string& toolCallEndTag); +bool parseInToolCallState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState); +bool parseToolCallParametersState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState); +bool parseInToolCallEndedState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallEndTag); +rapidjson::Document wrapDeltaContent(const std::string& content); +rapidjson::Document wrapDeltaArgs(const std::string& argsStr, int toolCallIndex); +void cutEOSFromContent(std::string& content); +bool parseSingleToolCall(const std::string& toolStr, ToolCall& toolCall); +void parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& generatedTokens, ov::genai::Tokenizer& tokenizer, const int64_t botTokenId, const int64_t eotTokenId, const std::optional reasoningEndTokenId = std::nullopt); + +} // namespace ovms diff --git a/src/test/llm/output_parsers/lfm2_output_parser_test.cpp b/src/test/llm/output_parsers/lfm2_output_parser_test.cpp index e20920aa6b..69ad93c6ba 100644 --- a/src/test/llm/output_parsers/lfm2_output_parser_test.cpp +++ b/src/test/llm/output_parsers/lfm2_output_parser_test.cpp @@ -167,8 +167,6 @@ TEST_F(LFM2OutputParserTest, ParseToolCallOutputWithNoToolsInTheRequest) { TEST_F(LFM2OutputParserTest, ParseToolCallWithObjectArguments) { std::string inputWithProperClosure = "<|tool_call_start|>[dummy(config={'name': 'astro_config', 'value': 99})]<|tool_call_end|>"; - // LFM2 may produce last tool call without closing tag, so we test both cases - // The results should be identical std::vector inputs = {inputWithProperClosure}; for (auto& input : inputs) { auto generatedTensor = lfm2Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; @@ -188,8 +186,7 @@ TEST_F(LFM2OutputParserTest, ParseToolCallWithObjectArguments) { TEST_F(LFM2OutputParserTest, ParseToolCallWithStringArguments) { std::string inputWithProperClosure = "<|tool_call_start|>[test1(arg1=\"data1, data2\")]<|tool_call_end|>"; - // LFM2 may produce last tool call without closing tag, so we test both cases - // The results should be identical + std::vector inputs = {inputWithProperClosure}; for (auto& input : inputs) { auto generatedTensor = lfm2Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; From 600e7e2c5c513c2a9e922e9d77b8734d23c565e5 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 10:58:05 +0200 Subject: [PATCH 12/20] save --- .../io_processing/lfm2/lfm25_tool_parser.cpp | 2 +- .../io_processing/lfm2/lfm2_tool_parser.cpp | 2 +- src/llm/io_processing/lfm2/lfm2_utils.cpp | 6 +- src/llm/io_processing/lfm2/lfm2_utils.hpp | 2 +- .../parser_config_validation.cpp | 2 + .../lfm25_output_parser_test.cpp | 869 ++++++++++++++++++ .../parser_config_validation_test.cpp | 5 +- 7 files changed, 882 insertions(+), 6 deletions(-) create mode 100644 src/test/llm/output_parsers/lfm25_output_parser_test.cpp diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp index ad73c18e0c..a203aeb857 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp @@ -27,7 +27,7 @@ const int64_t Lfm25ToolParser::reasoningEndTokenId = 124902; // bool Lfm25ToolParser::parseNewContent() { switch (currentState) { case State::Content: { - return parseInContentState(this->streamingContent, this->streamingPosition, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); + return parseInContentState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); } case State::ToolCallStarted: { auto wasParsedCorrectly = parseInToolCallState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp index fc5825f6be..942139af43 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp @@ -26,7 +26,7 @@ const int64_t Lfm2ToolParser::eotTokenId = 11; bool Lfm2ToolParser::parseNewContent() { switch (currentState) { case State::Content: { - return parseInContentState(this->streamingContent, this->streamingPosition, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); + return parseInContentState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); } case State::ToolCallStarted: { auto wasParsedCorrectly = parseInToolCallState(this->streamingContent, this->toolCall, this->streamingPosition, this->currentState); diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp index 1d8edd6249..d7f115ea57 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.cpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -191,7 +191,7 @@ std::vector parseArguments(const std::string& argumentsStr) { return parsedArgs; } -bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, const std::string& toolCallStartTag, const std::string& toolCallEndTag) { +bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallStartTag, const std::string& toolCallEndTag) { size_t toolCallStartTagPos = streamingContent.find(toolCallStartTag, streamingPosition); size_t toolCallEndTagPos = streamingContent.find(toolCallEndTag, streamingPosition); if (toolCallEndTagPos != std::string::npos && toolCallStartTagPos == std::string::npos) { @@ -204,6 +204,7 @@ bool parseInContentState(const std::string& streamingContent, size_t& streamingP SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Content found before tool call start tag at position: {}", toolCallStartTagPos); return true; } + currentState = State::ToolCallStarted; streamingPosition = toolCallStartTagPos + toolCallStartTag.length(); SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Detected start of tool call at position: {}", toolCallStartTagPos); return false; @@ -367,6 +368,9 @@ void parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& toolListStr.clear(); } SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Parsed single tool string {}", singleTool); + } else { + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "No more tool calls found in tool list string: {}", toolListStr); + break; } if (!singleTool.empty()) { diff --git a/src/llm/io_processing/lfm2/lfm2_utils.hpp b/src/llm/io_processing/lfm2/lfm2_utils.hpp index 09a53465bd..f98586cbd7 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.hpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.hpp @@ -47,7 +47,7 @@ std::string normalizeArgStr(const std::string& arg); void writeArgumentToWriter(const std::string& arg, rapidjson::Writer& writer); Argument parseSingleArgument(const std::string& argumentStr); std::vector parseArguments(const std::string& argumentsStr); -bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, const std::string& toolCallStartTag, const std::string& toolCallEndTag); +bool parseInContentState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallStartTag, const std::string& toolCallEndTag); bool parseInToolCallState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState); bool parseToolCallParametersState(const std::string& streamingContent, ToolCall& toolCall, size_t& streamingPosition, State& currentState); bool parseInToolCallEndedState(const std::string& streamingContent, size_t& streamingPosition, State& currentState, const std::string& toolCallEndTag); diff --git a/src/llm/io_processing/parser_config_validation.cpp b/src/llm/io_processing/parser_config_validation.cpp index 4f355fc20f..c40411f3a8 100644 --- a/src/llm/io_processing/parser_config_validation.cpp +++ b/src/llm/io_processing/parser_config_validation.cpp @@ -31,6 +31,7 @@ const std::vector& getSupportedToolParserNames() { "qwen3coder", "devstral", "lfm2", + "lfm2.5", "gemma4", }; return names; @@ -41,6 +42,7 @@ const std::vector& getSupportedReasoningParserNames() { "qwen3", "gemma4", "gptoss", + "lfm2.5", }; return names; } diff --git a/src/test/llm/output_parsers/lfm25_output_parser_test.cpp b/src/test/llm/output_parsers/lfm25_output_parser_test.cpp new file mode 100644 index 0000000000..a43cefee2a --- /dev/null +++ b/src/test/llm/output_parsers/lfm25_output_parser_test.cpp @@ -0,0 +1,869 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../llm/io_processing/base_output_parser.hpp" +#include "../../../llm/io_processing/output_parser.hpp" +#include "../../platform_utils.hpp" + +using namespace ovms; + +#ifdef _WIN32 +const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\LiquidAI\\LFM2.5-8B-A1B"; +#else +// Hardcoded for usage in docker container +const std::string tokenizerPath = "/ovms/src/test/llm_testing/LiquidAI/LFM2.5-8B-A1B"; +#endif + +static std::unique_ptr lfm25Tokenizer; +static const ToolsSchemas_t& EMPTY_TOOLS_SCHEMA = {}; // not used in lfm2.5 + +class LFM25OutputParserTest : public ::testing::Test { +protected: + std::unique_ptr outputParserWithRegularToolParsing; + + static void SetUpTestSuite() { + try { + lfm25Tokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize lfm2.5 tokenizer: " << e.what(); + } catch (...) { + FAIL() << "Failed to initialize lfm2 tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + lfm25Tokenizer.reset(); + } + + void SetUp() override { + // For LFM2 model there is only tool parser available + outputParserWithRegularToolParsing = std::make_unique(*lfm25Tokenizer, "lfm2.5", "lfm2.5", EMPTY_TOOLS_SCHEMA); + } + + void assertChunkEqual(const std::optional& doc, const std::optional& expectedDelta, const std::string& chunk) { + if (!expectedDelta.has_value() && !doc.has_value()) { + return; + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + std::string expected = expectedDelta.value(); + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk; + } else { + FAIL() << "Mismatch between expectedDelta and doc for chunk: " << chunk; + } + } + void assertStreamingVec(const std::vector>>& chunkToDeltaVec) { + for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) { + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, {}, true, finishReason); + if (!expectedDelta.has_value() && !doc.has_value()) { + continue; // Both are nullopt, OK + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + // If both strings contain "id":"...", compare id values by length and alphanumeric, else compare whole strings + std::string expected = expectedDelta.value(); + std::string idKey = "\"id\":\""; + auto docIdPos = docStr.find(idKey); + auto expectedIdPos = expected.find(idKey); + if (docIdPos != std::string::npos && expectedIdPos != std::string::npos) { + auto docIdStart = docIdPos + idKey.size(); + auto docIdEnd = docStr.find("\"", docIdStart); + auto expectedIdStart = expectedIdPos + idKey.size(); + auto expectedIdEnd = expected.find("\"", expectedIdStart); + ASSERT_NE(docIdEnd, std::string::npos); + ASSERT_NE(expectedIdEnd, std::string::npos); + std::string docId = docStr.substr(docIdStart, docIdEnd - docIdStart); + std::string expectedId = expected.substr(expectedIdStart, expectedIdEnd - expectedIdStart); + EXPECT_EQ(docId.size(), expectedId.size()) << "ID length mismatch for chunk: " << chunk; + EXPECT_TRUE(std::all_of(docId.begin(), docId.end(), ::isalnum)) << "ID not alphanumeric for chunk: " << chunk; + // Compare everything except the id value + std::string docStrNoId = docStr; + std::string expectedNoId = expected; + docStrNoId.replace(docIdStart, docId.size(), std::string(docId.size(), '*')); + expectedNoId.replace(expectedIdStart, expectedId.size(), std::string(expectedId.size(), '*')); + EXPECT_EQ(docStrNoId, expectedNoId) << "Mismatch for chunk (ignoring id value): " << chunk; + } else { + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk; + } + } else { + std::string expectedStr = expectedDelta.has_value() ? expectedDelta.value() : "std::nullopt"; + std::string docStr = doc.has_value() ? [&]() { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + return std::string(buffer.GetString()); + }() + : "std::nullopt"; + FAIL() << "Mismatch between expectedDelta and doc for chunk: " << chunk + << "\nexpectedDelta: " << expectedStr + << "\ndoc: " << docStr; + } + } + } +}; + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithSingleToolCall) { + std::string inputWithProperClosure = "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithNoToolsInTheRequest) { + std::string inputWithProperClosure = "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + std::string testInput = input; + auto generatedTensor = lfm25Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, false); + EXPECT_EQ(parsedOutput.content, testInput); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithObjectArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[dummy(config={'name': 'astro_config', 'value': 99})]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "dummy"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"config\":{\"name\":\"astro_config\",\"value\":99}}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[test1(arg1=\"data1, data2\")]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "test1"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"data1, data2\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithListOfStringsAsArgument) { + std::string inputWithProperClosure = "<|tool_call_start|>[generate_DNA_sequence(length=100, preferences=['G', 'C'])]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "generate_DNA_sequence"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"length\":100,\"preferences\":[\"G\",\"C\"]}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParserToolCallWithBooleanArgument) { + std::string inputWithProperClosure = "<|tool_call_start|>[check_status(flag=True)]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "check_status"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"flag\":true}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseTwoToolCallsAtOnce) { + std::string inputWithProperClosure = "<|tool_call_start|>[dummy1(config={'name': 'astro_config', 'value': 99}), dummy2(config={'name': 'second_config', 'value': 199})]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 2); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "dummy1"); + EXPECT_EQ(parsedOutput.toolCalls[1].name, "dummy2"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"config\":{\"name\":\"astro_config\",\"value\":99}}"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"config\":{\"name\":\"second_config\",\"value\":199}}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithArrayArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[sort(array=[42, 17, 89, 5, 33], order=\"descending\")]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "sort"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringWithSingleQuotesArguments) { + std::string inputWithProperClosure = "<|tool_call_start|>[sort(array=[42, 17, 89, 5, 33], order='descending')]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "sort"); + // Parser removes whitespaces, so we expect arguments value to be without spaces + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); // ID should be generated + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithThreeToolCalls) { + std::string inputWithProperClosure = "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>" + "<|tool_call_start|>[another_tool(param1=\"data\", param2=true)]<|tool_call_end|>" + "<|tool_call_start|>[third_tool(key=\"value\")]<|tool_call_end|>"; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 3); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); + auto firstToolCallId = parsedOutput.toolCalls[0].id; + + EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}"); + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); + auto secondToolCallId = parsedOutput.toolCalls[1].id; + EXPECT_NE(firstToolCallId, secondToolCallId); + + EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool"); + EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}"); + EXPECT_EQ(parsedOutput.toolCalls[2].id.empty(), false); + auto thirdToolCallId = parsedOutput.toolCalls[2].id; + EXPECT_NE(firstToolCallId, thirdToolCallId); + EXPECT_NE(secondToolCallId, thirdToolCallId); + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithThreeToolCallsWithContentInBetween) { + std::string inputWithProperClosure = "Before tool calls content. " + "<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>" + "This is some content between tool calls." + "<|tool_call_start|>[another_tool(param1=\"data\", param2=true)]<|tool_call_end|>" + " This is some content between second and third tool call. " + "<|tool_call_start|>[third_tool(key=\"value\")]<|tool_call_end|>" + "After tool calls content."; + + std::vector inputs = {inputWithProperClosure}; + for (auto& input : inputs) { + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Before tool calls content. This is some content between tool calls. This is some content between second and third tool call. After tool calls content."); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 3); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); + auto firstToolCallId = parsedOutput.toolCalls[0].id; + + EXPECT_EQ(parsedOutput.toolCalls[1].name, "another_tool"); + EXPECT_EQ(parsedOutput.toolCalls[1].arguments, "{\"param1\":\"data\",\"param2\":true}"); + EXPECT_EQ(parsedOutput.toolCalls[1].id.empty(), false); + auto secondToolCallId = parsedOutput.toolCalls[1].id; + EXPECT_NE(firstToolCallId, secondToolCallId); + + EXPECT_EQ(parsedOutput.toolCalls[2].name, "third_tool"); + EXPECT_EQ(parsedOutput.toolCalls[2].arguments, "{\"key\":\"value\"}"); + EXPECT_EQ(parsedOutput.toolCalls[2].id.empty(), false); + auto thirdToolCallId = parsedOutput.toolCalls[2].id; + EXPECT_NE(firstToolCallId, thirdToolCallId); + EXPECT_NE(secondToolCallId, thirdToolCallId); + } +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithEmptyArguments) { + // Tool call with empty parentheses (no arguments) + std::string input = "<|tool_call_start|>[no_args_tool()]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "no_args_tool"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { + std::string input = "This is a regular model response without tool calls."; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + EXPECT_EQ(parsedOutput.reasoning, ""); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { + std::string input = "This is a content part and next will be a tool call.\n\n<|tool_call_start|>[example_tool(arg1=\"value1\", arg2=42)]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n"); + EXPECT_EQ(parsedOutput.reasoning, ""); + + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(LFM25OutputParserTest, HolisticStreaming) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"sort", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(array", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"sort"}}]}})"}, + {"=[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"42", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 17", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 89", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 5", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 33", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"],", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" order", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"=\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"desc", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ending", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"\"),", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"}}]}})"}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ANOTHER_CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"ANOTHER_CONTENT_AFTER_TOOL_CALL"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithBiggerChunks) { + std::vector>> chunkToDeltaVec{ + {"SOME_CONTENT", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"SOME_CONTENT"}})"}, + {"MORE_CONTENT<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"MORE_CONTENT"}})"}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"sort(array=", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"sort"}}]}})"}, + {"[42, 17, 89, 5, 33], order=\"descending\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {")]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ANOTHER_CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"ANOTHER_CONTENT_AFTER_TOOL_CALL"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithToolCallAndFinishReason) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|><|im_end|>", ov::genai::GenerationFinishReason::STOP, std::nullopt}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithToolCallAndEOSToken) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|im_end|>", ov::genai::GenerationFinishReason::STOP, std::nullopt}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithToolCallAndEOSTokenAndContent) { + std::vector>> chunkToDeltaVec{ + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"SOME_CONTENT<|im_end|>", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"content":"SOME_CONTENT"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, StreamingWithContentBetweenToolCalls) { + std::vector>> chunkToDeltaVec{ + // Tool call phase + // Starting first tool. Collecting chunk until full name is received. Don't return until then. + {"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"JUST_SOME_STRING_BEFORE_SPECIAL_STARTING_TAG"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"sort", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(array", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"sort"}}]}})"}, + {"=[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"42", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 17", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 89", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 5", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {",", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 33", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"],", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" order", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"=\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"desc", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ending", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"\")]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"array\":[42,17,89,5,33],\"order\":\"descending\"}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"Some ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"Some "}})"}, + {"content ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"content "}})"}, + {"between ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"between "}})"}, + {"tool ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"tool "}})"}, + {"calls.", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"calls."}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"d", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ummy", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(config", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":1,"function":{"name":"dummy"}}]}})"}, + {"={", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"'", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"name", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"astro_config", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"',", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" '", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"value", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"':", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 99", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"})]", ov ::genai ::GenerationFinishReason ::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{\"config\":{\"name\":\"astro_config\",\"value\":99}}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ANOTHER_CONTENT_AFTER_TOOL_CALL", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"ANOTHER_CONTENT_AFTER_TOOL_CALL"}})"}, + {"<|tool_call_start|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"solve", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(e", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":2,"function":{"name":"solve"}}]}})"}, + {"quation", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"=\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"2", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"*", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"(", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"x", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"+", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"5)", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" =", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" 13", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"\")]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{\"equation\":\"2*(x+5) = 13\"}"}}]}})"}, + {"<|tool_call_end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"And some content after second tool call", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"And some content after second tool call"}})"}, + }; + + assertStreamingVec(chunkToDeltaVec); +} + +TEST_F(LFM25OutputParserTest, ToolCallsWithoutToolsInTheRequestStreaming) { + std::vector>> chunkToDeltaVec{ + // Tool parser is available, but tools are not in the request so every chunk is just a regular content + {"<|tool_call_start|>\n", "{\"delta\":{\"content\":\"<|tool_call_start|>\\n\"}}"}, + {"{\"", "{\"delta\":{\"content\":\"{\\\"\"}}"}, + {"name", "{\"delta\":{\"content\":\"name\"}}"}, + {"\":", "{\"delta\":{\"content\":\"\\\":\"}}"}, + {" \"", "{\"delta\":{\"content\":\" \\\"\"}}"}, + {"super", "{\"delta\":{\"content\":\"super\"}}"}, + {"_tool", "{\"delta\":{\"content\":\"_tool\"}}"}, + {"_number", "{\"delta\":{\"content\":\"_number\"}}"}, + {"_two", "{\"delta\":{\"content\":\"_two\"}}"}, + {"\",", "{\"delta\":{\"content\":\"\\\",\"}}"}, + {" \"", "{\"delta\":{\"content\":\" \\\"\"}}"}, + {"arguments", "{\"delta\":{\"content\":\"arguments\"}}"}, + {"\":", "{\"delta\":{\"content\":\"\\\":\"}}"}, + {" {", "{\"delta\":{\"content\":\" {\"}}"}, + {"\"", "{\"delta\":{\"content\":\"\\\"\"}}"}, + {"arg1", "{\"delta\":{\"content\":\"arg1\"}}"}, + {"\": ", "{\"delta\":{\"content\":\"\\\": \"}}"}, + {"\"", "{\"delta\":{\"content\":\"\\\"\"}}"}, + {"val{{{ue1", "{\"delta\":{\"content\":\"val{{{ue1\"}}"}, + {"\"", "{\"delta\":{\"content\":\"\\\"\"}}"}, + {"}", "{\"delta\":{\"content\":\"}\"}}"}, + {"}", "{\"delta\":{\"content\":\"}\"}}"}, + {"<|tool_call_end|>\n", "{\"delta\":{\"content\":\"<|tool_call_end|>\\n\"}}"}, + }; + + for (const auto& [chunk, expectedDelta] : chunkToDeltaVec) { + // Second argument is false as we simulate the case where tools have not been provided in the request + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, {}, false, ov::genai::GenerationFinishReason::NONE); + assertChunkEqual(doc, expectedDelta, chunk); + } +} + +// Malformed tool calls + +TEST_F(LFM25OutputParserTest, ParseToolCallWithMissingParentheses) { + std::string input = "<|tool_call_start|>[broken_tool]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithMissingClosingParenthesis) { + std::string input = "<|tool_call_start|>[broken_tool(arg1=\"value1\"]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithArgumentMissingEquals) { + // Argument without '=' sign - parseSingleArgument sets isValid = false + std::string input = "<|tool_call_start|>[broken(malformed_arg)]<|tool_call_end|>"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + // The tool call is parsed but the argument value will be empty and invalid + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "broken"); +} + +// Tests with special characters +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingComparison) { + std::string input = R"x(<|tool_call_start|>[search(query="price >= 100, (sale)", limit=5)]<|tool_call_end|>)x"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "search"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"x({"query":"price >= 100, (sale)","limit":5})x"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingBracesAndBrackets) { + std::string input = R"(<|tool_call_start|>[format(template="Hello {name}, items: [a, b, c]", count=3)]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "format"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"template":"Hello {name}, items: [a, b, c]","count":3})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingSpecialCharacters) { + std::string impl = "import package\nimport package2\n\ndef func(a, b):\n\td={\"python\": \"dict\"}\n\tl = [\"list \\\"with escaped text\\\"\", 123, []]\n\treturn f\"formatted {a} and {b}\""; + std::string input = R"(<|tool_call_start|>[execute(code=")" + impl + R"(")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "execute"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"code":"import package\nimport package2\n\ndef func(a, b):\n\td={\"python\": \"dict\"}\n\tl = [\"list \\\"with escaped text\\\"\", 123, []]\n\treturn f\"formatted {a} and {b}\""})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingEscapedQuotes) { + std::string input = R"x(<|tool_call_start|>[execute(code="print(\"hello world\")", verbose=true)]<|tool_call_end|>)x"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "execute"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"x({"code":"print(\"hello world\")","verbose":true})x"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingApostrophes) { + std::string input = R"(<|tool_call_start|>[log(message="it's a test, isn't it?", level="warn")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "log"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"message":"it's a test, isn't it?","level":"warn"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingBackslashes) { + std::string input = R"(<|tool_call_start|>[read_file(path="C:\Users\test\file.txt", encoding="utf-8")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "read_file"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"path":"C:\\Users\\test\\file.txt","encoding":"utf-8"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsArrayWithStringsContainingQuotes) { + std::string input = R"(<|tool_call_start|>[save(lines=['it's the wonderful day', 'My name's Jan', 'That's Johns' car.'])]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "save"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"lines":["it's the wonderful day","My name's Jan","That's Johns' car."]})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentContainingSingleQuotes) { + std::string input = R"(<|tool_call_start|>[save(line="I've had line with single quotes")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "save"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"line":"I've had line with single quotes"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsObjectWithStringsContainingQuotes) { + std::string input = R"(<|tool_call_start|>[save(obj={'name':'it's the wonderful day', 'greeting':'Hello, my name's Jan', 'note':'That's Johns' car.'})]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "save"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"obj":{"name":"it's the wonderful day","greeting":"Hello, my name's Jan","note":"That's Johns' car."}})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithStringArgumentsContainingNestedJSON) { + std::string input = R"(<|tool_call_start|>[send(payload="{'key': 'value', 'count': 42}", endpoint="api")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "send"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"payload":"{'key': 'value', 'count': 42}","endpoint":"api"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithEmptyStringArgument) { + std::string input = R"(<|tool_call_start|>[create(name="", value=0)]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "create"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"name":"","value":0})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithUnicodeCharactersInArguments) { + std::string input = R"(<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "translate"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"text":"zażółć gęślą jaźń","lang":"pl"})"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithPythonCodeAsArgument) { + std::string input = R"x(<|tool_call_start|>[string_tool(param=" + if __name__ == "__main__": + addresses = {} + addresses["Hodor"] = """The door""" + addresses["Arya"] = "Winterfell" + for name, address in addresses.items(): + print(f'\n\t{name} lives at {address}\n\r')")]<|tool_call_end|>)x"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "string_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"x({"param":"\n if __name__ == \"__main__\":\n addresses = {}\n addresses[\"Hodor\"] = \"\"\"The door\"\"\"\n addresses[\"Arya\"] = \"Winterfell\"\n for name, address in addresses.items():\n print(f'\\n\\t{name} lives at {address}\\n\\r')"})x"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithReasoning) { + std::string input = R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...]<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|>)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + + std::cout << std::endl; + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "translate"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"text":"zażółć gęślą jaźń","lang":"pl"})"); + EXPECT_EQ(parsedOutput.reasoning, R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...])"); +} + +TEST_F(LFM25OutputParserTest, ParseToolCallWithReasoningAndContent) { + std::string input = R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...]<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|> This is the content after the tool call.)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, " This is the content after the tool call."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "translate"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, R"({"text":"zażółć gęślą jaźń","lang":"pl"})"); + EXPECT_EQ(parsedOutput.reasoning, R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...])"); +} + +TEST_F(LFM25OutputParserTest, ParseOutputWithReasoningAndContent) { + std::string input = R"(User wants me to answer what is the difference between "foo" and "bar". I should answer with a short explanation. [...] The difference between "foo" and "bar" is that "foo" is often used as a placeholder name in programming, while "bar" is another placeholder name that is commonly used alongside "foo".)"; + auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, " The difference between \"foo\" and \"bar\" is that \"foo\" is often used as a placeholder name in programming, while \"bar\" is another placeholder name that is commonly used alongside \"foo\"."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + EXPECT_EQ(parsedOutput.reasoning, R"(User wants me to answer what is the difference between "foo" and "bar". I should answer with a short explanation. [...])"); +} diff --git a/src/test/llm/output_parsers/parser_config_validation_test.cpp b/src/test/llm/output_parsers/parser_config_validation_test.cpp index f7c841ccaa..f173627b3f 100644 --- a/src/test/llm/output_parsers/parser_config_validation_test.cpp +++ b/src/test/llm/output_parsers/parser_config_validation_test.cpp @@ -52,7 +52,7 @@ class ParserConfigValidationTest : public ::testing::Test { TEST_F(ParserConfigValidationTest, RegistryHasExpectedToolParsers) { const auto& names = getSupportedToolParserNames(); for (const auto& expected : {"llama3", "hermes3", "phi4", "mistral", "gptoss", - "qwen3coder", "devstral", "lfm2", "gemma4"}) { + "qwen3coder", "devstral", "lfm2", "lfm2.5", "gemma4"}) { EXPECT_NE(std::find(names.begin(), names.end(), expected), names.end()) << "Expected tool parser '" << expected << "' missing from registry"; } @@ -63,7 +63,7 @@ TEST_F(ParserConfigValidationTest, RegistryHasExpectedToolParsers) { TEST_F(ParserConfigValidationTest, RegistryHasExpectedReasoningParsers) { const auto& names = getSupportedReasoningParserNames(); - for (const auto& expected : {"qwen3", "gemma4", "gptoss"}) { + for (const auto& expected : {"qwen3", "gemma4", "gptoss", "lfm2.5"}) { EXPECT_NE(std::find(names.begin(), names.end(), expected), names.end()) << "Expected reasoning parser '" << expected << "' missing from registry"; } @@ -79,6 +79,7 @@ TEST_F(ParserConfigValidationTest, SupportedNamesStringContainsAllParsers) { const std::string reasoningNames = getSupportedReasoningParserNamesAsString(); EXPECT_NE(reasoningNames.find("qwen3"), std::string::npos); EXPECT_NE(reasoningNames.find("gptoss"), std::string::npos); + EXPECT_NE(reasoningNames.find("lfm2.5"), std::string::npos); } TEST_F(ParserConfigValidationTest, OutputParserThrowsOnUnknownToolParser) { From 218a008b2529445c7bd7a7ba6e010d6ec27f8bcd Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 13:52:16 +0200 Subject: [PATCH 13/20] fix reasoning --- src/llm/io_processing/lfm2/lfm2_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp index d7f115ea57..6b31227c93 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.cpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -399,7 +399,7 @@ void parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& if (reasoningEndTokenId.has_value()) { auto reasoningEndIt = std::find(contentWithoutToolCalls.begin(), contentWithoutToolCalls.end(), reasoningEndTokenId.value()); if (reasoningEndIt != contentWithoutToolCalls.end()) { - contentWithoutToolCalls.erase(reasoningEndIt, contentWithoutToolCalls.end()); + contentWithoutToolCalls.erase(contentWithoutToolCalls.begin(), reasoningEndIt + 1); } } From f676fb6ce85fdc15df5aba876371a8260dccf7db Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:05:40 +0200 Subject: [PATCH 14/20] minor fixes --- docs/parameters.md | 4 ++-- prepare_llm_models.sh | 19 +++++++++++++++++++ src/llm/io_processing/lfm2/lfm2_utils.cpp | 2 -- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/parameters.md b/docs/parameters.md index b023672b89..edc92f5006 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -134,8 +134,8 @@ Task specific parameters for different tasks (text generation/image generation/e | `--max_prompt_len` | `integer` | Sets NPU specific property for maximum number of tokens in the prompt. | | `--kv_cache_precision` | `string` | Reduced kv cache precision to `u8` lowers the cache size consumption. Accepted values: `u8` or empty (default). | | `--model_distribution_policy` | `string` | TENSOR_PARALLEL distributes tensor to multiple sockets/devices and processes it in parallel. PIPELINE_PARALLEL distributes different tensors to process by each device. Accepted values: `TENSOR_PARALLEL`, `PIPELINE_PARALLEL` or empty (default). | -| `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3, gptoss, lfm2, gemma4] | -| `--tool_parser` | `string` | Type of parser to use for tool calls extraction from model output. Currently supported: [llama3, phi4, hermes3, mistral, qwen3coder, gptoss, devstral, lfm2, gemma4] | +| `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3, gptoss, lfm2.5, gemma4] | +| `--tool_parser` | `string` | Type of parser to use for tool calls extraction from model output. Currently supported: [llama3, phi4, hermes3, mistral, qwen3coder, gptoss, devstral, lfm2, lfm2.5, gemma4] | | `--enable_tool_guided_generation` | `bool` | Enables enforcing tool schema during generation. Requires setting response parser. Default: false. | ### Image generation diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 1186ad1919..8956d7df1a 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -83,6 +83,25 @@ if [ ! -f "$1/$FACEBOOK_MODEL/chat_template.jinja" ]; then cp src/test/llm/dummy_facebook_template.jinja "$1/$FACEBOOK_MODEL/chat_template.jinja" fi +if [ -f "$1/$TTS_MODEL/$TOKENIZER_FILE" ]; then + echo "Model file $1/$TTS_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +else + python3 demos/common/export_models/export_model.py text2speech --source_model "$TTS_MODEL" --weight-format int4 --model_repository_path $1 --vocoder microsoft/speecht5_hifigan +fi +if [ ! -f "$1/$TTS_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Model file $1/$TTS_MODEL/$TOKENIZER_FILE does not exist." + exit 1 +fi + +if [ -f "$1/$STT_MODEL/$TOKENIZER_FILE" ]; then + echo "Model file $1/$STT_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +else + python3 demos/common/export_models/export_model.py speech2text --source_model "$STT_MODEL" --weight-format int4 --model_repository_path $1 +fi +if [ ! -f "$1/$STT_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Model file $1/$STT_MODEL/$TOKENIZER_FILE does not exist." + exit 1 +fi if [ -f "$1/$VLM_MODEL/$TOKENIZER_FILE" ]; then echo "Model file $1/$VLM_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp index 6b31227c93..160e377c4d 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.cpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -32,8 +32,6 @@ const std::string EOS_TOKEN_STR = "<|im_end|>"; const int TOOL_CALL_INDEX_START = -1; - - std::string parseArrayParameter(std::string argumentStr) { int quoteDepth = 0; From c6c52de2ebfeaf6f1a46eea5fa049ceeb2f0e043 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:32:27 +0200 Subject: [PATCH 15/20] minor fixes --- docs/llm/reference.md | 3 ++- src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp | 3 +-- src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp | 2 +- src/llm/io_processing/lfm2/lfm25_tool_parser.cpp | 2 +- src/llm/io_processing/lfm2/lfm25_tool_parser.hpp | 4 ++-- src/llm/io_processing/lfm2/lfm2_tool_parser.cpp | 6 +++--- src/llm/io_processing/lfm2/lfm2_tool_parser.hpp | 4 ++-- src/llm/io_processing/lfm2/lfm2_utils.cpp | 1 + 8 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/llm/reference.md b/docs/llm/reference.md index 6317096aa7..4037897908 100644 --- a/docs/llm/reference.md +++ b/docs/llm/reference.md @@ -285,12 +285,13 @@ __Tool parsers:__ - `gptoss` - `qwen3coder` - `lfm2` +- `lfm2.5` - `gemma4` __Reasoning parsers:__ - `qwen3` - `gptoss` -- `lfm2` +- `lfm2.5` - `gemma4` Note that using `tools` might require a chat template other than the original. diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp index c60244a272..8320fe4d7d 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** - +#pragma once #include #include #include @@ -66,6 +66,5 @@ std::optional Lfm25ReasoningParser::parseChunk(const std::s doc.Parse(buffer.GetString()); return doc; } - return std::nullopt; } } // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp index bc35eb07f9..e43561ee23 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** - +#pragma once #include "../base_output_parser.hpp" namespace ovms { diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp index a203aeb857..318a247981 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp @@ -25,7 +25,7 @@ const int64_t Lfm25ToolParser::toolCallEndTokenId = 124906; // <|tool_call_end const int64_t Lfm25ToolParser::reasoningEndTokenId = 124902; // bool Lfm25ToolParser::parseNewContent() { - switch (currentState) { + switch (this->currentState) { case State::Content: { return parseInContentState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); } diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp index 8dac71dadf..20ede48336 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp @@ -30,8 +30,6 @@ class Lfm25ToolParser : public BaseOutputParser { explicit Lfm25ToolParser(ov::genai::Tokenizer& tokenizer) : BaseOutputParser(tokenizer) {} - bool parseNewContent(); - void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; const std::vector& getParsingStartTags() const override { @@ -64,5 +62,7 @@ class Lfm25ToolParser : public BaseOutputParser { ToolCall toolCall; int toolCallIndex{TOOL_CALL_INDEX_START}; + + bool parseNewContent(); }; } diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp index 942139af43..37949afce9 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp @@ -20,11 +20,11 @@ namespace ovms { const std::string Lfm2ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; const std::string Lfm2ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; -const int64_t Lfm2ToolParser::botTokenId = 10; -const int64_t Lfm2ToolParser::eotTokenId = 11; +const int64_t Lfm2ToolParser::botTokenId = 10; // <|tool_call_start|> +const int64_t Lfm2ToolParser::eotTokenId = 11; // <|tool_call_end|> bool Lfm2ToolParser::parseNewContent() { - switch (currentState) { + switch (this->currentState) { case State::Content: { return parseInContentState(this->streamingContent, this->streamingPosition, this->currentState, TOOL_CALL_START_TAG, TOOL_CALL_END_TAG); } diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp index 2f291ee371..c607d827ff 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.hpp @@ -34,8 +34,6 @@ class Lfm2ToolParser : public BaseOutputParser { explicit Lfm2ToolParser(ov::genai::Tokenizer& tokenizer) : BaseOutputParser(tokenizer) {} - bool parseNewContent(); - void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, const std::vector& tokens, ov::genai::GenerationFinishReason finishReason) override; const std::vector& getParsingStartTags() const override { @@ -68,5 +66,7 @@ class Lfm2ToolParser : public BaseOutputParser { ToolCall toolCall; int toolCallIndex{TOOL_CALL_INDEX_START}; + + bool parseNewContent(); }; } // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp index 160e377c4d..1f6a932ca6 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.cpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** +#pragma once #include "lfm2_utils.hpp" #include "../utils.hpp" #include "../../../logging.hpp" From e8ee7bb876ecf939ad051228307f3fd0f9620f15 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:35:39 +0200 Subject: [PATCH 16/20] prams docs --- demos/common/export_models/export_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py index 9192b91449..0a08eea3e1 100644 --- a/demos/common/export_models/export_model.py +++ b/demos/common/export_models/export_model.py @@ -54,8 +54,8 @@ def add_common_arguments(parser): parser_text.add_argument('--max_prompt_len', required=False, type=int, default=None, help='Sets NPU specific property for maximum number of tokens in the prompt. ' 'Not effective if target device is not NPU', dest='max_prompt_len') parser_text.add_argument('--prompt_lookup_decoding', action='store_true', help='Set pipeline to use prompt lookup decoding', dest='prompt_lookup_decoding') -parser_text.add_argument('--reasoning_parser', choices=["qwen3", "gptoss", "lfm2", "gemma4"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') -parser_text.add_argument('--tool_parser', choices=["llama3", "phi4", "hermes3", "mistral", "qwen3coder", "gptoss", "devstral", "lfm2", "gemma4"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') +parser_text.add_argument('--reasoning_parser', choices=["qwen3", "gptoss", "lfm2.5", "gemma4"], help='Set the type of the reasoning parser for reasoning content extraction', dest='reasoning_parser') +parser_text.add_argument('--tool_parser', choices=["llama3", "phi4", "hermes3", "mistral", "qwen3coder", "gptoss", "devstral", "lfm2", "lfm2.5", "gemma4"], help='Set the type of the tool parser for tool calls extraction', dest='tool_parser') parser_text.add_argument('--enable_tool_guided_generation', action='store_true', help='Enables enforcing tool schema during generation. Requires setting tool_parser', dest='enable_tool_guided_generation') parser_embeddings_ov = subparsers.add_parser('embeddings_ov', help='export model for embeddings endpoint with directory structure aligned with OpenVINO tools') From 9f55fdb4d732978dc18aca58a099288e3fd1ac17 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:37:39 +0200 Subject: [PATCH 17/20] spelling --- spelling-whitelist.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt index b458909bb3..81f083e9b5 100644 --- a/spelling-whitelist.txt +++ b/spelling-whitelist.txt @@ -29,6 +29,7 @@ demos/vlm_npu/README.md:157: mane ==> main, many, maine demos/vlm_npu/README.md:218: mane ==> main, many, maine demos/integration_with_OpenWebUI/README.md:423: Buildin ==> Building, Build in src/test/llm/output_parsers/lfm2_output_parser_test.cpp +src/test/llm/output_parsers/lfm25_output_parser_test.cpp windows_parse_tests.bat:35: seh ==> she windows_parse_tests.bat:119: SEH ==> SHE windows_parse_tests.bat:123: SEH ==> SHE From f49d53aa9e527dafd6cdacdded231aae86471cd8 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:40:34 +0200 Subject: [PATCH 18/20] styles --- src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp | 2 +- src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp | 4 ++-- src/llm/io_processing/lfm2/lfm25_tool_parser.cpp | 6 +++--- src/llm/io_processing/lfm2/lfm25_tool_parser.hpp | 5 +++-- src/llm/io_processing/lfm2/lfm2_tool_parser.cpp | 4 ++-- src/llm/io_processing/lfm2/lfm2_utils.cpp | 2 +- src/test/llm/output_parsers/lfm25_output_parser_test.cpp | 2 +- src/test/llm/output_parsers/lfm2_output_parser_test.cpp | 1 - 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp index 8320fe4d7d..13ecbd3742 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp @@ -39,7 +39,7 @@ void Lfm25ReasoningParser::parse(ParsedOutput& parsedOutput, const std::vector(startPos + generatedTokens.begin() + 1, endPos + generatedTokens.begin()), ov::genai::skip_special_tokens(true)); - + parsedOutput.reasoning = reasoningContent; } diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp index e43561ee23..77fe84f139 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp @@ -44,9 +44,9 @@ class Lfm25ReasoningParser : public BaseOutputParser { return parsingEndTag; } - // It may be removed after changing logic in Lfm2ToolParser to use tokens in streaming instead of chunk content, both tool parser and reasoning parser need to have the same value for this function + // It may be removed after changing logic in Lfm2ToolParser to use tokens in streaming instead of chunk content, both tool parser and reasoning parser need to have the same value for this function bool requiresStreamingWithSpecialTokens() const override { return true; } }; -} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp index 318a247981..c85112939f 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp @@ -20,9 +20,9 @@ namespace ovms { const std::string Lfm25ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; const std::string Lfm25ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; -const int64_t Lfm25ToolParser::toolCallStartTokenId = 124905; // <|tool_call_start|> -const int64_t Lfm25ToolParser::toolCallEndTokenId = 124906; // <|tool_call_end|> -const int64_t Lfm25ToolParser::reasoningEndTokenId = 124902; // +const int64_t Lfm25ToolParser::toolCallStartTokenId = 124905; // <|tool_call_start|> +const int64_t Lfm25ToolParser::toolCallEndTokenId = 124906; // <|tool_call_end|> +const int64_t Lfm25ToolParser::reasoningEndTokenId = 124902; // bool Lfm25ToolParser::parseNewContent() { switch (this->currentState) { diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp index 20ede48336..d88a613984 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp @@ -25,6 +25,7 @@ class Lfm25ToolParser : public BaseOutputParser { static const int64_t toolCallStartTokenId; static const int64_t toolCallEndTokenId; static const int64_t reasoningEndTokenId; + public: Lfm25ToolParser() = delete; explicit Lfm25ToolParser(ov::genai::Tokenizer& tokenizer) : @@ -62,7 +63,7 @@ class Lfm25ToolParser : public BaseOutputParser { ToolCall toolCall; int toolCallIndex{TOOL_CALL_INDEX_START}; - + bool parseNewContent(); }; -} +} // namespace ovms diff --git a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp index 37949afce9..0230097b30 100644 --- a/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm2_tool_parser.cpp @@ -20,8 +20,8 @@ namespace ovms { const std::string Lfm2ToolParser::TOOL_CALL_START_TAG = "<|tool_call_start|>"; const std::string Lfm2ToolParser::TOOL_CALL_END_TAG = "<|tool_call_end|>"; -const int64_t Lfm2ToolParser::botTokenId = 10; // <|tool_call_start|> -const int64_t Lfm2ToolParser::eotTokenId = 11; // <|tool_call_end|> +const int64_t Lfm2ToolParser::botTokenId = 10; // <|tool_call_start|> +const int64_t Lfm2ToolParser::eotTokenId = 11; // <|tool_call_end|> bool Lfm2ToolParser::parseNewContent() { switch (this->currentState) { diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp index 1f6a932ca6..3f19848869 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.cpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -404,4 +404,4 @@ void parseUnaryResponse(ParsedOutput& parsedOutput, const std::vector& parsedOutput.content = tokenizer.decode(contentWithoutToolCalls, ov::AnyMap{ov::genai::skip_special_tokens(true)}); } -} // namespace ovms +} // namespace ovms diff --git a/src/test/llm/output_parsers/lfm25_output_parser_test.cpp b/src/test/llm/output_parsers/lfm25_output_parser_test.cpp index a43cefee2a..1c06a5a59d 100644 --- a/src/test/llm/output_parsers/lfm25_output_parser_test.cpp +++ b/src/test/llm/output_parsers/lfm25_output_parser_test.cpp @@ -836,7 +836,7 @@ TEST_F(LFM25OutputParserTest, ParseToolCallWithReasoning) { std::string input = R"(User wants me to translate string "zażółć gęślą jaźń" from polish. Polish parameter language signature is "pl". I should use function translate. [...]<|tool_call_start|>[translate(text="zażółć gęślą jaźń", lang="pl")]<|tool_call_end|>)"; auto generatedTensor = lfm25Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); - + std::cout << std::endl; ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); diff --git a/src/test/llm/output_parsers/lfm2_output_parser_test.cpp b/src/test/llm/output_parsers/lfm2_output_parser_test.cpp index 69ad93c6ba..5b698d2a8c 100644 --- a/src/test/llm/output_parsers/lfm2_output_parser_test.cpp +++ b/src/test/llm/output_parsers/lfm2_output_parser_test.cpp @@ -186,7 +186,6 @@ TEST_F(LFM2OutputParserTest, ParseToolCallWithObjectArguments) { TEST_F(LFM2OutputParserTest, ParseToolCallWithStringArguments) { std::string inputWithProperClosure = "<|tool_call_start|>[test1(arg1=\"data1, data2\")]<|tool_call_end|>"; - std::vector inputs = {inputWithProperClosure}; for (auto& input : inputs) { auto generatedTensor = lfm2Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; From 8b330edd115d1656af81e86ed86347aed0008333 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:47:08 +0200 Subject: [PATCH 19/20] cpplint --- src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp | 2 ++ src/llm/io_processing/lfm2/lfm25_tool_parser.cpp | 2 +- src/llm/io_processing/lfm2/lfm25_tool_parser.hpp | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp index 77fe84f139..afc52c7f56 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.hpp @@ -15,6 +15,8 @@ //***************************************************************************** #pragma once #include "../base_output_parser.hpp" +#include +#include namespace ovms { class Lfm25ReasoningParser : public BaseOutputParser { diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp index c85112939f..c1c37e99ef 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.cpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** -#include "./lfm25_tool_parser.hpp" +#include "lfm25_tool_parser.hpp" namespace ovms { diff --git a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp index d88a613984..d07b277077 100644 --- a/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp +++ b/src/llm/io_processing/lfm2/lfm25_tool_parser.hpp @@ -13,8 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** - -#include "./lfm2_utils.hpp" +#pragma once +#include +#include +#include "lfm2_utils.hpp" namespace ovms { class Lfm25ToolParser : public BaseOutputParser { From 4a8b862ce7ef6f700d1f39b0d86a34df3e73d4d9 Mon Sep 17 00:00:00 2001 From: Pawel Rzepecki Date: Thu, 25 Jun 2026 14:52:22 +0200 Subject: [PATCH 20/20] build fixes --- src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp | 1 - src/llm/io_processing/lfm2/lfm2_utils.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp index 13ecbd3742..f3191fd6f5 100644 --- a/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp +++ b/src/llm/io_processing/lfm2/lfm25_reasoning_parser.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** -#pragma once #include #include #include diff --git a/src/llm/io_processing/lfm2/lfm2_utils.cpp b/src/llm/io_processing/lfm2/lfm2_utils.cpp index 3f19848869..3ccc74b399 100644 --- a/src/llm/io_processing/lfm2/lfm2_utils.cpp +++ b/src/llm/io_processing/lfm2/lfm2_utils.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** -#pragma once #include "lfm2_utils.hpp" #include "../utils.hpp" #include "../../../logging.hpp"