FIX Mitigate Jinja2 Server-Side Template Injection (SSTI) vulnerability (#1577)

romanlutz · adrian-gavrila · Copilot · romanlutz · commit bb8b28b7d38b · 2026-04-09T10:56:43.000-07:00
Co-authored-by: Roman Lutz <romanlutz@users.noreply.github.com>
Co-authored-by: adrian-gavrila <50029937+adrian-gavrila@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/pyrit/datasets/jailbreak/many_shot_examples.json b/pyrit/datasets/jailbreak/many_shot_examples.json
diff --git a/pyrit/datasets/jailbreak/text_jailbreak.py b/pyrit/datasets/jailbreak/text_jailbreak.py
@@ -134,7 +134,7 @@ def __init__(
             self.template = SeedPrompt.from_yaml_file(template_path)
             self.template_source = str(template_path)
         elif string_template:
-            self.template = SeedPrompt(value=string_template, is_general_technique=True)
+            self.template = SeedPrompt(value=string_template, is_general_technique=True, is_jinja_template=True)
             self.template_source = "<string_template>"
         elif template_file_name:
             resolved_path = self._resolve_template_by_name(template_file_name)
diff --git a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py
@@ -169,7 +169,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
 
                 # Escape Jinja2 template syntax by wrapping the entire prompt in raw tags
                 # This tells Jinja2 to treat everything inside as literal text
-                prompt_value = f"{{% raw %}}{prompt_value}{{% endraw %}}"
+                prompt_value = prompt_value
 
                 seed_prompts.append(
                     SeedPrompt(
diff --git a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py
@@ -104,7 +104,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
             try:
                 seed_prompts.append(
                     SeedPrompt(
-                        value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
+                        value=item["prompt"],
                         data_type="text",
                         dataset_name=self.dataset_name,
                         harm_categories=harm_categories,
diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py
@@ -79,7 +79,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
 
         seed_prompts = [
             SeedPrompt(
-                value=f"{{% raw %}}{item['question']}{{% endraw %}}",
+                value=item["question"],
                 data_type="text",
                 dataset_name=self.dataset_name,
                 harm_categories=[item["topic"]] if item.get("topic") else [],
diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py
@@ -69,7 +69,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
 
         seed_prompts = [
             SeedPrompt(
-                value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
+                value=item["prompt"],
                 data_type="text",
                 dataset_name=self.dataset_name,
                 harm_categories=[item["category"]] if item.get("category") else [],
diff --git a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py
@@ -303,7 +303,7 @@ def _convert_record_to_seed_prompt(self, record: dict[str, Any]) -> Optional[See
         metadata = self._build_metadata(record)
 
         # Escape Jinja2 template syntax in the prompt text
-        escaped_prompt = f"{{% raw %}}{prompt_value}{{% endraw %}}"
+        escaped_prompt = prompt_value
 
         return SeedPrompt(
             value=escaped_prompt,
diff --git a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py
@@ -128,7 +128,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
                 # Clean up single turn prompts that contain unwanted lines of text
                 cleaned_value = prompt_value.replace("### Response:", "").replace("### Instruction:", "").strip()
                 # some entries have contents that trip up jinja2, so we escape them
-                escaped_cleaned_value = f"{{% raw %}}{cleaned_value}{{% endraw %}}"
+                escaped_cleaned_value = cleaned_value
                 seed_prompts.append(
                     SeedPrompt(
                         value=escaped_cleaned_value,
diff --git a/pyrit/datasets/seed_datasets/remote/remote_dataset_loader.py b/pyrit/datasets/seed_datasets/remote/remote_dataset_loader.py
@@ -24,6 +24,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 # Define the type for the file handlers
 FileHandlerRead = Callable[[TextIO], list[dict[str, str]]]
 FileHandlerWrite = Callable[[TextIO, list[dict[str, str]]], None]
diff --git a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py
@@ -113,7 +113,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
 
         seed_prompts = [
             SeedPrompt(
-                value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
+                value=item["prompt"],
                 data_type="text",
                 dataset_name=self.dataset_name,
                 harm_categories=[self._parse_category(c) for c in item["categories"]],
diff --git a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py
@@ -83,7 +83,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
 
         seed_prompts = [
             SeedPrompt(
-                value=f"{{% raw %}}{item['prompt']}{{% endraw %}}",
+                value=item["prompt"],
                 data_type="text",
                 dataset_name=self.dataset_name,
                 harm_categories=[item["harm_area"]] if item.get("harm_area") else [],
diff --git a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py
@@ -131,7 +131,7 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
             harm_categories = self._extract_harm_categories(item)
             try:
                 prompt = SeedPrompt(
-                    value=f"{{% raw %}}{user_input}{{% endraw %}}",
+                    value=user_input,
                     data_type="text",
                     dataset_name=self.dataset_name,
                     description=description,
diff --git a/pyrit/executor/attack/multi_turn/red_teaming.py b/pyrit/executor/attack/multi_turn/red_teaming.py
@@ -605,7 +605,7 @@ def _set_adversarial_chat_seed_prompt(self, *, seed_prompt: Union[str, SeedPromp
             ValueError: If the seed prompt is not a string or SeedPrompt object.
         """
         if isinstance(seed_prompt, str):
-            self._adversarial_chat_seed_prompt = SeedPrompt(value=seed_prompt, data_type="text")
+            self._adversarial_chat_seed_prompt = SeedPrompt(value=seed_prompt, data_type="text", is_jinja_template=True)
         elif isinstance(seed_prompt, SeedPrompt):
             self._adversarial_chat_seed_prompt = seed_prompt
         else:
diff --git a/pyrit/executor/attack/single_turn/many_shot_jailbreak.py b/pyrit/executor/attack/single_turn/many_shot_jailbreak.py
@@ -1,13 +1,13 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
+import json
 import logging
-from typing import Any, Optional, cast
-
-import requests
+from typing import Any, Optional
 
 from pyrit.common.apply_defaults import REQUIRED_VALUE, apply_defaults
-from pyrit.common.path import JAILBREAK_TEMPLATES_PATH
+from pyrit.common.deprecation import print_deprecation_message
+from pyrit.common.path import DATASETS_PATH, JAILBREAK_TEMPLATES_PATH
 from pyrit.executor.attack.core.attack_config import AttackConverterConfig, AttackScoringConfig
 from pyrit.executor.attack.core.attack_parameters import AttackParameters
 from pyrit.executor.attack.single_turn.prompt_sending import PromptSendingAttack
@@ -22,18 +22,34 @@
 # as it constructs its own prompt format with examples.
 ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message")
 
+_MANY_SHOT_EXAMPLES_PATH = DATASETS_PATH / "jailbreak" / "many_shot_examples.json"
+
+
+def load_many_shot_jailbreaking_dataset() -> list[dict[str, str]]:
+    """
+    Read the many-shot jailbreaking examples bundled at ``_MANY_SHOT_EXAMPLES_PATH``.
+
+    Returns:
+        list[dict[str, str]]: The examples parsed from the UTF-8 encoded JSON file.
+    """
+    with open(_MANY_SHOT_EXAMPLES_PATH, encoding="utf-8") as examples_file:
+        examples: list[dict[str, str]] = json.loads(examples_file.read())
+    return examples
+
 
 def fetch_many_shot_jailbreaking_dataset() -> list[dict[str, str]]:
     """
-    Fetch many-shot jailbreaking dataset from a specified source.
+    Deprecated alias for load_many_shot_jailbreaking_dataset; slated for removal in 0.14.0.
 
     Returns:
         list[dict[str, str]]: A list of many-shot jailbreaking examples.
     """
-    source = "https://raw.githubusercontent.com/KutalVolkan/many-shot-jailbreaking-dataset/5eac855/examples.json"
-    response = requests.get(source)
-    response.raise_for_status()
-    return cast("list[dict[str, str]]", response.json())
+    print_deprecation_message(
+        old_item=fetch_many_shot_jailbreaking_dataset,
+        new_item=load_many_shot_jailbreaking_dataset,
+        removed_in="0.14.0",
+    )
+    return load_many_shot_jailbreaking_dataset()
 
 
 class ManyShotJailbreakAttack(PromptSendingAttack):
@@ -87,7 +103,7 @@ def __init__(
         self._examples = (
             many_shot_examples[:example_count]
             if (many_shot_examples is not None)
-            else fetch_many_shot_jailbreaking_dataset()[:example_count]
+            else load_many_shot_jailbreaking_dataset()[:example_count]
         )
         if not self._examples:
             raise ValueError("Many shot examples must be provided.")
diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer.py b/pyrit/executor/promptgen/fuzzer/fuzzer.py
@@ -832,7 +832,7 @@ async def _execute_generation_iteration_async(self, context: FuzzerContext) -> N
             raise
 
         # Create template node for tracking
-        target_template = SeedPrompt(value=target_seed, data_type="text", parameters=["prompt"])
+        target_template = SeedPrompt(value=target_seed, data_type="text", parameters=["prompt"], is_jinja_template=True)
         target_template_node = _PromptNode(template=target_seed, parent=None)
 
         # Generate prompts from template
diff --git a/pyrit/models/seeds/seed.py b/pyrit/models/seeds/seed.py
@@ -17,8 +17,11 @@
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
 
-from jinja2 import Environment, StrictUndefined, Template, Undefined
+import yaml
+from jinja2 import StrictUndefined, Undefined
+from jinja2.sandbox import SandboxedEnvironment
 
+from pyrit.common.utils import verify_and_resolve_path
 from pyrit.common.yaml_loadable import YamlLoadable
 
 if TYPE_CHECKING:
@@ -130,6 +133,11 @@ class Seed(YamlLoadable):
     # Whether this seed represents a general attack technique (not tied to a specific objective)
     is_general_technique: bool = False
 
+    # When True, value contains Jinja2 template syntax that should be rendered as-is.
+    # When False (default), value is treated as literal text and auto-escaped with {% raw %} tags
+    # to prevent template injection. Trusted sources (YAML files) set this to True automatically.
+    is_jinja_template: bool = False
+
     @property
     def data_type(self) -> PromptDataType:
         """
@@ -157,8 +165,9 @@ def render_template_value(self, **kwargs: Any) -> str:
         template_identifier = self.name or "<unnamed template>"
 
         try:
-            jinja_template = Template(self.value, undefined=StrictUndefined)
-            return jinja_template.render(**kwargs)
+            env = SandboxedEnvironment(undefined=StrictUndefined)
+            is_jinja_template = env.from_string(self.value)
+            return is_jinja_template.render(**kwargs)
         except Exception as e:
             raise ValueError(
                 f"Error rendering template '{template_identifier}': {str(e)}. "
@@ -194,12 +203,12 @@ def render_template_value_silent(self, **kwargs: Any) -> str:
                 return self.value
 
         # Create a Jinja template with PartialUndefined placeholders
-        env = Environment(undefined=PartialUndefined)
-        jinja_template = env.from_string(self.value)
+        env = SandboxedEnvironment(undefined=PartialUndefined)
+        is_jinja_template = env.from_string(self.value)
 
         try:
             # Render the template with the provided kwargs
-            return jinja_template.render(**kwargs)
+            return is_jinja_template.render(**kwargs)
         except Exception as e:
             logger.error("Error rendering template: %s", e)
             return self.value
@@ -221,6 +230,46 @@ async def set_sha256_value_async(self) -> None:
 
         self.value_sha256 = await original_serializer.get_sha256()
 
+    @staticmethod
+    def escape_for_jinja(value: str) -> str:
+        """
+        Wrap a string in Jinja2 {% raw %}...{% endraw %} tags to prevent template evaluation.
+
+        Use this for any untrusted or externally-fetched text that will be stored as a
+        Seed value, to ensure it is treated as literal text by the Jinja2 renderer.
+
+        An ``endraw`` tag embedded in the value would otherwise close the raw block early
+        and let the remainder be evaluated as a template. Each such tag is therefore split:
+        the raw block is closed, the tag's opening ``{%`` is emitted as a string literal,
+        and a new raw block is opened, so the rendered output still equals ``value``.
+
+        Args:
+            value: The raw string to escape.
+
+        Returns:
+            str: The string wrapped in {% raw %}...{% endraw %} tags, with embedded
+                endraw tags neutralized.
+        """
+        import re  # function-scope import keeps this change self-contained
+
+        # Matches the opening "{%" of every endraw spelling under Jinja2's default
+        # delimiters: optional "-"/"+" whitespace-control markers and arbitrary spacing.
+        endraw_opening = r"\{%(?=[-+]?\s*endraw\s*[-+]?%\})"
+        neutralized = re.sub(endraw_opening, "{% endraw %}{{ '{%' }}{% raw %}", value)
+        return f"{{% raw %}}{neutralized}{{% endraw %}}"
+
+    @classmethod
+    def from_yaml_file(cls: type[T], file: Union[str, Path]) -> T:
+        """
+        Create a new Seed from a YAML file, marking it as a trusted Jinja2 template.
+
+        The parsed mapping is passed to the class constructor with
+        ``is_jinja_template`` forced to True, overriding any value in the file.
+
+        Args:
+            file: The input file path.
+
+        Returns:
+            A new Seed of the specific subclass type.
+
+        Raises:
+            ValueError: If the YAML file is invalid or its top level is not a mapping.
+        """
+        file = verify_and_resolve_path(file)
+
+        try:
+            yaml_data = yaml.safe_load(file.read_text("utf-8"))
+        except yaml.YAMLError as exc:
+            raise ValueError(f"Invalid YAML file '{file}': {exc}") from exc
+
+        # safe_load returns None for an empty document and may return a list or scalar;
+        # fail with the documented ValueError instead of a TypeError on item assignment.
+        if not isinstance(yaml_data, dict):
+            raise ValueError(
+                f"Invalid YAML file '{file}': expected a mapping at the top level, "
+                f"got {type(yaml_data).__name__}."
+            )
+
+        yaml_data["is_jinja_template"] = True
+        return cls(**yaml_data)
+
     @classmethod
     @abc.abstractmethod
     def from_yaml_with_required_parameters(
diff --git a/pyrit/models/seeds/seed_dataset.py b/pyrit/models/seeds/seed_dataset.py
@@ -15,7 +15,10 @@
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any, Optional, Union
 
+import yaml
+
 from pyrit.common import utils
+from pyrit.common.utils import verify_and_resolve_path
 from pyrit.common.yaml_loadable import YamlLoadable
 from pyrit.models.seeds.seed_attack_group import SeedAttackGroup
 from pyrit.models.seeds.seed_group import SeedGroup
@@ -25,6 +28,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
+    from pathlib import Path
 
     from pydantic.types import PositiveInt
 
@@ -55,6 +59,31 @@ class SeedDataset(YamlLoadable):
     # Now the actual prompts
     seeds: Sequence[Seed]
 
+    @classmethod
+    def from_yaml_file(cls, file: Union[str, Path]) -> SeedDataset:
+        """
+        Create a SeedDataset from a YAML file, marking nested seeds as trusted templates.
+
+        The parsed mapping gets ``is_jinja_template`` set to True before it is handed to
+        ``cls.from_dict`` (when the class exposes a callable one) or to the constructor.
+
+        Args:
+            file: The input file path.
+
+        Returns:
+            SeedDataset: The loaded dataset.
+
+        Raises:
+            ValueError: If the YAML file is invalid or its top level is not a mapping.
+        """
+        file = verify_and_resolve_path(file)
+        try:
+            yaml_data = yaml.safe_load(file.read_text("utf-8"))
+        except yaml.YAMLError as exc:
+            raise ValueError(f"Invalid YAML file '{file}': {exc}") from exc
+
+        # An empty document parses to None, and a list or scalar document cannot take a
+        # string key; surface both as the documented ValueError rather than a TypeError.
+        if not isinstance(yaml_data, dict):
+            raise ValueError(
+                f"Invalid YAML file '{file}': expected a mapping at the top level, "
+                f"got {type(yaml_data).__name__}."
+            )
+
+        yaml_data["is_jinja_template"] = True
+        if hasattr(cls, "from_dict") and callable(getattr(cls, "from_dict")):  # noqa: B009
+            return cls.from_dict(yaml_data)
+        return cls(**yaml_data)
+
     def __init__(
         self,
         *,
@@ -71,6 +100,7 @@ def __init__(
         added_by: Optional[str] = None,
         seed_type: Optional[SeedType] = None,
         is_objective: bool = False,  # Deprecated in 0.13.0: Use seed_type="objective" instead
+        is_jinja_template: bool = False,
     ):
         """
         Initialize the dataset.
@@ -93,6 +123,7 @@ def __init__(
             added_by: User who added the dataset.
             seed_type: The type of seeds in this dataset ("prompt", "objective", or "simulated_conversation").
             is_objective: Deprecated in 0.13.0. Use seed_type="objective" instead.
+            is_jinja_template: When True, seed values are Jinja2 templates. Set by from_yaml_file.
 
         Raises:
             ValueError: If seeds are missing or contain invalid/contradictory seed definitions.
@@ -168,6 +199,7 @@ def __init__(
                     "added_by": p.get("added_by"),
                     "metadata": p.get("metadata", {}),
                     "prompt_group_id": p.get("prompt_group_id"),
+                    "is_jinja_template": is_jinja_template,
                 }
 
                 if effective_type == "simulated_conversation":
diff --git a/pyrit/models/seeds/seed_group.py b/pyrit/models/seeds/seed_group.py
@@ -49,6 +49,7 @@ def __init__(
         self,
         *,
         seeds: Sequence[Union[Seed, dict[str, Any]]],
+        is_jinja_template: bool = False,
     ):
         """
         Initialize a SeedGroup.
@@ -59,6 +60,8 @@ def __init__(
                 - SeedSimulatedConversation (or dict with seed_type="simulated_conversation")
                 - SeedPrompt for prompts (or dict with seed_type="prompt" or no seed_type)
                 Note: is_objective and is_simulated_conversation are deprecated since 0.13.0.
+            is_jinja_template: When True, seed values are treated as Jinja2 templates.
+                Set automatically by from_yaml_file for trusted sources.
 
         Raises:
             ValueError: If seeds is empty.
@@ -74,6 +77,7 @@ def __init__(
             if isinstance(seed, Seed):
                 self.seeds.append(seed)
             elif isinstance(seed, dict):
+                seed["is_jinja_template"] = is_jinja_template
                 # Support new seed_type field with backward compatibility for deprecated fields
                 seed_type = seed.pop("seed_type", None)
                 is_objective = seed.pop("is_objective", False)
diff --git a/pyrit/models/seeds/seed_objective.py b/pyrit/models/seeds/seed_objective.py
@@ -35,6 +35,8 @@ def __post_init__(self) -> None:
         """
         if self.is_general_technique:
             raise ValueError("SeedObjective cannot be a general technique.")
+        if not self.is_jinja_template:
+            self.value = self.escape_for_jinja(self.value)
         self.value = super().render_template_value_silent(**PATHS_DICT)
 
     @classmethod
diff --git a/pyrit/models/seeds/seed_prompt.py b/pyrit/models/seeds/seed_prompt.py
diff --git a/tests/unit/executor/attack/single_turn/test_many_shot_jailbreak.py b/tests/unit/executor/attack/single_turn/test_many_shot_jailbreak.py
diff --git a/tests/unit/models/test_seed.py b/tests/unit/models/test_seed.py
diff --git a/tests/unit/scenarios/test_jailbreak.py b/tests/unit/scenarios/test_jailbreak.py