Skip to content

Commit d6594a1

Browse files
google-genai-bot authored and copybara-github committed
feat: Add support for refusal messages in ApigeeLlm
If content and refusal chunks are interleaved, this will drop the remaining content chunks after the first refusal chunk appears.

PiperOrigin-RevId: 901457248
1 parent 782796f commit d6594a1

3 files changed

Lines changed: 200 additions & 17 deletions

File tree

src/google/adk/models/apigee_llm.py

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060
'object',
6161
)
6262

63+
_REFUSAL_PREFIX = '[[REFUSAL]]: '
64+
6365

6466
class ApigeeLlm(Gemini):
6567
"""A BaseLlm implementation for calling Apigee proxy.
@@ -658,11 +660,14 @@ def _content_to_messages(
658660

659661
tool_calls = []
660662
content_parts = []
663+
refusals: list[str] = []
661664

662665
function_responses = []
663666

664667
for part in content.parts or []:
665-
self._process_content_part(content, part, tool_calls, content_parts)
668+
self._process_content_part(
669+
content, part, tool_calls, content_parts, refusals
670+
)
666671
if part.function_response:
667672
function_responses.append({
668673
'role': 'tool',
@@ -673,6 +678,8 @@ def _content_to_messages(
673678
return function_responses
674679

675680
message = {'role': role}
681+
if refusals:
682+
message['refusal'] = '\n'.join(refusals)
676683
if tool_calls:
677684
message['tool_calls'] = tool_calls
678685
if not content_parts:
@@ -691,6 +698,7 @@ def _process_content_part(
691698
part: types.Part,
692699
tool_calls: list[dict[str, Any]],
693700
content_parts: list[dict[str, Any]],
701+
refusals: list[str],
694702
) -> None:
695703
"""Processes a single Part and updates tool_calls or content_parts."""
696704
if content.role != 'user' and (
@@ -731,7 +739,14 @@ def _process_content_part(
731739
# Handled in the loop to return immediately
732740
pass
733741
elif part.text:
734-
content_parts.append({'type': 'text', 'text': part.text})
742+
if part.text.startswith(_REFUSAL_PREFIX):
743+
refusals.append(part.text.removeprefix(_REFUSAL_PREFIX))
744+
else:
745+
before, sep, after = part.text.partition('\n' + _REFUSAL_PREFIX)
746+
if sep:
747+
refusals.append(after)
748+
if before:
749+
content_parts.append({'type': 'text', 'text': before})
735750
elif part.inline_data:
736751
mime_type = part.inline_data.mime_type
737752
data = base64.b64encode(part.inline_data.data).decode('utf-8')
@@ -843,6 +858,7 @@ def __init__(self):
843858
self.usage = {}
844859
self.logprobs = {}
845860
self.custom_metadata = {}
861+
self._refusal_started = False
846862

847863
def process_response(self, response: dict[str, Any]) -> LlmResponse:
848864
"""Processes a complete non-streaming response."""
@@ -989,19 +1005,49 @@ def _accumulate_logprobs(self, logprobs_chunk: dict[str, Any]) -> None:
9891005
self.logprobs['refusal'] = []
9901006
self.logprobs['refusal'].extend(logprobs_chunk['refusal'])
9911007

992-
def _append_content(self, content: str, refusal: str) -> str:
993-
if content and refusal:
994-
content += '\n'
995-
content += refusal
996-
elif refusal:
997-
content = refusal
1008+
def _accumulate_content(self, choice: dict[str, Any]) -> str:
1009+
"""Processes a message or delta chunk to accumulate content and refusals.
1010+
1011+
This method extracts 'content' and 'refusal' from the chunk, updates the
1012+
accumulated state (self.content_parts), and returns the text content for
1013+
this chunk (handling prefixes and newlines if it's a refusal).
1014+
1015+
Args:
1016+
choice: A dictionary representing a message choice or a streaming delta.
1017+
1018+
Returns:
1019+
The text content to be appended or yielded for this chunk.
1020+
"""
1021+
content = choice.get('content', '')
1022+
refusal = choice.get('refusal', '')
1023+
1024+
if content and self._refusal_started:
1025+
logging.warning(
1026+
'Received content after refusal has started. Dropping content.'
1027+
)
1028+
content = ''
1029+
1030+
chunk_text = ''
9981031
if content:
999-
self.content_parts += content
1000-
return content
1032+
chunk_text += content
1033+
1034+
if refusal and not self._refusal_started:
1035+
self._refusal_started = True
1036+
if self.content_parts or chunk_text:
1037+
chunk_text += '\n'
1038+
chunk_text += _REFUSAL_PREFIX
1039+
1040+
if refusal:
1041+
chunk_text += refusal
1042+
1043+
if chunk_text:
1044+
self.content_parts += chunk_text
1045+
1046+
return chunk_text
10011047

10021048
def _add_chat_completion_chunk_delta(
10031049
self, delta: dict[str, Any]
1004-
) -> (list[types.Part], str):
1050+
) -> tuple[list[types.Part], str]:
10051051
"""Adds a chunk delta from a streaming chat completions response.
10061052
10071053
This method processes a single delta chunk from a streaming chat completions
@@ -1021,9 +1067,7 @@ def _add_chat_completion_chunk_delta(
10211067
for tool_call in delta.get('tool_calls', []):
10221068
chunk_part = self._upsert_tool_call(tool_call)
10231069
parts.append(chunk_part)
1024-
content = delta.get('content')
1025-
refusal = delta.get('refusal')
1026-
merged_content = self._append_content(content, refusal)
1070+
merged_content = self._accumulate_content(delta)
10271071
if merged_content:
10281072
parts.append(types.Part.from_text(text=merged_content))
10291073

@@ -1057,9 +1101,7 @@ def _add_chat_completion_message(
10571101
'type': 'function',
10581102
'function': function_call,
10591103
})
1060-
content = message.get('content')
1061-
refusal = message.get('refusal')
1062-
self._append_content(content, refusal)
1104+
self._accumulate_content(message)
10631105

10641106
self._get_or_create_role(message.get('role', 'model'))
10651107
return self._get_content_parts(), self.role

tests/unittests/models/test_apigee_llm.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,3 +649,86 @@ def test_parse_response_usage_metadata():
649649
assert llm_response.usage_metadata.candidates_token_count == 5
650650
assert llm_response.usage_metadata.total_token_count == 15
651651
assert llm_response.usage_metadata.thoughts_token_count == 4
652+
653+
654+
def test_parse_response_with_refusal():
655+
"""Tests that CompletionsHTTPClient parses refusal correctly."""
656+
client = CompletionsHTTPClient(base_url='http://test')
657+
658+
response_dict = {
659+
'choices': [{
660+
'message': {
661+
'role': 'assistant',
662+
'refusal': 'I refuse to answer',
663+
},
664+
'finish_reason': 'stop',
665+
}],
666+
}
667+
llm_response = client._parse_response(response_dict)
668+
assert len(llm_response.content.parts) == 1
669+
assert llm_response.content.parts[0].text == '[[REFUSAL]]: I refuse to answer'
670+
671+
response_dict_mixed = {
672+
'choices': [{
673+
'message': {
674+
'role': 'assistant',
675+
'content': 'Here is some content',
676+
'refusal': 'But I refuse to answer the rest',
677+
},
678+
'finish_reason': 'stop',
679+
}],
680+
}
681+
llm_response_mixed = client._parse_response(response_dict_mixed)
682+
assert len(llm_response_mixed.content.parts) == 1
683+
assert (
684+
llm_response_mixed.content.parts[0].text
685+
== 'Here is some content\n[[REFUSAL]]: But I refuse to answer the rest'
686+
)
687+
688+
689+
@pytest.mark.parametrize(
690+
('parts', 'expected_message'),
691+
[
692+
(
693+
[
694+
types.Part.from_text(text='[[REFUSAL]]: I refuse to answer'),
695+
types.Part.from_text(text='normal content'),
696+
],
697+
{
698+
'role': 'assistant',
699+
'refusal': 'I refuse to answer',
700+
'content': 'normal content',
701+
},
702+
),
703+
(
704+
[
705+
types.Part.from_text(
706+
text=(
707+
'Here is some content\n[[REFUSAL]]: But I refuse to'
708+
' answer the rest'
709+
)
710+
),
711+
],
712+
{
713+
'role': 'assistant',
714+
'refusal': 'But I refuse to answer the rest',
715+
'content': 'Here is some content',
716+
},
717+
),
718+
],
719+
)
720+
def test_construct_payload_with_refusal(parts, expected_message):
721+
"""Tests that CompletionsHTTPClient constructs payload with refusal correctly."""
722+
client = CompletionsHTTPClient(base_url='http://test')
723+
req = LlmRequest(
724+
model='apigee/openai/gpt-4o',
725+
contents=[
726+
types.Content(
727+
role='model',
728+
parts=parts,
729+
)
730+
],
731+
)
732+
payload = client._construct_payload(req, stream=False)
733+
messages = payload['messages']
734+
assert messages == [expected_message]

tests/unittests/models/test_completions_http_client.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from unittest import mock
1717
from unittest.mock import AsyncMock
1818

19+
from google.adk.models.apigee_llm import ChatCompletionsResponseHandler
1920
from google.adk.models.apigee_llm import CompletionsHTTPClient
2021
from google.adk.models.llm_request import LlmRequest
2122
from google.genai import types
@@ -771,3 +772,60 @@ async def mock_aiter_lines():
771772
]
772773
assert len(responses) == expected_response_count
773774
assert responses[0].content.parts[0].text == 'Hello'
775+
776+
777+
def test_process_chunk_with_refusal_streaming():
778+
handler = ChatCompletionsResponseHandler()
779+
780+
chunk1 = {
781+
'choices': [{
782+
'delta': {
783+
'role': 'assistant',
784+
'content': 'Hello',
785+
},
786+
'index': 0,
787+
}]
788+
}
789+
responses1 = list(handler.process_chunk(chunk1))
790+
assert len(responses1) == 1
791+
assert responses1[0].content.parts[0].text == 'Hello'
792+
793+
chunk2 = {
794+
'choices': [{
795+
'delta': {
796+
'refusal': 'I refuse',
797+
},
798+
'index': 0,
799+
}]
800+
}
801+
responses2 = list(handler.process_chunk(chunk2))
802+
assert len(responses2) == 1
803+
assert responses2[0].content.parts[0].text == '\n[[REFUSAL]]: I refuse'
804+
805+
chunk3 = {
806+
'choices': [{
807+
'delta': {
808+
'refusal': ' to answer',
809+
},
810+
'index': 0,
811+
}]
812+
}
813+
responses3 = list(handler.process_chunk(chunk3))
814+
assert len(responses3) == 1
815+
assert responses3[0].content.parts[0].text == ' to answer'
816+
817+
chunk4 = {
818+
'choices': [{
819+
'delta': {},
820+
'finish_reason': 'stop',
821+
'index': 0,
822+
}]
823+
}
824+
responses4 = list(handler.process_chunk(chunk4))
825+
assert len(responses4) == 2
826+
final_response = responses4[1]
827+
assert final_response.finish_reason == types.FinishReason.STOP
828+
assert (
829+
final_response.content.parts[0].text
830+
== 'Hello\n[[REFUSAL]]: I refuse to answer'
831+
)

0 commit comments

Comments (0)