Skip to content

Commit 278e7e2

Browse files
committed
refactor: use Metadata map instead of dedicated proto fields for structured output
Address review feedback:
- Remove JSONSchema and ResponseFormat proto fields; pass them via the existing Metadata map instead, avoiding proto changes
- vLLM backend reads json_schema and response_format from request.Metadata
- Add structured output support (json_schema, json_object) to Open Responses API via text_format parameter
- Update docs with Open Responses structured output examples

Ref: #6857

Signed-off-by: eureka928 <meobius123@gmail.com>
1 parent bb08454 commit 278e7e2

8 files changed

Lines changed: 94 additions & 14 deletions

File tree

backend/backend.proto

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,7 @@ message PredictOptions {
162162
string ToolChoice = 49; // JSON string or object specifying tool choice behavior
163163
int32 Logprobs = 50; // Number of top logprobs to return (maps to OpenAI logprobs parameter)
164164
int32 TopLogprobs = 51; // Number of top logprobs to return per token (maps to OpenAI top_logprobs parameter)
165-
map<string, string> Metadata = 52; // Generic per-request metadata (e.g., enable_thinking)
166-
string JSONSchema = 53; // Raw JSON schema string for backends that support native structured output (e.g. vLLM guided decoding)
167-
string ResponseFormat = 54; // Response format type: "json_object", "json_schema", or empty
165+
map<string, string> Metadata = 52; // Generic per-request metadata (e.g., enable_thinking, json_schema, response_format)
168166
}
169167

170168
// The response message containing the result

backend/python/vllm/backend.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,13 @@ async def _predict(self, request, context, streaming=False):
246246
setattr(sampling_params, param_field, value)
247247

248248
# Handle structured output via guided decoding / structured outputs
249+
# Read json_schema and response_format from Metadata map (avoids extra proto fields)
249250
if _structured_output_cls is not None:
251+
metadata = dict(request.Metadata) if hasattr(request, 'Metadata') and request.Metadata else {}
250252
constraint = None
251-
if hasattr(request, 'JSONSchema') and request.JSONSchema:
252-
constraint = _structured_output_cls(json=request.JSONSchema)
253-
elif hasattr(request, 'ResponseFormat') and request.ResponseFormat == "json_object":
253+
if metadata.get("json_schema"):
254+
constraint = _structured_output_cls(json=metadata["json_schema"])
255+
elif metadata.get("response_format") == "json_object":
254256
constraint = _structured_output_cls(json_object=True)
255257
elif hasattr(request, 'Grammar') and request.Grammar:
256258
constraint = _structured_output_cls(grammar=request.Grammar)

core/backend/options.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,6 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
253253
TensorSplit: c.TensorSplit,
254254
TailFreeSamplingZ: float32(*c.TFZ),
255255
TypicalP: float32(*c.TypicalP),
256-
JSONSchema: c.JSONSchema,
257-
ResponseFormat: c.ResponseFormat,
258256
}
259257

260258
metadata := map[string]string{}
@@ -265,6 +263,12 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
265263
metadata["enable_thinking"] = "true"
266264
}
267265
}
266+
if c.ResponseFormat != "" {
267+
metadata["response_format"] = c.ResponseFormat
268+
}
269+
for k, v := range c.RequestMetadata {
270+
metadata[k] = v
271+
}
268272
pbOpts.Metadata = metadata
269273

270274
// Logprobs and TopLogprobs are set by the caller if provided

core/config/model_config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ type ModelConfig struct {
5151
functionCallString, functionCallNameString string `yaml:"-" json:"-"`
5252
ResponseFormat string `yaml:"-" json:"-"`
5353
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
54-
JSONSchema string `yaml:"-" json:"-"`
54+
RequestMetadata map[string]string `yaml:"-" json:"-"`
5555

5656
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
5757
ReasoningConfig reasoning.Config `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`

core/http/endpoints/openai/chat.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
443443
return err
444444
}
445445

446-
// Pass raw JSON schema to backends that support native structured output
446+
// Pass raw JSON schema via metadata for backends that support native structured output
447447
schemaBytes, err := json.Marshal(d.JsonSchema.Schema)
448448
if err == nil {
449-
config.JSONSchema = string(schemaBytes)
449+
if config.RequestMetadata == nil {
450+
config.RequestMetadata = map[string]string{}
451+
}
452+
config.RequestMetadata["json_schema"] = string(schemaBytes)
450453
}
451454

452455
fs := &functions.JSONFunctionStructure{

core/http/endpoints/openai/completion.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
9999
if err := json.Unmarshal(dat, &jsr); err == nil {
100100
schemaBytes, err := json.Marshal(jsr.JsonSchema.Schema)
101101
if err == nil {
102-
config.JSONSchema = string(schemaBytes)
102+
if config.RequestMetadata == nil {
103+
config.RequestMetadata = map[string]string{}
104+
}
105+
config.RequestMetadata["json_schema"] = string(schemaBytes)
103106
}
104107
fs := &functions.JSONFunctionStructure{
105108
AnyOf: []functions.Item{jsr.JsonSchema.Schema},

core/http/endpoints/openresponses/responses.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,42 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
128128
Functions: funcs,
129129
}
130130

131-
// Handle text_format -> response_format conversion
131+
// Handle text_format -> response_format conversion and structured output
132132
if input.TextFormat != nil {
133-
openAIReq.ResponseFormat = convertTextFormatToResponseFormat(input.TextFormat)
133+
responseFormat := convertTextFormatToResponseFormat(input.TextFormat)
134+
openAIReq.ResponseFormat = responseFormat
135+
136+
// Generate grammar and pass schema for structured output (like OpenAI chat/completion)
137+
if rfMap, ok := responseFormat.(map[string]interface{}); ok {
138+
if rfType, _ := rfMap["type"].(string); rfType == "json_object" {
139+
cfg.Grammar = functions.JSONBNF
140+
cfg.ResponseFormat = "json_object"
141+
} else if rfType == "json_schema" {
142+
cfg.ResponseFormat = "json_schema"
143+
d := schema.JsonSchemaRequest{}
144+
dat, err := json.Marshal(rfMap)
145+
if err == nil {
146+
if err := json.Unmarshal(dat, &d); err == nil {
147+
schemaBytes, err := json.Marshal(d.JsonSchema.Schema)
148+
if err == nil {
149+
if cfg.RequestMetadata == nil {
150+
cfg.RequestMetadata = map[string]string{}
151+
}
152+
cfg.RequestMetadata["json_schema"] = string(schemaBytes)
153+
}
154+
fs := &functions.JSONFunctionStructure{
155+
AnyOf: []functions.Item{d.JsonSchema.Schema},
156+
}
157+
g, err := fs.Grammar(cfg.FunctionsConfig.GrammarOptions()...)
158+
if err == nil {
159+
cfg.Grammar = g
160+
} else {
161+
xlog.Error("Open Responses - Failed generating grammar for json_schema", "error", err)
162+
}
163+
}
164+
}
165+
}
166+
}
134167
}
135168

136169
// Generate grammar for function calling (similar to OpenAI chat endpoint)

docs/content/features/constrained_grammars.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,43 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
123123
}'
124124
```
125125

126+
## Open Responses API
127+
128+
The Open Responses API (`/v1/responses`) also supports structured output via the `text_format` parameter:
129+
130+
### JSON Schema
131+
132+
```bash
133+
curl http://localhost:8080/v1/responses -H "Content-Type: application/json" -d '{
134+
"model": "my-model",
135+
"input": "Generate a person object",
136+
"text_format": {
137+
"type": "json_schema",
138+
"json_schema": {
139+
"name": "person",
140+
"schema": {
141+
"type": "object",
142+
"properties": {
143+
"name": {"type": "string"},
144+
"age": {"type": "integer"}
145+
},
146+
"required": ["name", "age"]
147+
}
148+
}
149+
}
150+
}'
151+
```
152+
153+
### JSON Object
154+
155+
```bash
156+
curl http://localhost:8080/v1/responses -H "Content-Type: application/json" -d '{
157+
"model": "my-model",
158+
"input": "Generate a person as JSON",
159+
"text_format": {"type": "json_object"}
160+
}'
161+
```
162+
126163
## Related Features
127164

128165
- [OpenAI Functions]({{%relref "features/openai-functions" %}}) - Function calling with structured outputs

0 commit comments

Comments (0)