Skip to content

Commit 278e7e2

Browse files
committed
refactor: use Metadata map instead of dedicated proto fields for structured output
Address review feedback:
- Remove JSONSchema and ResponseFormat proto fields; pass them via the existing Metadata map instead, avoiding proto changes
- vLLM backend reads json_schema and response_format from request.Metadata
- Add structured output support (json_schema, json_object) to Open Responses API via text_format parameter
- Update docs with Open Responses structured output examples

Ref: #6857

Signed-off-by: eureka928 <meobius123@gmail.com>
1 parent bb08454 commit 278e7e2

8 files changed

Lines changed: 94 additions & 14 deletions

File tree

backend/backend.proto

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,7 @@ message PredictOptions {
162162
string ToolChoice = 49; // JSON string or object specifying tool choice behavior
163163
int32 Logprobs = 50; // Number of top logprobs to return (maps to OpenAI logprobs parameter)
164164
int32 TopLogprobs = 51; // Number of top logprobs to return per token (maps to OpenAI top_logprobs parameter)
165-
map<string, string> Metadata = 52; // Generic per-request metadata (e.g., enable_thinking)
166-
string JSONSchema = 53; // Raw JSON schema string for backends that support native structured output (e.g. vLLM guided decoding)
167-
string ResponseFormat = 54; // Response format type: "json_object", "json_schema", or empty
165+
map<string, string> Metadata = 52; // Generic per-request metadata (e.g., enable_thinking, json_schema, response_format)
168166
}
169167

170168
// The response message containing the result

backend/python/vllm/backend.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,13 @@ async def _predict(self, request, context, streaming=False):
246246
setattr(sampling_params, param_field, value)
247247

248248
# Handle structured output via guided decoding / structured outputs
249+
# Read json_schema and response_format from Metadata map (avoids extra proto fields)
249250
if _structured_output_cls is not None:
251+
metadata = dict(request.Metadata) if hasattr(request, 'Metadata') and request.Metadata else {}
250252
constraint = None
251-
if hasattr(request, 'JSONSchema') and request.JSONSchema:
252-
constraint = _structured_output_cls(json=request.JSONSchema)
253-
elif hasattr(request, 'ResponseFormat') and request.ResponseFormat == "json_object":
253+
if metadata.get("json_schema"):
254+
constraint = _structured_output_cls(json=metadata["json_schema"])
255+
elif metadata.get("response_format") == "json_object":
254256
constraint = _structured_output_cls(json_object=True)
255257
elif hasattr(request, 'Grammar') and request.Grammar:
256258
constraint = _structured_output_cls(grammar=request.Grammar)

core/backend/options.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,6 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
253253
TensorSplit: c.TensorSplit,
254254
TailFreeSamplingZ: float32(*c.TFZ),
255255
TypicalP: float32(*c.TypicalP),
256-
JSONSchema: c.JSONSchema,
257-
ResponseFormat: c.ResponseFormat,
258256
}
259257

260258
metadata := map[string]string{}
@@ -265,6 +263,12 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
265263
metadata["enable_thinking"] = "true"
266264
}
267265
}
266+
if c.ResponseFormat != "" {
267+
metadata["response_format"] = c.ResponseFormat
268+
}
269+
for k, v := range c.RequestMetadata {
270+
metadata[k] = v
271+
}
268272
pbOpts.Metadata = metadata
269273

270274
// Logprobs and TopLogprobs are set by the caller if provided

core/config/model_config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ type ModelConfig struct {
5151
functionCallString, functionCallNameString string `yaml:"-" json:"-"`
5252
ResponseFormat string `yaml:"-" json:"-"`
5353
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
54-
JSONSchema string `yaml:"-" json:"-"`
54+
RequestMetadata map[string]string `yaml:"-" json:"-"`
5555

5656
FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
5757
ReasoningConfig reasoning.Config `yaml:"reasoning,omitempty" json:"reasoning,omitempty"`

core/http/endpoints/openai/chat.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,13 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
443443
return err
444444
}
445445

446-
// Pass raw JSON schema to backends that support native structured output
446+
// Pass raw JSON schema via metadata for backends that support native structured output
447447
schemaBytes, err := json.Marshal(d.JsonSchema.Schema)
448448
if err == nil {
449-
config.JSONSchema = string(schemaBytes)
449+
if config.RequestMetadata == nil {
450+
config.RequestMetadata = map[string]string{}
451+
}
452+
config.RequestMetadata["json_schema"] = string(schemaBytes)
450453
}
451454

452455
fs := &functions.JSONFunctionStructure{

core/http/endpoints/openai/completion.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
9999
if err := json.Unmarshal(dat, &jsr); err == nil {
100100
schemaBytes, err := json.Marshal(jsr.JsonSchema.Schema)
101101
if err == nil {
102-
config.JSONSchema = string(schemaBytes)
102+
if config.RequestMetadata == nil {
103+
config.RequestMetadata = map[string]string{}
104+
}
105+
config.RequestMetadata["json_schema"] = string(schemaBytes)
103106
}
104107
fs := &functions.JSONFunctionStructure{
105108
AnyOf: []functions.Item{jsr.JsonSchema.Schema},

core/http/endpoints/openresponses/responses.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,42 @@ func ResponsesEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
128128
Functions: funcs,
129129
}
130130

131-
// Handle text_format -> response_format conversion
131+
// Handle text_format -> response_format conversion and structured output
132132
if input.TextFormat != nil {
133-
openAIReq.ResponseFormat = convertTextFormatToResponseFormat(input.TextFormat)
133+
responseFormat := convertTextFormatToResponseFormat(input.TextFormat)
134+
openAIReq.ResponseFormat = responseFormat
135+
136+
// Generate grammar and pass schema for structured output (like OpenAI chat/completion)
137+
if rfMap, ok := responseFormat.(map[string]interface{}); ok {
138+
if rfType, _ := rfMap["type"].(string); rfType == "json_object" {
139+
cfg.Grammar = functions.JSONBNF
140+
cfg.ResponseFormat = "json_object"
141+
} else if rfType == "json_schema" {
142+
cfg.ResponseFormat = "json_schema"
143+
d := schema.JsonSchemaRequest{}
144+
dat, err := json.Marshal(rfMap)
145+
if err == nil {
146+
if err := json.Unmarshal(dat, &d); err == nil {
147+
schemaBytes, err := json.Marshal(d.JsonSchema.Schema)
148+
if err == nil {
149+
if cfg.RequestMetadata == nil {
150+
cfg.RequestMetadata = map[string]string{}
151+
}
152+
cfg.RequestMetadata["json_schema"] = string(schemaBytes)
153+
}
154+
fs := &functions.JSONFunctionStructure{
155+
AnyOf: []functions.Item{d.JsonSchema.Schema},
156+
}
157+
g, err := fs.Grammar(cfg.FunctionsConfig.GrammarOptions()...)
158+
if err == nil {
159+
cfg.Grammar = g
160+
} else {
161+
xlog.Error("Open Responses - Failed generating grammar for json_schema", "error", err)
162+
}
163+
}
164+
}
165+
}
166+
}
134167
}
135168

136169
// Generate grammar for function calling (similar to OpenAI chat endpoint)

docs/content/features/constrained_grammars.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,43 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
123123
}'
124124
```
125125

126+
## Open Responses API
127+
128+
The Open Responses API (`/v1/responses`) also supports structured output via the `text_format` parameter:
129+
130+
### JSON Schema
131+
132+
```bash
133+
curl http://localhost:8080/v1/responses -H "Content-Type: application/json" -d '{
134+
"model": "my-model",
135+
"input": "Generate a person object",
136+
"text_format": {
137+
"type": "json_schema",
138+
"json_schema": {
139+
"name": "person",
140+
"schema": {
141+
"type": "object",
142+
"properties": {
143+
"name": {"type": "string"},
144+
"age": {"type": "integer"}
145+
},
146+
"required": ["name", "age"]
147+
}
148+
}
149+
}
150+
}'
151+
```
152+
153+
### JSON Object
154+
155+
```bash
156+
curl http://localhost:8080/v1/responses -H "Content-Type: application/json" -d '{
157+
"model": "my-model",
158+
"input": "Generate a person as JSON",
159+
"text_format": {"type": "json_object"}
160+
}'
161+
```
162+
126163
## Related Features
127164

128165
- [OpenAI Functions]({{%relref "features/openai-functions" %}}) - Function calling with structured outputs

0 commit comments

Comments (0)