Skip to content

Commit 7548a52

Browse files
authored
Move RequestBody from scheduling to requesthandling (kubernetes-sigs/gateway-api-inference-extension#2808)
Co-authored-by: Ryan Rosario <6713180+RyanRosario@users.noreply.github.com>
1 parent ac378c0 commit 7548a52

24 files changed

Lines changed: 698 additions & 673 deletions

File tree

pkg/epp/framework/interface/requestcontrol/types.go

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package requestcontrol
1818

1919
import (
2020
"google.golang.org/protobuf/types/known/structpb"
21+
22+
requesthandling "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/requesthandling"
2123
)
2224

2325
// Response contains information from the response received to be passed to the Response requestcontrol plugins
@@ -33,19 +35,8 @@ type Response struct {
3335
// Currently, this is only used by the conformance test.
3436
ReqMetadata map[string]any
3537
// Token usage counts parsed from the response body.
36-
Usage Usage
38+
Usage requesthandling.Usage
3739
// DynamicMetadata is a map of metadata that can be passed to Envoy. It is populated into the dynamic
3840
// metadata when processing ProcessingResponse_RequestHeaders.
3941
DynamicMetadata *structpb.Struct
4042
}
41-
42-
type Usage struct {
43-
PromptTokens int `json:"prompt_tokens"`
44-
CompletionTokens int `json:"completion_tokens"`
45-
TotalTokens int `json:"total_tokens"`
46-
PromptTokenDetails *PromptTokenDetails `json:"prompt_token_details,omitempty"`
47-
}
48-
49-
type PromptTokenDetails struct {
50-
CachedTokens int `json:"cached_tokens"`
51-
}

pkg/epp/framework/interface/requesthandling/plugins.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,13 @@ import (
2121

2222
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
2323
fwkplugin "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/plugin"
24-
25-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/requestcontrol"
26-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/scheduling"
2724
)
2825

2926
// Parser defines the interface for parsing payloads (requests and responses).
3027
type Parser interface {
3128
fwkplugin.Plugin
3229
// ParseRequest parses the request body and headers and returns a map representation.
33-
ParseRequest(ctx context.Context, body []byte, headers map[string]string) (*scheduling.InferenceRequestBody, error)
30+
ParseRequest(ctx context.Context, body []byte, headers map[string]string) (*InferenceRequestBody, error)
3431

3532
// ParseResponse parses the response payload.
3633
// For streaming responses, this method is invoked multiple times (once per chunk),
@@ -46,5 +43,5 @@ type Parser interface {
4643

4744
type ParsedResponse struct {
4845
// Usage is only populated when the raw response has usage.
49-
Usage *requestcontrol.Usage
46+
Usage *Usage
5047
}

0 commit comments

Comments
 (0)