Commit d210e2b
Merge pull request #131 from Portkey-AI/update/max-completion-tokens-and-gpt5-models
Update OpenAPI specification to use gpt-5 model and replace max_token…
2 parents f8ef354 + bc50f68 commit d210e2b

1 file changed: openapi.yaml

Lines changed: 76 additions & 27 deletions
@@ -190,7 +190,7 @@ paths:
        -H "x-portkey-api-key: $PORTKEY_API_KEY" \
        -H "x-portkey-virtual-key: $PORTKEY_PROVIDER_VIRTUAL_KEY" \
        -d '{
-         "model": "gpt-4o",
+         "model": "gpt-5",
          "messages": [
            {
              "role": "system",
@@ -200,7 +200,8 @@ paths:
              "role": "user",
              "content": "Hello!"
            }
-         ]
+         ],
+         "max_completion_tokens": 250
        }'
  - lang: cURL
    label: Self-Hosted
@@ -210,7 +211,7 @@ paths:
        -H "x-portkey-api-key: $PORTKEY_API_KEY" \
        -H "x-portkey-virtual-key: $PORTKEY_PROVIDER_VIRTUAL_KEY" \
        -d '{
-         "model": "gpt-4o",
+         "model": "gpt-5",
          "messages": [
            {
              "role": "system",
@@ -220,7 +221,8 @@ paths:
              "role": "user",
              "content": "Hello!"
            }
-         ]
+         ],
+         "max_completion_tokens": 250
        }'
  - lang: python
    label: Default
@@ -233,11 +235,12 @@ paths:
        )

        response = portkey.chat.completions.create(
-           model="gpt-4o",
+           model="gpt-5",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello!"}
-           ]
+           ],
+           max_completion_tokens=250
        )

        print(response.choices[0].message)
@@ -253,11 +256,12 @@ paths:
        )

        response = portkey.chat.completions.create(
-           model="gpt-4o",
+           model="gpt-5",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello!"}
-           ]
+           ],
+           max_completion_tokens=250
        )

        print(response.choices[0].message)
@@ -274,7 +278,8 @@ paths:
        async function main() {
          const response = await portkey.chat.completions.create({
            messages: [{ role: "system", content: "You are a helpful assistant." }],
-           model: "gpt-4o",
+           model: "gpt-5",
+           max_completion_tokens: 250,
          });

          console.log(response.choices[0]);
@@ -295,7 +300,8 @@ paths:
        async function main() {
          const response = await portkey.chat.completions.create({
            messages: [{ role: "system", content: "You are a helpful assistant." }],
-           model: "gpt-4o",
+           model: "gpt-5",
+           max_completion_tokens: 250,
          });

          console.log(response.choices[0]);
@@ -1803,7 +1809,7 @@ paths:
          required:
            - variables
          description: |
-           Note: Although hyperparameters are shown grouped here (like messages, max_tokens, temperature, etc.), they should only be passed at the root level, alongside 'variables' and 'stream'.
+           Note: Although hyperparameters are shown grouped here (like messages, max_completion_tokens, temperature, etc.), they should only be passed at the root level, alongside 'variables' and 'stream'. The `max_tokens` parameter is deprecated — use `max_completion_tokens` instead.
          properties:
            variables:
              type: object
@@ -1853,7 +1859,7 @@ paths:
          "variables": {
            "user_input": "Hello world"
          },
-         "max_tokens": 250,
+         "max_completion_tokens": 250,
          "presence_penalty": 0.2
        }'
  - lang: Python
@@ -1870,7 +1876,7 @@ paths:
            variables={
                "user_input": "Hello world"
            },
-           max_tokens=250,
+           max_completion_tokens=250,
            presence_penalty=0.2
        )

@@ -1890,7 +1896,7 @@ paths:
            variables: {
                user_input: "Hello world"
            },
-           max_tokens: 250,
+           max_completion_tokens: 250,
            presence_penalty: 0.2
        });

@@ -1905,7 +1911,7 @@ paths:
          "variables": {
            "user_input": "Hello world"
          },
-         "max_tokens": 250,
+         "max_completion_tokens": 250,
          "presence_penalty": 0.2
        }'
  - lang: python
@@ -1923,7 +1929,7 @@ paths:
            variables={
                "user_input": "Hello world"
            },
-           max_tokens=250,
+           max_completion_tokens=250,
            presence_penalty=0.2
        )

@@ -1943,7 +1949,7 @@ paths:
            variables: {
                user_input: "Hello world"
            },
-           max_tokens: 250,
+           max_completion_tokens: 250,
            presence_penalty: 0.2
        });

@@ -1975,7 +1981,7 @@ paths:
          required:
            - variables
          description: |
-           Note: Although hyperparameters are shown grouped here (like messages, max_tokens, temperature, etc.), they should only be passed at the root level, alongside 'variables' and 'stream'.
+           Note: Although hyperparameters are shown grouped here (like messages, max_completion_tokens, temperature, etc.), they should only be passed at the root level, alongside 'variables' and 'stream'. The `max_tokens` parameter is deprecated — use `max_completion_tokens` instead.
          properties:
            variables:
              type: object
@@ -2008,7 +2014,7 @@ paths:
          "variables": {
            "user_input": "Hello world"
          },
-         "max_tokens": 250,
+         "max_completion_tokens": 250,
          "presence_penalty": 0.2
        }'
  - lang: Python
@@ -2025,7 +2031,7 @@ paths:
            variables={
                "user_input": "Hello world"
            },
-           max_tokens=250,
+           max_completion_tokens=250,
            presence_penalty=0.2
        )

@@ -2045,7 +2051,7 @@ paths:
            variables: {
                user_input: "Hello world"
            },
-           max_tokens: 250,
+           max_completion_tokens: 250,
            presence_penalty: 0.2
        });

@@ -2060,7 +2066,7 @@ paths:
          "variables": {
            "user_input": "Hello world"
          },
-         "max_tokens": 250,
+         "max_completion_tokens": 250,
          "presence_penalty": 0.2
        }'
  - lang: Python
@@ -2078,7 +2084,7 @@ paths:
            variables={
                "user_input": "Hello world"
            },
-           max_tokens=250,
+           max_completion_tokens=250,
            presence_penalty=0.2
        )

@@ -2099,7 +2105,7 @@ paths:
            variables: {
                user_input: "Hello world"
            },
-           max_tokens: 250,
+           max_completion_tokens: 250,
            presence_penalty: 0.2
        });

@@ -22282,13 +22288,22 @@ components:
            $ref: "#/components/schemas/ChatCompletionRequestMessage"
        model:
          description: ID of the model to use. See the [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.
-         example: "gpt-4-turbo"
+         example: "gpt-5"
          anyOf:
            - type: string
            - type: string
              enum:
                [
+                 "gpt-5",
+                 "gpt-5-mini",
+                 "gpt-5-nano",
+                 "o4-mini",
+                 "o3",
+                 "o3-mini",
+                 "o1",
+                 "o1-mini",
                  "gpt-4o",
+                 "gpt-4o-mini",
                  "gpt-4o-2024-05-13",
                  "gpt-4-turbo",
                  "gpt-4-turbo-2024-04-09",
@@ -22342,9 +22357,21 @@ components:
          nullable: true
        max_tokens:
          description: |
-           The maximum number of [tokens](https://platform.openai.com/tokenizer?view=bpe) that can be generated in the chat completion.
+           Deprecated in favor of `max_completion_tokens`.
+
+           The maximum number of [tokens](https://platform.openai.com/tokenizer?view=bpe) that can be generated in the chat completion. This value can be used to control [costs](https://openai.com/api/pricing/) for text generated via API.

-           The total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.
+           **Supported models:** GPT-4o, GPT-4o-mini, GPT-4, GPT-4 Turbo, GPT-3.5 Turbo.
+
+           **Not supported:** o-series reasoning models (o1, o3, o3-mini, o4-mini) and GPT-5 series — use `max_completion_tokens` instead.
+         type: integer
+         nullable: true
+         deprecated: true
+       max_completion_tokens:
+         description: |
+           An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+
+           **Supported models:** GPT-5 series, o-series reasoning models (o1, o3, o3-mini, o4-mini) — required. Also supported on GPT-4o, GPT-4o-mini, GPT-4, GPT-4 Turbo, GPT-3.5 Turbo as a replacement for `max_tokens`.
          type: integer
          nullable: true
        n:
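The pair of descriptions above encodes the migration rule: `max_tokens` is deprecated and not supported at all on GPT-5 and o-series models, while `max_completion_tokens` is required there and accepted everywhere else as the replacement. A small shim for call sites that still pass the old name (a sketch, not part of any SDK):

```python
def normalize_token_limit(params: dict) -> dict:
    """Rewrite a deprecated max_tokens kwarg as max_completion_tokens.

    Safe per the spec text above: max_completion_tokens is accepted on
    every model that accepted max_tokens, and is the only option on the
    GPT-5 and o-series families.
    """
    params = dict(params)  # do not mutate the caller's dict
    if "max_tokens" in params:
        limit = params.pop("max_tokens")
        params.setdefault("max_completion_tokens", limit)
    return params

# e.g. normalize_token_limit({"model": "gpt-5", "max_tokens": 250})
#      -> {"model": "gpt-5", "max_completion_tokens": 250}
```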
@@ -24999,6 +25026,28 @@ components:
        total_tokens:
          type: integer
          description: Total number of tokens used in the request (prompt + completion).
+       completion_tokens_details:
+         type: object
+         nullable: true
+         description: Breakdown of tokens used in a completion.
+         properties:
+           reasoning_tokens:
+             type: integer
+             description: Tokens generated by the model for reasoning.
+           accepted_prediction_tokens:
+             type: integer
+             description: When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
+           rejected_prediction_tokens:
+             type: integer
+             description: When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion.
+       prompt_tokens_details:
+         type: object
+         nullable: true
+         description: Breakdown of tokens used in the prompt.
+         properties:
+           cached_tokens:
+             type: integer
+             description: Cached tokens present in the prompt.
      required:
        - prompt_tokens
        - completion_tokens
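Both new usage objects are nullable, so responses from providers (or older gateway versions) that do not report the breakdown simply omit them. A defensive reader for a response shaped like the schema above (a sketch, assuming `resp` is the JSON-decoded chat completion):

```python
def usage_breakdown(resp: dict) -> dict:
    """Extract the new token-detail fields, tolerating their absence."""
    usage = resp.get("usage") or {}
    completion_details = usage.get("completion_tokens_details") or {}
    prompt_details = usage.get("prompt_tokens_details") or {}
    return {
        "total_tokens": usage.get("total_tokens"),
        # Reasoning tokens count toward max_completion_tokens but are
        # not part of the visible output.
        "reasoning_tokens": completion_details.get("reasoning_tokens"),
        "accepted_prediction_tokens": completion_details.get("accepted_prediction_tokens"),
        "rejected_prediction_tokens": completion_details.get("rejected_prediction_tokens"),
        "cached_tokens": prompt_details.get("cached_tokens"),
    }
```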
