Skip to content

Commit 5f9931a

Browse files
ci: parameterize project and instance names in dataset and add to substitution script
1 parent 6a673bc commit 5f9931a

2 files changed

Lines changed: 9 additions & 9 deletions

File tree

evals/dataset.json

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,14 @@
22
"scenarios": [
33
{
44
"id": "cloud-sql-debug-instance",
5-
"starting_prompt": "Check on my databases in project ext-test-cloud-sql-postgres.",
6-
"conversation_plan": "Ask the agent to list all Cloud SQL instances in the project. Once all instances are listed, if 'daily-ci-evals-db' exists, get its details and validate it is RUNNABLE.",
5+
"starting_prompt": "Check on my databases in project ${GOOGLE_CLOUD_PROJECT}.",
6+
"conversation_plan": "Ask the agent to list all Cloud SQL instances in the project. Once all instances are listed, if '${CLOUD_SQL_POSTGRES_INSTANCE}' exists, get its details and validate it is RUNNABLE.",
77
"expected_trajectory": [
88
"list_instances",
99
"get_instance"
1010
],
1111
"env": {
12-
"GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
12+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
1313
},
1414
"kind": "tools",
1515
"max_turns": 3
@@ -23,7 +23,7 @@
2323
"list_tables"
2424
],
2525
"env": {
26-
"GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
26+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
2727
},
2828
"kind": "tools",
2929
"max_turns": 3
@@ -37,21 +37,21 @@
3737
"list_locks"
3838
],
3939
"env": {
40-
"GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
40+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
4141
},
4242
"kind": "tools",
4343
"max_turns": 3
4444
},
4545
{
4646
"id": "cloud-sql-metrics-cpu-investigation",
47-
"starting_prompt": "I'm worried about the database load for daily-ci-evals-db.",
48-
"conversation_plan": "First, ask the agent to check the CPU utilization for the instance 'daily-ci-evals-db' for the last 5 minutes. After the agent provides the CPU data, ask it to check the overall database stats to see connection counts or transaction volume.",
47+
"starting_prompt": "I'm worried about the database load for ${CLOUD_SQL_POSTGRES_INSTANCE}.",
48+
"conversation_plan": "First, ask the agent to check the CPU utilization for the instance '${CLOUD_SQL_POSTGRES_INSTANCE}' for the last 5 minutes. After the agent provides the CPU data, ask it to check the overall database stats to see connection counts or transaction volume.",
4949
"expected_trajectory": [
5050
"get_system_metrics",
5151
"list_database_stats"
5252
],
5353
"env": {
54-
"GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres"
54+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
5555
},
5656
"kind": "tools",
5757
"max_turns": 3

evals/substitute_env.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import re
33

44
def main():
5-
yaml_paths = ['/workspace/evals/model_config.yaml', '/workspace/evals/run_config.yaml']
5+
yaml_paths = ['/workspace/evals/model_config.yaml', '/workspace/evals/run_config.yaml', '/workspace/evals/dataset.json']
66
for yaml_path in yaml_paths:
77
if os.path.exists(yaml_path):
88
with open(yaml_path, 'r') as f:

0 commit comments

Comments (0)