Skip to content

Commit 04be752

Browse files
ci: add automated evaluation pipeline with environment variable substitution for Cloud SQL SQL Server
1 parent 99215a8 commit 04be752

7 files changed

Lines changed: 282 additions & 0 deletions

File tree

cloudbuild.yaml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
options:
16+
logging: CLOUD_LOGGING_ONLY
17+
18+
steps:
19+
20+
# --- Evaluation Step ---
21+
- name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest'
22+
entrypoint: 'bash'
23+
# Exposes the Secret Manager secrets (see availableSecrets below) as the MSSQL_DB_PASSWORD and GITHUB_TOKEN environment variables
24+
secretEnv: ['MSSQL_DB_PASSWORD', 'GITHUB_TOKEN']
25+
args:
26+
- '-c'
27+
- |
28+
set -e
29+
30+
echo "Fetching PR data from GitHub API..."
31+
32+
# Fetch PR data and status code
33+
HTTP_STATUS=$(curl -s -o pr_data.json -w "%{http_code}" -H "Authorization: token $$GITHUB_TOKEN" \
34+
"https://api.github.com/repos/$REPO_FULL_NAME/pulls/$_PR_NUMBER")
35+
36+
if [ "$$HTTP_STATUS" -ne 200 ]; then
37+
echo "Error fetching PR data: HTTP $$HTTP_STATUS"
38+
cat pr_data.json
39+
exit 1
40+
fi
41+
42+
PR_DATA=$(cat pr_data.json)
43+
44+
# Extract labels and title from PR data (Use $$ to escape bash variables)
45+
PR_LABELS=$(echo "$$PR_DATA" | jq -r '[.labels[].name] | join(",")')
46+
PR_TITLE=$(echo "$$PR_DATA" | jq -r '.title')
47+
48+
# Check if execution labels are present
49+
if [[ "$$PR_LABELS" != *"autorelease: pending"* && "$$PR_LABELS" != *"ci:run-evals"* ]]; then
50+
echo "PR does not have 'autorelease: pending' or 'ci:run-evals' label. Skipping execution."
51+
exit 0
52+
fi
53+
echo "Execution label detected. Processing release version context..."
54+
55+
# Determine Release Version: parsed from the PR title on release-please branches, otherwise derived from the PR number
56+
if [[ "$_HEAD_BRANCH" == release-please-* ]]; then
57+
if [[ "$$PR_TITLE" =~ release\ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then
58+
export RELEASE_VERSION="$${BASH_REMATCH[1]}"
59+
else
60+
export RELEASE_VERSION="pr-$_PR_NUMBER-release-unknown"
61+
fi
62+
else
63+
export RELEASE_VERSION="pr-$_PR_NUMBER-ci-run-evals"
64+
fi
65+
66+
# Workaround for evalbench bug: settings are only applied if path basename matches extension ID
67+
ln -s /workspace /workspace/cloud-sql-sqlserver
68+
cd /evalbench
69+
70+
# evalbench specific environment variables
71+
export EVAL_GCP_PROJECT_ID=$PROJECT_ID
72+
export EVAL_GCP_PROJECT_REGION=$_CLOUD_SQL_REGION
73+
export EVAL_REPORTING_PROJECT=$_EVAL_REPORTING_PROJECT
74+
export GOOGLE_CLOUD_PROJECT=$PROJECT_ID
75+
76+
# Cloud SQL SQL Server specific environment variables
77+
export CLOUD_SQL_MSSQL_PROJECT=$PROJECT_ID
78+
export CLOUD_SQL_MSSQL_INSTANCE=$_CLOUD_SQL_INSTANCE
79+
export CLOUD_SQL_MSSQL_REGION=$_CLOUD_SQL_REGION
80+
export CLOUD_SQL_MSSQL_DATABASE=$_CLOUD_SQL_DATABASE
81+
export CLOUD_SQL_MSSQL_USER=$_CLOUD_SQL_USER
82+
export CLOUD_SQL_MSSQL_IP_TYPE=$_CLOUD_SQL_IP_TYPE
83+
84+
# Maps the decrypted MSSQL_DB_PASSWORD to the exact variable expected by gemini_cli and extension skills
85+
export CLOUD_SQL_MSSQL_PASSWORD=$$MSSQL_DB_PASSWORD
86+
87+
# Combine CI metadata with run config
88+
cat /workspace/evals/ci_metadata.yaml >> /workspace/evals/run_config.yaml
89+
90+
# Substitute environment variables in model_config.yaml, run_config.yaml and dataset.json
91+
python3 /workspace/evals/substitute_env.py
92+
93+
cd /evalbench
94+
export PYTHONPATH=./evalbench:./evalbench/evalproto
95+
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
96+
97+
echo "Launching Standalone Evaluation..."
98+
python3 evalbench/evalbench.py --experiment_config=/workspace/evals/run_config.yaml
99+
100+
101+
availableSecrets:
102+
secretManager:
103+
- versionName: projects/$PROJECT_ID/secrets/MSSQL_DB_PASSWORD/versions/latest
104+
env: 'MSSQL_DB_PASSWORD'
105+
- versionName: projects/$PROJECT_ID/secrets/GITHUB_TOKEN/versions/latest
106+
env: 'GITHUB_TOKEN'

evals/ci_metadata.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
############################################################
16+
### CI Metadata (Repository Specific)
17+
### Note: These fields are used for version tracking in BQ
18+
### and are not part of the core Evalbench schema.
19+
############################################################
20+
21+
extension_id: cloud-sql-sqlserver
22+
release_version: ${RELEASE_VERSION}

evals/dataset.json

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"scenarios": [
3+
{
4+
"id": "cloud-sql-debug-instance",
5+
"starting_prompt": "Check on my databases in project ${GOOGLE_CLOUD_PROJECT}.",
6+
"conversation_plan": "Ask the agent to list all Cloud SQL instances in the project. Once all instances are listed, if '${CLOUD_SQL_MSSQL_INSTANCE}' exists, get its details and validate it is RUNNABLE.",
7+
"expected_trajectory": [
8+
"list_instances",
9+
"get_instance"
10+
],
11+
"env": {
12+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
13+
},
14+
"kind": "tools",
15+
"max_turns": 3
16+
},
17+
{
18+
"id": "cloud-sql-schema-tables-explore",
19+
"starting_prompt": "I want to understand the structure of my database.",
20+
"conversation_plan": "First, ask the agent to list the databases in the instance. After the agent provides the databases, ask it to list the tables specifically for that database.",
21+
"expected_trajectory": [
22+
"list_databases",
23+
"list_tables"
24+
],
25+
"env": {
26+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
27+
},
28+
"kind": "tools",
29+
"max_turns": 3
30+
},
31+
{
32+
"id": "cloud-sql-performance-check",
33+
"starting_prompt": "Our database performance seems degraded.",
34+
"conversation_plan": "Start by asking the agent to check the CPU utilization system metrics for the database instance to see if it's overloaded.",
35+
"expected_trajectory": [
36+
"get_system_metrics"
37+
],
38+
"env": {
39+
"GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
40+
},
41+
"kind": "tools",
42+
"max_turns": 3
43+
}
44+
]
45+
}

evals/gemini_2.5_pro_model.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
generator: gcp_vertex_gemini
16+
vertex_model: gemini-2.5-pro
17+
base_prompt: ""
18+
execs_per_minute: 5

evals/model_config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
gemini_cli_version: "@google/gemini-cli@latest"
16+
generator: gemini_cli
17+
env:
18+
GOOGLE_CLOUD_PROJECT: "${GOOGLE_CLOUD_PROJECT}"
19+
GOOGLE_CLOUD_LOCATION: "global"
20+
GOOGLE_GENAI_USE_VERTEXAI: "true"
21+
setup:
22+
extensions:
23+
# Points to the symlink created in cloudbuild.yaml to match the extension ID
24+
"/workspace/cloud-sql-sqlserver":
25+
settings:
26+
CLOUD_SQL_MSSQL_PROJECT: "${CLOUD_SQL_MSSQL_PROJECT}"
27+
CLOUD_SQL_MSSQL_INSTANCE: "${CLOUD_SQL_MSSQL_INSTANCE}"
28+
CLOUD_SQL_MSSQL_REGION: "${CLOUD_SQL_MSSQL_REGION}"
29+
CLOUD_SQL_MSSQL_DATABASE: "${CLOUD_SQL_MSSQL_DATABASE}"
30+
CLOUD_SQL_MSSQL_USER: "${CLOUD_SQL_MSSQL_USER}"
31+
CLOUD_SQL_MSSQL_PASSWORD: '${CLOUD_SQL_MSSQL_PASSWORD}'
32+
CLOUD_SQL_MSSQL_IP_TYPE: "${CLOUD_SQL_MSSQL_IP_TYPE}"

evals/run_config.yaml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
dataset_config: /workspace/evals/dataset.json
16+
dataset_format: gemini-cli-format
17+
18+
orchestrator: geminicli
19+
model_config: /workspace/evals/model_config.yaml
20+
simulated_user_model_config: /workspace/evals/gemini_2.5_pro_model.yaml
21+
22+
scorers:
23+
# Qualitative (Judge-based)
24+
goal_completion:
25+
model_config: /workspace/evals/gemini_2.5_pro_model.yaml
26+
behavioral_metrics:
27+
model_config: /workspace/evals/gemini_2.5_pro_model.yaml
28+
skills_best_practices:
29+
model_config: /workspace/evals/gemini_2.5_pro_model.yaml
30+
skills_dir: /workspace/cloud-sql-sqlserver/skills
31+
32+
# Performance
33+
turn_count: {}
34+
end_to_end_latency: {}
35+
tool_call_latency: {}
36+
token_consumption: {}
37+
skills_trajectory: {}
38+
39+
reporting:
40+
bigquery:
41+
gcp_project_id: "${EVAL_REPORTING_PROJECT}"

evals/substitute_env.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import os
2+
import re
3+
4+
def main(paths=None):
    """Expand ``${VAR}`` placeholders in config files using the environment.

    Each file is read, every ``${NAME}`` occurrence whose ``NAME`` is set in
    ``os.environ`` is replaced by that variable's value, and the file is
    rewritten in place. Placeholders naming an unset variable are left
    untouched. Files that do not exist are reported and skipped.

    Args:
        paths: Optional iterable of file paths to process. When omitted, the
            CI evaluation config files under ``/workspace/evals`` are used.
    """
    if paths is None:
        paths = [
            '/workspace/evals/model_config.yaml',
            '/workspace/evals/run_config.yaml',
            '/workspace/evals/dataset.json',
        ]

    # Compiled once; matches ${NAME} where NAME is made of word characters.
    placeholder = re.compile(r'\$\{(\w+)\}')

    def _resolve(match):
        # Fall back to the original placeholder text when the variable is unset.
        return os.environ.get(match.group(1), match.group(0))

    for path in paths:
        try:
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            print(f"File not found: {path}")
            continue

        # NOTE(review): values are inserted verbatim, with no YAML/JSON
        # escaping. A value containing quotes or backslashes could yield an
        # invalid file; confirm the substituted values are safe.
        content = placeholder.sub(_resolve, content)

        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Successfully substituted environment variables in {path}")


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)