Skip to content

Commit cd3d9e2

Browse files
feat: add ci:run-evals label support and parameterize evaluation reporting project for flexible CI execution
1 parent 79d7c4a commit cd3d9e2

4 files changed

Lines changed: 27 additions & 15 deletions

File tree

.github/labels.yaml

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,4 +83,8 @@
8383

8484
- name: 'release-please:force-run'
8585
color: bdca82
86-
description: Manually trigger the release please workflow on a PR.
86+
description: Manually trigger the release please workflow on a PR.
87+
88+
- name: 'ci:run-evals'
89+
color: 4285f4
90+
description: Manually trigger the evaluation CI pipeline on a PR.

cloudbuild.yaml

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,7 @@ steps:
2727
- |
2828
set -e
2929
30-
# Only run on release branches
31-
if [[ "$_HEAD_BRANCH" != release-please-* ]]; then
32-
echo "Not a release-please branch. Exiting."
33-
exit 0
34-
fi
35-
echo "Release branch detected. Fetching PR data from GitHub API..."
30+
echo "Fetching PR data from GitHub API..."
3631
3732
# Fetch PR data and status code
3833
HTTP_STATUS=$(curl -s -o pr_data.json -w "%{http_code}" -H "Authorization: token $$GITHUB_TOKEN" \
@@ -46,28 +41,39 @@ steps:
4641
4742
PR_DATA=$(cat pr_data.json)
4843
49-
# Extract labels and title from PR data (Use $$ to escape bash variables)
50-
PR_LABELS=$(echo "$$PR_DATA" | jq -r '[.labels[].name] | join(",")')
44+
# Extract title from PR data (Use $$ to escape bash variables)
5145
PR_TITLE=$(echo "$$PR_DATA" | jq -r '.title')
5246
53-
# Determine Release Version (Use double quotes and $$ for bash variables)
54-
if [[ "$$PR_LABELS" == *"autorelease: triggered"* ]]; then
47+
# Check if execution labels are present using exact matching via jq
48+
if ! jq -e '.labels | any(.name == "autorelease: pending" or .name == "ci:run-evals")' pr_data.json > /dev/null; then
49+
echo "PR does not have 'autorelease: pending' or 'ci:run-evals' label. Skipping execution."
50+
exit 0
51+
fi
52+
echo "Execution label detected. Processing release version context..."
53+
54+
# Determine Release Version based on branch name
55+
if [[ "$_HEAD_BRANCH" == release-please-* ]]; then
5556
if [[ "$$PR_TITLE" =~ release\ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then
5657
export RELEASE_VERSION="$${BASH_REMATCH[1]}"
5758
else
58-
export RELEASE_VERSION="unknown"
59+
export RELEASE_VERSION="pr-$_PR_NUMBER-release-unknown"
5960
fi
6061
else
61-
export RELEASE_VERSION="unknown"
62+
export RELEASE_VERSION="pr-$_PR_NUMBER-ci-run-evals"
6263
fi
6364
6465
# Workaround for evalbench bug: settings are only applied if path basename matches extension ID
6566
ln -s /workspace /workspace/cloud-sql-postgresql
6667
cd /evalbench
6768
69+
# Set environment variables for evalbench
6870
export EVAL_GCP_PROJECT_ID=$PROJECT_ID
6971
export EVAL_GCP_PROJECT_REGION=$_CLOUD_SQL_REGION
7072
export GOOGLE_CLOUD_PROJECT=$PROJECT_ID
73+
export EVAL_REPORTING_PROJECT=$_EVAL_REPORTING_PROJECT
74+
75+
76+
# Set environment variables for extension
7177
export CLOUD_SQL_POSTGRES_PROJECT=$PROJECT_ID
7278
export CLOUD_SQL_POSTGRES_INSTANCE=$_CLOUD_SQL_INSTANCE
7379
export CLOUD_SQL_POSTGRES_REGION=$_CLOUD_SQL_REGION

evals/model_config.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
gemini_cli_version: "@google/gemini-cli@0.38.1"
15+
gemini_cli_version: "@google/gemini-cli@latest"
1616
generator: gemini_cli
1717
env:
1818
GOOGLE_CLOUD_PROJECT: "${GOOGLE_CLOUD_PROJECT}"
1919
GOOGLE_CLOUD_LOCATION: "global"
2020
GOOGLE_GENAI_USE_VERTEXAI: "true"
21+
GEMINI_CLI_TRUST_WORKSPACE: "true"
2122
setup:
2223
extensions:
2324
# Points to the symlink created in cloudbuild.yaml to match the extension ID

evals/run_config.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,4 +34,5 @@ scorers:
3434

3535
reporting:
3636
bigquery:
37-
gcp_project_id: cloud-db-nl2sql
37+
gcp_project_id: "${EVAL_REPORTING_PROJECT}"
38+

0 commit comments

Comments
 (0)