diff --git a/.github/actions/dispatch-suite/action.yaml b/.github/actions/dispatch-suite/action.yaml
new file mode 100644
index 0000000..cac384f
--- /dev/null
+++ b/.github/actions/dispatch-suite/action.yaml
@@ -0,0 +1,102 @@
+name: 'Dispatch scenario suite'
+description: >-
+  Dispatch a downstream cascade-example repo's scenario-suite.yaml on its own
+  main, recover the run id it created, and watch that run to its conclusion.
+
+inputs:
+  repo:
+    description: 'Target repo slug, e.g. stablekernel/cascade-example-primary'
+    required: true
+  token:
+    description: >-
+      PAT with Actions read/write on the target repo. GITHUB_TOKEN cannot
+      dispatch cross-repo, so a fleet-wide fine-grained PAT is mandatory.
+    required: true
+  workflow:
+    description: 'Workflow file to dispatch in the target repo'
+    required: false
+    default: 'scenario-suite.yaml'
+  ref:
+    description: "Target ref to dispatch against (must be the target's default branch)"
+    required: false
+    default: 'main'
+  recover-attempts:
+    description: 'How many times to poll for the dispatched run before giving up'
+    required: false
+    default: '30'
+  recover-interval:
+    description: 'Seconds between recovery polls'
+    required: false
+    default: '10'
+
+runs:
+  using: 'composite'
+  steps:
+    # Reconciliation, dispatch -> recover -> watch, with zero target-side change.
+    #
+    # Cross-repo workflow_dispatch returns 204 with no run id (CONFIRMED in the
+    # pattern research), so we cannot await the run we just created directly. We
+    # recover it by listing the target's scenario-suite runs created at/after the
+    # dispatch timestamp (event = workflow_dispatch) and taking the newest. This
+    # is the current approach; a future refinement could echo a distinct_id
+    # marker into the suite run-name for race-free recovery once the suites
+    # carry one.
+    - name: Dispatch and watch
+      shell: bash
+      env:
+        GH_TOKEN: ${{ inputs.token }}
+        TARGET_REPO: ${{ inputs.repo }}
+        TARGET_WORKFLOW: ${{ inputs.workflow }}
+        TARGET_REF: ${{ inputs.ref }}
+        RECOVER_ATTEMPTS: ${{ inputs.recover-attempts }}
+        RECOVER_INTERVAL: ${{ inputs.recover-interval }}
+      run: |
+        set -euo pipefail
+
+        # Capture a UTC timestamp BEFORE dispatching so the recovery filter only
+        # matches runs this action created, not pre-existing ones.
+        DISPATCH_TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+        echo "Dispatching $TARGET_WORKFLOW in $TARGET_REPO @ $TARGET_REF (since $DISPATCH_TS)"
+
+        # NOTE: do NOT pass -f cascade_version=... here. The suites do not define
+        # that input yet, so an extra input would error with "unexpected inputs".
+        # The version under test is computed and logged by the orchestrator but
+        # is inert until the suites accept the input.
+        gh workflow run "$TARGET_WORKFLOW" \
+          --repo "$TARGET_REPO" \
+          --ref "$TARGET_REF"
+
+        # Recover the run id. Dispatch is async, so poll with a bounded retry; a
+        # failed `gh run list` call counts as "not visible yet" and is retried.
+        RUN_ID=""
+        for attempt in $(seq 1 "$RECOVER_ATTEMPTS"); do
+          RUN_ID=$(gh run list \
+            --repo "$TARGET_REPO" \
+            --workflow "$TARGET_WORKFLOW" \
+            --event workflow_dispatch \
+            --created ">=$DISPATCH_TS" \
+            --limit 20 \
+            --json databaseId,status,conclusion,createdAt \
+            --jq 'sort_by(.createdAt) | reverse | .[0].databaseId // empty') || RUN_ID=""
+          if [ -n "$RUN_ID" ]; then
+            echo "Recovered run id $RUN_ID on attempt $attempt"
+            break
+          fi
+          echo "Run not visible yet (attempt $attempt/$RECOVER_ATTEMPTS); sleeping ${RECOVER_INTERVAL}s"
+          sleep "$RECOVER_INTERVAL"
+        done
+
+        if [ -z "$RUN_ID" ]; then
+          echo "::error::Could not recover a $TARGET_WORKFLOW run in $TARGET_REPO after dispatch"
+          exit 1
+        fi
+
+        RUN_URL="https://github.com/$TARGET_REPO/actions/runs/$RUN_ID"
+        echo "Watching $RUN_URL"
+        {
+          echo "- **$TARGET_REPO**: [run $RUN_ID]($RUN_URL)"
+        } >> "$GITHUB_STEP_SUMMARY"
+
+        # Block on the recovered run's conclusion. --exit-status makes gh return
+        # non-zero if the run concluded with a non-success result.
+        gh run watch "$RUN_ID" --repo "$TARGET_REPO" --exit-status
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index ea6e2f3..3565bde 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -1,21 +1,21 @@
-# End-to-end test workflow
+# Integration test workflow (act + gitea testcontainers).
 # Triggers:
 #   push:tags          every release tag (existing)
 #   workflow_dispatch  manual run against any ref (existing)
 #   merge_group        runs as a merge-queue gate before merging to main
-#   schedule           nightly at 07:00 UTC (low-traffic window) against main
 #
-# E2E uses act + gitea testcontainers and is too slow + flaky to run per PR.
-# Run locally (`go test -v ./e2e/...`) before pushing instead.
-name: E2E
+# This workflow uses act + gitea testcontainers and is too slow + flaky to run
+# per PR. Run locally (`go test -v ./e2e/...`) before pushing instead.
+#
+# NOTE: the `name:` below is referenced by fleet-e2e.yaml's workflow_run trigger
+# ("Integration (act + gitea)"). Keep the two in sync if this is ever renamed.
+name: Integration (act + gitea)
 
 on:
   push:
     tags:
       - 'v*'
   merge_group:
-  schedule:
-    - cron: '0 7 * * *'
   workflow_dispatch:
     inputs:
       ref:
@@ -42,8 +42,8 @@ jobs:
       - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
         with:
           # workflow_dispatch: honour the explicit ref input.
-          # All other triggers (push:tags, merge_group, schedule): use the
-          # exact SHA that triggered the run so we test what GitHub resolved.
+          # All other triggers (push:tags, merge_group): use the exact SHA
+          # that triggered the run so we test what GitHub resolved.
           ref: ${{ github.event.inputs.ref || github.sha }}
 
       - uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
diff --git a/.github/workflows/fleet-e2e.yaml b/.github/workflows/fleet-e2e.yaml
new file mode 100644
index 0000000..ff73fe6
--- /dev/null
+++ b/.github/workflows/fleet-e2e.yaml
@@ -0,0 +1,224 @@
+# Fleet E2E - revalidates the downstream cascade-example fleet on live GitHub.
+#
+# This is maintainer CI: hand-written tooling that lives in cascade's repo, not
+# a product feature and not part of cascade's generated output. A green Fleet
+# run means: this cascade version validated across all 8 example
+# repos, each running its own scenario-suite.yaml in its OWN repo context (own
+# token, own main, own manifest). It is the release-candidate fleet gate.
+#
+# Triggers:
+#   workflow_run of "Integration (act + gitea)" on completion - makes the E2E
+#     dependency NATIVE: Fleet only fans out once Integration is
+#     green for an rc tag. No runner held open polling for it.
+#   workflow_dispatch  manual override (bypasses the rc-tag gate intentionally),
+#     with an optional cascade_version input.
+#
+# IMPORTANT: the workflow_run trigger references the source workflow by its
+# `name:` ("Integration (act + gitea)"). Keep that name in sync with e2e.yaml.
+name: Fleet E2E (live GitHub)
+
+on:
+  workflow_run:
+    workflows: ["Integration (act + gitea)"]
+    types: [completed]
+  workflow_dispatch:
+    inputs:
+      cascade_version:
+        description: >-
+          cascade version to validate (e.g. v1.2.0-rc.1). Default empty resolves
+          to the rc tag on the workflow_run path. NOTE: passing this to the
+          suites is wired but inert until the suites accept the input.
+        required: false
+        default: ''
+
+permissions:
+  contents: read
+
+# Serialise fleet runs per rc tag (or dispatch version): a second run for the
+# same key queues behind the in-flight one. Different keys are not serialised.
+concurrency:
+  group: fleet-e2e-${{ github.event.workflow_run.head_branch || github.event.inputs.cascade_version || github.run_id }}
+  cancel-in-progress: false
+
+env:
+  # Eight downstream example repos. primary must finish before its two dependents
+  # (they mutate primary's shared external state); the rest are independent.
+  FLEET_OWNER: stablekernel
+
+jobs:
+  # Resolve the cascade version under test and apply the rc-tag gate once: the
+  # fan-out jobs `needs:` this job, so skipping it here skips them as well.
+  resolve:
+    name: Resolve version under test
+    runs-on: ubuntu-latest
+    # Top-level guard: only fan out for a manual dispatch, or a green
+    # Integration run that was a push of an rc tag. This filters out failed,
+    # merge_group and non-rc completions.
+    #
+    # workflow_run.head_branch carries the short ref name of whatever triggered
+    # the source run. For a tag push that is the tag's short name (e.g.
+    # v1.2.0-rc.1). NOTE: because this gate requires a non-empty head_branch,
+    # the head_sha -> tag fallback in the compute step cannot be reached from a
+    # workflow_run event today; it only matters if this gate is ever relaxed.
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      (github.event.workflow_run.conclusion == 'success' &&
+      github.event.workflow_run.event == 'push' &&
+      startsWith(github.event.workflow_run.head_branch, 'v') &&
+      contains(github.event.workflow_run.head_branch, '-rc.'))
+    permissions:
+      contents: read
+      actions: read
+    outputs:
+      cascade_version: ${{ steps.compute.outputs.cascade_version }}
+    steps:
+      - name: Compute cascade version under test
+        id: compute
+        env:
+          # PAT is only needed for the head_sha -> tag fallback (a cross-ref
+          # lookup against this repo's tags). GITHUB_TOKEN would also work for
+          # same-repo reads, but we standardise on the fleet PAT.
+          GH_TOKEN: ${{ secrets.CASCADE_STATE_TOKEN }}
+          EVENT_NAME: ${{ github.event_name }}
+          INPUT_VERSION: ${{ github.event.inputs.cascade_version }}
+          WR_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
+          WR_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
+        run: |
+          set -euo pipefail
+          if [ "$EVENT_NAME" = "workflow_dispatch" ] && [ -n "$INPUT_VERSION" ]; then
+            VERSION="$INPUT_VERSION"
+          elif [ -n "$WR_HEAD_BRANCH" ]; then
+            # Primary path: the rc tag short-name from the source push run.
+            VERSION="$WR_HEAD_BRANCH"
+          elif [ -n "$WR_HEAD_SHA" ]; then
+            # Fallback: head_branch was empty; resolve the rc tag pointing at the
+            # source run's head_sha. No match is tolerated (`|| true`) and leaves
+            # VERSION empty. --paginate: the tags API returns 30 entries per page.
+            # A sha can carry more than one rc tag; pick the highest by version
+            # sort so selection is deterministic regardless of API ordering.
+            VERSION=$(gh api --paginate "repos/${GITHUB_REPOSITORY}/tags" \
+              --jq ".[] | select(.commit.sha == \"$WR_HEAD_SHA\") | .name" \
+              | grep -- '-rc\.' | sort -V -r | head -n 1 || true)
+          else
+            VERSION=""
+          fi
+
+          echo "cascade_version=$VERSION" >> "$GITHUB_OUTPUT"
+          {
+            echo "## Fleet E2E"
+            echo ""
+            echo "Trigger: \`$EVENT_NAME\`"
+            echo "cascade version under test: \`${VERSION:-n/a}\`"
+            echo ""
+            echo "> Version passing to suites is computed and logged here but"
+            echo "> currently INERT: the suites do not yet accept a"
+            echo "> \`cascade_version\` input."
+          } >> "$GITHUB_STEP_SUMMARY"
+
+  # Stage 1: primary must run and pass before its dependents.
+  primary:
+    name: primary
+    needs: resolve
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      actions: read
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+      - name: Dispatch and watch primary
+        uses: ./.github/actions/dispatch-suite
+        with:
+          repo: ${{ env.FLEET_OWNER }}/cascade-example-primary
+          token: ${{ secrets.CASCADE_STATE_TOKEN }}
+
+  # Stage 2: dependents of primary (mutate primary's shared external state),
+  # so they only start after primary is green.
+  dependents:
+    name: dependents (${{ matrix.repo }})
+    needs: primary
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      actions: read
+    strategy:
+      fail-fast: false
+      matrix:
+        repo: [artifact-a, artifact-b]
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+      - name: Dispatch and watch
+        uses: ./.github/actions/dispatch-suite
+        with:
+          repo: ${{ env.FLEET_OWNER }}/cascade-example-${{ matrix.repo }}
+          token: ${{ secrets.CASCADE_STATE_TOKEN }}
+
+  # Stage 3: independent suites, run in parallel with no ordering constraint.
+  independents:
+    name: independents (${{ matrix.repo }})
+    needs: resolve
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      actions: read
+    strategy:
+      fail-fast: false
+      matrix:
+        repo: [4env, 3env, 2env, single-env, release-only]
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+      - name: Dispatch and watch
+        uses: ./.github/actions/dispatch-suite
+        with:
+          repo: ${{ env.FLEET_OWNER }}/cascade-example-${{ matrix.repo }}
+          token: ${{ secrets.CASCADE_STATE_TOKEN }}
+
+  # Fan-in: this job's conclusion is the rc fleet gate. It fails if any upstream
+  # fan-out job failed and emits a per-stage result table to the summary.
+  aggregate:
+    name: Fleet gate
+    needs: [resolve, primary, dependents, independents]
+    # Only render a verdict when the fleet actually fanned out. On filtered-out
+    # completions (failed Integration, merge_group, non-rc tags) resolve is
+    # skipped, so this job is skipped too and the run is a clean no-op rather
+    # than a false-red. Manual dispatches always pass resolve's gate. A fan-out
+    # failure still reds the run: the result checks below catch the failed stage.
+    if: always() && needs.resolve.result == 'success'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Aggregate fleet result
+        env:
+          R_PRIMARY: ${{ needs.primary.result }}
+          R_DEPENDENTS: ${{ needs.dependents.result }}
+          R_INDEPENDENTS: ${{ needs.independents.result }}
+          VERSION: ${{ needs.resolve.outputs.cascade_version }}
+        run: |
+          set -euo pipefail
+          {
+            echo "## Fleet E2E result"
+            echo ""
+            echo "cascade version under test: \`${VERSION:-n/a}\`"
+            echo ""
+            echo "| Stage | Result |"
+            echo "|---|---|"
+            echo "| primary | $R_PRIMARY |"
+            echo "| dependents (artifact-a, artifact-b) | $R_DEPENDENTS |"
+            echo "| independents (4env, 3env, 2env, single-env, release-only) | $R_INDEPENDENTS |"
+            echo ""
+            echo "> rc gate: this conclusion is the fleet validation signal for"
+            echo "> the rc tag. rc -> release promotion should consume the latest"
+            echo "> fleet-e2e conclusion for that tag before promoting."
+          } >> "$GITHUB_STEP_SUMMARY"
+
+          fail=0
+          for r in "$R_PRIMARY" "$R_DEPENDENTS" "$R_INDEPENDENTS"; do
+            if [ "$r" != "success" ]; then
+              fail=1
+            fi
+          done
+          if [ "$fail" -ne 0 ]; then
+            echo "::error::Fleet E2E failed: one or more suites did not pass"
+            exit 1
+          fi
+          echo "Fleet E2E passed across all suites"
diff --git a/.github/workflows/validate.yaml b/.github/workflows/validate.yaml
index 4ab367d..4dfc036 100644
--- a/.github/workflows/validate.yaml
+++ b/.github/workflows/validate.yaml
@@ -1,6 +1,13 @@
-# Validation workflow - runs tests and lint
-# Called by orchestrate workflow during CI/CD
-name: Validate
+# Tests & Lint - runs go test -race + coverage and golangci-lint.
+#
+# Triggers:
+#   workflow_call      invoked by orchestrate.yaml on PRs (keep - do not remove).
+#   push: tags         standalone run on every release/rc tag.
+#   workflow_dispatch  manual standalone run against any ref.
+#
+# The standalone triggers give this workflow runs of its own so its status
+# badge renders; a workflow_call-only workflow has no standalone runs to badge.
+name: Tests & Lint
 
 on:
   workflow_call:
@@ -14,6 +21,10 @@ on:
       result:
         description: 'Validation result (success/failure)'
         value: ${{ jobs.validate.outputs.result }}
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
 
 permissions:
   contents: read
diff --git a/README.md b/README.md
index 5175e44..79048d5 100644
--- a/README.md
+++ b/README.md
@@ -10,14 +10,19 @@

- Validate - E2E CodeQL OpenSSF Scorecard Latest release License: Apache 2.0

+ +

+ Tests & Lint + Integration (act + gitea) + Fleet E2E (live GitHub) +

+

The cascade mascot