diff --git a/.github/agentic-ops.yml b/.github/agentic-ops.yml new file mode 100644 index 0000000..52a9b46 --- /dev/null +++ b/.github/agentic-ops.yml @@ -0,0 +1,27 @@ +# Configuration for the agentic-ops bundle (audit + optimizer). +# +# By default the workflows audit ONLY the repository they run in. To audit +# AI-credit (AIC) spend across MULTIPLE repositories from one central repo, list +# them under `repos:` below (owner/repo, one per line, uncommented). The audit +# and optimizer then collect each repository's agentic-workflow logs via +# `gh aw logs --repo` and aggregate the results per repository and workflow. +# +# Leave this file out, or leave `repos:` empty, to keep the default single-repo +# behavior — the multi-repo feature is fully opt-in and backward compatible. +# +# Multi-repo collection reads each listed repository's GitHub Actions API, so it +# needs a token with `actions: read` on every listed repo. These workflows use +# gh-aw's standard GH_AW_GITHUB_TOKEN "magic" secret (set it to a classic PAT +# with `repo`, a fine-grained PAT with Actions read, or a GitHub App token); see +# https://github.github.com/gh-aw/reference/auth/. They fall back to the default +# GITHUB_TOKEN (current repo only) when it is unset. +# +# See the README section "Auditing multiple repositories". +repos: + # - owner/repo + # - owner/another-repo + +# Optional. The repository that develops the AIC monitoring family (this audit + +# optimizer). Its own monitoring workflows remain eligible for optimization +# there; in every other repo they are excluded. Defaults to githubnext/agentic-ops. 
+# source-repo: githubnext/agentic-ops diff --git a/.github/workflows/agentic-token-audit.lock.yml b/.github/workflows/agentic-token-audit.lock.yml index a2306b9..93697a4 100644 --- a/.github/workflows/agentic-token-audit.lock.yml +++ b/.github/workflows/agentic-token-audit.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"4a41c3edd8a3fb119d464ec53a09afac38215cacc41e4814d76d3f62887f0111","body_hash":"7d511f3097a945ab0d7e071cfe75231c45c678e9c4047d791d457aad1dcccbb5","compiler_version":"v0.79.6","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"baf7858359bb170f1ba5a354c161e18f21cd54962a064a167d5842ceb913e0ee","body_hash":"4f9e2c6809a379c8ed4ad68f46b955c4038adeafc4d752331993c96ba1c6bd4d","compiler_version":"v0.79.6","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/setup-python","sha":"a309ff8b426b58ec0e2a45f0f869d46889d02405","version":"v6.2.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"},{"repo":"github/gh-aw/actions/setup-cli","sha":"9c481b8bc46dce8b92fce8ffc51781e5c330d37c","version":"v0.79.6"}],"containers":[{"image":"alpine:latest","digest":"sha256:5b10f432ef3da1b8d4c7eb6c487f2f5a8f096bc91145e68878dd4a5
019afde11","pinned_image":"alpine:latest@sha256:5b10f432ef3da1b8d4c7eb6c487f2f5a8f096bc91145e68878dd4a5019afde11"},{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.25","digest":"sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.25@sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"}]} # ___ _ _ # / _ \ | | (_) @@ -52,7 +52,7 @@ name: "Daily Agentic Workflow AIC Usage Audit" on: schedule: - - cron: "22 12 * * 1-5" + - cron: "12 12 * * 1-5" # Friendly format: daily around 12:00 on weekdays (scattered) workflow_dispatch: inputs: @@ -272,7 +272,6 @@ jobs: env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_ENGINE_ID: "copilot" - GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -441,9 +440,9 @@ jobs: run: | python3 
-m pip install --quiet --target /tmp/gh-aw/token-audit/site-packages pandas matplotlib seaborn - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} name: Download agentic workflow logs - run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\nPARTS_DIR=/tmp/gh-aw/token-audit/log-parts\nmkdir -p \"$PARTS_DIR\"\n\n# Fetch logs per workflow to avoid repo-wide pagination truncation in\n# high-CI-volume repositories.\nFOUND_WORKFLOW=0\nfor workflow in .github/workflows/*.md; do\n [ -f \"$workflow\" ] || continue\n\n WORKFLOW_ID=$(sed -n 's/^tracker-id:[[:space:]]*//p' \"$workflow\" | head -n 1 | tr -d '\\r' | sed 's/[[:space:]]*$//')\n [ -n \"$WORKFLOW_ID\" ] || continue\n\n FOUND_WORKFLOW=1\n SAFE_WORKFLOW_ID=$(printf '%s' \"$WORKFLOW_ID\" | tr -cs 'A-Za-z0-9._-' '_')\n PART_FILE=\"$PARTS_DIR/$SAFE_WORKFLOW_ID.json\"\n PART_EXIT=0\n gh aw logs \"$WORKFLOW_ID\" \\\n --start-date -1d \\\n --json \\\n -c 100 \\\n > \"$PART_FILE\" || PART_EXIT=$?\n\n if ! jq -e . 
\"$PART_FILE\" >/dev/null 2>&1; then\n echo \"⚠️ $WORKFLOW_ID: invalid log JSON (exit code $PART_EXIT)\"\n rm -f \"$PART_FILE\"\n continue\n fi\n\n COUNT=$(jq '(.runs // []) | length' \"$PART_FILE\")\n if [ \"$COUNT\" -gt 0 ]; then\n echo \"✅ $WORKFLOW_ID: downloaded $COUNT runs (exit code $PART_EXIT)\"\n else\n echo \"⚠️ $WORKFLOW_ID: no log data (exit code $PART_EXIT)\"\n rm -f \"$PART_FILE\"\n fi\ndone\n\nif [ \"$FOUND_WORKFLOW\" -eq 1 ] && ls \"$PARTS_DIR\"/*.json >/dev/null 2>&1; then\n jq -s '\n (map(.runs // []) | add // [] | unique_by(.run_id)) as $runs |\n {\n summary: {\n total_runs: ($runs | length),\n total_tokens: ($runs | map(.token_usage // 0) | add // 0),\n total_aic: ($runs | map(.aic // 0) | add // 0)\n },\n runs: $runs\n }\n ' \"$PARTS_DIR\"/*.json > /tmp/gh-aw/token-audit/workflow-logs.json\n TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/workflow-logs.json)\n echo \"✅ Downloaded $TOTAL agentic workflow runs (last 24 hours)\"\nelse\n if [ \"$FOUND_WORKFLOW\" -eq 0 ]; then\n echo \"⚠️ No agentic workflow sources found under .github/workflows\"\n fi\n echo '{\"runs\":[],\"summary\":{}}' > /tmp/gh-aw/token-audit/workflow-logs.json\nfi\n" + run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\nPARTS_DIR=/tmp/gh-aw/token-audit/log-parts\nmkdir -p \"$PARTS_DIR\"\n\n# Determine which repositories to audit. By default this is just the\n# current repository (single-repo behavior, unchanged). When\n# `.github/agentic-ops.yml` lists `repos:`, audit each of them and\n# aggregate centrally. 
See the README \"Auditing multiple repositories\".\nCONFIG_FILE=\".github/agentic-ops.yml\"\nREPOS=()\nif [ -f \"$CONFIG_FILE\" ]; then\n while IFS= read -r repo_line; do\n if [ -n \"$repo_line\" ]; then REPOS+=(\"$repo_line\"); fi\n done < <(awk '/^repos:[[:space:]]*$/{f=1;next} /^[^[:space:]#]/{f=0} f' \"$CONFIG_FILE\" \\\n | sed 's/#.*$//' \\\n | grep -oE '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+' || true)\nfi\nif [ \"${#REPOS[@]}\" -eq 0 ]; then\n REPOS=(\"${GITHUB_REPOSITORY:-}\")\nfi\necho \"🗂️ Auditing repositories:\"\nprintf ' - %s\\n' \"${REPOS[@]}\"\n\n# Fetch one workflow's logs, stamp each run with its source repository, and\n# keep the part only if it has runs. $1=repo, $2=workflow identifier; any\n# further args are passed through to `gh aw logs` (e.g. --repo for other repos).\nFOUND_WORKFLOW=0\ncollect_one() {\n local repo=\"$1\" wfid=\"$2\"; shift 2\n local safe_repo safe_id part exit_code count\n safe_repo=$(printf '%s' \"$repo\" | tr -cs 'A-Za-z0-9._-' '_')\n safe_id=$(printf '%s' \"$wfid\" | tr -cs 'A-Za-z0-9._-' '_')\n part=\"$PARTS_DIR/${safe_repo}__${safe_id}.json\"\n exit_code=0\n gh aw logs \"$wfid\" \"$@\" \\\n --start-date -1d \\\n --json \\\n -c 100 \\\n > \"$part\" || exit_code=$?\n if ! jq -e . \"$part\" >/dev/null 2>&1; then\n echo \"⚠️ $repo :: $wfid: invalid log JSON (exit code $exit_code)\"\n rm -f \"$part\"\n return 0\n fi\n # Stamp each run with its source repository for cross-repo aggregation.\n if jq --arg repo \"$repo\" '.runs = ((.runs // []) | map(.repository //= $repo))' \\\n \"$part\" > \"$part.tagged\"; then\n mv \"$part.tagged\" \"$part\"\n else\n rm -f \"$part.tagged\"\n fi\n count=$(jq '(.runs // []) | length' \"$part\")\n if [ \"$count\" -gt 0 ]; then\n echo \"✅ $repo :: $wfid: downloaded $count run(s) (exit code $exit_code)\"\n else\n echo \"⚠️ $repo :: $wfid: no log data (exit code $exit_code)\"\n rm -f \"$part\"\n fi\n}\n\n# Fetch logs per workflow (avoids repo-wide pagination truncation in busy\n# repos). 
For the current repo, resolve agentic workflows from the local\n# checkout by tracker-id — unchanged single-repo behavior. For any other\n# repo, resolve them by display name via the GitHub Actions API and pass\n# --repo, because `gh aw logs` resolves a remote workflow only by its name.\nfor repo in \"${REPOS[@]}\"; do\n [ -n \"$repo\" ] || continue\n if [ \"$repo\" = \"${GITHUB_REPOSITORY:-}\" ] && [ -d .github/workflows ]; then\n for wf in .github/workflows/*.md; do\n [ -f \"$wf\" ] || continue\n wfid=$(sed -n 's/^tracker-id:[[:space:]]*//p' \"$wf\" | head -n 1 | tr -d '\\r' | sed 's/[[:space:]]*$//')\n [ -n \"$wfid\" ] || continue\n FOUND_WORKFLOW=1\n collect_one \"$repo\" \"$wfid\"\n done\n else\n while IFS= read -r wfname; do\n [ -n \"$wfname\" ] || continue\n FOUND_WORKFLOW=1\n collect_one \"$repo\" \"$wfname\" --repo \"$repo\"\n done < <(gh api \"repos/$repo/actions/workflows?per_page=100\" \\\n --jq '.workflows[] | select(.path | endswith(\".lock.yml\")) | .name' 2>/dev/null || true)\n fi\ndone\n\nif [ \"$FOUND_WORKFLOW\" -eq 1 ] && ls \"$PARTS_DIR\"/*.json >/dev/null 2>&1; then\n jq -s '\n (map(.runs // []) | add // [] | unique_by([.repository, .run_id])) as $runs |\n {\n summary: {\n total_runs: ($runs | length),\n total_tokens: ($runs | map(.token_usage // 0) | add // 0),\n total_aic: ($runs | map(.aic // 0) | add // 0)\n },\n runs: $runs\n }\n ' \"$PARTS_DIR\"/*.json > /tmp/gh-aw/token-audit/workflow-logs.json\n TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/workflow-logs.json)\n echo \"✅ Downloaded $TOTAL agentic workflow runs (last 24 hours)\"\nelse\n if [ \"$FOUND_WORKFLOW\" -eq 0 ]; then\n echo \"⚠️ No agentic workflow sources found under .github/workflows\"\n fi\n echo '{\"runs\":[],\"summary\":{}}' > /tmp/gh-aw/token-audit/workflow-logs.json\nfi\n" # Repo memory git-based storage configuration from frontmatter processed below - name: Clone repo-memory branch (default) diff --git a/.github/workflows/agentic-token-audit.md 
b/.github/workflows/agentic-token-audit.md index 45c41e4..6712c18 100644 --- a/.github/workflows/agentic-token-audit.md +++ b/.github/workflows/agentic-token-audit.md @@ -46,50 +46,97 @@ steps: python3 -m pip install --quiet --target /tmp/gh-aw/token-audit/site-packages pandas matplotlib seaborn - name: Download agentic workflow logs env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} run: | set -euo pipefail mkdir -p /tmp/gh-aw/token-audit PARTS_DIR=/tmp/gh-aw/token-audit/log-parts mkdir -p "$PARTS_DIR" - # Fetch logs per workflow to avoid repo-wide pagination truncation in - # high-CI-volume repositories. - FOUND_WORKFLOW=0 - for workflow in .github/workflows/*.md; do - [ -f "$workflow" ] || continue - - WORKFLOW_ID=$(sed -n 's/^tracker-id:[[:space:]]*//p' "$workflow" | head -n 1 | tr -d '\r' | sed 's/[[:space:]]*$//') - [ -n "$WORKFLOW_ID" ] || continue + # Determine which repositories to audit. By default this is just the + # current repository (single-repo behavior, unchanged). When + # `.github/agentic-ops.yml` lists `repos:`, audit each of them and + # aggregate centrally. See the README "Auditing multiple repositories". 
+ CONFIG_FILE=".github/agentic-ops.yml" + REPOS=() + if [ -f "$CONFIG_FILE" ]; then + while IFS= read -r repo_line; do + if [ -n "$repo_line" ]; then REPOS+=("$repo_line"); fi + done < <(awk '/^repos:[[:space:]]*$/{f=1;next} /^[^[:space:]#]/{f=0} f' "$CONFIG_FILE" \ + | sed 's/#.*$//' \ + | grep -oE '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+' || true) + fi + if [ "${#REPOS[@]}" -eq 0 ]; then + REPOS=("${GITHUB_REPOSITORY:-}") + fi + echo "🗂️ Auditing repositories:" + printf ' - %s\n' "${REPOS[@]}" - FOUND_WORKFLOW=1 - SAFE_WORKFLOW_ID=$(printf '%s' "$WORKFLOW_ID" | tr -cs 'A-Za-z0-9._-' '_') - PART_FILE="$PARTS_DIR/$SAFE_WORKFLOW_ID.json" - PART_EXIT=0 - gh aw logs "$WORKFLOW_ID" \ + # Fetch one workflow's logs, stamp each run with its source repository, and + # keep the part only if it has runs. $1=repo, $2=workflow identifier; any + # further args are passed through to `gh aw logs` (e.g. --repo for other repos). + FOUND_WORKFLOW=0 + collect_one() { + local repo="$1" wfid="$2"; shift 2 + local safe_repo safe_id part exit_code count + safe_repo=$(printf '%s' "$repo" | tr -cs 'A-Za-z0-9._-' '_') + safe_id=$(printf '%s' "$wfid" | tr -cs 'A-Za-z0-9._-' '_') + part="$PARTS_DIR/${safe_repo}__${safe_id}.json" + exit_code=0 + gh aw logs "$wfid" "$@" \ --start-date -1d \ --json \ -c 100 \ - > "$PART_FILE" || PART_EXIT=$? - - if ! jq -e . "$PART_FILE" >/dev/null 2>&1; then - echo "⚠️ $WORKFLOW_ID: invalid log JSON (exit code $PART_EXIT)" - rm -f "$PART_FILE" - continue + > "$part" || exit_code=$? + if ! jq -e . "$part" >/dev/null 2>&1; then + echo "⚠️ $repo :: $wfid: invalid log JSON (exit code $exit_code)" + rm -f "$part" + return 0 fi - - COUNT=$(jq '(.runs // []) | length' "$PART_FILE") - if [ "$COUNT" -gt 0 ]; then - echo "✅ $WORKFLOW_ID: downloaded $COUNT runs (exit code $PART_EXIT)" + # Stamp each run with its source repository for cross-repo aggregation. 
+ if jq --arg repo "$repo" '.runs = ((.runs // []) | map(.repository //= $repo))' \ + "$part" > "$part.tagged"; then + mv "$part.tagged" "$part" + else + rm -f "$part.tagged" + fi + count=$(jq '(.runs // []) | length' "$part") + if [ "$count" -gt 0 ]; then + echo "✅ $repo :: $wfid: downloaded $count run(s) (exit code $exit_code)" else - echo "⚠️ $WORKFLOW_ID: no log data (exit code $PART_EXIT)" - rm -f "$PART_FILE" + echo "⚠️ $repo :: $wfid: no log data (exit code $exit_code)" + rm -f "$part" + fi + } + + # Fetch logs per workflow (avoids repo-wide pagination truncation in busy + # repos). For the current repo, resolve agentic workflows from the local + # checkout by tracker-id — unchanged single-repo behavior. For any other + # repo, resolve them by display name via the GitHub Actions API and pass + # --repo, because `gh aw logs` resolves a remote workflow only by its name. + for repo in "${REPOS[@]}"; do + [ -n "$repo" ] || continue + if [ "$repo" = "${GITHUB_REPOSITORY:-}" ] && [ -d .github/workflows ]; then + for wf in .github/workflows/*.md; do + [ -f "$wf" ] || continue + wfid=$(sed -n 's/^tracker-id:[[:space:]]*//p' "$wf" | head -n 1 | tr -d '\r' | sed 's/[[:space:]]*$//') + [ -n "$wfid" ] || continue + FOUND_WORKFLOW=1 + collect_one "$repo" "$wfid" + done + else + while IFS= read -r wfname; do + [ -n "$wfname" ] || continue + FOUND_WORKFLOW=1 + collect_one "$repo" "$wfname" --repo "$repo" + done < <(gh api "repos/$repo/actions/workflows?per_page=100" \ + --jq '.workflows[] | select(.path | endswith(".lock.yml")) | .name' 2>/dev/null || true) fi done if [ "$FOUND_WORKFLOW" -eq 1 ] && ls "$PARTS_DIR"/*.json >/dev/null 2>&1; then jq -s ' - (map(.runs // []) | add // [] | unique_by(.run_id)) as $runs | + (map(.runs // []) | add // [] | unique_by([.repository, .run_id])) as $runs | { summary: { total_runs: ($runs | length), @@ -142,6 +189,7 @@ Each element of `.runs` is a `RunData` object with (among others): |---|---|---| | `workflow_name` | string | 
Human-readable name | | `workflow_path` | string | `.github/workflows/....lock.yml` | +| `repository` | string | `owner/repo` the run belongs to (used to group spend by repository when auditing multiple repos) | | `aic` | float | AI Credits (AIC) consumed (primary billing metric; 1 AIC = $0.01 USD) | | `token_usage` | int | Total tokens (`omitempty` — treat missing/null as 0) | | `effective_tokens` | int | Legacy normalized token metric (deprecated; use `aic` for billing) | @@ -167,8 +215,8 @@ Write a Python script to `/tmp/gh-aw/token-audit/process_audit.py` and run it. T 1. Load `/tmp/gh-aw/token-audit/workflow-logs.json` and extract `.runs`. 2. Filter to `status == "completed"` runs only. -3. Group by `workflow_name` and compute per-workflow aggregates: - - `run_count`, `total_ai_credits`, `avg_ai_credits`, `total_tokens`, `avg_tokens`, `total_turns`, `avg_turns`, `total_action_minutes`, `error_count`, `warning_count` +3. Group by `repository` + `workflow_name` (so identically named workflows in different repositories are never conflated) and compute per-workflow aggregates: + - `repo`, `run_count`, `total_ai_credits`, `avg_ai_credits`, `total_tokens`, `avg_tokens`, `total_turns`, `avg_turns`, `total_action_minutes`, `error_count`, `warning_count` 4. Compute an overall summary: total runs, total AI credits, total tokens, total action minutes. 5. Sort workflows descending by `total_ai_credits`. 6. Save the result to `/tmp/gh-aw/token-audit/audit_snapshot.json` with this shape: @@ -185,6 +233,7 @@ Write a Python script to `/tmp/gh-aw/token-audit/process_audit.py` and run it. T }, "workflows": [ { + "repo": "owner/repo", "workflow_name": "...", "run_count": N, "total_ai_credits": F, @@ -204,6 +253,8 @@ Write a Python script to `/tmp/gh-aw/token-audit/process_audit.py` and run it. T Handle null/missing `aic` and `token_usage` by treating them as 0. 
+When runs span more than one repository, also build a top-level `repos` array — one entry per repository with `repo`, `total_runs`, `total_ai_credits`, `total_tokens`, `total_action_minutes`, and `active_workflows` — so the issue can present a per-repository rollup. When all runs come from a single repository, omit `repos`. + ## Phase 2 — Persist Snapshot to Repo-Memory 1. Read the snapshot from `/tmp/gh-aw/token-audit/audit_snapshot.json`. @@ -249,7 +300,7 @@ Create an issue with these sections: - Use `###` for main sections and `####` for subsections inside the issue body. - Keep the executive summary and final observations visible without collapsible sections. - Put verbose tables or supporting detail inside `<details><summary>...</summary>` blocks.
-- If you cite specific workflow runs, format them as links like `[§12345](https://github.com/${{ github.repository }}/actions/runs/12345)` and include up to 3 under `**References:**`. +- If you cite specific workflow runs, link them using each run's own `url` field (e.g. `[§12345](<url>)`) so links resolve correctly even when runs come from multiple repositories. Include up to 3 under `**References:**`. ### Report Template @@ -257,6 +308,7 @@ Create an issue with these sections: ### 📊 Executive Summary - **Period**: last 24 hours (YYYY-MM-DD to YYYY-MM-DD) +- **Repositories audited**: N (include this line only when auditing more than one) - **Total runs**: N - **Total AI credits**: N.NN AIC - **Total tokens**: N (formatted with commas) @@ -269,6 +321,16 @@ Create an issue with these sections: |---|---|---|---| | ... | ... | ... | ... | +When auditing more than one repository, add a leading **Repository** column to this table (and to the full per-workflow breakdown), and include the per-repository section below. + +### 🗂️ By Repository + +_Include this section only when auditing more than one repository, built from the snapshot `repos` array._ + +| Repository | Runs | Total AI Credits | Active Workflows | +|---|---|---|---| +| ... | ... | ... | ... 
| + ### 📈 Trends Embed chart images using uploaded asset URLs when available: diff --git a/.github/workflows/agentic-token-optimizer.lock.yml b/.github/workflows/agentic-token-optimizer.lock.yml index 18e457b..cfe3875 100644 --- a/.github/workflows/agentic-token-optimizer.lock.yml +++ b/.github/workflows/agentic-token-optimizer.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"a3339c9e0f7677eed814cb606a64ad59f73b76f804341b1d4ed4e91206ce5f5a","body_hash":"5c8703573ad87e25b6d9bcfab4ebaa03624d31343fdf7a245b4fce9fe52c94cc","compiler_version":"v0.79.6","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"87ebf6f3b5add0e4995036c6cc8cc83b88e22fc7debfaf8d3b6348f0f732f91d","body_hash":"122672ef1ea6d0815a29dae243b7c1bfc114de6031ba90ce4e8e9b1405133880","compiler_version":"v0.79.6","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"5c2fe865bb4dc46e1450f6ee0d0541d759aea73a","version":"v0.79.6"},{"repo":"github/gh-aw/actions/setup-cli","sha":"9c481b8bc46dce8b92fce8ffc51781e5c330d37c","version":"v0.79.6"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2","digest":"sha256:f88e5b17b6b7a600117bc121114d6ce2155c88c983c0c939c5df884f730fa1d6","pinned_image":"ghcr.io/github/gh-aw-firewall/agent:0.27.2@sha256:f88e5b17b6b7a600117bc121114d6ce2155c8
8c983c0c939c5df884f730fa1d6"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2","digest":"sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4","pinned_image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.27.2@sha256:ee39841d980878ebbb87592903b06d31a1af500c71525c9616f7e8e2a27041a4"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2","digest":"sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0","pinned_image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.27.2@sha256:02f3ec08f32dc26c5427920c6a2e2f3036238fce44802f2f11ef49ed8621b5d0"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2","digest":"sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591","pinned_image":"ghcr.io/github/gh-aw-firewall/squid:0.27.2@sha256:2e3a717e5f19a654cd9a2263beb52012b56bcb68562ec5ae2e42f9d156b49591"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.25","digest":"sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.25@sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"}]} # ___ _ _ # / _ \ | | (_) @@ -50,7 +50,7 @@ name: "Agentic Workflow AIC Usage Optimizer" on: schedule: - - cron: "22 14 * * 1-5" + - cron: "28 13 * * 1-5" # Friendly format: daily around 14:00 on weekdays (scattered) workflow_dispatch: inputs: @@ -427,11 +427,11 @@ jobs: env: GH_TOKEN: ${{ github.token }} - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} name: Download recent agentic workflow logs - run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\nPARTS_DIR=/tmp/gh-aw/token-audit/log-parts\nmkdir -p \"$PARTS_DIR\"\n\necho \"📥 Downloading 
agentic workflow logs (last 7 days)...\"\n\nFOUND_WORKFLOW=0\nfor workflow in .github/workflows/*.md; do\n [ -f \"$workflow\" ] || continue\n\n WORKFLOW_ID=$(sed -n 's/^tracker-id:[[:space:]]*//p' \"$workflow\" | head -n 1 | tr -d '\\r' | sed 's/[[:space:]]*$//')\n [ -n \"$WORKFLOW_ID\" ] || continue\n\n # Skip the AIC monitoring family in downstream repositories.\n # In the source repo (githubnext/agentic-ops) they remain valid targets;\n # in any other repo, optimization suggestions for them belong upstream.\n if [[ \"$GITHUB_REPOSITORY\" != \"githubnext/agentic-ops\" && \\\n (\"$WORKFLOW_ID\" == \"agentic-token-optimizer\" || \"$WORKFLOW_ID\" == \"agentic-token-audit\") ]]; then\n echo \"⏭️ Skipping $WORKFLOW_ID (AIC monitoring family — optimize in githubnext/agentic-ops, not here)\"\n continue\n fi\n\n FOUND_WORKFLOW=1\n SAFE_WORKFLOW_ID=$(printf '%s' \"$WORKFLOW_ID\" | tr -cs 'A-Za-z0-9._-' '_')\n PART_FILE=\"$PARTS_DIR/$SAFE_WORKFLOW_ID.json\"\n PART_EXIT=0\n gh aw logs \"$WORKFLOW_ID\" \\\n --start-date -7d \\\n --json \\\n -c 50 \\\n > \"$PART_FILE\" || PART_EXIT=$?\n\n if ! jq -e . 
\"$PART_FILE\" >/dev/null 2>&1; then\n echo \"⚠️ $WORKFLOW_ID: invalid log JSON (exit code $PART_EXIT)\"\n rm -f \"$PART_FILE\"\n continue\n fi\n\n COUNT=$(jq '(.runs // []) | length' \"$PART_FILE\")\n if [ \"$COUNT\" -gt 0 ]; then\n echo \"✅ $WORKFLOW_ID: downloaded $COUNT runs (exit code $PART_EXIT)\"\n else\n echo \"⚠️ $WORKFLOW_ID: no log data (exit code $PART_EXIT)\"\n rm -f \"$PART_FILE\"\n fi\ndone\n\nif [ \"$FOUND_WORKFLOW\" -eq 1 ] && ls \"$PARTS_DIR\"/*.json >/dev/null 2>&1; then\n jq -s '\n (map(.runs // []) | add // [] | unique_by(.run_id)) as $runs |\n {\n summary: {\n total_runs: ($runs | length),\n total_tokens: ($runs | map(.token_usage // 0) | add // 0),\n total_aic: ($runs | map(.aic // 0) | add // 0)\n },\n runs: $runs\n }\n ' \"$PARTS_DIR\"/*.json > /tmp/gh-aw/token-audit/all-runs.json\n TOTAL=$(jq '.runs | length' /tmp/gh-aw/token-audit/all-runs.json)\n echo \"✅ Downloaded $TOTAL agentic workflow runs (last 7 days)\"\nelse\n if [ \"$FOUND_WORKFLOW\" -eq 0 ]; then\n echo \"⚠️ No agentic workflow sources found under .github/workflows\"\n fi\n echo '{\"runs\":[],\"summary\":{}}' > /tmp/gh-aw/token-audit/all-runs.json\nfi\n\nBEFORE_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json)\nif [[ \"$GITHUB_REPOSITORY\" != \"githubnext/agentic-ops\" ]]; then\n jq '\n (.runs // [])\n | map(select(\n (.workflow_path // \"\") != \".github/workflows/agentic-token-optimizer.lock.yml\"\n and (.workflow_path // \"\") != \".github/workflows/agentic-token-audit.lock.yml\"\n and (.workflow_name // \"\") != \"Agentic Workflow AIC Usage Optimizer\"\n and (.workflow_name // \"\") != \"Daily Agentic Workflow AIC Usage Audit\"\n )) as $runs\n | {\n summary: {\n total_runs: ($runs | length),\n total_tokens: ($runs | map(.token_usage // 0) | add // 0),\n total_aic: ($runs | map(.aic // 0) | add // 0)\n },\n runs: $runs\n }\n ' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/all-runs.filtered.json\n mv 
/tmp/gh-aw/token-audit/all-runs.filtered.json /tmp/gh-aw/token-audit/all-runs.json\n AFTER_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json)\n echo \"🚫 Excluded AIC monitoring family from candidate pool: $((BEFORE_COUNT - AFTER_COUNT)) run(s) removed\"\nelse\n echo \"ℹ️ Running in source repo — AIC monitoring family remains in candidate pool\"\n AFTER_COUNT=$BEFORE_COUNT\nfi\n" + run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\nPARTS_DIR=/tmp/gh-aw/token-audit/log-parts\nmkdir -p \"$PARTS_DIR\"\n\necho \"📥 Downloading agentic workflow logs (last 7 days)...\"\n\n# Determine which repositories to scan. By default this is just the\n# current repository (single-repo behavior, unchanged). When\n# `.github/agentic-ops.yml` lists `repos:`, scan each of them and\n# aggregate centrally. See the README \"Auditing multiple repositories\".\nCONFIG_FILE=\".github/agentic-ops.yml\"\nREPOS=()\nif [ -f \"$CONFIG_FILE\" ]; then\n while IFS= read -r repo_line; do\n if [ -n \"$repo_line\" ]; then REPOS+=(\"$repo_line\"); fi\n done < <(awk '/^repos:[[:space:]]*$/{f=1;next} /^[^[:space:]#]/{f=0} f' \"$CONFIG_FILE\" \\\n | sed 's/#.*$//' \\\n | grep -oE '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+' || true)\nfi\nif [ \"${#REPOS[@]}\" -eq 0 ]; then\n REPOS=(\"${GITHUB_REPOSITORY:-}\")\nfi\n\n# The repository that develops the AIC monitoring family (audit + optimizer).\n# In that repo the family workflows are valid optimization targets; in any\n# other repo they are excluded (optimize them in the source repo, not here).\nSOURCE_REPO=\"\"\nif [ -f \"$CONFIG_FILE\" ]; then\n SOURCE_REPO=$(sed 's/#.*$//' \"$CONFIG_FILE\" | grep -E '^source-repo:' \\\n | grep -oE '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+' | head -n 1 || true)\nfi\nif [ -z \"$SOURCE_REPO\" ]; then\n SOURCE_REPO=\"githubnext/agentic-ops\"\nfi\necho \"🗂️ Scanning repositories:\"\nprintf ' - %s\\n' \"${REPOS[@]}\"\necho \"ℹ️ AIC monitoring family source repo: $SOURCE_REPO\"\n\n# Fetch one workflow's logs, stamp each 
run with its source repository, and\n# keep the part only if it has runs. $1=repo, $2=workflow identifier; any\n# further args are passed through to `gh aw logs` (e.g. --repo for other repos).\nFOUND_WORKFLOW=0\ncollect_one() {\n local repo=\"$1\" wfid=\"$2\"; shift 2\n local safe_repo safe_id part exit_code count\n safe_repo=$(printf '%s' \"$repo\" | tr -cs 'A-Za-z0-9._-' '_')\n safe_id=$(printf '%s' \"$wfid\" | tr -cs 'A-Za-z0-9._-' '_')\n part=\"$PARTS_DIR/${safe_repo}__${safe_id}.json\"\n exit_code=0\n gh aw logs \"$wfid\" \"$@\" \\\n --start-date -7d \\\n --json \\\n -c 50 \\\n > \"$part\" || exit_code=$?\n if ! jq -e . \"$part\" >/dev/null 2>&1; then\n echo \"⚠️ $repo :: $wfid: invalid log JSON (exit code $exit_code)\"\n rm -f \"$part\"\n return 0\n fi\n # Stamp each run with its source repository for cross-repo aggregation.\n if jq --arg repo \"$repo\" '.runs = ((.runs // []) | map(.repository //= $repo))' \\\n \"$part\" > \"$part.tagged\"; then\n mv \"$part.tagged\" \"$part\"\n else\n rm -f \"$part.tagged\"\n fi\n count=$(jq '(.runs // []) | length' \"$part\")\n if [ \"$count\" -gt 0 ]; then\n echo \"✅ $repo :: $wfid: downloaded $count run(s) (exit code $exit_code)\"\n else\n echo \"⚠️ $repo :: $wfid: no log data (exit code $exit_code)\"\n rm -f \"$part\"\n fi\n}\n\n# Fetch logs per workflow (avoids repo-wide pagination truncation in busy\n# repos). For the current repo, resolve agentic workflows from the local\n# checkout by tracker-id — unchanged single-repo behavior. 
For any other\n# repo, resolve them by display name via the GitHub Actions API and pass\n# --repo, because `gh aw logs` resolves a remote workflow only by its name.\n# The AIC monitoring family is skipped everywhere except the configured\n# source repo; the post-merge filter below is the safety net.\nfor repo in \"${REPOS[@]}\"; do\n [ -n \"$repo\" ] || continue\n if [ \"$repo\" = \"${GITHUB_REPOSITORY:-}\" ] && [ -d .github/workflows ]; then\n for wf in .github/workflows/*.md; do\n [ -f \"$wf\" ] || continue\n wfid=$(sed -n 's/^tracker-id:[[:space:]]*//p' \"$wf\" | head -n 1 | tr -d '\\r' | sed 's/[[:space:]]*$//')\n [ -n \"$wfid\" ] || continue\n if [ \"$repo\" != \"$SOURCE_REPO\" ] && { [ \"$wfid\" = \"agentic-token-optimizer\" ] || [ \"$wfid\" = \"agentic-token-audit\" ]; }; then\n echo \"⏭️ Skipping $repo :: $wfid (AIC monitoring family — optimize in $SOURCE_REPO)\"\n continue\n fi\n FOUND_WORKFLOW=1\n collect_one \"$repo\" \"$wfid\"\n done\n else\n while IFS= read -r wfname; do\n [ -n \"$wfname\" ] || continue\n if [ \"$repo\" != \"$SOURCE_REPO\" ] && { [ \"$wfname\" = \"Agentic Workflow AIC Usage Optimizer\" ] || [ \"$wfname\" = \"Daily Agentic Workflow AIC Usage Audit\" ]; }; then\n echo \"⏭️ Skipping $repo :: $wfname (AIC monitoring family — optimize in $SOURCE_REPO)\"\n continue\n fi\n FOUND_WORKFLOW=1\n collect_one \"$repo\" \"$wfname\" --repo \"$repo\"\n done < <(gh api \"repos/$repo/actions/workflows?per_page=100\" \\\n --jq '.workflows[] | select(.path | endswith(\".lock.yml\")) | .name' 2>/dev/null || true)\n fi\ndone\n\nif [ \"$FOUND_WORKFLOW\" -eq 1 ] && ls \"$PARTS_DIR\"/*.json >/dev/null 2>&1; then\n jq -s '\n (map(.runs // []) | add // [] | unique_by([.repository, .run_id])) as $runs |\n {\n summary: {\n total_runs: ($runs | length),\n total_tokens: ($runs | map(.token_usage // 0) | add // 0),\n total_aic: ($runs | map(.aic // 0) | add // 0)\n },\n runs: $runs\n }\n ' \"$PARTS_DIR\"/*.json > /tmp/gh-aw/token-audit/all-runs.json\n TOTAL=$(jq 
'.runs | length' /tmp/gh-aw/token-audit/all-runs.json)\n echo \"✅ Downloaded $TOTAL agentic workflow runs (last 7 days)\"\nelse\n if [ \"$FOUND_WORKFLOW\" -eq 0 ]; then\n echo \"⚠️ No agentic workflow sources found under .github/workflows\"\n fi\n echo '{\"runs\":[],\"summary\":{}}' > /tmp/gh-aw/token-audit/all-runs.json\nfi\n\n# Defensively drop any monitoring-family runs that slipped through from\n# repositories other than the source repo (e.g. a stale window or a renamed\n# workflow). Family runs from the source repo are retained as valid targets.\nBEFORE_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json)\njq --arg src \"$SOURCE_REPO\" '\n (.runs // [])\n | map(select(\n ((.repository // \"\") == $src)\n or (\n (.workflow_path // \"\") != \".github/workflows/agentic-token-optimizer.lock.yml\"\n and (.workflow_path // \"\") != \".github/workflows/agentic-token-audit.lock.yml\"\n and (.workflow_name // \"\") != \"Agentic Workflow AIC Usage Optimizer\"\n and (.workflow_name // \"\") != \"Daily Agentic Workflow AIC Usage Audit\"\n )\n )) as $runs\n | {\n summary: {\n total_runs: ($runs | length),\n total_tokens: ($runs | map(.token_usage // 0) | add // 0),\n total_aic: ($runs | map(.aic // 0) | add // 0)\n },\n runs: $runs\n }\n' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/all-runs.filtered.json\nmv /tmp/gh-aw/token-audit/all-runs.filtered.json /tmp/gh-aw/token-audit/all-runs.json\nAFTER_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json)\nif [ \"$BEFORE_COUNT\" -ne \"$AFTER_COUNT\" ]; then\n echo \"🚫 Excluded AIC monitoring family from candidate pool: $((BEFORE_COUNT - AFTER_COUNT)) run(s) removed\"\nfi\n" - name: Aggregate top workflows by AIC usage - run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\n\njq '{\n generated_at: (now | todateiso8601),\n window_days: 7,\n top_workflows: (\n [.runs[]\n | select(.status == \"completed\")\n | select((.aic // 0) > 0)\n | {\n workflow_name: 
.workflow_name,\n ai_credits: (.aic // 0),\n tokens: (.token_usage // 0),\n turns: (.turns // 0),\n action_minutes: (.action_minutes // 0)\n }\n ]\n | group_by(.workflow_name)\n | map({\n workflow_name: .[0].workflow_name,\n run_count: length,\n total_ai_credits: (map(.ai_credits) | add),\n avg_ai_credits: ((map(.ai_credits) | add) / length),\n total_tokens: (map(.tokens) | add),\n avg_tokens: ((map(.tokens) | add) / length),\n total_turns: (map(.turns) | add),\n total_action_minutes: (map(.action_minutes) | add)\n })\n | sort_by(.total_ai_credits)\n | reverse\n | .[:10]\n )\n}' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/top-workflows.json\n\necho \"✅ Generated top workflow summary at /tmp/gh-aw/token-audit/top-workflows.json\"\njq '.top_workflows' /tmp/gh-aw/token-audit/top-workflows.json\n" + run: "set -euo pipefail\nmkdir -p /tmp/gh-aw/token-audit\n\njq '{\n generated_at: (now | todateiso8601),\n window_days: 7,\n top_workflows: (\n [.runs[]\n | select(.status == \"completed\")\n | select((.aic // 0) > 0)\n | {\n repo: (.repository // \"\"),\n workflow_name: .workflow_name,\n workflow_path: (.workflow_path // \"\"),\n ai_credits: (.aic // 0),\n tokens: (.token_usage // 0),\n turns: (.turns // 0),\n action_minutes: (.action_minutes // 0)\n }\n ]\n | group_by([.repo, .workflow_name])\n | map({\n repo: .[0].repo,\n workflow_name: .[0].workflow_name,\n workflow_path: (map(.workflow_path) | map(select(. 
!= \"\")) | (.[0] // \"\")),\n run_count: length,\n total_ai_credits: (map(.ai_credits) | add),\n avg_ai_credits: ((map(.ai_credits) | add) / length),\n total_tokens: (map(.tokens) | add),\n avg_tokens: ((map(.tokens) | add) / length),\n total_turns: (map(.turns) | add),\n total_action_minutes: (map(.action_minutes) | add)\n })\n | sort_by(.total_ai_credits)\n | reverse\n | .[:10]\n )\n}' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/top-workflows.json\n\necho \"✅ Generated top workflow summary at /tmp/gh-aw/token-audit/top-workflows.json\"\njq '.top_workflows' /tmp/gh-aw/token-audit/top-workflows.json\n" - name: Load optimization history run: "set -euo pipefail\n\nOPT_LOG=\"/tmp/gh-aw/repo-memory/default/optimization-log.json\"\nif [ -f \"$OPT_LOG\" ]; then\n echo \"✅ Previous optimizations:\"\n jq -r '.[] | \"\\(.date): \\(.workflow_name)\"' \"$OPT_LOG\"\nelse\n echo \"ℹ️ No previous optimization history found.\"\nfi\n" diff --git a/.github/workflows/agentic-token-optimizer.md b/.github/workflows/agentic-token-optimizer.md index 02b1cbe..165586d 100644 --- a/.github/workflows/agentic-token-optimizer.md +++ b/.github/workflows/agentic-token-optimizer.md @@ -33,7 +33,7 @@ timeout-minutes: 30 steps: - name: Download recent agentic workflow logs env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} run: | set -euo pipefail mkdir -p /tmp/gh-aw/token-audit @@ -42,50 +42,113 @@ steps: echo "📥 Downloading agentic workflow logs (last 7 days)..." - FOUND_WORKFLOW=0 - for workflow in .github/workflows/*.md; do - [ -f "$workflow" ] || continue - - WORKFLOW_ID=$(sed -n 's/^tracker-id:[[:space:]]*//p' "$workflow" | head -n 1 | tr -d '\r' | sed 's/[[:space:]]*$//') - [ -n "$WORKFLOW_ID" ] || continue - - # Skip the AIC monitoring family in downstream repositories. 
- # In the source repo (githubnext/agentic-ops) they remain valid targets; - # in any other repo, optimization suggestions for them belong upstream. - if [[ "$GITHUB_REPOSITORY" != "githubnext/agentic-ops" && \ - ("$WORKFLOW_ID" == "agentic-token-optimizer" || "$WORKFLOW_ID" == "agentic-token-audit") ]]; then - echo "⏭️ Skipping $WORKFLOW_ID (AIC monitoring family — optimize in githubnext/agentic-ops, not here)" - continue - fi + # Determine which repositories to scan. By default this is just the + # current repository (single-repo behavior, unchanged). When + # `.github/agentic-ops.yml` lists `repos:`, scan each of them and + # aggregate centrally. See the README "Auditing multiple repositories". + CONFIG_FILE=".github/agentic-ops.yml" + REPOS=() + if [ -f "$CONFIG_FILE" ]; then + while IFS= read -r repo_line; do + if [ -n "$repo_line" ]; then REPOS+=("$repo_line"); fi + done < <(awk '/^repos:[[:space:]]*$/{f=1;next} /^[^[:space:]#]/{f=0} f' "$CONFIG_FILE" \ + | sed 's/#.*$//' \ + | grep -oE '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+' || true) + fi + if [ "${#REPOS[@]}" -eq 0 ]; then + REPOS=("${GITHUB_REPOSITORY:-}") + fi - FOUND_WORKFLOW=1 - SAFE_WORKFLOW_ID=$(printf '%s' "$WORKFLOW_ID" | tr -cs 'A-Za-z0-9._-' '_') - PART_FILE="$PARTS_DIR/$SAFE_WORKFLOW_ID.json" - PART_EXIT=0 - gh aw logs "$WORKFLOW_ID" \ + # The repository that develops the AIC monitoring family (audit + optimizer). + # In that repo the family workflows are valid optimization targets; in any + # other repo they are excluded (optimize them in the source repo, not here). 
+ SOURCE_REPO="" + if [ -f "$CONFIG_FILE" ]; then + SOURCE_REPO=$(sed 's/#.*$//' "$CONFIG_FILE" | grep -E '^source-repo:' \ + | grep -oE '[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+' | head -n 1 || true) + fi + if [ -z "$SOURCE_REPO" ]; then + SOURCE_REPO="githubnext/agentic-ops" + fi + echo "🗂️ Scanning repositories:" + printf ' - %s\n' "${REPOS[@]}" + echo "ℹ️ AIC monitoring family source repo: $SOURCE_REPO" + + # Fetch one workflow's logs, stamp each run with its source repository, and + # keep the part only if it has runs. $1=repo, $2=workflow identifier; any + # further args are passed through to `gh aw logs` (e.g. --repo for other repos). + FOUND_WORKFLOW=0 + collect_one() { + local repo="$1" wfid="$2"; shift 2 + local safe_repo safe_id part exit_code count + safe_repo=$(printf '%s' "$repo" | tr -cs 'A-Za-z0-9._-' '_') + safe_id=$(printf '%s' "$wfid" | tr -cs 'A-Za-z0-9._-' '_') + part="$PARTS_DIR/${safe_repo}__${safe_id}.json" + exit_code=0 + gh aw logs "$wfid" "$@" \ --start-date -7d \ --json \ -c 50 \ - > "$PART_FILE" || PART_EXIT=$? - - if ! jq -e . "$PART_FILE" >/dev/null 2>&1; then - echo "⚠️ $WORKFLOW_ID: invalid log JSON (exit code $PART_EXIT)" - rm -f "$PART_FILE" - continue + > "$part" || exit_code=$? + if ! jq -e . "$part" >/dev/null 2>&1; then + echo "⚠️ $repo :: $wfid: invalid log JSON (exit code $exit_code)" + rm -f "$part" + return 0 fi - - COUNT=$(jq '(.runs // []) | length' "$PART_FILE") - if [ "$COUNT" -gt 0 ]; then - echo "✅ $WORKFLOW_ID: downloaded $COUNT runs (exit code $PART_EXIT)" + # Stamp each run with its source repository for cross-repo aggregation. 
+ if jq --arg repo "$repo" '.runs = ((.runs // []) | map(.repository //= $repo))' \ + "$part" > "$part.tagged"; then + mv "$part.tagged" "$part" + else + rm -f "$part.tagged" + fi + count=$(jq '(.runs // []) | length' "$part") + if [ "$count" -gt 0 ]; then + echo "✅ $repo :: $wfid: downloaded $count run(s) (exit code $exit_code)" + else + echo "⚠️ $repo :: $wfid: no log data (exit code $exit_code)" + rm -f "$part" + fi + } + + # Fetch logs per workflow (avoids repo-wide pagination truncation in busy + # repos). For the current repo, resolve agentic workflows from the local + # checkout by tracker-id — unchanged single-repo behavior. For any other + # repo, resolve them by display name via the GitHub Actions API and pass + # --repo, because `gh aw logs` resolves a remote workflow only by its name. + # The AIC monitoring family is skipped everywhere except the configured + # source repo; the post-merge filter below is the safety net. + for repo in "${REPOS[@]}"; do + [ -n "$repo" ] || continue + if [ "$repo" = "${GITHUB_REPOSITORY:-}" ] && [ -d .github/workflows ]; then + for wf in .github/workflows/*.md; do + [ -f "$wf" ] || continue + wfid=$(sed -n 's/^tracker-id:[[:space:]]*//p' "$wf" | head -n 1 | tr -d '\r' | sed 's/[[:space:]]*$//') + [ -n "$wfid" ] || continue + if [ "$repo" != "$SOURCE_REPO" ] && { [ "$wfid" = "agentic-token-optimizer" ] || [ "$wfid" = "agentic-token-audit" ]; }; then + echo "⏭️ Skipping $repo :: $wfid (AIC monitoring family — optimize in $SOURCE_REPO)" + continue + fi + FOUND_WORKFLOW=1 + collect_one "$repo" "$wfid" + done else - echo "⚠️ $WORKFLOW_ID: no log data (exit code $PART_EXIT)" - rm -f "$PART_FILE" + while IFS= read -r wfname; do + [ -n "$wfname" ] || continue + if [ "$repo" != "$SOURCE_REPO" ] && { [ "$wfname" = "Agentic Workflow AIC Usage Optimizer" ] || [ "$wfname" = "Daily Agentic Workflow AIC Usage Audit" ]; }; then + echo "⏭️ Skipping $repo :: $wfname (AIC monitoring family — optimize in $SOURCE_REPO)" + continue + fi + 
FOUND_WORKFLOW=1 + collect_one "$repo" "$wfname" --repo "$repo" + done < <(gh api "repos/$repo/actions/workflows?per_page=100" \ + --jq '.workflows[] | select(.path | endswith(".lock.yml")) | .name' 2>/dev/null || true) fi done if [ "$FOUND_WORKFLOW" -eq 1 ] && ls "$PARTS_DIR"/*.json >/dev/null 2>&1; then jq -s ' - (map(.runs // []) | add // [] | unique_by(.run_id)) as $runs | + (map(.runs // []) | add // [] | unique_by([.repository, .run_id])) as $runs | { summary: { total_runs: ($runs | length), @@ -104,31 +167,34 @@ steps: echo '{"runs":[],"summary":{}}' > /tmp/gh-aw/token-audit/all-runs.json fi + # Defensively drop any monitoring-family runs that slipped through from + # repositories other than the source repo (e.g. a stale window or a renamed + # workflow). Family runs from the source repo are retained as valid targets. BEFORE_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json) - if [[ "$GITHUB_REPOSITORY" != "githubnext/agentic-ops" ]]; then - jq ' - (.runs // []) - | map(select( + jq --arg src "$SOURCE_REPO" ' + (.runs // []) + | map(select( + ((.repository // "") == $src) + or ( (.workflow_path // "") != ".github/workflows/agentic-token-optimizer.lock.yml" and (.workflow_path // "") != ".github/workflows/agentic-token-audit.lock.yml" and (.workflow_name // "") != "Agentic Workflow AIC Usage Optimizer" and (.workflow_name // "") != "Daily Agentic Workflow AIC Usage Audit" - )) as $runs - | { - summary: { - total_runs: ($runs | length), - total_tokens: ($runs | map(.token_usage // 0) | add // 0), - total_aic: ($runs | map(.aic // 0) | add // 0) - }, - runs: $runs - } - ' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/all-runs.filtered.json - mv /tmp/gh-aw/token-audit/all-runs.filtered.json /tmp/gh-aw/token-audit/all-runs.json - AFTER_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json) + ) + )) as $runs + | { + summary: { + total_runs: ($runs | length), + total_tokens: ($runs | map(.token_usage // 0) | 
add // 0), + total_aic: ($runs | map(.aic // 0) | add // 0) + }, + runs: $runs + } + ' /tmp/gh-aw/token-audit/all-runs.json > /tmp/gh-aw/token-audit/all-runs.filtered.json + mv /tmp/gh-aw/token-audit/all-runs.filtered.json /tmp/gh-aw/token-audit/all-runs.json + AFTER_COUNT=$(jq '(.runs // []) | length' /tmp/gh-aw/token-audit/all-runs.json) + if [ "$BEFORE_COUNT" -ne "$AFTER_COUNT" ]; then echo "🚫 Excluded AIC monitoring family from candidate pool: $((BEFORE_COUNT - AFTER_COUNT)) run(s) removed" - else - echo "ℹ️ Running in source repo — AIC monitoring family remains in candidate pool" - AFTER_COUNT=$BEFORE_COUNT fi - name: Aggregate top workflows by AIC usage @@ -144,16 +210,20 @@ steps: | select(.status == "completed") | select((.aic // 0) > 0) | { + repo: (.repository // ""), workflow_name: .workflow_name, + workflow_path: (.workflow_path // ""), ai_credits: (.aic // 0), tokens: (.token_usage // 0), turns: (.turns // 0), action_minutes: (.action_minutes // 0) } ] - | group_by(.workflow_name) + | group_by([.repo, .workflow_name]) | map({ + repo: .[0].repo, workflow_name: .[0].workflow_name, + workflow_path: (map(.workflow_path) | map(select(. != "")) | (.[0] // "")), run_count: length, total_ai_credits: (map(.ai_credits) | add), avg_ai_credits: ((map(.ai_credits) | add) / length), @@ -235,9 +305,9 @@ Treat missing numeric fields (`aic`, `token_usage`, `turns`, `action_minutes`) a - Start from `top-workflows.json`. - Exclude workflows optimized in the last 14 days (use `optimization-log.json`). -- Exclude the AIC monitoring family — the `agentic-token-optimizer` and `agentic-token-audit` workflows (display names "Agentic Workflow AIC Usage Optimizer" and "Daily Agentic Workflow AIC Usage Audit") — **unless this workflow is running in `githubnext/agentic-ops`** (the source repository that ships them). In downstream repositories these workflows are not valid optimization targets; any optimization suggestions for them belong in `githubnext/agentic-ops`. 
In downstream repos they are pre-filtered from `all-runs.json` and `top-workflows.json`, but never select them even if a stale snapshot still lists them. -- Choose the highest AI-credit-spend workflow that remains. -- If no snapshot/history exists, derive candidates directly from `all-runs.json`. +- Exclude the AIC monitoring family — the `agentic-token-optimizer` and `agentic-token-audit` workflows (display names "Agentic Workflow AIC Usage Optimizer" and "Daily Agentic Workflow AIC Usage Audit") — for every repository **except the configured `source-repo`** (the repository that ships them; defaults to `githubnext/agentic-ops`). In other repositories these workflows are not valid optimization targets; suggestions for them belong in the source repo. They are pre-filtered from `all-runs.json` and `top-workflows.json`, but never select them even if a stale snapshot still lists them. +- Choose the highest AI-credit-spend workflow that remains. Candidates may span multiple repositories — each entry in `top-workflows.json` carries a `repo` field identifying where the workflow lives; preserve it through your analysis and reference it in the issue. +- If no snapshot/history exists, derive candidates directly from `all-runs.json` (grouping by `repository` + `workflow_name`). Then collect run-level data for the selected workflow: @@ -281,8 +351,12 @@ Rules: Use `gh api` with `--jq` (via cli-proxy) to read the target workflow `.md` source. Extract only the sections you need — do not load the whole file if a targeted slice is sufficient. ```bash -REPO="${{ github.repository }}" -WF_PATH=".github/workflows/.md" +# Read the SELECTED target workflow's source from its OWN repository. `repo` and +# `workflow_path` come from the chosen entry in top-workflows.json; the .md source +# is the .lock.yml path with the extension swapped. For single-repo audits this is +# simply the current repository. 
+REPO="<owner/repo>" +WF_PATH="<workflow_path with .lock.yml replaced by .md>" # Read the full source only when necessary gh api "repos/$REPO/contents/$WF_PATH" --jq '.content' | base64 -d @@ -355,7 +429,7 @@ Recommend at most 3 inline sub-agents, and only when the combined opportunity is Create one issue with: -- **Target workflow + reason selected** +- **Target workflow (and its repository) + reason selected** - **Analysis period + runs analyzed** - **Spend profile table** (total AIC, avg AIC/run, total tokens, avg turns/run, cache efficiency) - **Ranked recommendations** with: @@ -371,7 +445,7 @@ Create one issue with: - Use `###` for main sections and `####` for subsections. - Keep the selected workflow, token profile summary, and ranked recommendations visible without collapsible sections. - Use `
<details>...</details>` blocks for long supporting tables, raw run evidence, and lower-priority context. -- If you cite specific workflow runs, format them as links like `[§12345](https://github.com/${{ github.repository }}/actions/runs/12345)` and include up to 3 under `**References:**`. +- If you cite specific workflow runs, link them using each run's own `url` field (e.g. `[§12345](<run url>)`) so links resolve correctly even when runs come from multiple repositories. Include up to 3 under `**References:**`. - If you recommend inline sub-agents, include each candidate's task, why a smaller model fits, score breakdown, and the exact invocation change you want made in the main prompt. ## Phase 6 — Update Optimization Log @@ -391,3 +465,4 @@ Load the existing array if present, append, keep only the last 30 entries, and s - Do not modify audit snapshots; only update `optimization-log.json`. - If the target workflow already has inline sub-agents, do not recommend adding more unless there is a clearly separate, still-extractive task. - If no structural optimization is warranted, omit that section rather than padding the issue. +- If the selected workflow lives in another repository and its source cannot be read, continue with run-metrics-based analysis and note the limited source visibility in the issue rather than failing. diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ee0c10..857a1f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,10 @@ Changelog entries should describe what changed for users of these workflows, not ## [Unreleased] +### Added + +- Optional multi-repository auditing. Add a `.github/agentic-ops.yml` config with a `repos:` list to audit and optimize AI-credit spend across several repositories from one central repository; the audit and optimizer collect each repository's logs via `gh aw logs --repo` and aggregate the report by repository and workflow.
Multi-repo collection uses gh-aw's standard `GH_AW_GITHUB_TOKEN` secret (set it to a token with `actions: read` on the listed repositories); the workflows fall back to `GITHUB_TOKEN` (current repository only) when it is unset. An optional `source-repo` key controls which repository keeps the monitoring workflows as optimization targets. With no config file the workflows behave exactly as before, auditing only the repository they run in. + ## [0.3.3] - 2026-06-14 ### Fixed diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7d30d37 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 GitHub, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 37ad91b..d24eec4 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,31 @@ Included workflows: | [`Daily Agentic Workflow AIC Usage Audit`](https://github.com/githubnext/agentic-ops/blob/main/.github/workflows/agentic-token-audit.md?plain=1) | Collects recent agentic workflow usage and creates a daily AIC spend snapshot. | | [`Agentic Workflow AIC Usage Optimizer`](https://github.com/githubnext/agentic-ops/blob/main/.github/workflows/agentic-token-optimizer.md?plain=1) | Analyzes high-AIC workflows and proposes conservative efficiency changes, including inline sub-agent opportunities when they are a strong fit. | +## Auditing multiple repositories + +By default each workflow audits only the repository it runs in. To monitor AI-credit spend across **many repositories from one central repository**, add a `.github/agentic-ops.yml` config to the repo where the workflows run: + +```yaml +repos: + - your-org/repo-a + - your-org/repo-b + - your-org/repo-c +``` + +With `repos` set, the audit and optimizer collect each listed repository's agentic-workflow logs via `gh aw logs --repo` and aggregate them into a single report broken down by repository and workflow. Leave the file out (or leave `repos` empty) to keep the default single-repo behavior — the feature is fully opt-in and backward compatible. + +Multi-repo collection reads each listed repository's GitHub Actions API, so it needs a token with **`actions: read` on every listed repo** (the default `GITHUB_TOKEN` only covers the current repository). These workflows use gh-aw's standard [`GH_AW_GITHUB_TOKEN`](https://github.github.com/gh-aw/reference/auth/) "magic" secret — set it to a PAT (classic `repo` scope, or a fine-grained PAT with Actions read) or a GitHub App token with access to the listed repos: + +```bash +gh aw secrets set GH_AW_GITHUB_TOKEN --value "<your-token>" +``` + +The workflows fall back to `GITHUB_TOKEN` (current repo only) when `GH_AW_GITHUB_TOKEN` is unset.
+ +Optional keys in `.github/agentic-ops.yml`: + +- `source-repo` — the repository that develops the audit/optimizer workflows themselves (defaults to `githubnext/agentic-ops`). The optimizer keeps the monitoring workflows eligible for optimization only in that repository and excludes them everywhere else. + ## License MIT