diff --git a/src/forge_loop/config.py b/src/forge_loop/config.py index 09f9210..54f7050 100644 --- a/src/forge_loop/config.py +++ b/src/forge_loop/config.py @@ -205,6 +205,12 @@ class Config: # sub-issue is closed. Sourced from settings.maintenance.epic_label. epic_label: str = "epic" + # Epic TTL-expiry pass (issue #435). Whole-day TTL: an open epic with zero + # open sub-issues older than this is auto-closed with an expiry audit + # comment. ``0`` DISABLES the pass (pre-#435 no-op). Sourced from + # settings.maintenance.epic_ttl_days. + epic_ttl_days: int = 0 + @property def state_dir(self) -> Path: return self.repo / "docs" / "ops" @@ -361,6 +367,7 @@ def _from_settings(s: Settings) -> Config: stuck_threshold_attempts=s.maintenance.stuck_threshold_attempts, stuck_tail_events=s.maintenance.stuck_tail_events, epic_label=s.maintenance.epic_label, + epic_ttl_days=s.maintenance.epic_ttl_days, ) diff --git a/src/forge_loop/epic_sweep.py b/src/forge_loop/epic_sweep.py index 1b96720..2c78a75 100644 --- a/src/forge_loop/epic_sweep.py +++ b/src/forge_loop/epic_sweep.py @@ -53,6 +53,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from datetime import UTC, datetime from typing import Protocol from forge_loop.gh_client import Issue, SubIssue @@ -86,11 +87,18 @@ class EpicSweepReport: Every list holds epic issue NUMBERS. ``errors`` maps an epic number to a short reason so the operator can see why a close didn't land without grepping structlog. Mutually exclusive: each evaluated epic lands in - exactly one of ``closed`` / ``skipped_open_subs`` / ``skipped_no_subs`` / - ``errors``. + exactly one of ``closed`` / ``expired`` / ``skipped_open_subs`` / + ``skipped_no_subs`` / ``errors``. + + ``closed`` were closed via the all-subs-resolved (completed) path; + ``expired`` were closed via the TTL path (issue #435) — an open epic with + zero open sub-issues that aged past ``epic_ttl_days``. They are reported + distinctly because they mean different things to an operator: completed + work vs. an undecomposed epic reaped for staleness. """ closed: list[int] = field(default_factory=list) + expired: list[int] = field(default_factory=list) skipped_open_subs: list[int] = field(default_factory=list) skipped_no_subs: list[int] = field(default_factory=list) errors: dict[int, str] = field(default_factory=dict) @@ -130,6 +138,72 @@ def build_close_comment(epic_number: int, subs: list[SubIssue]) -> str: ) +def build_expiry_comment(epic_number: int, age_days: int, ttl_days: int) -> str: + """Build the audit comment posted before a TTL-expired epic is closed (#435). + + Deliberately and textually DISTINCT from :func:`build_close_comment`: the + completed comment lists resolved sub-issues, this one names the age + TTL + (``open 158 days, TTL 90 days``) so an operator can tell at a glance the + epic was reaped for *staleness*, not because its work finished. + """ + return ( + f"Auto-expiring epic #{epic_number}: open {age_days} days, TTL {ttl_days} " + "days, with zero open sub-issues.\n\n" + "Closed automatically by the forge-loop epic TTL sweep (issue #435) " + "because it exceeded the configured epic time-to-live without being " + "broken into tracked sub-issues. Re-open if this was premature." + ) + + +def _age_days(created_at: str | None, now: datetime) -> int | None: + """Whole-day age (``now − created_at``) of an epic, or ``None`` when the + timestamp is missing/unparseable (cannot prove age → caller must fail safe + and never expire). Mirrors ``control/status._oldest_age_days`` for a single + issue: aware-datetime math, floored at 0.""" + if not created_at: + return None + try: + created = datetime.fromisoformat(str(created_at).replace("Z", "+00:00")) + except (ValueError, TypeError): + return None + if created.tzinfo is None: + created = created.replace(tzinfo=UTC) + reference = now if now.tzinfo is not None else now.replace(tzinfo=UTC) + return max(0, (reference - created).days) + + +def _post_and_close( + gh_client: GhClientLike, + *, + owner: str, + repo: str, + epic_number: int, + comment: str, + reason: str, +) -> tuple[bool, str | None]: + """Post the audit comment (best-effort) then close the epic. + + Shared by the completed and TTL close paths so neither reinvents the + comment-then-close-then-record sequence (manifesto Q7). Returns + ``(closed_ok, error_reason)``: a comment failure is swallowed (best-effort, + mirrors stuck_sweep.py); only the close is load-bearing. ``error_reason`` is + ``None`` on success, else a short string for ``report.errors``. + """ + log = get_logger() + try: + gh_client.add_comment(owner, repo, epic_number, comment) + except Exception as ex: # noqa: BLE001 — comment is best-effort + log.info("epic_sweep_comment_failed", epic=epic_number, err=str(ex)[:200]) + try: + ok = gh_client.close_issue(owner, repo, epic_number, reason=reason) + except Exception as ex: # noqa: BLE001 + log.warning("epic_sweep_close_failed", epic=epic_number, err=str(ex)[:200]) + return False, f"close: {ex}"[:200] + if ok: + return True, None + return False, "close_returned_false" + + # --------------------------------------------------------------------------- # Public entrypoint # --------------------------------------------------------------------------- @@ -142,8 +216,11 @@ def sweep( repo: str, epic_label: str = "epic", limit: int = 100, + epic_ttl_days: int = 0, + now: datetime | None = None, ) -> EpicSweepReport: - """Close every open epic whose tracked sub-issues are all resolved. + """Close every open epic whose tracked sub-issues are all resolved, plus + expire stale undecomposed epics past a TTL (issue #435). Parameters ---------- @@ -156,14 +233,27 @@ def sweep( Label that marks an issue as an epic. Default ``"epic"``. limit: Max epics to consider in one pass (bounds the GraphQL fan-out). + epic_ttl_days: + TTL in whole days for the expiry pass (issue #435). An open epic with + **zero open** sub-issues whose age (``now − created_at``) exceeds this + is closed via :func:`build_expiry_comment` and reported under + ``expired``. A value ``<= 0`` **disables** the TTL pass entirely (the + pre-#435 no-op). The all-subs-resolved (completed) close and the + ≥1-open-sub fail-safe both dominate the TTL check. + now: + Reference instant for the age computation, **injected** so the function + stays deterministic/clock-free in tests. Defaults to the current UTC + time (mirrors ``tick_checks._agent_live_paths``); only consulted when + the TTL pass is enabled. Returns ------- EpicSweepReport summarising what we touched. Never raises — every GhClient error is caught and recorded. """ - log = get_logger() report = EpicSweepReport() + clock = now if now is not None else datetime.now(UTC) + log = get_logger() try: epics = gh_client.issues_by_label(owner, repo, epic_label, limit) @@ -189,34 +279,52 @@ def sweep( continue snapshot.append((epic, subs)) - # Phase 2 — decide + act on the snapshot. + # Phase 2 — decide + act on the snapshot. Bucket order encodes the + # precedence the spec mandates: the ≥1-open-sub fail-safe DOMINATES (an + # ancient epic with live work is never expired); the all-subs-resolved + # (completed) close is checked before the TTL pass (an old, fully-resolved + # epic still closes via the completed comment, not the expiry comment); the + # TTL pass only ever reaps the leak population — epics with zero tracked + # sub-issues — and only when enabled + provably aged past the TTL. for epic, subs in snapshot: - if not subs: - report.skipped_no_subs.append(epic.number) - continue - if any(not _is_closed(s) for s in subs): - report.skipped_open_subs.append(epic.number) + if subs: + if any(not _is_closed(s) for s in subs): + report.skipped_open_subs.append(epic.number) # fail-safe: live work + continue + # All sub-issues closed → completed close path (unchanged, #367). + ok, err = _post_and_close( + gh_client, + owner=owner, + repo=repo, + epic_number=epic.number, + comment=build_close_comment(epic.number, subs), + reason="completed", + ) + if ok: + report.closed.append(epic.number) + elif err is not None: + report.errors[epic.number] = err continue - # All sub-issues closed → post the audit comment (best-effort) then - # close the epic. The close is the load-bearing op; a comment-only - # failure still lets the close proceed (mirrors stuck_sweep.py). - body = build_close_comment(epic.number, subs) - try: - gh_client.add_comment(owner, repo, epic.number, body) - except Exception as ex: # noqa: BLE001 — comment is best-effort - log.info("epic_sweep_comment_failed", epic=epic.number, err=str(ex)[:200]) - - try: - ok = gh_client.close_issue(owner, repo, epic.number, reason="completed") - except Exception as ex: # noqa: BLE001 - log.warning("epic_sweep_close_failed", epic=epic.number, err=str(ex)[:200]) - report.errors[epic.number] = f"close: {ex}"[:200] + # Zero tracked sub-issues — the immortal leak population (#435). Expire + # iff the TTL pass is enabled AND the age is provable AND past the TTL; + # a None age (unknown created_at) fails safe → never expired. + age = _age_days(epic.created_at, clock) if epic_ttl_days > 0 else None + if age is not None and age > epic_ttl_days: + ok, err = _post_and_close( + gh_client, + owner=owner, + repo=repo, + epic_number=epic.number, + comment=build_expiry_comment(epic.number, age, epic_ttl_days), + reason="not_planned", + ) + if ok: + report.expired.append(epic.number) + elif err is not None: + report.errors[epic.number] = err continue - if ok: - report.closed.append(epic.number) - else: - report.errors[epic.number] = "close_returned_false" + report.skipped_no_subs.append(epic.number) return report @@ -225,5 +333,6 @@ def sweep( "EpicSweepReport", "GhClientLike", "build_close_comment", + "build_expiry_comment", "sweep", ] diff --git a/src/forge_loop/events.py b/src/forge_loop/events.py index 10d277e..d0407b8 100644 --- a/src/forge_loop/events.py +++ b/src/forge_loop/events.py @@ -347,15 +347,19 @@ class EpicSweepDoneEvent(EventBase): Emitted by ``forge_loop.runner.tick_checks.run_epic_sweep`` on the maintenance cadence. Each list holds epic issue NUMBERS: ``closed`` were - auto-closed (all tracked sub-issues resolved); ``skipped_open_subs`` had - ≥ 1 still-open sub-issue; ``skipped_no_subs`` had no tracked sub-issues; - ``errors`` hit a GhClient failure (sub-issue lookup or close) and were - left untouched. + auto-closed (all tracked sub-issues resolved); ``expired`` were closed by + the TTL pass (issue #435 — zero open sub-issues, aged past + ``epic_ttl_days``), reported distinctly from ``closed`` because they mean + an undecomposed epic reaped for staleness, not finished work; + ``skipped_open_subs`` had ≥ 1 still-open sub-issue; ``skipped_no_subs`` had + no tracked sub-issues; ``errors`` hit a GhClient failure (sub-issue lookup + or close) and were left untouched. """ KIND: ClassVar[str] = "epic_sweep_done" tick: int = Field(ge=0, default=0) closed: list[int] = Field(default_factory=list) + expired: list[int] = Field(default_factory=list) skipped_open_subs: list[int] = Field(default_factory=list) skipped_no_subs: list[int] = Field(default_factory=list) errors: list[int] = Field(default_factory=list) diff --git a/src/forge_loop/runner/tick_checks.py b/src/forge_loop/runner/tick_checks.py index 3193f78..f01d40c 100644 --- a/src/forge_loop/runner/tick_checks.py +++ b/src/forge_loop/runner/tick_checks.py @@ -7,6 +7,7 @@ import subprocess import time from collections.abc import Callable +from datetime import UTC, datetime from pathlib import Path from forge_loop.branch_sweep import BranchSweepReport @@ -67,15 +68,21 @@ def run_stuck_sweep(cfg: Config, tick: int) -> SweepReport | None: def run_epic_sweep( - cfg: Config, tick: int, *, client: GhClientLike | None = None + cfg: Config, + tick: int, + *, + client: GhClientLike | None = None, + now: datetime | None = None, ) -> EpicSweepReport | None: - """Auto-close epics whose tracked sub-issues are all resolved (issue #367). + """Auto-close resolved epics (#367) + expire stale undecomposed epics (#435). Runs on the maintenance cadence (``maintenance_every_n_ticks``) only — a no-op off-cadence (returns ``None`` without touching GitHub). Deterministic Python; spawns NO LLM subagent. Emits a typed ``epic_sweep_done`` summary - event and returns the report. ``client`` is injectable for tests; in - production it is the real ``GithubkitClient``. + event (carrying ``expired`` distinctly from ``closed``) and returns the + report. ``client`` is injectable for tests; in production it is the real + ``GithubkitClient``. ``now`` is injected for clock-free tests, defaulting to + the current UTC instant; it + ``cfg.epic_ttl_days`` drive the TTL pass. """ if cfg.maintenance_every_n_ticks <= 0 or tick % cfg.maintenance_every_n_ticks != 0: return None @@ -95,8 +102,16 @@ def run_epic_sweep( reason=f"gh_client_init: {ex}"[:200], ) return None + effective_now = now if now is not None else datetime.now(UTC) try: - report = _epic_sweep(client, owner=owner, repo=repo, epic_label=cfg.epic_label) + report = _epic_sweep( + client, + owner=owner, + repo=repo, + epic_label=cfg.epic_label, + epic_ttl_days=cfg.epic_ttl_days, + now=effective_now, + ) except Exception as ex: # noqa: BLE001 — the sweep never raises, but belt-and-braces append_event(cfg.events_file, "epic_sweep_crashed", tick=tick, err=str(ex)[:200]) return None @@ -108,6 +123,7 @@ def run_epic_sweep( EpicSweepDoneEvent( tick=tick, closed=report.closed, + expired=report.expired, skipped_open_subs=report.skipped_open_subs, skipped_no_subs=report.skipped_no_subs, errors=list(report.errors), diff --git a/src/forge_loop/settings.py b/src/forge_loop/settings.py index 2e2b62c..6606a58 100644 --- a/src/forge_loop/settings.py +++ b/src/forge_loop/settings.py @@ -473,6 +473,11 @@ class MaintenanceSettings(BaseSettings): # Label marking an issue as an epic; the deterministic epic sweep (#367) # closes an open epic once every tracked sub-issue is closed. epic_label: str = "epic" + # TTL in whole days for the epic-expiry pass (issue #435). An open epic + # with zero open sub-issues older than this is auto-closed with an expiry + # audit comment. ``0`` (the default) DISABLES the TTL pass — pre-#435 + # behaviour, so existing deployments keep expiring nothing until they opt in. + epic_ttl_days: int = 0 class MiscSettings(BaseSettings): diff --git a/tests/test_epic_sweep.py b/tests/test_epic_sweep.py index e153bbf..4ec54c1 100644 --- a/tests/test_epic_sweep.py +++ b/tests/test_epic_sweep.py @@ -19,19 +19,36 @@ from __future__ import annotations import json +from datetime import UTC, datetime from pathlib import Path from typing import Any from forge_loop.config import Config -from forge_loop.epic_sweep import EpicSweepReport, build_close_comment, sweep +from forge_loop.epic_sweep import ( + EpicSweepReport, + build_close_comment, + build_expiry_comment, + sweep, +) from forge_loop.gh_client import GhError, Issue, MockGhClient, SubIssue from forge_loop.runner.tick_checks import run_epic_sweep EPIC = "epic" +# Fixed clock for the deterministic TTL tests — matches issue #435's worked +# example (today = 2026-06-08), so a 2026-01-01 epic is 158 days old. +NOW = datetime(2026, 6, 8, tzinfo=UTC) -def _epic(number: int, *, state: str = "open") -> Issue: - return Issue(number=number, title=f"epic {number}", body="", state=state, labels=[EPIC]) + +def _epic(number: int, *, state: str = "open", created_at: str | None = None) -> Issue: + return Issue( + number=number, + title=f"epic {number}", + body="", + state=state, + labels=[EPIC], + created_at=created_at, + ) def _sub(number: int, *, state: str = "CLOSED", closing_pr: int | None = None) -> SubIssue: @@ -215,6 +232,139 @@ def test_child_epic_close_does_not_cascade_close_parent() -> None: assert closed_numbers == [501] +# --------------------------------------------------------------------------- +# TTL expiry pass (issue #435) +# --------------------------------------------------------------------------- + + +def test_stale_epic_with_no_open_subs_is_expired() -> None: + """Primary falsifiable acceptance: zero open subs + age > TTL → expired, + closed with the expiry reason, the expiry audit comment posted.""" + epic = _epic(390, created_at="2026-01-01T00:00:00Z") # 158 days old at NOW + gh = MockGhClient(issues_by_label_response=[epic], sub_issues_by_epic={}) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=90, now=NOW) + + assert rep.expired == [390] + assert rep.closed == [] + assert rep.skipped_no_subs == [] + assert rep.errors == {} + methods = [m for m, _ in gh.calls] + assert "add_comment" in methods + assert "close_issue" in methods + close_kw = [kw for m, kw in gh.calls if m == "close_issue"][0] + assert close_kw["number"] == 390 + assert close_kw["reason"] == "not_planned" # distinct from the "completed" path + comment = [kw for m, kw in gh.calls if m == "add_comment"][0]["body"] + assert "158 days" in comment + assert "TTL 90" in comment + + +def test_epic_within_ttl_is_not_expired() -> None: + """Zero open subs but age < TTL → left open (skipped_no_subs), no comment.""" + epic = _epic(391, created_at="2026-05-01T00:00:00Z") # ~38 days old at NOW + gh = MockGhClient(issues_by_label_response=[epic], sub_issues_by_epic={}) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=90, now=NOW) + + assert rep.expired == [] + assert rep.skipped_no_subs == [391] + methods = [m for m, _ in gh.calls] + assert "close_issue" not in methods + assert "add_comment" not in methods + + +def test_epic_with_open_sub_is_never_expired_even_when_ancient() -> None: + """Fail-safe (load-bearing): ≥1 open sub dominates the TTL check — an + ancient epic with live work stays under skipped_open_subs, never expired.""" + epic = _epic(392, created_at="2020-01-01T00:00:00Z") # ~6 years old + gh = MockGhClient( + issues_by_label_response=[epic], + sub_issues_by_epic={392: [_sub(146), _sub(150, state="OPEN")]}, + ) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=90, now=NOW) + + assert rep.expired == [] + assert rep.closed == [] + assert rep.skipped_open_subs == [392] + assert "close_issue" not in [m for m, _ in gh.calls] + + +def test_epic_with_no_created_at_is_never_expired() -> None: + """created_at=None → age unprovable → fail safe (never expired), no raise.""" + epic = _epic(393, created_at=None) + gh = MockGhClient(issues_by_label_response=[epic], sub_issues_by_epic={}) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=90, now=NOW) + + assert rep.expired == [] + assert rep.skipped_no_subs == [393] + assert "close_issue" not in [m for m, _ in gh.calls] + + +def test_ttl_zero_disables_expiry_pass() -> None: + """epic_ttl_days=0 → behaviour identical to pre-#435: no expiry closes even + for an ancient undecomposed epic (it lands in skipped_no_subs).""" + epic = _epic(394, created_at="2020-01-01T00:00:00Z") + gh = MockGhClient(issues_by_label_response=[epic], sub_issues_by_epic={}) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=0, now=NOW) + + assert rep.expired == [] + assert rep.skipped_no_subs == [394] + assert "close_issue" not in [m for m, _ in gh.calls] + + +def test_all_subs_resolved_still_uses_completed_path_not_ttl() -> None: + """An old epic whose subs are all CLOSED closes via the completed comment + (reason='completed'), NOT the TTL path — even though it is also TTL-aged.""" + epic = _epic(395, created_at="2020-01-01T00:00:00Z") + gh = MockGhClient( + issues_by_label_response=[epic], + sub_issues_by_epic={395: [_sub(146), _sub(150)]}, + ) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=90, now=NOW) + + assert rep.closed == [395] + assert rep.expired == [] + close_kw = [kw for m, kw in gh.calls if m == "close_issue"][0] + assert close_kw["reason"] == "completed" + comment = [kw for m, kw in gh.calls if m == "add_comment"][0]["body"] + assert "all 2 sub-issues resolved" in comment + assert "TTL" not in comment + + +def test_expiry_comment_is_distinct_from_resolved_comment() -> None: + """The expiry body must differ from build_close_comment AND name age/TTL.""" + expiry = build_expiry_comment(390, age_days=158, ttl_days=90) + resolved = build_close_comment(390, [_sub(146), _sub(150)]) + assert expiry != resolved + assert "158" in expiry + assert "90" in expiry + assert "TTL" in expiry + # The expiry comment must NOT claim sub-issues were resolved. + assert "sub-issues resolved" not in expiry + + +def test_ttl_close_issue_error_is_recorded_not_raised() -> None: + """Adversarial: close_issue raises on a TTL-eligible epic → recorded under + errors, sweep returns normally (the tick is never crashed).""" + epic = _epic(396, created_at="2026-01-01T00:00:00Z") + gh = MockGhClient( + issues_by_label_response=[epic], + sub_issues_by_epic={}, + raise_on={"close_issue": GhError("close_issue", 500, "boom")}, + ) + + rep = sweep(gh, owner="o", repo="r", epic_label=EPIC, epic_ttl_days=90, now=NOW) + + assert rep.expired == [] + assert 396 in rep.errors + assert "close" in rep.errors[396] + + # --------------------------------------------------------------------------- # Integration — tick wiring + cadence gate # --------------------------------------------------------------------------- @@ -250,6 +400,30 @@ def test_run_epic_sweep_writes_event_and_returns_report_on_cadence(tmp_path: Pat assert events[0]["tick"] == 5 +def test_run_epic_sweep_event_carries_expired_with_ttl_and_injected_now( + tmp_path: Path, +) -> None: + """Tick wiring threads cfg.epic_ttl_days + an injected ``now`` into the pure + sweep, and the EpicSweepDoneEvent carries ``expired`` distinctly.""" + cfg = Config( + repo=tmp_path, github_repo="o/r", maintenance_every_n_ticks=5, epic_ttl_days=90 + ) + gh = MockGhClient( + issues_by_label_response=[_epic(390, created_at="2026-01-01T00:00:00Z")], + sub_issues_by_epic={}, + ) + + rep = run_epic_sweep(cfg, tick=5, client=gh, now=NOW) + + assert rep is not None + assert rep.expired == [390] + assert rep.closed == [] + events = [e for e in _read_events(cfg) if e.get("kind") == "epic_sweep_done"] + assert len(events) == 1 + assert events[0]["expired"] == [390] + assert events[0]["closed"] == [] + + def test_run_epic_sweep_is_noop_off_cadence(tmp_path: Path) -> None: cfg = _cfg(tmp_path) gh = MockGhClient(