Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/forge_loop/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ class Config:
# sub-issue is closed. Sourced from settings.maintenance.epic_label.
epic_label: str = "epic"

# Epic TTL-expiry pass (issue #435). Whole-day TTL: an open epic with zero
# open sub-issues older than this is auto-closed with an expiry audit
# comment. ``0`` DISABLES the pass (pre-#435 no-op). Sourced from
# settings.maintenance.epic_ttl_days.
epic_ttl_days: int = 0

@property
def state_dir(self) -> Path:
return self.repo / "docs" / "ops"
Expand Down Expand Up @@ -361,6 +367,7 @@ def _from_settings(s: Settings) -> Config:
stuck_threshold_attempts=s.maintenance.stuck_threshold_attempts,
stuck_tail_events=s.maintenance.stuck_tail_events,
epic_label=s.maintenance.epic_label,
epic_ttl_days=s.maintenance.epic_ttl_days,
)


Expand Down
165 changes: 137 additions & 28 deletions src/forge_loop/epic_sweep.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Protocol

from forge_loop.gh_client import Issue, SubIssue
Expand Down Expand Up @@ -86,11 +87,18 @@ class EpicSweepReport:
Every list holds epic issue NUMBERS. ``errors`` maps an epic number to a
short reason so the operator can see why a close didn't land without
grepping structlog. Mutually exclusive: each evaluated epic lands in
exactly one of ``closed`` / ``skipped_open_subs`` / ``skipped_no_subs`` /
``errors``.
exactly one of ``closed`` / ``expired`` / ``skipped_open_subs`` /
``skipped_no_subs`` / ``errors``.

``closed`` were closed via the all-subs-resolved (completed) path;
``expired`` were closed via the TTL path (issue #435) — an open epic with
zero open sub-issues that aged past ``epic_ttl_days``. They are reported
distinctly because they mean different things to an operator: completed
work vs. an undecomposed epic reaped for staleness.
"""

closed: list[int] = field(default_factory=list)
expired: list[int] = field(default_factory=list)
skipped_open_subs: list[int] = field(default_factory=list)
skipped_no_subs: list[int] = field(default_factory=list)
errors: dict[int, str] = field(default_factory=dict)
Expand Down Expand Up @@ -130,6 +138,72 @@ def build_close_comment(epic_number: int, subs: list[SubIssue]) -> str:
)


def build_expiry_comment(epic_number: int, age_days: int, ttl_days: int) -> str:
"""Build the audit comment posted before a TTL-expired epic is closed (#435).

Deliberately and textually DISTINCT from :func:`build_close_comment`: the
completed comment lists resolved sub-issues, this one names the age + TTL
(``open 158 days, TTL 90 days``) so an operator can tell at a glance the
epic was reaped for *staleness*, not because its work finished.
"""
return (
f"Auto-expiring epic #{epic_number}: open {age_days} days, TTL {ttl_days} "
"days, with zero open sub-issues.\n\n"
"Closed automatically by the forge-loop epic TTL sweep (issue #435) "
"because it exceeded the configured epic time-to-live without being "
"broken into tracked sub-issues. Re-open if this was premature."
)


def _age_days(created_at: str | None, now: datetime) -> int | None:
"""Whole-day age (``now − created_at``) of an epic, or ``None`` when the
timestamp is missing/unparseable (cannot prove age → caller must fail safe
and never expire). Mirrors ``control/status._oldest_age_days`` for a single
issue: aware-datetime math, floored at 0."""
if not created_at:
return None
try:
created = datetime.fromisoformat(str(created_at).replace("Z", "+00:00"))
except (ValueError, TypeError):
return None
if created.tzinfo is None:
created = created.replace(tzinfo=UTC)
reference = now if now.tzinfo is not None else now.replace(tzinfo=UTC)
return max(0, (reference - created).days)


def _post_and_close(
gh_client: GhClientLike,
*,
owner: str,
repo: str,
epic_number: int,
comment: str,
reason: str,
) -> tuple[bool, str | None]:
"""Post the audit comment (best-effort) then close the epic.

Shared by the completed and TTL close paths so neither reinvents the
comment-then-close-then-record sequence (manifesto Q7). Returns
``(closed_ok, error_reason)``: a comment failure is swallowed (best-effort,
mirrors stuck_sweep.py); only the close is load-bearing. ``error_reason`` is
``None`` on success, else a short string for ``report.errors``.
"""
log = get_logger()
try:
gh_client.add_comment(owner, repo, epic_number, comment)
except Exception as ex: # noqa: BLE001 — comment is best-effort
log.info("epic_sweep_comment_failed", epic=epic_number, err=str(ex)[:200])
try:
ok = gh_client.close_issue(owner, repo, epic_number, reason=reason)
except Exception as ex: # noqa: BLE001
log.warning("epic_sweep_close_failed", epic=epic_number, err=str(ex)[:200])
return False, f"close: {ex}"[:200]
if ok:
return True, None
return False, "close_returned_false"


# ---------------------------------------------------------------------------
# Public entrypoint
# ---------------------------------------------------------------------------
Expand All @@ -142,8 +216,11 @@ def sweep(
repo: str,
epic_label: str = "epic",
limit: int = 100,
epic_ttl_days: int = 0,
now: datetime | None = None,
) -> EpicSweepReport:
"""Close every open epic whose tracked sub-issues are all resolved.
"""Close every open epic whose tracked sub-issues are all resolved, plus
expire stale undecomposed epics past a TTL (issue #435).

Parameters
----------
Expand All @@ -156,14 +233,27 @@ def sweep(
Label that marks an issue as an epic. Default ``"epic"``.
limit:
Max epics to consider in one pass (bounds the GraphQL fan-out).
epic_ttl_days:
TTL in whole days for the expiry pass (issue #435). An open epic with
**zero open** sub-issues whose age (``now − created_at``) exceeds this
is closed via :func:`build_expiry_comment` and reported under
``expired``. A value ``<= 0`` **disables** the TTL pass entirely (the
pre-#435 no-op). The all-subs-resolved (completed) close and the
≥1-open-sub fail-safe both dominate the TTL check.
now:
Reference instant for the age computation, **injected** so the function
stays deterministic/clock-free in tests. Defaults to the current UTC
time (mirrors ``tick_checks._agent_live_paths``); only consulted when
the TTL pass is enabled.

Returns
-------
EpicSweepReport summarising what we touched. Never raises — every
GhClient error is caught and recorded.
"""
log = get_logger()
report = EpicSweepReport()
clock = now if now is not None else datetime.now(UTC)
log = get_logger()

try:
epics = gh_client.issues_by_label(owner, repo, epic_label, limit)
Expand All @@ -189,34 +279,52 @@ def sweep(
continue
snapshot.append((epic, subs))

# Phase 2 — decide + act on the snapshot.
# Phase 2 — decide + act on the snapshot. Bucket order encodes the
# precedence the spec mandates: the ≥1-open-sub fail-safe DOMINATES (an
# ancient epic with live work is never expired); the all-subs-resolved
# (completed) close is checked before the TTL pass (an old, fully-resolved
# epic still closes via the completed comment, not the expiry comment); the
# TTL pass only ever reaps the leak population — epics with zero tracked
# sub-issues — and only when enabled + provably aged past the TTL.
for epic, subs in snapshot:
if not subs:
report.skipped_no_subs.append(epic.number)
continue
if any(not _is_closed(s) for s in subs):
report.skipped_open_subs.append(epic.number)
if subs:
if any(not _is_closed(s) for s in subs):
report.skipped_open_subs.append(epic.number) # fail-safe: live work
continue
# All sub-issues closed → completed close path (unchanged, #367).
ok, err = _post_and_close(
gh_client,
owner=owner,
repo=repo,
epic_number=epic.number,
comment=build_close_comment(epic.number, subs),
reason="completed",
)
if ok:
report.closed.append(epic.number)
elif err is not None:
report.errors[epic.number] = err
continue

# All sub-issues closed → post the audit comment (best-effort) then
# close the epic. The close is the load-bearing op; a comment-only
# failure still lets the close proceed (mirrors stuck_sweep.py).
body = build_close_comment(epic.number, subs)
try:
gh_client.add_comment(owner, repo, epic.number, body)
except Exception as ex: # noqa: BLE001 — comment is best-effort
log.info("epic_sweep_comment_failed", epic=epic.number, err=str(ex)[:200])

try:
ok = gh_client.close_issue(owner, repo, epic.number, reason="completed")
except Exception as ex: # noqa: BLE001
log.warning("epic_sweep_close_failed", epic=epic.number, err=str(ex)[:200])
report.errors[epic.number] = f"close: {ex}"[:200]
# Zero tracked sub-issues — the immortal leak population (#435). Expire
# iff the TTL pass is enabled AND the age is provable AND past the TTL;
# a None age (unknown created_at) fails safe → never expired.
age = _age_days(epic.created_at, clock) if epic_ttl_days > 0 else None
if age is not None and age > epic_ttl_days:
ok, err = _post_and_close(
gh_client,
owner=owner,
repo=repo,
epic_number=epic.number,
comment=build_expiry_comment(epic.number, age, epic_ttl_days),
reason="not_planned",
)
if ok:
report.expired.append(epic.number)
elif err is not None:
report.errors[epic.number] = err
continue
if ok:
report.closed.append(epic.number)
else:
report.errors[epic.number] = "close_returned_false"
report.skipped_no_subs.append(epic.number)

return report

Expand All @@ -225,5 +333,6 @@ def sweep(
"EpicSweepReport",
"GhClientLike",
"build_close_comment",
"build_expiry_comment",
"sweep",
]
12 changes: 8 additions & 4 deletions src/forge_loop/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,15 +347,19 @@ class EpicSweepDoneEvent(EventBase):

Emitted by ``forge_loop.runner.tick_checks.run_epic_sweep`` on the
maintenance cadence. Each list holds epic issue NUMBERS: ``closed`` were
auto-closed (all tracked sub-issues resolved); ``skipped_open_subs`` had
≥ 1 still-open sub-issue; ``skipped_no_subs`` had no tracked sub-issues;
``errors`` hit a GhClient failure (sub-issue lookup or close) and were
left untouched.
auto-closed (all tracked sub-issues resolved); ``expired`` were closed by
the TTL pass (issue #435 — zero open sub-issues, aged past
``epic_ttl_days``), reported distinctly from ``closed`` because they mean
an undecomposed epic reaped for staleness, not finished work;
``skipped_open_subs`` had ≥ 1 still-open sub-issue; ``skipped_no_subs`` had
no tracked sub-issues; ``errors`` hit a GhClient failure (sub-issue lookup
or close) and were left untouched.
"""

KIND: ClassVar[str] = "epic_sweep_done"
tick: int = Field(ge=0, default=0)
closed: list[int] = Field(default_factory=list)
expired: list[int] = Field(default_factory=list)
skipped_open_subs: list[int] = Field(default_factory=list)
skipped_no_subs: list[int] = Field(default_factory=list)
errors: list[int] = Field(default_factory=list)
Expand Down
26 changes: 21 additions & 5 deletions src/forge_loop/runner/tick_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import subprocess
import time
from collections.abc import Callable
from datetime import UTC, datetime
from pathlib import Path

from forge_loop.branch_sweep import BranchSweepReport
Expand Down Expand Up @@ -67,15 +68,21 @@ def run_stuck_sweep(cfg: Config, tick: int) -> SweepReport | None:


def run_epic_sweep(
cfg: Config, tick: int, *, client: GhClientLike | None = None
cfg: Config,
tick: int,
*,
client: GhClientLike | None = None,
now: datetime | None = None,
) -> EpicSweepReport | None:
"""Auto-close epics whose tracked sub-issues are all resolved (issue #367).
"""Auto-close resolved epics (#367) + expire stale undecomposed epics (#435).

Runs on the maintenance cadence (``maintenance_every_n_ticks``) only — a
no-op off-cadence (returns ``None`` without touching GitHub). Deterministic
Python; spawns NO LLM subagent. Emits a typed ``epic_sweep_done`` summary
event and returns the report. ``client`` is injectable for tests; in
production it is the real ``GithubkitClient``.
event (carrying ``expired`` distinctly from ``closed``) and returns the
report. ``client`` is injectable for tests; in production it is the real
``GithubkitClient``. ``now`` is injected for clock-free tests, defaulting to
the current UTC instant; it + ``cfg.epic_ttl_days`` drive the TTL pass.
"""
if cfg.maintenance_every_n_ticks <= 0 or tick % cfg.maintenance_every_n_ticks != 0:
return None
Expand All @@ -95,8 +102,16 @@ def run_epic_sweep(
reason=f"gh_client_init: {ex}"[:200],
)
return None
effective_now = now if now is not None else datetime.now(UTC)
try:
report = _epic_sweep(client, owner=owner, repo=repo, epic_label=cfg.epic_label)
report = _epic_sweep(
client,
owner=owner,
repo=repo,
epic_label=cfg.epic_label,
epic_ttl_days=cfg.epic_ttl_days,
now=effective_now,
)
except Exception as ex: # noqa: BLE001 — the sweep never raises, but belt-and-braces
append_event(cfg.events_file, "epic_sweep_crashed", tick=tick, err=str(ex)[:200])
return None
Expand All @@ -108,6 +123,7 @@ def run_epic_sweep(
EpicSweepDoneEvent(
tick=tick,
closed=report.closed,
expired=report.expired,
skipped_open_subs=report.skipped_open_subs,
skipped_no_subs=report.skipped_no_subs,
errors=list(report.errors),
Expand Down
5 changes: 5 additions & 0 deletions src/forge_loop/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,11 @@ class MaintenanceSettings(BaseSettings):
# Label marking an issue as an epic; the deterministic epic sweep (#367)
# closes an open epic once every tracked sub-issue is closed.
epic_label: str = "epic"
# TTL in whole days for the epic-expiry pass (issue #435). An open epic
# with zero open sub-issues older than this is auto-closed with an expiry
# audit comment. ``0`` (the default) DISABLES the TTL pass — pre-#435
# behaviour, so existing deployments keep expiring nothing until they opt in.
epic_ttl_days: int = 0


class MiscSettings(BaseSettings):
Expand Down
Loading
Loading