From cfde540b76b9221f92e79df0f57931d97a54e902 Mon Sep 17 00:00:00 2001
From: Jared Wein <jwein@mozilla.com>
Date: Thu, 28 May 2026 17:28:06 -0400
Subject: [PATCH 1/4] Gate automated review on risk/complexity scores and test
 coverage

Adds a cheap pre-screen so the (expensive) review agent only runs on revisions
that score low on risk and complexity, letting teams "ease in" by raising
thresholds as confidence grows.

Library (bugbug/tools/code_review/):
- diff_analysis.py: classify the patch's in-diff test signal (absent / sparse /
  adequate / tests_only / no_code_change) and collect changed non-test paths.
- test_coverage.py: probe mozilla-central via the async searchfox client for
  existing tests referencing the changed files (covered / partial / uncovered);
  degrades gracefully if searchfox is unavailable.
- scoring.py: RiskComplexityScorer, a single structured Haiku call returning
  0-10 risk/complexity plus factors, fed the precomputed <test_signals> block.
- agent.py: inject the test-signals block into the review prompt and surface
  token usage + tool-call counts in the response details.
- llms.py: DEFAULT_SCORING_MODEL and a shared usage_from_messages() helper.

Service (reviewhelper-api):
- reviewer_groups.py + reviewer_groups.yaml: per-group thresholds (with
  defaults) loaded from a checked-in YAML file; matching_groups() resolves a
  revision's reviewer groups via the new PhabricatorPatch plumbing.
- review_processor.py: _score_and_gate() scores before reviewing and raises
  ReviewSkipped("above_threshold") when either score meets its threshold.
- ReviewStatus.SKIPPED (terminal, no retry) + skipped_reason and
  risk/complexity/test-signal columns on review_requests, recorded even when
  skipped; alembic migration + status index.
- internal.py records skips as SKIPPED (HTTP 204); request.py messaging.
- pricing.py: per-model token pricing for later cost reporting.
---
 bugbug/tools/code_review/agent.py             |  51 ++++-
 bugbug/tools/code_review/diff_analysis.py     | 180 +++++++++++++++
 bugbug/tools/code_review/scoring.py           | 204 +++++++++++++++++
 bugbug/tools/code_review/test_coverage.py     | 161 +++++++++++++
 bugbug/tools/core/llms.py                     |  28 +++
 ...e5f6_add_risk_scores_and_skipped_status.py |  90 ++++++++
 services/reviewhelper-api/app/config.py       |   4 +
 .../reviewhelper-api/app/database/models.py   |  12 +
 services/reviewhelper-api/app/enums.py        |   9 +-
 services/reviewhelper-api/app/pricing.py      |  63 ++++++
 .../reviewhelper-api/app/review_processor.py  | 120 +++++++++-
 .../reviewhelper-api/app/reviewer_groups.py   | 105 +++++++++
 .../reviewhelper-api/app/routers/internal.py  |   9 +
 .../reviewhelper-api/app/routers/request.py   |   3 +
 services/reviewhelper-api/pyproject.toml      |   1 +
 .../reviewhelper-api/reviewer_groups.yaml     |  22 ++
 services/reviewhelper-api/tests/conftest.py   |  23 ++
 .../tests/test_review_gating.py               |  90 ++++++++
 .../tests/test_reviewer_groups.py             |  57 +++++
 tests/test_code_review_scoring.py             | 212 ++++++++++++++++++
 20 files changed, 1429 insertions(+), 15 deletions(-)
 create mode 100644 bugbug/tools/code_review/diff_analysis.py
 create mode 100644 bugbug/tools/code_review/scoring.py
 create mode 100644 bugbug/tools/code_review/test_coverage.py
 create mode 100644 services/reviewhelper-api/alembic/versions/c1a2b3d4e5f6_add_risk_scores_and_skipped_status.py
 create mode 100644 services/reviewhelper-api/app/pricing.py
 create mode 100644 services/reviewhelper-api/app/reviewer_groups.py
 create mode 100644 services/reviewhelper-api/reviewer_groups.yaml
 create mode 100644 services/reviewhelper-api/tests/conftest.py
 create mode 100644 services/reviewhelper-api/tests/test_review_gating.py
 create mode 100644 services/reviewhelper-api/tests/test_reviewer_groups.py
 create mode 100644 tests/test_code_review_scoring.py

diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index badab3f528..b9d3674e9f 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -55,7 +55,11 @@
     LargeDiffError,
     RecursionLimitError,
 )
-from bugbug.tools.core.llms import DEFAULT_ANTHROPIC_MODEL, get_tokenizer
+from bugbug.tools.core.llms import (
+    DEFAULT_ANTHROPIC_MODEL,
+    get_tokenizer,
+    usage_from_messages,
+)
 from bugbug.tools.core.platforms.base import Patch
 
 logger = getLogger(__name__)
@@ -204,11 +208,15 @@ def count_tokens(self, text):
         return len(self._tokenizer.encode(text))
 
     def generate_initial_prompt(
-        self, patch: Patch, patch_summary: str, external_context: str = ""
+        self,
+        patch: Patch,
+        patch_summary: str,
+        external_context: str = "",
+        test_signals_block: str | None = None,
     ) -> str:
         created_before = patch.date_created if self.is_experiment_env else None
 
-        return FIRST_MESSAGE_TEMPLATE.format(
+        prompt = FIRST_MESSAGE_TEMPLATE.format(
             patch=format_patch_set(patch.patch_set),
             patch_summarization=patch_summary,
             external_context=external_context,
@@ -216,9 +224,17 @@ def generate_initial_prompt(
             approved_examples=self._get_generated_examples(patch, created_before),
         )
 
+        if test_signals_block:
+            prompt = f"{test_signals_block}\n\n{prompt}"
+
+        return prompt
+
     async def generate_review_comments(
-        self, patch: Patch, patch_summary: str
-    ) -> tuple[list[GeneratedReviewComment], list[dict]]:
+        self,
+        patch: Patch,
+        patch_summary: str,
+        test_signals_block: str | None = None,
+    ) -> tuple[dict, list[dict]]:
         external_context = ""
         manifest: list[dict] = []
         if self._review_context_repo:
@@ -240,7 +256,10 @@ async def generate_review_comments(
                     "messages": [
                         HumanMessage(
                             self.generate_initial_prompt(
-                                patch, patch_summary, external_context
+                                patch,
+                                patch_summary,
+                                external_context,
+                                test_signals_block,
                             )
                         ),
                     ]
@@ -253,7 +272,7 @@ async def generate_review_comments(
         except GraphRecursionError as e:
             raise RecursionLimitError("The model could not complete the review") from e
 
-        return result["structured_response"].comments, manifest
+        return result, manifest
 
     async def assess_patch_scope(
         self, patch: Patch, patch_summary: str
@@ -270,16 +289,19 @@ async def assess_patch_scope(
         result = await self.scope_agent.ainvoke({"messages": [HumanMessage(prompt)]})
         return result["structured_response"].comments[:1]
 
-    async def run(self, patch: Patch) -> CodeReviewToolResponse:
+    async def run(
+        self, patch: Patch, test_signals_block: str | None = None
+    ) -> CodeReviewToolResponse:
         if self.count_tokens(patch.raw_diff) > 21000:
             raise LargeDiffError("The diff is too large")
 
         patch_summary = self.patch_summarizer.run(patch)
 
-        (
-            unfiltered_suggestions,
-            external_content_manifest,
-        ) = await self.generate_review_comments(patch, patch_summary)
+        result, external_content_manifest = await self.generate_review_comments(
+            patch, patch_summary, test_signals_block
+        )
+        messages = result.get("messages", [])
+        unfiltered_suggestions = result["structured_response"].comments
         if not unfiltered_suggestions:
             logger.info("No suggestions were generated")
 
@@ -305,6 +327,11 @@ async def run(self, patch: Patch) -> CodeReviewToolResponse:
                 "num_unfiltered_suggestions": len(unfiltered_suggestions),
                 "num_scope_suggestions": len(scope_suggestions),
                 "external_content": external_content_manifest,
+                "usage": usage_from_messages(messages),
+                "tool_calls": sum(
+                    len(getattr(message, "tool_calls", []) or [])
+                    for message in messages
+                ),
             },
         )
 
diff --git a/bugbug/tools/code_review/diff_analysis.py b/bugbug/tools/code_review/diff_analysis.py
new file mode 100644
index 0000000000..cc831a718f
--- /dev/null
+++ b/bugbug/tools/code_review/diff_analysis.py
@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Parse a unified diff into structured test signals used by risk scoring.
+
+The diff analyzer answers one question deterministically, so we don't have to
+ask the model to eyeball it: did the patch include test changes, and how much?
+The resulting signal feeds both the risk/complexity scorer and the review
+prompt. The set of changed non-test paths is also returned so the existing-test
+coverage lookup (``test_coverage``) knows what to probe for in mozilla-central.
+
+Ported from qreviews (qreviews/diff_analysis.py); the hunk-anchor tracking used
+there to validate inline comments is dropped, since bugbug validates generated
+comments against the patch set via ``utils.find_comment_scope``.
+"""
+
+import re
+from dataclasses import dataclass, field
+
+# Mozilla-flavoured test-path matchers. Applied to the new-side path
+# of each `diff --git a/X b/Y` (or to X if the file was deleted).
+_TEST_DIR_RE = re.compile(
+    r"(^|/)(tests?|xpcshell|mochitests?|gtests?|reftests?|crashtests?|"
+    r"jsapi-tests?|web-platform/tests?|googletest)(/|$)"
+)
+_TEST_FILENAME_RES = (
+    re.compile(r"(^|/)test_[^/]+$"),
+    re.compile(r"[^/]*_test\.[A-Za-z0-9]+$"),
+    re.compile(r"[^/]*\.test\.[A-Za-z0-9]+$"),
+    re.compile(r"(^|/)head[^/]*\.js$"),
+    re.compile(r"(^|/)browser_[^/]+\.js$"),
+)
+
+
+def _is_test_path(path: str) -> bool:
+    if not path:
+        return False
+    if _TEST_DIR_RE.search(path):
+        return True
+    return any(r.search(path) for r in _TEST_FILENAME_RES)
+
+
+_DIFF_GIT_RE = re.compile(r"^diff --git a/(\S+) b/(\S+)\s*$")
+_HUNK_RE = re.compile(
+    r"^@@ -(?P<old_start>\d+)(?:,(?P<old_len>\d+))? "
+    r"\+(?P<new_start>\d+)(?:,(?P<new_len>\d+))? @@"
+)
+
+# Possible values of DiffStats.in_diff_test_signal.
+IN_DIFF_TEST_SIGNALS = ("absent", "sparse", "adequate", "tests_only", "no_code_change")
+
+
+@dataclass(frozen=True)
+class DiffStats:
+    files_changed: int
+    test_files_changed: int
+    non_test_files_changed: int
+    lines_added: int
+    test_lines_added: int
+    non_test_lines_added: int
+    # One of IN_DIFF_TEST_SIGNALS.
+    in_diff_test_signal: str
+    non_test_paths: list[str] = field(default_factory=list)
+
+
+def _classify_signal(
+    *, non_test_files: int, test_files: int, non_test_added: int, test_added: int
+) -> str:
+    if non_test_files == 0 and test_files == 0:
+        return "no_code_change"
+    if non_test_files == 0:
+        return "tests_only"
+    if test_files == 0:
+        return "absent"
+    # Both kinds present — ratio decides.
+    if non_test_added <= 0:
+        # All deletions on non-test side; test additions are still meaningful.
+        return "adequate" if test_added > 0 else "sparse"
+    ratio = test_added / non_test_added
+    return "adequate" if ratio >= 0.3 else "sparse"
+
+
+def analyze_diff(raw_diff: str) -> DiffStats:
+    """Walk a unified diff once and emit a DiffStats."""
+    files_changed = 0
+    test_files = 0
+    non_test_files = 0
+    total_added = 0
+    test_added = 0
+    non_test_added = 0
+    non_test_paths: list[str] = []
+
+    current_path: str | None = None
+    current_is_test = False
+    in_hunk = False
+
+    for line in raw_diff.splitlines():
+        m = _DIFF_GIT_RE.match(line)
+        if m:
+            files_changed += 1
+            # Use the new-side path; on a deletion this still names the
+            # original file (Phabricator passes the same path on both sides).
+            current_path = m.group(2)
+            current_is_test = _is_test_path(current_path)
+            if current_is_test:
+                test_files += 1
+            else:
+                non_test_files += 1
+                non_test_paths.append(current_path)
+            in_hunk = False
+            continue
+
+        if current_path is None:
+            continue
+
+        if _HUNK_RE.match(line):
+            in_hunk = True
+            continue
+
+        if not in_hunk or not line:
+            continue
+
+        if line[0] == "+":
+            total_added += 1
+            if current_is_test:
+                test_added += 1
+            else:
+                non_test_added += 1
+        elif line[0] not in "- \\":
+            # Unexpected line inside a hunk (e.g. a stray header from a
+            # malformed diff). Drop out of hunk mode to resync.
+            in_hunk = False
+
+    signal = _classify_signal(
+        non_test_files=non_test_files,
+        test_files=test_files,
+        non_test_added=non_test_added,
+        test_added=test_added,
+    )
+
+    return DiffStats(
+        files_changed=files_changed,
+        test_files_changed=test_files,
+        non_test_files_changed=non_test_files,
+        lines_added=total_added,
+        test_lines_added=test_added,
+        non_test_lines_added=non_test_added,
+        in_diff_test_signal=signal,
+        non_test_paths=non_test_paths,
+    )
+
+
+def format_test_signal_block(
+    stats: DiffStats,
+    *,
+    coverage_block: str | None = None,
+) -> str:
+    """Render the structured signal block prepended to scoring/review messages.
+
+    The optional ``coverage_block`` is the existing-coverage layer rendered by
+    ``test_coverage.format_coverage_block``; the two-layer rendering lives here
+    so it isn't duplicated across scoring and review.
+    """
+    lines = [
+        "<test_signals>",
+        "Pre-computed from the diff (not the model):",
+        f"  files_changed={stats.files_changed}",
+        f"  test_files_changed={stats.test_files_changed}",
+        f"  non_test_files_changed={stats.non_test_files_changed}",
+        f"  lines_added={stats.lines_added} "
+        f"(test={stats.test_lines_added}, non_test={stats.non_test_lines_added})",
+        f"  in_diff_test_signal={stats.in_diff_test_signal}",
+    ]
+    if coverage_block:
+        lines.append("")
+        lines.append(coverage_block.rstrip())
+    lines.append("</test_signals>")
+    return "\n".join(lines)
diff --git a/bugbug/tools/code_review/scoring.py b/bugbug/tools/code_review/scoring.py
new file mode 100644
index 0000000000..4ef34a1278
--- /dev/null
+++ b/bugbug/tools/code_review/scoring.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Risk + complexity scoring via a cheap Claude model.
+
+One call per fresh diff, returning two independent 0-10 integer scores plus the
+factors behind them. The result gates whether the (expensive) review agent runs
+at all, so this deliberately uses a small model. Token usage is captured so the
+dashboard can show cost.
+
+Ported from qreviews (qreviews/scoring.py); the raw Anthropic SDK call is
+replaced with LangChain structured output to match the rest of bugbug.
+"""
+
+from dataclasses import dataclass, field
+from logging import getLogger
+
+from langchain.chat_models import init_chat_model
+from langchain.messages import HumanMessage, SystemMessage
+from pydantic import BaseModel, Field
+
+from bugbug.tools.core.exceptions import ModelResultError
+from bugbug.tools.core.llms import DEFAULT_SCORING_MODEL, usage_from_messages
+
+logger = getLogger(__name__)
+
+
+SCORING_SYSTEM_PROMPT = """\
+You are a senior Mozilla Firefox reviewer. Rate an incoming Phabricator
+revision on two integer 0-10 axes. You MUST use the full 0-10 range —
+trivial changes deserve scores of 0 or 1, not 2 or 3.
+
+  - RISK: blast radius if this change is wrong. Considers sensitive
+    components (security, IPC, prefs, permissions, crypto, sandbox,
+    network, auth), irreversibility (data migrations, schema changes),
+    breadth (number of touched modules), and dangerous patterns (eval,
+    raw HTML injection, regex on untrusted input, concurrency / async
+    races, file/network I/O, telemetry definitions, mots.yaml /
+    build / CI files).
+
+  - COMPLEXITY: how hard the change is for a human reviewer to
+    understand and verify. Considers LOC added/removed, number of
+    files, control-flow density, new abstractions, refactors / renames,
+    presence vs absence of tests, and clarity of the diff.
+
+Both axes are independent. A simple 5-line patch to a security-critical
+file is HIGH risk but LOW complexity.
+
+The user message includes a `<test_signals>` block computed
+deterministically from the diff and from a searchfox lookup over
+mozilla-central. Treat these as hints, not absolutes:
+
+  - `in_diff_test_signal=absent` means the patch adds no test
+    changes. On non-trivial non-test code, this is mildly
+    risk-increasing; on docs/CSS/string changes it does not matter.
+  - `in_diff_test_signal=tests_only` means the patch is test-only.
+    This is risk-decreasing — broken tests fail loudly without
+    affecting production.
+  - `coverage_signal=uncovered` means searchfox found no existing
+    tests in mozilla-central that reference the changed non-test
+    files. Combined with `in_diff_test_signal=absent` on a
+    sensitive area, this materially raises risk.
+  - `coverage_signal=covered` or `partial` lowers risk slightly —
+    the touched code at least has some automated check around it.
+  - `coverage_signal=skipped_*` means the lookup wasn't run
+    (large diff or searchfox unavailable). Do not let it affect
+    your score in either direction.
+
+Continue to use the full 0-10 scale; do not let the test signals
+overwhelm other risk factors (security, IPC, etc.) — they are one
+input among many.
+
+Score anchors (use these as a calibration reference, do not be afraid
+to score 0 or 1):
+
+  RISK
+    0  = pure docs / comments / strings, no executable code change
+    1  = CSS-only, dead-code removal, isolated UI tweak in a leaf component,
+         a localization string addition
+    2  = small JS/HTML change in a non-sensitive UI component, no network
+         or storage touched
+    3-4 = moderate change to UI logic, or any touch of a moderately
+         sensitive area (telemetry, prefs reads)
+    5-6 = multi-file change with cross-cutting effects, or touches
+         moderately sensitive subsystems
+    7-8 = security-relevant code, IPC, sandbox, auth, crypto, network
+         protocols, build / CI / signing
+    9-10 = data migrations, irreversible schema changes, security boundary
+         changes
+
+  COMPLEXITY
+    0  = whitespace, single-line value change, single-line string change
+    1  = <10 LOC, single file, no control flow added
+    2  = ~10-30 LOC, 1-2 files, simple straight-line additions
+    3-4 = 30-100 LOC, new function/method, simple control flow
+    5-6 = 100-300 LOC OR 5+ files OR new abstraction
+    7-8 = significant refactor, renames, signature changes across many
+         callers, or non-trivial concurrency/async logic
+    9-10 = large refactors with subtle invariants, new subsystems
+
+Provide 1-5 factors per axis. Each factor MUST be ONE short sentence,
+not a paragraph. Cite specific file paths in `path:line` form when useful.\
+"""
+
+
+class Scores(BaseModel):
+    """Structured risk/complexity assessment of a revision."""
+
+    risk: int = Field(ge=0, le=10, description="Blast radius if the change is wrong.")
+    complexity: int = Field(
+        ge=0, le=10, description="How hard the change is to review and verify."
+    )
+    risk_factors: list[str] = Field(
+        default_factory=list,
+        description="1-5 short sentences justifying the risk score.",
+    )
+    complexity_factors: list[str] = Field(
+        default_factory=list,
+        description="1-5 short sentences justifying the complexity score.",
+    )
+
+
+@dataclass
+class ScoringResult:
+    scores: Scores
+    model: str
+    usage: dict[str, int] = field(default_factory=dict)
+
+
+def _build_user_message(
+    *,
+    title: str,
+    summary: str | None,
+    revision_id: int,
+    author: str,
+    bug_id: str | int | None,
+    raw_diff: str,
+    test_signals_block: str | None = None,
+) -> str:
+    header = (
+        f"Revision: D{revision_id}\n"
+        f"Title: {title}\n"
+        f"Author: {author}\n"
+        f"Bug: {bug_id or '(none)'}\n"
+        f"\nSummary:\n{summary or '(no summary provided)'}\n"
+    )
+    signals = f"\n{test_signals_block}\n" if test_signals_block else ""
+    return (
+        f"{header}{signals}\n----- BEGIN DIFF -----\n{raw_diff}\n----- END DIFF -----\n"
+    )
+
+
+class RiskComplexityScorer:
+    """Scores a revision's risk and complexity with a single cheap LLM call."""
+
+    def __init__(self, llm, model_name: str) -> None:
+        self._model_name = model_name
+        self._structured = llm.with_structured_output(Scores, include_raw=True)
+
+    @classmethod
+    def create(
+        cls,
+        model: str = DEFAULT_SCORING_MODEL,
+        max_tokens: int = 1024,
+    ) -> "RiskComplexityScorer":
+        llm = init_chat_model(model, max_tokens=max_tokens, temperature=0)
+        return cls(llm, model)
+
+    async def run(
+        self,
+        *,
+        title: str,
+        summary: str | None,
+        revision_id: int,
+        author: str,
+        bug_id: str | int | None,
+        raw_diff: str,
+        test_signals_block: str | None = None,
+    ) -> ScoringResult:
+        """Call the model and return parsed scores plus token usage."""
+        user_msg = _build_user_message(
+            title=title,
+            summary=summary,
+            revision_id=revision_id,
+            author=author,
+            bug_id=bug_id,
+            raw_diff=raw_diff,
+            test_signals_block=test_signals_block,
+        )
+        result = await self._structured.ainvoke(
+            [SystemMessage(SCORING_SYSTEM_PROMPT), HumanMessage(user_msg)]
+        )
+
+        scores = result.get("parsed")
+        if scores is None:
+            raise ModelResultError(
+                f"Risk/complexity scoring did not return valid scores: "
+                f"{result.get('parsing_error')}"
+            )
+
+        usage = usage_from_messages([result["raw"]])
+        return ScoringResult(scores=scores, model=self._model_name, usage=usage)
diff --git a/bugbug/tools/code_review/test_coverage.py b/bugbug/tools/code_review/test_coverage.py
new file mode 100644
index 0000000000..0e744c746a
--- /dev/null
+++ b/bugbug/tools/code_review/test_coverage.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Probe mozilla-central via searchfox for existing tests of a revision's files.
+
+This complements ``diff_analysis``. The diff analyzer tells us whether the patch
+itself includes test changes; this module tells us whether existing tests in the
+tree already reference the touched non-test files, even when the patch doesn't
+add tests.
+
+The signal is intentionally a hint, not a guarantee — basename queries can hit
+unrelated same-named files. Risk scoring treats it as one input among many.
+
+Ported from qreviews (qreviews/test_coverage.py), with the searchfox-cli
+subprocess backend swapped for bugbug's async ``AsyncSearchfoxClient`` (the same
+client the review agent's searchfox tools already use).
+"""
+
+import os
+from dataclasses import dataclass, field
+from logging import getLogger
+
+from bugbug.tools.code_review.langchain_tools import _get_client
+
+logger = getLogger(__name__)
+
+# Cap how many non-test files we look up per revision. searchfox queries are
+# cheap but not free; large refactors would otherwise issue dozens of requests.
+DEFAULT_FILE_CAP = 10
+
+# Per-query hit cap. We only need to know "are there any tests" — a small
+# limit keeps the result bounded.
+PER_QUERY_LIMIT = 5
+
+
+@dataclass(frozen=True)
+class ExistingCoverage:
+    covered_paths: dict[str, list[str]] = field(default_factory=dict)
+    uncovered_paths: list[str] = field(default_factory=list)
+    # "covered" | "partial" | "uncovered" | "no_non_test_files" |
+    # "skipped_large_diff" | "skipped_searchfox_error"
+    coverage_signal: str = "skipped_searchfox_error"
+    # Total non-test files in the diff, regardless of cap.
+    candidate_count: int = 0
+
+
+def _basename_no_ext(path: str) -> str:
+    """Strip up to two extensions so a basename collapses to its module name.
+
+    Two so Mozilla suffixes like ``foo.sys.mjs`` collapse to ``foo``, but no
+    further so single-dot names (``some.module.cpp``) keep ``some.module``.
+    """
+    base = os.path.basename(path)
+    for _ in range(2):
+        name, ext = os.path.splitext(base)
+        if not ext:
+            break
+        base = name
+    return base
+
+
+async def lookup_existing_coverage(
+    non_test_paths: list[str],
+    *,
+    file_cap: int = DEFAULT_FILE_CAP,
+) -> ExistingCoverage:
+    """Run a small searchfox query per non-test path and aggregate.
+
+    Returns an ExistingCoverage describing the per-file hit lists and an overall
+    ``coverage_signal``. Any searchfox failure degrades to a skipped signal
+    rather than raising — coverage is advisory, never required.
+    """
+    if not non_test_paths:
+        return ExistingCoverage(coverage_signal="no_non_test_files", candidate_count=0)
+
+    candidate_count = len(non_test_paths)
+    skipped_large = candidate_count > file_cap
+    paths = non_test_paths[:file_cap] if skipped_large else list(non_test_paths)
+
+    client = _get_client()
+    covered: dict[str, list[str]] = {}
+    uncovered: list[str] = []
+
+    for path in paths:
+        query = _basename_no_ext(path)
+        if not query or len(query) < 3:
+            # Generic short names like 'x' or 'io' produce too much noise.
+            uncovered.append(path)
+            continue
+        try:
+            results = await client.search(
+                query=query,
+                tests="only",
+                limit=PER_QUERY_LIMIT,
+            )
+        except Exception as e:  # searchfox raises plain Exception
+            logger.warning("searchfox coverage lookup failed at %s: %s", path, e)
+            return ExistingCoverage(
+                covered_paths=covered,
+                uncovered_paths=uncovered + paths[len(covered) + len(uncovered) :],
+                coverage_signal="skipped_searchfox_error",
+                candidate_count=candidate_count,
+            )
+
+        # Drop self-references — the queried file can show up if it shares a
+        # basename with a test path by sheer regex.
+        hits: list[str] = []
+        seen: set[str] = set()
+        for hit_path, _line, _content in results:
+            if hit_path == path or hit_path in seen:
+                continue
+            seen.add(hit_path)
+            hits.append(hit_path)
+
+        if hits:
+            covered[path] = hits
+        else:
+            uncovered.append(path)
+
+    if skipped_large:
+        signal = "skipped_large_diff"
+    elif covered and uncovered:
+        signal = "partial"
+    elif covered:
+        signal = "covered"
+    else:
+        signal = "uncovered"
+
+    return ExistingCoverage(
+        covered_paths=covered,
+        uncovered_paths=uncovered,
+        coverage_signal=signal,
+        candidate_count=candidate_count,
+    )
+
+
+def format_coverage_block(coverage: ExistingCoverage) -> str | None:
+    """Render the existing-coverage layer for the ``<test_signals>`` block.
+
+    Returns None when there's nothing meaningful to show.
+    """
+    if coverage.coverage_signal == "no_non_test_files":
+        return None
+    lines = [
+        "Existing tests referencing changed non-test files (searchfox lookup):",
+        f"  coverage_signal={coverage.coverage_signal}",
+        f"  candidate_files={coverage.candidate_count}",
+    ]
+    if coverage.covered_paths:
+        lines.append("  covered:")
+        for src, tests in coverage.covered_paths.items():
+            sample = ", ".join(tests[:3])
+            more = "" if len(tests) <= 3 else f" (+{len(tests) - 3} more)"
+            lines.append(f"    - {src} <- {sample}{more}")
+    if coverage.uncovered_paths:
+        lines.append("  uncovered:")
+        for src in coverage.uncovered_paths:
+            lines.append(f"    - {src}")
+    return "\n".join(lines)
diff --git a/bugbug/tools/core/llms.py b/bugbug/tools/core/llms.py
index e247aad8b2..6da5dcbcf5 100644
--- a/bugbug/tools/core/llms.py
+++ b/bugbug/tools/core/llms.py
@@ -11,6 +11,34 @@
 
 DEFAULT_OPENAI_MODEL = "gpt-5-2025-08-07"
 DEFAULT_ANTHROPIC_MODEL = "claude-opus-4-6"
+# Cheap, fast model used for the risk/complexity pre-screen, separate from the
+# review model so the gate stays inexpensive.
+DEFAULT_SCORING_MODEL = "claude-haiku-4-5-20251001"
+
+
+def usage_from_messages(messages) -> dict[str, int]:
+    """Aggregate LangChain ``usage_metadata`` across one or more AI messages.
+
+    Returns a dict with input/output token totals and the cache-read /
+    cache-creation breakdown, suitable for cost estimation. Messages without
+    usage metadata (e.g. tool or human messages) are ignored.
+    """
+    totals = {
+        "input_tokens": 0,
+        "output_tokens": 0,
+        "cache_read_input_tokens": 0,
+        "cache_creation_input_tokens": 0,
+    }
+    for message in messages:
+        usage = getattr(message, "usage_metadata", None)
+        if not usage:
+            continue
+        totals["input_tokens"] += usage.get("input_tokens", 0) or 0
+        totals["output_tokens"] += usage.get("output_tokens", 0) or 0
+        details = usage.get("input_token_details") or {}
+        totals["cache_read_input_tokens"] += details.get("cache_read", 0) or 0
+        totals["cache_creation_input_tokens"] += details.get("cache_creation", 0) or 0
+    return totals
 
 
 def get_tokenizer(model_name):
diff --git a/services/reviewhelper-api/alembic/versions/c1a2b3d4e5f6_add_risk_scores_and_skipped_status.py b/services/reviewhelper-api/alembic/versions/c1a2b3d4e5f6_add_risk_scores_and_skipped_status.py
new file mode 100644
index 0000000000..7de42ebca9
--- /dev/null
+++ b/services/reviewhelper-api/alembic/versions/c1a2b3d4e5f6_add_risk_scores_and_skipped_status.py
@@ -0,0 +1,90 @@
+"""Add risk/complexity scores, test signals, and SKIPPED status.
+
+Revision ID: c1a2b3d4e5f6
+Revises: 714c920ab85b
+Create Date: 2026-05-28 00:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from alembic_postgresql_enum import TableReference
+
+# revision identifiers, used by Alembic.
+revision: str = "c1a2b3d4e5f6"
+down_revision: Union[str, Sequence[str], None] = "714c920ab85b"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="review_status_enum",
+        new_values=[
+            "PENDING",
+            "PROCESSING",
+            "RETRY_PENDING",
+            "PUBLISHED",
+            "FAILED",
+            "SKIPPED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public",
+                table_name="review_requests",
+                column_name="status",
+            )
+        ],
+        enum_values_to_rename=[],
+    )
+
+    op.add_column("review_requests", sa.Column("risk", sa.Integer(), nullable=True))
+    op.add_column(
+        "review_requests", sa.Column("complexity", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "review_requests", sa.Column("in_diff_test_signal", sa.Text(), nullable=True)
+    )
+    op.add_column(
+        "review_requests", sa.Column("coverage_signal", sa.Text(), nullable=True)
+    )
+    op.add_column(
+        "review_requests", sa.Column("skipped_reason", sa.Text(), nullable=True)
+    )
+    op.create_index(
+        "ix_review_requests_status", "review_requests", ["status"], unique=False
+    )
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    op.drop_index("ix_review_requests_status", table_name="review_requests")
+    op.drop_column("review_requests", "skipped_reason")
+    op.drop_column("review_requests", "coverage_signal")
+    op.drop_column("review_requests", "in_diff_test_signal")
+    op.drop_column("review_requests", "complexity")
+    op.drop_column("review_requests", "risk")
+
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="review_status_enum",
+        new_values=[
+            "PENDING",
+            "PROCESSING",
+            "RETRY_PENDING",
+            "PUBLISHED",
+            "FAILED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public",
+                table_name="review_requests",
+                column_name="status",
+            )
+        ],
+        enum_values_to_rename=[],
+    )
diff --git a/services/reviewhelper-api/app/config.py b/services/reviewhelper-api/app/config.py
index a0dd25afd1..a278cd15be 100644
--- a/services/reviewhelper-api/app/config.py
+++ b/services/reviewhelper-api/app/config.py
@@ -38,6 +38,10 @@ class Settings(BaseSettings):
     # Agent settings
     todo_enabled: bool = True
 
+    # Risk/complexity pre-screen (gates whether the review agent runs).
+    scoring_model: str = "claude-haiku-4-5-20251001"
+    scoring_max_tokens: int = 1024
+
     model_config = {
         "env_file": ".env",
         "env_file_encoding": "utf-8",
diff --git a/services/reviewhelper-api/app/database/models.py b/services/reviewhelper-api/app/database/models.py
index 2c8339dbd0..eb91dec843 100644
--- a/services/reviewhelper-api/app/database/models.py
+++ b/services/reviewhelper-api/app/database/models.py
@@ -74,6 +74,16 @@ class ReviewRequest(Base):
     error: Mapped[str | None] = mapped_column(Text)
     summary: Mapped[str | None] = mapped_column(Text)
 
+    # Risk/complexity pre-screen scores (0-10), recorded even when the review is
+    # skipped so thresholds can be tuned and the dashboard can chart them. The
+    # verbose factors and coverage detail live in `details`.
+    risk: Mapped[int | None]
+    complexity: Mapped[int | None]
+    in_diff_test_signal: Mapped[str | None] = mapped_column(Text)
+    coverage_signal: Mapped[str | None] = mapped_column(Text)
+    # Why the review was skipped (e.g. "above_threshold"); set with status=SKIPPED.
+    skipped_reason: Mapped[str | None] = mapped_column(Text)
+
     # Relationships
     comments: Mapped[list["GeneratedComment"]] = relationship(
         "GeneratedComment",
@@ -115,6 +125,8 @@ async def is_first_published_review(self) -> bool:
             unique=True,
             postgresql_where=(platform == Platform.GITHUB),
         ),
+        # Speeds up dashboard aggregations grouped by status / skip reason.
+        Index("ix_review_requests_status", "status"),
     )
 
 
diff --git a/services/reviewhelper-api/app/enums.py b/services/reviewhelper-api/app/enums.py
index d392dccdb5..08d1e4b899 100644
--- a/services/reviewhelper-api/app/enums.py
+++ b/services/reviewhelper-api/app/enums.py
@@ -12,10 +12,17 @@ class ReviewStatus(str, Enum):
     RETRY_PENDING = "retry_pending"
     PUBLISHED = "published"
     FAILED = "failed"
+    # The revision was intentionally not reviewed (gated out by risk/complexity
+    # thresholds, reviewer-group rules, etc.). Terminal, like PUBLISHED/FAILED.
+    SKIPPED = "skipped"
 
     @property
     def is_final(self) -> bool:
-        return self is ReviewStatus.PUBLISHED or self is ReviewStatus.FAILED
+        return self in (
+            ReviewStatus.PUBLISHED,
+            ReviewStatus.FAILED,
+            ReviewStatus.SKIPPED,
+        )
 
 
 class FeedbackType(str, Enum):
diff --git a/services/reviewhelper-api/app/pricing.py b/services/reviewhelper-api/app/pricing.py
new file mode 100644
index 0000000000..d1078df506
--- /dev/null
+++ b/services/reviewhelper-api/app/pricing.py
@@ -0,0 +1,63 @@
+"""Per-model pricing for cost estimation. USD per 1M tokens.
+
+Ported from qreviews (qreviews/pricing.py). Used to turn the token usage recorded
+on each review request into an estimated dollar cost for the dashboard.
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class ModelPrice:
+    input_per_mtok: float
+    output_per_mtok: float
+    cache_read_per_mtok: float
+    cache_write_per_mtok: float
+
+
+# Anthropic published rates (USD / 1M tokens) as of late 2025/early 2026.
+# Update this table when rates change.
+PRICES: dict[str, ModelPrice] = {
+    # Claude Opus 4.x
+    "claude-opus-4-8": ModelPrice(15.00, 75.00, 1.50, 18.75),
+    "claude-opus-4-7": ModelPrice(15.00, 75.00, 1.50, 18.75),
+    "claude-opus-4-6": ModelPrice(15.00, 75.00, 1.50, 18.75),
+    # Claude Sonnet 4.x
+    "claude-sonnet-4-6": ModelPrice(3.00, 15.00, 0.30, 3.75),
+    "claude-sonnet-4-5": ModelPrice(3.00, 15.00, 0.30, 3.75),
+    # Claude Haiku 4.x
+    "claude-haiku-4-5": ModelPrice(1.00, 5.00, 0.10, 1.25),
+    "claude-haiku-4-5-20251001": ModelPrice(1.00, 5.00, 0.10, 1.25),
+}
+
+# Generic fallback if a model ID isn't in the table — assume Sonnet-class.
+FALLBACK_PRICE = ModelPrice(3.00, 15.00, 0.30, 3.75)
+
+
+def price_for(model: str) -> ModelPrice:
+    if not model:
+        return FALLBACK_PRICE
+    if model in PRICES:
+        return PRICES[model]
+    # Try a prefix match (e.g. snapshot IDs).
+    for key, price in PRICES.items():
+        if model.startswith(key):
+            return price
+    return FALLBACK_PRICE
+
+
+def estimate_cost_usd(
+    model: str,
+    *,
+    input_tokens: int = 0,
+    output_tokens: int = 0,
+    cache_read: int = 0,
+    cache_write: int = 0,
+) -> float:
+    p = price_for(model)
+    return (
+        input_tokens * p.input_per_mtok
+        + output_tokens * p.output_per_mtok
+        + cache_read * p.cache_read_per_mtok
+        + cache_write * p.cache_write_per_mtok
+    ) / 1_000_000.0
diff --git a/services/reviewhelper-api/app/review_processor.py b/services/reviewhelper-api/app/review_processor.py
index 84cffce308..3f002fc784 100644
--- a/services/reviewhelper-api/app/review_processor.py
+++ b/services/reviewhelper-api/app/review_processor.py
@@ -7,6 +7,16 @@
 from app.config import settings
 from app.database.models import GeneratedComment, ReviewRequest
 from app.enums import Platform
+from app.reviewer_groups import get_reviewer_groups_config, matching_groups
+from bugbug.tools.code_review.diff_analysis import (
+    analyze_diff,
+    format_test_signal_block,
+)
+from bugbug.tools.code_review.scoring import RiskComplexityScorer
+from bugbug.tools.code_review.test_coverage import (
+    format_coverage_block,
+    lookup_existing_coverage,
+)
 from bugbug.tools.core.exceptions import LargeDiffError, RecursionLimitError
 from bugbug.tools.core.platforms.phabricator import (
     PhabricatorPatch,
@@ -29,6 +39,19 @@ class RevisionNotYetPublicError(Exception):
     """
 
 
+class ReviewSkipped(Exception):
+    """The revision was intentionally not reviewed (e.g. gated out by scores).
+
+    Carries the reason and the details to persist. Not an error: the caller
+    records it as a terminal SKIPPED status without retrying.
+    """
+
+    def __init__(self, reason: str, *, details: dict | None = None):
+        super().__init__(reason)
+        self.reason = reason
+        self.details = details or {}
+
+
 @cache
 def get_code_review_tool():
     from bugbug.tools.code_review import CodeReviewTool
@@ -36,6 +59,14 @@ def get_code_review_tool():
     return CodeReviewTool.create(todo_enabled=settings.todo_enabled)
 
 
+@cache
+def get_risk_scorer() -> RiskComplexityScorer:
+    return RiskComplexityScorer.create(
+        model=settings.scoring_model,
+        max_tokens=settings.scoring_max_tokens,
+    )
+
+
 async def process_review(
     review_request: ReviewRequest,
 ) -> tuple[list[GeneratedComment], str, dict]:
@@ -73,10 +104,12 @@ async def process_review(
             "the revision is private or has restricted visibility."
         )
 
+    test_signals_block, scoring_details = await _score_and_gate(patch, review_request)
+
     tool = get_code_review_tool()
 
     try:
-        result = await tool.run(patch)
+        result = await tool.run(patch, test_signals_block=test_signals_block)
     except LargeDiffError as e:
         raise ReviewProcessingError(
             "The diff size exceeds the current processing limits."
@@ -99,7 +132,90 @@ async def process_review(
         for comment in result.review_comments
     ]
 
-    return generated_comments, result.patch_summary, result.details
+    details = {**result.details, **scoring_details}
+
+    return generated_comments, result.patch_summary, details
+
+
+async def _score_and_gate(
+    patch: PhabricatorPatch, review_request: ReviewRequest
+) -> tuple[str, dict]:
+    """Score the revision's risk/complexity and decide whether to review it.
+
+    Records the scores and test signals on ``review_request`` (so they're
+    persisted even when skipped) and returns the rendered ``<test_signals>``
+    block plus the scoring details to merge into the review details. Raises
+    ``ReviewSkipped`` when either score is at or above the group's threshold.
+    """
+    config = get_reviewer_groups_config()
+    groups = matching_groups(patch)
+    # First matching group acts as the primary; otherwise fall back to defaults.
+    primary = groups[0] if groups else None
+    risk_threshold = (
+        primary.effective_risk_threshold(config.defaults)
+        if primary
+        else config.defaults.risk_threshold
+    )
+    complexity_threshold = (
+        primary.effective_complexity_threshold(config.defaults)
+        if primary
+        else config.defaults.complexity_threshold
+    )
+
+    diff_stats = analyze_diff(patch.raw_diff)
+    coverage = await lookup_existing_coverage(diff_stats.non_test_paths)
+    test_signals_block = format_test_signal_block(
+        diff_stats, coverage_block=format_coverage_block(coverage)
+    )
+
+    scorer = get_risk_scorer()
+    scoring = await scorer.run(
+        title=patch.patch_title,
+        summary=patch.summary,
+        revision_id=patch.revision_id,
+        author=patch.author_phid,
+        bug_id=patch.bug_id if patch.has_bug else None,
+        raw_diff=patch.raw_diff,
+        test_signals_block=test_signals_block,
+    )
+    scores = scoring.scores
+
+    # Persist the scalar signals on the request so they survive a skip and feed
+    # the dashboard; verbose factors/coverage go in the details JSON.
+    review_request.risk = scores.risk
+    review_request.complexity = scores.complexity
+    review_request.in_diff_test_signal = diff_stats.in_diff_test_signal
+    review_request.coverage_signal = coverage.coverage_signal
+
+    scoring_details = {
+        "scoring_model": scoring.model,
+        "scoring_usage": scoring.usage,
+        "risk_factors": scores.risk_factors,
+        "complexity_factors": scores.complexity_factors,
+        "coverage": {
+            "covered_paths": coverage.covered_paths,
+            "uncovered_paths": coverage.uncovered_paths,
+            "candidate_count": coverage.candidate_count,
+        },
+        "thresholds": {
+            "risk": risk_threshold,
+            "complexity": complexity_threshold,
+            "group": primary.slug if primary else None,
+        },
+    }
+
+    if scores.risk >= risk_threshold or scores.complexity >= complexity_threshold:
+        logger.info(
+            "Skipping review for D%s: risk=%s complexity=%s (thresholds %s/%s)",
+            review_request.revision_id,
+            scores.risk,
+            scores.complexity,
+            risk_threshold,
+            complexity_threshold,
+        )
+        raise ReviewSkipped("above_threshold", details=scoring_details)
+
+    return test_signals_block, scoring_details
 
 
 def submit_review_to_platform(
diff --git a/services/reviewhelper-api/app/reviewer_groups.py b/services/reviewhelper-api/app/reviewer_groups.py
new file mode 100644
index 0000000000..377c732b7b
--- /dev/null
+++ b/services/reviewhelper-api/app/reviewer_groups.py
@@ -0,0 +1,105 @@
+"""Per-reviewer-group configuration for automated code review.
+
+This is the shared config that risk/complexity gating (and, later, group-specific
+skills and member-author restriction) reads. It is a checked-in YAML file rather
+than env vars or a DB table: the config is a small, structured, version-controlled
+list, and "easing in" a group is a reviewed change to the file.
+
+The file path defaults to ``reviewer_groups.yaml`` at the service root and can be
+overridden with the ``REVIEWER_GROUPS_CONFIG`` environment variable.
+"""
+
+import os
+from functools import cache
+from logging import getLogger
+from pathlib import Path
+
+import yaml
+from pydantic import BaseModel, Field
+
+logger = getLogger(__name__)
+
+_DEFAULT_CONFIG_PATH = Path(__file__).resolve().parent.parent / "reviewer_groups.yaml"
+
+
+class Defaults(BaseModel):
+    """Fallback thresholds applied to groups that don't override them."""
+
+    # A review runs only when BOTH scores are strictly below their threshold.
+    risk_threshold: int = 3
+    complexity_threshold: int = 3
+
+
+class ReviewerGroup(BaseModel):
+    """Configuration for a single Phabricator reviewer group (project)."""
+
+    slug: str
+    enabled: bool = False
+    risk_threshold: int | None = None
+    complexity_threshold: int | None = None
+
+    def effective_risk_threshold(self, defaults: Defaults) -> int:
+        return (
+            self.risk_threshold
+            if self.risk_threshold is not None
+            else defaults.risk_threshold
+        )
+
+    def effective_complexity_threshold(self, defaults: Defaults) -> int:
+        return (
+            self.complexity_threshold
+            if self.complexity_threshold is not None
+            else defaults.complexity_threshold
+        )
+
+
+class ReviewerGroupsConfig(BaseModel):
+    defaults: Defaults = Field(default_factory=Defaults)
+    groups: list[ReviewerGroup] = Field(default_factory=list)
+
+    def by_slug(self, slug: str) -> ReviewerGroup | None:
+        for group in self.groups:
+            if group.slug == slug:
+                return group
+        return None
+
+
+def _config_path() -> Path:
+    return Path(os.getenv("REVIEWER_GROUPS_CONFIG", str(_DEFAULT_CONFIG_PATH)))
+
+
+@cache
+def get_reviewer_groups_config() -> ReviewerGroupsConfig:
+    """Load and cache the reviewer-groups config for the process lifetime."""
+    path = _config_path()
+    if not path.exists():
+        logger.warning(
+            "reviewer groups config not found at %s; using empty config", path
+        )
+        return ReviewerGroupsConfig()
+    data = yaml.safe_load(path.read_text()) or {}
+    return ReviewerGroupsConfig.model_validate(data)
+
+
+def matching_groups(patch) -> list[ReviewerGroup]:
+    """Return the configured groups the patch is requesting review from.
+
+    A patch matches a group when one of its reviewer-group PHIDs resolves to the
+    same project as the group's slug. The result preserves config order so the
+    first match can act as the "primary" group.
+    """
+    # Imported here so the heavy Phabricator/searchfox import chain isn't pulled
+    # in just to parse the config (and so tests can monkeypatch the resolver).
+    from bugbug.tools.core.platforms import phabricator as phab
+
+    config = get_reviewer_groups_config()
+    reviewer_projects = set(patch.reviewer_project_phids)
+    if not reviewer_projects:
+        return []
+
+    matched = []
+    for group in config.groups:
+        phid = phab.resolve_project_phid(group.slug)
+        if phid and phid in reviewer_projects:
+            matched.append(group)
+    return matched
diff --git a/services/reviewhelper-api/app/routers/internal.py b/services/reviewhelper-api/app/routers/internal.py
index b83b68afdd..4886ae86c7 100644
--- a/services/reviewhelper-api/app/routers/internal.py
+++ b/services/reviewhelper-api/app/routers/internal.py
@@ -13,6 +13,7 @@
 from app.enums import ReviewStatus
 from app.review_processor import (
     ReviewProcessingError,
+    ReviewSkipped,
     RevisionNotYetPublicError,
     process_review,
     submit_review_to_platform,
@@ -137,6 +138,14 @@ async def _set_retry_pending(db: AsyncSession):
             review_request.status = ReviewStatus.RETRY_PENDING
             await db.commit()
             return Response(status_code=status.HTTP_409_CONFLICT)
+        except ReviewSkipped as e:
+            logger.info("Review request %s skipped: %s", review_request_id, e.reason)
+            review_request.status = ReviewStatus.SKIPPED
+            review_request.skipped_reason = e.reason
+            review_request.details = e.details
+            await db.commit()
+            # Terminal, like a failure: 204 so Cloud Tasks doesn't retry.
+            return Response(status_code=status.HTTP_204_NO_CONTENT)
         except ReviewProcessingError as e:
             review_request.error = str(e)
             review_request.status = ReviewStatus.FAILED
diff --git a/services/reviewhelper-api/app/routers/request.py b/services/reviewhelper-api/app/routers/request.py
index e3ba4578b8..40955df049 100644
--- a/services/reviewhelper-api/app/routers/request.py
+++ b/services/reviewhelper-api/app/routers/request.py
@@ -117,6 +117,9 @@ def _build_response_message(review_request: ReviewRequest) -> str:
     if review_request.status == ReviewStatus.PUBLISHED:
         return f"Review Helper already posted its review for Diff {review_request.diff_id}."
 
+    if review_request.status == ReviewStatus.SKIPPED:
+        return f"Review Helper did not review Diff {review_request.diff_id} (reason: {review_request.skipped_reason})."
+
     if review_request.status == ReviewStatus.FAILED:
         return f"The review processing for Diff {review_request.diff_id} failed with error: {review_request.error}"
 
diff --git a/services/reviewhelper-api/pyproject.toml b/services/reviewhelper-api/pyproject.toml
index 58c28a0ec5..71dfcb227e 100644
--- a/services/reviewhelper-api/pyproject.toml
+++ b/services/reviewhelper-api/pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
     "google-cloud-tasks>=2.22.0",
     "cloud-sql-python-connector[asyncpg]>=1.20.3",
     "python-dotenv>=1.0.0",
+    "pyyaml>=6.0",
     "bugbug",
     "sentry-sdk>=2.60.0",
 ]
diff --git a/services/reviewhelper-api/reviewer_groups.yaml b/services/reviewhelper-api/reviewer_groups.yaml
new file mode 100644
index 0000000000..ea802f68d1
--- /dev/null
+++ b/services/reviewhelper-api/reviewer_groups.yaml
@@ -0,0 +1,22 @@
+# Per-reviewer-group configuration for automated code review.
+#
+# `defaults` apply to any revision whose reviewer group isn't listed below, and
+# to listed groups that don't override a value. A review runs only when BOTH the
+# risk and complexity scores are strictly below their thresholds, so raising a
+# threshold widens what gets auto-reviewed ("easing in").
+#
+# Thresholds are 0-10. Start conservative (3/3 → only scores of 0,1,2 pass) and
+# raise as the team grows comfortable.
+
+defaults:
+  risk_threshold: 3
+  complexity_threshold: 3
+
+groups:
+  - slug: ip-protection-reviewers
+    risk_threshold: 3
+    complexity_threshold: 3
+
+  - slug: home-newtab-reviewers
+    risk_threshold: 3
+    complexity_threshold: 3
diff --git a/services/reviewhelper-api/tests/conftest.py b/services/reviewhelper-api/tests/conftest.py
new file mode 100644
index 0000000000..7bc0c21247
--- /dev/null
+++ b/services/reviewhelper-api/tests/conftest.py
@@ -0,0 +1,23 @@
+import os
+
+# app.config builds a pydantic Settings() at import time, which requires a set
+# of env vars. Provide dummies so the app package is importable under test.
+_DUMMY_ENV = {
+    "cloud_sql_instance": "test:test:test",
+    "db_user": "test",
+    "db_pass": "test",
+    "db_name": "test",
+    "external_api_key": "test",
+    "internal_api_key": "test",
+    "cloud_tasks_project": "test",
+    "cloud_tasks_location": "test",
+    "cloud_tasks_queue": "test",
+    "worker_url": "https://worker.test",
+    "phabricator_url": "https://phabricator.test",
+    "phabricator_api_key": "test",
+    "bugzilla_url": "https://bugzilla.test",
+    "bugzilla_api_key": "test",
+}
+
+for key, value in _DUMMY_ENV.items():
+    os.environ.setdefault(key, value)
diff --git a/services/reviewhelper-api/tests/test_review_gating.py b/services/reviewhelper-api/tests/test_review_gating.py
new file mode 100644
index 0000000000..f6abf61bee
--- /dev/null
+++ b/services/reviewhelper-api/tests/test_review_gating.py
@@ -0,0 +1,90 @@
+from types import SimpleNamespace
+from unittest.mock import AsyncMock
+
+import pytest
+from app import review_processor
+from app.review_processor import ReviewSkipped, _score_and_gate
+from app.reviewer_groups import Defaults, ReviewerGroup, ReviewerGroupsConfig
+
+from bugbug.tools.code_review.scoring import Scores, ScoringResult
+from bugbug.tools.code_review.test_coverage import ExistingCoverage
+
+
+def _fake_patch():
+    return SimpleNamespace(
+        raw_diff="diff --git a/foo.js b/foo.js\n--- a/foo.js\n+++ b/foo.js\n@@ -1 +1 @@\n+x\n",
+        patch_title="Tweak",
+        summary=None,
+        revision_id=123,
+        author_phid="PHID-USER-x",
+        has_bug=False,
+        bug_id=0,
+        reviewer_project_phids=[],
+    )
+
+
+def _patch_common(monkeypatch, *, risk, complexity, group=None):
+    config = ReviewerGroupsConfig(
+        defaults=Defaults(risk_threshold=3, complexity_threshold=3)
+    )
+    monkeypatch.setattr(review_processor, "get_reviewer_groups_config", lambda: config)
+    monkeypatch.setattr(
+        review_processor, "matching_groups", lambda patch: [group] if group else []
+    )
+    monkeypatch.setattr(
+        review_processor,
+        "lookup_existing_coverage",
+        AsyncMock(return_value=ExistingCoverage(coverage_signal="uncovered")),
+    )
+    scorer = SimpleNamespace(
+        run=AsyncMock(
+            return_value=ScoringResult(
+                scores=Scores(risk=risk, complexity=complexity),
+                model="claude-haiku-4-5",
+                usage={"input_tokens": 10},
+            )
+        )
+    )
+    monkeypatch.setattr(review_processor, "get_risk_scorer", lambda: scorer)
+    return config
+
+
+@pytest.mark.asyncio
+async def test_gate_skips_when_above_threshold(monkeypatch):
+    _patch_common(monkeypatch, risk=5, complexity=1)
+    review_request = SimpleNamespace(revision_id=123)
+
+    with pytest.raises(ReviewSkipped) as exc:
+        await _score_and_gate(_fake_patch(), review_request)
+
+    assert exc.value.reason == "above_threshold"
+    # Scores are recorded on the request even though we skipped.
+    assert review_request.risk == 5
+    assert review_request.complexity == 1
+    assert review_request.coverage_signal == "uncovered"
+
+
+@pytest.mark.asyncio
+async def test_gate_passes_when_below_threshold(monkeypatch):
+    _patch_common(monkeypatch, risk=1, complexity=2)
+    review_request = SimpleNamespace(revision_id=123)
+
+    block, details = await _score_and_gate(_fake_patch(), review_request)
+
+    assert "<test_signals>" in block
+    assert details["scoring_model"] == "claude-haiku-4-5"
+    assert review_request.risk == 1
+    assert review_request.complexity == 2
+
+
+@pytest.mark.asyncio
+async def test_gate_uses_group_threshold_override(monkeypatch):
+    group = ReviewerGroup(slug="ip-protection-reviewers", risk_threshold=6)
+    _patch_common(monkeypatch, risk=5, complexity=1, group=group)
+    review_request = SimpleNamespace(revision_id=123)
+
+    # risk=5 would skip under the default threshold of 3, but the group raised
+    # it to 6, so the review proceeds.
+    block, details = await _score_and_gate(_fake_patch(), review_request)
+    assert details["thresholds"]["group"] == "ip-protection-reviewers"
+    assert details["thresholds"]["risk"] == 6
diff --git a/services/reviewhelper-api/tests/test_reviewer_groups.py b/services/reviewhelper-api/tests/test_reviewer_groups.py
new file mode 100644
index 0000000000..da4787993a
--- /dev/null
+++ b/services/reviewhelper-api/tests/test_reviewer_groups.py
@@ -0,0 +1,57 @@
+from types import SimpleNamespace
+
+from app.reviewer_groups import (
+    Defaults,
+    ReviewerGroup,
+    ReviewerGroupsConfig,
+    matching_groups,
+)
+
+
+def test_effective_thresholds_fall_back_to_defaults():
+    defaults = Defaults(risk_threshold=3, complexity_threshold=4)
+    group = ReviewerGroup(slug="g", risk_threshold=5)
+    assert group.effective_risk_threshold(defaults) == 5
+    assert group.effective_complexity_threshold(defaults) == 4
+
+
+def test_by_slug():
+    config = ReviewerGroupsConfig(
+        groups=[ReviewerGroup(slug="a"), ReviewerGroup(slug="b")]
+    )
+    assert config.by_slug("b").slug == "b"
+    assert config.by_slug("missing") is None
+
+
+def test_matching_groups_filters_to_requested_projects(monkeypatch):
+    config = ReviewerGroupsConfig(
+        groups=[
+            ReviewerGroup(slug="ip-protection-reviewers"),
+            ReviewerGroup(slug="home-newtab-reviewers"),
+        ]
+    )
+    monkeypatch.setattr(
+        "app.reviewer_groups.get_reviewer_groups_config", lambda: config
+    )
+
+    phid_by_slug = {
+        "ip-protection-reviewers": "PHID-PROJ-ip",
+        "home-newtab-reviewers": "PHID-PROJ-newtab",
+    }
+    monkeypatch.setattr(
+        "bugbug.tools.core.platforms.phabricator.resolve_project_phid",
+        lambda slug: phid_by_slug.get(slug),
+    )
+
+    patch = SimpleNamespace(reviewer_project_phids=["PHID-PROJ-ip"])
+    matched = matching_groups(patch)
+    assert [g.slug for g in matched] == ["ip-protection-reviewers"]
+
+
+def test_matching_groups_empty_when_no_reviewer_projects(monkeypatch):
+    monkeypatch.setattr(
+        "app.reviewer_groups.get_reviewer_groups_config",
+        lambda: ReviewerGroupsConfig(groups=[ReviewerGroup(slug="a")]),
+    )
+    patch = SimpleNamespace(reviewer_project_phids=[])
+    assert matching_groups(patch) == []
diff --git a/tests/test_code_review_scoring.py b/tests/test_code_review_scoring.py
new file mode 100644
index 0000000000..acdc6f6228
--- /dev/null
+++ b/tests/test_code_review_scoring.py
@@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from bugbug.tools.code_review.diff_analysis import (
+    analyze_diff,
+    format_test_signal_block,
+)
+from bugbug.tools.code_review.scoring import RiskComplexityScorer, Scores
+from bugbug.tools.code_review.test_coverage import (
+    ExistingCoverage,
+    format_coverage_block,
+    lookup_existing_coverage,
+)
+
+
+def _diff(*files: str) -> str:
+    return "\n".join(files)
+
+
+def _git_file(path: str, added: int = 1, removed: int = 0) -> str:
+    body = "\n".join(["+new line"] * added + ["-old line"] * removed)
+    return (
+        f"diff --git a/{path} b/{path}\n"
+        f"--- a/{path}\n+++ b/{path}\n"
+        f"@@ -1,{removed or 1} +1,{added or 1} @@\n"
+        f"{body}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# diff_analysis
+# ---------------------------------------------------------------------------
+
+
+def test_no_code_change():
+    assert analyze_diff("").in_diff_test_signal == "no_code_change"
+
+
+def test_tests_only():
+    diff = _git_file("browser/components/foo/test/browser_foo.js", added=5)
+    stats = analyze_diff(diff)
+    assert stats.in_diff_test_signal == "tests_only"
+    assert stats.non_test_files_changed == 0
+    assert stats.test_files_changed == 1
+
+
+def test_absent_when_only_non_test_code():
+    diff = _git_file("browser/components/foo/Foo.sys.mjs", added=20)
+    stats = analyze_diff(diff)
+    assert stats.in_diff_test_signal == "absent"
+    assert stats.non_test_paths == ["browser/components/foo/Foo.sys.mjs"]
+
+
+def test_sparse_low_test_ratio():
+    diff = _diff(
+        _git_file("browser/components/foo/Foo.sys.mjs", added=100),
+        _git_file("browser/components/foo/test/test_foo.js", added=5),
+    )
+    assert analyze_diff(diff).in_diff_test_signal == "sparse"
+
+
+def test_adequate_high_test_ratio():
+    diff = _diff(
+        _git_file("browser/components/foo/Foo.sys.mjs", added=10),
+        _git_file("browser/components/foo/test/test_foo.js", added=10),
+    )
+    assert analyze_diff(diff).in_diff_test_signal == "adequate"
+
+
+def test_format_test_signal_block_includes_coverage():
+    stats = analyze_diff(_git_file("a/b/C.cpp", added=3))
+    block = format_test_signal_block(stats, coverage_block="coverage_signal=uncovered")
+    assert "<test_signals>" in block
+    assert "in_diff_test_signal=absent" in block
+    assert "coverage_signal=uncovered" in block
+    assert block.rstrip().endswith("</test_signals>")
+
+
+# ---------------------------------------------------------------------------
+# test_coverage
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_lookup_no_non_test_files():
+    result = await lookup_existing_coverage([])
+    assert result.coverage_signal == "no_non_test_files"
+
+
+@pytest.mark.asyncio
+async def test_lookup_partial_coverage():
+    async def fake_search(query, tests, limit):
+        if query == "Covered":
+            return [("dom/tests/test_covered.js", 10, "...")]
+        return []
+
+    client = AsyncMock()
+    client.search.side_effect = fake_search
+    with patch(
+        "bugbug.tools.code_review.test_coverage._get_client", return_value=client
+    ):
+        result = await lookup_existing_coverage(
+            ["dom/base/Covered.cpp", "dom/base/Uncovered.cpp"]
+        )
+    assert result.coverage_signal == "partial"
+    assert "dom/base/Covered.cpp" in result.covered_paths
+    assert "dom/base/Uncovered.cpp" in result.uncovered_paths
+
+
+@pytest.mark.asyncio
+async def test_lookup_searchfox_error_degrades():
+    client = AsyncMock()
+    client.search.side_effect = Exception("searchfox down")
+    with patch(
+        "bugbug.tools.code_review.test_coverage._get_client", return_value=client
+    ):
+        result = await lookup_existing_coverage(["dom/base/Element.cpp"])
+    assert result.coverage_signal == "skipped_searchfox_error"
+
+
+@pytest.mark.asyncio
+async def test_lookup_large_diff_skipped():
+    client = AsyncMock()
+    client.search.return_value = []
+    paths = [f"dom/base/File{i}.cpp" for i in range(15)]
+    with patch(
+        "bugbug.tools.code_review.test_coverage._get_client", return_value=client
+    ):
+        result = await lookup_existing_coverage(paths, file_cap=10)
+    assert result.coverage_signal == "skipped_large_diff"
+    assert result.candidate_count == 15
+
+
+def test_format_coverage_block_none_when_no_files():
+    assert (
+        format_coverage_block(ExistingCoverage(coverage_signal="no_non_test_files"))
+        is None
+    )
+
+
+# ---------------------------------------------------------------------------
+# scoring
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_risk_scorer_parses_and_captures_usage():
+    fake_llm = MagicMock()
+    structured = AsyncMock()
+    fake_llm.with_structured_output.return_value = structured
+    scorer = RiskComplexityScorer(fake_llm, "claude-haiku-4-5")
+
+    raw = SimpleNamespace(
+        usage_metadata={
+            "input_tokens": 100,
+            "output_tokens": 20,
+            "input_token_details": {"cache_read": 10, "cache_creation": 5},
+        }
+    )
+    structured.ainvoke.return_value = {
+        "parsed": Scores(risk=2, complexity=1, risk_factors=["leaf UI"]),
+        "raw": raw,
+    }
+
+    result = await scorer.run(
+        title="Tweak button",
+        summary=None,
+        revision_id=42,
+        author="PHID-USER-x",
+        bug_id=None,
+        raw_diff="diff --git a/a b/a",
+        test_signals_block="<test_signals>...</test_signals>",
+    )
+
+    assert result.scores.risk == 2
+    assert result.scores.complexity == 1
+    assert result.model == "claude-haiku-4-5"
+    assert result.usage["input_tokens"] == 100
+    assert result.usage["cache_read_input_tokens"] == 10
+    assert result.usage["cache_creation_input_tokens"] == 5
+
+
+@pytest.mark.asyncio
+async def test_risk_scorer_raises_on_parse_failure():
+    from bugbug.tools.core.exceptions import ModelResultError
+
+    fake_llm = MagicMock()
+    structured = AsyncMock()
+    fake_llm.with_structured_output.return_value = structured
+    scorer = RiskComplexityScorer(fake_llm, "claude-haiku-4-5")
+    structured.ainvoke.return_value = {
+        "parsed": None,
+        "raw": SimpleNamespace(),
+        "parsing_error": "bad",
+    }
+
+    with pytest.raises(ModelResultError):
+        await scorer.run(
+            title="t",
+            summary=None,
+            revision_id=1,
+            author="a",
+            bug_id=None,
+            raw_diff="d",
+        )

From 11661611fcdf18437f5b9e15fb303390406362bd Mon Sep 17 00:00:00 2001
From: Jared Wein <jwein@mozilla.com>
Date: Tue, 23 Jun 2026 16:39:26 -0400
Subject: [PATCH 2/4] Match reviewer groups via aliases and rotation history

Consume PhabricatorPatch.historical_reviewer_project_phids so a group that was
assigned via a rotation (added, then swapped for an individual and removed)
still matches. Add an `aliases` field to ReviewerGroup so a rotation project
(e.g. home-newtab-reviewers-rotation) counts as the same logical group as
home-newtab-reviewers, and resolve every slug (own + aliases) when matching.
Falls back to the current reviewer snapshot for patches without history.
---
 .../reviewhelper-api/app/reviewer_groups.py   | 27 ++++++++++---
 .../reviewhelper-api/reviewer_groups.yaml     |  5 +++
 .../tests/test_reviewer_groups.py             | 40 +++++++++++++++++++
 3 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/services/reviewhelper-api/app/reviewer_groups.py b/services/reviewhelper-api/app/reviewer_groups.py
index 377c732b7b..02b62bbe79 100644
--- a/services/reviewhelper-api/app/reviewer_groups.py
+++ b/services/reviewhelper-api/app/reviewer_groups.py
@@ -34,10 +34,18 @@ class ReviewerGroup(BaseModel):
     """Configuration for a single Phabricator reviewer group (project)."""
 
     slug: str
+    # Additional project slugs that count as this same logical group — e.g. a
+    # review rotation's project (`<group>-rotation`), which is added as the
+    # reviewer and then swapped for an individual member.
+    aliases: list[str] = Field(default_factory=list)
     enabled: bool = False
     risk_threshold: int | None = None
     complexity_threshold: int | None = None
 
+    def all_slugs(self) -> list[str]:
+        """The group's own slug plus any alias slugs."""
+        return [self.slug, *self.aliases]
+
     def effective_risk_threshold(self, defaults: Defaults) -> int:
         return (
             self.risk_threshold
@@ -84,22 +92,29 @@ def get_reviewer_groups_config() -> ReviewerGroupsConfig:
 def matching_groups(patch) -> list[ReviewerGroup]:
     """Return the configured groups the patch is requesting review from.
 
-    A patch matches a group when one of its reviewer-group PHIDs resolves to the
-    same project as the group's slug. The result preserves config order so the
-    first match can act as the "primary" group.
+    A patch matches a group when any of the group's slugs (its own or an alias)
+    resolves to a project that was a reviewer of the revision. We match against
+    ``historical_reviewer_project_phids`` so a rotation group that was added and
+    later removed (replaced by an individual) still counts; falls back to the
+    current reviewer snapshot for patches that don't expose history. The result
+    preserves config order so the first match can act as the "primary" group.
     """
     # Imported here so the heavy Phabricator/searchfox import chain isn't pulled
     # in just to parse the config (and so tests can monkeypatch the resolver).
     from bugbug.tools.core.platforms import phabricator as phab
 
     config = get_reviewer_groups_config()
-    reviewer_projects = set(patch.reviewer_project_phids)
+    reviewer_projects = set(
+        getattr(patch, "historical_reviewer_project_phids", None)
+        or patch.reviewer_project_phids
+    )
     if not reviewer_projects:
         return []
 
     matched = []
     for group in config.groups:
-        phid = phab.resolve_project_phid(group.slug)
-        if phid and phid in reviewer_projects:
+        group_phids = {phab.resolve_project_phid(slug) for slug in group.all_slugs()}
+        group_phids.discard(None)
+        if group_phids & reviewer_projects:
             matched.append(group)
     return matched
diff --git a/services/reviewhelper-api/reviewer_groups.yaml b/services/reviewhelper-api/reviewer_groups.yaml
index ea802f68d1..635aba74f2 100644
--- a/services/reviewhelper-api/reviewer_groups.yaml
+++ b/services/reviewhelper-api/reviewer_groups.yaml
@@ -12,11 +12,16 @@ defaults:
   risk_threshold: 3
   complexity_threshold: 3
 
+# `aliases` lists extra project slugs that count as the same logical group —
+# notably a review rotation's `<group>-rotation` project, which is added as the
+# reviewer and then swapped for an individual member before review time.
 groups:
   - slug: ip-protection-reviewers
+    aliases: [ip-protection-reviewers-rotation]
     risk_threshold: 3
     complexity_threshold: 3
 
   - slug: home-newtab-reviewers
+    aliases: [home-newtab-reviewers-rotation]
     risk_threshold: 3
     complexity_threshold: 3
diff --git a/services/reviewhelper-api/tests/test_reviewer_groups.py b/services/reviewhelper-api/tests/test_reviewer_groups.py
index da4787993a..c0bd03b569 100644
--- a/services/reviewhelper-api/tests/test_reviewer_groups.py
+++ b/services/reviewhelper-api/tests/test_reviewer_groups.py
@@ -55,3 +55,43 @@ def test_matching_groups_empty_when_no_reviewer_projects(monkeypatch):
     )
     patch = SimpleNamespace(reviewer_project_phids=[])
     assert matching_groups(patch) == []
+
+
+def test_all_slugs_includes_aliases():
+    group = ReviewerGroup(
+        slug="home-newtab-reviewers", aliases=["home-newtab-reviewers-rotation"]
+    )
+    assert group.all_slugs() == [
+        "home-newtab-reviewers",
+        "home-newtab-reviewers-rotation",
+    ]
+
+
+def test_matching_groups_recovers_rotation_via_alias_and_history(monkeypatch):
+    # Models D308166: the rotation project was a reviewer, then removed and
+    # replaced by an individual — so it survives only in the historical list.
+    config = ReviewerGroupsConfig(
+        groups=[
+            ReviewerGroup(
+                slug="home-newtab-reviewers",
+                aliases=["home-newtab-reviewers-rotation"],
+            )
+        ]
+    )
+    monkeypatch.setattr(
+        "app.reviewer_groups.get_reviewer_groups_config", lambda: config
+    )
+    monkeypatch.setattr(
+        "bugbug.tools.core.platforms.phabricator.resolve_project_phid",
+        lambda slug: {
+            "home-newtab-reviewers": "PHID-PROJ-newtab",
+            "home-newtab-reviewers-rotation": "PHID-PROJ-newtabrotation",
+        }.get(slug),
+    )
+
+    patch = SimpleNamespace(
+        reviewer_project_phids=[],  # group already removed from current reviewers
+        historical_reviewer_project_phids=["PHID-PROJ-newtabrotation"],
+    )
+    matched = matching_groups(patch)
+    assert [g.slug for g in matched] == ["home-newtab-reviewers"]

From 259cb76f02927c4500f47c0e4925d0a54103a6ce Mon Sep 17 00:00:00 2001
From: Jared Wein <jwein@mozilla.com>
Date: Thu, 28 May 2026 17:31:17 -0400
Subject: [PATCH 3/4] Limit automated review to enabled groups and member
 authors

Builds on the reviewer-groups config to control rollout per team:

- Only revisions requesting review from a group marked `enabled: true` are
  auto-reviewed (skip reason "not_enabled"), so teams can be turned on one at a
  time (e.g. ip-protection-reviewers first, home-newtab-reviewers later).
- `restrict_to_member_authors` (default true) reviews only patches whose author
  is a member of the group's Phabricator project (skip "author_not_in_group");
  an empty membership lookup over-includes rather than dropping everything.
- An optional per-group `opt_out` PHID list excludes individual members (skip
  "author_opted_out").

These cheap metadata gates run before the paid scoring call. Adds tests for
each gate.
---
 .../reviewhelper-api/app/review_processor.py  |  52 +++++---
 .../reviewhelper-api/app/reviewer_groups.py   |   7 ++
 .../reviewhelper-api/reviewer_groups.yaml     |  10 ++
 .../tests/test_review_gating.py               | 117 ++++++++++++++++--
 4 files changed, 162 insertions(+), 24 deletions(-)

diff --git a/services/reviewhelper-api/app/review_processor.py b/services/reviewhelper-api/app/review_processor.py
index 3f002fc784..c156e27459 100644
--- a/services/reviewhelper-api/app/review_processor.py
+++ b/services/reviewhelper-api/app/review_processor.py
@@ -21,6 +21,8 @@
 from bugbug.tools.core.platforms.phabricator import (
     PhabricatorPatch,
     get_phabricator_client,
+    get_project_members,
+    resolve_project_phid,
 )
 
 logger = logging.getLogger(__name__)
@@ -148,19 +150,20 @@ async def _score_and_gate(
     ``ReviewSkipped`` when either score is at or above the group's threshold.
     """
     config = get_reviewer_groups_config()
-    groups = matching_groups(patch)
-    # First matching group acts as the primary; otherwise fall back to defaults.
-    primary = groups[0] if groups else None
-    risk_threshold = (
-        primary.effective_risk_threshold(config.defaults)
-        if primary
-        else config.defaults.risk_threshold
-    )
-    complexity_threshold = (
-        primary.effective_complexity_threshold(config.defaults)
-        if primary
-        else config.defaults.complexity_threshold
-    )
+
+    # Only revisions requesting review from an enabled group are auto-reviewed.
+    enabled_groups = [group for group in matching_groups(patch) if group.enabled]
+    if not enabled_groups:
+        raise ReviewSkipped("not_enabled")
+
+    # First enabled matching group acts as the primary.
+    primary = enabled_groups[0]
+
+    # Cheap author gates run before the (paid) scoring call.
+    _enforce_author_gates(primary, patch)
+
+    risk_threshold = primary.effective_risk_threshold(config.defaults)
+    complexity_threshold = primary.effective_complexity_threshold(config.defaults)
 
     diff_stats = analyze_diff(patch.raw_diff)
     coverage = await lookup_existing_coverage(diff_stats.non_test_paths)
@@ -200,7 +203,7 @@ async def _score_and_gate(
         "thresholds": {
             "risk": risk_threshold,
             "complexity": complexity_threshold,
-            "group": primary.slug if primary else None,
+            "group": primary.slug,
         },
     }
 
@@ -218,6 +221,27 @@ async def _score_and_gate(
     return test_signals_block, scoring_details
 
 
+def _enforce_author_gates(group, patch: PhabricatorPatch) -> None:
+    """Skip review based on the revision author's relationship to the group.
+
+    Raises ``ReviewSkipped`` when the author has opted out, or when the group
+    restricts review to its own members and the author isn't one.
+    """
+    author_phid = patch.author_phid
+
+    if author_phid in group.opt_out:
+        raise ReviewSkipped("author_opted_out")
+
+    if group.restrict_to_member_authors:
+        group_phid = resolve_project_phid(group.slug)
+        members = get_project_members(group_phid) if group_phid else frozenset()
+        # If the membership lookup is empty (transient failure / unresolved
+        # project), don't skip — we'd rather over-include than silently drop
+        # everything for the group.
+        if members and author_phid not in members:
+            raise ReviewSkipped("author_not_in_group")
+
+
 def submit_review_to_platform(
     review_request: ReviewRequest, generated_comments: Iterable[GeneratedComment]
 ) -> AsyncIterator[tuple[GeneratedComment, int]]:
diff --git a/services/reviewhelper-api/app/reviewer_groups.py b/services/reviewhelper-api/app/reviewer_groups.py
index 02b62bbe79..a035cece2c 100644
--- a/services/reviewhelper-api/app/reviewer_groups.py
+++ b/services/reviewhelper-api/app/reviewer_groups.py
@@ -41,6 +41,13 @@ class ReviewerGroup(BaseModel):
     enabled: bool = False
     risk_threshold: int | None = None
     complexity_threshold: int | None = None
+    # When True, only review revisions authored by a member of this group's
+    # Phabricator project. Lets a team dogfood automated review on its own
+    # patches before opening it to outside contributors.
+    restrict_to_member_authors: bool = True
+    # PHIDs of group members who have opted out of automated review on their
+    # own patches.
+    opt_out: list[str] = Field(default_factory=list)
 
     def all_slugs(self) -> list[str]:
         """The group's own slug plus any alias slugs."""
diff --git a/services/reviewhelper-api/reviewer_groups.yaml b/services/reviewhelper-api/reviewer_groups.yaml
index 635aba74f2..64633a4b05 100644
--- a/services/reviewhelper-api/reviewer_groups.yaml
+++ b/services/reviewhelper-api/reviewer_groups.yaml
@@ -12,16 +12,26 @@ defaults:
   risk_threshold: 3
   complexity_threshold: 3
 
+# Only revisions whose reviewer group is listed AND `enabled: true` get an
+# automated review. Roll out one group at a time by flipping `enabled`.
+# `restrict_to_member_authors` (default true) limits review to patches authored
+# by a member of the group; add a member's PHID to `opt_out` to exclude them.
 # `aliases` lists extra project slugs that count as the same logical group —
 # notably a review rotation's `<group>-rotation` project, which is added as the
 # reviewer and then swapped for an individual member before review time.
 groups:
   - slug: ip-protection-reviewers
     aliases: [ip-protection-reviewers-rotation]
+    enabled: true
     risk_threshold: 3
     complexity_threshold: 3
+    restrict_to_member_authors: true
+    opt_out: []
 
   - slug: home-newtab-reviewers
     aliases: [home-newtab-reviewers-rotation]
+    enabled: false
     risk_threshold: 3
     complexity_threshold: 3
+    restrict_to_member_authors: true
+    opt_out: []
diff --git a/services/reviewhelper-api/tests/test_review_gating.py b/services/reviewhelper-api/tests/test_review_gating.py
index f6abf61bee..509f80daf9 100644
--- a/services/reviewhelper-api/tests/test_review_gating.py
+++ b/services/reviewhelper-api/tests/test_review_gating.py
@@ -9,21 +9,31 @@
 from bugbug.tools.code_review.scoring import Scores, ScoringResult
 from bugbug.tools.code_review.test_coverage import ExistingCoverage
 
+AUTHOR = "PHID-USER-x"
 
-def _fake_patch():
+
+def _fake_patch(author_phid=AUTHOR):
     return SimpleNamespace(
         raw_diff="diff --git a/foo.js b/foo.js\n--- a/foo.js\n+++ b/foo.js\n@@ -1 +1 @@\n+x\n",
         patch_title="Tweak",
         summary=None,
         revision_id=123,
-        author_phid="PHID-USER-x",
+        author_phid=author_phid,
         has_bug=False,
         bug_id=0,
-        reviewer_project_phids=[],
+        reviewer_project_phids=["PHID-PROJ-x"],
     )
 
 
-def _patch_common(monkeypatch, *, risk, complexity, group=None):
+def _patch_common(monkeypatch, *, risk, complexity, group=None, members=frozenset()):
+    if group is None:
+        # Default: an enabled group with no author restriction, so the gating
+        # tests exercise only the score threshold.
+        group = ReviewerGroup(
+            slug="ip-protection-reviewers",
+            enabled=True,
+            restrict_to_member_authors=False,
+        )
     config = ReviewerGroupsConfig(
         defaults=Defaults(risk_threshold=3, complexity_threshold=3)
     )
@@ -31,6 +41,10 @@ def _patch_common(monkeypatch, *, risk, complexity, group=None):
     monkeypatch.setattr(
         review_processor, "matching_groups", lambda patch: [group] if group else []
     )
+    monkeypatch.setattr(
+        review_processor, "resolve_project_phid", lambda slug: "PHID-PROJ-x"
+    )
+    monkeypatch.setattr(review_processor, "get_project_members", lambda phid: members)
     monkeypatch.setattr(
         review_processor,
         "lookup_existing_coverage",
@@ -49,6 +63,11 @@ def _patch_common(monkeypatch, *, risk, complexity, group=None):
     return config
 
 
+# ---------------------------------------------------------------------------
+# score threshold gate
+# ---------------------------------------------------------------------------
+
+
 @pytest.mark.asyncio
 async def test_gate_skips_when_above_threshold(monkeypatch):
     _patch_common(monkeypatch, risk=5, complexity=1)
@@ -58,7 +77,6 @@ async def test_gate_skips_when_above_threshold(monkeypatch):
         await _score_and_gate(_fake_patch(), review_request)
 
     assert exc.value.reason == "above_threshold"
-    # Scores are recorded on the request even though we skipped.
     assert review_request.risk == 5
     assert review_request.complexity == 1
     assert review_request.coverage_signal == "uncovered"
@@ -74,17 +92,96 @@ async def test_gate_passes_when_below_threshold(monkeypatch):
     assert "<test_signals>" in block
     assert details["scoring_model"] == "claude-haiku-4-5"
     assert review_request.risk == 1
-    assert review_request.complexity == 2
 
 
 @pytest.mark.asyncio
 async def test_gate_uses_group_threshold_override(monkeypatch):
-    group = ReviewerGroup(slug="ip-protection-reviewers", risk_threshold=6)
+    group = ReviewerGroup(
+        slug="ip-protection-reviewers",
+        enabled=True,
+        risk_threshold=6,
+        restrict_to_member_authors=False,
+    )
     _patch_common(monkeypatch, risk=5, complexity=1, group=group)
     review_request = SimpleNamespace(revision_id=123)
 
-    # risk=5 would skip under the default threshold of 3, but the group raised
-    # it to 6, so the review proceeds.
-    block, details = await _score_and_gate(_fake_patch(), review_request)
+    _block, details = await _score_and_gate(_fake_patch(), review_request)
     assert details["thresholds"]["group"] == "ip-protection-reviewers"
     assert details["thresholds"]["risk"] == 6
+
+
+# ---------------------------------------------------------------------------
+# enablement + author gates (run before scoring)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_skips_when_no_enabled_group(monkeypatch):
+    disabled = ReviewerGroup(slug="ip-protection-reviewers", enabled=False)
+    scorer = _patch_common(monkeypatch, risk=0, complexity=0, group=disabled)  # noqa: F841
+
+    with pytest.raises(ReviewSkipped) as exc:
+        await _score_and_gate(_fake_patch(), SimpleNamespace(revision_id=123))
+    assert exc.value.reason == "not_enabled"
+
+
+@pytest.mark.asyncio
+async def test_skips_when_author_not_a_member(monkeypatch):
+    group = ReviewerGroup(
+        slug="ip-protection-reviewers", enabled=True, restrict_to_member_authors=True
+    )
+    _patch_common(
+        monkeypatch,
+        risk=0,
+        complexity=0,
+        group=group,
+        members=frozenset({"PHID-USER-other"}),
+    )
+
+    with pytest.raises(ReviewSkipped) as exc:
+        await _score_and_gate(_fake_patch(), SimpleNamespace(revision_id=123))
+    assert exc.value.reason == "author_not_in_group"
+
+
+@pytest.mark.asyncio
+async def test_member_author_passes_membership_gate(monkeypatch):
+    group = ReviewerGroup(
+        slug="ip-protection-reviewers", enabled=True, restrict_to_member_authors=True
+    )
+    _patch_common(
+        monkeypatch, risk=0, complexity=0, group=group, members=frozenset({AUTHOR})
+    )
+
+    block, _details = await _score_and_gate(
+        _fake_patch(), SimpleNamespace(revision_id=123)
+    )
+    assert "<test_signals>" in block
+
+
+@pytest.mark.asyncio
+async def test_empty_membership_lookup_does_not_skip(monkeypatch):
+    group = ReviewerGroup(
+        slug="ip-protection-reviewers", enabled=True, restrict_to_member_authors=True
+    )
+    # Empty membership set (transient failure) — over-include rather than drop.
+    _patch_common(monkeypatch, risk=0, complexity=0, group=group, members=frozenset())
+
+    block, _details = await _score_and_gate(
+        _fake_patch(), SimpleNamespace(revision_id=123)
+    )
+    assert "<test_signals>" in block
+
+
+@pytest.mark.asyncio
+async def test_opted_out_author_is_skipped(monkeypatch):
+    group = ReviewerGroup(
+        slug="ip-protection-reviewers",
+        enabled=True,
+        restrict_to_member_authors=False,
+        opt_out=[AUTHOR],
+    )
+    _patch_common(monkeypatch, risk=0, complexity=0, group=group)
+
+    with pytest.raises(ReviewSkipped) as exc:
+        await _score_and_gate(_fake_patch(), SimpleNamespace(revision_id=123))
+    assert exc.value.reason == "author_opted_out"

From 2dcfe1adf6ee3920186e22e08fb6d27ad16e898e Mon Sep 17 00:00:00 2001
From: Jared Wein <jwein@mozilla.com>
Date: Mon, 22 Jun 2026 15:43:08 -0400
Subject: [PATCH 4/4] Match review-context rules on reviewer groups

Implements the previously-stubbed `review` predicate in the code-review external-content engine so a review-context.toml rule can target a reviewer group (the capability upstream left unimplemented):

- review_context.py: implement _review_matches for `review.reviewer`,
  `review.blocking_reviewer`, and `review.author`. A predicate matches when
  every facet it specifies matches; reviewer groups are matched by Phabricator
  project slug (resolved via resolve_project_phid) against the revision's
  requested reviewers. Thread a ReviewInfo (author + reviewer/blocking group
  PHIDs) through rule_matches/predicate_matches/collect_actions, built from the
  patch in load_external_content_for_diff. Fails closed when no review info is
  available (e.g. a GitHub diff).
- phabricator.py: expose blocking_reviewer_project_phids and author_username,
  refactoring the reviewers fetch into a shared _reviewers cached_property.
- docs/code-review-skills.md: document the now-working `review` predicate.

This replaces the earlier bespoke per-group skill mechanism: teams attach
area guidance by reviewer group through the existing review-context.toml,
with no second skill system. Depends on the reviewer-group plumbing from the
PhabricatorPatch membership change.
---
 bugbug/tools/code_review/review_context.py | 101 +++++++++++++++++++--
 bugbug/tools/core/platforms/phabricator.py |  32 +++++--
 docs/code-review-skills.md                 |  13 ++-
 tests/test_code_review.py                  |  84 +++++++++++++++++
 4 files changed, 209 insertions(+), 21 deletions(-)

diff --git a/bugbug/tools/code_review/review_context.py b/bugbug/tools/code_review/review_context.py
index fa34beb409..586f6f1aea 100644
--- a/bugbug/tools/code_review/review_context.py
+++ b/bugbug/tools/code_review/review_context.py
@@ -150,10 +150,42 @@ def parse_diff_files(diff: str) -> set[str]:
         return files
 
 
+@dataclass(frozen=True)
+class ReviewInfo:
+    """Reviewer-related facts about a revision, used by the ``review`` predicate."""
+
+    author: str | None = None
+    # Reviewer-group (Phabricator project) PHIDs requested on the revision.
+    reviewer_groups: frozenset[str] = frozenset()
+    # The subset of reviewer groups that are *blocking* reviewers.
+    blocking_reviewer_groups: frozenset[str] = frozenset()
+
+
+async def build_review_info(patch) -> ReviewInfo:
+    """Build a ReviewInfo from a patch, tolerating patches that lack the fields."""
+    try:
+        reviewer_groups = frozenset(getattr(patch, "reviewer_project_phids", []) or [])
+        blocking = frozenset(
+            getattr(patch, "blocking_reviewer_project_phids", []) or []
+        )
+        author = getattr(patch, "author_username", None)
+    except Exception:
+        logger.exception("Could not build review info for the patch")
+        return ReviewInfo()
+    return ReviewInfo(
+        author=author,
+        reviewer_groups=reviewer_groups,
+        blocking_reviewer_groups=blocking,
+    )
+
+
 def rule_matches(
-    rule: Rule, changed_files: set[str], bug_component: str | None = None
+    rule: Rule,
+    changed_files: set[str],
+    bug_component: str | None = None,
+    review: ReviewInfo | None = None,
 ) -> bool:
-    return predicate_matches(rule.when, changed_files, bug_component)
+    return predicate_matches(rule.when, changed_files, bug_component, review)
 
 
 def _normalise_path(path: str) -> str:
@@ -188,8 +220,51 @@ def _bugzilla_matches(
     return False
 
 
-def _review_matches(predicate: ReviewPredicate) -> bool:
-    return False
+def _resolve_group_slugs(slugs: list[str]) -> set[str]:
+    """Resolve reviewer-group slugs to project PHIDs (cached, best-effort)."""
+    from bugbug.tools.core.platforms import phabricator as phab
+
+    resolved = set()
+    for slug in slugs:
+        phid = phab.resolve_project_phid(slug)
+        if phid:
+            resolved.add(phid)
+    return resolved
+
+
+def _review_matches(
+    predicate: ReviewPredicate, review: ReviewInfo | None = None
+) -> bool:
+    """Match a revision's reviewers/author against a ReviewPredicate.
+
+    Each specified facet (``author``, ``reviewer``, ``blocking_reviewer``) must
+    match — mirroring FilePredicate's include/exclude/ext conjunction. A
+    predicate with no facets, or a revision with no review info, never matches.
+    """
+    if review is None:
+        return False
+
+    matched_any_facet = False
+
+    if predicate.author:
+        if review.author not in predicate.author:
+            return False
+        matched_any_facet = True
+
+    if predicate.reviewer:
+        if not (review.reviewer_groups & _resolve_group_slugs(predicate.reviewer)):
+            return False
+        matched_any_facet = True
+
+    if predicate.blocking_reviewer:
+        if not (
+            review.blocking_reviewer_groups
+            & _resolve_group_slugs(predicate.blocking_reviewer)
+        ):
+            return False
+        matched_any_facet = True
+
+    return matched_any_facet
 
 
 def _patch_matches(predicate: PatchPredicate) -> bool:
@@ -200,20 +275,21 @@ def predicate_matches(
     predicate: Predicate,
     changed_files: set[str],
     bug_component: str | None = None,
+    review: ReviewInfo | None = None,
 ) -> bool:
     match predicate:
         case AllPredicate(children=children):
             return all(
-                predicate_matches(child, changed_files, bug_component)
+                predicate_matches(child, changed_files, bug_component, review)
                 for child in children
             )
         case AnyPredicate(children=children):
             return any(
-                predicate_matches(child, changed_files, bug_component)
+                predicate_matches(child, changed_files, bug_component, review)
                 for child in children
             )
         case NotPredicate(child=child):
-            return not predicate_matches(child, changed_files, bug_component)
+            return not predicate_matches(child, changed_files, bug_component, review)
         case AnyFilePredicate(predicate=file_predicate):
             return any(_file_matches(file_predicate, f) for f in changed_files)
         case AllFilesPredicate(predicate=file_predicate):
@@ -223,7 +299,7 @@ def predicate_matches(
         case BugzillaPredicate():
             return _bugzilla_matches(predicate, bug_component)
         case ReviewPredicate():
-            return _review_matches(predicate)
+            return _review_matches(predicate, review)
         case PatchPredicate():
             return _patch_matches(predicate)
     raise TypeError(f"Unknown predicate type: {type(predicate).__name__}")
@@ -356,6 +432,7 @@ def collect_actions(
     config: ReviewContextConfig,
     bug_component: str | None = None,
     extra_context_toml: str | None = None,
+    review: ReviewInfo | None = None,
 ) -> list[MatchedAction]:
     """Return deduplicated actions matched from config against the diff.
 
@@ -386,7 +463,7 @@ def collect_actions(
         enumerate(config.rules), key=lambda item: (-item[1].priority, item[0])
     )
     for _, rule in ordered_rules:
-        if not rule_matches(rule, changed_files, bug_component):
+        if not rule_matches(rule, changed_files, bug_component, review):
             continue
         rule_name = rule.name
         new_actions = []
@@ -530,11 +607,15 @@ async def load_external_content_for_diff(
         return []
 
     bug_component: str | None = None
+    review: ReviewInfo | None = None
     if patch is not None:
         bug_component = await patch.bug_component()
+        review = await build_review_info(patch)
 
     try:
-        actions = collect_actions(diff, config, bug_component, extra_context_toml)
+        actions = collect_actions(
+            diff, config, bug_component, extra_context_toml, review
+        )
     except (tomllib.TOMLDecodeError, ReviewContextValidationError):
         logger.exception("Could not parse extra review context")
         return []
diff --git a/bugbug/tools/core/platforms/phabricator.py b/bugbug/tools/core/platforms/phabricator.py
index 39baabd645..8a89d47c35 100644
--- a/bugbug/tools/core/platforms/phabricator.py
+++ b/bugbug/tools/core/platforms/phabricator.py
@@ -561,11 +561,12 @@ def author_phid(self) -> str:
         return self._revision_metadata["fields"]["authorPHID"]
 
     @cached_property
-    def reviewer_phids(self) -> list[str]:
-        """PHIDs of the revision's reviewers (a mix of users and projects).
+    def _reviewers(self) -> list[dict]:
+        """Raw reviewer entries from the revision's reviewers attachment.
 
         The reviewers attachment is not part of the default revision metadata,
         so we fetch it with a dedicated ``differential.revision.search`` call.
+        Each entry carries ``reviewerPHID`` and ``isBlocking``.
         """
         phabricator = get_phabricator_client()
         response = phabricator.request(
@@ -576,12 +577,12 @@ def reviewer_phids(self) -> list[str]:
         data = response.get("data") or []
         if not data:
             return []
-        reviewers = data[0].get("attachments", {}).get("reviewers", {})
-        return [
-            r["reviewerPHID"]
-            for r in reviewers.get("reviewers", [])
-            if r.get("reviewerPHID")
-        ]
+        return data[0].get("attachments", {}).get("reviewers", {}).get("reviewers", [])
+
+    @property
+    def reviewer_phids(self) -> list[str]:
+        """PHIDs of the revision's reviewers (a mix of users and projects)."""
+        return [r["reviewerPHID"] for r in self._reviewers if r.get("reviewerPHID")]
 
     @property
     def reviewer_project_phids(self) -> list[str]:
@@ -633,6 +634,21 @@ def _add(phid: str) -> None:
 
         return phids
 
+    @property
+    def blocking_reviewer_project_phids(self) -> list[str]:
+        """PHIDs of the revision's *blocking* reviewer groups."""
+        return [
+            r["reviewerPHID"]
+            for r in self._reviewers
+            if r.get("isBlocking")
+            and r.get("reviewerPHID", "").startswith("PHID-PROJ-")
+        ]
+
+    @property
+    def author_username(self) -> str | None:
+        """The revision author's Phabricator username (email), if resolvable."""
+        return self._users_info.get(self.author_phid, {}).get("email")
+
     @property
     def diff_author_phid(self) -> str:
         return self._diff_metadata["authorPHID"]
diff --git a/docs/code-review-skills.md b/docs/code-review-skills.md
index ab0cbfd9ae..ad5a611b31 100644
--- a/docs/code-review-skills.md
+++ b/docs/code-review-skills.md
@@ -83,9 +83,16 @@ The patch's associated bug ID is read from `patch.bug_id` (available on
 component string compared against the rule is `"Product::Component"`.
 
 The schema accepts `bugzilla`, `review`, and `patch` predicates. Runtime
-matching currently implements `bugzilla.component`; `bugzilla.product`,
-`bugzilla.keywords`, `bugzilla.severity`, `review.*`, and `patch.*` are parsed
-and validated but fail closed until trusted metadata is wired into the matcher.
+matching implements `bugzilla.component` and the `review` predicate
+(`review.reviewer`, `review.blocking_reviewer`, `review.author`); a `review`
+predicate matches when every facet it specifies matches, so a rule can target a
+reviewer group (e.g. `review = { reviewer = ["ip-protection-reviewers"] }`).
+Reviewer groups are matched by Phabricator project slug, resolved to the
+revision's requested reviewers. `bugzilla.product`, `bugzilla.keywords`,
+`bugzilla.severity`, and `patch.*` are parsed and validated but fail closed
+until the corresponding metadata is wired into the matcher. When the platform
+provides no reviewer information (e.g. a plain GitHub diff), `review` predicates
+fail closed.
 
 ### `fetch_revision`
 
diff --git a/tests/test_code_review.py b/tests/test_code_review.py
index 8a533766da..7b4475fb06 100644
--- a/tests/test_code_review.py
+++ b/tests/test_code_review.py
@@ -25,7 +25,9 @@
     search_text,
 )
 from bugbug.tools.code_review.review_context import (
+    ReviewInfo,
     _merge_rules,
+    build_review_info,
     external_content_manifest,
     format_external_content,
     github_repo_allowed,
@@ -39,6 +41,7 @@
     BugzillaPredicate,
     FilePredicate,
     ReviewContextValidationError,
+    ReviewPredicate,
     Rule,
 )
 from bugbug.tools.code_review.review_context_schema import (
@@ -788,6 +791,87 @@ def test_rule_bugzilla_component_matches():
     )
 
 
+def _patch_resolver(monkeypatch, slug_to_phid):
+    monkeypatch.setattr(
+        "bugbug.tools.core.platforms.phabricator.resolve_project_phid",
+        lambda slug: slug_to_phid.get(slug),
+    )
+
+
+def test_rule_review_reviewer_group_matches(monkeypatch):
+    _patch_resolver(monkeypatch, {"ip-protection-reviewers": "PHID-PROJ-ip"})
+    rule = Rule(
+        name="ip",
+        when=ReviewPredicate(reviewer=["ip-protection-reviewers"]),
+        load=[],
+    )
+    review = ReviewInfo(reviewer_groups=frozenset({"PHID-PROJ-ip"}))
+    assert rule_matches(rule, {"any/file.cpp"}, review=review)
+
+
+def test_rule_review_reviewer_group_no_match(monkeypatch):
+    _patch_resolver(monkeypatch, {"ip-protection-reviewers": "PHID-PROJ-ip"})
+    rule = Rule(
+        name="ip",
+        when=ReviewPredicate(reviewer=["ip-protection-reviewers"]),
+        load=[],
+    )
+    review = ReviewInfo(reviewer_groups=frozenset({"PHID-PROJ-other"}))
+    assert not rule_matches(rule, {"any/file.cpp"}, review=review)
+
+
+def test_rule_review_fails_closed_without_review_info(monkeypatch):
+    _patch_resolver(monkeypatch, {"ip-protection-reviewers": "PHID-PROJ-ip"})
+    rule = Rule(
+        name="ip",
+        when=ReviewPredicate(reviewer=["ip-protection-reviewers"]),
+        load=[],
+    )
+    # No review info (e.g. a GitHub diff) → never matches.
+    assert not rule_matches(rule, {"any/file.cpp"})
+
+
+def test_rule_review_blocking_reviewer(monkeypatch):
+    _patch_resolver(monkeypatch, {"security-reviewers": "PHID-PROJ-sec"})
+    rule = Rule(
+        name="sec",
+        when=ReviewPredicate(blocking_reviewer=["security-reviewers"]),
+        load=[],
+    )
+    # Present as a reviewer but not blocking → no match.
+    non_blocking = ReviewInfo(reviewer_groups=frozenset({"PHID-PROJ-sec"}))
+    assert not rule_matches(rule, {"f.cpp"}, review=non_blocking)
+    # Blocking → match.
+    blocking = ReviewInfo(
+        reviewer_groups=frozenset({"PHID-PROJ-sec"}),
+        blocking_reviewer_groups=frozenset({"PHID-PROJ-sec"}),
+    )
+    assert rule_matches(rule, {"f.cpp"}, review=blocking)
+
+
+def test_rule_review_author():
+    rule = Rule(name="a", when=ReviewPredicate(author=["alice@mozilla.com"]), load=[])
+    assert rule_matches(rule, {"f.cpp"}, review=ReviewInfo(author="alice@mozilla.com"))
+    assert not rule_matches(
+        rule, {"f.cpp"}, review=ReviewInfo(author="bob@mozilla.com")
+    )
+
+
+@pytest.mark.asyncio
+async def test_build_review_info_from_patch():
+    from types import SimpleNamespace
+
+    patch = SimpleNamespace(
+        reviewer_project_phids=["PHID-PROJ-ip", "PHID-PROJ-newtab"],
+        blocking_reviewer_project_phids=["PHID-PROJ-ip"],
+        author_username="alice@mozilla.com",
+    )
+    info = await build_review_info(patch)
+    assert info.author == "alice@mozilla.com"
+    assert info.reviewer_groups == frozenset({"PHID-PROJ-ip", "PHID-PROJ-newtab"})
+    assert info.blocking_reviewer_groups == frozenset({"PHID-PROJ-ip"})
+
+
 @pytest.mark.asyncio
 async def test_patch_bug_component():
     from bugbug.tools.core.platforms.phabricator import PhabricatorPatch