diff --git a/Cargo.lock b/Cargo.lock
index e6a916de..a9557172 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -187,6 +187,7 @@ dependencies = [
  "nix",
  "once_cell",
  "percent-encoding",
+ "proptest",
  "proxy_agent_shared",
  "regex",
  "serde",
@@ -231,11 +232,26 @@ version = "0.22.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
 
+[[package]]
+name = "bit-set"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
 [[package]]
 name = "bitflags"
-version = "2.6.0"
+version = "2.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
 
 [[package]]
 name = "bumpalo"
@@ -430,6 +446,22 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
+
 [[package]]
 name = "field-offset"
 version = "0.3.6"
@@ -517,6 +549,18 @@ dependencies = [
  "wasi",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
 [[package]]
 name = "gimli"
 version = "0.31.0"
@@ -733,6 +777,12 @@ dependencies = [
  "windows-targets",
 ]
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+
 [[package]]
 name = "log"
 version = "0.4.26"
@@ -936,6 +986,25 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "proptest"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744"
+dependencies = [
+ "bit-set",
+ "bit-vec",
+ "bitflags",
+ "num-traits",
+ "rand 0.9.4",
+ "rand_chacha 0.9.0",
+ "rand_xorshift",
+ "regex-syntax",
+ "rusty-fork",
+ "tempfile",
+ "unarray",
+]
+
 [[package]]
 name = "proxy_agent_setup"
 version = "9.9.9"
@@ -982,6 +1051,12 @@ dependencies = [
  "winreg",
 ]
 
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
 [[package]]
 name = "quote"
 version = "1.0.37"
@@ -991,6 +1066,12 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
 [[package]]
 name = "rand"
 version = "0.8.6"
@@ -998,8 +1079,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
 dependencies = [
  "libc",
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
 ]
 
 [[package]]
@@ -1009,7 +1100,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
 dependencies = [
  "ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.5",
 ]
 
 [[package]]
@@ -1018,7 +1119,25 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "rand_xorshift"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
+dependencies = [
+ "rand_core 0.9.5",
 ]
 
 [[package]]
@@ -1085,12 +1204,37 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "rustix"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
 
+[[package]]
+name = "rusty-fork"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
+dependencies = [
+ "fnv",
+ "quick-error",
+ "tempfile",
+ "wait-timeout",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.18"
@@ -1232,6 +1376,19 @@ dependencies = [
  "windows",
 ]
 
+[[package]]
+name = "tempfile"
+version = "3.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
+dependencies = [
+ "fastrand",
+ "getrandom 0.3.4",
+ "once_cell",
+ "rustix",
+ "windows-sys",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.64"
@@ -1414,6 +1571,12 @@ version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
 
+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.13"
@@ -1432,8 +1595,8 @@ version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
 dependencies = [
- "getrandom",
- "rand",
+ "getrandom 0.2.15",
+ "rand 0.8.6",
 ]
 
 [[package]]
@@ -1458,6 +1621,15 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "wait-timeout"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "want"
 version = "0.3.1"
@@ -1473,6 +1645,15 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
+[[package]]
+name = "wasip2"
+version = "1.0.3+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
+dependencies = [
+ "wit-bindgen",
+]
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.100"
@@ -1758,6 +1939,12 @@ dependencies = [
  "toml",
 ]
 
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
 [[package]]
 name = "xml-rs"
 version = "0.8.22"
diff --git a/cspell.json b/cspell.json
index 52429808..fd1f639a 100644
--- a/cspell.json
+++ b/cspell.json
@@ -645,6 +645,12 @@
     "Skus",
     "VMSS",
     "zeroize",
+    "greppable",
+    "HGAP",
+    "hostgaplugin",
+    "rsid",
+    "strat",
+    "uncanonicalizable",
     "Аdministrator",
     "аzureuser",
     "rооt"
diff --git a/doc/plans/Innovation-2.1-canonical-request.md b/doc/plans/Innovation-2.1-canonical-request.md
index 7cfb50d0..8e83bf52 100644
--- a/doc/plans/Innovation-2.1-canonical-request.md
+++ b/doc/plans/Innovation-2.1-canonical-request.md
@@ -284,6 +284,7 @@ hyper::Uri │ ▼ parse_scheme_method (must be http; reject https/ws/...; metho
 | `MalformedPercent` | Truncated / non-hex `%XX`             | Deny; 400 | `CANON_PCT`       |
 | `OverlongUtf8`     | Classic IDS-bypass payload            | Deny; 400 | `CANON_OVERLONG`  |
 | `InvalidUtf8`      | Random bytes or wrong codec           | Deny; 400 | `CANON_UTF8`      |
+| `NonAscii`         | Unicode confusable / homoglyph attack in **path** (query allows non-ASCII values) | Deny; 400 | `CANON_NON_ASCII` |
 | `ControlChar`      | CRLF injection attempt                | Deny; 400 | `CANON_CTRL`      |
 | `PathUnderflow`    | Too many `..`                         | Deny; 400 | `CANON_UNDERFLOW` |
 | `EmbeddedQuery`    | `%3F` smuggling                       | Deny; 400 | `CANON_EMBQ`      |
diff --git a/pentest/linux/phase4_rules_fuzz/url_diff.py b/pentest/linux/phase4_rules_fuzz/url_diff.py
index 5ad3c92a..4d817bd6 100755
--- a/pentest/linux/phase4_rules_fuzz/url_diff.py
+++ b/pentest/linux/phase4_rules_fuzz/url_diff.py
@@ -13,7 +13,7 @@
 Loopback fabric only; safe to run from a normal user account.
 """
 from __future__ import annotations
-import argparse, datetime, http.client, os, sys, urllib.parse
+import argparse, datetime, http.client, os, re, socket, sys, time, urllib.parse
 
 IMDS_HOST = "169.254.169.254"
 IMDS_PORT = 80
@@ -22,19 +22,35 @@
 CANONICAL = "/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/"
 
 VARIANTS = {
-    "canonical":         CANONICAL,
-    "uppercase_pct":     "/metadata/identity/oauth2%2Ftoken?api-version=2018-02-01&resource=https://management.azure.com/",
-    "lowercase_pct":     "/metadata/identity/oauth2%2ftoken?api-version=2018-02-01&resource=https://management.azure.com/",
-    "double_encoded":    "/metadata/identity/oauth2%252Ftoken?api-version=2018-02-01&resource=https://management.azure.com/",
-    "trailing_dot_path": "/metadata/identity/oauth2/token./?api-version=2018-02-01&resource=https://management.azure.com/",
-    "dot_segments":      "/metadata/./identity/../identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/",
-    "param_injection":   "/metadata/identity/oauth2/token;x=1?api-version=2018-02-01&resource=https://management.azure.com/",
-    "case_path":         "/Metadata/Identity/OAuth2/Token?api-version=2018-02-01&resource=https://management.azure.com/",
-    "extra_slashes":     "//metadata///identity//oauth2//token?api-version=2018-02-01&resource=https://management.azure.com/",
-    "fragment":          "/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/#x",
-    "unicode_dotless":   "/metadata/\u0131dentity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/",
+    "canonical":            CANONICAL,
+    "uppercase_pct":        "/metadata/identity/oauth2%2Ftoken?api-version=2018-02-01&resource=https://management.azure.com/",
+    "lowercase_pct":        "/metadata/identity/oauth2%2ftoken?api-version=2018-02-01&resource=https://management.azure.com/",
+    "double_encoded":       "/metadata/identity/oauth2%252Ftoken?api-version=2018-02-01&resource=https://management.azure.com/",
+    "trailing_dot_path":    "/metadata/identity/oauth2/token./?api-version=2018-02-01&resource=https://management.azure.com/",
+    "dot_segments":         "/metadata/./identity/../identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/",
+    "dot_segments_encoded": "/metadata/%2E/identity/%2E%2E/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/",
+    "param_injection":      "/metadata/identity/oauth2/token;x=1?api-version=2018-02-01&resource=https://management.azure.com/",
+    "case_path":            "/Metadata/Identity/OAuth2/Token?api-version=2018-02-01&resource=https://management.azure.com/",
+    "extra_slashes":        "//metadata///identity//oauth2//token?api-version=2018-02-01&resource=https://management.azure.com/",
+    "fragment":             "/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/#x",
+    "unicode_dotless":      "/metadata/\u0131dentity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/",
 }
 
+# Variants whose effectiveness depends on the raw bytes reaching GPA verbatim.
+# Python's http.client.HTTPConnection currently passes the request-target
+# through to the wire without collapsing `./`, `../`, repeated slashes, or
+# decoding `%2E` — but this is an undocumented quirk of the CPython
+# implementation, not a contract. .NET's System.Uri normalizes all of these
+# before the request leaves the box; if a future Python release follows suit
+# (or a sysadmin adds a transparent proxy that rewrites paths), the
+# differential test silently turns into a no-op. Sending these as a literal
+# HTTP/1.1 request line via a raw socket guarantees the bytes hit GPA
+# verbatim and matches what the Windows harness does (Phase4-RulesFuzz.ps1
+# Send-VariantRaw). The eBPF cgroup redirector still intercepts outbound
+# traffic to IMDS:80 at the kernel layer, so the raw connection is picked
+# up by GPA the same as http.client.
+RAW_VARIANTS = frozenset({"dot_segments", "dot_segments_encoded"})
+
 
 def send(path: str) -> tuple[int, str]:
     try:
@@ -48,6 +64,54 @@ def send(path: str) -> tuple[int, str]:
         return -1, f"{type(exc).__name__}: {exc}"
 
 
+def send_raw(path: str) -> tuple[int, str]:
+    """Write a literal HTTP/1.1 request line over a raw socket, bypassing any
+    URL parsing/canonicalization that http.client or a future Python release
+    might perform. The request bytes contain `path` verbatim."""
+    req_text = (
+        f"GET {path} HTTP/1.1\r\n"
+        f"Host: {IMDS_HOST}\r\n"
+        "Metadata: true\r\n"
+        "Connection: close\r\n"
+        "Accept: */*\r\n"
+        "\r\n"
+    )
+    sock: socket.socket | None = None
+    try:
+        sock = socket.create_connection((IMDS_HOST, IMDS_PORT), timeout=8)
+        sock.sendall(req_text.encode("ascii"))
+        sock.settimeout(8)
+        chunks: list[bytes] = []
+        total = 0
+        deadline = time.monotonic() + 8.0
+        while total < 4096 and time.monotonic() < deadline:
+            try:
+                buf = sock.recv(min(4096 - total, 1024))
+            except socket.timeout:
+                break
+            if not buf:
+                break
+            chunks.append(buf)
+            total += len(buf)
+        text = b"".join(chunks).decode("latin-1", errors="replace")
+        status_line = text.split("\r\n", 1)[0] if text else ""
+        m = re.match(r"^HTTP/1\.[01]\s+(\d{3})", status_line)
+        code = int(m.group(1)) if m else -1
+        body_head = ""
+        idx = text.find("\r\n\r\n")
+        if 0 <= idx and idx + 4 < len(text):
+            body_head = text[idx + 4 : idx + 84].replace("\n", " ").replace("\t", " ")
+        return code, body_head
+    except Exception as exc:  # pragma: no cover
+        return -1, f"RawSocket {type(exc).__name__}: {exc}"
+    finally:
+        if sock is not None:
+            try:
+                sock.close()
+            except Exception:
+                pass
+
+
 def main() -> int:
     ap = argparse.ArgumentParser()
     ap.add_argument("--out", default=os.environ.get(
@@ -63,12 +127,17 @@ def main() -> int:
     rows = []
     diff_count = 0
     for name, path in VARIANTS.items():
-        status, head = send(path)
+        if name in RAW_VARIANTS:
+            status, head = send_raw(path)
+            sender = "raw"
+        else:
+            status, head = send(path)
+            sender = "net"
         diff = "DIFF" if status != baseline_status else "same"
         if diff == "DIFF":
             diff_count += 1
         rows.append((name, status, diff, urllib.parse.quote(path, safe="/?&=:%")[:120], head[:60]))
-        print(f"  {name:<18} status={status:<4} {diff}")
+        print(f"  {name:<22} [{sender}] status={status:<4} {diff}")
 
     with open(args.out, "a", encoding="utf-8") as fh:
         ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
diff --git a/pentest/linux/phase4b_local_rules/run.py b/pentest/linux/phase4b_local_rules/run.py
index 7c111ce8..6131c309 100755
--- a/pentest/linux/phase4b_local_rules/run.py
+++ b/pentest/linux/phase4b_local_rules/run.py
@@ -170,6 +170,26 @@ def get_current_user_identity() -> dict:
     }
 
 
+def _wire_encode_path(path: str) -> str:
+    """Percent-encode non-ASCII path bytes as UTF-8 before send.
+
+    Python's http.client transmits the request line via request.encode('ascii')
+    and raises UnicodeEncodeError on any raw non-ASCII char (e.g. the U+0131
+    dotless-i confusable in token_unicode_dotless). That exception is caught
+    below as a false -1 — the request never reaches GPA, so we'd never observe
+    the real authorization decision. Browsers and .NET's System.Uri percent-
+    encode non-ASCII path bytes as UTF-8 before sending; mirror that so the
+    confusable arrives on the wire as %C4%B1 and GPA's 403 is actually exercised.
+
+    Only non-ASCII codepoints are rewritten; ASCII (including existing %XX,
+    '/', '?', ';', '&', '=') passes through verbatim so every other probe is
+    byte-for-byte unchanged on the wire."""
+    return "".join(
+        ch if ord(ch) < 0x80 else "".join(f"%{b:02X}" for b in ch.encode("utf-8"))
+        for ch in path
+    )
+
+
 def send(target: Target, method: str, path: str,
          headers: Optional[dict] = None, timeout: float = 8.0) -> int:
     h: dict[str, str] = {}
@@ -181,7 +201,7 @@ def send(target: Target, method: str, path: str,
         h.update(headers)
     try:
         conn = http.client.HTTPConnection(target.host, target.port, timeout=timeout)
-        conn.request(method, path, headers=h)
+        conn.request(method, _wire_encode_path(path), headers=h)
         resp = conn.getresponse()
         resp.read(64)
         conn.close()
@@ -631,6 +651,67 @@ def _imds_scenarios(target: Target, identity: dict) -> list[Scenario]:
         ],
     ))
 
+    # S20: explicit fail-closed of the IMDS managed-identity token endpoint.
+    # defaultAccess=deny + mode=enforce + empty privileges means EVERY path —
+    # including every URL-equivalence variant of /metadata/identity/oauth2/
+    # token (mixed case, %2F-encoded slash, double-encoded, trailing dot,
+    # extra slashes, fragment, matrix param, Unicode confusable) — must fall
+    # through to the deny default. With shadow-mode (M3) enabled this is the
+    # rule set under which the canonical authorizer can surface
+    # CANON_DIVERGENCE log lines: any variant where the legacy substring
+    # matcher and the canonical normalized-path matcher disagree shows up as
+    # `legacy=allow canon=deny` (or vice versa) in the GPA service log even
+    # though the on-wire response stays 403. Any 200 response here is a true
+    # bypass.
+    # `token_extra_slashes` may also surface as 404 once the pre-canonical
+    # traversal guard (path_has_traversal) catches `//` before the rule
+    # engine runs — both 403 and 404 mean the bypass attempt failed.
+    scenarios.append(Scenario(
+        sid=f"{pfx}-S20-deny-token-path",
+        target=target,
+        description="enforce + deny + no privileges; every URL-equivalence "
+                    "variant of the IMDS token path must 403 (or 404 when "
+                    "GPA traversal-guard fires)",
+        rules={
+            "defaultAccess": "deny",
+            "mode": "enforce",
+            "id": "pentest-s20",
+            "rules": {},
+        },
+        probes=[
+            Probe("token_canonical",
+                  "/metadata/identity/oauth2/token?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_uppercase_pct",
+                  "/metadata/identity/oauth2%2Ftoken?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_lowercase_pct",
+                  "/metadata/identity/oauth2%2ftoken?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_double_encoded",
+                  "/metadata/identity/oauth2%252Ftoken?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_trailing_dot_path",
+                  "/metadata/identity/oauth2/token./?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_param_injection",
+                  "/metadata/identity/oauth2/token;x=1?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_case_path",
+                  "/Metadata/Identity/OAuth2/Token?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+            Probe("token_extra_slashes",
+                  "//metadata///identity//oauth2//token?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", (403, 404)),
+            Probe("token_fragment",
+                  "/metadata/identity/oauth2/token?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/#x", 403),
+            Probe("token_unicode_dotless",
+                  "/metadata/\u0131dentity/oauth2/token?api-version=2018-02-01"
+                  "&resource=https://management.azure.com/", 403),
+        ],
+    ))
+
     return scenarios
 
 
diff --git a/pentest/linux/test_catalog.py b/pentest/linux/test_catalog.py
index 70524c7f..cf127e29 100644
--- a/pentest/linux/test_catalog.py
+++ b/pentest/linux/test_catalog.py
@@ -294,6 +294,12 @@ class TestInfo(TypedDict, total=False):
         "design": "Identity `userName` uses Cyrillic о (U+043E) in place of ASCII 'o'. The matcher must compare bytes, not Unicode-normalized forms — 200 would mean Unicode normalization is happening in identity comparison and could be used to bypass authZ.",
         "automation": "Rewrites the harness identity's userName with Cyrillic о substitutions; expects 403.",
     },
+    "IMDS-S20-deny-token-path": {
+        "title": "Fail-closed deny of the IMDS managed-identity token endpoint across URL-equivalence variants",
+        "design": "defaultAccess=deny + mode=enforce + empty privileges. Every URL-equivalence variant of /metadata/identity/oauth2/token (mixed case, %2F/%2f/%252F-encoded slash, trailing dot path, extra slashes, fragment, matrix param, Unicode confusable like U+0131) must 403 — none of them should reach the upstream. With shadow-mode (M3) enabled this is the rule set under which the canonical authorizer surfaces `CANON_DIVERGENCE` log lines for any variant where the legacy substring matcher and the canonical normalized-path matcher disagree (`legacy=allow canon=deny` or vice versa) even though the on-wire response stays 403. Any 200 is a true bypass.",
+        "automation": "Writes `defaultAccess=deny, mode=enforce, rules={}`; probes 10 equivalent forms of the token path. `token_extra_slashes` accepts (403, 404) since the pre-canonical traversal guard (path_has_traversal) may fire on `//` before the rule engine runs — both outcomes mean the bypass failed.",
+        "fix": "A 200 response here means a URL canonicalization gap before AuthZ. Audit the request path through `authorization_rules.rs` → enable canonical-mode enforce (M5 sunset of the legacy substring matcher) — by design every variant in this scenario normalizes to /metadata/identity/oauth2/token, which falls through to defaultAccess=deny.",
+    },
 
     # ---------------- WireServer ----------------
     "WS-S1-disabled-allow": {
diff --git a/pentest/windows/Phase4-RulesFuzz.ps1 b/pentest/windows/Phase4-RulesFuzz.ps1
index bf8b068f..abe88970 100644
--- a/pentest/windows/Phase4-RulesFuzz.ps1
+++ b/pentest/windows/Phase4-RulesFuzz.ps1
@@ -20,19 +20,34 @@ if (-not $Out) { $Out = $UrlDiffTsv }
 $canonical = '/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/'
 
 $variants = [ordered]@{
-    'canonical'         = $canonical
-    'uppercase_pct'     = '/metadata/identity/oauth2%2Ftoken?api-version=2018-02-01&resource=https://management.azure.com/'
-    'lowercase_pct'     = '/metadata/identity/oauth2%2ftoken?api-version=2018-02-01&resource=https://management.azure.com/'
-    'double_encoded'    = '/metadata/identity/oauth2%252Ftoken?api-version=2018-02-01&resource=https://management.azure.com/'
-    'trailing_dot_path' = '/metadata/identity/oauth2/token./?api-version=2018-02-01&resource=https://management.azure.com/'
-    'dot_segments'      = '/metadata/./identity/../identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/'
-    'param_injection'   = '/metadata/identity/oauth2/token;x=1?api-version=2018-02-01&resource=https://management.azure.com/'
-    'case_path'         = '/Metadata/Identity/OAuth2/Token?api-version=2018-02-01&resource=https://management.azure.com/'
-    'extra_slashes'     = '//metadata///identity//oauth2//token?api-version=2018-02-01&resource=https://management.azure.com/'
-    'fragment'          = '/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/#x'
-    'unicode_dotless'   = "/metadata/$([char]0x131)dentity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/"
+    'canonical'            = $canonical
+    'uppercase_pct'        = '/metadata/identity/oauth2%2Ftoken?api-version=2018-02-01&resource=https://management.azure.com/'
+    'lowercase_pct'        = '/metadata/identity/oauth2%2ftoken?api-version=2018-02-01&resource=https://management.azure.com/'
+    'double_encoded'       = '/metadata/identity/oauth2%252Ftoken?api-version=2018-02-01&resource=https://management.azure.com/'
+    'trailing_dot_path'    = '/metadata/identity/oauth2/token./?api-version=2018-02-01&resource=https://management.azure.com/'
+    'dot_segments'         = '/metadata/./identity/../identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/'
+    'dot_segments_encoded' = '/metadata/%2E/identity/%2E%2E/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/'
+    'param_injection'      = '/metadata/identity/oauth2/token;x=1?api-version=2018-02-01&resource=https://management.azure.com/'
+    'case_path'            = '/Metadata/Identity/OAuth2/Token?api-version=2018-02-01&resource=https://management.azure.com/'
+    'extra_slashes'        = '//metadata///identity//oauth2//token?api-version=2018-02-01&resource=https://management.azure.com/'
+    'fragment'             = '/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/#x'
+    'unicode_dotless'      = "/metadata/$([char]0x131)dentity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/"
 }
 
+# Variants whose effectiveness depends on the raw bytes reaching GPA verbatim.
+# System.Net.HttpWebRequest constructs a System.Uri that canonicalizes `./`
+# and `../` and decodes `%2E` BEFORE the request leaves the box, so without
+# raw-socket sending these variants would be silently rewritten to the
+# canonical path and never actually exercise GPA's traversal guard nor the
+# canonical-vs-legacy shadow comparator. Sending them as a literal HTTP/1.1
+# request line via TcpClient bypasses the .NET URI parser entirely; the WFP
+# redirector still intercepts the outbound connection to ImdsIp:80 the same
+# way it does for HttpWebRequest, so the bytes hit GPA verbatim.
+$rawVariants = [System.Collections.Generic.HashSet[string]]::new(
+    [System.StringComparer]::OrdinalIgnoreCase)
+[void]$rawVariants.Add('dot_segments')
+[void]$rawVariants.Add('dot_segments_encoded')
+
 function Send-Variant {
     param([string] $Path)
     # Use HttpWebRequest so we can pass paths with raw %-sequences without
@@ -60,6 +75,61 @@ function Send-Variant {
     }
 }
 
+function Send-VariantRaw {
+    param([string] $Path)
+    # Write a literal HTTP/1.1 request line to a TcpClient stream so that
+    # System.Uri / HttpWebRequest cannot canonicalize `./`, `../`, `%2E`,
+    # or repeated slashes before send. The WFP redirector intercepts
+    # outbound traffic to ImdsIp:80 at the kernel layer, so TcpClient is
+    # picked up by GPA the same as HttpWebRequest.
+    $client = New-Object System.Net.Sockets.TcpClient
+    try {
+        $client.SendTimeout = 8000
+        $client.ReceiveTimeout = 8000
+        $client.Connect($ImdsIp, 80)
+        $stream = $client.GetStream()
+        # Build the raw request bytes manually so the path is sent verbatim.
+        $reqText = "GET $Path HTTP/1.1`r`nHost: $ImdsIp`r`nMetadata: true`r`nConnection: close`r`nAccept: */*`r`n`r`n"
+        $reqBytes = [System.Text.Encoding]::ASCII.GetBytes($reqText)
+        $stream.Write($reqBytes, 0, $reqBytes.Length)
+        $stream.Flush()
+
+        # Read up to 4 KB — enough for the status line + headers + a body head.
+        $buf = New-Object byte[] 4096
+        $total = 0
+        $deadline = [DateTime]::UtcNow.AddSeconds(8)
+        while ($total -lt $buf.Length -and [DateTime]::UtcNow -lt $deadline) {
+            if (-not $stream.DataAvailable) {
+                Start-Sleep -Milliseconds 50
+                continue
+            }
+            $n = $stream.Read($buf, $total, $buf.Length - $total)
+            if ($n -le 0) { break }
+            $total += $n
+        }
+        $text = [System.Text.Encoding]::ASCII.GetString($buf, 0, $total)
+        $statusLine = ($text -split "`r`n", 2)[0]
+        if ($statusLine -match '^HTTP/1\.[01]\s+(\d{3})') {
+            $code = [int]$Matches[1]
+        } else {
+            $code = -1
+        }
+        # Body head = everything after the first blank line, lightly cleaned.
+        $headBody = ''
+        $idx = $text.IndexOf("`r`n`r`n")
+        if ($idx -ge 0 -and $idx + 4 -lt $text.Length) {
+            $headBody = $text.Substring($idx + 4)
+        }
+        $head = ($headBody.Substring(0, [Math]::Min(80, $headBody.Length))) -replace "[`r`n`t]", ' '
+        return [pscustomobject]@{ Code = $code; Head = $head }
+    } catch {
+        return [pscustomobject]@{ Code = -1; Head = ("RawSocket: $($_.Exception.Message)" -replace "[`r`n`t]", ' ') }
+    } finally {
+        if ($client.Connected) { $client.Close() }
+        $client.Dispose()
+    }
+}
+
 $baseline = Send-Variant $canonical
 Write-Host ("baseline ({0}) for canonical path" -f $baseline.Code)
 
@@ -67,10 +137,16 @@ $ts = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ')
 $rows = @()
 $diffs = 0
 foreach ($k in $variants.Keys) {
-    $r = Send-Variant $variants[$k]
+    if ($rawVariants.Contains($k)) {
+        $r = Send-VariantRaw $variants[$k]
+        $sender = 'raw'
+    } else {
+        $r = Send-Variant $variants[$k]
+        $sender = 'net'
+    }
     $verdict = if ($r.Code -ne $baseline.Code) { 'DIFF' } else { 'same' }
     if ($verdict -eq 'DIFF') { $diffs++ }
-    Write-Host ("  {0,-18} status={1,-4} {2}" -f $k, $r.Code, $verdict)
+    Write-Host ("  {0,-22} [{1}] status={2,-4} {3}" -f $k, $sender, $r.Code, $verdict)
     $rows += "$ts`t$k`t$($r.Code)`t$verdict`t$($variants[$k])`t$($r.Head)"
 }
 
diff --git a/pentest/windows/Phase4b-LocalRules.ps1 b/pentest/windows/Phase4b-LocalRules.ps1
index ea101470..464aaa61 100644
--- a/pentest/windows/Phase4b-LocalRules.ps1
+++ b/pentest/windows/Phase4b-LocalRules.ps1
@@ -479,6 +479,34 @@ function Build-ImdsScenarios {
         Write-Finding "$pfx-S19-identity-homoglyph" INFO ("skipped: '{0}' has no character with a Cyrillic visual look-alike in our table" -f $Identity.userName)
     }
 
+    # S20 — explicit fail-closed of the IMDS managed-identity token endpoint.
+    # defaultAccess=deny + mode=enforce + empty privileges means EVERY path
+    # — including every URL-equivalence variant of /metadata/identity/oauth2/
+    # token (mixed case, %2F-encoded slash, double-encoded, dot segments,
+    # extra slashes, fragment, matrix param, Unicode confusable) — must fall
+    # through to the deny default. With shadow-mode (M3) enabled this is the
+    # rule set under which the canonical authorizer can surface
+    # CANON_DIVERGENCE log lines: any variant where the legacy substring
+    # matcher and the canonical normalized-path matcher disagree shows up as
+    # `legacy=allow canon=deny` (or vice versa) in the GPA service log even
+    # though the on-wire response stays 403. Any 200 response here is a true
+    # bypass.
+    $scn += New-Scenario -Sid "$pfx-S20-deny-token-path" -TargetName imds `
+        -Description 'enforce + deny + no privileges; every URL-equivalence variant of the IMDS token path must 403 (or 404 when GPA traversal-guard fires)' `
+        -Rules @{ defaultAccess = 'deny'; mode = 'enforce'; id = 'pentest-s20'; rules = @{} } `
+        -Probes @(
+        [Probe]::new('token_canonical',          '/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_uppercase_pct',      '/metadata/identity/oauth2%2Ftoken?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_lowercase_pct',      '/metadata/identity/oauth2%2ftoken?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_double_encoded',     '/metadata/identity/oauth2%252Ftoken?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_trailing_dot_path',  '/metadata/identity/oauth2/token./?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_param_injection',    '/metadata/identity/oauth2/token;x=1?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_case_path',          '/Metadata/Identity/OAuth2/Token?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_extra_slashes',      '//metadata///identity//oauth2//token?api-version=2018-02-01&resource=https://management.azure.com/', 403),
+        [Probe]::new('token_fragment',           '/metadata/identity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/#x', 403),
+        [Probe]::new('token_unicode_dotless',    "/metadata/$([char]0x131)dentity/oauth2/token?api-version=2018-02-01&resource=https://management.azure.com/", 403)
+    )
+
     return $scn
 }
 
diff --git a/pentest/windows/TestCatalog.psm1 b/pentest/windows/TestCatalog.psm1
index a67e430f..c6f07e3c 100644
--- a/pentest/windows/TestCatalog.psm1
+++ b/pentest/windows/TestCatalog.psm1
@@ -210,6 +210,7 @@ $script:Phase4b = @{
     'IMDS-S17-name-collision-shielded' = @{ Title='Local rule names already containing LocalFileRules_ prefix';        Design='Tests that `prefix_local_rule_names` rewrites both names AND cross-references consistently even when the admin pre-includes the prefix — so the rule still binds. 403 on /metadata/instance would mean the prefixer mishandled the doubled prefix.' }
     'IMDS-S18-dangling-references'= @{ Title='Dangling role→privilege reference must reject';            Design='Validator must reject; agent fail-closes to defaultAccess=deny. 200 means dangling-ref validation regressed.' }
     'IMDS-S19-identity-homoglyph' = @{ Title='Cyrillic homoglyph in userName must NOT match ASCII user'; Design='Identity userName uses a Cyrillic look-alike (e.g. а=U+0430, е=U+0435, о=U+043E, …) in place of one character of the current user''s ASCII name. The two strings render identically in most fonts but are NOT byte-equal. Matcher must compare bytes, not Unicode-normalized forms — 200 would mean Unicode normalization is happening in identity comparison and could be used to bypass authZ. Skipped (INFO) when the current user has no character with a Cyrillic look-alike in the harness table.' }
+    'IMDS-S20-deny-token-path'    = @{ Title='Fail-closed deny of the IMDS managed-identity token endpoint across URL-equivalence variants'; Design='defaultAccess=deny + mode=enforce + empty privileges. Every URL-equivalence variant of /metadata/identity/oauth2/token (mixed case, %2F/%2f/%252F-encoded slash, dot segments, extra slashes, fragment, matrix param, Unicode confusable like U+0131) must 403 — none of them should reach the upstream. With shadow-mode (M3) enabled this is the rule set under which the canonical authorizer can surface `CANON_DIVERGENCE` log lines for variants where the legacy substring-matcher and canonical normalized-path matcher disagree. Any 200 here is a true bypass.' }
     'WS-S1-disabled-allow'        = @{ Title='Control: mode=disabled + allow (WireServer)';             Design='Baseline 200s for goalstate and versions.' }
     'WS-S2-enforce-deny-empty'    = @{ Title='Fail-closed enforce+deny (WireServer)';                   Design='All WireServer calls 403.' }
     'WS-S3-audit-deny-empty'      = @{ Title='Local `mode` field must be ignored (WireServer regression guard)'; Design='Writes mode=audit + defaultAccess=deny + empty rules to WireServer_Rules.json. PRE confirms remote mode=enforce, so probes must 403. A 200 would mean the agent honored `mode` from the local file.' }
diff --git a/proxy_agent/Cargo.toml b/proxy_agent/Cargo.toml
index 3b8cd9a1..bb7d9021 100644
--- a/proxy_agent/Cargo.toml
+++ b/proxy_agent/Cargo.toml
@@ -31,6 +31,12 @@ libc = "0.2.147"
 socket2 = "0.5"               # Set socket options without tokio/std conversion
 base64 = "0.22"
 percent-encoding = "2.3"
+# Optional: only compiled when the `proptests` feature is enabled (see
+# `[features]` below). Lives under `[dependencies]` rather than
+# `[dev-dependencies]` because Cargo rejects `optional` on dev-deps;
+# the only `use` sites are inside `#[cfg(all(test, feature = "proptests"))]`
+# modules so the crate never lands in the production binary.
+proptest = { version = "1.11", optional = true }
 
 [dependencies.uuid]
 version = "1.3.0"
@@ -84,6 +90,13 @@ features = [
 
 [features]
 test-with-root = []
+# Innovation 2.1 M2 property tests. Off by default so the inner-loop
+# `cargo test` skips both the proptest *crate compilation* (~3s the
+# first time) and the 256-case generation per property. The `dep:` form
+# below makes Cargo treat the optional `proptest` dependency as opt-in
+# too — with the feature off the crate is not pulled into the build
+# graph at all. CI picks it up via `--all-features` in build-linux.sh.
+proptests = ["dep:proptest"]
 
 [package.metadata.deb]
 name = "azure-proxy-agent"
@@ -101,4 +114,4 @@ assets = [
     ["azure-proxy-agent", "usr/sbin/azure-proxy-agent", "755"],  # Binary
     ["proxy-agent.json", "etc/azure/proxy-agent.json", "644"],
     ["ebpf_cgroup.o", "usr/lib/azure-proxy-agent/ebpf_cgroup.o", "644"],
-]
\ No newline at end of file
+]
diff --git a/proxy_agent/config/GuestProxyAgent.linux.json b/proxy_agent/config/GuestProxyAgent.linux.json
index 34007661..90b6d0b5 100644
--- a/proxy_agent/config/GuestProxyAgent.linux.json
+++ b/proxy_agent/config/GuestProxyAgent.linux.json
@@ -9,5 +9,6 @@
     "cgroupRoot": "/sys/fs/cgroup",
     "fileLogLevel": "Trace",
     "fileLogLevelForEvents": "Info",
-    "fileLogLevelForSystemEvents": "Info"
+    "fileLogLevelForSystemEvents": "Info",
+    "canonicalRequestMode": "Shadow"
 }
\ No newline at end of file
diff --git a/proxy_agent/config/GuestProxyAgent.windows.json b/proxy_agent/config/GuestProxyAgent.windows.json
index ccad256c..b43e5768 100644
--- a/proxy_agent/config/GuestProxyAgent.windows.json
+++ b/proxy_agent/config/GuestProxyAgent.windows.json
@@ -8,5 +8,6 @@
     "ebpfProgramName": "redirect.bpf.sys",
     "fileLogLevel": "Trace",
     "fileLogLevelForEvents": "Info",
-    "fileLogLevelForSystemEvents": "Info"
+    "fileLogLevelForSystemEvents": "Info",
+    "canonicalRequestMode": "Shadow"
 }
\ No newline at end of file
diff --git a/proxy_agent/proptest-regressions/proxy/canonical/property_tests.txt b/proxy_agent/proptest-regressions/proxy/canonical/property_tests.txt
new file mode 100644
index 00000000..6777a0c8
--- /dev/null
+++ b/proxy_agent/proptest-regressions/proxy/canonical/property_tests.txt
@@ -0,0 +1,9 @@
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+#
+# It is recommended to check this file in to source control so that
+# everyone who runs the test benefits from these saved cases.
+cc 6e318ae548f23aef626ed306c43a3903aaa8d87519f7314c005fa58118e5b587 # shrinks to url = "http://169.254.169.254/;/"
+cc ced0a9f4654c5313e012532a2fe6b322ab32cfa67d567b3668e31bd3089ba09b # shrinks to raw = 1
+cc 9f6ed1e81539e9e4aa92df7aaa4ab446a4fccad216ed51984342293b42b3ad5d # shrinks to url = "http://169.254.169.254/.;"
diff --git a/proxy_agent/src/common/config.rs b/proxy_agent/src/common/config.rs
index 021ef065..4365f148 100644
--- a/proxy_agent/src/common/config.rs
+++ b/proxy_agent/src/common/config.rs
@@ -17,6 +17,7 @@
 //! ```
 
 use crate::common::constants;
+use crate::common::logger;
 use once_cell::sync::Lazy;
 use proxy_agent_shared::{logger::LoggerLevel, misc_helpers};
 use serde_derive::{Deserialize, Serialize};
@@ -78,6 +79,16 @@ pub fn get_enable_http_proxy_trace() -> bool {
     SYSTEM_CONFIG.enableHttpProxyTrace.unwrap_or(false)
 }
 
+/// Rollout flag for the Innovation 2.1 canonical request pipeline.
+///
+/// Read from the optional `canonicalRequestMode` key in the GPA config
+/// JSON. Accepted values: `off`, `shadow`, `enforce`. Anything missing
+/// or unparseable resolves to [`CanonicalMode::Off`] so production
+/// traffic keeps the legacy authorizer end-to-end.
+pub fn get_canonical_request_mode() -> crate::proxy::canonical::CanonicalMode {
+    SYSTEM_CONFIG.get_canonical_request_mode()
+}
+
 #[derive(Serialize, Deserialize)]
 #[allow(non_snake_case)]
 pub struct Config {
@@ -104,6 +115,13 @@ pub struct Config {
     /// This is an optional config, mainly for manual debugging purpose
     #[serde(skip_serializing_if = "Option::is_none")]
     enableHttpProxyTrace: Option<bool>,
+    /// Innovation 2.1 canonical request rollout flag.
+    /// Optional; absent or unparseable values resolve to
+    /// [`crate::proxy::canonical::CanonicalMode::Off`] so production
+    /// behavior is unchanged when the key is missing.
+    /// Accepted values: `off`, `shadow`, `enforce`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    canonicalRequestMode: Option<String>,
 }
 
 impl Default for Config {
@@ -211,6 +229,33 @@ impl Config {
         }
         None
     }
+
+    /// Resolve the canonical-request rollout flag.
+    ///
+    /// Returns [`crate::proxy::canonical::CanonicalMode::Off`] when the
+    /// config key is absent or holds an unrecognized value (with a
+    /// warning logged in the latter case). This conservative default is
+    /// what guarantees the "behavior unchanged for production traffic"
+    /// M3 exit criterion: nothing in the canonical pipeline runs until
+    /// the operator explicitly opts in.
+    pub fn get_canonical_request_mode(&self) -> crate::proxy::canonical::CanonicalMode {
+        use crate::proxy::canonical::CanonicalMode;
+        match &self.canonicalRequestMode {
+            None => CanonicalMode::Off,
+            Some(s) => match CanonicalMode::from_str(s) {
+                Ok(mode) => mode,
+                Err(err) => {
+                    // Don't take down the agent over a typo'd config key;
+                    // fall back to Off (the safe default) and log so
+                    // operators see the misconfiguration in trace.
+                    logger::write_warning(format!(
+                        "Invalid canonicalRequestMode={s:?} in config ({err}); defaulting to Off"
+                    ));
+                    CanonicalMode::Off
+                }
+            },
+        }
+    }
 }
 
 #[cfg(test)]
diff --git a/proxy_agent/src/proxy.rs b/proxy_agent/src/proxy.rs
index b0be815c..721e912c 100644
--- a/proxy_agent/src/proxy.rs
+++ b/proxy_agent/src/proxy.rs
@@ -32,6 +32,7 @@
 //! ```
 
 pub mod authorization_rules;
+pub mod canonical;
 pub mod proxy_authorizer;
 pub mod proxy_connection;
 pub mod proxy_server;
diff --git a/proxy_agent/src/proxy/authorization_rules.rs b/proxy_agent/src/proxy/authorization_rules.rs
index 857c3ade..7eb261d4 100644
--- a/proxy_agent/src/proxy/authorization_rules.rs
+++ b/proxy_agent/src/proxy/authorization_rules.rs
@@ -20,6 +20,7 @@
 use super::{proxy_connection::ConnectionLogger, Claims};
 use crate::common::logger;
 use crate::key_keeper::key::{AuthorizationItem, AuthorizationRules, Identity, Privilege, Role};
+use crate::proxy::canonical::{self, CanonError, CanonicalMode, CanonicalPattern};
 use proxy_agent_shared::logger::LoggerLevel;
 use proxy_agent_shared::misc_helpers;
 use serde_derive::{Deserialize, Serialize};
@@ -73,6 +74,40 @@ pub struct ComputedAuthorizationItem {
     // all the defined unique identities, distinct by name
     // key - identity name, value - identity object
     pub identities: HashMap<String, Identity>,
+
+    /// Innovation 2.1 shadow cache: each `Privilege` compiled through
+    /// the canonical pipeline once at rule-load time so per-request
+    /// shadow evaluation in [`ComputedAuthorizationItem::canonical_decision`]
+    /// is a plain linear scan instead of re-parsing the rule path on
+    /// every connection.
+    ///
+    /// Stored as `Vec<(name, pattern)>` rather than `HashMap` because:
+    /// - The access pattern is "scan all" — `canonical_decision`
+    ///   iterates every entry looking for matches; there is no
+    ///   lookup-by-privilege-name path on the canonical side.
+    /// - Iteration order is stable (insertion order), which makes
+    ///   divergence logs and any future "first match wins" semantics
+    ///   deterministic across processes — `HashMap` iteration is
+    ///   randomized.
+    /// - For typical rule counts (tens of entries) the per-entry
+    ///   overhead is smaller than `HashMap`.
+    ///
+    /// Skipped from (de)serialization because:
+    /// 1. The cache is a pure function of `privileges` — round-tripping
+    ///    it through the AuthorizationRulesForLogging JSON would just
+    ///    duplicate that data and risk drift if the canonical pipeline
+    ///    is upgraded between writer and reader.
+    /// 2. `CanonicalPattern` is not (and intentionally should not be)
+    ///    `Serialize`/`Deserialize` — its shape is an internal contract
+    ///    of the matcher.
+    ///
+    /// Privileges that fail to canonicalize at load time are dropped
+    /// from this cache **and** a warning is logged. This is fail-closed
+    /// for the canonical side: shadow / enforce mode will report a
+    /// divergence (legacy may still match the un-canonicalizable rule)
+    /// which is exactly the signal we want during rollout.
+    #[serde(skip, default)]
+    canonical_patterns: Vec<(String, CanonicalPattern)>,
 }
 
 #[allow(dead_code)]
@@ -171,11 +206,41 @@ impl ComputedAuthorizationItem {
             defaultAllowed: authorization_item.defaultAccess.to_lowercase() == "allow",
             mode: authorization_mode,
             identities: identity_dict,
+            canonical_patterns: Self::compile_canonical_patterns(&privilege_dict),
             privileges: privilege_dict,
             privilegeAssignments: privilege_assignments,
         }
     }
 
+    /// Compile every privilege through the canonical pipeline once.
+    ///
+    /// Errors are logged and the offending privilege is dropped from
+    /// the cache (fail-closed on the canonical side; the legacy matcher
+    /// retains its copy in `self.privileges`). This is exactly the
+    /// shape of divergence shadow-mode is designed to surface.
+    fn compile_canonical_patterns(
+        privilege_dict: &HashMap<String, Privilege>,
+    ) -> Vec<(String, CanonicalPattern)> {
+        let mut out = Vec::with_capacity(privilege_dict.len());
+        for (name, privilege) in privilege_dict {
+            match CanonicalPattern::from_privilege(privilege) {
+                Ok(pat) => out.push((name.clone(), pat)),
+                Err(e) => {
+                    // Don't fail rule-load; canonical mode will deny
+                    // anything that needed this rule, which is the M3
+                    // signal we want operators to see.
+                    logger::write_warning(format!(
+                        "Privilege '{name}' failed canonicalization ({code}); dropping from canonical cache (legacy matcher unaffected). path={path:?}",
+                        name = name,
+                        code = e.code(),
+                        path = privilege.path,
+                    ));
+                }
+            }
+        }
+        out
+    }
+
     pub fn is_allowed(
         &self,
         logger: &mut ConnectionLogger,
@@ -259,6 +324,203 @@ impl ComputedAuthorizationItem {
         );
         self.defaultAllowed
     }
+
+    // ------------------------------------------------------------------
+    // Innovation 2.1 M3 — shadow-mode integration.
+    //
+    // The two methods below add the canonical-pipeline evaluator and the
+    // divergence comparator that `proxy_authorizer::authorize` invokes
+    // when the rollout flag is `shadow` or `enforce`. With the default
+    // flag (`off`) neither runs and the legacy path above is the
+    // entirety of the authorization decision — the M3 exit criterion
+    // "behavior unchanged for production traffic".
+    // ------------------------------------------------------------------
+
+    /// Canonical-pipeline mirror of [`is_allowed`].
+    ///
+    /// Runs the request through `canonical::canonicalize` and matches it
+    /// against the precomputed [`CanonicalPattern`]s in
+    /// `self.canonical_patterns`. The identity check is shared with the
+    /// legacy path (same `Identity::is_match` semantics, same default
+    /// fallback) — the only difference is *path matching*. This is what
+    /// the comparator targets.
+    ///
+    /// **Fail-closed**: canonicalization errors collapse to
+    /// [`CanonicalDecision::Error`], which the comparator surfaces in
+    /// the divergence record and which the enforce-mode caller treats
+    /// as deny.
+    pub fn canonical_decision(
+        &self,
+        logger: &mut ConnectionLogger,
+        request_uri: &hyper::Uri,
+        request_method: &hyper::Method,
+        claims: &Claims,
+    ) -> CanonicalDecision {
+        // Disabled mode short-circuits identically to legacy. We keep
+        // the two implementations symmetric here so a divergence is
+        // always attributable to path/query handling, never to the
+        // disabled-skip.
+        if self.mode == AuthorizationMode::Disabled {
+            return CanonicalDecision::Allowed;
+        }
+
+        let canon = match canonical::canonicalize(request_uri, request_method) {
+            Ok(c) => c,
+            Err(e) => return CanonicalDecision::Error(e),
+        };
+
+        let mut any_pattern_matched = false;
+        for (privilege_name, pattern) in &self.canonical_patterns {
+            if !pattern.matches(&canon) {
+                continue;
+            }
+            any_pattern_matched = true;
+            logger.write(
+                LoggerLevel::Trace,
+                format!("[canonical] Request matched privilege '{privilege_name}'."),
+            );
+
+            if let Some(assignments) = self.privilegeAssignments.get(privilege_name) {
+                for identity_name in assignments {
+                    if let Some(identity) = self.identities.get(identity_name) {
+                        if identity.is_match(logger, claims) {
+                            return CanonicalDecision::Allowed;
+                        }
+                    }
+                }
+            }
+        }
+
+        if any_pattern_matched {
+            // Same semantics as legacy: any privilege matched but no
+            // identity matched -> deny (do NOT fall through to default).
+            return CanonicalDecision::Denied;
+        }
+        if self.defaultAllowed {
+            CanonicalDecision::Allowed
+        } else {
+            CanonicalDecision::Denied
+        }
+    }
+
+    /// Compare the precomputed legacy decision with the canonical
+    /// pipeline's verdict and emit a single divergence log line per
+    /// request when they disagree.
+    ///
+    /// The shape of the emitted line is the M3 telemetry contract from
+    /// `doc/plans/Innovation-2.1-canonical-request.md` §9.2 — see
+    /// [`DivergenceRecord`]. We log it as a prefixed single-line
+    /// `key=value` record so dev/test grep / structured log shippers can
+    /// pick it up without a new telemetry pipeline (deferred to M4).
+    ///
+    /// Returns the canonical decision so an enforce-mode caller can use
+    /// it directly. Off / shadow callers ignore the return value.
+    pub fn shadow_compare(
+        &self,
+        logger: &mut ConnectionLogger,
+        request_uri: &hyper::Uri,
+        request_method: &hyper::Method,
+        claims: &Claims,
+        legacy_allowed: bool,
+        mode: CanonicalMode,
+    ) -> CanonicalDecision {
+        debug_assert!(
+            mode != CanonicalMode::Off,
+            "shadow_compare invoked while canonical mode is Off; the proxy_authorizer guard should have skipped this call"
+        );
+
+        let canon = self.canonical_decision(logger, request_uri, request_method, claims);
+
+        let canon_allowed = canon.allowed_or_fail_closed();
+        if canon_allowed != legacy_allowed {
+            let record = DivergenceRecord {
+                rule_set_id: &self.id,
+                mode,
+                legacy_decision: legacy_allowed,
+                canon_decision: &canon,
+                request_uri,
+            };
+            // Warn level: M3 wants this visible in dev/test without a
+            // separate telemetry pipeline. It's per-request only when
+            // there *is* a divergence, so noise stays low. The plan
+            // is to graduate this to a structured telemetry event in
+            // M4 (§9.3).
+            logger.write(LoggerLevel::Warn, record.to_log_line());
+        }
+        canon
+    }
+}
+
+/// Outcome of [`ComputedAuthorizationItem::canonical_decision`].
+///
+/// Distinct from the legacy `bool` return so the shadow comparator can
+/// distinguish "canonicalization rejected the request" from "matcher
+/// said deny" in the divergence record. Both collapse to `false` under
+/// [`CanonicalDecision::allowed_or_fail_closed`].
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum CanonicalDecision {
+    Allowed,
+    Denied,
+    /// The canonical pipeline rejected the request before any matcher
+    /// ran. Always a deny in fail-closed evaluation.
+    Error(CanonError),
+}
+
+impl CanonicalDecision {
+    /// Project to a boolean using fail-closed semantics: errors become
+    /// `false`. Used by the comparator to decide whether the canonical
+    /// verdict diverges from the legacy bool.
+    pub fn allowed_or_fail_closed(&self) -> bool {
+        matches!(self, CanonicalDecision::Allowed)
+    }
+}
+
+impl std::fmt::Display for CanonicalDecision {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            CanonicalDecision::Allowed => write!(f, "allow"),
+            CanonicalDecision::Denied => write!(f, "deny"),
+            CanonicalDecision::Error(e) => write!(f, "error:{}", e.code()),
+        }
+    }
+}
+
+/// Single divergence event between the legacy authorizer and the
+/// canonical pipeline. Mapped to a one-line audit string by
+/// [`DivergenceRecord::to_log_line`]; M4 will graduate this to a
+/// structured telemetry event per design §9.3.
+struct DivergenceRecord<'a> {
+    rule_set_id: &'a str,
+    mode: CanonicalMode,
+    legacy_decision: bool,
+    canon_decision: &'a CanonicalDecision,
+    /// The full original `hyper::Uri`. We only log the path+query
+    /// portion to keep authority/userinfo (already validated by the
+    /// canonical pipeline) out of audit lines.
+    request_uri: &'a hyper::Uri,
+}
+
+impl<'a> DivergenceRecord<'a> {
+    fn to_log_line(&self) -> String {
+        // `CANON_DIVERGENCE` is a stable prefix so structured log
+        // shippers and ad-hoc grep can both find these. Field order is
+        // also part of the contract — append-only.
+        let path = self.request_uri.path();
+        let query = self.request_uri.query().unwrap_or("");
+        let path_and_query = if query.is_empty() {
+            path.to_string()
+        } else {
+            format!("{path}?{query}")
+        };
+        format!(
+            "CANON_DIVERGENCE mode={mode} rule_set={rsid} legacy={legacy} canon={canon} uri={uri:?}",
+            mode = self.mode,
+            rsid = self.rule_set_id,
+            legacy = if self.legacy_decision { "allow" } else { "deny" },
+            canon = self.canon_decision,
+            uri = path_and_query,
+        )
+    }
 }
 
 #[derive(Serialize, Deserialize, Clone)]
@@ -740,4 +1002,232 @@ mod tests {
             "Trusted user must be allowed through percent-encoded path (%2f)"
         );
     }
+
+    // ------------------------------------------------------------------
+    // Innovation 2.1 M3 — shadow-mode integration tests.
+    //
+    // These tests verify the new canonical-pipeline evaluator and the
+    // shadow comparator added in `ComputedAuthorizationItem`. They do
+    // NOT exercise the proxy_authorizer wire-up directly (that needs
+    // a live config getter); see `proxy_authorizer::tests` for the
+    // outer plumbing.
+    //
+    // What we pin here:
+    //   1. canonical_decision agrees with legacy on a path that does
+    //      not exercise canonicalization differences.
+    //   2. canonical_decision *disagrees* with legacy on the exact
+    //      bypass class M2 was built to catch — substring vs
+    //      segment prefix — so shadow telemetry has the signal it
+    //      promises in §9.2.
+    //   3. canonical_decision returns Error (fail-closed) when the
+    //      pipeline rejects the URL, and Display surfaces a stable
+    //      `error:<code>` token for the audit log.
+    //   4. compile_canonical_patterns drops privileges that cannot be
+    //      canonicalized, without taking down the rule load.
+    //   5. shadow_compare returns the canonical decision so an
+    //      enforce-mode caller can use it (M5/M6 hand-off).
+    // ------------------------------------------------------------------
+
+    use crate::proxy::authorization_rules::CanonicalDecision;
+    use crate::proxy::canonical::{CanonError, CanonicalMode};
+
+    /// Builds a single-privilege rule set: privilege path `/test` is
+    /// granted to one identity named "trusted". Default-deny.
+    fn build_test_rules(privilege_path: &str) -> ComputedAuthorizationItem {
+        let access_control_rules = AccessControlRules {
+            roles: Some(vec![Role {
+                name: "r".to_string(),
+                privileges: vec!["test".to_string()],
+            }]),
+            privileges: Some(vec![Privilege {
+                name: "test".to_string(),
+                path: privilege_path.to_string(),
+                queryParameters: None,
+            }]),
+            identities: Some(vec![Identity {
+                name: "trusted".to_string(),
+                userName: Some("trusted-user".to_string()),
+                groupName: None,
+                exePath: None,
+                processName: None,
+            }]),
+            roleAssignments: Some(vec![RoleAssignment {
+                role: "r".to_string(),
+                identities: vec!["trusted".to_string()],
+            }]),
+        };
+        let authorization_item = AuthorizationItem {
+            defaultAccess: "deny".to_string(),
+            mode: "enforce".to_string(),
+            rules: Some(access_control_rules),
+            id: "test-rs".to_string(),
+        };
+        ComputedAuthorizationItem::from_authorization_item(authorization_item)
+    }
+
+    fn trusted_claims() -> Claims {
+        Claims {
+            userId: 0,
+            userName: "trusted-user".to_string(),
+            userGroups: vec![],
+            processId: 0,
+            processFullPath: PathBuf::from("p"),
+            clientIp: "0".to_string(),
+            clientPort: 0,
+            processName: OsString::from("p"),
+            processCmdLine: "p".to_string(),
+            runAsElevated: true,
+        }
+    }
+
+    #[test]
+    fn canonical_decision_agrees_with_legacy_on_clean_paths() {
+        // Sanity: when nothing in the URL exercises a canonical-vs-
+        // substring difference, the two evaluators must agree. If this
+        // ever flips, every shadow log line becomes noise and we lose
+        // the M3 signal. So this is a regression net for both sides.
+        let rules = build_test_rules("/test");
+        let claims = trusted_claims();
+        let mut log = ConnectionLogger::new(0, 0);
+
+        let cases: &[(&str, bool)] = &[
+            // (uri, expected_allowed)
+            ("http://169.254.169.254/test/x", true),
+            ("http://169.254.169.254/test", true),
+            // not under /test -> default deny
+            ("http://169.254.169.254/other", false),
+        ];
+        for (uri_str, expected) in cases {
+            let uri = hyper::Uri::from_str(uri_str).unwrap();
+            let legacy = rules.is_allowed(&mut log, uri.clone(), claims.clone());
+            let canon = rules.canonical_decision(&mut log, &uri, &hyper::Method::GET, &claims);
+            assert_eq!(legacy, *expected, "legacy verdict for {uri_str}");
+            assert_eq!(
+                canon.allowed_or_fail_closed(),
+                *expected,
+                "canonical verdict for {uri_str}"
+            );
+            assert_eq!(
+                legacy,
+                canon.allowed_or_fail_closed(),
+                "shadow drift for {uri_str}"
+            );
+        }
+    }
+
+    #[test]
+    fn canonical_decision_diverges_on_substring_vs_segment_prefix() {
+        // THE M3 motivating divergence. Legacy `Privilege::is_match`
+        // uses `starts_with` -> `/test-evil/x` is "under /test" by
+        // substring. Canonical does segment-by-segment prefix
+        // matching -> `/test-evil/x` is NOT under `/test`. Shadow mode
+        // exists to report this exact class.
+        let rules = build_test_rules("/test");
+        let claims = trusted_claims();
+        let mut log = ConnectionLogger::new(0, 0);
+
+        let uri = hyper::Uri::from_str("http://169.254.169.254/test-evil/anything").unwrap();
+        let legacy = rules.is_allowed(&mut log, uri.clone(), claims.clone());
+        let canon = rules.canonical_decision(&mut log, &uri, &hyper::Method::GET, &claims);
+
+        // Legacy is buggy here (allows the SSRF-shaped path).
+        assert!(
+            legacy,
+            "legacy substring match should still allow /test-evil; if this fails, legacy was fixed and this test should be retired"
+        );
+        // Canonical correctly denies.
+        assert_eq!(
+            canon,
+            CanonicalDecision::Denied,
+            "canonical segment match must deny /test-evil"
+        );
+        // The shape we'll feed to telemetry:
+        assert_ne!(
+            legacy,
+            canon.allowed_or_fail_closed(),
+            "this is the divergence shadow mode must surface"
+        );
+    }
+
+    #[test]
+    fn canonical_decision_returns_error_for_userinfo_url() {
+        // Userinfo in the authority -> canonical rejects at the
+        // pipeline entry with UserinfoPresent. The decision must
+        // collapse to fail-closed and Display must surface the stable
+        // error code so audit logs can grep for it.
+        let rules = build_test_rules("/test");
+        let claims = trusted_claims();
+        let mut log = ConnectionLogger::new(0, 0);
+
+        // Build via hyper to avoid relying on whether `from_str`
+        // accepts the userinfo form.
+        let uri: hyper::Uri = match "http://attacker@169.254.169.254/test".parse() {
+            Ok(u) => u,
+            Err(_) => {
+                // Hyper may refuse to parse this -> nothing to test.
+                eprintln!("hyper refused to parse userinfo URL; skipping");
+                return;
+            }
+        };
+        let canon = rules.canonical_decision(&mut log, &uri, &hyper::Method::GET, &claims);
+        assert_eq!(
+            canon,
+            CanonicalDecision::Error(CanonError::UserinfoPresent),
+            "canonical must surface UserinfoPresent (got {canon:?})"
+        );
+        assert!(!canon.allowed_or_fail_closed(), "errors must fail-closed");
+        // The audit-log token shape is part of the §9.2 telemetry
+        // contract — keep it stable across refactors.
+        assert_eq!(canon.to_string(), "error:CANON_USERINFO");
+    }
+
+    #[test]
+    fn shadow_compare_returns_canonical_decision_for_enforce_handoff() {
+        // shadow_compare's return value is the M5/M6 hand-off: the
+        // enforce-mode caller will use it as the decision once the
+        // shadow window proves zero divergences. We pin its
+        // semantics here so a future enforce wiring doesn't get a
+        // surprise.
+        let rules = build_test_rules("/test");
+        let claims = trusted_claims();
+        let mut log = ConnectionLogger::new(0, 0);
+
+        // Divergent case: legacy=true (substring), canon=Denied.
+        let uri = hyper::Uri::from_str("http://169.254.169.254/test-evil/anything").unwrap();
+        let legacy_allowed = true;
+        let canon = rules.shadow_compare(
+            &mut log,
+            &uri,
+            &hyper::Method::GET,
+            &claims,
+            legacy_allowed,
+            CanonicalMode::Shadow,
+        );
+        assert_eq!(
+            canon,
+            CanonicalDecision::Denied,
+            "shadow_compare must return the canonical verdict"
+        );
+    }
+
+    #[test]
+    fn compile_canonical_patterns_drops_uncanonicalizable_rules() {
+        // A privilege whose path can't be canonicalized must NOT
+        // brick the whole rule-load — the legacy matcher still has
+        // its copy and the canonical cache simply skips it. This is
+        // exactly the asymmetry shadow mode is designed to surface.
+        //
+        // We use an overlong %C0%AF (canonically `/`) which the
+        // pipeline rejects with OverlongUtf8.
+        let rules = build_test_rules("/x%C0%AFy");
+
+        // Legacy still sees the privilege:
+        assert_eq!(rules.privileges.len(), 1);
+        // Canonical cache dropped it:
+        assert_eq!(
+            rules.canonical_patterns.len(),
+            0,
+            "uncanonicalizable privilege must be skipped from canonical cache"
+        );
+    }
 }
diff --git a/proxy_agent/src/proxy/canonical/destination.rs b/proxy_agent/src/proxy/canonical/destination.rs
new file mode 100644
index 00000000..34c8b9fb
--- /dev/null
+++ b/proxy_agent/src/proxy/canonical/destination.rs
@@ -0,0 +1,873 @@
+// Copyright (c) Microsoft Corporation
+// SPDX-License-Identifier: MIT
+
+//! Destination classification.
+//!
+//! Maps the host+port of an incoming request to one of GPA's known
+//! endpoints (IMDS, WireServer, HostGAPlugin). Numeric host forms
+//! (decimal, hex, octal, IPv4-mapped IPv6, etc.) all canonicalize to the
+//! same variant — this is the defense against pentest C7.
+//!
+//! Hostnames that are not IP literals are *not* DNS-resolved here. DNS at
+//! the matcher would be a confused-deputy surface; instead we surface the
+//! host text in [`Destination::Unknown`] so rule authors can write
+//! explicit allow rules keyed on host text if they need it.
+
+use std::fmt;
+use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
+
+use hyper::Uri;
+
+use crate::common::constants;
+
+use super::CanonError;
+
+/// Address family of an [`Destination::Unknown`] target. Kept narrow so
+/// we don't accidentally treat numeric strings as hostnames.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum AddrFamily {
+    V4,
+    V6,
+    Name,
+}
+
+/// Canonical destination. Matching uses the typed enum only; the raw
+/// `host_text` on `Unknown` is for audit, never for matching decisions.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Destination {
+    /// Instance Metadata Service: 169.254.169.254:80 in any encoding.
+    Imds,
+    /// Azure WireServer: 168.63.129.16:80.
+    WireServer,
+    /// Host GuestAgent Plugin: 168.63.129.16:32526.
+    HostGaPlugin,
+    /// Anything else. The matcher denies unknowns unless an explicit rule
+    /// allows them.
+    Unknown {
+        family: AddrFamily,
+        ip: Option<IpAddr>,
+        port: u16,
+        host_text: Option<String>,
+    },
+}
+
+impl fmt::Display for Destination {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Destination::Imds => f.write_str("imds"),
+            Destination::WireServer => f.write_str("wireserver"),
+            Destination::HostGaPlugin => f.write_str("hostga"),
+            Destination::Unknown { .. } => f.write_str("unknown"),
+        }
+    }
+}
+
+/// Classify the destination of a request URI. See module docs.
+pub fn classify(uri: &Uri) -> Result<Destination, CanonError> {
+    // Reject userinfo (`user@host` smuggling).
+    if uri
+        .authority()
+        .map(|a| a.as_str().contains('@'))
+        .unwrap_or(false)
+    {
+        return Err(CanonError::UserinfoPresent);
+    }
+
+    let host = match uri.host() {
+        Some(h) => h,
+        // Origin-form requests (the common proxy case) have no authority.
+        // We must still allow them to flow: the destination is decided by
+        // the redirector at the socket layer, not by the URL.
+        None => {
+            return Ok(Destination::Unknown {
+                family: AddrFamily::Name,
+                ip: None,
+                port: 0,
+                host_text: None,
+            });
+        }
+    };
+
+    let port = if let Some(p) = uri.port() {
+        // Happy path: hyper successfully framed the port and it fits in
+        // u16. Re-parsing the text here is a belt-and-suspenders check
+        // so we never widen the return type just to thread an infallible
+        // unwrap through.
+        p.as_str().parse::<u16>().map_err(|_| CanonError::BadPort)?
+    } else {
+        // hyper's port() / port_u16() both return None when the port
+        // text is *present but malformed* (e.g. ":99999" overflows
+        // u16). The old `port_u16().unwrap_or(IMDS_PORT)` collapsed
+        // this into the default, which let
+        // `http://169.254.169.254:99999/x` smuggle past as IMDS.
+        // Inspect the authority string ourselves to tell "no port"
+        // apart from "bad port". For IPv6, the host is bracketed
+        // (`[...]`), so any colon *after* `]` is the port separator;
+        // otherwise the last `:` (if any) is.
+        let port_text: Option<&str> = uri.authority().and_then(|a| {
+            let s = a.as_str();
+            if let Some(rb) = s.rfind(']') {
+                s[rb + 1..].strip_prefix(':')
+            } else {
+                s.rsplit_once(':').map(|(_, p)| p)
+            }
+        });
+        match port_text {
+            Some(text) => text.parse::<u16>().map_err(|_| CanonError::BadPort)?,
+            None => constants::IMDS_PORT,
+        }
+    };
+
+    let ip = parse_host_as_ip(host)?;
+    match ip {
+        Some(IpAddr::V4(v4)) => Ok(known_v4(v4, port).unwrap_or(Destination::Unknown {
+            family: AddrFamily::V4,
+            ip: Some(IpAddr::V4(v4)),
+            port,
+            host_text: Some(host.to_string()),
+        })),
+        Some(IpAddr::V6(v6)) => {
+            // IPv4-mapped IPv6 (::ffff:a.b.c.d) projects down to IPv4 so
+            // it shares the same Destination as the dotted form.
+            if let Some(v4) = v6.to_ipv4_mapped() {
+                if let Some(known) = known_v4(v4, port) {
+                    return Ok(known);
+                }
+                return Ok(Destination::Unknown {
+                    family: AddrFamily::V4,
+                    ip: Some(IpAddr::V4(v4)),
+                    port,
+                    host_text: Some(host.to_string()),
+                });
+            }
+            Ok(Destination::Unknown {
+                family: AddrFamily::V6,
+                ip: Some(IpAddr::V6(v6)),
+                port,
+                host_text: Some(host.to_string()),
+            })
+        }
+        None => Ok(Destination::Unknown {
+            family: AddrFamily::Name,
+            ip: None,
+            port,
+            host_text: Some(host.to_string()),
+        }),
+    }
+}
+
+fn known_v4(v4: Ipv4Addr, port: u16) -> Option<Destination> {
+    let imds: Ipv4Addr = constants::IMDS_IP.parse().ok()?;
+    let wire: Ipv4Addr = constants::WIRE_SERVER_IP.parse().ok()?;
+
+    if v4 == imds && port == constants::IMDS_PORT {
+        return Some(Destination::Imds);
+    }
+    if v4 == wire && port == constants::WIRE_SERVER_PORT {
+        return Some(Destination::WireServer);
+    }
+    if v4 == wire && port == constants::GA_PLUGIN_PORT {
+        return Some(Destination::HostGaPlugin);
+    }
+    None
+}
+
+/// Parse a host string into an `IpAddr` when it is an IP literal in any
+/// historical numeric form. Returns `Ok(None)` for true hostnames (i.e.
+/// not an IP), which the caller treats as `Destination::Unknown`.
+///
+/// Supports:
+///   - dotted quad   `169.254.169.254`
+///   - 32-bit decimal `2852039166`
+///   - 32-bit hex     `0xa9fea9fe`
+///   - octal-quad     `0251.0376.0251.0376`
+///   - hex-quad       `0xa9.0xfe.0xa9.0xfe`
+///   - mixed forms allowed per RFC 3493 / inet_aton tradition
+///   - bracketed IPv6 `[::ffff:169.254.169.254]` (brackets handled by hyper)
+fn parse_host_as_ip(host: &str) -> Result<Option<IpAddr>, CanonError> {
+    // Tolerate both forms (hyper strips brackets in most versions but
+    // not all). Strip surrounding `[]` if present before parsing IPv6.
+    let host_unbracketed = host
+        .strip_prefix('[')
+        .and_then(|h| h.strip_suffix(']'))
+        .unwrap_or(host);
+    if let Ok(v6) = host_unbracketed.parse::<Ipv6Addr>() {
+        return Ok(Some(IpAddr::V6(v6)));
+    }
+    if let Ok(v4) = host.parse::<Ipv4Addr>() {
+        return Ok(Some(IpAddr::V4(v4)));
+    }
+
+    // Trailing dot on hostname (`metadata.azure.internal.`) — strip then
+    // re-classify. A bare `.` is not a host.
+    let trimmed = host.trim_end_matches('.');
+    if trimmed.is_empty() {
+        return Err(CanonError::BadHost);
+    }
+
+    if let Some(v4) = parse_inet_aton(trimmed)? {
+        return Ok(Some(IpAddr::V4(v4)));
+    }
+
+    // Not an IP literal in any supported form. Distinguish "valid
+    // hostname" from "garbage": at least one ASCII alphanumeric and no
+    // forbidden characters.
+    if trimmed
+        .chars()
+        .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '.')
+        && trimmed.chars().any(|c| c.is_ascii_alphanumeric())
+    {
+        Ok(None)
+    } else {
+        Err(CanonError::BadHost)
+    }
+}
+
+/// `inet_aton`-style numeric IPv4 parser.
+///
+/// Implemented by hand (rather than calling out to libc) because
+/// `inet_aton` behavior is platform-dependent: glibc accepts `0x` and
+/// leading-zero octal; musl is stricter; Windows differs again. A
+/// hand-rolled parser keeps Linux and Windows builds identical.
+fn parse_inet_aton(input: &str) -> Result<Option<Ipv4Addr>, CanonError> {
+    // Must look numeric. Reject early if it has any character outside the
+    // numeric/separator set so we don't shadow a legitimate hostname.
+    if input.is_empty() {
+        return Err(CanonError::BadHost);
+    }
+    let looks_numeric = input
+        .chars()
+        .all(|c| c.is_ascii_hexdigit() || c == 'x' || c == 'X' || c == '.');
+    if !looks_numeric {
+        return Ok(None);
+    }
+
+    let parts: Vec<&str> = input.split('.').collect();
+    if parts.is_empty() || parts.len() > 4 {
+        return Err(CanonError::BadHost);
+    }
+    // Empty parts (e.g. trailing dot already stripped, double dot here)
+    // are illegal.
+    if parts.iter().any(|p| p.is_empty()) {
+        return Err(CanonError::BadHost);
+    }
+
+    let nums: Vec<u32> = parts
+        .iter()
+        .map(|p| parse_numeric_octet(p))
+        .collect::<Result<Vec<_>, _>>()?;
+
+    let addr: u32 = match nums.len() {
+        // single 32-bit number: maps directly
+        1 => nums[0],
+        // a.b => a in top 8 bits, b in low 24
+        2 => {
+            if nums[0] > 0xFF || nums[1] > 0x00FF_FFFF {
+                return Err(CanonError::BadHost);
+            }
+            (nums[0] << 24) | nums[1]
+        }
+        // a.b.c => a,b top 16 bits, c low 16
+        3 => {
+            if nums[0] > 0xFF || nums[1] > 0xFF || nums[2] > 0xFFFF {
+                return Err(CanonError::BadHost);
+            }
+            (nums[0] << 24) | (nums[1] << 16) | nums[2]
+        }
+        // a.b.c.d => standard dotted quad
+        4 => {
+            if nums.iter().any(|&n| n > 0xFF) {
+                return Err(CanonError::BadHost);
+            }
+            (nums[0] << 24) | (nums[1] << 16) | (nums[2] << 8) | nums[3]
+        }
+        _ => return Err(CanonError::BadHost),
+    };
+
+    Ok(Some(Ipv4Addr::from(addr)))
+}
+
+fn parse_numeric_octet(s: &str) -> Result<u32, CanonError> {
+    // 0x... => hex
+    if let Some(rest) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) {
+        if rest.is_empty() || rest.len() > 8 {
+            return Err(CanonError::BadHost);
+        }
+        return u32::from_str_radix(rest, 16).map_err(|_| CanonError::BadHost);
+    }
+    // 0... (and not just "0") => octal
+    if s.len() > 1 && s.starts_with('0') {
+        return u32::from_str_radix(&s[1..], 8).map_err(|_| CanonError::BadHost);
+    }
+    // decimal
+    s.parse::<u32>().map_err(|_| CanonError::BadHost)
+}
+
+#[cfg(test)]
+mod destination_tests {
+    use super::*;
+
+    fn aton(s: &str) -> Result<Option<Ipv4Addr>, CanonError> {
+        parse_inet_aton(s)
+    }
+    fn host(s: &str) -> Result<Option<IpAddr>, CanonError> {
+        parse_host_as_ip(s)
+    }
+    fn ip(s: &str) -> Ipv4Addr {
+        s.parse().unwrap()
+    }
+    fn uri(s: &str) -> Uri {
+        s.parse().unwrap()
+    }
+
+    // -----------------------------------------------------------------
+    // parse_numeric_octet
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn numeric_octet_accepts_valid_forms() {
+        // (input, expected). Covers decimal (incl. u32::MAX and lone
+        // zero), hex with both prefix cases, and octal.
+        let cases: &[(&str, u32)] = &[
+            ("0", 0),
+            ("169", 169),
+            ("255", 255),
+            ("4294967295", u32::MAX),
+            ("0x0", 0),
+            ("0xa9", 0xA9),
+            ("0XA9", 0xA9),
+            ("0xa9fea9fe", 0xA9FE_A9FE),
+            ("0xFFFFFFFF", u32::MAX),
+            ("0251", 0o251), // 169
+            ("0376", 0o376), // 254
+            ("00", 0),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(
+                parse_numeric_octet(input).unwrap(),
+                *expected,
+                "input={input:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn numeric_octet_rejects_invalid_forms() {
+        // All of these must be BadHost: decimal overflow, hex without
+        // digits, hex too long for u32, non-hex digits in hex, non-octal
+        // digits in an octal-prefixed string, and empty input.
+        let bad: &[&str] = &[
+            "4294967296",  // decimal overflow
+            "0x",          // empty hex
+            "0X",          // empty hex (upper prefix)
+            "0x100000000", // hex too long for u32
+            "0xZZ",        // bad hex digit
+            "08",          // 8 is not octal
+            "0129",        // 9 is not octal
+            "",            // empty
+        ];
+        for input in bad {
+            assert_eq!(
+                parse_numeric_octet(input).unwrap_err(),
+                CanonError::BadHost,
+                "input={input:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // parse_inet_aton
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn inet_aton_accepts_all_numeric_forms() {
+        let imds = Ipv4Addr::new(169, 254, 169, 254);
+        // (input, expected). Covers dotted quad, single decimal, single
+        // hex, octal/hex/mixed quads, and the 2-/3-part legacy forms.
+        let two_part = {
+            let v: u32 = (169u32 << 24) | 16_624_894;
+            Ipv4Addr::from(v)
+        };
+        let cases: &[(&str, Ipv4Addr)] = &[
+            ("169.254.169.254", imds),
+            ("2852039166", imds),
+            ("0xa9fea9fe", imds),
+            ("0251.0376.0251.0376", imds),
+            ("0xa9.0xfe.0xa9.0xfe", imds),
+            ("169.0xfe.0251.254", imds),
+            ("169.254.43518", imds),    // 3-part form
+            ("169.16624894", two_part), // 2-part form
+        ];
+        for (input, expected) in cases {
+            assert_eq!(aton(input).unwrap(), Some(*expected), "input={input:?}");
+        }
+    }
+
+    #[test]
+    fn inet_aton_rejects_malformed_inputs() {
+        // Empty parts, too-many parts, octet overflow at each supported
+        // arity, and the empty string itself.
+        let bad: &[&str] = &[
+            "",
+            "1.2.3.4.5",  // too many parts
+            "1..2.3",     // double dot
+            ".1.2.3",     // leading dot
+            "1.2.3.",     // trailing dot
+            "300.1.1.1",  // 4-part octet > 0xFF
+            "256.0.0.0",  // 4-part octet > 0xFF
+            "256.1",      // 2-part top byte > 0xFF
+            "1.16777216", // 2-part low value > 0x00FF_FFFF
+            "1.2.65536",  // 3-part last value > 0xFFFF
+        ];
+        for input in bad {
+            assert_eq!(
+                aton(input).unwrap_err(),
+                CanonError::BadHost,
+                "input={input:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn inet_aton_passes_through_non_numeric_hostnames() {
+        // Hostnames must fall through with Ok(None), not error — the
+        // caller decides whether to allow them.
+        for input in ["metadata", "metadata.azure.internal", "host-with-dash"] {
+            assert_eq!(aton(input).unwrap(), None, "input={input:?}");
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // parse_host_as_ip
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn host_parses_all_ip_literal_forms() {
+        // (input, expected IpAddr). Covers IPv4 dotted, IPv4 numeric
+        // (falls through to inet_aton), IPv6 plain, IPv6 bracketed
+        // (tolerance for hyper versions that don't strip brackets), and
+        // IPv4-mapped IPv6.
+        let v6_mapped: Ipv6Addr = "::ffff:169.254.169.254".parse().unwrap();
+        let cases: &[(&str, IpAddr)] = &[
+            ("127.0.0.1", IpAddr::V4(Ipv4Addr::LOCALHOST)),
+            ("2852039166", IpAddr::V4(Ipv4Addr::new(169, 254, 169, 254))),
+            ("::1", IpAddr::V6(Ipv6Addr::LOCALHOST)),
+            ("[::1]", IpAddr::V6(Ipv6Addr::LOCALHOST)),
+            ("::ffff:169.254.169.254", IpAddr::V6(v6_mapped)),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(host(input).unwrap(), Some(*expected), "input={input:?}");
+        }
+    }
+
+    #[test]
+    fn host_returns_none_for_valid_hostnames() {
+        // Hostnames (including the RFC 1034 trailing-dot form) must not
+        // be silently treated as IPs.
+        for input in [
+            "metadata.azure.internal",
+            "metadata.azure.internal.",
+            "foo",
+            "a-b-c.example",
+        ] {
+            assert_eq!(host(input).unwrap(), None, "input={input:?}");
+        }
+    }
+
+    #[test]
+    fn host_rejects_garbage() {
+        // Empty, bare dot, and any input containing characters outside
+        // the hostname/IP alphabet must be rejected — not silently
+        // treated as a hostname.
+        for input in ["", ".", "foo bar", "foo/bar", "foo_bar"] {
+            assert_eq!(
+                host(input).unwrap_err(),
+                CanonError::BadHost,
+                "input={input:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // known_v4
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn known_v4_maps_recognized_endpoints() {
+        let cases: &[(&str, u16, Destination)] = &[
+            (constants::IMDS_IP, constants::IMDS_PORT, Destination::Imds),
+            (
+                constants::WIRE_SERVER_IP,
+                constants::WIRE_SERVER_PORT,
+                Destination::WireServer,
+            ),
+            (
+                constants::WIRE_SERVER_IP,
+                constants::GA_PLUGIN_PORT,
+                Destination::HostGaPlugin,
+            ),
+        ];
+        for (addr, port, expected) in cases {
+            assert_eq!(
+                known_v4(ip(addr), *port),
+                Some(expected.clone()),
+                "ip={addr}, port={port}"
+            );
+        }
+    }
+
+    #[test]
+    fn known_v4_returns_none_for_misses() {
+        // Correct IP / wrong port and any unrelated IP must not be
+        // promoted to a known destination.
+        let cases: &[(&str, u16)] = &[
+            (constants::IMDS_IP, 8080),
+            (constants::WIRE_SERVER_IP, 8080),
+            ("8.8.8.8", constants::IMDS_PORT),
+            ("127.0.0.1", constants::IMDS_PORT),
+        ];
+        for (addr, port) in cases {
+            assert_eq!(known_v4(ip(addr), *port), None, "ip={addr}, port={port}");
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // classify (URI-level entrypoint)
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn classify_resolves_known_destinations() {
+        // (url, expected). Covers default-port inference for IMDS,
+        // explicit-port forms, WireServer, HostGA, and the IPv4-mapped
+        // IPv6 projection (pentest C7).
+        let cases: &[(&str, Destination)] = &[
+            ("http://169.254.169.254/x", Destination::Imds),
+            ("http://169.254.169.254:80/x", Destination::Imds),
+            ("http://168.63.129.16:80/x", Destination::WireServer),
+            ("http://168.63.129.16:32526/x", Destination::HostGaPlugin),
+            ("http://[::ffff:169.254.169.254]/x", Destination::Imds),
+        ];
+        for (url, expected) in cases {
+            assert_eq!(classify(&uri(url)).unwrap(), *expected, "url={url}");
+        }
+    }
+
+    #[test]
+    fn classify_falls_back_to_unknown_for_unrecognized_targets() {
+        // IMDS IP on the wrong port must NOT inherit the IMDS variant.
+        match classify(&uri("http://169.254.169.254:8080/x")).unwrap() {
+            Destination::Unknown {
+                family: AddrFamily::V4,
+                ip: Some(IpAddr::V4(v4)),
+                port: 8080,
+                ..
+            } => assert_eq!(v4, ip(constants::IMDS_IP)),
+            other => panic!("expected Unknown V4 on port 8080, got {other:?}"),
+        }
+
+        // Arbitrary public IP -> Unknown V4 with host_text preserved.
+        match classify(&uri("http://1.2.3.4/x")).unwrap() {
+            Destination::Unknown {
+                family: AddrFamily::V4,
+                ip: Some(IpAddr::V4(v4)),
+                host_text: Some(_),
+                ..
+            } => assert_eq!(v4, Ipv4Addr::new(1, 2, 3, 4)),
+            other => panic!("expected Unknown V4, got {other:?}"),
+        }
+
+        // Hostname -> Unknown Name with host_text preserved (no DNS).
+        match classify(&uri("http://metadata.azure.internal/x")).unwrap() {
+            Destination::Unknown {
+                family: AddrFamily::Name,
+                ip: None,
+                host_text: Some(s),
+                ..
+            } => assert_eq!(s, "metadata.azure.internal"),
+            other => panic!("expected Unknown Name, got {other:?}"),
+        }
+
+        // Non-mapped IPv6 -> Unknown V6.
+        match classify(&uri("http://[::1]/x")).unwrap() {
+            Destination::Unknown {
+                family: AddrFamily::V6,
+                ip: Some(IpAddr::V6(v6)),
+                ..
+            } => assert_eq!(v6, Ipv6Addr::LOCALHOST),
+            other => panic!("expected Unknown V6, got {other:?}"),
+        }
+
+        // Origin-form (no authority) -> stub Unknown with no info.
+        let origin_form: Uri = "/metadata/identity".parse().unwrap();
+        match classify(&origin_form).unwrap() {
+            Destination::Unknown {
+                family: AddrFamily::Name,
+                ip: None,
+                port: 0,
+                host_text: None,
+            } => {}
+            other => panic!("expected stub Unknown for origin-form, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn classify_rejects_bad_inputs() {
+        // (url, expected error). Userinfo smuggling and host text with
+        // characters outside the hostname/IP alphabet.
+        let cases: &[(&str, CanonError)] = &[
+            ("http://user@169.254.169.254/x", CanonError::UserinfoPresent),
+            ("http://foo_bar/x", CanonError::BadHost),
+        ];
+        for (url, expected) in cases {
+            assert_eq!(classify(&uri(url)).unwrap_err(), *expected, "url={url}");
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // Display
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn display_strings_are_stable() {
+        // These strings appear in audit logs; pin them so a rename
+        // doesn't silently break downstream log consumers.
+        let cases: &[(Destination, &str)] = &[
+            (Destination::Imds, "imds"),
+            (Destination::WireServer, "wireserver"),
+            (Destination::HostGaPlugin, "hostga"),
+            (
+                Destination::Unknown {
+                    family: AddrFamily::Name,
+                    ip: None,
+                    port: 0,
+                    host_text: None,
+                },
+                "unknown",
+            ),
+        ];
+        for (dest, expected) in cases {
+            assert_eq!(&format!("{dest}"), expected);
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // Appendix A.2 — host golden vectors + end-to-end classification
+    //
+    // Spec-conformance tests for destination classification. Vector
+    // labels (`A2.xxx`) appear in every failure message so a regression
+    // points straight back to the row in `Innovation-2.1-canonical-request.md`.
+    //
+    // These call the full canonicalize_str() entrypoint rather than
+    // classify() directly so the assertion target matches what a caller
+    // would see — the assertion target is still the destination output.
+    // -----------------------------------------------------------------
+
+    fn dest_via_pipeline(url: &str) -> Destination {
+        super::super::canonicalize_str(url).unwrap().destination
+    }
+
+    #[test]
+    fn appendix_a2_host_vectors_classify_as_imds() {
+        // Every numeric / packed / IPv4-mapped-IPv6 form of
+        // 169.254.169.254 must classify to Destination::Imds — this is
+        // the SSRF-defeating contract that justifies the whole module's
+        // existence.
+        let cases: &[(&str, &str)] = &[
+            ("A2.dotted_quad", "http://169.254.169.254/x"),
+            ("A2.decimal_32bit", "http://2852039166/x"),
+            ("A2.hex_packed", "http://0xa9fea9fe/x"),
+            ("A2.octal_quad", "http://0251.0376.0251.0376/x"),
+            ("A2.ipv4_mapped_dotted", "http://[::ffff:169.254.169.254]/x"),
+            ("A2.ipv4_mapped_hex", "http://[::ffff:a9fe:a9fe]/x"),
+            // Explicit :80 — must still classify as IMDS (default port).
+            ("A2.explicit_default_port", "http://169.254.169.254:80/x"),
+        ];
+        for (label, url) in cases {
+            assert_eq!(dest_via_pipeline(url), Destination::Imds, "vector={label}");
+        }
+    }
+
+    #[test]
+    fn hostnames_classify_as_unknown_for_dns_rebinding_defense() {
+        // Hostnames are NEVER trusted — even one that resolves to IMDS
+        // at runtime must canonicalize to Unknown (with host_text
+        // preserved for audit). This is the OWASP DNS-rebinding
+        // defense.
+        match dest_via_pipeline("http://metadata.azure.internal/x") {
+            Destination::Unknown {
+                host_text: Some(s), ..
+            } => assert!(
+                s.contains("metadata.azure.internal"),
+                "host_text must preserve original hostname for audit, got {s:?}"
+            ),
+            d => panic!("expected Unknown with host_text, got {d:?}"),
+        }
+    }
+
+    #[test]
+    fn destination_classified_by_host_and_port() {
+        // WireServer and HostGAPlugin share an IP but differ by port —
+        // pin both the default-port and the :32526 branch.
+        let cases: &[(&str, &str, Destination)] = &[
+            (
+                "wireserver default port",
+                "http://168.63.129.16/x",
+                Destination::WireServer,
+            ),
+            (
+                "wireserver explicit :80",
+                "http://168.63.129.16:80/x",
+                Destination::WireServer,
+            ),
+            (
+                "hostgaplugin on :32526",
+                "http://168.63.129.16:32526/x",
+                Destination::HostGaPlugin,
+            ),
+        ];
+        for (label, url, expected) in cases {
+            assert_eq!(dest_via_pipeline(url), *expected, "{label}");
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // C7 extended host vectors (M2: golden vectors for the pentest C7
+    // "host-form differentials" family — design doc §3.2). The
+    // Appendix A.2 table above covers IMDS-side variations; this
+    // expands to:
+    //
+    //   - WireServer (168.63.129.16) in every numeric form.
+    //   - HostGAPlugin (same IP, port 32526) in every numeric form.
+    //   - Adversarial host shapes from the threat-model section: userinfo
+    //     smuggling, port smuggling, hostname-uppercase / trailing-dot,
+    //     out-of-range port, malformed IP literal.
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn c7_extended_host_vectors_classify_correctly() {
+        // (label, url, expected) — all numeric forms of 168.63.129.16
+        // (= 0xA83F8110 = 2_822_734_096 decimal) must classify to
+        // WireServer when on port 80 and HostGaPlugin when on :32526.
+        let cases: &[(&str, &str, Destination)] = &[
+            // WireServer ↔ numeric forms.
+            (
+                "C7.wireserver_decimal_32bit",
+                "http://2822734096/x",
+                Destination::WireServer,
+            ),
+            (
+                "C7.wireserver_hex_packed",
+                "http://0xa83f8110/x",
+                Destination::WireServer,
+            ),
+            (
+                "C7.wireserver_ipv4_mapped_dotted",
+                "http://[::ffff:168.63.129.16]/x",
+                Destination::WireServer,
+            ),
+            (
+                "C7.wireserver_ipv4_mapped_hex",
+                "http://[::ffff:a83f:8110]/x",
+                Destination::WireServer,
+            ),
+            // HostGAPlugin ↔ numeric forms (still on :32526).
+            (
+                "C7.hostga_decimal_with_port",
+                "http://2822734096:32526/x",
+                Destination::HostGaPlugin,
+            ),
+            (
+                "C7.hostga_hex_packed_with_port",
+                "http://0xa83f8110:32526/x",
+                Destination::HostGaPlugin,
+            ),
+        ];
+        for (label, url, expected) in cases {
+            assert_eq!(dest_via_pipeline(url), *expected, "vector={label}");
+        }
+    }
+
+    #[test]
+    fn c7_hostname_shape_variations_classify_as_unknown() {
+        // Every hostname shape stays Unknown — the matcher never
+        // trusts DNS. host_text is preserved verbatim (modulo any
+        // lowercasing hyper does at parse time) for audit.
+        let cases: &[(&str, &str)] = &[
+            // Plain hostname.
+            ("C7.hostname_plain", "http://metadata.azure.internal/x"),
+            // Uppercased hostname.
+            ("C7.hostname_uppercase", "http://METADATA.AZURE.INTERNAL/x"),
+            // Trailing-dot hostname (FQDN). Hyper may or may not
+            // strip the dot, but classification is still Unknown.
+            (
+                "C7.hostname_trailing_dot",
+                "http://metadata.azure.internal./x",
+            ),
+            // Arbitrary attacker-controlled hostname.
+            ("C7.hostname_attacker", "http://attacker.example.com/x"),
+        ];
+        for (label, url) in cases {
+            match dest_via_pipeline(url) {
+                Destination::Unknown { host_text, .. } => assert!(
+                    host_text.is_some(),
+                    "vector={label}: host_text must be preserved for audit"
+                ),
+                d => panic!("vector={label}: expected Unknown, got {d:?}"),
+            }
+        }
+    }
+
+    #[test]
+    fn c7_adversarial_host_shapes_rejected() {
+        use super::super::CanonError;
+
+        // (label, url, expected_one_of) — the canonicalizer's
+        // top-level entrypoint may surface either of two errors
+        // depending on whether hyper or our gate rejects the input
+        // first. Both are equally a deny.
+        let cases: &[(&str, &str, &[CanonError])] = &[
+            // Userinfo smuggle: `user@host`. UserinfoPresent is the
+            // preferred surfacing; BadHost is the fallback if hyper
+            // refuses to parse it as an absolute URI.
+            (
+                "C7.userinfo_bare_user",
+                "http://user@169.254.169.254/x",
+                &[CanonError::UserinfoPresent, CanonError::BadHost],
+            ),
+            (
+                "C7.userinfo_user_password",
+                "http://user:pass@169.254.169.254/x",
+                &[CanonError::UserinfoPresent, CanonError::BadHost],
+            ),
+            // Port-smuggle: `IP:port@evil` is the classic confusable
+            // (the `:80` looks like a port but the `@` makes the real
+            // host `evil`). Either UserinfoPresent or BadHost.
+            (
+                "C7.port_smuggle_via_userinfo",
+                "http://169.254.169.254:80@evil.com/x",
+                &[CanonError::UserinfoPresent, CanonError::BadHost],
+            ),
+            // Out-of-range port (>65535).
+            (
+                "C7.port_too_large",
+                "http://169.254.169.254:99999/x",
+                &[CanonError::BadPort, CanonError::BadHost],
+            ),
+            // Malformed dotted-quad (octet > 255).
+            (
+                "C7.ipv4_octet_overflow",
+                "http://999.999.999.999/x",
+                &[CanonError::BadHost],
+            ),
+        ];
+        for (label, url, expected_set) in cases {
+            match super::super::canonicalize_str(url) {
+                Ok(ok) => panic!("vector={label}: expected deny, got Ok({ok:?})"),
+                Err(err) => assert!(
+                    expected_set.contains(&err),
+                    "vector={label}: got {err:?}, expected one of {expected_set:?}"
+                ),
+            }
+        }
+    }
+}
diff --git a/proxy_agent/src/proxy/canonical/mod.rs b/proxy_agent/src/proxy/canonical/mod.rs
new file mode 100644
index 00000000..384c0cad
--- /dev/null
+++ b/proxy_agent/src/proxy/canonical/mod.rs
@@ -0,0 +1,637 @@
+// Copyright (c) Microsoft Corporation
+// SPDX-License-Identifier: MIT
+
+//! Canonical request model (Innovation 2.1).
+//!
+//! Provides a single, total, idempotent normalization step that reduces
+//! every incoming [`hyper::Uri`] (and, separately, every authorization
+//! rule pattern) to the same [`CanonicalRequest`] form before they meet
+//! the matcher. The goal is to eliminate the rule/request asymmetries
+//! that produce SSRF-style AuthZ bypasses (pentest categories D1, C7).
+//!
+//! ## Pipeline
+//!
+//! ```text
+//! hyper::Uri
+//!   │
+//!   ▼  parse_scheme_method   (http only; allow-list of methods)
+//!   ▼  classify_destination  (IP/host -> Destination; covers numeric forms)
+//!   ▼  validate_userinfo     (must be empty)
+//!   ▼  decode_path_once      (single percent-decode; strict UTF-8)
+//!   ▼  reject_control_chars  (no CR/LF/NUL/HTAB after decode)
+//!   ▼  ascii_lowercase_path  (case-insensitive matching)
+//!   ▼  split_segments        (split '/'; collapse '.'; resolve '..')
+//!   ▼  strip_matrix_params   (drop `;k=v` suffix on each segment)
+//!   ▼  decode_query_once     (k/v percent-decode once; lowercase keys)
+//!   ▼  reject_embedded_query (decoded path must not contain literal '?')
+//!   ▼  fold_into_btreemap    (group by key)
+//! CanonicalRequest
+//! ```
+//!
+//! ## Fail-closed
+//!
+//! Every error variant in [`CanonError`] denies the request. There is no
+//! "best effort" branch.
+//!
+//! ## Idempotency
+//!
+//! `canonicalize(canonicalize(x).render()) == canonicalize(x)`. This is
+//! enforced via property tests in `tests::proptests`.
+//!
+//! ## Fuzzing (M2)
+//!
+//! The M2 exit criterion is "zero panics in 1 CPU-day of fuzzing." We
+//! currently meet it with the [`proptests`] module — `no_panics` runs
+//! `canonicalize_str` against random printable-ASCII strings and
+//! `idempotent` exercises the parse-canonicalize-render-reparse loop.
+//! Crank case counts via the standard env var:
+//!
+//! ```text
+//! PROPTEST_CASES=1000000 cargo test -p azure-proxy-agent --release \
+//!     proxy::canonical::proptests
+//! ```
+//!
+//! A dedicated `cargo-fuzz` (libFuzzer) target would give better corpus
+//! minimization and coverage feedback. It is **deferred** because
+//! `proxy_agent` is a binary crate (`src/main.rs`, no `lib.rs`) and
+//! `cargo-fuzz` requires importing a library. The non-invasive
+//! follow-up:
+//!
+//! 1. Add a `lib.rs` re-exporting `pub mod proxy;` (and whatever else
+//!    the fuzz targets need). The existing binary should stay
+//!    `bin/azure-proxy-agent.rs` to avoid disturbing release artifact
+//!    paths.
+//! 2. `cargo fuzz init` in the crate, then add targets
+//!    `fuzz_targets/canonicalize.rs` and `fuzz_targets/matches.rs`
+//!    calling `azure_proxy_agent::proxy::canonical::canonicalize_str`
+//!    and `CanonicalPattern::matches` respectively.
+//! 3. Wire `cargo fuzz run canonicalize -- -max_total_time=86400` into
+//!    the nightly CI matrix.
+
+pub mod destination;
+pub mod path;
+pub mod query;
+pub mod rule;
+
+// Innovation 2.1 M2 property tests live behind the `proptests` feature
+// so the default `cargo test` inner loop stays fast. CI picks them up
+// through `cargo test --all-features` (see build-linux.sh).
+#[cfg(all(test, feature = "proptests"))]
+mod property_tests;
+
+use std::collections::BTreeMap;
+use std::fmt;
+
+use hyper::{Method, Uri};
+use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
+
+pub use destination::{AddrFamily, Destination};
+pub use rule::CanonicalPattern;
+
+/// Bytes that must be percent-encoded inside a path segment so that
+/// re-parsing the rendered output yields the same canonical form.
+///
+/// - `%` would otherwise be interpreted as the start of an escape on
+///   the second decode pass.
+/// - `#` would be stripped by hyper as a fragment delimiter.
+/// - `?` would be flagged by the canonicalizer as `EmbeddedQuery`.
+/// - `;` would be re-stripped as a matrix-param sentinel.
+/// - space and the C0 controls are invalid in a URI per RFC 3986.
+///
+/// `/` is intentionally NOT in this set because [`path::split_and_resolve`]
+/// already guarantees no literal `/` survives inside a segment.
+const PATH_SEG_ENCODE: &AsciiSet = &CONTROLS.add(b' ').add(b'%').add(b'#').add(b'?').add(b';');
+
+/// Bytes that must be percent-encoded inside a query key or value.
+///
+/// In addition to the path-segment hazards, query strings treat `&` and
+/// `=` as delimiters, and [`query::decode_query_component`] turns `+`
+/// into a literal space — so a raw `+` in the rendered output would
+/// round-trip to a space and lose data. All three must be encoded.
+const QUERY_ENCODE: &AsciiSet = &CONTROLS
+    .add(b' ')
+    .add(b'%')
+    .add(b'#')
+    .add(b'&')
+    .add(b'=')
+    .add(b'+');
+
+/// Fully-normalized form of an HTTP request as it is fed to the matcher.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct CanonicalRequest {
+    /// HTTP method, restricted to the allow-list.
+    pub method: Method,
+
+    /// Classified destination. Matching uses the typed enum only; the raw
+    /// host text is never compared.
+    pub destination: Destination,
+
+    /// Canonical path segments: percent-decoded once, ASCII-lowercased,
+    /// `.` collapsed, `..` resolved, matrix params stripped.
+    ///
+    /// Always begins with the empty root segment (so `/metadata/identity`
+    /// becomes `["", "metadata", "identity"]`).
+    pub path_segments: Vec<String>,
+
+    /// Whether the original path ended in `/`. Preserved as a single bit
+    /// so rules can opt to be slash-sensitive without re-introducing
+    /// string-level asymmetry.
+    pub trailing_slash: bool,
+
+    /// Query parameters, canonical form: keys lowercased & decoded once,
+    /// values decoded once, grouped by key. Insertion order within a key
+    /// is preserved; key order is lexicographic.
+    pub query: BTreeMap<String, Vec<String>>,
+}
+
+impl CanonicalRequest {
+    /// Stable textual rendering. Re-parsing this string and canonicalizing
+    /// it must yield the same `CanonicalRequest` (idempotency invariant).
+    ///
+    /// Path segments and query components are percent-encoded on the way
+    /// out (using [`PATH_SEG_ENCODE`] / [`QUERY_ENCODE`]) so that bytes
+    /// which the pipeline decoded once — `%`, `&`, `=`, `+`, ` `, `#` and
+    /// friends — survive a parse-decode-canonicalize round trip without
+    /// being re-interpreted as delimiters.
+    pub fn render(&self) -> String {
+        let mut out = String::new();
+        if self.path_segments.is_empty() {
+            out.push('/');
+        } else {
+            for (i, seg) in self.path_segments.iter().enumerate() {
+                if i == 0 && seg.is_empty() {
+                    // root marker
+                    out.push('/');
+                    continue;
+                }
+                if i > 0 {
+                    out.push('/');
+                }
+                out.extend(utf8_percent_encode(seg, PATH_SEG_ENCODE));
+            }
+        }
+        if self.trailing_slash && !out.ends_with('/') {
+            out.push('/');
+        }
+        if !self.query.is_empty() {
+            out.push('?');
+            let mut first = true;
+            for (k, values) in self.query.iter() {
+                for v in values {
+                    if !first {
+                        out.push('&');
+                    }
+                    first = false;
+                    out.extend(utf8_percent_encode(k, QUERY_ENCODE));
+                    out.push('=');
+                    out.extend(utf8_percent_encode(v, QUERY_ENCODE));
+                }
+            }
+        }
+        out
+    }
+}
+
+impl fmt::Display for CanonicalRequest {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} {} {}", self.method, self.destination, self.render())
+    }
+}
+
+/// Typed errors produced by the canonicalizer. All variants are
+/// **fail-closed**: callers must deny the request when any of these is
+/// returned.
+#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
+pub enum CanonError {
+    #[error("scheme not http")]
+    SchemeNotHttp,
+    #[error("method not allowed")]
+    MethodNotAllowed,
+    #[error("userinfo present in URL")]
+    UserinfoPresent,
+    #[error("malformed percent-encoding")]
+    MalformedPercent,
+    #[error("overlong UTF-8 in path/query")]
+    OverlongUtf8,
+    #[error("invalid UTF-8 in path/query")]
+    InvalidUtf8,
+    /// Decoded **path** bytes were well-formed UTF-8 but contained at
+    /// least one non-ASCII codepoint. Separated from
+    /// [`CanonError::InvalidUtf8`] so audit logs distinguish *encoding
+    /// corruption* (random bytes, wrong codec) from *Unicode-confusable
+    /// attacks* (e.g. U+0131 dotless-i looks like ASCII `i`, fullwidth
+    /// solidus U+FF0F looks like ASCII `/`, Cyrillic homoglyphs) where the
+    /// attacker hand-crafts perfectly valid UTF-8 specifically to fool
+    /// ASCII-only string comparisons. The two classes have very different
+    /// triage paths, so they get different stable codes. Only the path
+    /// pipeline raises this — the query pipeline allows non-ASCII values
+    /// (ARM ids may contain Unicode).
+    #[error("non-ASCII codepoint in path")]
+    NonAscii,
+    #[error("control character in path/query")]
+    ControlChar,
+    #[error("path traversal past root")]
+    PathUnderflow,
+    #[error("embedded '?' after decoding")]
+    EmbeddedQuery,
+    #[error("unparseable host")]
+    BadHost,
+    #[error("unparseable port")]
+    BadPort,
+}
+
+impl CanonError {
+    /// Stable short code suitable for audit logs and pentest assertions.
+    pub fn code(&self) -> &'static str {
+        match self {
+            CanonError::SchemeNotHttp => "CANON_SCHEME",
+            CanonError::MethodNotAllowed => "CANON_METHOD",
+            CanonError::UserinfoPresent => "CANON_USERINFO",
+            CanonError::MalformedPercent => "CANON_PCT",
+            CanonError::OverlongUtf8 => "CANON_OVERLONG",
+            CanonError::InvalidUtf8 => "CANON_UTF8",
+            CanonError::NonAscii => "CANON_NON_ASCII",
+            CanonError::ControlChar => "CANON_CTRL",
+            CanonError::PathUnderflow => "CANON_UNDERFLOW",
+            CanonError::EmbeddedQuery => "CANON_EMBQ",
+            CanonError::BadHost => "CANON_HOST",
+            CanonError::BadPort => "CANON_PORT",
+        }
+    }
+}
+
+/// HTTP methods accepted by the canonicalizer. Anything not on this list
+/// is rejected with [`CanonError::MethodNotAllowed`].
+///
+/// Kept in sync with `proxy_server::ProxyServer::ALLOWED_METHODS`.
+const ALLOWED_METHODS: &[Method] = &[
+    Method::GET,
+    Method::POST,
+    Method::PUT,
+    Method::DELETE,
+    Method::HEAD,
+    Method::OPTIONS,
+    Method::PATCH,
+];
+
+fn check_method(method: &Method) -> Result<(), CanonError> {
+    if ALLOWED_METHODS.iter().any(|m| m == method) {
+        Ok(())
+    } else {
+        Err(CanonError::MethodNotAllowed)
+    }
+}
+
+fn check_scheme(uri: &Uri) -> Result<(), CanonError> {
+    match uri.scheme_str() {
+        // Hyper guarantees the connect-target form for absolute URIs has a
+        // scheme; the proxy receives origin-form requests where the scheme
+        // is omitted. Both cases are acceptable. A non-http scheme (https,
+        // ws, gopher, ...) is a hard reject.
+        None => Ok(()),
+        Some(s) if s.eq_ignore_ascii_case("http") => Ok(()),
+        Some(_) => Err(CanonError::SchemeNotHttp),
+    }
+}
+
+/// Reject any URL whose authority carries `userinfo` (the `user[:pass]@`
+/// prefix before the host).
+///
+/// Hyper exposes the full authority string via [`Uri::authority`]; the
+/// presence of a literal `@` is the unambiguous signal of userinfo. We
+/// refuse it entirely because it is the canonical host-smuggle vector
+/// (pentest C7): `http://169.254.169.254:80@evil.com/` *looks* like
+/// IMDS to a careless parser but resolves to `evil.com` in real HTTP
+/// clients. Symmetrically, `http://attacker@169.254.169.254/` lets the
+/// attacker decorate an otherwise-legitimate URL with audit-confusing
+/// junk.
+fn check_userinfo(uri: &Uri) -> Result<(), CanonError> {
+    if let Some(authority) = uri.authority() {
+        if authority.as_str().contains('@') {
+            return Err(CanonError::UserinfoPresent);
+        }
+    }
+    Ok(())
+}
+
+/// Canonicalize a parsed request.
+///
+/// Returns `Ok(CanonicalRequest)` for inputs that survive every stage of
+/// the pipeline, or a typed [`CanonError`] otherwise. The function is
+/// **total** — every well-formed `hyper::Uri` produces exactly one of
+/// these two outcomes; it never panics.
+pub fn canonicalize(uri: &Uri, method: &Method) -> Result<CanonicalRequest, CanonError> {
+    check_scheme(uri)?;
+    check_method(method)?;
+    check_userinfo(uri)?;
+
+    let destination = destination::classify(uri)?;
+
+    let (path_segments, trailing_slash) = path::canonicalize_path(uri.path())?;
+    let query = query::canonicalize_query(uri.query().unwrap_or(""))?;
+
+    Ok(CanonicalRequest {
+        method: method.clone(),
+        destination,
+        path_segments,
+        trailing_slash,
+        query,
+    })
+}
+
+/// Convenience: parse and canonicalize a string. Useful for tests and the
+/// shadow-mode shim that takes raw URLs from telemetry replay.
+#[allow(dead_code)]
+pub fn canonicalize_str(url: &str) -> Result<CanonicalRequest, CanonError> {
+    let uri: Uri = url.parse().map_err(|_| CanonError::BadHost)?;
+    canonicalize(&uri, &Method::GET)
+}
+
+/// Rollout flag controlling whether the canonical pipeline shadows or
+/// replaces the legacy authorizer.
+///
+/// See `doc/plans/Innovation-2.1-canonical-request.md` §9 (Shadow-Mode
+/// Rollout). Defaults to [`CanonicalMode::Off`] so production traffic
+/// keeps the pre-canonical behavior bit-for-bit.
+///
+/// - [`Off`](CanonicalMode::Off): legacy decides; canonical not invoked.
+/// - [`Shadow`](CanonicalMode::Shadow): legacy decides; canonical runs
+///   in parallel and divergences are logged as telemetry. **Behavior
+///   unchanged** — this is the M3 default for dev/test.
+/// - [`Enforce`](CanonicalMode::Enforce): canonical decides; legacy is
+///   still computed for divergence telemetry. Wired but only intended
+///   for use once shadow-mode reports zero divergences (M5/M6).
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub enum CanonicalMode {
+    #[default]
+    Off,
+    Shadow,
+    Enforce,
+}
+
+impl std::fmt::Display for CanonicalMode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            CanonicalMode::Off => write!(f, "off"),
+            CanonicalMode::Shadow => write!(f, "shadow"),
+            CanonicalMode::Enforce => write!(f, "enforce"),
+        }
+    }
+}
+
+impl std::str::FromStr for CanonicalMode {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.trim().to_ascii_lowercase().as_str() {
+            "off" | "" => Ok(CanonicalMode::Off),
+            "shadow" => Ok(CanonicalMode::Shadow),
+            "enforce" => Ok(CanonicalMode::Enforce),
+            other => Err(format!("Invalid CanonicalMode: {other}")),
+        }
+    }
+}
+
+#[cfg(test)]
+mod mod_tests {
+    //! Cross-cutting tests for the canonical pipeline.
+    //!
+    //! Helper-specific golden vectors live next to their helpers
+    //! (`path::path_tests::appendix_a1_*`,
+    //! `destination::destination_tests::appendix_a2_*`). What lives
+    //! here is what cuts across every helper:
+    //!
+    //! - Scheme / method / userinfo gating (the top-level checks in
+    //!   [`canonicalize`]).
+    //! - Idempotency: `canonicalize(canonicalize(x).render()) ==
+    //!   canonicalize(x)` — verifies the renderer round-trips every
+    //!   helper's output.
+    //! - Total / no-panic on adversarial inputs.
+    //! - Stability of [`CanonError::code`] strings (audit-log contract).
+
+    use hyper::{Method, Uri};
+
+    use super::*;
+
+    #[test]
+    fn userinfo_rejected_at_pipeline_entry() {
+        // hyper may either parse-and-reject or refuse outright. Either
+        // is a deny, but UserinfoPresent is the preferred surfacing.
+        let err = canonicalize_str("http://user@169.254.169.254/x").unwrap_err();
+        assert!(
+            matches!(err, CanonError::UserinfoPresent | CanonError::BadHost),
+            "userinfo: got {err:?}"
+        );
+    }
+
+    #[test]
+    fn allowed_methods_are_accepted_others_rejected() {
+        let uri: Uri = "http://169.254.169.254/x".parse().unwrap();
+
+        // Positive: every method in ALLOWED_METHODS canonicalizes
+        // successfully. Locks the slice's contents against accidental
+        // shrinking — a removal would surface here as a method that
+        // used to work and now doesn't.
+        let allowed = &[
+            Method::GET,
+            Method::POST,
+            Method::PUT,
+            Method::DELETE,
+            Method::HEAD,
+            Method::OPTIONS,
+            Method::PATCH,
+        ];
+        for m in allowed {
+            assert!(
+                canonicalize(&uri, m).is_ok(),
+                "method {m:?} should be accepted"
+            );
+        }
+
+        // Negative: methods explicitly NOT on the list deny with
+        // MethodNotAllowed.
+        let denied = &[Method::CONNECT, Method::TRACE];
+        for m in denied {
+            assert_eq!(
+                canonicalize(&uri, m).unwrap_err(),
+                CanonError::MethodNotAllowed,
+                "method {m:?} should be rejected"
+            );
+        }
+    }
+
+    #[test]
+    fn non_http_schemes_rejected() {
+        // Anything not bare `http://` is a deny. Hyper may refuse to
+        // parse some of these as absolute URIs; either path is a deny,
+        // but the explicit canonicalize() rejection is what we want to
+        // pin for the schemes hyper does accept.
+        let cases: &[(&str, &str)] = &[
+            ("https", "https://169.254.169.254/x"),
+            ("ftp", "ftp://169.254.169.254/x"),
+            ("file", "file://169.254.169.254/x"),
+        ];
+        for (label, url) in cases {
+            match url.parse::<Uri>() {
+                Ok(uri) => assert_eq!(
+                    canonicalize(&uri, &Method::GET).unwrap_err(),
+                    CanonError::SchemeNotHttp,
+                    "scheme={label}"
+                ),
+                Err(_) => {
+                    // hyper refused to parse — equally a deny, no-op.
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn canonical_form_is_idempotent() {
+        // canonicalize(canonicalize(x).render()) == canonicalize(x).
+        //
+        // Per-vector idempotency catches a class of bugs where the
+        // renderer and the parser disagree on something subtle
+        // (encoding of `+`, matrix-param re-emergence, etc).
+        let cases: &[(&str, &str)] = &[
+            (
+                "typical imds token",
+                "http://169.254.169.254/Metadata/Identity/oauth2/token?api-version=2018-02-01&Resource=https%3A%2F%2Fmanagement.azure.com%2F",
+            ),
+            ("root only", "http://169.254.169.254/"),
+            (
+                "empty query",
+                "http://169.254.169.254/metadata/identity?",
+            ),
+            (
+                "multi-key query with case fold",
+                "http://169.254.169.254/m?Foo=1&BAR=2&baz=3",
+            ),
+            (
+                // Value contains decoded space (`%20`) and decoded `&`
+                // (`%26`). render() must re-encode both, otherwise the
+                // re-parse would either fail (space is invalid in a
+                // URI) or split the value on the spurious `&`.
+                "query value with reserved chars",
+                "http://169.254.169.254/m?k=a%20b%26c",
+            ),
+            (
+                // Value contains decoded `+` (`%2B`).
+                // decode_query_component turns raw `+` into a space, so
+                // render must re-encode `+` as `%2B` to round-trip.
+                "query value with literal plus",
+                "http://169.254.169.254/m?k=a%2Bb",
+            ),
+            (
+                // Path segment contains a literal `%` after one decode
+                // (`%252F` -> `%2f` literal). render() must emit
+                // `%252f` so the second decode lands on the same
+                // literal byte.
+                "path segment with literal percent",
+                "http://169.254.169.254/metadata%252Fidentity",
+            ),
+            (
+                "dot segments collapse first time",
+                "http://169.254.169.254/a/./b/../c",
+            ),
+        ];
+        for (label, url) in cases {
+            let c1 = canonicalize_str(url).expect(label);
+            // render() drops host; reattach so the second pass sees the
+            // same destination.
+            let rendered = format!("http://169.254.169.254{}", c1.render());
+            let c2 = canonicalize_str(&rendered).expect(label);
+            assert_eq!(c1, c2, "not idempotent: {label}");
+        }
+    }
+
+    #[test]
+    fn canonicalize_never_panics_on_adversarial_inputs() {
+        // Sanity smoke test — a proptest target lives in a follow-up
+        // PR. For every shape hyper consents to parse, the
+        // canonicalizer must return either Ok or a typed CanonError.
+        // Panics here are bugs.
+        let paths = &[
+            "/",
+            "//",
+            "/.",
+            "/..",
+            "/%00",
+            "/a/b/c?",
+            "/?",
+            "/a;",
+            "/a;b;c;",
+            "/%",
+            "/%%",
+            "/%%%",
+            "/%C0%AF", // overlong utf-8
+            "/very/long/path/that/repeats/very/long/path/that/repeats",
+            "/a/../../..",     // underflow
+            "/a/b/c/../../..", // exact-root underflow
+        ];
+        let methods = &[Method::GET, Method::POST, Method::CONNECT];
+        for raw in paths {
+            if let Ok(uri) = format!("http://169.254.169.254{raw}").parse::<Uri>() {
+                for m in methods {
+                    let _ = canonicalize(&uri, m);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn error_codes_are_stable() {
+        // Stability of these strings is a CONTRACT with the audit log
+        // and pentest scripts. Changing any one of these is a breaking
+        // change that must bump the canonical-request schema version.
+        let cases: &[(CanonError, &str)] = &[
+            (CanonError::SchemeNotHttp, "CANON_SCHEME"),
+            (CanonError::MethodNotAllowed, "CANON_METHOD"),
+            (CanonError::UserinfoPresent, "CANON_USERINFO"),
+            (CanonError::MalformedPercent, "CANON_PCT"),
+            (CanonError::OverlongUtf8, "CANON_OVERLONG"),
+            (CanonError::InvalidUtf8, "CANON_UTF8"),
+            (CanonError::NonAscii, "CANON_NON_ASCII"),
+            (CanonError::ControlChar, "CANON_CTRL"),
+            (CanonError::PathUnderflow, "CANON_UNDERFLOW"),
+            (CanonError::EmbeddedQuery, "CANON_EMBQ"),
+            (CanonError::BadHost, "CANON_HOST"),
+            (CanonError::BadPort, "CANON_PORT"),
+        ];
+        for (err, expected_code) in cases {
+            assert_eq!(err.code(), *expected_code, "variant={err:?}");
+        }
+    }
+
+    #[test]
+    fn canonical_mode_parses_and_defaults_off() {
+        use std::str::FromStr;
+
+        // Empty / missing config string defaults to Off so production
+        // traffic is unchanged when the config key is absent.
+        assert_eq!(CanonicalMode::default(), CanonicalMode::Off);
+        assert_eq!(CanonicalMode::from_str("").unwrap(), CanonicalMode::Off);
+
+        // Accepted spellings — case- and whitespace-insensitive so
+        // operators can write "Shadow" or " enforce " without surprises.
+        let cases: &[(&str, CanonicalMode)] = &[
+            ("off", CanonicalMode::Off),
+            ("Off", CanonicalMode::Off),
+            ("shadow", CanonicalMode::Shadow),
+            ("SHADOW", CanonicalMode::Shadow),
+            ("  shadow  ", CanonicalMode::Shadow),
+            ("enforce", CanonicalMode::Enforce),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(
+                CanonicalMode::from_str(input).unwrap(),
+                *expected,
+                "input={input:?}"
+            );
+        }
+
+        // Unknown strings reject — better to fail loud at config load
+        // than silently fall through to Off and lie in telemetry.
+        assert!(CanonicalMode::from_str("audit").is_err());
+        assert!(CanonicalMode::from_str("on").is_err());
+    }
+}
diff --git a/proxy_agent/src/proxy/canonical/path.rs b/proxy_agent/src/proxy/canonical/path.rs
new file mode 100644
index 00000000..5b165e84
--- /dev/null
+++ b/proxy_agent/src/proxy/canonical/path.rs
@@ -0,0 +1,867 @@
+// Copyright (c) Microsoft Corporation
+// SPDX-License-Identifier: MIT
+
+//! Path canonicalization.
+//!
+//! Steps (each is a small pure function so they can be unit-tested
+//! independently):
+//!
+//! 1. Single percent-decode of the raw path; reject malformed `%XX`.
+//! 2. Reject overlong UTF-8 encodings of ASCII (`%C0%AF` etc.).
+//! 3. Reject control characters (`\r`, `\n`, `\0`, `\t`).
+//! 4. Reject non-ASCII bytes (paths to IMDS / WireServer are ASCII;
+//!    accepting non-ASCII would require NFC normalization that adds a
+//!    dependency we do not currently take).
+//! 5. ASCII-lowercase.
+//! 6. Split on `/`, drop empty segments, drop `.`, resolve `..`
+//!    (RFC 3986 §5.2.4). Underflow past the root is an error, not a
+//!    no-op — a real client would never produce it.
+//! 7. Strip matrix params (`;jsessionid=...`) from each segment.
+//! 8. Reject an embedded `?` in the decoded path (caused by `%3F`
+//!    smuggling) — the matcher must never see ambiguous input.
+//!
+//! The output is `(segments, trailing_slash)`. `segments` always begins
+//! with the empty root segment, so the canonical form of `/` is
+//! `vec![""]` and the canonical form of `/metadata/identity` is
+//! `vec!["", "metadata", "identity"]`.
+
+use super::CanonError;
+
+const ROOT: &str = "";
+
+/// Run the path pipeline. Public for unit tests; the canonicalizer
+/// entrypoint is [`super::canonicalize`].
+pub fn canonicalize_path(raw: &str) -> Result<(Vec<String>, bool), CanonError> {
+    // hyper guarantees the path starts with '/'.
+    let raw = if raw.is_empty() { "/" } else { raw };
+    let trailing_slash = raw.len() > 1 && raw.ends_with('/');
+
+    let decoded = decode_path_once(raw)?;
+    reject_overlong_utf8(decoded.as_bytes())?;
+    reject_control_chars(&decoded)?;
+    reject_non_ascii(&decoded)?;
+    if decoded.contains('?') {
+        return Err(CanonError::EmbeddedQuery);
+    }
+
+    let lowered = decoded.to_ascii_lowercase();
+    let segments = split_and_resolve(&lowered)?;
+
+    // A trailing `/` only carries meaning when there's a non-root
+    // segment in front of it. Without this clamp, inputs that collapse
+    // to root after dot/matrix resolution (e.g. `/;/`, `/./`, `//`)
+    // would carry `trailing_slash = true` even though `render()`
+    // produces just `/` — which re-parses with `trailing_slash =
+    // false`, breaking the canonicalize-render-canonicalize idempotency
+    // invariant the rest of the pipeline depends on.
+    let trailing_slash = trailing_slash && segments.len() > 1;
+
+    Ok((segments, trailing_slash))
+}
+
+/// Single-pass percent-decode. Rejects truncated (`%2`) and non-hex
+/// (`%ZZ`) sequences as `MalformedPercent`. Never decodes twice — that
+/// is exactly the asymmetry the canonical model is built to remove.
+fn decode_path_once(raw: &str) -> Result<String, CanonError> {
+    let bytes = raw.as_bytes();
+    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
+    let mut i = 0;
+    while i < bytes.len() {
+        let b = bytes[i];
+        if b == b'%' {
+            if i + 2 >= bytes.len() {
+                return Err(CanonError::MalformedPercent);
+            }
+            let h = hex_value(bytes[i + 1])?;
+            let l = hex_value(bytes[i + 2])?;
+            out.push((h << 4) | l);
+            i += 3;
+        } else {
+            out.push(b);
+            i += 1;
+        }
+    }
+    // Strict UTF-8: lossy decoding is what allowed the silent-replacement
+    // bypass in the legacy matcher.
+    String::from_utf8(out).map_err(|_| CanonError::InvalidUtf8)
+}
+
+fn hex_value(b: u8) -> Result<u8, CanonError> {
+    match b {
+        b'0'..=b'9' => Ok(b - b'0'),
+        b'a'..=b'f' => Ok(10 + b - b'a'),
+        b'A'..=b'F' => Ok(10 + b - b'A'),
+        _ => Err(CanonError::MalformedPercent),
+    }
+}
+
+/// Detect classic overlong UTF-8 encodings (e.g. `%C0%AF` for `/`).
+///
+/// `String::from_utf8` already rejects overlong sequences as invalid, so
+/// by the time we run this the bytes are *guaranteed* well-formed
+/// UTF-8 — meaning the overlong forms below would already have produced
+/// `InvalidUtf8`. We run this pass *before* UTF-8 validation in case the
+/// caller ever switches to a lossy decoder; today it is a defense in
+/// depth that also gives us a more specific telemetry code.
+fn reject_overlong_utf8(bytes: &[u8]) -> Result<(), CanonError> {
+    let mut i = 0;
+    while i < bytes.len() {
+        let b = bytes[i];
+        // 2-byte overlong: lead byte 0xC0 or 0xC1 (would encode <0x80).
+        if b == 0xC0 || b == 0xC1 {
+            return Err(CanonError::OverlongUtf8);
+        }
+        // 3-byte overlong: 0xE0 0x80..0x9F (would encode <0x800).
+        if b == 0xE0 && i + 1 < bytes.len() && (0x80..=0x9F).contains(&bytes[i + 1]) {
+            return Err(CanonError::OverlongUtf8);
+        }
+        // 4-byte overlong: 0xF0 0x80..0x8F (would encode <0x10000).
+        if b == 0xF0 && i + 1 < bytes.len() && (0x80..=0x8F).contains(&bytes[i + 1]) {
+            return Err(CanonError::OverlongUtf8);
+        }
+        i += 1;
+    }
+    Ok(())
+}
+
+fn reject_control_chars(s: &str) -> Result<(), CanonError> {
+    for b in s.bytes() {
+        // CR, LF, NUL, HTAB, plus the rest of the C0 control block and
+        // DEL. Anything below 0x20 or equal to 0x7F is rejected.
+        if b < 0x20 || b == 0x7F {
+            return Err(CanonError::ControlChar);
+        }
+    }
+    Ok(())
+}
+
+fn reject_non_ascii(s: &str) -> Result<(), CanonError> {
+    // Well-formed-UTF-8 non-ASCII gets its own dedicated error class so
+    // it shows up in audit logs as `canon=error:CANON_NON_ASCII`,
+    // distinguishable from genuine encoding corruption (`CANON_UTF8`,
+    // `CANON_OVERLONG`). The Unicode-confusable attack class
+    // (U+0131 dotless-i, fullwidth solidus, Cyrillic homoglyphs)
+    // surfaces exclusively here, making the family greppable in audit
+    // logs without false positives from random-byte fuzz.
+    if s.is_ascii() {
+        Ok(())
+    } else {
+        Err(CanonError::NonAscii)
+    }
+}
+
+/// Split on `/`, strip matrix params, drop empty/`.` segments, resolve
+/// `..` with underflow detection.
+fn split_and_resolve(path: &str) -> Result<Vec<String>, CanonError> {
+    let mut segments: Vec<String> = vec![ROOT.to_string()];
+    // RFC 3986 defines a path as a sequence of segments separated by '/'.
+    for raw_seg in path.split('/') {
+        // ';' is a sub-delim — it's a perfectly legal character inside a path segment.
+        // Strip matrix params *before* dot-resolution so a segment like
+        // `.;jsessionid=1` (which decodes to the current-directory
+        // marker `.` after stripping) is dropped rather than preserved.
+        // The reverse order let `/.;` canonicalize to `["", "."]` while
+        // its rendered form `//.` re-parsed to `[""]`, breaking the
+        // idempotency invariant exercised by the M2 proptest.
+        // Matrix params are never used in authorization decisions:
+        // e.g. `segment;k=v;k2=v2` -> `segment`.
+        let cleaned = match raw_seg.find(';') {
+            Some(pos) => &raw_seg[..pos],
+            None => raw_seg,
+        };
+        // A segment that's empty (from `//`), a current-directory marker
+        // (`.`), or pure matrix params (`;k=v`, which strips to "")
+        // collapses away — same treatment, same reason: keeping any of
+        // them in the canonical form would re-introduce the kind of
+        // request/rule asymmetry the canonical model exists to remove.
+        if cleaned.is_empty() || cleaned == "." {
+            continue;
+        }
+        if cleaned == ".." {
+            // Pop the previous segment. Popping the root is an error.
+            if segments.len() <= 1 {
+                return Err(CanonError::PathUnderflow);
+            }
+            segments.pop();
+            continue;
+        }
+        segments.push(cleaned.to_string());
+    }
+    Ok(segments)
+}
+
+#[cfg(test)]
+mod path_tests {
+    use super::*;
+
+    fn segs(parts: &[&str]) -> Vec<String> {
+        parts.iter().map(|s| (*s).to_string()).collect()
+    }
+
+    // -----------------------------------------------------------------
+    // canonicalize_path — end-to-end happy paths
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn canonicalize_path_accepts_valid_inputs() {
+        // (input, expected_segments, expected_trailing_slash). Covers:
+        // root, empty-string fallback, simple path, ASCII case folding,
+        // double-slash collapse, `.` removal, `..` resolution
+        // (including chained), single percent-decode, double-encoding
+        // leaving a literal `%` after one decode, mixed-case percent
+        // (`%2f` and `%2F`), matrix-param stripping on one and many
+        // segments, and a segment that becomes empty after matrix
+        // stripping.
+        let cases: &[(&str, &[&str], bool)] = &[
+            ("/", &[""], false),
+            ("", &[""], false),
+            ("/metadata/identity", &["", "metadata", "identity"], false),
+            ("/Metadata/Identity", &["", "metadata", "identity"], false),
+            ("/metadata//identity", &["", "metadata", "identity"], false),
+            ("/metadata/./identity", &["", "metadata", "identity"], false),
+            (
+                "/metadata/x/../identity",
+                &["", "metadata", "identity"],
+                false,
+            ),
+            ("/a/b/c/../../identity", &["", "a", "identity"], false),
+            ("/metadata%2Fidentity", &["", "metadata", "identity"], false),
+            ("/metadata%2fidentity", &["", "metadata", "identity"], false),
+            (
+                "/metadata%252Fidentity",
+                &["", "metadata%2fidentity"],
+                false,
+            ),
+            (
+                "/metadata/identity;jsessionid=abc",
+                &["", "metadata", "identity"],
+                false,
+            ),
+            (
+                "/metadata;a=1;b=2/identity;c=3",
+                &["", "metadata", "identity"],
+                false,
+            ),
+            ("/a/;jsessionid=x/b", &["", "a", "b"], false),
+            ("/metadata/", &["", "metadata"], true),
+        ];
+        for (input, expected_segs, expected_ts) in cases {
+            let got = canonicalize_path(input)
+                .unwrap_or_else(|e| panic!("expected Ok for {input:?}, got {e:?}"));
+            assert_eq!(got, (segs(expected_segs), *expected_ts), "input={input:?}");
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // canonicalize_path — end-to-end errors
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn canonicalize_path_rejects_invalid_inputs() {
+        // Each row is (input, expected_error). Exercises every typed
+        // error the path pipeline can produce except OverlongUtf8 /
+        // InvalidUtf8 for overlong sequences, which have their own test
+        // because either variant is acceptable.
+        let cases: &[(&str, CanonError)] = &[
+            // Malformed percent: dangling `%`, truncated, non-hex digit
+            ("/abc%", CanonError::MalformedPercent),
+            ("/abc%2", CanonError::MalformedPercent),
+            ("/abc%ZZ", CanonError::MalformedPercent),
+            ("/abc%2G", CanonError::MalformedPercent),
+            // Control characters: NUL, HTAB, LF, CR, DEL
+            ("/x%00", CanonError::ControlChar),
+            ("/x%09", CanonError::ControlChar),
+            ("/x%0A", CanonError::ControlChar),
+            ("/x%0D", CanonError::ControlChar),
+            ("/x%7F", CanonError::ControlChar),
+            // Non-ASCII after decode: U+4E2D `中` in UTF-8 — well-formed,
+            // so it surfaces as NonAscii (Unicode-confusable / homoglyph
+            // attack class), not InvalidUtf8 (encoding corruption).
+            ("/x%E4%B8%AD", CanonError::NonAscii),
+            // Embedded `?` from %3F smuggling
+            (
+                "/metadata/identity%3Fapi-version=2018",
+                CanonError::EmbeddedQuery,
+            ),
+            // Path traversal past root: chained, immediate, and a lone `..`
+            ("/a/../..", CanonError::PathUnderflow),
+            ("/..", CanonError::PathUnderflow),
+            ("/a/b/../../../c", CanonError::PathUnderflow),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(
+                canonicalize_path(input).unwrap_err(),
+                *expected,
+                "input={input:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn canonicalize_path_rejects_overlong_utf8() {
+        // %C0%AF is the classic 2-byte overlong for `/` (IDS bypass).
+        // %E0%80%AF is the 3-byte overlong for `/`.
+        // %F0%80%80%AF is the 4-byte overlong for `/`.
+        //
+        // Each MUST be rejected. The exact variant depends on whether
+        // reject_overlong_utf8 catches it first or String::from_utf8 does
+        // (both happen to fire on these inputs); either way the request
+        // is denied, so we accept either error code.
+        for input in ["/x%C0%AFy", "/x%E0%80%AFy", "/x%F0%80%80%AFy"] {
+            let err = canonicalize_path(input).unwrap_err();
+            assert!(
+                matches!(err, CanonError::OverlongUtf8 | CanonError::InvalidUtf8),
+                "input={input:?} got={err:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // decode_path_once
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn decode_path_once_handles_hex_correctly() {
+        // Happy cases: identity (no `%`), upper, lower, and mixed hex.
+        // The decoder must accept all of them — case asymmetry was one
+        // of the legacy bypass vectors.
+        let ok: &[(&str, &str)] = &[
+            ("/plain/path", "/plain/path"),
+            ("/a%2Fb", "/a/b"),
+            ("/a%2fb", "/a/b"),
+            ("/a%2fb%2F%2f", "/a/b//"),
+            ("/space%20here", "/space here"),
+        ];
+        for (input, expected) in ok {
+            assert_eq!(
+                decode_path_once(input).unwrap(),
+                *expected,
+                "input={input:?}"
+            );
+        }
+
+        // Malformed: `%` at very end, `%X` (only one nibble), invalid
+        // hex digits, surrounded by garbage.
+        let bad: &[&str] = &["%", "abc%", "%2", "abc%2", "%ZZ", "abc%2G", "%9Q"];
+        for input in bad {
+            assert_eq!(
+                decode_path_once(input).unwrap_err(),
+                CanonError::MalformedPercent,
+                "input={input:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // hex_value
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn hex_value_accepts_digits_and_rejects_others() {
+        // Every valid hex digit, both letter cases.
+        let valid: &[(u8, u8)] = &[
+            (b'0', 0),
+            (b'1', 1),
+            (b'2', 2),
+            (b'3', 3),
+            (b'4', 4),
+            (b'5', 5),
+            (b'6', 6),
+            (b'7', 7),
+            (b'8', 8),
+            (b'9', 9),
+            (b'a', 10),
+            (b'b', 11),
+            (b'c', 12),
+            (b'd', 13),
+            (b'e', 14),
+            (b'f', 15),
+            (b'A', 10),
+            (b'B', 11),
+            (b'C', 12),
+            (b'D', 13),
+            (b'E', 14),
+            (b'F', 15),
+        ];
+        for (byte, expected) in valid {
+            assert_eq!(
+                hex_value(*byte).unwrap(),
+                *expected,
+                "byte={}",
+                *byte as char
+            );
+        }
+
+        // A handful of representative non-hex bytes: G/g (just past f),
+        // punctuation, whitespace, high bytes.
+        for byte in [b'g', b'G', b'/', b' ', b'\0', 0xFFu8] {
+            assert_eq!(
+                hex_value(byte).unwrap_err(),
+                CanonError::MalformedPercent,
+                "byte=0x{byte:02X}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // reject_overlong_utf8 — direct byte-level tests
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn reject_overlong_utf8_catches_all_overlong_forms() {
+        // Each "bad" buffer carries an overlong sequence of arity 2, 3,
+        // or 4. Each "ok" buffer is a well-formed UTF-8 sequence of the
+        // same arity that must NOT be flagged. This pins both the
+        // detection AND the absence of false positives.
+        let bad: &[&[u8]] = &[
+            // 2-byte overlong: leading 0xC0 / 0xC1 always overlong.
+            &[0xC0, 0xAF],
+            &[b'/', 0xC1, 0xAF, b'/'],
+            // 3-byte overlong: 0xE0 followed by 0x80..=0x9F.
+            &[0xE0, 0x80, 0xAF],
+            &[0xE0, 0x9F, 0xBF],
+            // 4-byte overlong: 0xF0 followed by 0x80..=0x8F.
+            &[0xF0, 0x80, 0x80, 0xAF],
+            &[0xF0, 0x8F, 0xBF, 0xBF],
+        ];
+        for buf in bad {
+            assert_eq!(
+                reject_overlong_utf8(buf).unwrap_err(),
+                CanonError::OverlongUtf8,
+                "buf={buf:?}"
+            );
+        }
+
+        let ok: &[&[u8]] = &[
+            b"plain ascii",
+            // Well-formed 2-byte sequence (U+00A9 ©): 0xC2 0xA9
+            &[0xC2, 0xA9],
+            // Well-formed 3-byte sequence (U+4E2D 中): 0xE4 0xB8 0xAD
+            &[0xE4, 0xB8, 0xAD],
+            // Well-formed 4-byte sequence (U+1F600): 0xF0 0x9F 0x98 0x80
+            &[0xF0, 0x9F, 0x98, 0x80],
+            b"",
+        ];
+        for buf in ok {
+            assert!(reject_overlong_utf8(buf).is_ok(), "buf={buf:?}");
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // reject_control_chars — direct
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn reject_control_chars_blocks_c0_block_and_del() {
+        // Every byte in the C0 control block (0x00..=0x1F) plus DEL
+        // (0x7F) must be rejected. We check the full block, not just
+        // a few representative bytes, because each byte is a separate
+        // CRLF/NUL/HTAB injection vector.
+        for b in 0u8..=0x1F {
+            let s = std::str::from_utf8(&[b]).unwrap().to_string();
+            assert_eq!(
+                reject_control_chars(&s).unwrap_err(),
+                CanonError::ControlChar,
+                "byte=0x{b:02X}"
+            );
+        }
+        let del = String::from_utf8(vec![0x7F]).unwrap();
+        assert_eq!(
+            reject_control_chars(&del).unwrap_err(),
+            CanonError::ControlChar
+        );
+
+        // Printable ASCII (0x20..=0x7E) and the empty string must pass.
+        assert!(reject_control_chars("").is_ok());
+        assert!(reject_control_chars(" !#0AZaz~").is_ok());
+        assert!(reject_control_chars("/metadata/identity?x=1&y=2").is_ok());
+    }
+
+    // -----------------------------------------------------------------
+    // reject_non_ascii — direct
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn reject_non_ascii_blocks_high_bytes() {
+        // Anything within the ASCII range (incl. the C0 block — that's
+        // a different helper's job) must pass.
+        for s in ["", "/", "/abc", "/metadata/identity?api-version=2018"] {
+            assert!(reject_non_ascii(s).is_ok(), "input={s:?}");
+        }
+        // Any non-ASCII character (1, 2, 3, or 4 UTF-8 bytes wide) must
+        // be rejected with the NonAscii code — distinct from InvalidUtf8
+        // (random bytes) and OverlongUtf8 (IDS-bypass overlongs) so audit
+        // logs can `grep CANON_NON_ASCII` for the homoglyph attack class
+        // without picking up generic encoding-failure noise.
+        for s in ["é", "中", "🙂", "/abc/中文/x"] {
+            assert_eq!(
+                reject_non_ascii(s).unwrap_err(),
+                CanonError::NonAscii,
+                "input={s:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // split_and_resolve — direct
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn split_and_resolve_handles_dot_segments_and_matrix_params() {
+        // (input lowered path, expected segments). Inputs are passed as
+        // they would be after decode + lowercase, so this isolates the
+        // segment-level behavior. Covers: root, double-slash collapse,
+        // `.` removal, `..` chain, matrix params on one and many
+        // segments, a segment that becomes empty after matrix strip,
+        // and a trailing matrix-param-only segment.
+        let cases: &[(&str, &[&str])] = &[
+            ("/", &[""]),
+            ("//", &[""]),
+            ("/a/b", &["", "a", "b"]),
+            ("/a//b", &["", "a", "b"]),
+            ("/a/./b", &["", "a", "b"]),
+            ("/a/b/../c", &["", "a", "c"]),
+            ("/a/b/c/../../d", &["", "a", "d"]),
+            ("/a;k=v/b", &["", "a", "b"]),
+            ("/a;k=v;k2=v2/b;k3=v3", &["", "a", "b"]),
+            ("/a/;k=v/b", &["", "a", "b"]),
+            ("/a/b/;k=v", &["", "a", "b"]),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(
+                split_and_resolve(input).unwrap(),
+                segs(expected),
+                "input={input:?}"
+            );
+        }
+
+        // Underflow: every form must fail-closed with PathUnderflow.
+        for input in ["/..", "/a/../..", "/../a", "/a/b/../../../c"] {
+            assert_eq!(
+                split_and_resolve(input).unwrap_err(),
+                CanonError::PathUnderflow,
+                "input={input:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // Appendix A.1 — path golden vectors
+    //
+    // Spec-conformance tests for the path pipeline. The vector labels
+    // (`A1.xxx`) appear in every failure message so a regression points
+    // straight back to the row in `Innovation-2.1-canonical-request.md`.
+    //
+    // These call the full canonicalize_str() entrypoint rather than
+    // canonicalize_path() directly so the assertions match the form a
+    // caller would see — the assertion target is still the path output.
+    // -----------------------------------------------------------------
+
+    fn canon_path_via_pipeline(url: &str) -> String {
+        super::super::canonicalize_str(url)
+            .unwrap()
+            .path_segments
+            .join("/")
+    }
+
+    #[test]
+    fn appendix_a1_path_vectors_canonicalize_successfully() {
+        // (vector_label, raw_url, expected_canonical_path)
+        let cases: &[(&str, &str, &str)] = &[
+            (
+                "A1.plain",
+                "http://169.254.169.254/metadata/identity",
+                "/metadata/identity",
+            ),
+            (
+                "A1.mixed_case",
+                "http://169.254.169.254/Metadata/Identity",
+                "/metadata/identity",
+            ),
+            (
+                "A1.double_slash",
+                "http://169.254.169.254/metadata//identity",
+                "/metadata/identity",
+            ),
+            (
+                "A1.dot_segment",
+                "http://169.254.169.254/metadata/./identity",
+                "/metadata/identity",
+            ),
+            (
+                "A1.dotdot_segment",
+                "http://169.254.169.254/metadata/x/../identity",
+                "/metadata/identity",
+            ),
+            (
+                "A1.encoded_slash_decodes",
+                "http://169.254.169.254/metadata%2Fidentity",
+                "/metadata/identity",
+            ),
+            (
+                // Decoding happens once: %252F -> %2F (literal, not a separator).
+                "A1.double_encoding_decoded_once",
+                "http://169.254.169.254/metadata%252Fidentity",
+                "/metadata%2fidentity",
+            ),
+            (
+                "A1.matrix_param_stripped",
+                "http://169.254.169.254/metadata/identity;jsessionid=abc",
+                "/metadata/identity",
+            ),
+        ];
+        for (label, url, expected) in cases {
+            assert_eq!(canon_path_via_pipeline(url), *expected, "vector={label}");
+        }
+    }
+
+    #[test]
+    fn appendix_a1_path_vectors_rejected() {
+        // (vector_label, raw_url, expected_error)
+        let exact: &[(&str, &str, CanonError)] = &[
+            (
+                "A1.path_underflow",
+                // /metadata/identity -> pop x2 -> root; the third .. underflows.
+                "http://169.254.169.254/metadata/identity/../../..",
+                CanonError::PathUnderflow,
+            ),
+            (
+                "A1.embedded_query",
+                "http://169.254.169.254/metadata/identity%3Fapi-version=2018",
+                CanonError::EmbeddedQuery,
+            ),
+            (
+                "A1.control_char",
+                "http://169.254.169.254/metadata/identity%0A",
+                CanonError::ControlChar,
+            ),
+        ];
+        for (label, url, expected) in exact {
+            assert_eq!(
+                super::super::canonicalize_str(url).unwrap_err(),
+                *expected,
+                "vector={label}"
+            );
+        }
+
+        // Either-of class: overlong UTF-8 may surface as OverlongUtf8 or
+        // InvalidUtf8 depending on decoder state — both are equally a deny.
+        let either: &[(&str, &str)] = &[(
+            "A1.overlong_utf8",
+            "http://169.254.169.254/metadata/%C0%AFidentity",
+        )];
+        for (label, url) in either {
+            let err = super::super::canonicalize_str(url).unwrap_err();
+            assert!(
+                matches!(err, CanonError::OverlongUtf8 | CanonError::InvalidUtf8),
+                "vector={label} got={err:?}"
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------
+    // D1 extended path vectors (M2: golden vectors for the pentest D1
+    // "URL parsing differentials" family — pentest/linux/DESIGN.md row D1
+    // and design doc §3.1).
+    //
+    // The Appendix A.1 table above is the *representative* set the
+    // design promises by name. These are the additional concrete forms
+    // that must produce the same canonical output (or the same typed
+    // deny) so that the matcher's behavior cannot diverge from the
+    // upstream server's interpretation.
+    // -----------------------------------------------------------------
+
+    #[test]
+    fn d1_extended_path_vectors_canonicalize_successfully() {
+        let cases: &[(&str, &str, &str)] = &[
+            // Mixed-case percent escapes for `/` decode the same.
+            (
+                "D1.lowercase_encoded_slash",
+                "http://169.254.169.254/metadata%2fidentity",
+                "/metadata/identity",
+            ),
+            // Double-encoded `..` (`%252e%252e`) decodes ONCE to the literal
+            // bytes `%2e%2e` — it must NOT collapse like `..` would.
+            (
+                "D1.double_encoded_dotdot",
+                "http://169.254.169.254/metadata/%252e%252e/identity",
+                "/metadata/%2e%2e/identity",
+            ),
+            // Multiple matrix params on a single segment all strip.
+            (
+                "D1.multiple_matrix_params",
+                "http://169.254.169.254/metadata;a=1;b=2;c=3/identity",
+                "/metadata/identity",
+            ),
+            // Matrix params across multiple segments.
+            (
+                "D1.matrix_params_each_segment",
+                "http://169.254.169.254/metadata;k=v/identity;k2=v2",
+                "/metadata/identity",
+            ),
+            // A segment that is ONLY matrix params (`;k=v`) collapses to
+            // nothing — never to an empty segment that would shift indices.
+            (
+                "D1.matrix_only_segment_drops",
+                "http://169.254.169.254/a/;k=v/b",
+                "/a/b",
+            ),
+            // Encoded space survives as a literal space in the segment;
+            // render() must percent-encode it back.
+            (
+                "D1.encoded_space_in_segment",
+                "http://169.254.169.254/foo%20bar",
+                "/foo bar",
+            ),
+            // Combined dot / dotdot / matrix in one path.
+            (
+                "D1.combined_dot_dotdot_matrix",
+                "http://169.254.169.254/a/b/./c/../d;p=q/e",
+                "/a/b/d/e",
+            ),
+            // Leading multi-slash collapses to single root.
+            (
+                "D1.leading_multi_slash",
+                "http://169.254.169.254///metadata",
+                "/metadata",
+            ),
+            // Intermixed `./` segments collapse.
+            (
+                "D1.intermixed_dot_segments",
+                "http://169.254.169.254/./a/./b/./",
+                "/a/b",
+            ),
+            // Case folding plus encoded slash plus matrix.
+            (
+                "D1.combined_case_encoded_slash_matrix",
+                "http://169.254.169.254/Foo%2FBar;p=q",
+                "/foo/bar",
+            ),
+            // Encoded `;` (`%3B`) decodes to a literal `;` and then
+            // triggers the same matrix-param stripping as a raw `;` —
+            // this symmetry is REQUIRED, otherwise an attacker could
+            // smuggle past a `/foo;v=1` deny rule by writing `/foo%3Bv=1`.
+            (
+                "D1.encoded_semicolon_strips_like_raw",
+                "http://169.254.169.254/a%3Bb",
+                "/a",
+            ),
+        ];
+        for (label, url, expected) in cases {
+            assert_eq!(canon_path_via_pipeline(url), *expected, "vector={label}");
+        }
+    }
+
+    #[test]
+    fn d1_extended_path_vectors_rejected() {
+        // Exact-error vectors.
+        let exact: &[(&str, &str, CanonError)] = &[
+            // Truncated percent at end of input (no following hex digits).
+            (
+                "D1.truncated_percent_end",
+                "http://169.254.169.254/a%",
+                CanonError::MalformedPercent,
+            ),
+            (
+                "D1.truncated_percent_one_hex",
+                "http://169.254.169.254/a%2",
+                CanonError::MalformedPercent,
+            ),
+            // Non-hex characters after `%`.
+            (
+                "D1.non_hex_percent",
+                "http://169.254.169.254/a%ZZ",
+                CanonError::MalformedPercent,
+            ),
+            (
+                "D1.partial_non_hex_percent",
+                "http://169.254.169.254/a%2G",
+                CanonError::MalformedPercent,
+            ),
+            // All four control-character flavours the matcher must deny.
+            (
+                "D1.nul_byte",
+                "http://169.254.169.254/a%00b",
+                CanonError::ControlChar,
+            ),
+            (
+                "D1.cr_byte",
+                "http://169.254.169.254/a%0Db",
+                CanonError::ControlChar,
+            ),
+            (
+                "D1.tab_byte",
+                "http://169.254.169.254/a%09b",
+                CanonError::ControlChar,
+            ),
+            (
+                "D1.del_byte",
+                "http://169.254.169.254/a%7Fb",
+                CanonError::ControlChar,
+            ),
+            // Decoded non-ASCII (valid UTF-8 but outside the matcher's
+            // ASCII-only contract). Distinct from encoding-corruption
+            // (`InvalidUtf8`) and overlong-bypass (`OverlongUtf8`)
+            // because this is the Unicode-confusable attack family.
+            (
+                "D1.decoded_non_ascii_lowercase_e_acute",
+                "http://169.254.169.254/caf%C3%A9",
+                CanonError::NonAscii,
+            ),
+            // Underflow variants the Appendix table doesn't enumerate.
+            (
+                "D1.underflow_from_root",
+                "http://169.254.169.254/..",
+                CanonError::PathUnderflow,
+            ),
+            (
+                "D1.underflow_via_dotdot_chain",
+                "http://169.254.169.254/a/b/../../..",
+                CanonError::PathUnderflow,
+            ),
+            // Embedded `?` smuggled via uppercase AND lowercase `%3F`.
+            (
+                "D1.embedded_query_uppercase_hex",
+                "http://169.254.169.254/x%3Fy",
+                CanonError::EmbeddedQuery,
+            ),
+            (
+                "D1.embedded_query_lowercase_hex",
+                "http://169.254.169.254/x%3fy",
+                CanonError::EmbeddedQuery,
+            ),
+        ];
+        for (label, url, expected) in exact {
+            assert_eq!(
+                super::super::canonicalize_str(url).unwrap_err(),
+                *expected,
+                "vector={label}"
+            );
+        }
+
+        // Either-of class: 3-byte and 4-byte overlong UTF-8 sequences may
+        // surface as OverlongUtf8 or InvalidUtf8 depending on which check
+        // fires first.
+        let either: &[(&str, &str)] = &[
+            (
+                "D1.overlong_utf8_3byte_slash",
+                "http://169.254.169.254/x/%E0%80%AFy",
+            ),
+            (
+                "D1.overlong_utf8_4byte_slash",
+                "http://169.254.169.254/x/%F0%80%80%AFy",
+            ),
+            (
+                "D1.overlong_utf8_2byte_backslash",
+                "http://169.254.169.254/x/%C1%9Cy",
+            ),
+        ];
+        for (label, url) in either {
+            let err = super::super::canonicalize_str(url).unwrap_err();
+            assert!(
+                matches!(err, CanonError::OverlongUtf8 | CanonError::InvalidUtf8),
+                "vector={label} got={err:?}"
+            );
+        }
+    }
+}
diff --git a/proxy_agent/src/proxy/canonical/property_tests.rs b/proxy_agent/src/proxy/canonical/property_tests.rs
new file mode 100644
index 00000000..6671d04f
--- /dev/null
+++ b/proxy_agent/src/proxy/canonical/property_tests.rs
@@ -0,0 +1,231 @@
+// Copyright (c) Microsoft Corporation
+// SPDX-License-Identifier: MIT
+
+//! Property tests for the canonical pipeline (Innovation 2.1, M2).
+//!
+//! These are *invariants*, not example tables: every test draws inputs
+//! from a [`proptest`] strategy and asserts a relationship that must hold
+//! for **all** survivors of that strategy. They complement the hand-rolled
+//! Appendix A / D1 / C7 golden vectors by exercising shapes nobody
+//! enumerated.
+//!
+//! The three properties — taken verbatim from §10.2 of the design doc —
+//! are:
+//!
+//! 1. **`idempotent`**: `canonicalize(canonicalize(x).render()) == canonicalize(x)`.
+//!    Forms the contract that lets us cache canonicalized rules and reuse
+//!    them across requests without re-deriving on every match.
+//!
+//! 2. **`no_panics`**: `canonicalize_str` returns `Result`, never panics,
+//!    for *any* string short of those that fail `Uri::parse`. This is the
+//!    M2 fuzz-target exit criterion expressed as a property so we get the
+//!    same coverage with one tool (no separate cargo-fuzz crate yet —
+//!    see the TODO in `mod.rs` for the binary-crate blocker).
+//!
+//! 3. **`host_form_equivalence`**: every numeric IPv4 spelling of the same
+//!    address (dotted, decimal-u32, hex-u32, IPv4-mapped IPv6) classifies
+//!    to the same [`Destination`]. This is the *positive* counterpart of
+//!    the C7 host-smuggling vectors.
+
+use super::{canonicalize, canonicalize_str, Destination};
+use hyper::{Method, Uri};
+use proptest::prelude::*;
+use std::net::Ipv4Addr;
+
+// ---------------------------------------------------------------------------
+// Strategies
+// ---------------------------------------------------------------------------
+
+/// Characters that survive a `path_segment` round-trip without being
+/// reinterpreted as a delimiter. We pick a curated subset so the strategy
+/// stays focused on shapes the canonicalizer is supposed to normalize
+/// (case, percent-encoding, dot resolution, matrix params) rather than
+/// degenerating into a hyper-parser-rejection generator.
+///
+/// Intentionally includes `%` and `;` so the strategy exercises the
+/// percent-decode and matrix-strip stages.
+const SEG_BYTES: &str = "abcXY01-._~/%;:@&=+$,";
+
+fn arb_segment() -> impl Strategy<Value = String> {
+    // 0..16 keeps the search space tight enough to drive ~1000 cases
+    // through every code path without blowing the default proptest budget.
+    proptest::collection::vec(proptest::sample::select(SEG_BYTES.as_bytes()), 0..16)
+        .prop_map(|bs| String::from_utf8(bs).expect("ASCII subset is always valid UTF-8"))
+}
+
+const QKV_BYTES: &str = "abcXY01-._~%+";
+
+fn arb_qkv() -> impl Strategy<Value = String> {
+    proptest::collection::vec(proptest::sample::select(QKV_BYTES.as_bytes()), 0..8)
+        .prop_map(|bs| String::from_utf8(bs).expect("ASCII subset is always valid UTF-8"))
+}
+
+/// Build a syntactically reasonable URL whose authority we control (so
+/// `Destination` is stable across round-trips) but whose path/query are
+/// drawn from broad strategies.
+fn arb_url() -> impl Strategy<Value = String> {
+    let path_strat = proptest::collection::vec(arb_segment(), 0..4);
+    let query_strat = proptest::collection::vec((arb_qkv(), arb_qkv()), 0..4);
+    let trailing_slash = any::<bool>();
+
+    (path_strat, query_strat, trailing_slash).prop_map(|(segs, kvs, ts)| {
+        let mut url = String::from("http://169.254.169.254");
+        if segs.is_empty() {
+            url.push('/');
+        } else {
+            for s in &segs {
+                url.push('/');
+                url.push_str(s);
+            }
+        }
+        if ts && !url.ends_with('/') {
+            url.push('/');
+        }
+        if !kvs.is_empty() {
+            url.push('?');
+            for (i, (k, v)) in kvs.iter().enumerate() {
+                if i > 0 {
+                    url.push('&');
+                }
+                url.push_str(k);
+                url.push('=');
+                url.push_str(v);
+            }
+        }
+        url
+    })
+}
+
+// ---------------------------------------------------------------------------
+// Properties
+// ---------------------------------------------------------------------------
+
+proptest! {
+    /// Re-canonicalizing a rendered canonical request yields the same
+    /// request. This is the *load-bearing* invariant: every cache, every
+    /// audit-log signature, and every "rule equals request" comparison
+    /// in the matcher assumes this holds.
+    ///
+    /// We only assert the property on inputs that canonicalize
+    /// successfully on the first pass; rejected inputs are out of scope
+    /// (covered by `no_panics`).
+    #[test]
+    fn idempotent(url in arb_url()) {
+        let c1 = match canonicalize_str(&url) {
+            Ok(c) => c,
+            Err(_) => return Ok(()),
+        };
+        // render() omits the authority by design — re-attach the same
+        // host we built the input from so the destination doesn't
+        // change between rounds.
+        let url2 = format!("http://169.254.169.254{}", c1.render());
+        let c2 = canonicalize_str(&url2)
+            .map_err(|e| TestCaseError::fail(format!(
+                "second pass failed: input={url:?} rendered={url2:?} err={e:?}"
+            )))?;
+        prop_assert_eq!(
+            &c1, &c2,
+            "non-idempotent: input={:?} rendered={:?}", url, url2
+        );
+    }
+
+    /// `canonicalize_str` is *total* on any input that survives
+    /// `Uri::parse` — it returns a typed error rather than panicking.
+    /// Random printable-ASCII strings give the parser plenty to choke
+    /// on, and the canonicalizer plenty of weird-but-valid `Uri` shapes
+    /// to chew through.
+    #[test]
+    fn no_panics(s in r"[\x20-\x7E]{0,80}") {
+        // proptest catches panics across the FFI boundary automatically;
+        // the test "passes" iff this call returns (Ok or Err) without
+        // unwinding.
+        let _ = canonicalize_str(&s);
+    }
+
+    /// Same target — different spelling. All four well-formed numeric
+    /// forms of an IPv4 address must classify to the same Destination.
+    /// This is the positive contract behind the C7 host-shape rejection
+    /// vectors: the canonicalizer is allowed to *deny* exotic forms,
+    /// but if it *accepts* them they must collapse onto the same
+    /// classification as the dotted form.
+    #[test]
+    fn host_form_equivalence(raw in any::<u32>()) {
+        let v4 = Ipv4Addr::from(raw);
+        // Don't run the property on the dual-stack any-address; hyper
+        // happens to reject the bracketed `[::ffff:0.0.0.0]` form, which
+        // would unbalance the comparison. The fix isn't shape-changing,
+        // so skipping is safe.
+        if raw == 0 {
+            return Ok(());
+        }
+
+        let dotted = format!("http://{}/x", v4);
+        let decimal = format!("http://{}/x", raw);
+        let hex = format!("http://0x{:x}/x", raw);
+        let mapped = format!("http://[::ffff:{}]/x", v4);
+
+        let dotted_dest = classify_or_none(&dotted);
+        let decimal_dest = classify_or_none(&decimal);
+        let hex_dest = classify_or_none(&hex);
+        let mapped_dest = classify_or_none(&mapped);
+
+        // Forms the canonicalizer accepts must all agree. Forms it
+        // rejects are out of scope (we don't require it to accept any
+        // particular alternative spelling — only that acceptance is
+        // self-consistent).
+        //
+        // `Destination::Unknown` carries `host_text` so audit logs can
+        // show the *originally requested* spelling; that field is
+        // intentionally form-dependent and we strip it for the
+        // equivalence comparison. Known destinations (IMDS, WireServer,
+        // HostGAPlugin) have no host_text and compare directly.
+        let accepted: Vec<(&str, Destination)> = [
+            ("dotted", dotted_dest),
+            ("decimal", decimal_dest),
+            ("hex", hex_dest),
+            ("mapped", mapped_dest),
+        ]
+        .into_iter()
+        .filter_map(|(n, d)| d.map(|d| (n, normalize_for_equivalence(d))))
+        .collect();
+
+        if let Some((_, first)) = accepted.first() {
+            let first = first.clone();
+            for (name, d) in accepted.iter().skip(1) {
+                prop_assert_eq!(
+                    d, &first,
+                    "host-form mismatch for {}: {} -> {:?} vs dotted -> {:?}",
+                    v4, name, d, first
+                );
+            }
+        }
+    }
+}
+
+/// Helper: parse + canonicalize, returning only the destination. Used by
+/// `host_form_equivalence` so a parse failure on one spelling doesn't
+/// short-circuit the comparison.
+fn classify_or_none(url: &str) -> Option<Destination> {
+    let uri: Uri = url.parse().ok()?;
+    canonicalize(&uri, &Method::GET).ok().map(|c| c.destination)
+}
+
+/// Strip `host_text` from `Destination::Unknown` so two equivalent
+/// numeric spellings (e.g. `0.0.0.1` and `1`) compare equal. Known
+/// destinations are returned unchanged.
+fn normalize_for_equivalence(d: Destination) -> Destination {
+    match d {
+        Destination::Unknown {
+            family,
+            ip,
+            port,
+            host_text: _,
+        } => Destination::Unknown {
+            family,
+            ip,
+            port,
+            host_text: None,
+        },
+        other => other,
+    }
+}
diff --git a/proxy_agent/src/proxy/canonical/query.rs b/proxy_agent/src/proxy/canonical/query.rs
new file mode 100644
index 00000000..1540ffd2
--- /dev/null
+++ b/proxy_agent/src/proxy/canonical/query.rs
@@ -0,0 +1,209 @@
+// Copyright (c) Microsoft Corporation
+// SPDX-License-Identifier: MIT
+
+//! Query string canonicalization.
+//!
+//! - Split on `&`, then on the first `=` (additional `=` characters
+//!   become part of the value).
+//! - Single percent-decode of both key and value.
+//! - Lowercase the key (case-insensitive matching).
+//! - Reject control characters and malformed UTF-8 in both key and value.
+//!   Unlike the path pipeline, well-formed non-ASCII UTF-8 is **allowed**
+//!   here: query *values* legitimately carry it (e.g. an IMDS
+//!   `msi_res_id` / `resource` ARM id whose resource-group name contains
+//!   Unicode letters, which Azure naming rules permit). The path stays
+//!   ASCII-only because IMDS / WireServer / HGAP paths never contain
+//!   non-ASCII and a confusable path segment could slip past a deny rule.
+//! - Fold into a `BTreeMap<String, Vec<String>>`: deterministic key
+//!   ordering, insertion order preserved within a key.
+
+use std::collections::BTreeMap;
+
+use super::CanonError;
+
+/// Canonicalize a raw query string (the part after `?`, without it).
+pub fn canonicalize_query(raw: &str) -> Result<BTreeMap<String, Vec<String>>, CanonError> {
+    let mut map: BTreeMap<String, Vec<String>> = BTreeMap::new();
+    if raw.is_empty() {
+        return Ok(map);
+    }
+    for pair in raw.split('&') {
+        if pair.is_empty() {
+            // `?a=1&&b=2` -> skip empty pairs.
+            continue;
+        }
+        let (k_raw, v_raw) = match pair.find('=') {
+            Some(pos) => (&pair[..pos], &pair[pos + 1..]),
+            None => (pair, ""),
+        };
+        let k = decode_query_component(k_raw)?;
+        let v = decode_query_component(v_raw)?;
+        if k.is_empty() {
+            // `?=foo` is malformed — silently drop instead of injecting a
+            // ghost empty key into the map.
+            continue;
+        }
+        map.entry(k.to_ascii_lowercase()).or_default().push(v);
+    }
+    Ok(map)
+}
+
+fn decode_query_component(raw: &str) -> Result<String, CanonError> {
+    // `+` in a query component is a legacy form for space (application/x-www-form-urlencoded).
+    // IMDS / WireServer don't use form-encoded queries, but normalize it so a rule author
+    // can't tell the difference between `?key=a+b` and `?key=a%20b`.
+    let bytes = raw.as_bytes();
+    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
+    let mut i = 0;
+    while i < bytes.len() {
+        let b = bytes[i];
+        match b {
+            b'%' => {
+                if i + 2 >= bytes.len() {
+                    return Err(CanonError::MalformedPercent);
+                }
+                let h = hex_value(bytes[i + 1])?;
+                let l = hex_value(bytes[i + 2])?;
+                out.push((h << 4) | l);
+                i += 3;
+            }
+            b'+' => {
+                out.push(b' ');
+                i += 1;
+            }
+            _ => {
+                out.push(b);
+                i += 1;
+            }
+        }
+    }
+    // `String::from_utf8` still rejects malformed byte sequences
+    // (`InvalidUtf8`), so smuggling via broken encodings is closed. We
+    // deliberately do NOT reject well-formed non-ASCII here the way the
+    // path pipeline does: query values legitimately carry Unicode (e.g.
+    // an ARM `msi_res_id` whose resource-group name has Unicode letters),
+    // and a confusable in a query value cannot slip past a path-prefix
+    // deny rule. Control characters stay forbidden in both halves.
+    let s = String::from_utf8(out).map_err(|_| CanonError::InvalidUtf8)?;
+    for b in s.bytes() {
+        if b < 0x20 || b == 0x7F {
+            return Err(CanonError::ControlChar);
+        }
+    }
+    Ok(s)
+}
+
+fn hex_value(b: u8) -> Result<u8, CanonError> {
+    match b {
+        b'0'..=b'9' => Ok(b - b'0'),
+        b'a'..=b'f' => Ok(10 + b - b'a'),
+        b'A'..=b'F' => Ok(10 + b - b'A'),
+        _ => Err(CanonError::MalformedPercent),
+    }
+}
+
+#[cfg(test)]
+mod query_tests {
+    use super::*;
+
+    #[test]
+    fn empty() {
+        assert!(canonicalize_query("").unwrap().is_empty());
+    }
+
+    #[test]
+    fn single_pair() {
+        let q = canonicalize_query("api-version=2018-02-01").unwrap();
+        assert_eq!(q.get("api-version"), Some(&vec!["2018-02-01".to_string()]));
+    }
+
+    #[test]
+    fn key_lowercased() {
+        let q = canonicalize_query("API-Version=2018").unwrap();
+        assert_eq!(q.get("api-version"), Some(&vec!["2018".to_string()]));
+        assert!(!q.contains_key("API-Version"));
+    }
+
+    #[test]
+    fn percent_decoded_once() {
+        let q = canonicalize_query("resource=https%3A%2F%2Fmanagement.azure.com%2F").unwrap();
+        assert_eq!(
+            q.get("resource"),
+            Some(&vec!["https://management.azure.com/".to_string()])
+        );
+    }
+
+    #[test]
+    fn plus_to_space() {
+        let q = canonicalize_query("k=a+b").unwrap();
+        assert_eq!(q.get("k"), Some(&vec!["a b".to_string()]));
+    }
+
+    #[test]
+    fn repeated_key_preserves_order() {
+        let q = canonicalize_query("k=1&k=2&k=3").unwrap();
+        assert_eq!(
+            q.get("k"),
+            Some(&vec!["1".to_string(), "2".to_string(), "3".to_string()])
+        );
+    }
+
+    #[test]
+    fn no_value() {
+        let q = canonicalize_query("foo").unwrap();
+        assert_eq!(q.get("foo"), Some(&vec!["".to_string()]));
+    }
+
+    #[test]
+    fn malformed_percent_rejected() {
+        assert_eq!(
+            canonicalize_query("k=%2").unwrap_err(),
+            CanonError::MalformedPercent
+        );
+    }
+
+    #[test]
+    fn empty_key_dropped() {
+        let q = canonicalize_query("=value&k=v").unwrap();
+        assert!(!q.contains_key(""));
+        assert_eq!(q.get("k"), Some(&vec!["v".to_string()]));
+    }
+
+    #[test]
+    fn control_char_rejected() {
+        assert_eq!(
+            canonicalize_query("k=%0A").unwrap_err(),
+            CanonError::ControlChar
+        );
+    }
+
+    #[test]
+    fn non_ascii_allowed_in_query_value_and_key() {
+        // Unlike the path pipeline, well-formed non-ASCII UTF-8 is
+        // accepted in the query so legitimate ARM ids (e.g. an
+        // `msi_res_id` whose resource-group name has Unicode letters) are
+        // not rejected. The decoded codepoints survive verbatim.
+        // Value side: U+4E2D `中`.
+        let q = canonicalize_query("k=%E4%B8%AD").unwrap();
+        assert_eq!(q.get("k"), Some(&vec!["\u{4e2d}".to_string()]));
+        // Key side: U+00E9 `é` (only A–Z is ASCII-lowercased, so the
+        // non-ASCII key is preserved as-is).
+        let q = canonicalize_query("caf%C3%A9=1").unwrap();
+        assert_eq!(q.get("caf\u{e9}"), Some(&vec!["1".to_string()]));
+        // Both halves non-ASCII.
+        let q = canonicalize_query("%C3%A9=%C3%A9").unwrap();
+        assert_eq!(q.get("\u{e9}"), Some(&vec!["\u{e9}".to_string()]));
+    }
+
+    #[test]
+    fn malformed_utf8_still_reports_invalid_utf8() {
+        // Lone continuation byte (0x80 with no lead byte) is genuine
+        // encoding corruption, not a homoglyph attack. It must stay on
+        // `InvalidUtf8` / `CANON_UTF8` so the two audit-log classes
+        // remain separable.
+        assert_eq!(
+            canonicalize_query("k=%80").unwrap_err(),
+            CanonError::InvalidUtf8
+        );
+    }
+}
diff --git a/proxy_agent/src/proxy/canonical/rule.rs b/proxy_agent/src/proxy/canonical/rule.rs
new file mode 100644
index 00000000..edbdd106
--- /dev/null
+++ b/proxy_agent/src/proxy/canonical/rule.rs
@@ -0,0 +1,493 @@
+// Copyright (c) Microsoft Corporation
+// SPDX-License-Identifier: MIT
+
+//! Canonical form of a rule pattern.
+//!
+//! Rules go through the same pipeline as requests, with two
+//! differences:
+//!
+//! 1. There is no scheme/method on a rule (the matcher inherits those
+//!    from the request).
+//! 2. A rule's destination is `RuleDestination`, which adds an `Any`
+//!    variant for rules that intentionally span endpoints.
+//!
+//! Matching is then a pure structural comparison:
+//!
+//! - `Destination` must equal the rule's destination (or the rule is
+//!   `Any`).
+//! - The rule's path is a **prefix** of the request's canonical path,
+//!   compared segment-by-segment (not character-by-character — this is
+//!   what prevents `starts_with("/metadata")` from matching
+//!   `/metadata-attacker`).
+//! - For each query key constrained by the rule, the request must
+//!   have at least one matching value (case-insensitive after the
+//!   canonical pipeline already lowercased both sides).
+
+use std::collections::BTreeMap;
+
+use crate::key_keeper::key::Privilege;
+
+use super::destination::Destination;
+use super::path::canonicalize_path;
+use super::query::canonicalize_query;
+use super::{CanonError, CanonicalRequest};
+
+/// Destination constraint on a rule.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum RuleDestination {
+    /// Rule applies to a single classified destination.
+    Only(Destination),
+    /// Rule applies regardless of destination (used for the per-endpoint
+    /// rule files where the file itself already partitions the rules).
+    Any,
+}
+
+/// Canonical form of an authorization rule.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CanonicalPattern {
+    pub destination: RuleDestination,
+    /// Segments to prefix-match against [`CanonicalRequest::path_segments`].
+    /// Always starts with the root marker (empty string).
+    pub path_prefix: Vec<String>,
+    /// Required query parameters. Empty map means "no query constraint".
+    /// All present keys must match at least one of the supplied values.
+    pub required_query: BTreeMap<String, Vec<String>>,
+}
+
+impl CanonicalPattern {
+    /// Build from a raw `Privilege` (the on-disk rule format).
+    ///
+    /// The privilege's path is run through the canonical path pipeline,
+    /// and its query parameters are run through the canonical query
+    /// pipeline. Rules that fail canonicalization are **rejected** by
+    /// the loader — fail-closed.
+    pub fn from_privilege(p: &Privilege) -> Result<Self, CanonError> {
+        let (segments, _trailing) = canonicalize_path(&p.path)?;
+        let required_query = match &p.queryParameters {
+            None => BTreeMap::new(),
+            Some(qp) => {
+                let mut joined = String::new();
+                for (k, v) in qp.iter() {
+                    if !joined.is_empty() {
+                        joined.push('&');
+                    }
+                    joined.push_str(k);
+                    joined.push('=');
+                    joined.push_str(v);
+                }
+                canonicalize_query(&joined)?
+            }
+        };
+        Ok(CanonicalPattern {
+            destination: RuleDestination::Any,
+            path_prefix: segments,
+            required_query,
+        })
+    }
+
+    /// Structural match against a canonical request.
+    pub fn matches(&self, req: &CanonicalRequest) -> bool {
+        // Destination
+        if let RuleDestination::Only(d) = &self.destination {
+            if d != &req.destination {
+                return false;
+            }
+        }
+
+        // Path: segment-by-segment prefix match.
+        if req.path_segments.len() < self.path_prefix.len() {
+            return false;
+        }
+        for (i, seg) in self.path_prefix.iter().enumerate() {
+            if &req.path_segments[i] != seg {
+                return false;
+            }
+        }
+
+        // Query: every required key must be present and at least one
+        // of its required values must appear among the request's values
+        // for that key.
+        for (k, required_values) in &self.required_query {
+            let actual = match req.query.get(k) {
+                Some(v) => v,
+                None => return false,
+            };
+            let any_match = required_values
+                .iter()
+                .any(|rv| actual.iter().any(|av| av == rv));
+            if !any_match {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+#[cfg(test)]
+// The table-driven tests below intentionally carry deep nested-tuple
+// literal types like `&[(&str, Option<&[(&str, &str)]>, &[&str], &[(&str, &[&str])])]`.
+// That nesting IS the readability point — each column lines up with a
+// (input, expected) axis the test is exercising — and factoring it out
+// into a `type` alias hurts at-a-glance reading more than it helps.
+#[allow(clippy::type_complexity)]
+mod rule_tests {
+    use std::collections::HashMap;
+
+    use hyper::{Method, Uri};
+
+    use super::*;
+    use crate::proxy::canonical::canonicalize;
+
+    // ---------- helpers ----------
+
+    fn priv_of(path: &str, qp: Option<&[(&str, &str)]>) -> Privilege {
+        Privilege {
+            name: "test".to_string(),
+            path: path.to_string(),
+            queryParameters: qp.map(|pairs| {
+                pairs
+                    .iter()
+                    .map(|(k, v)| (k.to_string(), v.to_string()))
+                    .collect::<HashMap<_, _>>()
+            }),
+        }
+    }
+
+    fn req_of(uri: &str, method: &Method) -> CanonicalRequest {
+        let u: Uri = uri.parse().unwrap();
+        canonicalize(&u, method).unwrap()
+    }
+
+    fn segs(parts: &[&str]) -> Vec<String> {
+        parts.iter().map(|s| s.to_string()).collect()
+    }
+
+    fn pat(
+        destination: RuleDestination,
+        prefix: &[&str],
+        query: &[(&str, &[&str])],
+    ) -> CanonicalPattern {
+        let mut required = BTreeMap::new();
+        for (k, vs) in query {
+            required.insert(
+                (*k).to_string(),
+                vs.iter().map(|v| (*v).to_string()).collect(),
+            );
+        }
+        CanonicalPattern {
+            destination,
+            path_prefix: segs(prefix),
+            required_query: required,
+        }
+    }
+
+    // ---------- from_privilege ----------
+
+    #[test]
+    fn from_privilege_canonicalizes_and_normalizes() {
+        // (raw_path, raw_query_pairs) -> (expected_path_prefix, expected_required_query as sorted pairs)
+        let cases: &[(&str, Option<&[(&str, &str)]>, &[&str], &[(&str, &[&str])])] = &[
+            // case folding + leading-slash root marker preserved.
+            ("/Metadata", None, &["", "metadata"], &[]),
+            // dot-segments collapse on the rule side too.
+            ("/a/./b/../c", None, &["", "a", "c"], &[]),
+            // trailing slash on rule is dropped from segments (same as request side).
+            (
+                "/metadata/identity/",
+                None,
+                &["", "metadata", "identity"],
+                &[],
+            ),
+            // None vs Some(empty) both yield an empty required_query.
+            ("/x", None, &["", "x"], &[]),
+            ("/x", Some(&[]), &["", "x"], &[]),
+            // single query param canonicalized (key case-folded).
+            (
+                "/m",
+                Some(&[("Api-Version", "2018-02-01")]),
+                &["", "m"],
+                &[("api-version", &["2018-02-01"])],
+            ),
+            // value with encoded specials kept literal (not re-split on '=').
+            (
+                "/m",
+                Some(&[("k", "a%3Db")]),
+                &["", "m"],
+                &[("k", &["a=b"])],
+            ),
+            // root-only rule.
+            ("/", None, &[""], &[]),
+        ];
+
+        for (path, qp, expect_segs, expect_q) in cases {
+            let p = CanonicalPattern::from_privilege(&priv_of(path, *qp)).unwrap();
+            assert_eq!(
+                p.destination,
+                RuleDestination::Any,
+                "from_privilege must default to Any (input path={path:?})"
+            );
+            assert_eq!(p.path_prefix, segs(expect_segs), "path={path:?}");
+            let actual: Vec<(String, Vec<String>)> = p.required_query.into_iter().collect();
+            let expected: Vec<(String, Vec<String>)> = expect_q
+                .iter()
+                .map(|(k, vs)| {
+                    (
+                        (*k).to_string(),
+                        vs.iter().map(|v| (*v).to_string()).collect(),
+                    )
+                })
+                .collect();
+            assert_eq!(actual, expected, "path={path:?}");
+        }
+    }
+
+    #[test]
+    fn from_privilege_rejects_invalid_inputs() {
+        // Garbage propagates from the canonical pipeline as a CanonError;
+        // the loader is expected to drop the rule rather than admit it.
+        let bad: &[(&str, Option<&[(&str, &str)]>)] = &[
+            // Malformed percent in rule path.
+            ("/bad%ZZ", None),
+            // Control byte in rule path.
+            ("/bad\x01path", None),
+            // Non-ASCII in rule path.
+            ("/café", None),
+            // Malformed percent in rule query value.
+            ("/x", Some(&[("k", "%ZZ")])),
+            // Control byte in rule query key.
+            ("/x", Some(&[("k\x01", "v")])),
+        ];
+        for (path, qp) in bad {
+            let r = CanonicalPattern::from_privilege(&priv_of(path, *qp));
+            assert!(
+                r.is_err(),
+                "expected canonical rejection for path={path:?} qp={qp:?}"
+            );
+        }
+    }
+
+    // ---------- destination matching ----------
+
+    #[test]
+    fn matches_destination_constraint() {
+        // Any always matches; Only matches only its own classified destination.
+        let any = pat(RuleDestination::Any, &[""], &[]);
+        let only_imds = pat(RuleDestination::Only(Destination::Imds), &[""], &[]);
+        let only_ws = pat(RuleDestination::Only(Destination::WireServer), &[""], &[]);
+
+        let imds_req = req_of("http://169.254.169.254/x", &Method::GET);
+        let ws_req = req_of("http://168.63.129.16/x", &Method::GET);
+        let unk_req = req_of("http://10.0.0.1/x", &Method::GET);
+
+        let cases: &[(&CanonicalPattern, &CanonicalRequest, bool, &str)] = &[
+            (&any, &imds_req, true, "Any+Imds"),
+            (&any, &ws_req, true, "Any+WireServer"),
+            (&any, &unk_req, true, "Any+Unknown"),
+            (&only_imds, &imds_req, true, "Only(Imds)+Imds"),
+            (&only_imds, &ws_req, false, "Only(Imds)+WireServer rejects"),
+            (&only_imds, &unk_req, false, "Only(Imds)+Unknown rejects"),
+            (&only_ws, &ws_req, true, "Only(WS)+WireServer"),
+            (&only_ws, &imds_req, false, "Only(WS)+Imds rejects"),
+        ];
+        for (rule, req, expected, label) in cases {
+            assert_eq!(rule.matches(req), *expected, "{label}");
+        }
+    }
+
+    // ---------- path matching ----------
+
+    #[test]
+    fn matches_path_prefix_semantics() {
+        // (rule_path, request_uri, expected, label)
+        // Method is intentionally varied to confirm it is NOT part of rule matching.
+        let cases: &[(&str, &str, &Method, bool, &str)] = &[
+            // Segment-boundary safety: /metadata must not match /metadata-attacker.
+            (
+                "/metadata",
+                "http://169.254.169.254/metadata/identity",
+                &Method::GET,
+                true,
+                "rule shorter than request matches at boundary",
+            ),
+            (
+                "/metadata",
+                "http://169.254.169.254/metadata-attacker/identity",
+                &Method::GET,
+                false,
+                "rule must not bleed across segment boundary",
+            ),
+            // Exact match.
+            (
+                "/metadata/identity",
+                "http://169.254.169.254/metadata/identity",
+                &Method::POST,
+                true,
+                "exact path equality (method irrelevant)",
+            ),
+            // Request strictly shorter than rule prefix -> reject.
+            (
+                "/metadata/identity/oauth2/token",
+                "http://169.254.169.254/metadata/identity",
+                &Method::GET,
+                false,
+                "request shorter than rule rejects",
+            ),
+            // Mid-segment differ (not just at the end).
+            (
+                "/a/b/c",
+                "http://169.254.169.254/a/X/c",
+                &Method::GET,
+                false,
+                "differing mid segment rejects",
+            ),
+            // Case-insensitive (both rule and request lowercased by pipeline).
+            (
+                "/Metadata",
+                "http://169.254.169.254/METADATA/Identity",
+                &Method::GET,
+                true,
+                "case-insensitive path match",
+            ),
+            // Percent-encoded slash decoded once -> rule that includes the slash matches.
+            (
+                "/metadata/identity",
+                "http://169.254.169.254/metadata%2Fidentity/oauth2/token",
+                &Method::GET,
+                true,
+                "encoded slash in request decodes to rule path",
+            ),
+            // Root-only rule matches any request path.
+            (
+                "/",
+                "http://169.254.169.254/anything/at/all",
+                &Method::GET,
+                true,
+                "root-only rule is universal on path",
+            ),
+            (
+                "/",
+                "http://169.254.169.254/",
+                &Method::GET,
+                true,
+                "root-only rule on root request",
+            ),
+        ];
+        for (rule_path, uri, method, expected, label) in cases {
+            let p = CanonicalPattern::from_privilege(&priv_of(rule_path, None)).unwrap();
+            let r = req_of(uri, method);
+            assert_eq!(p.matches(&r), *expected, "{label}");
+        }
+    }
+
+    // ---------- query matching ----------
+
+    #[test]
+    fn matches_query_constraint_semantics() {
+        // Across keys: AND. Within a key: OR over the rule's allowed values.
+        // Build patterns directly so we can exercise multiple values per key
+        // (the on-disk Privilege format only supports a single value per key).
+        let multi_value = pat(RuleDestination::Any, &["", "m"], &[("v", &["a", "b"])]);
+        let multi_key = pat(
+            RuleDestination::Any,
+            &["", "m"],
+            &[("a", &["1"]), ("b", &["2"])],
+        );
+        let no_query = pat(RuleDestination::Any, &["", "m"], &[]);
+
+        let cases: &[(&CanonicalPattern, &str, bool, &str)] = &[
+            // No required_query -> any query (including none) matches.
+            (
+                &no_query,
+                "http://169.254.169.254/m",
+                true,
+                "no constraint + no query",
+            ),
+            (
+                &no_query,
+                "http://169.254.169.254/m?anything=here",
+                true,
+                "no constraint + extra query",
+            ),
+            // OR within a key.
+            (
+                &multi_value,
+                "http://169.254.169.254/m?v=a",
+                true,
+                "OR within key, first value",
+            ),
+            (
+                &multi_value,
+                "http://169.254.169.254/m?v=b",
+                true,
+                "OR within key, second value",
+            ),
+            (
+                &multi_value,
+                "http://169.254.169.254/m?v=c",
+                false,
+                "value outside allowed set rejects",
+            ),
+            (
+                &multi_value,
+                "http://169.254.169.254/m",
+                false,
+                "missing required key rejects",
+            ),
+            // Request supplies the same key twice; rule accepts if ANY request value matches.
+            (
+                &multi_value,
+                "http://169.254.169.254/m?v=c&v=b",
+                true,
+                "request-side repeat: any value matches rule",
+            ),
+            // Extra request keys are allowed (rule is a minimum requirement, not an exact match).
+            (
+                &multi_value,
+                "http://169.254.169.254/m?v=a&extra=zzz",
+                true,
+                "extra request keys allowed",
+            ),
+            // AND across keys.
+            (
+                &multi_key,
+                "http://169.254.169.254/m?a=1&b=2",
+                true,
+                "AND across keys satisfied",
+            ),
+            (
+                &multi_key,
+                "http://169.254.169.254/m?a=1",
+                false,
+                "AND across keys: missing second key rejects",
+            ),
+            (
+                &multi_key,
+                "http://169.254.169.254/m?a=1&b=9",
+                false,
+                "AND across keys: wrong value on one key rejects",
+            ),
+            // Case folding on keys (Api-Version vs api-version) via canonical pipeline.
+            (
+                &CanonicalPattern::from_privilege(&priv_of(
+                    "/metadata/identity",
+                    Some(&[("Api-Version", "2018-02-01")]),
+                ))
+                .unwrap(),
+                "http://169.254.169.254/metadata/identity?API-VERSION=2018-02-01",
+                true,
+                "case-insensitive key fold both sides",
+            ),
+            // Encoded value on the request side decodes once to match the rule.
+            (
+                &CanonicalPattern::from_privilege(&priv_of("/m", Some(&[("k", "a b")]))).unwrap(),
+                "http://169.254.169.254/m?k=a%20b",
+                true,
+                "request value decoded once matches rule value",
+            ),
+        ];
+        for (rule, uri, expected, label) in cases {
+            let r = req_of(uri, &Method::GET);
+            assert_eq!(rule.matches(&r), *expected, "{label}");
+        }
+    }
+}
diff --git a/proxy_agent/src/proxy/proxy_authorizer.rs b/proxy_agent/src/proxy/proxy_authorizer.rs
index f4f8a28d..0c59c82a 100644
--- a/proxy_agent/src/proxy/proxy_authorizer.rs
+++ b/proxy_agent/src/proxy/proxy_authorizer.rs
@@ -21,8 +21,9 @@
 
 use super::authorization_rules::{AuthorizationMode, ComputedAuthorizationItem};
 use super::proxy_connection::ConnectionLogger;
+use crate::proxy::canonical::CanonicalMode;
 use crate::shared_state::access_control_wrapper::AccessControlSharedState;
-use crate::{common::constants, common::result::Result, proxy::Claims};
+use crate::{common::config, common::constants, common::result::Result, proxy::Claims};
 use proxy_agent_shared::hyper_client;
 use proxy_agent_shared::logger::LoggerLevel;
 
@@ -243,12 +244,50 @@ pub fn authorize(
         return AuthorizeResult::Ok;
     }
 
-    let auth = get_authorizer(ip, port, claims);
+    let auth = get_authorizer(ip, port, claims.clone());
     logger.write(
         LoggerLevel::Trace,
         format!("Got auth: {}", auth.to_string()),
     );
-    auth.authorize(logger, request_uri, access_control_rules)
+    let legacy_result = auth.authorize(logger, request_uri.clone(), access_control_rules.clone());
+
+    // Innovation 2.1 M3 — shadow-mode integration.
+    //
+    // Only walks the canonical pipeline when the operator opts in via
+    // the `canonicalRequestMode` config key. With the default (`off`)
+    // there is **zero** additional work versus the pre-M3 control
+    // flow — that's what guarantees the "behavior unchanged for
+    // production traffic" exit criterion.
+    //
+    // Why we re-evaluate `is_allowed` here instead of recovering it
+    // from `legacy_result`: the wrapper above mixes in audit-mode
+    // softening and `runAsElevated` gating, neither of which the
+    // canonical matcher knows about. Comparing the wrapper output to
+    // the canonical matcher would produce phantom divergences. So we
+    // call the rules-only predicate twice in shadow/enforce — once
+    // through the Authorizer, once explicitly for the shadow
+    // comparison — accepting one extra matcher invocation per request
+    // only in non-production modes.
+    let mode = config::get_canonical_request_mode();
+    if mode != CanonicalMode::Off {
+        if let Some(rules) = access_control_rules.as_ref() {
+            let legacy_allowed = rules.is_allowed(logger, request_uri.clone(), claims.clone());
+            let _canon = rules.shadow_compare(
+                logger,
+                &request_uri,
+                &request_method,
+                &claims,
+                legacy_allowed,
+                mode,
+            );
+            // Enforce-mode *behavior* is deliberately deferred to
+            // M5/M6 — see the design doc §9.3. In M3 Enforce only
+            // surfaces telemetry; it does NOT yet replace the legacy
+            // decision. A one-shot warning above (during config load)
+            // tells operators the same.
+        }
+    }
+    legacy_result
 }
 
 #[cfg(test)]
diff --git a/proxy_agent/src/proxy/proxy_connection.rs b/proxy_agent/src/proxy/proxy_connection.rs
index a810c77a..20fe40db 100644
--- a/proxy_agent/src/proxy/proxy_connection.rs
+++ b/proxy_agent/src/proxy/proxy_connection.rs
@@ -282,8 +282,19 @@ impl HttpConnectionContext {
         hyper_client::should_skip_sig(&self.method, &self.url)
     }
 
+    /// Pre-canonical defense-in-depth guard. Returns `true` if the request
+    /// path contains a pattern commonly used to bypass prefix-based
+    /// authorization rules. See [`path_has_traversal`] for the exact set.
+    /// This is checked in `handle_new_http_request` before any rule lookup
+    /// so suspicious paths short-circuit to 403 without ever reaching the
+    /// matcher or the upstream.
+    ///
+    /// Once the canonical pipeline graduates to enforce mode (M5), the
+    /// `PathUnderflow` / slash-collapsing / encoded-dot handling in
+    /// `crate::proxy::canonical::path` subsumes this check entirely and
+    /// this method can be removed.
     pub fn contains_traversal_characters(&self) -> bool {
-        self.url.path().contains("..")
+        path_has_traversal(self.url.path())
     }
 
     pub fn log(&mut self, logger_level: LoggerLevel, message: String) {
@@ -393,3 +404,126 @@ impl Clone for ConnectionLogger {
         }
     }
 }
+
+/// Returns `true` if `raw_path` (the path component of a request URI in
+/// its on-wire, *not yet percent-decoded* form) contains a pattern that
+/// is commonly used to bypass prefix-based authorization rules.
+///
+/// Caught by this function (rejected with 404 by the caller):
+///   * `..` — literal dot-dot anywhere in the path
+///   * `%2E%2E`, `%2e%2e`, and mixed-case (`%2E%2e`, `%2e%2E`) — caught
+///     after a single percent-decode pass
+///   * `.%2e`, `%2E.`, etc. — same single-decode pass collapses them
+///     into a literal `..`
+///   * `//` — two or more consecutive slashes (multi-slash bypass).
+///     Legacy prefix-matching treats `/foo//bar` as a distinct path
+///     from `/foo/bar`; reject the raw form so attackers cannot pivot
+///     past a privilege whose path string lacks the extra slash.
+///
+/// INTENTIONALLY NOT rejected here (the canonical pipeline in
+/// [`crate::proxy::canonical::path`] handles these and will subsume this
+/// guard entirely once it graduates to enforce mode in M5):
+///   * single `.` segments — valid in real paths like `/.well-known/*`
+///   * `;` matrix parameters — sub-delim that is legal inside a segment
+///   * non-ASCII / Unicode-confusable bytes — caught by `reject_non_ascii`
+///   * embedded `?` after percent-decode — caught by canonical's EmbQ check
+///   * trailing-slash variation — clamped by canonical's normalizer
+///
+/// Decoding uses `decode_utf8_lossy`: any invalid-UTF-8 percent
+/// sequences are replaced by U+FFFD (which does not contain `..`), and
+/// truly malformed sequences are passed through unchanged. This keeps
+/// the check fail-open for the rare valid non-ASCII path while staying
+/// safe for the traversal cases that matter.
+fn path_has_traversal(raw_path: &str) -> bool {
+    if raw_path.contains("//") {
+        return true;
+    }
+    let decoded = percent_encoding::percent_decode_str(raw_path).decode_utf8_lossy();
+    decoded.contains("..")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::path_has_traversal;
+
+    #[test]
+    fn clean_paths_are_not_traversal() {
+        for p in [
+            "/",
+            "/metadata/instance",
+            "/metadata/identity/oauth2/token",
+            "/machine/?comp=goalstate",
+            "/.well-known/foo",
+            "/metadata/instance?api-version=2021-02-01",
+        ] {
+            assert!(
+                !path_has_traversal(p),
+                "expected clean path, got traversal: {p:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn literal_dot_dot_is_traversal() {
+        for p in [
+            "/foo/../bar",
+            "/metadata/./identity/../identity/oauth2/token",
+            "/..",
+            "../etc/passwd",
+        ] {
+            assert!(path_has_traversal(p), "expected traversal: {p:?}");
+        }
+    }
+
+    #[test]
+    fn percent_encoded_dot_dot_is_traversal() {
+        // Uppercase, lowercase, and mixed-case percent encodings must all
+        // be caught — they all decode to a literal `..` in a single pass.
+        for p in [
+            "/foo/%2E%2E/bar",
+            "/foo/%2e%2e/bar",
+            "/foo/%2E%2e/bar",
+            "/foo/%2e%2E/bar",
+            "/foo/.%2e/bar",
+            "/foo/%2E./bar",
+            "/metadata/%2E/identity/%2E%2E/identity/oauth2/token",
+        ] {
+            assert!(
+                path_has_traversal(p),
+                "expected percent-encoded traversal: {p:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn multi_slash_is_traversal() {
+        for p in [
+            "//metadata/instance",
+            "/metadata//instance",
+            "/metadata///identity//oauth2//token",
+            "//",
+        ] {
+            assert!(
+                path_has_traversal(p),
+                "expected multi-slash traversal: {p:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn single_dot_and_matrix_params_are_not_traversal_here() {
+        // Single `.` segments and matrix params (`;`) are not traversal
+        // markers and must pass through this pre-canonical guard. The
+        // canonical pipeline applies its own normalization in M5.
+        for p in [
+            "/metadata/./instance",
+            "/metadata/instance;jsessionid=abc",
+            "/.well-known/openid-configuration",
+        ] {
+            assert!(
+                !path_has_traversal(p),
+                "expected non-traversal (deferred to canonical): {p:?}"
+            );
+        }
+    }
+}
diff --git a/proxy_agent/src/proxy/proxy_server.rs b/proxy_agent/src/proxy/proxy_server.rs
index 9cd2258d..be8fc2e7 100644
--- a/proxy_agent/src/proxy/proxy_server.rs
+++ b/proxy_agent/src/proxy/proxy_server.rs
@@ -404,15 +404,15 @@ impl ProxyServer {
         }
 
         if http_connection_context.contains_traversal_characters() {
-            // If the proxied request contains traversal characters, we will return 404 Not Found to avoid potential security issues.
+            // If the proxied request contains traversal characters, we will return 403 Forbidden to avoid potential security issues.
             self.log_connection_summary(
                 &mut http_connection_context,
-                StatusCode::NOT_FOUND,
+                StatusCode::FORBIDDEN,
                 false,
-                "Traversal characters found in the request, return NOT_FOUND!".to_string(),
+                "Traversal characters found in the request, return FORBIDDEN!".to_string(),
             )
             .await;
-            return Ok(Self::closed_response(StatusCode::NOT_FOUND));
+            return Ok(Self::closed_response(StatusCode::FORBIDDEN));
         }
 
         if http_connection_context.url == provision::provision_query::PROVISION_URL_PATH {
@@ -1176,9 +1176,9 @@ mod tests {
             .await
             .unwrap();
         assert_eq!(
-            http::StatusCode::NOT_FOUND,
+            http::StatusCode::FORBIDDEN,
             response.status(),
-            "response.status must be NOT_FOUND."
+            "response.status must be FORBIDDEN."
         );
 
         // test large request body