From 0fbc28860cea67de87d59182976e6d90db984f29 Mon Sep 17 00:00:00 2001
From: lzeligman-mark43 <lance.zeligman@mark43.com>
Date: Thu, 21 May 2026 13:05:33 -0400
Subject: [PATCH 1/2] fix: URLSearchParams refactor broke valid urlencoded
 webhooks; drop redundant decodeURIComponent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current `URL_ENCODED` branch of `parseRequestBody` has two bugs that combine to make every valid urlencoded GitHub webhook fail:

1. `bodySchema.parse(payloadParam)` is called with a `string | null` returned by `URLSearchParams.get("payload")`, but `bodySchema` is `z.object({ payload: z.string() })` — i.e. it expects an *object* with a `payload` key. zod throws `Invalid input: expected object, received string` on every valid payload, so the function returns `undefined`. The pre-existing fixture test (`fixtures/invalid-payload-urlencoded.txt`) hides this because it only exercises an invalid payload that's expected to be rejected with 403.

2. `JSON.parse(decodeURIComponent(payload))` calls `decodeURIComponent` a second time on a value `URLSearchParams.get` has already URL-decoded. This throws `URIError: URI malformed` whenever the decoded JSON contains a literal `%` that isn't part of a valid `%XX` escape — which is common in real PR titles, commit messages, and comments ("30% threshold", "set %USERPROFILE% to ~", "WHERE x LIKE '%foo%'", "printf(\"%s\", val)").

Fixes:

- Wrap the `URLSearchParams.get("payload")` value into the `{ payload: string }` shape the schema expects, so schema validation actually runs against valid payloads.
- Drop the redundant `decodeURIComponent(payload)` — `URLSearchParams.get` already URL-decodes once, which is exactly what GitHub's single-URL-encoded webhook body needs.

Tests: 9 new cases in `lambda/proxy.test.ts` cover the percent-character regression patterns plus a control and a `%20`-preservation case, exercised both end-to-end through the handler and directly against `parseRequestBody`.
---
 lambda/parse-request-body.ts | 17 +++++--
 lambda/proxy.test.ts         | 96 ++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/lambda/parse-request-body.ts b/lambda/parse-request-body.ts
index 08b9ac5..83aa722 100644
--- a/lambda/parse-request-body.ts
+++ b/lambda/parse-request-body.ts
@@ -27,9 +27,20 @@ export function parseRequestBody(
         return JSON.parse(body);
       case CONTENT_TYPES.URL_ENCODED:
         const params = new URLSearchParams(body);
-        const payloadParam = params.get("payload");
-        const { payload } = bodySchema.parse(payloadParam);
-        return JSON.parse(decodeURIComponent(payload));
+        // bodySchema is `z.object({ payload: z.string() })`, so wrap the
+        // URLSearchParams.get() value (a `string | null`) in that shape
+        // before validating; passing the raw string failed schema parsing
+        // and broke every valid urlencoded webhook.
+        const { payload } = bodySchema.parse({
+          payload: params.get("payload"),
+        });
+        // URLSearchParams.get already URL-decodes the value, so `payload`
+        // is already the JSON string sent by GitHub. A second
+        // decodeURIComponent would throw URIError on any bare '%' that
+        // appears as literal user content (e.g. "30% threshold" in a PR
+        // title) and is unnecessary because GitHub webhooks are
+        // single-URL-encoded per the form-urlencoded spec.
+        return JSON.parse(payload);
     }
   } catch (error) {
     console.error(`Error parsing request body: ${error}`);
diff --git a/lambda/proxy.test.ts b/lambda/proxy.test.ts
index 52c7b0d..d6a528a 100644
--- a/lambda/proxy.test.ts
+++ b/lambda/proxy.test.ts
@@ -12,6 +12,7 @@ limitations under the License.
 */
 
 import { handler } from "./proxy";
+import { parseRequestBody } from "./parse-request-body";
 import type { AxiosRequestHeaders, AxiosResponse } from "axios";
 import { readFileSync } from "fs";
 import { Agent } from "https";
@@ -318,4 +319,99 @@ describe("proxy", () => {
     });
     expect(axiosPostMock).toHaveBeenCalled();
   });
+
+  it("should forward a urlencoded webhook whose JSON contains literal '%' characters", async () => {
+    // Regression: bare '%' in PR titles / commit messages / comments (e.g.
+    // "30% threshold") previously caused parseRequestBody to throw URIError
+    // because of a redundant decodeURIComponent(payload) after
+    // URLSearchParams.get() had already URL-decoded the value.
+    const payloadWithPercent = {
+      ...VALID_PUSH_PAYLOAD,
+      head_commit: {
+        ...(VALID_PUSH_PAYLOAD as any).head_commit,
+        message:
+          "Roll out 30% threshold; reference %USERPROFILE% on Windows; WHERE x LIKE '%foo%'",
+      },
+    };
+    const urlencodedBody =
+      "payload=" + encodeURIComponent(JSON.stringify(payloadWithPercent));
+    const destinationUrl = "https://approved.host/github-webhook/";
+    const endpointId = encodeURIComponent(destinationUrl);
+    const event: APIGatewayProxyWithLambdaAuthorizerEvent<any> = {
+      ...baseEvent,
+      headers: {
+        ...baseEvent.headers,
+        "content-type": "application/x-www-form-urlencoded",
+      },
+      body: urlencodedBody,
+      pathParameters: { endpointId },
+    };
+    const result = await handler(event);
+    expect(result).toEqual(expectedResponseObject);
+    expect(axiosPostMock).toHaveBeenCalled();
+  });
+});
+
+describe("parseRequestBody — urlencoded payloads with literal '%' characters", () => {
+  // The URL_ENCODED branch used to call JSON.parse(decodeURIComponent(payload)).
+  // URLSearchParams.get() already URL-decodes once, so the second decode would
+  // throw URIError: URI malformed on any string whose decoded form contains a
+  // bare '%' not followed by two hex digits — which is common in real PR
+  // titles, bodies, and comments. These tests pin the fix.
+  const headers = { "content-type": "application/x-www-form-urlencoded" };
+
+  const cases: Array<{ label: string; userContent: string }> = [
+    { label: "percentage in PR title", userContent: "30% threshold rollout" },
+    { label: "trailing percent", userContent: "rate: 5%" },
+    {
+      label: "Windows env var reference",
+      userContent: "set %USERPROFILE% to ~",
+    },
+    {
+      label: "SQL LIKE wildcard",
+      userContent: "WHERE name LIKE '%foo%' AND status = 1",
+    },
+    {
+      label: "C-style format string",
+      userContent: 'printf("%s\\n", val);',
+    },
+    { label: "Go format verb", userContent: 'log.Printf("%v", obj)' },
+    {
+      label: "bare % followed by non-hex",
+      userContent: "look here: %g and %h",
+    },
+  ];
+
+  for (const c of cases) {
+    it(`parses a webhook whose JSON contains literal '%' (${c.label})`, () => {
+      const json = JSON.stringify({
+        action: "opened",
+        pull_request: { title: c.userContent, body: c.userContent },
+      });
+      const body = "payload=" + encodeURIComponent(json);
+      const result = parseRequestBody(body, headers);
+      expect(result).toBeDefined();
+      expect((result as any).pull_request.title).toBe(c.userContent);
+    });
+  }
+
+  it("still parses payloads with no '%' characters (control)", () => {
+    const json = JSON.stringify({ action: "opened", number: 42 });
+    const body = "payload=" + encodeURIComponent(json);
+    const result = parseRequestBody(body, headers);
+    expect(result).toBeDefined();
+    expect((result as any).number).toBe(42);
+  });
+
+  it("correctly preserves valid percent-escapes (%20 → space) inside JSON string values", () => {
+    // A PR description that documents URL encoding by writing '%20' literally
+    // should reach the parsed JSON value as the literal string '%20', NOT as
+    // a decoded space. URLSearchParams.get() decodes the OUTER encoding once;
+    // characters inside the JSON string value are preserved verbatim.
+    const literalText = "encoded as %20 example";
+    const json = JSON.stringify({ comment: { body: literalText } });
+    const body = "payload=" + encodeURIComponent(json);
+    const result = parseRequestBody(body, headers);
+    expect((result as any).comment.body).toBe(literalText);
+  });
 });

From 550d1c5e5d8001d14656497a377f49aed9436311 Mon Sep 17 00:00:00 2001
From: Dan Adajian <danadajian@gmail.com>
Date: Tue, 26 May 2026 12:45:54 -0500
Subject: [PATCH 2/2] Apply suggestions from code review

Co-authored-by: Dan Adajian <danadajian@gmail.com>
---
 lambda/parse-request-body.ts | 10 ----------
 lambda/proxy.test.ts         | 13 -------------
 2 files changed, 23 deletions(-)

diff --git a/lambda/parse-request-body.ts b/lambda/parse-request-body.ts
index 83aa722..39b9ab9 100644
--- a/lambda/parse-request-body.ts
+++ b/lambda/parse-request-body.ts
@@ -27,19 +27,9 @@ export function parseRequestBody(
         return JSON.parse(body);
       case CONTENT_TYPES.URL_ENCODED:
         const params = new URLSearchParams(body);
-        // bodySchema is `z.object({ payload: z.string() })`, so wrap the
-        // URLSearchParams.get() value (a `string | null`) in that shape
-        // before validating; passing the raw string failed schema parsing
-        // and broke every valid urlencoded webhook.
         const { payload } = bodySchema.parse({
           payload: params.get("payload"),
         });
-        // URLSearchParams.get already URL-decodes the value, so `payload`
-        // is already the JSON string sent by GitHub. A second
-        // decodeURIComponent would throw URIError on any bare '%' that
-        // appears as literal user content (e.g. "30% threshold" in a PR
-        // title) and is unnecessary because GitHub webhooks are
-        // single-URL-encoded per the form-urlencoded spec.
         return JSON.parse(payload);
     }
   } catch (error) {
diff --git a/lambda/proxy.test.ts b/lambda/proxy.test.ts
index d6a528a..733fa50 100644
--- a/lambda/proxy.test.ts
+++ b/lambda/proxy.test.ts
@@ -321,10 +321,6 @@ describe("proxy", () => {
   });
 
   it("should forward a urlencoded webhook whose JSON contains literal '%' characters", async () => {
-    // Regression: bare '%' in PR titles / commit messages / comments (e.g.
-    // "30% threshold") previously caused parseRequestBody to throw URIError
-    // because of a redundant decodeURIComponent(payload) after
-    // URLSearchParams.get() had already URL-decoded the value.
     const payloadWithPercent = {
       ...VALID_PUSH_PAYLOAD,
       head_commit: {
@@ -353,11 +349,6 @@ describe("proxy", () => {
 });
 
 describe("parseRequestBody — urlencoded payloads with literal '%' characters", () => {
-  // The URL_ENCODED branch used to call JSON.parse(decodeURIComponent(payload)).
-  // URLSearchParams.get() already URL-decodes once, so the second decode would
-  // throw URIError: URI malformed on any string whose decoded form contains a
-  // bare '%' not followed by two hex digits — which is common in real PR
-  // titles, bodies, and comments. These tests pin the fix.
   const headers = { "content-type": "application/x-www-form-urlencoded" };
 
   const cases: Array<{ label: string; userContent: string }> = [
@@ -404,10 +395,6 @@ describe("parseRequestBody — urlencoded payloads with literal '%' characters",
   });
 
   it("correctly preserves valid percent-escapes (%20 → space) inside JSON string values", () => {
-    // A PR description that documents URL encoding by writing '%20' literally
-    // should reach the parsed JSON value as the literal string '%20', NOT as
-    // a decoded space. URLSearchParams.get() decodes the OUTER encoding once;
-    // characters inside the JSON string value are preserved verbatim.
     const literalText = "encoded as %20 example";
     const json = JSON.stringify({ comment: { body: literalText } });
     const body = "payload=" + encodeURIComponent(json);