From 0fbc28860cea67de87d59182976e6d90db984f29 Mon Sep 17 00:00:00 2001 From: lzeligman-mark43 Date: Thu, 21 May 2026 13:05:33 -0400 Subject: [PATCH 1/2] fix: URLSearchParams refactor broke valid urlencoded webhooks; drop redundant decodeURIComponent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current `URL_ENCODED` branch of `parseRequestBody` has two bugs that combine to make every valid urlencoded GitHub webhook fail: 1. `bodySchema.parse(payloadParam)` is called with a `string | null` returned by `URLSearchParams.get("payload")`, but `bodySchema` is `z.object({ payload: z.string() })` — i.e. it expects an *object* with a `payload` key. zod throws `Invalid input: expected object, received string` on every valid payload, so the function returns `undefined`. The pre-existing fixture test (`fixtures/invalid-payload-urlencoded.txt`) hides this because it only exercises an invalid payload that's expected to be rejected with 403. 2. `JSON.parse(decodeURIComponent(payload))` calls `decodeURIComponent` a second time on a value `URLSearchParams.get` has already URL-decoded. This throws `URIError: URI malformed` whenever the decoded JSON contains a literal `%` that isn't part of a valid `%XX` escape — which is common in real PR titles, commit messages, and comments ("30% threshold", "set %USERPROFILE% to ~", "WHERE x LIKE '%foo%'", "printf(\"%s\", val)"). Fixes: - Wrap the `URLSearchParams.get("payload")` value into the `{ payload: string }` shape the schema expects, so schema validation actually runs against valid payloads. - Drop the redundant `decodeURIComponent(payload)` — `URLSearchParams.get` already URL-decodes once, which is exactly what GitHub's single-URL-encoded webhook body needs. Tests: 9 new cases in `lambda/proxy.test.ts` cover the percent-character regression patterns plus a control and a `%20`-preservation case, exercised both end-to-end through the handler and directly against `parseRequestBody`. --- lambda/parse-request-body.ts | 17 +++++-- lambda/proxy.test.ts | 96 ++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 3 deletions(-) diff --git a/lambda/parse-request-body.ts b/lambda/parse-request-body.ts index 08b9ac5..83aa722 100644 --- a/lambda/parse-request-body.ts +++ b/lambda/parse-request-body.ts @@ -27,9 +27,20 @@ export function parseRequestBody( return JSON.parse(body); case CONTENT_TYPES.URL_ENCODED: const params = new URLSearchParams(body); - const payloadParam = params.get("payload"); - const { payload } = bodySchema.parse(payloadParam); - return JSON.parse(decodeURIComponent(payload)); + // bodySchema is `z.object({ payload: z.string() })`, so wrap the + // URLSearchParams.get() value (a `string | null`) in that shape + // before validating; passing the raw string failed schema parsing + // and broke every valid urlencoded webhook. + const { payload } = bodySchema.parse({ + payload: params.get("payload"), + }); + // URLSearchParams.get already URL-decodes the value, so `payload` + // is already the JSON string sent by GitHub. A second + // decodeURIComponent would throw URIError on any bare '%' that + // appears as literal user content (e.g. "30% threshold" in a PR + // title) and is unnecessary because GitHub webhooks are + // single-URL-encoded per the form-urlencoded spec. + return JSON.parse(payload); } } catch (error) { console.error(`Error parsing request body: ${error}`); diff --git a/lambda/proxy.test.ts b/lambda/proxy.test.ts index 52c7b0d..d6a528a 100644 --- a/lambda/proxy.test.ts +++ b/lambda/proxy.test.ts @@ -12,6 +12,7 @@ limitations under the License. */ import { handler } from "./proxy"; +import { parseRequestBody } from "./parse-request-body"; import type { AxiosRequestHeaders, AxiosResponse } from "axios"; import { readFileSync } from "fs"; import { Agent } from "https"; @@ -318,4 +319,99 @@ describe("proxy", () => { }); expect(axiosPostMock).toHaveBeenCalled(); }); + + it("should forward a urlencoded webhook whose JSON contains literal '%' characters", async () => { + // Regression: bare '%' in PR titles / commit messages / comments (e.g. + // "30% threshold") previously caused parseRequestBody to throw URIError + // because of a redundant decodeURIComponent(payload) after + // URLSearchParams.get() had already URL-decoded the value. + const payloadWithPercent = { + ...VALID_PUSH_PAYLOAD, + head_commit: { + ...(VALID_PUSH_PAYLOAD as any).head_commit, + message: + "Roll out 30% threshold; reference %USERPROFILE% on Windows; WHERE x LIKE '%foo%'", + }, + }; + const urlencodedBody = + "payload=" + encodeURIComponent(JSON.stringify(payloadWithPercent)); + const destinationUrl = "https://approved.host/github-webhook/"; + const endpointId = encodeURIComponent(destinationUrl); + const event: APIGatewayProxyWithLambdaAuthorizerEvent = { + ...baseEvent, + headers: { + ...baseEvent.headers, + "content-type": "application/x-www-form-urlencoded", + }, + body: urlencodedBody, + pathParameters: { endpointId }, + }; + const result = await handler(event); + expect(result).toEqual(expectedResponseObject); + expect(axiosPostMock).toHaveBeenCalled(); + }); +}); + +describe("parseRequestBody — urlencoded payloads with literal '%' characters", () => { + // The URL_ENCODED branch used to call JSON.parse(decodeURIComponent(payload)). + // URLSearchParams.get() already URL-decodes once, so the second decode would + // throw URIError: URI malformed on any string whose decoded form contains a + // bare '%' not followed by two hex digits — which is common in real PR + // titles, bodies, and comments. These tests pin the fix. + const headers = { "content-type": "application/x-www-form-urlencoded" }; + + const cases: Array<{ label: string; userContent: string }> = [ + { label: "percentage in PR title", userContent: "30% threshold rollout" }, + { label: "trailing percent", userContent: "rate: 5%" }, + { + label: "Windows env var reference", + userContent: "set %USERPROFILE% to ~", + }, + { + label: "SQL LIKE wildcard", + userContent: "WHERE name LIKE '%foo%' AND status = 1", + }, + { + label: "C-style format string", + userContent: 'printf("%s\\n", val);', + }, + { label: "Go format verb", userContent: 'log.Printf("%v", obj)' }, + { + label: "bare % followed by non-hex", + userContent: "look here: %g and %h", + }, + ]; + + for (const c of cases) { + it(`parses a webhook whose JSON contains literal '%' (${c.label})`, () => { + const json = JSON.stringify({ + action: "opened", + pull_request: { title: c.userContent, body: c.userContent }, + }); + const body = "payload=" + encodeURIComponent(json); + const result = parseRequestBody(body, headers); + expect(result).toBeDefined(); + expect((result as any).pull_request.title).toBe(c.userContent); + }); + } + + it("still parses payloads with no '%' characters (control)", () => { + const json = JSON.stringify({ action: "opened", number: 42 }); + const body = "payload=" + encodeURIComponent(json); + const result = parseRequestBody(body, headers); + expect(result).toBeDefined(); + expect((result as any).number).toBe(42); + }); + + it("correctly preserves valid percent-escapes (%20 → space) inside JSON string values", () => { + // A PR description that documents URL encoding by writing '%20' literally + // should reach the parsed JSON value as the literal string '%20', NOT as + // a decoded space. URLSearchParams.get() decodes the OUTER encoding once; + // characters inside the JSON string value are preserved verbatim. + const literalText = "encoded as %20 example"; + const json = JSON.stringify({ comment: { body: literalText } }); + const body = "payload=" + encodeURIComponent(json); + const result = parseRequestBody(body, headers); + expect((result as any).comment.body).toBe(literalText); + }); }); From 550d1c5e5d8001d14656497a377f49aed9436311 Mon Sep 17 00:00:00 2001 From: Dan Adajian Date: Tue, 26 May 2026 12:45:54 -0500 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Dan Adajian --- lambda/parse-request-body.ts | 10 ---------- lambda/proxy.test.ts | 13 ------------- 2 files changed, 23 deletions(-) diff --git a/lambda/parse-request-body.ts b/lambda/parse-request-body.ts index 83aa722..39b9ab9 100644 --- a/lambda/parse-request-body.ts +++ b/lambda/parse-request-body.ts @@ -27,19 +27,9 @@ export function parseRequestBody( return JSON.parse(body); case CONTENT_TYPES.URL_ENCODED: const params = new URLSearchParams(body); - // bodySchema is `z.object({ payload: z.string() })`, so wrap the - // URLSearchParams.get() value (a `string | null`) in that shape - // before validating; passing the raw string failed schema parsing - // and broke every valid urlencoded webhook. const { payload } = bodySchema.parse({ payload: params.get("payload"), }); - // URLSearchParams.get already URL-decodes the value, so `payload` - // is already the JSON string sent by GitHub. A second - // decodeURIComponent would throw URIError on any bare '%' that - // appears as literal user content (e.g. "30% threshold" in a PR - // title) and is unnecessary because GitHub webhooks are - // single-URL-encoded per the form-urlencoded spec. return JSON.parse(payload); } } catch (error) { diff --git a/lambda/proxy.test.ts b/lambda/proxy.test.ts index d6a528a..733fa50 100644 --- a/lambda/proxy.test.ts +++ b/lambda/proxy.test.ts @@ -321,10 +321,6 @@ describe("proxy", () => { }); it("should forward a urlencoded webhook whose JSON contains literal '%' characters", async () => { - // Regression: bare '%' in PR titles / commit messages / comments (e.g. - // "30% threshold") previously caused parseRequestBody to throw URIError - // because of a redundant decodeURIComponent(payload) after - // URLSearchParams.get() had already URL-decoded the value. const payloadWithPercent = { ...VALID_PUSH_PAYLOAD, head_commit: { @@ -353,11 +349,6 @@ describe("proxy", () => { }); describe("parseRequestBody — urlencoded payloads with literal '%' characters", () => { - // The URL_ENCODED branch used to call JSON.parse(decodeURIComponent(payload)). - // URLSearchParams.get() already URL-decodes once, so the second decode would - // throw URIError: URI malformed on any string whose decoded form contains a - // bare '%' not followed by two hex digits — which is common in real PR - // titles, bodies, and comments. These tests pin the fix. const headers = { "content-type": "application/x-www-form-urlencoded" }; const cases: Array<{ label: string; userContent: string }> = [ @@ -404,10 +395,6 @@ describe("parseRequestBody — urlencoded payloads with literal '%' characters", }); it("correctly preserves valid percent-escapes (%20 → space) inside JSON string values", () => { - // A PR description that documents URL encoding by writing '%20' literally - // should reach the parsed JSON value as the literal string '%20', NOT as - // a decoded space. URLSearchParams.get() decodes the OUTER encoding once; - // characters inside the JSON string value are preserved verbatim. const literalText = "encoded as %20 example"; const json = JSON.stringify({ comment: { body: literalText } }); const body = "payload=" + encodeURIComponent(json);