Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/postgres-compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ why.
| date / time / timestamp / timestamptz / interval | ✅ | `types_test.go::TestTypesDateTime`; server `TestEncode/DecodeDate/Timestamp/Time/Interval` | incl. ancient dates, microseconds |
| boolean | ✅ | `types_test.go::TestTypesBoolean`; server `TestEncode/DecodeBool` | all literal forms (t/f/yes/no/1/0) |
| uuid | ✅ | `types_test.go::TestTypesUUID`; server `TestEncodeDecodeUUID` | |
| json / jsonb | ✅ | `types_test.go::TestTypesJSON`; server `TestEncodeJSON`/`TestEncodeBinaryJSON` | operators `-> ->> @> ?`; JSONPath `@?` skipped |
| json / jsonb | ✅ | `types_test.go::TestTypesJSON`; server `TestEncodeJSON`/`TestEncodeBinaryJSON`; server `TestJSONExtractDollarKey_RoundTrip`/`_ParameterRoundTrip` | operators `-> ->> @> ?`; `$`-prefixed property keys (`$ai_session_id`, `$group_0`) are rewritten to a quoted-member JSONPath so DuckDB looks them up literally instead of failing as a malformed path (literal keys statically, bound params — including `::text`-cast ones — via the type-aware `duckgres_json_extract_path` macro, which still array-indexes integer params); a `$.`/`$[`-prefixed key navigates as a DuckDB JSONPath even via `->>` (deliberate divergence from PG literal-key semantics, see `normalizeJSONPathKey`); JSONPath `@?` skipped |
| arrays | ✅ | `types_test.go::TestTypesArray` | subscript, slice, concat, contains, ANY/ALL |
| NULL handling (all types) | ✅ | `types_test.go::TestTypesNullHandling` | |
| Casts (`CAST`, `::`, implicit) | ✅ | `types_test.go::TestTypesCasting` | |
Expand Down
33 changes: 33 additions & 0 deletions server/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,39 @@ func initUtilityMacros(db *sql.DB, serverStartTime, processStartTime time.Time,
// In standalone mode this equals control_plane_version(). During rolling updates
// these may differ if the control plane has been upgraded but workers haven't yet.
fmt.Sprintf(`CREATE OR REPLACE MACRO worker_version() AS '%s'`, strings.ReplaceAll(processVersion, "'", "''")),

// duckgres_json_extract_path - normalize a JSON-extraction key/path at
// runtime so a Postgres property key like "$ai_session_id" or "$group_0"
// is not mis-parsed by DuckDB as a (malformed) JSONPath and rejected at
// bind time ("JSON path error near ..."). The transpiler wraps the path
// argument of json_extract[_string] in this macro when it is a bound
// parameter ($N) whose value is unknown at transpile time (literal string
// keys are rewritten statically by normalizeJSONPathKey in
// transpiler/transform/operators.go; the string rules below MUST match it).
//
// Because the bound value's TYPE is only known at execute time, the macro
// dispatches on typeof(k):
// - NULL -> NULL
// - integer type -> $[k] (Postgres `json -> int` / `->> int`
// array indexing; a bare integer would also
// index, but every CASE branch must return
// one type, so we emit a VARCHAR path)
// - "$." / "$[" prefix -> unchanged (already a valid JSONPath)
// - other "$"-prefixed key -> $."<key>" (escape \ and " for the
// quoted member)
// - plain key -> unchanged (DuckDB looks it up literally;
// a string "0" stays an object-key lookup,
// matching Postgres text-vs-int semantics)
// The string branches cast k to VARCHAR so LIKE type-checks for every bound
// type (an integer k would otherwise fail to bind the `~~`/LIKE operator).
`CREATE OR REPLACE MACRO duckgres_json_extract_path(k) AS (
CASE
WHEN k IS NULL THEN NULL
WHEN typeof(k) IN ('TINYINT','SMALLINT','INTEGER','BIGINT','HUGEINT','UTINYINT','USMALLINT','UINTEGER','UBIGINT','UHUGEINT') THEN '$[' || k::VARCHAR || ']'
WHEN k::VARCHAR LIKE '$.%' OR k::VARCHAR LIKE '$[%' THEN k::VARCHAR
WHEN k::VARCHAR LIKE '$%' THEN '$."' || replace(replace(k::VARCHAR, '\', '\\'), '"', '\"') || '"'
ELSE k::VARCHAR
END)`,
}

for _, m := range macros {
Expand Down
175 changes: 175 additions & 0 deletions server/json_extract_path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package server

import (
"database/sql"
"testing"

"github.com/posthog/duckgres/transpiler"
)

// TestJSONExtractDollarKey_RoundTrip is the end-to-end regression test for the
// production "Binder Error: JSON path error near 'ai_session_id'" failures:
// PostHog/HogQL property keys that begin with '$' ($ai_session_id, $group_0)
// must extract their value instead of being mis-parsed by DuckDB as a malformed
// JSONPath. These cases transpile through the full pipeline and execute against
// a real in-memory DuckDB seeded with initPgCatalog (which registers the
// duckgres_json_extract_path macro), asserting the actual extracted value.
func TestJSONExtractDollarKey_RoundTrip(t *testing.T) {
runTransformCases(t, []transformCase{
{
// The exact broken shape: ->> with a '$'-prefixed literal key.
name: "dollar key via ->> extracts the value",
query: `SELECT ('{"$ai_session_id":"sess_abc"}'::json)->>'$ai_session_id'`,
want: "sess_abc",
},
{
// Direct json_extract_string(...) with a '$'-prefixed literal key —
// the function-call form clients also send.
name: "dollar key via direct json_extract_string extracts the value",
query: `SELECT json_extract_string('{"$group_0":"team_42"}', '$group_0')`,
want: "team_42",
},
{
// Chained arrows with '$'-prefixed keys at every step.
name: "chained dollar keys extract the nested value",
query: `SELECT ('{"$a":{"$b":"deep"}}'::json)->'$a'->>'$b'`,
want: "deep",
},
{
// Regression guard: a plain key must still be a literal-key lookup.
name: "plain key is unaffected",
query: `SELECT json_extract_string('{"plain":"ok"}', 'plain')`,
want: "ok",
},
{
// A '$.'/'$['-prefixed argument is a valid DuckDB JSONPath, so it is
// left to navigate. For a direct json_extract[_string] call this is
// unambiguously correct — the client wrote a DuckDB function and a
// DuckDB path.
name: "valid JSONPath in a direct call navigates",
query: `SELECT json_extract_string('{"a":{"b":"nested"}}', '$.a.b')`,
want: "nested",
},
{
// KNOWN, DELIBERATE DIVERGENCE: in PostgreSQL `data ->> '$.a.b'` means
// the literal key named "$.a.b" (-> NULL here), but we keep DuckDB
// JSONPath semantics for '$.'/'$['-prefixed arrow keys too — consistent
// with direct calls and with the pre-existing #639 transpiler test. This
// only affects keys that are *also* valid JSONPaths; the reported bug
// keys ($ai_session_id, $group_0 — no dot) are unaffected and HogQL does
// not emit dotted JSONPath property keys. See normalizeJSONPathKey.
name: "dotted JSONPath via arrow navigates (documented divergence)",
query: `SELECT ('{"a":{"b":"nested"}}'::json)->>'$.a.b'`,
want: "nested",
},
})
}

// TestJSONExtractDollarKey_ParameterRoundTrip is the bound-parameter variant of
// the '$'-prefixed key regression. When the JSON key is a placeholder ($1) the
// transpiler cannot see its value, so it wraps the path argument in the
// duckgres_json_extract_path() macro. The test binds real values at execute
// time and checks that:
//   - a '$'-prefixed key resolves instead of failing at bind time,
//   - a plain key bound to the same statement still resolves,
//   - an integer parameter still indexes into a JSON array,
//   - a string parameter that is not a JSON path argument is left untouched.
func TestJSONExtractDollarKey_ParameterRoundTrip(t *testing.T) {
	db, err := sql.Open("duckdb", ":memory:")
	if err != nil {
		t.Fatalf("open duckdb: %v", err)
	}
	defer func() { _ = db.Close() }()

	// Seeding the catalog runs initUtilityMacros, which is where
	// duckgres_json_extract_path is created.
	if err := initPgCatalog(db, processStartTime, processStartTime, "dev", "dev"); err != nil {
		t.Fatalf("initPgCatalog: %v", err)
	}

	// ConvertPlaceholders matches the extended-query protocol path, the one
	// that actually carries bound parameters.
	xp := transpiler.New(transpiler.Config{ConvertPlaceholders: true})

	const (
		doc    = `{"$ai_session_id":"sess_from_param","normal":"plain_value"}`
		arrDoc = `["zero","one","two"]`

		// Statements under test; each has exactly one placeholder.
		objCallSQL     = `SELECT json_extract_string('` + doc + `'::json, $1)`
		objArrowSQL    = `SELECT ('` + doc + `'::json)->>$1`
		objCallCastSQL = `SELECT json_extract_string('` + doc + `'::json, $1::text)`
		arrArrowSQL    = `SELECT ('` + arrDoc + `'::json)->>$1`
		arrCallSQL     = `SELECT json_extract_string('` + arrDoc + `'::json, $1)`
	)

	type paramCase struct {
		name  string
		query string
		arg   any
		want  string
	}

	table := []paramCase{
		{
			name:  "parameter holding a dollar key extracts the value (the prod bug)",
			query: objCallSQL,
			arg:   "$ai_session_id",
			want:  "sess_from_param",
		},
		{
			name:  "same statement with a normal key still works",
			query: objCallSQL,
			arg:   "normal",
			want:  "plain_value",
		},
		{
			name:  "arrow ->> with a parameter dollar key extracts the value",
			query: objArrowSQL,
			arg:   "$ai_session_id",
			want:  "sess_from_param",
		},
		{
			// P2 regression: some drivers append an explicit ::text cast to the
			// path parameter; the cast must not let it skip normalization.
			name:  "parameter dollar key with a ::text cast extracts the value",
			query: objCallCastSQL,
			arg:   "$ai_session_id",
			want:  "sess_from_param",
		},
		{
			// P2 regression: an INTEGER-bound parameter keeps Postgres
			// `json ->> int` array indexing. The type-aware macro produces a
			// $[i] path instead of turning the integer into a string key.
			name:  "integer parameter indexes the array (arrow)",
			query: arrArrowSQL,
			arg:   2,
			want:  "two",
		},
		{
			name:  "integer parameter indexes the array (direct call)",
			query: arrCallSQL,
			arg:   int64(0),
			want:  "zero",
		},
	}

	for i := range table {
		c := table[i]
		t.Run(c.name, func(t *testing.T) {
			out, err := xp.Transpile(c.query)
			if err != nil {
				t.Fatalf("transpile %q: %v", c.query, err)
			}
			if out.ParamCount != 1 {
				t.Fatalf("transpile %q: ParamCount = %d, want 1", c.query, out.ParamCount)
			}

			var val sql.NullString
			row := db.QueryRow(out.SQL, c.arg)
			if err := row.Scan(&val); err != nil {
				t.Fatalf("exec %q (transpiled %q) arg=%v: %v", c.query, out.SQL, c.arg, err)
			}
			if val.Valid && val.String == c.want {
				return
			}
			t.Fatalf("%q arg=%v = %v, want %q (transpiled %q)", c.query, c.arg, val, c.want, out.SQL)
		})
	}

	// A string parameter outside any JSON path position has to come back
	// verbatim: the fix must not rewrite string parameters globally.
	t.Run("ordinary string parameter is not wrapped", func(t *testing.T) {
		out, err := xp.Transpile("SELECT $1::varchar")
		if err != nil {
			t.Fatalf("transpile: %v", err)
		}

		var val sql.NullString
		row := db.QueryRow(out.SQL, "$ai_session_id")
		if err := row.Scan(&val); err != nil {
			t.Fatalf("exec (transpiled %q): %v", out.SQL, err)
		}
		if val.Valid && val.String == "$ai_session_id" {
			return
		}
		t.Fatalf("ordinary param = %v, want %q (transpiled %q)", val, "$ai_session_id", out.SQL)
	})
}
18 changes: 18 additions & 0 deletions tests/e2e-mw-dev/harness.sh
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,24 @@ pg_compat_functions() { # org password
assert_compat "$1" "$2" ducklake "SELECT ('{\"a\":1,\"b\":2}'::jsonb @> '{\"a\":1}'::jsonb)::int::text" "1" "jsonb_contains"
# #>> jsonpath text extraction.
assert_compat "$1" "$2" ducklake "SELECT '{\"a\":{\"b\":1}}'::json #>> '{a,b}'" "1" "jsonb_path_text"
# '$'-prefixed property keys ($ai_session_id, $group_0): DuckDB reads a
# '$'-prefixed json_extract path as a (malformed) JSONPath and fails at bind
# time ("JSON path error near ..."); the transpiler rewrites such literal keys
# to the quoted-member form $."<key>". Regression for the prod
# json_extract_string failures. Both the ->> arrow and the direct
# json_extract_string(...) call shapes (clients send both) are exercised.
assert_compat "$1" "$2" ducklake "SELECT '{\"\$ai_session_id\":\"sess9\"}'::json ->> '\$ai_session_id'" "sess9" "json_dollar_key_arrow"
assert_compat "$1" "$2" ducklake "SELECT json_extract_string('{\"\$group_0\":\"team7\"}', '\$group_0')" "team7" "json_dollar_key_func"
# duckgres_json_extract_path is the runtime normalization macro the transpiler
# wraps around bound-parameter ($N) JSON paths (whose value is unknown at
# transpile time). Calling it directly asserts it is registered AND
# memory.main-qualified on the live DuckLake worker — the path a parameterized
# `json_extract_string(props, $1)` from a HogQL client takes.
assert_compat "$1" "$2" ducklake "SELECT json_extract_string('{\"\$ai_session_id\":\"viaMacro\"}', duckgres_json_extract_path('\$ai_session_id'))" "viaMacro" "json_dollar_key_macro"
# The macro is type-aware: an integer path argument (Postgres `json ->> int`
# array indexing) becomes a $[i] JSONPath rather than being mangled into a
# string key. Guards the parameterized-array-index path on the live worker.
assert_compat "$1" "$2" ducklake "SELECT json_extract_string('[\"a\",\"b\",\"c\"]', duckgres_json_extract_path(2))" "c" "json_int_index_macro"
# PG curly-brace array literal cast.
assert_compat "$1" "$2" ducklake "SELECT array_length('{1,2,3}'::int[],1)::text" "3" "array_literal_cast"
# set-returning table macro in FROM position (memory.main-qualified).
Expand Down
Loading
Loading