|
| 1 | +import { test, expect, beforeEach, afterEach } from "vitest"; |
| 2 | +import { buildStatsFromDatabase } from "./build-stats.ts"; |
| 3 | +import { connectToSource } from "./sql/postgresjs.ts"; |
| 4 | +import { Connectable } from "./sync/connectable.ts"; |
| 5 | +import { |
| 6 | + IndexOptimizer, |
| 7 | + PostgresQueryBuilder, |
| 8 | + Statistics, |
| 9 | + type Postgres, |
| 10 | +} from "@query-doctor/core"; |
| 11 | + |
// Name of the scratch database that is recreated from scratch for every test.
const TEST_DB = "querydoctor_test";
// Base connection URL for the local PostgreSQL server (no database segment).
const PG_URL = "postgresql://localhost:5432";

// Per-test connection to TEST_DB; assigned in beforeEach, closed in afterEach.
let db: Postgres;
| 16 | + |
| 17 | +async function execOnAdmin(sql: string) { |
| 18 | + const admin = connectToSource(Connectable.fromString(`${PG_URL}/postgres`)); |
| 19 | + try { |
| 20 | + await admin.exec(sql); |
| 21 | + } finally { |
| 22 | + await (admin as unknown as { close(): Promise<void> }).close(); |
| 23 | + } |
| 24 | +} |
| 25 | + |
// Recreate TEST_DB before every test so each test starts with an empty schema.
// Order matters: drop any leftover database, create a fresh one, then connect.
// DROP/CREATE must run on the admin connection, never on TEST_DB itself.
beforeEach(async () => {
  await execOnAdmin(`DROP DATABASE IF EXISTS ${TEST_DB}`);
  await execOnAdmin(`CREATE DATABASE ${TEST_DB}`);
  db = connectToSource(Connectable.fromString(`${PG_URL}/${TEST_DB}`));
});
| 31 | + |
// Teardown order matters: close the per-test connection first (PostgreSQL
// refuses to drop a database that still has open connections), then drop
// TEST_DB so the server is left clean for the next test.
afterEach(async () => {
  // NOTE(review): same close() double-cast workaround as execOnAdmin —
  // confirm the Postgres type and remove the cast if it gains close().
  await (db as unknown as { close(): Promise<void> }).close();
  await execOnAdmin(`DROP DATABASE IF EXISTS ${TEST_DB}`);
});
| 36 | + |
| 37 | +test("sets reltuples to 10,000 for tables below threshold, preserves real relpages", async () => { |
| 38 | + await db.exec(` |
| 39 | + CREATE TABLE users(id serial PRIMARY KEY, name text, email text); |
| 40 | + CREATE INDEX users_email_idx ON users(email); |
| 41 | + INSERT INTO users (name, email) |
| 42 | + SELECT 'user_' || i, 'user_' || i || '@example.com' |
| 43 | + FROM generate_series(1, 1000) AS i; |
| 44 | + ANALYZE; |
| 45 | + `); |
| 46 | + |
| 47 | + const mode = await buildStatsFromDatabase(db); |
| 48 | + |
| 49 | + expect(mode.kind).toBe("fromStatisticsExport"); |
| 50 | + if (mode.kind !== "fromStatisticsExport") throw new Error("unreachable"); |
| 51 | + |
| 52 | + const usersStats = mode.stats.find((s) => s.tableName === "users"); |
| 53 | + expect(usersStats).toBeDefined(); |
| 54 | + // 1000 rows is below the 5,000 threshold → bumped to 10,000 |
| 55 | + expect(usersStats!.reltuples).toBe(10_000); |
| 56 | + // 1000 rows should produce more than 1 page |
| 57 | + expect(usersStats!.relpages).toBeGreaterThan(1); |
| 58 | + |
| 59 | + // Verify indexes are included |
| 60 | + const emailIdx = usersStats!.indexes.find( |
| 61 | + (i) => i.indexName === "users_email_idx", |
| 62 | + ); |
| 63 | + expect(emailIdx).toBeDefined(); |
| 64 | + expect(emailIdx!.relpages).toBeGreaterThanOrEqual(1); |
| 65 | +}); |
| 66 | + |
| 67 | +test("clamps relpages to at least 1 for empty tables", async () => { |
| 68 | + await db.exec(` |
| 69 | + CREATE TABLE empty_table(id serial PRIMARY KEY, data text); |
| 70 | + ANALYZE; |
| 71 | + `); |
| 72 | + |
| 73 | + const mode = await buildStatsFromDatabase(db); |
| 74 | + if (mode.kind !== "fromStatisticsExport") throw new Error("unreachable"); |
| 75 | + |
| 76 | + const stats = mode.stats.find((s) => s.tableName === "empty_table"); |
| 77 | + expect(stats).toBeDefined(); |
| 78 | + expect(stats!.reltuples).toBe(10_000); |
| 79 | + expect(stats!.relpages).toBeGreaterThanOrEqual(1); |
| 80 | +}); |
| 81 | + |
| 82 | +test("density stays realistic regardless of actual row count", async () => { |
| 83 | + // This is the core bug: with fromAssumption(reltuples=10000, relpages=1), |
| 84 | + // PostgreSQL calculates estimated_tuples = actual_pages * 10000 / 1, |
| 85 | + // inflating estimates proportionally to actual data volume. |
| 86 | + // |
| 87 | + // buildStatsFromDatabase fixes this by using the real relpages so that |
| 88 | + // estimated_tuples = actual_pages * 10000 / actual_relpages ≈ 10000. |
| 89 | + await db.exec(` |
| 90 | + CREATE TABLE orders(id serial PRIMARY KEY, user_id int, total numeric); |
| 91 | + CREATE INDEX orders_user_id_idx ON orders(user_id); |
| 92 | + INSERT INTO orders (user_id, total) |
| 93 | + SELECT (random() * 1000)::int, random() * 100 |
| 94 | + FROM generate_series(1, 10000); |
| 95 | + ANALYZE; |
| 96 | + `); |
| 97 | + |
| 98 | + const mode = await buildStatsFromDatabase(db); |
| 99 | + if (mode.kind !== "fromStatisticsExport") throw new Error("unreachable"); |
| 100 | + |
| 101 | + const ordersStats = mode.stats.find((s) => s.tableName === "orders"); |
| 102 | + expect(ordersStats).toBeDefined(); |
| 103 | + |
| 104 | + // The key invariant: reltuples / relpages should give a reasonable |
| 105 | + // density, NOT the broken 10000/1 = 10000 tuples-per-page. |
| 106 | + const density = ordersStats!.reltuples / ordersStats!.relpages; |
| 107 | + // Real density for a table with int+int+numeric columns is roughly |
| 108 | + // 50-200 tuples per page. The override should preserve this ratio. |
| 109 | + expect(density).toBeLessThan(500); |
| 110 | + expect(density).toBeGreaterThan(10); |
| 111 | +}); |
| 112 | + |
| 113 | +test("groups indexes by their parent table", async () => { |
| 114 | + await db.exec(` |
| 115 | + CREATE TABLE products(id serial PRIMARY KEY, name text, price numeric); |
| 116 | + CREATE INDEX products_name_idx ON products(name); |
| 117 | + CREATE INDEX products_price_idx ON products(price); |
| 118 | + CREATE TABLE categories(id serial PRIMARY KEY, label text); |
| 119 | + ANALYZE; |
| 120 | + `); |
| 121 | + |
| 122 | + const mode = await buildStatsFromDatabase(db); |
| 123 | + if (mode.kind !== "fromStatisticsExport") throw new Error("unreachable"); |
| 124 | + |
| 125 | + const products = mode.stats.find((s) => s.tableName === "products"); |
| 126 | + expect(products).toBeDefined(); |
| 127 | + const indexNames = products!.indexes.map((i) => i.indexName).sort(); |
| 128 | + expect(indexNames).toContain("products_name_idx"); |
| 129 | + expect(indexNames).toContain("products_price_idx"); |
| 130 | + expect(indexNames).toContain("products_pkey"); |
| 131 | + |
| 132 | + const categories = mode.stats.find((s) => s.tableName === "categories"); |
| 133 | + expect(categories).toBeDefined(); |
| 134 | + const catIndexNames = categories!.indexes.map((i) => i.indexName); |
| 135 | + expect(catIndexNames).toContain("categories_pkey"); |
| 136 | + expect(catIndexNames).not.toContain("products_name_idx"); |
| 137 | +}); |
| 138 | + |
| 139 | +test("planner estimates 10,000 rows with only 1 row seeded", async () => { |
| 140 | + // This is the end-to-end proof: seed 1 row, run ANALYZE, |
| 141 | + // build stats, feed them through core's restoreStats + EXPLAIN, |
| 142 | + // and verify the planner sees ~10,000 rows — not 1. |
| 143 | + await db.exec(` |
| 144 | + CREATE TABLE widgets(id serial PRIMARY KEY, user_id uuid, name text); |
| 145 | + INSERT INTO widgets (user_id, name) VALUES ('00000000-0000-0000-0000-000000000001', 'w1'); |
| 146 | + ANALYZE; |
| 147 | + `); |
| 148 | + |
| 149 | + const mode = await buildStatsFromDatabase(db); |
| 150 | + const stats = await Statistics.fromPostgres(db, mode); |
| 151 | + const existingIndexes = await stats.getExistingIndexes(); |
| 152 | + const optimizer = new IndexOptimizer(db, stats, existingIndexes); |
| 153 | + |
| 154 | + const builder = new PostgresQueryBuilder("SELECT * FROM widgets"); |
| 155 | + const plan = await optimizer.testQueryWithStats(builder); |
| 156 | + |
| 157 | + // The planner's "Plan Rows" should be exactly 10,000 — NOT 1. |
| 158 | + const estimatedRows = plan.Plan["Plan Rows"]; |
| 159 | + expect(estimatedRows).toBe(10_000); |
| 160 | +}); |
| 161 | + |
| 162 | +test("planner estimates 10,000 rows with 10,000 rows seeded", async () => { |
| 163 | + // Same test but with 10,000 actual rows — the estimate should be |
| 164 | + // the same, proving the stats override works regardless of actual data. |
| 165 | + await db.exec(` |
| 166 | + CREATE TABLE widgets(id serial PRIMARY KEY, user_id uuid, name text); |
| 167 | + INSERT INTO widgets (user_id, name) |
| 168 | + SELECT gen_random_uuid(), 'widget_' || i |
| 169 | + FROM generate_series(1, 10000) AS i; |
| 170 | + ANALYZE; |
| 171 | + `); |
| 172 | + |
| 173 | + const mode = await buildStatsFromDatabase(db); |
| 174 | + const stats = await Statistics.fromPostgres(db, mode); |
| 175 | + const existingIndexes = await stats.getExistingIndexes(); |
| 176 | + const optimizer = new IndexOptimizer(db, stats, existingIndexes); |
| 177 | + |
| 178 | + const builder = new PostgresQueryBuilder("SELECT * FROM widgets"); |
| 179 | + const plan = await optimizer.testQueryWithStats(builder); |
| 180 | + |
| 181 | + const estimatedRows = plan.Plan["Plan Rows"]; |
| 182 | + expect(estimatedRows).toBe(10_000); |
| 183 | +}); |
| 184 | + |
| 185 | +test("planner estimates 10,000 rows even with 50,000 rows seeded", async () => { |
| 186 | + await db.exec(` |
| 187 | + CREATE TABLE widgets(id serial PRIMARY KEY, user_id uuid, name text); |
| 188 | + INSERT INTO widgets (user_id, name) |
| 189 | + SELECT gen_random_uuid(), 'widget_' || i |
| 190 | + FROM generate_series(1, 50000) AS i; |
| 191 | + ANALYZE; |
| 192 | + `); |
| 193 | + |
| 194 | + const mode = await buildStatsFromDatabase(db); |
| 195 | + const stats = await Statistics.fromPostgres(db, mode); |
| 196 | + const existingIndexes = await stats.getExistingIndexes(); |
| 197 | + const optimizer = new IndexOptimizer(db, stats, existingIndexes); |
| 198 | + |
| 199 | + const builder = new PostgresQueryBuilder("SELECT * FROM widgets"); |
| 200 | + const plan = await optimizer.testQueryWithStats(builder); |
| 201 | + |
| 202 | + const estimatedRows = plan.Plan["Plan Rows"]; |
| 203 | + expect(estimatedRows).toBe(10_000); |
| 204 | +}); |
| 205 | + |
| 206 | +test("BUG: fromAssumption(relpages=1) inflates estimates with real data", async () => { |
| 207 | + // Demonstrates the bug in core's fromAssumption mode. |
| 208 | + // With 10,000 rows seeded (~74 real pages), the planner calculates: |
| 209 | + // estimated_tuples = actual_pages × reltuples ÷ relpages |
| 210 | + // = 74 × 10,000 ÷ 1 = 740,000 |
| 211 | + // The estimate is wildly inflated — 74x the correct value. |
| 212 | + await db.exec(` |
| 213 | + CREATE TABLE widgets(id serial PRIMARY KEY, user_id uuid, name text); |
| 214 | + INSERT INTO widgets (user_id, name) |
| 215 | + SELECT gen_random_uuid(), 'widget_' || i |
| 216 | + FROM generate_series(1, 10000) AS i; |
| 217 | + ANALYZE; |
| 218 | + `); |
| 219 | + |
| 220 | + const brokenMode = Statistics.defaultStatsMode; // fromAssumption(10000, 1) |
| 221 | + const stats = await Statistics.fromPostgres(db, brokenMode); |
| 222 | + const existingIndexes = await stats.getExistingIndexes(); |
| 223 | + const optimizer = new IndexOptimizer(db, stats, existingIndexes); |
| 224 | + |
| 225 | + const builder = new PostgresQueryBuilder("SELECT * FROM widgets"); |
| 226 | + const plan = await optimizer.testQueryWithStats(builder); |
| 227 | + |
| 228 | + const estimatedRows = plan.Plan["Plan Rows"]; |
| 229 | + // With the bug, this is ~740,000 — NOT 10,000. |
| 230 | + expect(estimatedRows).toBeGreaterThan(100_000); |
| 231 | +}); |
| 232 | + |
| 233 | +test("leaves columns null so ANALYZE pg_statistic entries persist", async () => { |
| 234 | + await db.exec(` |
| 235 | + CREATE TABLE items(id serial PRIMARY KEY, label text); |
| 236 | + INSERT INTO items (label) SELECT 'item_' || i FROM generate_series(1, 100) AS i; |
| 237 | + ANALYZE; |
| 238 | + `); |
| 239 | + |
| 240 | + const mode = await buildStatsFromDatabase(db); |
| 241 | + if (mode.kind !== "fromStatisticsExport") throw new Error("unreachable"); |
| 242 | + |
| 243 | + const items = mode.stats.find((s) => s.tableName === "items"); |
| 244 | + expect(items).toBeDefined(); |
| 245 | + // columns must be null — core's restoreStats only overwrites pg_statistic |
| 246 | + // when columns are provided. Leaving them null means the ANALYZE-populated |
| 247 | + // statistics persist across the rolled-back transaction. |
| 248 | + expect(items!.columns).toBeNull(); |
| 249 | +}); |
0 commit comments