From 51a1c99a93f3d8dabee977e24b4867e2c55e216b Mon Sep 17 00:00:00 2001 From: Raymond Jacobson Date: Fri, 8 May 2026 16:47:45 -0700 Subject: [PATCH] [Perf] Feed query: LATERAL + OFFSET 0 fence + per-followee cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The feed query had a ~10x planner-cliff for some users: identical SQL took 125ms for one user with 1752 follows but 9-18s for another user with 1816 follows. Cause: stale n_distinct stats on follows.follower_user_id make Postgres estimate follow_set at ~17,290 rows when actual is <2,000 — for the unlucky users it flips from a sane nested-loop plan to "materialize all 2M reposts of the past year, merge-join, then hash-join 1.4M tracks." Three changes hold the planner to nested-loop semantics: 1. follow_set CTE marked MATERIALIZED so its row count is fixed downstream rather than re-estimated through inlining. 2. Each branch joins follow_set via CROSS JOIN LATERAL with an OFFSET 0 fence inside the lateral subquery — this is the well-known optimization barrier that prevents Postgres from flattening the lateral back into a merge-join. 3. Per-followee LIMIT 100 (50 for owned playlists) caps the cost for users whose followees are very active. The outer query takes only the top-@limit by created_at, so reposts/tracks past the per-followee top-100 can never reach the response anyway. Verified end-to-end against the prod read replica via local server: user 20 (1752 follows) 500-750ms -> 280-300ms warm user 222 (1820 follows) ~4.5s -> 1.3-1.5s (3x) user 755516 (1816 follows) 9-18s -> 640-700ms (~20x) Existing TestUsersFeed regression covers the entity-type branches and the no-followees empty case; full ./api/... suite is green. 
--- api/v1_users_feed.go | 134 +++++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 50 deletions(-) diff --git a/api/v1_users_feed.go b/api/v1_users_feed.go index 788bd184..770c80a5 100644 --- a/api/v1_users_feed.go +++ b/api/v1_users_feed.go @@ -33,9 +33,20 @@ func (app *ApiServer) v1UsersFeed(c *fiber.Ctx) error { return err } + // follow_set is MATERIALIZED + each branch is a LATERAL with an OFFSET 0 + // optimization fence. Without this, Postgres mis-estimates follow_set + // cardinality for some users (stale n_distinct stats on + // follows.follower_user_id) and flips from a sane nested-loop plan to + // "materialize all 2M reposts of the past year, then merge-join," + // turning a sub-second feed into a 9-18 second feed. + // + // Per-followee LIMIT 100 (50 for playlists) caps the cost when a + // followee is very active. The outer query takes only the top-@limit + // by created_at, so any reposts/tracks past the per-followee top-100 + // can never reach the response anyway. sql := ` WITH - follow_set AS ( + follow_set AS MATERIALIZED ( SELECT followee_user_id AS user_id FROM follows WHERE @@ -50,27 +61,30 @@ func (app *ApiServer) v1UsersFeed(c *fiber.Ctx) error { ), history as ( - -- Track-type reposts. Splitting from playlist-type reposts so each - -- branch can use a per-row JOIN against the entity instead of forcing - -- the planner to hash every public playlist (~94k rows) just to filter - -- a handful of repost rows. + -- Track-type reposts. 
( SELECT 'track' as entity_type, - repost_item_id as entity_id, - min(reposts.created_at) as created_at - FROM reposts - JOIN follow_set using (user_id) - JOIN tracks ON repost_item_id = tracks.track_id + r.repost_item_id as entity_id, + min(r.created_at) as created_at + FROM follow_set fs + CROSS JOIN LATERAL ( + SELECT repost_item_id, created_at + FROM reposts + WHERE reposts.user_id = fs.user_id + AND reposts.repost_type = 'track' + AND reposts.created_at < @before + AND reposts.created_at >= @before - INTERVAL '1 YEAR' + AND reposts.is_delete = false + ORDER BY created_at DESC + LIMIT 100 + OFFSET 0 + ) r + JOIN tracks ON r.repost_item_id = tracks.track_id AND tracks.is_delete = false AND tracks.is_unlisted = false AND tracks.is_available = true - WHERE - @filter in ('all', 'repost') - AND reposts.repost_type = 'track' - AND reposts.created_at < @before - AND reposts.created_at >= @before - INTERVAL '1 YEAR' - AND reposts.is_delete = false + WHERE @filter in ('all', 'repost') GROUP BY entity_id ) @@ -79,20 +93,26 @@ func (app *ApiServer) v1UsersFeed(c *fiber.Ctx) error { -- Playlist/album-type reposts. 
( SELECT - reposts.repost_type::text as entity_type, - repost_item_id as entity_id, - min(reposts.created_at) as created_at - FROM reposts - JOIN follow_set using (user_id) - JOIN playlists ON repost_item_id = playlists.playlist_id + r.repost_type::text as entity_type, + r.repost_item_id as entity_id, + min(r.created_at) as created_at + FROM follow_set fs + CROSS JOIN LATERAL ( + SELECT repost_type, repost_item_id, created_at + FROM reposts + WHERE reposts.user_id = fs.user_id + AND reposts.repost_type <> 'track' + AND reposts.created_at < @before + AND reposts.created_at >= @before - INTERVAL '1 YEAR' + AND reposts.is_delete = false + ORDER BY created_at DESC + LIMIT 100 + OFFSET 0 + ) r + JOIN playlists ON r.repost_item_id = playlists.playlist_id AND playlists.is_delete = false AND playlists.is_private = false - WHERE - @filter in ('all', 'repost') - AND reposts.repost_type <> 'track' - AND reposts.created_at < @before - AND reposts.created_at >= @before - INTERVAL '1 YEAR' - AND reposts.is_delete = false + WHERE @filter in ('all', 'repost') GROUP BY entity_type, entity_id ) @@ -101,19 +121,26 @@ func (app *ApiServer) v1UsersFeed(c *fiber.Ctx) error { ( SELECT 'track' as entity_type, - track_id as entity_id, - created_at - from tracks - join follow_set on owner_id = user_id - where @filter in ('all', 'original') - AND created_at < @before - AND created_at >= @before::timestamp - INTERVAL '1 YEAR' - AND is_unlisted = false - AND is_delete = false - AND stem_of is null - AND (access_authorities IS NULL - OR (COALESCE(@authed_wallet, '') <> '' - AND EXISTS (SELECT 1 FROM unnest(access_authorities) aa WHERE lower(aa) = lower(@authed_wallet)))) + t.track_id as entity_id, + t.created_at + FROM follow_set fs + CROSS JOIN LATERAL ( + SELECT track_id, created_at + FROM tracks + WHERE owner_id = fs.user_id + AND created_at < @before + AND created_at >= @before::timestamp - INTERVAL '1 YEAR' + AND is_unlisted = false + AND is_delete = false + AND stem_of IS NULL + AND 
(access_authorities IS NULL + OR (COALESCE(@authed_wallet, '') <> '' + AND EXISTS (SELECT 1 FROM unnest(access_authorities) aa WHERE lower(aa) = lower(@authed_wallet)))) + ORDER BY created_at DESC + LIMIT 100 + OFFSET 0 + ) t + WHERE @filter in ('all', 'original') ) UNION ALL @@ -121,15 +148,22 @@ func (app *ApiServer) v1UsersFeed(c *fiber.Ctx) error { ( SELECT 'playlist' as entity_type, - playlist_id as entity_id, - created_at - from playlists - join follow_set on playlist_owner_id = user_id - where @filter in ('all', 'original') - AND created_at < @before - AND created_at >= @before - INTERVAL '1 YEAR' - AND is_delete = false - AND is_private = false + p.playlist_id as entity_id, + p.created_at + FROM follow_set fs + CROSS JOIN LATERAL ( + SELECT playlist_id, created_at + FROM playlists + WHERE playlist_owner_id = fs.user_id + AND created_at < @before + AND created_at >= @before - INTERVAL '1 YEAR' + AND is_delete = false + AND is_private = false + ORDER BY created_at DESC + LIMIT 50 + OFFSET 0 + ) p + WHERE @filter in ('all', 'original') ) )