From f17353f78f4071ef7a65a82859f2f4038790fd5b Mon Sep 17 00:00:00 2001 From: karczuRF Date: Mon, 8 Jun 2026 13:27:53 +0200 Subject: [PATCH 01/17] chore(lore-0038): convert task to directory form Prep for adding a notes/ subdir. Rename README + shift relative links one level deeper to match the new path depth. --- .../README.md} | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) rename lore/1-tasks/active/{0038_FEATURE_prices-ledger-processor-lambda.md => 0038_FEATURE_prices-ledger-processor-lambda/README.md} (93%) diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md similarity index 93% rename from lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda.md rename to lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md index f545a1c..9b711e9 100644 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -8,17 +8,17 @@ related_tasks: ["0011", "0037", "0045", "0047", "0048"] tags: [layer-indexing, priority-high, effort-large, milestone-M1, stream-1, lambda, ingestion, rust, aws, clickhouse, hetzner] milestone: 1 links: - - "../../../docs/prices-api-general-overview.md" - - "../../2-adrs/0001_stream1-clickhouse-sourced-amm-backfill.md" - - "../../2-adrs/0003_price-ohlcv-pk-includes-quote-asset-id.md" - - "../../2-adrs/0004_price-ohlcv-multi-source-merge-columns.md" - - "../../2-adrs/0005_stream2-sdex-local-workstation-backfill.md" - - "../../2-adrs/0006_runtime-framework-rust-axum.md" - - "../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" - - "../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" - - "../backlog/0011_FEATURE_bootstrap-cdk-with-ssm-platform-lookups.md" - - "../backlog/0037_FEATURE_tranche1-ledger-processor-skeleton.md" - - 
"../backlog/0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" + - "../../../../docs/prices-api-general-overview.md" + - "../../../2-adrs/0001_stream1-clickhouse-sourced-amm-backfill.md" + - "../../../2-adrs/0003_price-ohlcv-pk-includes-quote-asset-id.md" + - "../../../2-adrs/0004_price-ohlcv-multi-source-merge-columns.md" + - "../../../2-adrs/0005_stream2-sdex-local-workstation-backfill.md" + - "../../../2-adrs/0006_runtime-framework-rust-axum.md" + - "../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" + - "../../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" + - "../../archive/0011_FEATURE_bootstrap-cdk-with-ssm-platform-lookups.md" + - "../../archive/0037_FEATURE_tranche1-ledger-processor-skeleton.md" + - "../../backlog/0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" history: - date: 2026-05-18 status: backlog From 1137464f5a8a0fe39050cbc18eff9d19e8ea9778 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Mon, 8 Jun 2026 13:28:02 +0200 Subject: [PATCH 02/17] docs(lore-0038): draft local-prototype spec for BE meeting G-note describing the local-only Lambda binary scope (Part A), explicit non-goals (Part B), the cross-team contract agenda for the BE conversation (Part C), open questions (Part D), and the production-rewrite punch list once the engineering gates (BE 0227 mTLS endpoint, task 0047 throughput verification) clear (Part E). 
--- .../notes/G-local-prototype-spec.md | 517 ++++++++++++++++++ 1 file changed, 517 insertions(+) create mode 100644 lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md new file mode 100644 index 0000000..d67af5f --- /dev/null +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md @@ -0,0 +1,517 @@ +--- +id: "G-local-prototype-spec" +title: "Local-prototype scope + BE cross-team contract for the Prices Ledger Processor Lambda" +type: G +task: "0038" +status: developing +spawned_from: [] +spawns: [] +related_notes: [] +links: + - "../../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" + - "../../../../2-adrs/0006_runtime-framework-rust-axum.md" + - "../../../../2-adrs/0005_stream2-sdex-local-workstation-backfill.md" + - "../../../../2-adrs/0003_price-ohlcv-pk-includes-quote-asset-id.md" + - "../../../../2-adrs/0004_price-ohlcv-multi-source-merge-columns.md" + - "../../../archive/0048_RESEARCH_soroban-events-pricing-decoder-spec/notes/G-soroban-events-pricing-decoder.md" + - "../../../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" + - "../../../archive/0037_FEATURE_tranche1-ledger-processor-skeleton.md" + - "../../../backlog/0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" + - "../../../../../docs/prices-api-general-overview.md" +--- + +# Local-prototype spec + BE cross-team contract + +> **Audience:** prices-api implementer (Part A), BE team reviewers (Part C). +> **Status:** draft for cross-team discussion. 
+> **Why this note exists:** task 0038's 2026-06-08 activation history +> entry promised a "local-only binary + design document" deliverable +> while the original engineering blockers (BE 0227 mTLS endpoint, +> task 0047 cross-tenant throughput) remain open. This document is +> that design. + +--- + +## 0. TL;DR + +We are building a **local-only** Rust Lambda binary that exercises +the live-ingestion path end-to-end against recorded fixtures — +S3 event → XDR decode → `dispatch()` → 1-min OHLCV bucketing → +**stub sink** (stdout / file emit). It does NOT deploy to AWS, does +NOT register on BE's S3 bucket, does NOT write to Hetzner ClickHouse. +The prototype's value is twofold: + +1. **De-risk the binary shape** — prove the kernel from task 0037 + composes correctly with `lambda_runtime`, `aws_sdk_s3` (mocked at + the trait boundary), and the `xdr-parser` decode crate. +2. **Ground the BE meeting** — Part C of this note is the concrete + list of cross-team commitments the production Lambda needs. + Giving BE a runnable binary + a written contract is cheaper than + asking for those commitments in the abstract. + +When the gating events clear (BE 0227 ships; task 0047 verifies +throughput GREEN/YELLOW), the prototype's interior is reused; +only the sink, the S3 client, and the CDK packaging change. + +--- + +## Part A — Local prototype scope + +### A.1 What the binary does + +A single Rust binary, `prices-ledger-processor`, that on each +invocation: + +1. Accepts an `aws_lambda_events::s3::S3Event` JSON document on + stdin (when run via `cargo lambda invoke`) or on `--event ` + (when run via `cargo run`). +2. For each record in the event, **fetches the referenced object** + via an `ObjectFetcher` trait — wired in prototype mode to a + local-disk implementation that maps `s3://bucket/key` to + `fixtures/`. +3. **zstd-decompresses** the bytes (Galexie output is `*.xdr.zstd` + per general-overview §5.1). +4. 
**Decodes** the bytes as `LedgerCloseMeta` via the BE-authored + `xdr-parser` crate (ADR 0005 §3, ADR 0006 §Decision). +5. **Normalizes** Soroban contract events into the + `SorobanEventRow` shape consumed by `dispatch()` (the kernel + from task 0037), grouped by `(transaction_id, contract_id)`. +6. **Calls** `ledger_processor::dispatch::dispatch(&rows, &venue_registry, &phoenix_registry)` + and collects the returned `TradeRow` set. +7. **Buckets** trades into 1-minute OHLCV candles in-process per + the merge formula from ADR 0004 §Decision (preserve `open`, + overwrite `close`, `GREATEST(high)`, `LEAST(low)`, sum + `volume_base`/`volume_quote_usd`/`trade_count`, recompute `vwap`). +8. **Writes** to a stub sink (see A.7) — no network egress. + +### A.2 Workspace placement + +``` +packages/ +├── extractors-core/ # existing (from 0037) +├── ledger-processor/ # existing (from 0037) +├── phoenix-extractor/ # existing (from 0037) +├── soroswap-extractor/ # existing (from 0037, stub) +├── aquarius-extractor/ # existing (from 0037, stub) +├── sdex-backfill/ # existing +└── prices-ledger-processor/ # NEW — this prototype + ├── Cargo.toml + ├── src/ + │ ├── main.rs # lambda_runtime entrypoint + │ ├── handler.rs # S3Event → Vec + │ ├── decode.rs # xdr-parser → SorobanEventRow + │ ├── bucket.rs # 1-min OHLCV merge (ADR 0004) + │ ├── sink/ # writer abstraction + │ │ ├── mod.rs # trait `OhlcvSink` + │ │ ├── stdout.rs # JSON-lines to stdout + │ │ └── sql_file.rs # ALTER-friendly SQL dump + │ └── object_fetcher/ # input abstraction + │ ├── mod.rs # trait `ObjectFetcher` + │ └── local_disk.rs # `fixtures/` mapping + ├── fixtures/ # gitignored sample LedgerCloseMeta + └── tests/ + └── e2e_fixture.rs # one ledger end-to-end test +``` + +The two trait boundaries (`ObjectFetcher`, `OhlcvSink`) are +deliberate seams: production swaps `local_disk` for `aws_sdk_s3` +and `stdout` for a ClickHouse `clickhouse::Client`. Everything +else stays. 
+ +### A.3 Inputs — fixture, not S3 + +For the prototype, fixtures come from BE's existing +`stellar-ledger-data/` bucket layout — we copy a handful of +`*.xdr.zstd` files locally, plus a matching `S3Event` JSON +mocked from CloudTrail format. Concretely: + +``` +packages/prices-ledger-processor/fixtures/ +├── events/ +│ ├── single-soroban-swap.json # 1 record, 1 Phoenix swap +│ ├── multi-swap-batch.json # 1 record, 4 swaps mixed venues +│ └── empty-ledger.json # 1 record, no swaps (negative test) +└── ledgers/ + ├── 62019999.xdr.zstd # known-Phoenix-swap ledger + ├── 62020247.xdr.zstd # known multi-venue ledger + └── 62079982.xdr.zstd # known empty ledger +``` + +Fixture ledgers are picked from the 10k uniform sample analysed +in task 0046 / 0048 — same evidence base as the decoder spec, so +expected outputs are pre-known. + +### A.4 Decode boundary — `xdr-parser` + +The `xdr-parser` BE-authored crate is consumed as a `git`-source +Cargo dependency per ADR 0005 §3: + +```toml +[dependencies] +xdr-parser = { git = "ssh://git@github.com/rumblefishdev/soroban-block-explorer.git", branch = "main", package = "xdr-parser" } +``` + +**Open question for BE (C.4):** the production form needs a +**pinned tag** (e.g. `xdr-parser-v0.4.2`), not a moving `main`. +BE owns the release cadence. The prototype can ride `main` for +now; the production rewrite cannot. + +What we need out of the crate: + +- `LedgerCloseMeta::decode(&[u8]) -> Result` +- A walk of `SorobanTransactionMeta.events` that yields + `(transaction_id, contract_id, event_index, topics, data)` + tuples (already implemented in BE's local Ledger Processor — + we want the same path exposed as a library function). + +If BE has not yet exposed that walk as a library API (it may live +inside their Lambda binary today), C.4 asks them to lift it. + +### A.5 Kernel boundary — `dispatch()` + +The prototype does NOT re-implement extraction. 
It calls the +existing kernel surface from task 0037: + +```rust +use ledger_processor::dispatch::dispatch; +use extractors_core::{SorobanEventRow, TradeRow, VenueRegistry}; +use phoenix_extractor::PhoenixPoolRegistry; + +let trades: Vec<TradeRow> = dispatch(&rows, &venue_registry, &phoenix_registry)?; +``` + +Today the kernel implements Phoenix XYK only. Soroswap and +Aquarius extractors return `VenueNotImplemented`. The prototype +tolerates that error variant — it counts those rows in an +`unimplemented_venue` metric and continues, exactly like the +production Lambda should once those extractors land. + +**Implication for the BE meeting:** Soroswap and Aquarius live +ingestion is **gated on extractor work that is NOT part of this +task** (separate FEATURE tasks, not yet spawned). The Lambda +shape is complete without them; the venues just yield empty +output until their extractors arrive. + +### A.6 OHLCV bucketing + +In-process, no DB round-trip. Pseudocode: + +```rust +let bucket_key = |t: &TradeRow| OhlcvKey { + timestamp: floor_to_minute(t.closed_at), + asset_id: t.base_asset(), + granularity: Granularity::OneMinute, + quote_asset_id: t.quote_asset(), // ADR 0003 + source: t.venue.into(), // ADR 0004 +}; + +let mut candles: HashMap<OhlcvKey, OhlcvRow> = HashMap::new(); +for trade in trades { + candles.entry(bucket_key(&trade)) + .and_modify(|c| c.merge(&trade)) // ADR 0004 merge formula + .or_insert_with(|| OhlcvRow::from_first_trade(&trade)); +} +``` + +The `merge` impl is the canonical place to keep the incremental- +merge SQL: it gets tested in-process and the production rewrite +can either reuse the in-memory merge or translate it to a CH +`AggregatingMergeTree` materialised view (per task 0048's +recommendation §6.3). + +### A.7 Sinks — stub only + +Two prototype sinks, both pure-local: + +1. **`StdoutJsonSink`** — emits one JSON line per `OhlcvRow` to + stdout. Tail-friendly, grep-able, diff-able across runs. +2. 
**`SqlFileSink`** — writes one `.sql` file per invocation + under `out/` containing the `INSERT INTO prices.price_ohlcv ... + ON CONFLICT ...` statements the production writer would emit. + This is the artefact we hand to BE in the meeting — they can + read it and tell us whether the column shape lines up with what + their `prices.*` database (per ADR 0007) is going to host. + +**Explicitly out of prototype scope:** + +- No `clickhouse::Client` connection (no Hetzner reachability yet). +- No RDS Postgres connection (ADR 0007 supersedes the RDS path). +- No CloudWatch metric / log emit (stdout structured-JSON is + enough; CloudWatch is a deployment concern). + +### A.8 Operator invocation surface + +Two modes the operator on a local machine can use: + +```bash +# Mode 1: lambda_runtime via cargo-lambda (closer to production) +cargo lambda invoke prices-ledger-processor \ + --data-file fixtures/events/single-soroban-swap.json + +# Mode 2: direct cargo run (faster iteration) +cargo run -p prices-ledger-processor -- \ + --event fixtures/events/multi-swap-batch.json \ + --sink stdout +``` + +Mode 2 is the inner-loop. Mode 1 proves the `provided.al2` +runtime shape works locally. + +### A.9 Prototype acceptance + +- [ ] `cargo build -p prices-ledger-processor --release` succeeds. +- [ ] `cargo lambda invoke` against `single-soroban-swap.json` + emits the expected `OhlcvRow` for the known Phoenix XLM/USDC + swap in ledger 62019999. +- [ ] `cargo run -- --event multi-swap-batch.json --sink sql_file` + produces a `.sql` file whose `INSERT ... ON CONFLICT ...` + statements use the PK shape mandated by ADR 0003 + (`timestamp, asset_id, granularity, quote_asset_id`) and the + merge columns from ADR 0004. +- [ ] Re-running the same invocation is bit-identical (idempotent; + proves the merge is deterministic). 
+- [ ] One `tests/e2e_fixture.rs` test, runnable on a clean clone + with `nx test prices-ledger-processor`, that covers the + whole pipeline against one of the three ledger fixtures. +- [ ] This G-note's Part C reviewed by BE; their answers captured + below the questions inline (or as a follow-up G-note). + +No deployment, no AWS calls, no live network. + +--- + +## Part B — Out of prototype scope (explicit non-goals) + +Listed so the meeting doesn't accidentally extend scope: + +- **CDK stack.** No `infra/aws-cdk/` changes. The original + Implementation Plan Step 4 in this task's README is deferred to + the production-rewrite task (see Part E). +- **S3 notification registration on BE's bucket.** The prototype + never touches the real bucket. Registration is a BE-coordination + step, not a unilateral one (general-overview §5.1). +- **SSM platform-key consumption.** No `/platform/{env}/*` reads. + The prototype takes bucket name and key prefix as CLI args / + env vars only. +- **mTLS to Hetzner ClickHouse.** No certificates issued, no + `clickhouse-rs` wiring. Sink stays local. +- **VPC, IAM, Lambda execution role.** All AWS-side; deferred. +- **CloudWatch alarms, X-Ray traces, DLQ.** Observability is + prototype-side stdout JSON only. +- **Soroswap / Aquarius extractor bodies.** The prototype tolerates + `VenueNotImplemented`; those bodies are separate tasks. +- **SDEX trade extraction.** The 0037 kernel currently dispatches + Soroban-only; classic SDEX ops travel a different path that the + Lambda inherits when 0022's extractor lands. + +--- + +## Part C — Cross-team contract (BE meeting agenda) + +This is the action-item list for the BE conversation. Each item is +phrased as a concrete decision we need from them, with the +prices-api position pre-staked so the meeting is about confirming +or pushing back, not co-designing from scratch. 
+ +### C.1 — S3 notification registration on `stellar-ledger-data/` + +**The ask:** add `prices-ledger-processor` as a **second** +event-notification target on the existing bucket, for `s3:ObjectCreated:*` +events under the same key prefix BE's own Ledger Processor consumes. + +**Why a contract item:** the bucket is BE-owned. Adding a second +target requires a CDK change in BE's infra repo (or wherever the +bucket lives), not in ours. + +**Open sub-questions for the meeting:** + +- Do BE's event filters include `*.xdr.zstd` only, or do we need + client-side filtering? (Prices Lambda will filter regardless, + but we'd rather not fire on irrelevant objects.) +- Should this be SNS-fan-out (ADR 0007's "Cluster A: + announcement-not-approval" norm hints at SNS) or two direct + Lambda subscriptions? Trade-off: SNS adds 1 hop but decouples + consumer changes from BE's bucket config. + +### C.2 — SSM platform keys + +Per the `ssm-key-contract-split` memory: `/platform/{env}/*` is +BE-owned, `/prices/{env}/*` is prices-owned. The Lambda needs +to read **identifier-only** values (never bulk trust material) +from `/platform/{env}/*`. Proposed key set: + +| Key | Type | Purpose | +|-----|------|---------| +| `/platform/{env}/stellar-ledger-data-bucket-arn` | String | bucket the Lambda is subscribed to | +| `/platform/{env}/stellar-ledger-data-bucket-name` | String | for S3 client GetObject (avoid ARN parse) | +| `/platform/{env}/stellar-ledger-data-kms-key-arn` | String | KMS key the bucket uses for SSE-KMS (if any) so the Lambda role can be granted `kms:Decrypt` | +| `/platform/{env}/hetzner-ch-endpoint` | String | Caddy address for `prices.*` writes (per ADR 0007) | +| `/platform/{env}/hetzner-ch-ca-cert-arn` | String | ARN of the Secrets Manager secret holding the BE-issued CA cert for mTLS validation | + +**The ask:** BE commits to populating these keys (with appropriate +IAM read grants for the prices-api Lambda role) and notifying us +before any rotation. 
**None of these contain secrets**; the mTLS +key+cert pair lives under `/prices/{env}/*` and is owned by us. + +**Open sub-question for the meeting:** + +- Naming — do the keys above match BE's existing `/platform/` + conventions, or should they live under a sub-namespace + (`/platform/{env}/stellar-ledger-data/...`)? + +### C.3 — IAM principal authorisation + +The prototype doesn't need this; the production Lambda does. + +**The ask:** BE's bucket-policy and KMS-key-policy explicitly trust +the prices-api Lambda execution role ARN. The role ARN will be +exported from the prices-api CDK stack and published under +`/prices/{env}/lambda-ledger-processor-role-arn` for BE to +consume in their own CDK. + +This is the standard cross-account / cross-stack handshake; the +contract is just "BE agrees to wire this once it lands." + +### C.4 — `xdr-parser` crate publishing + +The Lambda depends on BE's `xdr-parser` crate via a `git`-source +Cargo dep (ADR 0005 §3). + +**The ask:** + +- BE publishes **tagged releases** of `xdr-parser` (e.g. + `xdr-parser-v0.x.y`). Prices-api pins to a tag, not `main`. +- BE exposes the `LedgerCloseMeta` → `(tx_id, contract_id, events)` + walk as a public library function (not just an internal helper + in their Lambda binary). If it already is public, point us at + it. +- BE commits to **semver discipline** on that public surface: + payload-shape changes get a MAJOR bump, additions get MINOR, + bug fixes get PATCH. We don't need an SLA on cadence, just on + semver. + +**Open sub-question for the meeting:** + +- Cargo registry vs git tag: would BE prefer to publish to a + private cargo registry (crates.io is public; there's no + obvious private registry today)? Git tags are fine for now; + flagging in case BE has a preference. + +### C.5 — Hetzner ClickHouse mTLS write contract + +Per ADR 0007 §Decision: prices-api writes into a separate `prices.*` +database on BE's Hetzner CH cluster over mTLS via Caddy. 
+ +**The ask (production-only, surfaced now for awareness):** + +- A `prices` database (CH-level), not `default`. ADR 0007 §5 + notes the "separate-`prices`-database shape" was the all-yes + outcome of task 0045's Cluster A. +- A CH user `prices_writer` (or similar) with `INSERT`, `ALTER`, + `OPTIMIZE`, `SELECT` (for self-readback) on `prices.*` only. +- mTLS cert issuance: BE-operated CA issues per-env certs + (`prices-api-dev`, `prices-api-prod`) per ADR 0007 §Decision + Cluster C (per-env mTLS, 1-year manual rotation, + CA-rotation revocation). +- Caddy endpoint reachable from the Lambda's outbound CIDR + (Lambdas without VPC use the AWS public egress — confirm + whether BE wants to whitelist or relies purely on mTLS). + +**Gating:** this whole item is blocked behind BE 0227 (Hetzner CH +ships) and task 0047 (cross-tenant throughput verification). It +is in this spec to confirm the **shape** of the eventual contract, +not to schedule it. A RED outcome from task 0047 supersedes +ADR 0007 to the sidecar-CH variant — same shape, different host. + +### C.6 — DLQ, retry, lag alarms + +Lambda-side concerns where BE's S3 retry semantics intersect with +our DLQ / lag-alarm story: + +**The ask:** + +- Confirm BE's bucket has `s3:ObjectCreated:*` notifications + configured with the default at-least-once delivery semantics + (i.e. we should treat duplicate invocations as normal, not + exceptional — the prototype's idempotent merge per A.9 is the + right design). +- Agree on a DLQ pattern: per general-overview §5.2 we plan a + per-Lambda SQS DLQ for messages that fail decode or write 3x. + Confirm BE is OK with us re-fetching the same object after + re-driving from DLQ (i.e. no expiration on the ledger objects + for at least DLQ retention). +- Agree on a lag alarm: `prices.ledger_processor.lag_seconds` + = `now() - ledger.closed_at` at invocation time, alarm if + >60s sustained. 
Matches the Galexie §5.1 lag-alarm shape; + flagged here so BE doesn't see our alarm and assume their + pipeline is broken. + +--- + +## Part D — Open questions for the meeting + +Not commitments; just things we want BE's input on that aren't +yet phrased as concrete asks. + +1. **OHLCV column shape — `quote_asset_id` and `volume_quote_usd`:** + ADR 0003 puts `quote_asset_id` in the PK. ADR 0004 adds the + `volume_quote_usd` merge column. Both are prices-api decisions, + but if BE expects to read `prices.price_ohlcv` for any reason + (BE-side analytics, board), the column shape is a soft + coordination item. +2. **CH retention on `prices.*`:** prices-api's empirical footprint + from task 0046 is ~0.45 GB/yr. BE's retention policy on the + shared cluster — does our database inherit BE's TTLs, or do + we set our own? Lean: we set our own (separate DB → separate + retention). +3. **Backfill coexistence:** Stream 1 (ADR 0001) and Stream 2 + (ADR 0005) backfill writers will eventually also write to + `prices.*`. The 1-min UPSERT contract is shared with the live + Lambda. Sequencing question: do we backfill before the live + Lambda goes live, or backfill into a side table and `INSERT + ... SELECT` into the live table once the live tip is healthy? +4. **Empty-ledger optimization:** task 0048's 10k sample showed + most ledgers contain zero pricing-relevant events. Worth + asking BE if they're willing to pre-filter at the bucket + level (e.g. only notify on `*.has-soroban-events.zstd` if + their pipeline tags such ledgers), or if we just eat the + no-op invocations on our side. + +--- + +## Part E — When gates clear: production rewrite punch list + +Surfaced here so the meeting can react to the **full sequence**, not +just the prototype. These items are NOT in scope for this +activation; they spawn as separate backlog tasks when (a) BE 0227 +lands and (b) task 0047 verifies throughput. + +1. Replace `LocalDiskFetcher` with `aws_sdk_s3` GetObject. (~1 day) +2. 
Replace `StdoutJsonSink` / `SqlFileSink` with + `clickhouse::Client` + mTLS + the ADR 0004 merge SQL. (~3 days) +3. CDK stack — Lambda function, role, S3 notification, SSM reads, + CloudWatch alarms, DLQ. (~3 days) +4. Cert issuance + rotation playbook (mTLS to Hetzner CH). (~1 day) +5. Cross-stack handshake — publish Lambda role ARN under + `/prices/{env}/...`, BE consumes it in their CDK. (~0.5 day) +6. xdr-parser pin from `main` to first tagged release. (~0.5 day) +7. Lag-alarm wiring + dashboard. (~1 day) +8. End-to-end smoke from a real ledger-data event in `dev`. (~1 day) + +Total once gates clear: roughly 11 engineering days. + +--- + +## Appendix — references + +- General overview §5.2 — Prices Ledger Processor (Rust) +- ADR 0001 — Stream 1 historical backfill (CH-sourced) +- ADR 0003 — `price_ohlcv` PK shape with `quote_asset_id` +- ADR 0004 — multi-source merge columns +- ADR 0005 — Stream 2 backfill; xdr-parser as git Cargo dep +- ADR 0006 — runtime framework Rust/axum +- ADR 0007 — live data sink on shared Hetzner ClickHouse +- Task 0037 — Tranche 1 Ledger Processor skeleton (the kernel) +- Task 0048 — Soroban events pricing decoder spec +- Task 0045 — BE agreement record (G-note) +- Task 0047 — cross-tenant throughput verification (gating) From bd2ea9db6dc7aea116449b3d45317365f555cddf Mon Sep 17 00:00:00 2001 From: karczuRF Date: Mon, 8 Jun 2026 14:49:41 +0200 Subject: [PATCH 03/17] docs(lore-0038): revise spec with BE indexer reference findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporate the production patterns from soroban-block-explorer's crates/indexer/ that the prices-api Lambda must mirror: doorbell- cursor reconcile loop with reservedConcurrentExecutions=1 for ordering correctness, S3->SQS trigger (body ignored), Galexie key derivation, [50,200,800]ms retry envelope, safe-error redaction, and mTLS via Parameters and Secrets Lambda Extension. 
Major revisions: - New section 1 — load-bearing vs chosen vs not-inherited patterns from BE, with explicit code-path citations. - Part A rewritten as doorbell-cursor loop with three trait seams (Cursor, ObjectFetcher, OhlcvSink) sized for production swap. - Part A.4 expands xdr-parser distribution model into four options with a recommendation (git Cargo dep pinned to sha for prototype, tag-pinned for production). - Part C.1 pivots — register our SQS queue as second notification target on BE bucket, not our Lambda directly. - Part C.2 corrects SSM-at-runtime to CDK-time SSM reads baked into Lambda env vars. - Parts C.5 and C.6 added — reuse of db-clickhouse::mtls, Caddyfile CN_USER_MAP for prices-api CNs. - Part D.1 added — cursor source design question (own table vs cross-DB read vs S3-derived) with a stake. --- .../notes/G-local-prototype-spec.md | 789 ++++++++++++------ 1 file changed, 516 insertions(+), 273 deletions(-) diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md index d67af5f..a2c0ba4 100644 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md @@ -23,7 +23,8 @@ links: # Local-prototype spec + BE cross-team contract > **Audience:** prices-api implementer (Part A), BE team reviewers (Part C). -> **Status:** draft for cross-team discussion. +> **Status:** draft for cross-team discussion. Revised 2026-06-08 +> after reading BE's production indexer crate. > **Why this note exists:** task 0038's 2026-06-08 activation history > entry promised a "local-only binary + design document" deliverable > while the original engineering blockers (BE 0227 mTLS endpoint, @@ -34,24 +35,68 @@ links: ## 0. 
TL;DR -We are building a **local-only** Rust Lambda binary that exercises -the live-ingestion path end-to-end against recorded fixtures — -S3 event → XDR decode → `dispatch()` → 1-min OHLCV bucketing → -**stub sink** (stdout / file emit). It does NOT deploy to AWS, does -NOT register on BE's S3 bucket, does NOT write to Hetzner ClickHouse. +We are building a **local-only** Rust Lambda binary that mirrors +the **shape** of BE's production indexer (`crates/indexer/` in the +soroban-block-explorer repo) — same doorbell-cursor pattern, same +S3 → SQS → Lambda trigger model, same mTLS-to-Hetzner-CH sink — +but exercised against local fixtures + a stub cursor instead of +real S3 / real CH. It does NOT deploy to AWS, does NOT consume +real SQS messages, does NOT write to Hetzner ClickHouse. + The prototype's value is twofold: 1. **De-risk the binary shape** — prove the kernel from task 0037 - composes correctly with `lambda_runtime`, `aws_sdk_s3` (mocked at - the trait boundary), and the `xdr-parser` decode crate. + composes correctly with BE's reusable building blocks + (`xdr-parser`, `db-clickhouse::mtls`, Galexie key derivation) + and the doorbell-cursor reconcile loop adapts cleanly to our + narrower extraction surface. 2. **Ground the BE meeting** — Part C of this note is the concrete list of cross-team commitments the production Lambda needs. - Giving BE a runnable binary + a written contract is cheaper than - asking for those commitments in the abstract. + The big questions are dependency distribution (`xdr-parser` is + currently an internal workspace path-dep at BE, not a published + crate) and ownership of the new SQS queue between our bucket + notifier and our Lambda. When the gating events clear (BE 0227 ships; task 0047 verifies -throughput GREEN/YELLOW), the prototype's interior is reused; -only the sink, the S3 client, and the CDK packaging change. 
+throughput), the prototype's interior survives: only the cursor +store, the S3 client, and the CDK packaging swap from stub +implementations to production wiring. + +--- + +## 1. Reference: BE production indexer (the model we mirror) + +Reading `soroban-block-explorer/crates/indexer/` is **prerequisite +context for the meeting** — the shape we propose IS BE's shape, +modulo a different extraction surface (Soroban swaps for price +discovery, not the 17 RMT tables BE writes) and a different +target database (`prices.*` per ADR 0007, not `default.*`). + +### 1.1 Patterns we MUST inherit (load-bearing, not preference) + +| Pattern | BE source | Why load-bearing | +|---|---|---| +| `reservedConcurrentExecutions = 1` | `compute-stack.ts:260` | Two concurrent invocations would race the CH cursor. Ordering correctness depends on serial execution. | +| Doorbell-cursor reconcile (ignore SQS body; read `max()` from CH) | `handler/mod.rs:160-251` | Order comes from the cursor + S3 contents, not SQS delivery order. Removes any need for FIFO. | +| Last-row-wins commit ordering per ledger | BE: `ledgers` row written last; us: equivalent "cursor advance" written last | A crash mid-ledger resumes cleanly from the unchanged cursor; partial writes get superseded by `ReplacingMergeTree` on next merge. | +| Lambdas outside the VPC, mTLS only | `compute-stack.ts:32-36` (task 0239) | The shared Caddy/Hetzner-CH path is mTLS-terminated; no SG or VPC peering. Putting our Lambda in a VPC would also need a NAT GW for S3. | +| `safe_error_message` redaction | `handler/mod.rs:416-485` | CH `BadResponse` bodies can echo offending row values; their `Display` would leak data into CW Logs. We need the same redactor. | + +### 1.2 Patterns we CHOOSE to inherit (sensible defaults, not absolutes) + +- **Retry backoff `[50, 200, 800] ms`** (`handler/mod.rs:113`) — three retries, four wire calls total, only on transient errors (network / timeout / 5xx). 
+- **Partial-batch-failure SQS response** (`handler/mod.rs:64-75, 160-189`) — fail just the offending message, ack the rest. +- **Eager init at cold start** — surface missing env / unreachable extension as a Lambda Init Errors entry, not a per-event panic (`main.rs:40, 50-67`). +- **Structured JSON tracing-subscriber** with `EnvFilter::from_default_env()` driven by `RUST_LOG`. +- **`maxReceiveCount = 10` on the SQS source** (`compute-stack.ts:147`) — higher than the usual 3 because with `concurrency = 1` the ESM over-polls and gets throttled; the queue absorbs that without false-DLQ'ing a processable record. +- **`visibilityTimeout = lambdaTimeout + 60s`** (`compute-stack.ts:139`). + +### 1.3 Patterns we DO NOT inherit + +- **`default` CH database.** Per ADR 0007 we live in our own `prices.*` database on the same Hetzner cluster. +- **One cursor table named `ledgers`.** BE persists every ledger they see; we only persist ledgers containing pricing-relevant trades. Cursor design is open (Part D.1). +- **Enrichment SQS fan-out.** BE has a separate `enrichment-worker` Lambda fed from the indexer. We don't need that pattern in scope of 0038 — Soroswap/Aquarius asset-discovery is task 0039's job. +- **17 RMT tables.** Our write surface is just `prices.price_ohlcv` (and possibly a small `prices.processed_ledgers` cursor table — see Part D.1). --- @@ -60,31 +105,37 @@ only the sink, the S3 client, and the CDK packaging change. ### A.1 What the binary does A single Rust binary, `prices-ledger-processor`, that on each -invocation: - -1. Accepts an `aws_lambda_events::s3::S3Event` JSON document on - stdin (when run via `cargo lambda invoke`) or on `--event ` - (when run via `cargo run`). -2. For each record in the event, **fetches the referenced object** - via an `ObjectFetcher` trait — wired in prototype mode to a - local-disk implementation that maps `s3://bucket/key` to - `fixtures/`. -3. 
**zstd-decompresses** the bytes (Galexie output is `*.xdr.zstd` - per general-overview §5.1). -4. **Decodes** the bytes as `LedgerCloseMeta` via the BE-authored - `xdr-parser` crate (ADR 0005 §3, ADR 0006 §Decision). -5. **Normalizes** Soroban contract events into the - `SorobanEventRow` shape consumed by `dispatch()` (the kernel - from task 0037), grouped by `(transaction_id, contract_id)`. -6. **Calls** `ledger_processor::dispatch::dispatch(&rows, &venue_registry, &phoenix_registry)` - and collects the returned `TradeRow` set. -7. **Buckets** trades into 1-minute OHLCV candles in-process per - the merge formula from ADR 0004 §Decision (preserve `open`, - overwrite `close`, `GREATEST(high)`, `LEAST(low)`, sum - `volume_base`/`volume_quote_usd`/`trade_count`, recompute `vwap`). -8. **Writes** to a stub sink (see A.7) — no network egress. - -### A.2 Workspace placement +invocation runs the doorbell-cursor reconcile loop locally: + +1. Reads its **cursor** — for the prototype, a `--cursor <ledger-seq>` CLI + arg (production: a CH-table read). +2. Computes the deterministic S3 key for ledger `cursor + 1` using + the **same Galexie key derivation as BE** (one's-complement + prefixes, `.xdr.zst` extension — see A.3 below). +3. Resolves that key via an `ObjectFetcher` trait — wired in + prototype mode to a local-disk impl that maps the derived key + to `fixtures/ledgers/`. Misses → "no new ledger yet, stop" + (gap-stop is normal; future doorbell resumes). +4. Hits → `zstd`-decompresses + calls + `xdr_parser::deserialize_batch()` → iterates the + `LedgerCloseMeta` batch. +5. Per ledger: extracts Soroban contract events via the + `xdr-parser` walk, normalises into the `SorobanEventRow` shape + consumed by `dispatch()` from task 0037, groups by + `(transaction_id, contract_id)`, calls `dispatch()`, collects + `TradeRow`s. +6. 
**Buckets** the trades into 1-min OHLCV candles in-process per + the ADR 0004 merge formula (preserve `open`, overwrite `close`, + `GREATEST(high)`, `LEAST(low)`, sum `volume_base` / + `volume_quote_usd` / `trade_count`, recompute `vwap`). +7. **Writes** to a stub sink (see A.7) — no network egress. +8. **Advances the cursor** (writes new value to the prototype + stub: `out/cursor.txt`) **last** — the equivalent of BE's + "ledgers row written last" ordering barrier. +9. Loops back to step 2 until a gap, the in-process time budget, + or `--max-iterations` is hit. + +### A.2 Workspace placement + trait seams ``` packages/ @@ -97,75 +148,112 @@ packages/ └── prices-ledger-processor/ # NEW — this prototype ├── Cargo.toml ├── src/ - │ ├── main.rs # lambda_runtime entrypoint - │ ├── handler.rs # S3Event → Vec - │ ├── decode.rs # xdr-parser → SorobanEventRow + │ ├── main.rs # lambda_runtime entrypoint + CLI mode + │ ├── reconcile.rs # doorbell-cursor loop + │ ├── decode.rs # xdr-parser walk → SorobanEventRow │ ├── bucket.rs # 1-min OHLCV merge (ADR 0004) - │ ├── sink/ # writer abstraction - │ │ ├── mod.rs # trait `OhlcvSink` - │ │ ├── stdout.rs # JSON-lines to stdout - │ │ └── sql_file.rs # ALTER-friendly SQL dump - │ └── object_fetcher/ # input abstraction - │ ├── mod.rs # trait `ObjectFetcher` - │ └── local_disk.rs # `fixtures/` mapping - ├── fixtures/ # gitignored sample LedgerCloseMeta + │ ├── galexie_key.rs # ledger# → S3 key (copy of BE's) + │ ├── retry.rs # [50,200,800]ms backoff + │ ├── safe_log.rs # redaction wrappers (mirrors BE) + │ ├── object_fetcher/ # input abstraction + │ │ ├── mod.rs # trait `ObjectFetcher` + │ │ └── local_disk.rs # fixtures/ledgers/ + │ ├── cursor/ # cursor abstraction + │ │ ├── mod.rs # trait `Cursor` + │ │ └── stub_file.rs # out/cursor.txt + │ └── sink/ # writer abstraction + │ ├── mod.rs # trait `OhlcvSink` + │ ├── stdout.rs # JSON-lines to stdout + │ └── sql_file.rs # ALTER-friendly SQL dump + ├── fixtures/ # gitignored sample ledger 
files └── tests/ - └── e2e_fixture.rs # one ledger end-to-end test + └── reconcile_e2e.rs # one full loop through fixtures ``` -The two trait boundaries (`ObjectFetcher`, `OhlcvSink`) are -deliberate seams: production swaps `local_disk` for `aws_sdk_s3` -and `stdout` for a ClickHouse `clickhouse::Client`. Everything -else stays. +The three trait seams (`ObjectFetcher`, `Cursor`, `OhlcvSink`) are +the **production swap points**. In the production rewrite: -### A.3 Inputs — fixture, not S3 +- `LocalDiskFetcher` → `aws_sdk_s3::Client::get_object` +- `StubFileCursor` → CH-backed cursor (see Part D.1) +- `StdoutJsonSink` / `SqlFileSink` → `clickhouse::Client` over + mTLS, via `db_clickhouse::mtls::client_from_lambda_env` -For the prototype, fixtures come from BE's existing -`stellar-ledger-data/` bucket layout — we copy a handful of -`*.xdr.zstd` files locally, plus a matching `S3Event` JSON -mocked from CloudTrail format. Concretely: +Everything else — the reconcile loop, the decode, the +bucketing, the redaction, the retry — survives. + +### A.3 Inputs — fixtures, not S3 events + +For the prototype, fixtures are real Galexie outputs copied locally, +indexed by their **derived** key (so the same `galexie_key.rs` +function we ship works in both modes): ``` packages/prices-ledger-processor/fixtures/ -├── events/ -│ ├── single-soroban-swap.json # 1 record, 1 Phoenix swap -│ ├── multi-swap-batch.json # 1 record, 4 swaps mixed venues -│ └── empty-ledger.json # 1 record, no swaps (negative test) └── ledgers/ - ├── 62019999.xdr.zstd # known-Phoenix-swap ledger - ├── 62020247.xdr.zstd # known multi-venue ledger - └── 62079982.xdr.zstd # known empty ledger + ├── FC45E5FF--62528000-62591999/ + │ ├── FC45E5C4--62528059.xdr.zst # known Phoenix swap + │ ├── FC45E5C3--62528060.xdr.zst # empty + │ └── FC45E5C2--62528061.xdr.zst # multi-venue + └── ... 
``` -Fixture ledgers are picked from the 10k uniform sample analysed -in task 0046 / 0048 — same evidence base as the decoder spec, so -expected outputs are pre-known. +The operator picks fixtures from the 10k uniform sample analysed +in tasks 0046 / 0048 — same evidence base as the decoder spec, so +expected outputs are pre-known. Filling `fixtures/` is a one-time +manual step (`aws s3 cp` against the dev bucket, after which the +prototype is offline-runnable). + +**No `S3Event` JSON fixtures.** The doorbell pattern means the +SQS message body would be ignored anyway — fabricating S3-event +JSONs gains us nothing and falsely suggests the Lambda parses +them. ### A.4 Decode boundary — `xdr-parser` -The `xdr-parser` BE-authored crate is consumed as a `git`-source -Cargo dependency per ADR 0005 §3: +**Significant cross-team item.** BE's `xdr-parser` is a workspace +**path dep** at `soroban-block-explorer/crates/xdr-parser/`, not a +published crate. The prototype needs decisions on: + +**Option 1 — Vendor a snapshot** into +`packages/prices-ledger-processor/vendored/xdr-parser/`. Pros: +zero BE coordination; clean Cargo build. Cons: drifts on every +Stellar protocol upgrade; explicit re-sync ceremony. + +**Option 2 — Git submodule** of the BE repo, with a Cargo +`path = "../../soroban-block-explorer/crates/xdr-parser"` dep. +Pros: pinned commit, simple update. Cons: weird workspace layout; +breaks `cargo publish` (irrelevant for us) and `nx`-only mental +models. + +**Option 3 — Git Cargo dep** against the BE GitHub repo. Pros: +clean Cargo idiom. Cons: requires BE to keep `xdr-parser` a +**top-level package in their workspace** (it already is) and accept +that prices-api pins against specific commits. Stellar-XDR major +bumps still require coordinated PRs. + +**Option 4 — Ask BE to publish to a private cargo registry** +(e.g. CodeArtifact). Most disruptive; only justifies itself if +multiple downstream consumers exist. 
+ +**Prototype recommendation: Option 3.** It is the cheapest +"works today" option that doesn't impose on BE — we just pin a +commit sha: ```toml [dependencies] -xdr-parser = { git = "ssh://git@github.com/rumblefishdev/soroban-block-explorer.git", branch = "main", package = "xdr-parser" } +xdr-parser = { git = "ssh://git@github.com/rumblefishdev/soroban-block-explorer.git", rev = "<commit-sha>", package = "xdr-parser" } +stellar-xdr = "<version xdr-parser pins>" # transitively required ``` -**Open question for BE (C.4):** the production form needs a -**pinned tag** (e.g. `xdr-parser-v0.4.2`), not a moving `main`. -BE owns the release cadence. The prototype can ride `main` for -now; the production rewrite cannot. - -What we need out of the crate: +**Production rewrite item:** lock to a tagged release (e.g. +`xdr-parser-v0.4.0`) and agree on a semver discipline (Part C.4). -- `LedgerCloseMeta::decode(&[u8]) -> Result` -- A walk of `SorobanTransactionMeta.events` that yields - `(transaction_id, contract_id, event_index, topics, data)` - tuples (already implemented in BE's local Ledger Processor — - we want the same path exposed as a library function). +What we need from the crate (all already exposed per the indexer's +usage at `handler/mod.rs:313-316, 327`): -If BE has not yet exposed that walk as a library API (it may live -inside their Lambda binary today), C.4 asks them to lift it. +- `xdr_parser::decompress_zstd(&[u8]) -> Result<Vec<u8>, ParseError>` +- `xdr_parser::deserialize_batch(&[u8]) -> Result<Batch, ParseError>` where `Batch` has `.ledger_close_metas: Vec<LedgerCloseMeta>` +- A walk of `SorobanTransactionMeta.events` that yields the `(transaction_id, contract_id, event_index, topics, data)` tuples the dispatcher expects (BE's `handler/process::parse_ledger` does this; we may not need the full parse, just the events walk — Part C.4 sub-question). ### A.5 Kernel boundary — `dispatch()` @@ -182,15 +270,15 @@ let trades: Vec = dispatch(&rows, &venue_registry, &phoenix_registry)? Today the kernel implements Phoenix XYK only. 
Soroswap and Aquarius extractors return `VenueNotImplemented`. The prototype -tolerates that error variant — it counts those rows in a +tolerates that variant — counts those rows in an `unimplemented_venue` metric and continues, exactly like the production Lambda should once those extractors land. **Implication for the BE meeting:** Soroswap and Aquarius live -ingestion is **gated on extractor work that is NOT part of this -task** (separate FEATURE tasks, not yet spawned). The Lambda -shape is complete without them; the venues just yield empty -output until their extractors arrive. +ingestion is **gated on extractor work outside this task** +(separate FEATURE tasks, not yet spawned). The Lambda shape is +complete without them; the venues just yield empty output until +their extractors arrive. ### A.6 OHLCV bucketing @@ -214,30 +302,32 @@ for trade in trades { ``` The `merge` impl is the canonical place to keep the incremental- -merge SQL: it gets tested in-process and the production rewrite +merge logic: it gets tested in-process and the production rewrite can either reuse the in-memory merge or translate it to a CH `AggregatingMergeTree` materialised view (per task 0048's recommendation §6.3). -### A.7 Sinks — stub only +### A.7 Sinks + cursor — stub only -Two prototype sinks, both pure-local: +Three prototype-side stubs, all pure-local: 1. **`StdoutJsonSink`** — emits one JSON line per `OhlcvRow` to stdout. Tail-friendly, grep-able, diff-able across runs. 2. **`SqlFileSink`** — writes one `.sql` file per invocation - under `out/` containing the `INSERT INTO prices.price_ohlcv ... - ON CONFLICT ...` statements the production writer would emit. - This is the artefact we hand to BE in the meeting — they can - read it and tell us whether the column shape lines up with what - their `prices.*` database (per ADR 0007) is going to host. + under `out/` containing the `INSERT INTO prices.price_ohlcv ...` + statements the production writer would emit. 
Hand to BE in + the meeting; they can read it and tell us whether the column + shape lines up with what `prices.*` will host. +3. **`StubFileCursor`** — reads/writes `out/cursor.txt` (a single + `u64`). Production replaces with a CH-table read (see Part D.1). **Explicitly out of prototype scope:** - No `clickhouse::Client` connection (no Hetzner reachability yet). -- No RDS Postgres connection (ADR 0007 supersedes the RDS path). +- No `aws_sdk_s3` client (no AWS reachability). +- No `aws_sdk_sqs` client (no real queue). - No CloudWatch metric / log emit (stdout structured-JSON is - enough; CloudWatch is a deployment concern). + enough; CW is a deployment concern). ### A.8 Operator invocation surface @@ -246,33 +336,41 @@ Two modes the operator on a local machine can use: ```bash # Mode 1: lambda_runtime via cargo-lambda (closer to production) cargo lambda invoke prices-ledger-processor \ - --data-file fixtures/events/single-soroban-swap.json + --data '{"Records":[{"messageId":"local-doorbell","body":"ignored"}]}' # Mode 2: direct cargo run (faster iteration) cargo run -p prices-ledger-processor -- \ - --event fixtures/events/multi-swap-batch.json \ + --cursor 62528058 \ + --max-iterations 16 \ --sink stdout ``` Mode 2 is the inner-loop. Mode 1 proves the `provided.al2` -runtime shape works locally. +runtime shape works locally and exercises the full doorbell event +deserialise path (even though the body is ignored — same as BE). ### A.9 Prototype acceptance - [ ] `cargo build -p prices-ledger-processor --release` succeeds. -- [ ] `cargo lambda invoke` against `single-soroban-swap.json` - emits the expected `OhlcvRow` for the known Phoenix XLM/USDC - swap in ledger 62019999. -- [ ] `cargo run -- --event multi-swap-batch.json --sink sql_file` - produces a `.sql` file whose `INSERT ... ON CONFLICT ...` - statements use the PK shape mandated by ADR 0003 - (`timestamp, asset_id, granularity, quote_asset_id`) and the - merge columns from ADR 0004. 
-- [ ] Re-running the same invocation is bit-identical (idempotent; - proves the merge is deterministic). -- [ ] One `tests/e2e_fixture.rs` test, runnable on a clean clone - with `nx test prices-ledger-processor`, that covers the - whole pipeline against one of the three ledger fixtures. +- [ ] `cargo lambda invoke` against a stub doorbell event, with a + fixtures dir containing the known Phoenix-swap ledger + 62528059, emits the expected `OhlcvRow` for that swap and + advances `out/cursor.txt` to 62528059. +- [ ] `cargo run -- --cursor 62528058 --max-iterations 16 --sink sql_file` + walks contiguous fixtures, produces a `.sql` file whose + `INSERT INTO prices.price_ohlcv ...` statements use the PK shape + from ADR 0003 (`timestamp, asset_id, granularity, + quote_asset_id`) and the merge columns from ADR 0004. +- [ ] Re-running the same invocation from the same starting + cursor is bit-identical (idempotent; proves the merge is + deterministic). +- [ ] Hitting a missing fixture is logged as `"reached gap on S3 + — contiguous run done"` (mirrors BE's wording for the same + condition) and exits cleanly without advancing past the gap. +- [ ] One `tests/reconcile_e2e.rs` test, runnable on a clean + clone with `nx test prices-ledger-processor`, that covers + the whole pipeline against three fixture ledgers (swap, + empty, gap-stop). - [ ] This G-note's Part C reviewed by BE; their answers captured below the questions inline (or as a follow-up G-note). @@ -287,223 +385,368 @@ Listed so the meeting doesn't accidentally extend scope: - **CDK stack.** No `infra/aws-cdk/` changes. The original Implementation Plan Step 4 in this task's README is deferred to the production-rewrite task (see Part E). -- **S3 notification registration on BE's bucket.** The prototype - never touches the real bucket. Registration is a BE-coordination - step, not a unilateral one (general-overview §5.1). -- **SSM platform-key consumption.** No `/platform/{env}/*` reads. 
- The prototype takes bucket name and key prefix as CLI args / - env vars only. -- **mTLS to Hetzner ClickHouse.** No certificates issued, no - `clickhouse-rs` wiring. Sink stays local. -- **VPC, IAM, Lambda execution role.** All AWS-side; deferred. -- **CloudWatch alarms, X-Ray traces, DLQ.** Observability is +- **Real S3 → SQS wiring.** No notification configuration on BE's + bucket; no SQS queue creation. +- **Lambda execution role / IAM.** All AWS-side; deferred. +- **mTLS cert issuance.** No CA call, no Secrets Manager write, + no Caddyfile change. +- **CloudWatch alarms, X-Ray traces.** Observability is prototype-side stdout JSON only. +- **DLQ.** No `aws_sdk_sqs::Client`, no DLQ behaviour modelled. - **Soroswap / Aquarius extractor bodies.** The prototype tolerates `VenueNotImplemented`; those bodies are separate tasks. - **SDEX trade extraction.** The 0037 kernel currently dispatches Soroban-only; classic SDEX ops travel a different path that the Lambda inherits when 0022's extractor lands. +- **xdr-parser republishing.** Prototype consumes via git Cargo + dep against the BE repo on a pinned commit. Tag-pinning and + semver discipline are Part C.4 items, not prototype work. --- ## Part C — Cross-team contract (BE meeting agenda) -This is the action-item list for the BE conversation. Each item is -phrased as a concrete decision we need from them, with the -prices-api position pre-staked so the meeting is about confirming -or pushing back, not co-designing from scratch. +Each item is phrased as a concrete decision we need from BE, with +the prices-api position pre-staked so the meeting is about +confirming or pushing back, not co-designing from scratch. + +### C.1 — SQS queue ownership + S3 → SQS notification -### C.1 — S3 notification registration on `stellar-ledger-data/` +**Background.** Post-task-0241 (BE), the indexer is triggered by +**SQS doorbells**, not direct S3 → Lambda. 
The flow is: -**The ask:** add `prices-ledger-processor` as a **second** -event-notification target on the existing bucket, for `s3:ObjectCreated:*` -events under the same key prefix BE's own Ledger Processor consumes. +``` +ledger object PutObject → S3 ObjectCreated event + → SQS message ("doorbell", body ignored) + → Lambda invocation (batchSize=1, concurrency=1) +``` + +Our Lambda follows the same shape — a separate SQS queue with its +own doorbells, fed from the same `ObjectCreated` events on the +same bucket. + +**The ask:** add a **second** event notification on BE's +`stellar-ledger-data` bucket targeting a **prices-api-owned SQS +queue** (`prices-ingest-queue-{env}`), filtered to `.xdr.zst` +suffix (same filter BE uses — `compute-stack.ts:278`). -**Why a contract item:** the bucket is BE-owned. Adding a second -target requires a CDK change in BE's infra repo (or wherever the -bucket lives), not in ours. +Why a prices-api-owned queue, not a shared one: failure isolation. +A backlog or DLQ-spam on the prices side mustn't pressure BE's +indexer queue. **Open sub-questions for the meeting:** -- Do BE's event filters include `*.xdr.zstd` only, or do we need - client-side filtering? (Prices Lambda will filter regardless, - but we'd rather not fire on irrelevant objects.) -- Should this be SNS-fan-out (per ADR 0007's "Cluster A: - announcement-not-approval" norm hints at SNS) or two direct - Lambda subscriptions? Trade-off: SNS adds 1 hop but decouples - consumer changes from BE's bucket config. +1. SNS-fan-out vs two direct notifications. BE today wires the + bucket directly to their SQS queue. Adding our queue as a + second target on the same bucket is supported by S3, but if BE + anticipates a third or fourth consumer they may prefer to + move the bucket-side to SNS and let everyone subscribe. +2. Notification filter precision. `.xdr.zst` is bucket-wide; + ledgers don't have a separate prefix today. 
If BE plans to + add other object types to the bucket (snapshot dumps, + diagnostic exports), we'd want a prefix filter on our + subscription so we don't process them. -### C.2 — SSM platform keys +### C.2 — Env-var injection contract (NOT SSM-at-runtime) -Per the `ssm-key-contract-split` memory: `/platform/{env}/*` is -BE-owned, `/prices/{env}/*` is prices-owned. The Lambda needs -to read **identifier-only** values (never bulk trust material) -from `/platform/{env}/*`. Proposed key set: +**Correction to the earlier draft.** I previously proposed +`/platform/{env}/*` SSM keys read at Lambda runtime. **BE's actual +pattern (compute-stack.ts:261-267) is CDK-time SSM reads baked +into Lambda env vars** at deploy. We mirror that. -| Key | Type | Purpose | -|-----|------|---------| -| `/platform/{env}/stellar-ledger-data-bucket-arn` | String | bucket the Lambda is subscribed to | -| `/platform/{env}/stellar-ledger-data-bucket-name` | String | for S3 client GetObject (avoid ARN parse) | -| `/platform/{env}/stellar-ledger-data-kms-key-arn` | String | KMS key the bucket uses for SSE-KMS (if any) so the Lambda role can be granted `kms:Decrypt` | -| `/platform/{env}/hetzner-ch-endpoint` | String | Caddy address for `prices.*` writes (per ADR 0007) | -| `/platform/{env}/hetzner-ch-ca-cert-arn` | String | ARN of the Secrets Manager secret holding the BE-issued CA cert for mTLS validation | +**The ask:** BE publishes the following identifiers under +`/platform/{env}/*` for our CDK to consume at deploy time: -**The ask:** BE commits to populating these keys (with appropriate -IAM read grants for the prices-api Lambda role) and notifying us -before any rotation. **None of these contain secrets**; the mTLS -key+cert pair lives under `/prices/{env}/*` and is owned by us. 
+| SSM key | Type | Consumed at deploy → injected as env var | +|---|---|---| +| `/platform/{env}/stellar-ledger-data-bucket-name` | String | `BUCKET_NAME` | +| `/platform/{env}/stellar-ledger-data-bucket-arn` | String | (CDK-side, for IAM grant) | +| `/platform/{env}/ch-domain` | String | `CH_DOMAIN` (Caddy host) | +| `/platform/{env}/stellar-network-passphrase` | String | `STELLAR_NETWORK_PASSPHRASE` (xdr-parser cache init) | -**Open sub-question for the meeting:** +**Why this changes the contract.** No prices-api Lambda runtime +reads from SSM. The Lambda only sees env vars. SSM is the +deploy-time handshake, not a runtime dependency. -- Naming — do the keys above match BE's existing `/platform/` - conventions, or should they live under a sub-namespace - (`/platform/{env}/stellar-ledger-data/...`)? +**Open sub-question:** + +- Does BE already publish a `STELLAR_NETWORK_PASSPHRASE` SSM key + (mainnet vs testnet)? BE's indexer reads it from env; if their + CDK reads it from SSM at deploy, point us at the key. ### C.3 — IAM principal authorisation -The prototype doesn't need this; the production Lambda does. +Lighter than first draft because Caddy's CN mapping (C.6) does +most of the data-plane auth. The remaining IAM grants: + +**The ask:** BE's bucket policy + KMS key policy (if SSE-KMS) +explicitly trusts the prices-api Lambda execution role ARN for: + +- `s3:GetObject`, `s3:HeadObject` on the bucket +- `kms:Decrypt` on the bucket's KMS key (if any) -**The ask:** BE's bucket-policy and KMS-key-policy explicitly trust -the prices-api Lambda execution role ARN. The role ARN will be -exported from the prices-api CDK stack and published under -`/prices/{env}/lambda-ledger-processor-role-arn` for BE to -consume in their own CDK. +The role ARN will be exported from the prices-api CDK stack and +published under `/prices/{env}/lambda-ledger-processor-role-arn` +for BE to consume in their own CDK. 
-This is the standard cross-account / cross-stack handshake; the -contract is just "BE agrees to wire this once it lands." +This is the standard cross-stack handshake — contract is "BE +agrees to wire this once our CDK stack lands." -### C.4 — `xdr-parser` crate publishing +### C.4 — `xdr-parser` distribution model -The Lambda depends on BE's `xdr-parser` crate via a `git`-source -Cargo dep (ADR 0005 §3). +**The biggest single item in this meeting.** Today +`xdr-parser` is a workspace path-dep in +`soroban-block-explorer/crates/xdr-parser/`, not a published +crate. The prototype runs against a git-source Cargo dep pinned +to a commit (Option 3 in A.4). The production Lambda needs a +sturdier dependency contract. **The ask:** -- BE publishes **tagged releases** of `xdr-parser` (e.g. - `xdr-parser-v0.x.y`). Prices-api pins to a tag, not `main`. -- BE exposes the `LedgerCloseMeta` → `(tx_id, contract_id, events)` - walk as a public library function (not just an internal helper - in their Lambda binary). If it already is public, point us at - it. -- BE commits to **semver discipline** on that public surface: - payload-shape changes get a MAJOR bump, additions get MINOR, - bug fixes get PATCH. We don't need an SLA on cadence, just on - semver. +1. BE keeps `xdr-parser` as a **top-level workspace package** + (already true; just confirming nobody intends to fold it into + the indexer binary). +2. BE publishes **tagged releases** of `xdr-parser` + (`xdr-parser-vMAJOR.MINOR.PATCH`). Prices-api pins to a tag + in production, not `main` or a sha. +3. BE commits to **semver discipline** on the public surface + (the `decompress_zstd` / `deserialize_batch` / `parse_ledger` + functions and the public types they return). Payload-shape + changes get a MAJOR bump; additions get MINOR; bug fixes PATCH. + We don't need an SLA on cadence, just on semver. +4. 
BE exposes (if not already) the `SorobanTransactionMeta` events + walk as a **public library function** distinct from + `parse_ledger`. We don't need the full BE parse — we only need + the events stream + `(tx_id, contract_id, event_index, topics, + data)` tuples. If `parse_ledger` is the only entrypoint + today, we'd ride that (paying the cost of fields we discard); + if BE is willing to factor the events walk out, that's + cleaner. + +**Open sub-questions:** + +- Cargo registry vs git tag: would BE prefer to publish to + CodeArtifact (or similar)? Git tags work fine for now; flag + in case BE has a preference. +- `stellar-xdr` version pin. The prototype must use the **same** + `stellar-xdr` version as `xdr-parser` (two incompatible versions compile as distinct crates, so their `LedgerCloseMeta` types would not unify). Today BE's + workspace pins it in the root `Cargo.toml`. Whose pin wins + when both repos drift? Proposal: prices-api pins to whatever + the `xdr-parser` tag we depend on transitively requires; we + follow BE on `stellar-xdr` updates within `xdr-parser` semver. + +### C.5 — Reuse of `db-clickhouse::mtls` + +**Background.** BE's `db-clickhouse` crate contains +`mtls::client_from_lambda_env(database: &str) -> Result<clickhouse::Client, _>` +which fetches `{cert, key, ca}` from Secrets Manager via the +Parameters and Secrets Lambda Extension on `localhost:2773`, +parses the PEM bundle, assembles a `rustls::ClientConfig`, and +returns a ready `clickhouse::Client` (`db-clickhouse/src/mtls.rs`). +This is exactly what our Lambda needs. -**Open sub-question for the meeting:** + +**The ask:** + +- BE is willing to let prices-api depend on **just the `mtls` + module** of `db-clickhouse`, exposed as a smaller crate (e.g. 
+ `db-clickhouse-mtls` or `clickhouse-mtls-aws`) — OR +- BE is willing to let prices-api depend on the **whole + `db-clickhouse` crate** (path `db-clickhouse = { ..., features = ["aws-mtls"] }`), + accepting we pull in their schema / persist code as dead + weight in our binary (Cargo dead-code-strips, so wire-size + impact ≈ zero) — OR +- BE is fine with prices-api **vendoring `mtls.rs` verbatim** + with a clear "synced from BE rev X" comment. + +**Position:** Option 2 (depend on the whole crate) is the +lowest-friction. Cargo's dead-code-elimination handles the unused +modules; we get the helper "for free" and inherit fixes when BE +ships them. If BE prefers we don't carry the dependency, Option 3 +(vendor) is acceptable; Option 1 (factor a smaller crate) is the +most disruptive on BE's side. + +**Open sub-question:** + +- The `mtls::client_from_lambda_env` reads `MTLS_SECRET_NAME` and + `CH_DOMAIN` env vars. Are those names canonical, or should + prices-api use a different prefix to avoid clashing if both + Lambdas ever share a process (they won't, but the env-var + name is in the public API of the helper)? + +### C.6 — Caddyfile `CLICKHOUSE_CN_USER_MAP` for prices-api + +**Background.** Per BE's mTLS design +(`db-clickhouse/src/mtls.rs` module docs and task 0240), Caddy +**strips** any client-supplied `X-ClickHouse-User` and re-applies +the user mapped from the certificate's CN via +`CLICKHOUSE_CN_USER_MAP`. The client never sets a user; Caddy +decides. -- Cargo registry vs git tag: would BE prefer to publish to a - private cargo registry (crates.io is public; there's no - obvious private registry today)? Git tags are fine for now; - flagging in case BE has a preference. 
+**The ask:** BE adds two CN → CH-user mappings to the production +Caddy config: -### C.5 — Hetzner ClickHouse mTLS write contract +- `prices-api-dev` → `prices_writer_dev` (CH user) +- `prices-api-prod` → `prices_writer` (CH user) -Per ADR 0007 §Decision: prices-api writes into a separate `prices.*` -database on BE's Hetzner CH cluster over mTLS via Caddy. +…and provisions the corresponding CH users with `INSERT`, `ALTER`, +`OPTIMIZE`, `SELECT` grants on the **`prices.*`** database only +(no access to `default.*`). The CN values match the issued cert +CNs (Part C.7). + +**Open sub-question:** + +- Does BE want prices-api to draft the `CREATE USER` DDL itself + (per ADR 0007's announcement-not-approval norm), or do they + prefer to author it? Lean: we draft, they apply, we land the + SQL in `lore/3-wiki/` for traceability. + +### C.7 — mTLS cert issuance for `prices-api-{env}` + +**Background.** BE operates the CA and the per-service cert +issuance procedure (`infra-hetzner/ca/README.md`). **The ask (production-only, surfaced now for awareness):** -- A `prices` database (CH-level), not `default`. ADR 0007 §5 - notes the "separate-`prices`-database shape" was the all-yes - outcome of task 0045's Cluster A. -- A CH user `prices_writer` (or similar) with `INSERT`, `ALTER`, - `OPTIMIZE`, `SELECT` (for self-readback) on `prices.*` only. -- mTLS cert issuance: BE-operated CA issues per-env certs - (`prices-api-dev`, `prices-api-prod`) per ADR 0007 §Decision - Cluster C (per-env mTLS, 1-year manual rotation, - CA-rotation revocation). -- Caddy endpoint reachable from the Lambda's outbound CIDR - (Lambdas without VPC use the AWS public egress — confirm - whether BE wants to whitelist or relies purely on mTLS). - -**Gating:** this whole item is blocked behind BE 0227 (Hetzner CH -ships) and task 0047 (cross-tenant throughput verification). It -is in this spec to confirm the **shape** of the eventual contract, -not to schedule it. 
A RED outcome from task 0047 supersedes -ADR 0007 to the sidecar-CH variant — same shape, different host. - -### C.6 — DLQ, retry, lag alarms - -Lambda-side concerns where BE's S3 retry semantics intersect with -our DLQ / lag-alarm story: +- BE-operated CA issues two prices-api certs (`prices-api-dev`, + `prices-api-prod`) with the CNs from C.6. +- Per ADR 0007 Cluster C: per-env, 1-year manual rotation, + CA-rotation revocation. +- Bundle uploaded to Secrets Manager under + `${mtlsSecretNamePrefix}/lambda-prices-ledger-processor-{env}` + (matches BE's naming convention from `compute-stack.ts:251, + 305`); prices-api Lambda role granted Secrets Manager read. + +**Gating:** blocked behind BE 0227 (Hetzner CH ships) and task +0047 (cross-tenant throughput verification). In this spec to +confirm the **shape** of the eventual contract, not to schedule +it. + +### C.8 — DLQ + lag-alarm coordination **The ask:** -- Confirm BE's bucket has `s3:ObjectCreated:*` notifications - configured with the default at-least-once delivery semantics - (i.e. we should treat duplicate invocations as normal, not - exceptional — the prototype's idempotent merge per A.9 is the - right design). -- Agree on a DLQ pattern: per general-overview §5.2 we plan a - per-Lambda SQS DLQ for messages that fail decode or write 3x. - Confirm BE is OK with us re-fetching the same object after - re-driving from DLQ (i.e. no expiration on the ledger objects - for at least DLQ retention). -- Agree on a lag alarm: `prices.ledger_processor.lag_seconds` - = `now() - ledger.closed_at` at invocation time, alarm if - >60s sustained. Matches the Galexie §5.1 lag-alarm shape; - flagged here so BE doesn't see our alarm and assume their - pipeline is broken. +- prices-api owns its own DLQ for the prices-ingest queue + (`prices-ingest-dlq-{env}`). 
`maxReceiveCount = 10` matches + BE's value for the same reason: with `concurrency = 1` the + ESM over-polls and gets throttled; a count of 10 absorbs that + without false-DLQ'ing a processable doorbell. +- Lag alarm: `prices.ledger_processor.lag_seconds` = + `now() - ledger.closed_at` at invocation time, alarm if >60s + sustained. Flagged here so BE doesn't see our alarm and + assume their pipeline is broken — our alarm fires on **our** + Lambda being behind, not on Galexie being behind. --- ## Part D — Open questions for the meeting -Not commitments; just things we want BE's input on that aren't -yet phrased as concrete asks. - -1. **OHLCV column shape — `quote_asset_id` and `quote_volume_usd`:** - ADR 0003 puts `quote_asset_id` in the PK. ADR 0004 adds the - `volume_quote_usd` merge column. Both are prices-api decisions, - but if BE expects to read `prices.price_ohlcv` for any reason - (BE-side analytics, board), the column shape is a soft - coordination item. -2. **CH retention on `prices.*`:** prices-api's empirical footprint - from task 0046 is ~0.45 GB/yr. BE's retention policy on the - shared cluster — does our database inherit BE's TTLs, or do - we set our own? Lean: we set our own (separate DB → separate - retention). -3. **Backfill coexistence:** Stream 1 (ADR 0001) and Stream 2 - (ADR 0005) backfill writers will eventually also write to - `prices.*`. The 1-min UPSERT contract is shared with the live - Lambda. Sequencing question: do we backfill before the live - Lambda goes live, or backfill into a side table and `INSERT - ... SELECT` into the live table once the live tip is healthy? -4. **Empty-ledger optimization:** task 0048's 10k sample showed - most ledgers contain zero pricing-relevant events. Worth - asking BE if they're willing to pre-filter at the bucket - level (e.g. only notify on `*.has-soroban-events.zstd` if - their pipeline tags such ledgers), or if we just eat the - no-op invocations on our side.
+Not commitments; questions where we want BE's input but haven't +pre-staked a position. + +### D.1 — Cursor source + +BE's cursor is `max(sequence) FROM default.ledgers` — they +persist every ledger they see. We only persist ledgers +containing pricing-relevant trades, so `max(...) FROM +prices.price_ohlcv` is an UNDER-COUNT, not the cursor we need. + +**Three options:** + +1. **Own cursor table `prices.processed_ledgers`** — single-row, + updated last per invocation per ADR 0007's last-row-wins + convention. Pros: independent of BE. Cons: yet another + `ReplacingMergeTree` to operate. +2. **Cross-DB read of `default.ledgers.max(sequence)`** as our + ceiling, processed-up-to stored on our side as a small file + or table. Pros: no parallel state. Cons: couples our cursor + to BE's persist pipeline; if BE pauses (`indexerLambdaConcurrency + = 0`), we'd also stall. +3. **Driven purely from S3** — HEAD-probe forward from the last + confirmed key, keep no cursor in CH. Pros: stateless. Cons: + restart cost on cold start (scan to find the floor). + +**Lean: Option 1.** Independence > parallel-state savings. Worth +~5 minutes of meeting time to confirm BE is fine with us adding +one tiny RMT table to `prices.*`. + +### D.2 — OHLCV column shape + +ADR 0003 puts `quote_asset_id` in the PK. ADR 0004 adds the +`volume_quote_usd` merge column. Both are prices-api decisions, +but if BE expects to read `prices.price_ohlcv` for any reason +(BE-side analytics, board, debugging), the column shape is a +soft coordination item. + +### D.3 — Retention on `prices.*` + +prices-api's empirical footprint from task 0046 is ~0.45 GB/yr. +BE's retention policy on the shared cluster — does our database +inherit BE's TTLs, or do we set our own? Lean: own (separate DB +→ separate retention). + +### D.4 — Backfill / live coexistence + +Stream 1 (ADR 0001) and Stream 2 (ADR 0005) backfill writers +will eventually also write to `prices.*`. The 1-min UPSERT +contract is shared with the live Lambda.
Sequencing question: +backfill before live, or backfill into a side table and +`INSERT ... SELECT` into the live table once the live tip is healthy? + +### D.5 — Empty-ledger optimisation + +Task 0048's 10k sample showed most ledgers contain zero +pricing-relevant events. Worth asking BE if they're willing to +pre-tag at the bucket level (e.g. an additional notification on +`*.has-soroban-events.zst` if their pipeline tags such ledgers), +or if we eat the no-op invocations. Likely answer: eat them — +the Lambda no-op path is cheap. + +### D.6 — Batch size + +BE uses `batchSize = 1` because their concurrency = 1 makes +larger batches pointless. Should we do the same, or — given +that most prices-relevant ledgers cluster and we expect long +gaps — increase to (say) 5 to amortise cold-start over multiple +doorbells? Probably not worth the complexity; mirror BE at 1. --- -## Part E — When gates clear: production rewrite punch list +## Part E — Production rewrite punch list (when gates clear) -Surfaced here so the meeting can react to the **full sequence**, not -just the prototype. These items are NOT in scope for this -activation; they spawn as separate backlog tasks when (a) BE 0227 -lands and (b) task 0047 verifies throughput. +Surfaced here so the meeting can react to the **full sequence**. +These items are NOT in scope for this activation; they spawn as +separate backlog tasks when (a) BE 0227 lands and (b) task 0047 +verifies throughput. -1. Replace `LocalDiskFetcher` with `aws_sdk_s3` GetObject. (~1 day) -2. Replace `StdoutJsonSink` / `SqlFileSink` with - `clickhouse::Client` + mTLS + the ADR 0004 merge SQL. (~3 days) -3. CDK stack — Lambda function, role, S3 notification, SSM reads, - CloudWatch alarms, DLQ. (~3 days) -4. Cert issuance + rotation playbook (mTLS to Hetzner CH). (~1 day) -5. Cross-stack handshake — publish Lambda role ARN under - `/prices/{env}/...`, BE consumes it in their CDK. (~0.5 day) -6. xdr-parser pin from `main` to first tagged release.
(~0.5 day) -7. Lag-alarm wiring + dashboard. (~1 day) -8. End-to-end smoke from a real ledger-data event in `dev`. (~1 day) +| # | Item | Est. days | +|---|---|---| +| 1 | Replace `LocalDiskFetcher` with `aws_sdk_s3` GetObject + HeadObject. | 1 | +| 2 | Replace `StubFileCursor` with the cursor strategy chosen in D.1. | 1 | +| 3 | Replace `StdoutJsonSink` / `SqlFileSink` with `db_clickhouse::mtls`-backed `clickhouse::Client` + ADR 0004 merge SQL. | 2 | +| 4 | CDK stack — Lambda function, role, SQS queue + DLQ, S3 notification on BE's bucket, env vars from `/platform/{env}/*` SSM reads, CW alarms. | 3 | +| 5 | mTLS cert issuance + Caddy `CN_USER_MAP` change with BE + cert upload to Secrets Manager. | 1 | +| 6 | Cross-stack handshake — publish Lambda role ARN under `/prices/{env}/...`, BE consumes in their CDK. | 0.5 | +| 7 | Pin `xdr-parser` from commit-sha to first tagged release. | 0.5 | +| 8 | Lag-alarm wiring + dashboard. | 1 | +| 9 | End-to-end smoke from a real `dev`-bucket doorbell. | 1 | -Total once gates clear: roughly 10 engineering days. 
+**Total once gates clear: ~11 engineering days.** --- ## Appendix — references +### Code in BE repo (`soroban-block-explorer/`) +- `crates/indexer/src/main.rs` — cold-start shape, env-var contract +- `crates/indexer/src/handler/mod.rs` — doorbell-cursor reconcile loop +- `crates/indexer/src/handler/process.rs` — `parse_ledger` walk +- `crates/xdr-parser/` — XDR decode crate we'll depend on +- `crates/db-clickhouse/src/mtls.rs` — reusable mTLS client builder +- `infra/src/lib/stacks/compute-stack.ts` — Lambda + SQS + DLQ CDK wiring +- `infra-hetzner/Caddyfile` — `CLICKHOUSE_CN_USER_MAP` +- `infra-hetzner/ca/README.md` — cert issuance procedure + +### Local docs - General overview §5.2 — Prices Ledger Processor (Rust) - ADR 0001 — Stream 1 historical backfill (CH-sourced) - ADR 0003 — `price_ohlcv` PK shape with `quote_asset_id` From 10b60a3678998935c14a4f61d46473eef5ed7e0a Mon Sep 17 00:00:00 2001 From: karczuRF Date: Mon, 8 Jun 2026 15:19:58 +0200 Subject: [PATCH 04/17] feat(lore-0038): Phase 1 scaffolding for the local-prototype Lambda MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add packages/prices-ledger-processor — the local-only prototype of the Prices Ledger Processor Lambda. Mirrors BE's production indexer shape (doorbell-cursor reconcile, deterministic Galexie key derivation, [50,200,800]ms retry envelope, safe-error redaction) but runs against fixture files on disk through trait-seam abstractions that swap to AWS in production. 
Three trait seams sized for production swap: - ObjectFetcher (LocalDiskFetcher → aws_sdk_s3::get_object) - Cursor (StubFileCursor → CH-backed cursor per G-note D.1) - OhlcvSink (StdoutJsonSink / SqlFileSink → clickhouse::Client) The reconcile loop reads the cursor, derives the next S3 key, fetches via the trait, decodes via an injected LedgerDecoder, calls ledger_processor::dispatch per (tx_id, contract_id) group, buckets trades into 1-min OHLCV per ADR 0004, writes to the sink, and advances the cursor LAST (the ordering barrier mirroring BE's "ledgers row written last" pattern). Phase 1 simplifications flagged for the BE meeting: - LedgerDecoder is a no-op in the binary; tests use a fake decoder that produces ledger-sequence-only DecodedLedger values. The real xdr-parser walk lands in Phase 2. - Canonical (base, quote) is lexicographic on (token_in, token_out) — placeholder, not a production policy. - Prices are f64; volumes are i128. 27 tests pass (23 unit + 4 e2e). Workspace builds clean; clippy clean on the new crate. 
--- Cargo.lock | 55 ++++ Cargo.toml | 1 + packages/prices-ledger-processor/.gitignore | 2 + packages/prices-ledger-processor/Cargo.toml | 29 ++ .../prices-ledger-processor/src/bucket.rs | 290 ++++++++++++++++++ .../prices-ledger-processor/src/cursor/mod.rs | 24 ++ .../src/cursor/stub_file.rs | 58 ++++ .../src/galexie_key.rs | 49 +++ packages/prices-ledger-processor/src/lib.rs | 8 + packages/prices-ledger-processor/src/main.rs | 111 +++++++ .../src/object_fetcher/local_disk.rs | 56 ++++ .../src/object_fetcher/mod.rs | 24 ++ .../prices-ledger-processor/src/reconcile.rs | 141 +++++++++ packages/prices-ledger-processor/src/retry.rs | 117 +++++++ .../prices-ledger-processor/src/safe_log.rs | 62 ++++ .../prices-ledger-processor/src/sink/mod.rs | 23 ++ .../src/sink/sql_file.rs | 66 ++++ .../src/sink/stdout.rs | 14 + .../tests/reconcile_e2e.rs | 202 ++++++++++++ 19 files changed, 1332 insertions(+) create mode 100644 packages/prices-ledger-processor/.gitignore create mode 100644 packages/prices-ledger-processor/Cargo.toml create mode 100644 packages/prices-ledger-processor/src/bucket.rs create mode 100644 packages/prices-ledger-processor/src/cursor/mod.rs create mode 100644 packages/prices-ledger-processor/src/cursor/stub_file.rs create mode 100644 packages/prices-ledger-processor/src/galexie_key.rs create mode 100644 packages/prices-ledger-processor/src/lib.rs create mode 100644 packages/prices-ledger-processor/src/main.rs create mode 100644 packages/prices-ledger-processor/src/object_fetcher/local_disk.rs create mode 100644 packages/prices-ledger-processor/src/object_fetcher/mod.rs create mode 100644 packages/prices-ledger-processor/src/reconcile.rs create mode 100644 packages/prices-ledger-processor/src/retry.rs create mode 100644 packages/prices-ledger-processor/src/safe_log.rs create mode 100644 packages/prices-ledger-processor/src/sink/mod.rs create mode 100644 packages/prices-ledger-processor/src/sink/sql_file.rs create mode 100644 
packages/prices-ledger-processor/src/sink/stdout.rs create mode 100644 packages/prices-ledger-processor/tests/reconcile_e2e.rs diff --git a/Cargo.lock b/Cargo.lock index 982514c..350aaf8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -460,6 +460,12 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -900,6 +906,12 @@ version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + [[package]] name = "litemap" version = "0.8.2" @@ -1050,6 +1062,23 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prices-ledger-processor" +version = "0.1.0" +dependencies = [ + "clap", + "extractors-core", + "ledger-processor", + "phoenix-extractor", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "proc-macro-crate" version = "3.5.0" @@ -1250,6 +1279,19 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -1502,6 +1544,19 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tempfile" +version = "3.27.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys", +] + [[package]] name = "thiserror" version = "1.0.69" diff --git a/Cargo.toml b/Cargo.toml index 4573767..7466494 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "packages/soroswap-extractor", "packages/aquarius-extractor", "packages/ledger-processor", + "packages/prices-ledger-processor", ] [workspace.dependencies] diff --git a/packages/prices-ledger-processor/.gitignore b/packages/prices-ledger-processor/.gitignore new file mode 100644 index 0000000..0c1fa53 --- /dev/null +++ b/packages/prices-ledger-processor/.gitignore @@ -0,0 +1,2 @@ +fixtures/ +out/ diff --git a/packages/prices-ledger-processor/Cargo.toml b/packages/prices-ledger-processor/Cargo.toml new file mode 100644 index 0000000..f9b4b29 --- /dev/null +++ b/packages/prices-ledger-processor/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "prices-ledger-processor" +version = "0.1.0" +edition = "2024" +description = "Prices Ledger Processor — local-only prototype (task 0038) of the live ingestion Lambda" + +[lib] +name = "prices_ledger_processor" +path = "src/lib.rs" + +[[bin]] +name = "prices-ledger-processor" +path = "src/main.rs" + +[dependencies] +extractors-core = { path = "../extractors-core" } +ledger-processor = { path = "../ledger-processor" } +phoenix-extractor = { path = "../phoenix-extractor" } + +clap = { workspace = true } +serde = { workspace = true } +serde_json = "1" +thiserror = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } + +[dev-dependencies] +tempfile = "3" diff --git a/packages/prices-ledger-processor/src/bucket.rs b/packages/prices-ledger-processor/src/bucket.rs new file mode 100644 index 0000000..d1e6cdb --- /dev/null +++ 
b/packages/prices-ledger-processor/src/bucket.rs @@ -0,0 +1,290 @@ +//! 1-minute OHLCV bucketing. +//! +//! Per ADR 0004 §Decision: incremental-merge update preserves `open`, +//! overwrites `close`, takes `GREATEST(high)` / `LEAST(low)`, sums +//! `volume_base` / `volume_quote` / `trade_count`, and accumulates +//! VWAP numerator/denominator pairs. +//! +//! Prototype simplifications (flagged for the BE meeting): +//! - Canonical `(base, quote)` is the lexicographically smaller / +//! larger of `(token_in, token_out)`. Production policy may differ. +//! - Prices are `f64`. Production may want a fixed-point or rational +//! representation; the merge formula is identical either way. + +use std::collections::HashMap; + +use extractors_core::{TradeRow, Venue}; +use serde::Serialize; + +const GRANULARITY_ONE_MINUTE: &str = "1m"; +const ONE_MINUTE_SECS: i64 = 60; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] +pub struct OhlcvKey { + pub timestamp_minute: i64, + pub asset_id: String, + pub granularity: String, + pub quote_asset_id: String, + pub source: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct OhlcvRow { + pub key: OhlcvKey, + pub open: f64, + pub high: f64, + pub low: f64, + pub close: f64, + pub volume_base: i128, + pub volume_quote: i128, + pub trade_count: u64, + pub vwap_numerator: f64, + pub vwap_denominator: i128, +} + +impl OhlcvRow { + fn merge(&mut self, price: f64, volume_base: i128, volume_quote: i128) { + if price > self.high { + self.high = price; + } + if price < self.low { + self.low = price; + } + self.close = price; + self.volume_base = self.volume_base.saturating_add(volume_base); + self.volume_quote = self.volume_quote.saturating_add(volume_quote); + self.trade_count += 1; + self.vwap_numerator += price * (volume_quote as f64); + self.vwap_denominator = self.vwap_denominator.saturating_add(volume_quote); + } + + pub fn vwap(&self) -> Option { + if self.vwap_denominator == 0 { + None + } else { + 
Some(self.vwap_numerator / (self.vwap_denominator as f64)) + } + } +} + +pub struct Bucketer { + by_key: HashMap, +} + +impl Bucketer { + pub fn new() -> Self { + Self { + by_key: HashMap::new(), + } + } + + pub fn ingest(&mut self, closed_at_unix_seconds: i64, trade: &TradeRow) { + let (asset_id, quote_asset_id, amount_base, amount_quote) = canonical_pair( + &trade.token_in, + &trade.token_out, + trade.amount_in, + trade.amount_out, + ); + if amount_base == 0 { + return; + } + let price = (amount_quote as f64) / (amount_base as f64); + let key = OhlcvKey { + timestamp_minute: floor_to_minute(closed_at_unix_seconds), + asset_id, + granularity: GRANULARITY_ONE_MINUTE.to_string(), + quote_asset_id, + source: venue_to_source(&trade.venue).to_string(), + }; + self.by_key + .entry(key.clone()) + .and_modify(|row| row.merge(price, amount_base, amount_quote)) + .or_insert_with(|| OhlcvRow { + key, + open: price, + high: price, + low: price, + close: price, + volume_base: amount_base, + volume_quote: amount_quote, + trade_count: 1, + vwap_numerator: price * (amount_quote as f64), + vwap_denominator: amount_quote, + }); + } + + pub fn drain(&mut self) -> Vec { + let mut rows: Vec = self.by_key.drain().map(|(_, v)| v).collect(); + rows.sort_by(|a, b| { + a.key + .timestamp_minute + .cmp(&b.key.timestamp_minute) + .then_with(|| a.key.asset_id.cmp(&b.key.asset_id)) + .then_with(|| a.key.quote_asset_id.cmp(&b.key.quote_asset_id)) + .then_with(|| a.key.source.cmp(&b.key.source)) + }); + rows + } +} + +impl Default for Bucketer { + fn default() -> Self { + Self::new() + } +} + +fn floor_to_minute(unix_seconds: i64) -> i64 { + (unix_seconds / ONE_MINUTE_SECS) * ONE_MINUTE_SECS +} + +fn canonical_pair( + token_in: &str, + token_out: &str, + amount_in: i128, + amount_out: i128, +) -> (String, String, i128, i128) { + if token_in <= token_out { + ( + token_in.to_string(), + token_out.to_string(), + amount_in, + amount_out, + ) + } else { + ( + token_out.to_string(), + 
token_in.to_string(), + amount_out, + amount_in, + ) + } +} + +fn venue_to_source(v: &Venue) -> &'static str { + match v { + Venue::Soroswap => "soroswap", + Venue::Aquarius => "aquarius", + Venue::Phoenix => "phoenix", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use extractors_core::Venue; + + fn trade( + venue: Venue, + token_in: &str, + token_out: &str, + amount_in: i128, + amount_out: i128, + ) -> TradeRow { + TradeRow { + venue, + contract_id: "C".into(), + transaction_id: "T".into(), + ledger_sequence: 1, + first_event_index: 0, + token_in: token_in.into(), + token_out: token_out.into(), + amount_in, + amount_out, + fee: None, + trader: None, + } + } + + #[test] + fn floor_to_minute_rounds_down() { + assert_eq!(floor_to_minute(0), 0); + assert_eq!(floor_to_minute(59), 0); + assert_eq!(floor_to_minute(60), 60); + assert_eq!(floor_to_minute(125), 120); + } + + #[test] + fn canonical_pair_orders_lexicographically() { + let (b, q, ab, aq) = canonical_pair("USDC", "XLM", 100, 200); + assert_eq!((b.as_str(), q.as_str(), ab, aq), ("USDC", "XLM", 100, 200)); + + let (b, q, ab, aq) = canonical_pair("XLM", "USDC", 100, 200); + assert_eq!((b.as_str(), q.as_str(), ab, aq), ("USDC", "XLM", 200, 100)); + } + + #[test] + fn single_trade_seeds_open_high_low_close_equal() { + let mut b = Bucketer::new(); + b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); + let rows = b.drain(); + assert_eq!(rows.len(), 1); + let r = &rows[0]; + assert_eq!(r.open, 5.0); + assert_eq!(r.high, 5.0); + assert_eq!(r.low, 5.0); + assert_eq!(r.close, 5.0); + assert_eq!(r.volume_base, 10); + assert_eq!(r.volume_quote, 50); + assert_eq!(r.trade_count, 1); + assert_eq!(r.vwap(), Some(5.0)); + } + + #[test] + fn merges_two_trades_same_bucket() { + let mut b = Bucketer::new(); + b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); // price=5 + b.ingest(1_700_000_030, &trade(Venue::Phoenix, "USDC", "XLM", 5, 30)); // price=6 + let rows = b.drain(); + 
assert_eq!(rows.len(), 1); + let r = &rows[0]; + assert_eq!(r.open, 5.0); + assert_eq!(r.close, 6.0); + assert_eq!(r.high, 6.0); + assert_eq!(r.low, 5.0); + assert_eq!(r.volume_base, 15); + assert_eq!(r.volume_quote, 80); + assert_eq!(r.trade_count, 2); + // VWAP = (5*50 + 6*30) / (50+30) = (250 + 180) / 80 = 430/80 = 5.375 + let vwap = r.vwap().unwrap(); + assert!((vwap - 5.375).abs() < 1e-9); + } + + #[test] + fn different_minute_separate_buckets() { + let mut b = Bucketer::new(); + b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); + b.ingest(1_700_000_090, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); + assert_eq!(b.drain().len(), 2); + } + + #[test] + fn reverse_direction_same_pair_merges() { + // A swap USDC→XLM at price 5, then a swap XLM→USDC at amount_in=30, amount_out=5 + // → canonical pair is still (USDC, XLM) but with flipped base/quote on the input. + let mut b = Bucketer::new(); + b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); + b.ingest(1_700_000_030, &trade(Venue::Phoenix, "XLM", "USDC", 30, 5)); + let rows = b.drain(); + assert_eq!(rows.len(), 1, "reverse direction must collapse to one key"); + let r = &rows[0]; + assert_eq!(r.trade_count, 2); + } + + #[test] + fn different_source_separate_buckets() { + let mut b = Bucketer::new(); + b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); + b.ingest( + 1_700_000_000, + &trade(Venue::Soroswap, "USDC", "XLM", 10, 50), + ); + assert_eq!(b.drain().len(), 2); + } + + #[test] + fn zero_amount_in_skipped() { + let mut b = Bucketer::new(); + b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 0, 50)); + assert_eq!(b.drain().len(), 0); + } +} diff --git a/packages/prices-ledger-processor/src/cursor/mod.rs b/packages/prices-ledger-processor/src/cursor/mod.rs new file mode 100644 index 0000000..117185f --- /dev/null +++ b/packages/prices-ledger-processor/src/cursor/mod.rs @@ -0,0 +1,24 @@ +//! 
Cursor trait — the production-swap seam for ledger-sequence state. +//! In prod this reads from / writes to a ClickHouse cursor table +//! (see G-note Part D.1 for the design question). + +use std::future::Future; + +pub mod stub_file; + +pub use stub_file::StubFileCursor; + +#[derive(Debug, thiserror::Error)] +pub enum CursorError { + #[error("cursor read failed: {0}")] + Read(String), + #[error("cursor write failed: {0}")] + Write(String), + #[error("cursor value malformed: {0}")] + Parse(String), +} + +pub trait Cursor { + fn read(&self) -> impl Future> + Send; + fn write(&self, value: u64) -> impl Future> + Send; +} diff --git a/packages/prices-ledger-processor/src/cursor/stub_file.rs b/packages/prices-ledger-processor/src/cursor/stub_file.rs new file mode 100644 index 0000000..125851d --- /dev/null +++ b/packages/prices-ledger-processor/src/cursor/stub_file.rs @@ -0,0 +1,58 @@ +use std::path::{Path, PathBuf}; + +use super::{Cursor, CursorError}; + +pub struct StubFileCursor { + path: PathBuf, +} + +impl StubFileCursor { + pub fn new(path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + } + } +} + +impl Cursor for StubFileCursor { + async fn read(&self) -> Result { + let raw = tokio::fs::read_to_string(&self.path) + .await + .map_err(|e| CursorError::Read(e.to_string()))?; + raw.trim() + .parse::() + .map_err(|e| CursorError::Parse(e.to_string())) + } + + async fn write(&self, value: u64) -> Result<(), CursorError> { + if let Some(parent) = self.path.parent() { + tokio::fs::create_dir_all(parent) + .await + .map_err(|e| CursorError::Write(e.to_string()))?; + } + tokio::fs::write(&self.path, format!("{value}\n")) + .await + .map_err(|e| CursorError::Write(e.to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[tokio::test] + async fn write_then_read_roundtrips() { + let dir = tempdir().unwrap(); + let c = StubFileCursor::new(dir.path().join("cursor.txt")); + c.write(62_528_059).await.unwrap(); + 
assert_eq!(c.read().await.unwrap(), 62_528_059); + } + + #[tokio::test] + async fn missing_file_errors() { + let dir = tempdir().unwrap(); + let c = StubFileCursor::new(dir.path().join("nope.txt")); + assert!(matches!(c.read().await, Err(CursorError::Read(_)))); + } +} diff --git a/packages/prices-ledger-processor/src/galexie_key.rs b/packages/prices-ledger-processor/src/galexie_key.rs new file mode 100644 index 0000000..29fdcf3 --- /dev/null +++ b/packages/prices-ledger-processor/src/galexie_key.rs @@ -0,0 +1,49 @@ +//! S3 key derivation for Galexie ledger objects. +//! +//! Mirrors BE's indexer (`soroban-block-explorer/crates/indexer/src/handler/mod.rs:263`). +//! Coupled to Galexie's datastore schema (`ledgers_per_file = 1`, +//! `files_per_partition = 64000`). A wrong key reads as a gap and stalls the tail. + +const FILES_PER_PARTITION: i64 = 64_000; + +pub fn ledger_s3_key(ledger: i64) -> String { + let part_start = (ledger / FILES_PER_PARTITION) * FILES_PER_PARTITION; + let part_end = part_start + FILES_PER_PARTITION - 1; + let part_prefix = 0xFFFF_FFFFu32 - part_start as u32; + let file_prefix = 0xFFFF_FFFFu32 - ledger as u32; + format!("{part_prefix:08X}--{part_start}-{part_end}/{file_prefix:08X}--{ledger}.xdr.zst") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn matches_verified_live_key() { + // From BE: L = 62528059 → FC45E5FF--62528000-62591999/FC45E5C4--62528059.xdr.zst + assert_eq!( + ledger_s3_key(62_528_059), + "FC45E5FF--62528000-62591999/FC45E5C4--62528059.xdr.zst" + ); + } + + #[test] + fn ledgers_in_same_partition_share_prefix() { + let key_a = ledger_s3_key(64_000); + let key_b = ledger_s3_key(127_999); + let prefix_a = key_a.split('/').next().unwrap(); + let prefix_b = key_b.split('/').next().unwrap(); + assert_eq!(prefix_a, prefix_b); + assert!(prefix_a.ends_with("--64000-127999")); + } + + #[test] + fn partition_boundary_changes_prefix() { + let last = ledger_s3_key(127_999); + let first_next = ledger_s3_key(128_000); + let 
prefix_last = last.split('/').next().unwrap(); + let prefix_next = first_next.split('/').next().unwrap(); + assert_ne!(prefix_last, prefix_next); + assert!(prefix_next.ends_with("--128000-191999")); + } +} diff --git a/packages/prices-ledger-processor/src/lib.rs b/packages/prices-ledger-processor/src/lib.rs new file mode 100644 index 0000000..fbd8a19 --- /dev/null +++ b/packages/prices-ledger-processor/src/lib.rs @@ -0,0 +1,8 @@ +pub mod bucket; +pub mod cursor; +pub mod galexie_key; +pub mod object_fetcher; +pub mod reconcile; +pub mod retry; +pub mod safe_log; +pub mod sink; diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs new file mode 100644 index 0000000..70289de --- /dev/null +++ b/packages/prices-ledger-processor/src/main.rs @@ -0,0 +1,111 @@ +use std::path::PathBuf; + +use clap::{Parser, ValueEnum}; +use extractors_core::VenueRegistry; +use phoenix_extractor::PhoenixPoolRegistry; +use prices_ledger_processor::{ + cursor::{Cursor, StubFileCursor}, + object_fetcher::LocalDiskFetcher, + reconcile::{DecodedLedger, LedgerDecoder, Reconciler}, + sink::{SqlFileSink, StdoutJsonSink}, +}; +use tracing::info; + +#[derive(Parser, Debug)] +#[command( + name = "prices-ledger-processor", + about = "Local-only prototype of the Prices Ledger Processor Lambda (task 0038)" +)] +struct Args { + /// Initial cursor value (ledger sequence the run starts AFTER). + /// Always overwrites the cursor file before the run. + #[arg(long)] + cursor: u64, + + /// Maximum reconcile iterations per invocation. + #[arg(long, default_value_t = 16)] + max_iterations: usize, + + /// Sink selection. + #[arg(long, value_enum, default_value_t = SinkKind::Stdout)] + sink: SinkKind, + + /// Local fixture root — keys derived by `ledger_s3_key` are joined onto this. + #[arg(long, default_value = "fixtures/ledgers")] + fixtures_dir: PathBuf, + + /// Where the cursor file lives. 
+ #[arg(long, default_value = "out/cursor.txt")] + cursor_file: PathBuf, + + /// Where SQL-file sink output lands. + #[arg(long, default_value = "out")] + out_dir: PathBuf, +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +enum SinkKind { + Stdout, + SqlFile, +} + +/// Phase-1 no-op decoder. Returns an empty ledger list regardless of input, +/// so the loop exercises cursor / fetcher / sink wiring without a real +/// xdr-parser integration. Phase 2 replaces this with the real walk. +struct NoopDecoder; + +impl LedgerDecoder for NoopDecoder { + async fn decode(&self, _bytes: &[u8]) -> Result, String> { + Ok(Vec::new()) + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .json() + .init(); + + let args = Args::parse(); + + let cursor = StubFileCursor::new(&args.cursor_file); + cursor.write(args.cursor).await?; + + let fetcher = LocalDiskFetcher::new(&args.fixtures_dir); + + let stats = match args.sink { + SinkKind::Stdout => { + let reconciler = Reconciler { + fetcher, + cursor, + sink: StdoutJsonSink, + decoder: NoopDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + reconciler.run(args.max_iterations).await? + } + SinkKind::SqlFile => { + let reconciler = Reconciler { + fetcher, + cursor, + sink: SqlFileSink::new(&args.out_dir), + decoder: NoopDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + reconciler.run(args.max_iterations).await? 
+ } + }; + + info!( + start = stats.start_cursor, + end = stats.end_cursor, + persisted = stats.ledgers_persisted, + rows = stats.rows_emitted, + "reconcile complete" + ); + + Ok(()) +} diff --git a/packages/prices-ledger-processor/src/object_fetcher/local_disk.rs b/packages/prices-ledger-processor/src/object_fetcher/local_disk.rs new file mode 100644 index 0000000..dedb268 --- /dev/null +++ b/packages/prices-ledger-processor/src/object_fetcher/local_disk.rs @@ -0,0 +1,56 @@ +use std::path::{Path, PathBuf}; + +use super::{FetchError, ObjectFetcher}; + +pub struct LocalDiskFetcher { + root: PathBuf, +} + +impl LocalDiskFetcher { + pub fn new(root: impl AsRef) -> Self { + Self { + root: root.as_ref().to_path_buf(), + } + } +} + +impl ObjectFetcher for LocalDiskFetcher { + async fn fetch(&self, key: &str) -> Result>, FetchError> { + let path = self.root.join(key); + match tokio::fs::read(&path).await { + Ok(bytes) => Ok(Some(bytes)), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(source) => Err(FetchError::Io { + key: key.to_string(), + source, + }), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[tokio::test] + async fn hit_returns_bytes() { + let dir = tempdir().unwrap(); + let key = "subdir/file.bin"; + tokio::fs::create_dir_all(dir.path().join("subdir")) + .await + .unwrap(); + tokio::fs::write(dir.path().join(key), b"hello") + .await + .unwrap(); + let f = LocalDiskFetcher::new(dir.path()); + assert_eq!(f.fetch(key).await.unwrap(), Some(b"hello".to_vec())); + } + + #[tokio::test] + async fn miss_returns_none() { + let dir = tempdir().unwrap(); + let f = LocalDiskFetcher::new(dir.path()); + assert_eq!(f.fetch("nope").await.unwrap(), None); + } +} diff --git a/packages/prices-ledger-processor/src/object_fetcher/mod.rs b/packages/prices-ledger-processor/src/object_fetcher/mod.rs new file mode 100644 index 0000000..3ec7f13 --- /dev/null +++ b/packages/prices-ledger-processor/src/object_fetcher/mod.rs @@ 
-0,0 +1,24 @@ +//! Object-source trait — the production-swap seam where prototype-mode +//! local-disk reads become `aws_sdk_s3::Client::get_object` calls. + +use std::future::Future; + +pub mod local_disk; + +pub use local_disk::LocalDiskFetcher; + +#[derive(Debug, thiserror::Error)] +pub enum FetchError { + #[error("i/o error fetching {key}: {source}")] + Io { + key: String, + #[source] + source: std::io::Error, + }, +} + +pub trait ObjectFetcher { + /// `Ok(Some(bytes))` on hit, `Ok(None)` on miss (treat as a gap and + /// stop the reconcile run), `Err(...)` on a hard error. + fn fetch(&self, key: &str) -> impl Future>, FetchError>> + Send; +} diff --git a/packages/prices-ledger-processor/src/reconcile.rs b/packages/prices-ledger-processor/src/reconcile.rs new file mode 100644 index 0000000..c32e62e --- /dev/null +++ b/packages/prices-ledger-processor/src/reconcile.rs @@ -0,0 +1,141 @@ +//! Doorbell-cursor reconcile loop. +//! +//! Mirrors BE's indexer (`crates/indexer/src/handler/mod.rs:201`): +//! read cursor, derive next S3 key, fetch, decode, dispatch, bucket, +//! sink, advance cursor last. Stops at the first gap or +//! `max_iterations`. The cursor write is the **ordering barrier** — +//! a crash before it leaves the cursor unchanged and the next +//! invocation re-processes the same ledger (idempotent via the +//! ReplacingMergeTree / merge semantics in production; via the +//! pure-function bucketer in the prototype). 
+ +use std::future::Future; + +use extractors_core::{SorobanEventRow, VenueRegistry}; +use ledger_processor::dispatch::{DispatchError, dispatch}; +use phoenix_extractor::PhoenixPoolRegistry; +use tracing::{info, warn}; + +use crate::bucket::Bucketer; +use crate::cursor::{Cursor, CursorError}; +use crate::galexie_key::ledger_s3_key; +use crate::object_fetcher::{FetchError, ObjectFetcher}; +use crate::sink::{OhlcvSink, SinkError}; + +#[derive(Debug, thiserror::Error)] +pub enum ReconcileError { + #[error("cursor error: {0}")] + Cursor(#[from] CursorError), + #[error("fetch error: {0}")] + Fetch(#[from] FetchError), + #[error("decode error: {0}")] + Decode(String), + #[error("dispatch error: {0}")] + Dispatch(String), + #[error("sink error: {0}")] + Sink(#[from] SinkError), +} + +#[derive(Debug, Clone)] +pub struct DecodedLedger { + pub ledger_sequence: u64, + pub closed_at_unix_seconds: i64, + /// Soroban events grouped by `(transaction_id, contract_id)` — the + /// shape the kernel from task 0037 dispatches on. + pub event_groups: Vec>, +} + +pub trait LedgerDecoder { + fn decode( + &self, + bytes: &[u8], + ) -> impl Future, String>> + Send; +} + +#[derive(Debug, Clone, Default)] +pub struct RunStats { + pub start_cursor: u64, + pub end_cursor: u64, + pub ledgers_persisted: u64, + pub rows_emitted: u64, +} + +pub struct Reconciler { + pub fetcher: F, + pub cursor: C, + pub sink: S, + pub decoder: D, + pub venue_registry: VenueRegistry, + pub phoenix_registry: PhoenixPoolRegistry, +} + +impl Reconciler +where + F: ObjectFetcher + Sync, + C: Cursor + Sync, + S: OhlcvSink + Sync, + D: LedgerDecoder + Sync, +{ + pub async fn run(&self, max_iterations: usize) -> Result { + let start = self.cursor.read().await?; + let mut current = start; + let mut persisted = 0u64; + let mut rows_emitted = 0u64; + + for _ in 0..max_iterations { + let next = current + 1; + let key = ledger_s3_key(next as i64); + let Some(bytes) = self.fetcher.fetch(&key).await? 
else { + if persisted == 0 { + info!(next, "no new contiguous ledger — nothing to do"); + } else { + info!(next, persisted, "reached gap on S3 — contiguous run done"); + } + break; + }; + + let ledgers = self + .decoder + .decode(&bytes) + .await + .map_err(ReconcileError::Decode)?; + + let mut bucketer = Bucketer::new(); + let mut max_seq = current; + for ledger in ledgers { + for group in &ledger.event_groups { + let trades = match dispatch(group, &self.venue_registry, &self.phoenix_registry) + { + Ok(t) => t, + Err(DispatchError::VenueNotImplemented { venue, contract_id }) => { + warn!(?venue, %contract_id, "venue extractor not yet implemented — skipping"); + Vec::new() + } + Err(e) => return Err(ReconcileError::Dispatch(e.to_string())), + }; + for trade in &trades { + bucketer.ingest(ledger.closed_at_unix_seconds, trade); + } + } + if ledger.ledger_sequence > max_seq { + max_seq = ledger.ledger_sequence; + } + } + + let rows = bucketer.drain(); + rows_emitted += rows.len() as u64; + self.sink.write(&rows).await?; + self.cursor.write(max_seq).await?; + info!(ledger = max_seq, rows = rows.len(), "ledger persisted"); + current = max_seq; + persisted += 1; + } + + Ok(RunStats { + start_cursor: start, + end_cursor: current, + ledgers_persisted: persisted, + rows_emitted, + }) + } +} diff --git a/packages/prices-ledger-processor/src/retry.rs b/packages/prices-ledger-processor/src/retry.rs new file mode 100644 index 0000000..856c056 --- /dev/null +++ b/packages/prices-ledger-processor/src/retry.rs @@ -0,0 +1,117 @@ +//! Retry-with-backoff envelope mirroring BE's indexer +//! (`crates/indexer/src/handler/mod.rs:113`). +//! +//! `[50, 200, 800] ms` cadence — three retries, four wire calls total. +//! Only the caller knows which errors are transient; pass a classifier. + +use std::time::Duration; + +pub const DEFAULT_BACKOFF_MS: [u64; 3] = [50, 200, 800]; + +/// Returns `Ok(attempts)` where `attempts` is the retry count (0 = first +/// attempt succeeded). 
Errors classified as non-transient short-circuit. +pub async fn retry_with_backoff( + backoff_ms: &[u64], + is_transient: P, + mut attempt: F, +) -> Result<(T, u32), E> +where + F: FnMut() -> Fut, + Fut: std::future::Future>, + P: Fn(&E) -> bool, +{ + let mut tries: u32 = 0; + loop { + match attempt().await { + Ok(v) => return Ok((v, tries)), + Err(e) => { + if !is_transient(&e) || tries as usize >= backoff_ms.len() { + return Err(e); + } + tokio::time::sleep(Duration::from_millis(backoff_ms[tries as usize])).await; + tries += 1; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::Cell; + + #[derive(Debug, PartialEq)] + enum FakeErr { + Transient, + Permanent, + } + + #[tokio::test] + async fn succeeds_first_try() { + let (v, tries) = retry_with_backoff( + &[1, 1, 1], + |_: &FakeErr| true, + || async { Ok::(42) }, + ) + .await + .unwrap(); + assert_eq!(v, 42); + assert_eq!(tries, 0); + } + + #[tokio::test] + async fn retries_transient_then_succeeds() { + let calls = Cell::new(0u32); + let (v, tries) = retry_with_backoff( + &[1, 1, 1], + |e: &FakeErr| matches!(e, FakeErr::Transient), + || async { + let n = calls.get(); + calls.set(n + 1); + if n < 2 { + Err(FakeErr::Transient) + } else { + Ok::(7) + } + }, + ) + .await + .unwrap(); + assert_eq!(v, 7); + assert_eq!(tries, 2); + } + + #[tokio::test] + async fn permanent_error_short_circuits() { + let calls = Cell::new(0u32); + let err = retry_with_backoff( + &[1, 1, 1], + |e: &FakeErr| matches!(e, FakeErr::Transient), + || async { + calls.set(calls.get() + 1); + Err::(FakeErr::Permanent) + }, + ) + .await + .unwrap_err(); + assert_eq!(err, FakeErr::Permanent); + assert_eq!(calls.get(), 1); + } + + #[tokio::test] + async fn exhausts_backoff_then_fails() { + let calls = Cell::new(0u32); + let err = retry_with_backoff( + &[1, 1, 1], + |e: &FakeErr| matches!(e, FakeErr::Transient), + || async { + calls.set(calls.get() + 1); + Err::(FakeErr::Transient) + }, + ) + .await + .unwrap_err(); + 
assert_eq!(err, FakeErr::Transient); + assert_eq!(calls.get(), 4); // 1 initial + 3 retries + } +} diff --git a/packages/prices-ledger-processor/src/safe_log.rs b/packages/prices-ledger-processor/src/safe_log.rs new file mode 100644 index 0000000..8f8aea3 --- /dev/null +++ b/packages/prices-ledger-processor/src/safe_log.rs @@ -0,0 +1,62 @@ +//! Error redaction wrappers for log emission. +//! +//! Mirrors BE's `safe_error_message` / `safe_bad_response_token` +//! (`crates/indexer/src/handler/mod.rs:436-485`). +//! +//! Logging policy: NEVER stringify an upstream error whose `Display` +//! could embed row data (ClickHouse `BadResponse` is the canonical +//! example — its body echoes offending row values into the message). +//! Emit fixed labels plus, for HTTP/CH responses, only the leading +//! `Code: NNN` or HTTP status token. + +/// Extract ONLY the leading code/status token from a wire-error body. +/// Returns `"Code: NNN"` for a CH exception body, `"HTTP NNN"` for a +/// plain HTTP status line, or `"detail suppressed"` for anything else +/// where we cannot prove the remainder is data-free. +pub fn safe_response_token(msg: &str) -> String { + if let Some(rest) = msg.strip_prefix("Code: ") { + let digits: String = rest.chars().take_while(char::is_ascii_digit).collect(); + if !digits.is_empty() { + return format!("Code: {digits}"); + } + } + let leading: String = msg.chars().take_while(char::is_ascii_digit).collect(); + if leading.len() == 3 { + return format!("HTTP {leading}"); + } + "detail suppressed".to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ch_exception_extracts_code() { + assert_eq!( + safe_response_token("Code: 241. 
DB::Exception: foo bar=12345"), + "Code: 241" + ); + } + + #[test] + fn http_status_extracts_three_digits() { + assert_eq!( + safe_response_token("503 Service Unavailable: backend timeout"), + "HTTP 503" + ); + } + + #[test] + fn proxy_html_suppresses_everything() { + assert_eq!( + safe_response_token("Bad Gateway"), + "detail suppressed" + ); + } + + #[test] + fn malformed_code_prefix_suppresses() { + assert_eq!(safe_response_token("Code: abc."), "detail suppressed"); + } +} diff --git a/packages/prices-ledger-processor/src/sink/mod.rs b/packages/prices-ledger-processor/src/sink/mod.rs new file mode 100644 index 0000000..d1614f2 --- /dev/null +++ b/packages/prices-ledger-processor/src/sink/mod.rs @@ -0,0 +1,23 @@ +//! OHLCV sink trait — the production-swap seam where prototype-mode +//! stdout / SQL-file writes become a `clickhouse::Client` insert against +//! `prices.price_ohlcv` (per ADRs 0003, 0004, 0007). + +use std::future::Future; + +use crate::bucket::OhlcvRow; + +pub mod sql_file; +pub mod stdout; + +pub use sql_file::SqlFileSink; +pub use stdout::StdoutJsonSink; + +#[derive(Debug, thiserror::Error)] +pub enum SinkError { + #[error("sink write failed: {0}")] + Write(String), +} + +pub trait OhlcvSink { + fn write(&self, rows: &[OhlcvRow]) -> impl Future> + Send; +} diff --git a/packages/prices-ledger-processor/src/sink/sql_file.rs b/packages/prices-ledger-processor/src/sink/sql_file.rs new file mode 100644 index 0000000..5e24fb7 --- /dev/null +++ b/packages/prices-ledger-processor/src/sink/sql_file.rs @@ -0,0 +1,66 @@ +use std::path::{Path, PathBuf}; + +use super::{OhlcvSink, SinkError}; +use crate::bucket::OhlcvRow; + +/// Emits one `INSERT INTO prices.price_ohlcv ...` statement per row. +/// Production replaces this with a single `INSERT INTO prices.price_ohlcv ... +/// VALUES (...)` batched through `clickhouse::Client::insert`; the per-row +/// form here is what BE reads in the meeting to confirm the column shape. 
+pub struct SqlFileSink { + out_dir: PathBuf, +} + +impl SqlFileSink { + pub fn new(out_dir: impl AsRef) -> Self { + Self { + out_dir: out_dir.as_ref().to_path_buf(), + } + } +} + +impl OhlcvSink for SqlFileSink { + async fn write(&self, rows: &[OhlcvRow]) -> Result<(), SinkError> { + if rows.is_empty() { + return Ok(()); + } + tokio::fs::create_dir_all(&self.out_dir) + .await + .map_err(|e| SinkError::Write(e.to_string()))?; + let path = self.out_dir.join(format!( + "ohlcv-{}.sql", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis()) + .unwrap_or(0) + )); + let mut sql = String::new(); + for row in rows { + sql.push_str(&row_to_insert(row)); + sql.push('\n'); + } + tokio::fs::write(&path, sql) + .await + .map_err(|e| SinkError::Write(e.to_string())) + } +} + +fn row_to_insert(r: &OhlcvRow) -> String { + format!( + "INSERT INTO prices.price_ohlcv (timestamp, asset_id, granularity, quote_asset_id, source, open, high, low, close, volume_base, volume_quote, trade_count, vwap_num, vwap_den) VALUES ({ts}, '{aid}', '{gr}', '{qaid}', '{src}', {o}, {h}, {l}, {c}, {vb}, {vq}, {tc}, {vn}, {vd});", + ts = r.key.timestamp_minute, + aid = r.key.asset_id, + gr = r.key.granularity, + qaid = r.key.quote_asset_id, + src = r.key.source, + o = r.open, + h = r.high, + l = r.low, + c = r.close, + vb = r.volume_base, + vq = r.volume_quote, + tc = r.trade_count, + vn = r.vwap_numerator, + vd = r.vwap_denominator, + ) +} diff --git a/packages/prices-ledger-processor/src/sink/stdout.rs b/packages/prices-ledger-processor/src/sink/stdout.rs new file mode 100644 index 0000000..cdc87cf --- /dev/null +++ b/packages/prices-ledger-processor/src/sink/stdout.rs @@ -0,0 +1,14 @@ +use super::{OhlcvSink, SinkError}; +use crate::bucket::OhlcvRow; + +pub struct StdoutJsonSink; + +impl OhlcvSink for StdoutJsonSink { + async fn write(&self, rows: &[OhlcvRow]) -> Result<(), SinkError> { + for row in rows { + let line = serde_json::to_string(row).map_err(|e| 
SinkError::Write(e.to_string()))?; + println!("{line}"); + } + Ok(()) + } +} diff --git a/packages/prices-ledger-processor/tests/reconcile_e2e.rs b/packages/prices-ledger-processor/tests/reconcile_e2e.rs new file mode 100644 index 0000000..a73367e --- /dev/null +++ b/packages/prices-ledger-processor/tests/reconcile_e2e.rs @@ -0,0 +1,202 @@ +use std::sync::Mutex; + +use extractors_core::{SorobanEventRow, VenueRegistry}; +use phoenix_extractor::PhoenixPoolRegistry; +use prices_ledger_processor::{ + bucket::OhlcvRow, + cursor::{Cursor, StubFileCursor}, + galexie_key::ledger_s3_key, + object_fetcher::LocalDiskFetcher, + reconcile::{DecodedLedger, LedgerDecoder, Reconciler}, + sink::{OhlcvSink, SinkError}, +}; +use tempfile::tempdir; + +struct CaptureSink { + rows: Mutex>, +} + +impl OhlcvSink for CaptureSink { + async fn write(&self, rows: &[OhlcvRow]) -> Result<(), SinkError> { + self.rows.lock().unwrap().extend_from_slice(rows); + Ok(()) + } +} + +/// Returns one empty `DecodedLedger` for each fetched object, with the +/// ledger sequence parsed back out of the `--{seq}.xdr.zst` suffix in +/// the bytes (so the test can wire decode to fixture content trivially). +/// No event groups → no trades → no rows. Tests cursor + fetcher + loop. +struct EmptyDecoder; + +impl LedgerDecoder for EmptyDecoder { + async fn decode(&self, bytes: &[u8]) -> Result, String> { + let seq: u64 = std::str::from_utf8(bytes) + .map_err(|e| e.to_string())? + .trim() + .parse() + .map_err(|e: std::num::ParseIntError| e.to_string())?; + Ok(vec![DecodedLedger { + ledger_sequence: seq, + closed_at_unix_seconds: 1_700_000_000, + event_groups: Vec::new(), + }]) + } +} + +/// Returns one `DecodedLedger` with one event group whose first contract +/// is not in any registry → dispatch returns `Ok(vec![])`. Still no +/// trades, but proves the dispatch path executes. 
+struct SingleEmptyGroupDecoder; + +impl LedgerDecoder for SingleEmptyGroupDecoder { + async fn decode(&self, bytes: &[u8]) -> Result, String> { + let seq: u64 = std::str::from_utf8(bytes).unwrap().trim().parse().unwrap(); + Ok(vec![DecodedLedger { + ledger_sequence: seq, + closed_at_unix_seconds: 1_700_000_000, + event_groups: vec![vec![SorobanEventRow { + contract_id: "C-unknown".into(), + transaction_id: "T".into(), + ledger_sequence: seq, + event_index: 0, + topics: Vec::new(), + data: extractors_core::TaggedValue::Null, + }]], + }]) + } +} + +#[tokio::test] +async fn empty_fixture_dir_no_op_returns_zero_persisted() { + let dir = tempdir().unwrap(); + let cursor_path = dir.path().join("cursor.txt"); + let cursor = StubFileCursor::new(&cursor_path); + cursor.write(99).await.unwrap(); + + let reconciler = Reconciler { + fetcher: LocalDiskFetcher::new(dir.path().join("nope")), + cursor, + sink: CaptureSink { + rows: Mutex::new(Vec::new()), + }, + decoder: EmptyDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + + let stats = reconciler.run(8).await.unwrap(); + assert_eq!(stats.start_cursor, 99); + assert_eq!(stats.end_cursor, 99); + assert_eq!(stats.ledgers_persisted, 0); + assert_eq!(stats.rows_emitted, 0); +} + +#[tokio::test] +async fn contiguous_run_advances_cursor_until_gap() { + let dir = tempdir().unwrap(); + let fixtures = dir.path().join("ledgers"); + // Seed three contiguous "ledgers" 100, 101, 102, then a gap at 103. 
+ for seq in [100u64, 101, 102] { + let key = ledger_s3_key(seq as i64); + let path = fixtures.join(&key); + tokio::fs::create_dir_all(path.parent().unwrap()) + .await + .unwrap(); + tokio::fs::write(&path, format!("{seq}")).await.unwrap(); + } + + let cursor_path = dir.path().join("cursor.txt"); + let cursor = StubFileCursor::new(&cursor_path); + cursor.write(99).await.unwrap(); + + let reconciler = Reconciler { + fetcher: LocalDiskFetcher::new(&fixtures), + cursor, + sink: CaptureSink { + rows: Mutex::new(Vec::new()), + }, + decoder: EmptyDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + + let stats = reconciler.run(8).await.unwrap(); + assert_eq!(stats.start_cursor, 99); + assert_eq!(stats.end_cursor, 102); + assert_eq!(stats.ledgers_persisted, 3); + assert_eq!(stats.rows_emitted, 0); + + // Cursor file ends up at 102 — next invocation resumes here. + let cursor = StubFileCursor::new(&cursor_path); + assert_eq!(cursor.read().await.unwrap(), 102); +} + +#[tokio::test] +async fn unknown_contract_dispatch_does_not_fail() { + let dir = tempdir().unwrap(); + let fixtures = dir.path().join("ledgers"); + let key = ledger_s3_key(200); + let path = fixtures.join(&key); + tokio::fs::create_dir_all(path.parent().unwrap()) + .await + .unwrap(); + tokio::fs::write(&path, "200").await.unwrap(); + + let cursor_path = dir.path().join("cursor.txt"); + let cursor = StubFileCursor::new(&cursor_path); + cursor.write(199).await.unwrap(); + + let reconciler = Reconciler { + fetcher: LocalDiskFetcher::new(&fixtures), + cursor, + sink: CaptureSink { + rows: Mutex::new(Vec::new()), + }, + decoder: SingleEmptyGroupDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + + let stats = reconciler.run(2).await.unwrap(); + assert_eq!(stats.ledgers_persisted, 1); + assert_eq!(stats.end_cursor, 200); +} + +#[tokio::test] +async fn idempotent_on_re_run_from_same_cursor() { + let dir = 
tempdir().unwrap(); + let fixtures = dir.path().join("ledgers"); + let key = ledger_s3_key(50); + let path = fixtures.join(&key); + tokio::fs::create_dir_all(path.parent().unwrap()) + .await + .unwrap(); + tokio::fs::write(&path, "50").await.unwrap(); + + let cursor_path = dir.path().join("cursor.txt"); + + let run = || async { + let cursor = StubFileCursor::new(&cursor_path); + cursor.write(49).await.unwrap(); + let sink = CaptureSink { + rows: Mutex::new(Vec::new()), + }; + let reconciler = Reconciler { + fetcher: LocalDiskFetcher::new(&fixtures), + cursor, + sink, + decoder: EmptyDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + reconciler.run(8).await.unwrap() + }; + + let first = run().await; + let second = run().await; + assert_eq!(first.start_cursor, second.start_cursor); + assert_eq!(first.end_cursor, second.end_cursor); + assert_eq!(first.ledgers_persisted, second.ledgers_persisted); + assert_eq!(first.rows_emitted, second.rows_emitted); +} From fb57196cb5591eb382a44905f6781752c8ec14e0 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Mon, 8 Jun 2026 16:22:52 +0200 Subject: [PATCH 05/17] =?UTF-8?q?feat(lore-0038):=20Phase=202=20=E2=80=94?= =?UTF-8?q?=20real=20XDR=20decode=20+=20Lambda=20entrypoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the Phase 1 scaffolding to BE's actual xdr-parser crate (workspace dep already pointing at soroban-block-explorer's develop branch). The reconcile loop now decompresses .xdr.zst payloads, deserializes LedgerCloseMetaBatch via stellar-xdr, walks V0/V1/V2 tx_processing, calls xdr_parser::extract_events per tx, adapts the returned tagged JSON to extractors-core::TaggedValue, and groups by (tx_id, contract_id) for dispatch. 
The event filter keeps TxLevel + PerOp (Protocol 23+) sources and drops Diagnostic — diagnostic events can include byte-identical Contract-typed mirrors of consensus events (BE task 0182), which would double-count if ingested. Binary topology now matches the spec's two modes: - src/main.rs is the Lambda entrypoint named `prices-ledger-processor` — wires an SqsEvent handler that ignores the doorbell body, runs reconcile, and returns SqsBatchResponse with per-message batch-item-failures on hard errors (mirrors BE's pattern). - src/bin/cli.rs is the CLI binary named `prices-cli` for local iteration via `cargo run --bin prices-cli`. JSON→TaggedValue adapter covers the AMM-relevant ScVal shapes (sym, string, address, i128, u128, vec, map); unsupported types collapse to Null. 7 new unit tests verify the adapter on direct and nested shapes. Manual smoke: copied 3 contiguous BE ledgers (62460540-62460542) into the gitignored fixtures dir and ran: prices-cli --cursor 62460539 --max-iterations 5 \ --fixtures-dir packages/prices-ledger-processor/fixtures/ledgers \ --cursor-file /tmp/prices-smoke-cursor.txt Result: 3 ledgers processed end-to-end, cursor advanced 62460539 → 62460542, "reached gap on S3" at 62460543 as expected. Zero trades emitted because VenueRegistry is empty (no Phoenix pools registered) — kernel correctness, not decode behaviour; populating the registry is configuration work tracked separately. Workspace builds clean, clippy clean on this crate, 34 tests pass (23 Phase 1 unit + 7 new decode + 4 e2e). 
--- Cargo.lock | 361 +++++++++++++++++- packages/prices-ledger-processor/Cargo.toml | 11 + .../prices-ledger-processor/src/bin/cli.rs | 101 +++++ .../prices-ledger-processor/src/decode.rs | 293 ++++++++++++++ packages/prices-ledger-processor/src/lib.rs | 1 + packages/prices-ledger-processor/src/main.rs | 180 ++++----- 6 files changed, 856 insertions(+), 91 deletions(-) create mode 100644 packages/prices-ledger-processor/src/bin/cli.rs create mode 100644 packages/prices-ledger-processor/src/decode.rs diff --git a/Cargo.lock b/Cargo.lock index 350aaf8..5c5692a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,28 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -106,6 +128,18 @@ version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" +[[package]] +name = "aws_lambda_events" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144ec7565561115498a288850cc6a42b279e09b6c4b88f623eecb9c8ca96c08c" +dependencies = [ + "base64", + "serde", + "serde_json", + "serde_with", +] + [[package]] name = "base64" version = "0.22.1" @@ -163,6 +197,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "bs58" 
+version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bstr" version = "1.12.1" @@ -387,12 +430,56 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.117", +] + [[package]] name = "data-encoding" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] + [[package]] name = "digest" version = "0.10.7" @@ -425,6 +512,12 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "equivalent" version = "1.0.2" @@ -668,6 +761,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] 
+name = "http-serde" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f056c8559e3757392c8d091e796416e4649d8e49e88b8d76df6c002f05027fd" +dependencies = [ + "http", + "serde", +] + [[package]] name = "httparse" version = "1.10.1" @@ -820,6 +923,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -841,6 +950,17 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + [[package]] name = "indexmap" version = "2.14.0" @@ -849,6 +969,8 @@ checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", "hashbrown 0.17.1", + "serde", + "serde_core", ] [[package]] @@ -885,6 +1007,55 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lambda_runtime" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed49669d6430292aead991e19bf13153135a884f916e68f32997c951af637ebe" +dependencies = [ + "async-stream", + "base64", + "bytes", + "futures", + "http", + "http-body", + "http-body-util", + "http-serde", + "hyper", + "hyper-util", + "lambda_runtime_api_client", + "pin-project", + "serde", + "serde_json", + "serde_path_to_error", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tracing", +] + +[[package]] +name = "lambda_runtime_api_client" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c90a10f094475a34a04da2be11686c4dcfe214d93413162db9ffdff3d3af293a" +dependencies = [ + "bytes", + "futures-channel", + 
"futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "tokio", + "tower", + "tower-service", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -974,6 +1145,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "num-conv" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" + [[package]] name = "num-traits" version = "0.2.19" @@ -1032,6 +1209,26 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "pin-project" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -1053,6 +1250,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1066,17 +1269,22 @@ dependencies = [ name = "prices-ledger-processor" version = "0.1.0" dependencies = [ + "aws_lambda_events", "clap", "extractors-core", + "hex", + "lambda_runtime", "ledger-processor", "phoenix-extractor", "serde", "serde_json", + "stellar-xdr", "tempfile", "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", + "xdr-parser", ] [[package]] @@ -1201,6 +1409,26 @@ dependencies = [ "bitflags", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "regex-automata" version = "0.4.14" @@ -1298,6 +1526,30 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1391,6 +1643,49 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_with" +version = "3.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a5c54c7310e7b8b9577c286d7e399ddd876c3e12b3ed917a8aabc4b96e9e8c" +dependencies = [ + "base64", + "bs58", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.14.0", + "schemars 0.9.0", + "schemars 1.2.1", + "serde_core", + "serde_json", + 
"serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d57bc0c8b9a17920c178daa6bb924850d54a9c97ab45194bb8c17ad66bb660" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1606,6 +1901,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.3" @@ -1659,6 +1985,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml_datetime" version = "1.1.1+spec-1.1.0" @@ -1674,7 +2011,7 @@ version = "0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ - "indexmap", + "indexmap 2.14.0", "toml_datetime", "toml_parser", "winnow", @@ -1689,6 +2026,27 @@ dependencies = [ "winnow", ] +[[package]] +name = 
"tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = "tower-service" version = "0.3.3" @@ -1701,6 +2059,7 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", diff --git a/packages/prices-ledger-processor/Cargo.toml b/packages/prices-ledger-processor/Cargo.toml index f9b4b29..512dc6d 100644 --- a/packages/prices-ledger-processor/Cargo.toml +++ b/packages/prices-ledger-processor/Cargo.toml @@ -12,11 +12,18 @@ path = "src/lib.rs" name = "prices-ledger-processor" path = "src/main.rs" +[[bin]] +name = "prices-cli" +path = "src/bin/cli.rs" + [dependencies] extractors-core = { path = "../extractors-core" } ledger-processor = { path = "../ledger-processor" } phoenix-extractor = { path = "../phoenix-extractor" } +stellar-xdr = { workspace = true } +xdr-parser = { workspace = true } + clap = { workspace = true } serde = { workspace = true } serde_json = "1" @@ -25,5 +32,9 @@ tokio = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } +lambda_runtime = "0.13" +aws_lambda_events = { version = "0.16", default-features = false, features = ["sqs"] } +hex = "0.4" + [dev-dependencies] tempfile = "3" diff --git a/packages/prices-ledger-processor/src/bin/cli.rs b/packages/prices-ledger-processor/src/bin/cli.rs new file mode 100644 index 0000000..4dd2ece --- /dev/null +++ 
b/packages/prices-ledger-processor/src/bin/cli.rs @@ -0,0 +1,101 @@ +use std::path::PathBuf; + +use clap::{Parser, ValueEnum}; +use extractors_core::VenueRegistry; +use phoenix_extractor::PhoenixPoolRegistry; +use prices_ledger_processor::{ + cursor::{Cursor, StubFileCursor}, + decode::XdrLedgerDecoder, + object_fetcher::LocalDiskFetcher, + reconcile::Reconciler, + sink::{SqlFileSink, StdoutJsonSink}, +}; +use tracing::info; + +#[derive(Parser, Debug)] +#[command( + name = "prices-cli", + about = "Local CLI driver for the Prices Ledger Processor prototype (task 0038)" +)] +struct Args { + /// Initial cursor value (ledger sequence the run starts AFTER). + /// Always overwrites the cursor file before the run. + #[arg(long)] + cursor: u64, + + /// Maximum reconcile iterations per invocation. + #[arg(long, default_value_t = 16)] + max_iterations: usize, + + /// Sink selection. + #[arg(long, value_enum, default_value_t = SinkKind::Stdout)] + sink: SinkKind, + + /// Local fixture root — keys derived by `ledger_s3_key` are joined onto this. + #[arg(long, default_value = "fixtures/ledgers")] + fixtures_dir: PathBuf, + + /// Where the cursor file lives. + #[arg(long, default_value = "out/cursor.txt")] + cursor_file: PathBuf, + + /// Where SQL-file sink output lands. 
+ #[arg(long, default_value = "out")] + out_dir: PathBuf, +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +enum SinkKind { + Stdout, + SqlFile, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .json() + .init(); + + let args = Args::parse(); + + let cursor = StubFileCursor::new(&args.cursor_file); + cursor.write(args.cursor).await?; + + let fetcher = LocalDiskFetcher::new(&args.fixtures_dir); + + let stats = match args.sink { + SinkKind::Stdout => { + let reconciler = Reconciler { + fetcher, + cursor, + sink: StdoutJsonSink, + decoder: XdrLedgerDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + reconciler.run(args.max_iterations).await? + } + SinkKind::SqlFile => { + let reconciler = Reconciler { + fetcher, + cursor, + sink: SqlFileSink::new(&args.out_dir), + decoder: XdrLedgerDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }; + reconciler.run(args.max_iterations).await? + } + }; + + info!( + start = stats.start_cursor, + end = stats.end_cursor, + persisted = stats.ledgers_persisted, + rows = stats.rows_emitted, + "reconcile complete" + ); + + Ok(()) +} diff --git a/packages/prices-ledger-processor/src/decode.rs b/packages/prices-ledger-processor/src/decode.rs new file mode 100644 index 0000000..b703342 --- /dev/null +++ b/packages/prices-ledger-processor/src/decode.rs @@ -0,0 +1,293 @@ +//! XDR ledger decode + Soroban event walk. +//! +//! Wraps BE's `xdr_parser` crate: `decompress_zstd` → deserialize +//! `LedgerCloseMetaBatch` → per-ledger `extract_ledger` + per-tx +//! `extract_events`. The adapter converts xdr-parser's tagged JSON +//! representation of ScVal into the kernel's `TaggedValue` enum. +//! +//! Filter policy: only `EventSource::TxLevel` and `EventSource::PerOp` +//! (Protocol 23+) events are kept. 
Diagnostic events are dropped — +//! they can include byte-identical Contract-typed mirrors of consensus +//! events (BE task 0182), which would double-count. + +use std::collections::HashMap; + +use extractors_core::{SorobanEventRow, TaggedValue}; +use stellar_xdr::curr::{LedgerCloseMeta, LedgerCloseMetaBatch, Limits, ReadXdr, TransactionMeta}; +use xdr_parser::{decompress_zstd, extract_events, extract_ledger, types::EventSource}; + +use crate::reconcile::{DecodedLedger, LedgerDecoder}; + +pub struct XdrLedgerDecoder; + +impl LedgerDecoder for XdrLedgerDecoder { + async fn decode(&self, bytes: &[u8]) -> Result, String> { + let decompressed = decompress_zstd(bytes).map_err(|e| format!("decompress: {e:?}"))?; + + let batch = LedgerCloseMetaBatch::from_xdr(decompressed.as_slice(), Limits::none()) + .map_err(|e| format!("deserialize batch: {e}"))?; + + let mut out = Vec::with_capacity(batch.ledger_close_metas.len()); + for meta in batch.ledger_close_metas.iter() { + let header = extract_ledger(meta); + let event_groups = walk_ledger_events(meta, header.sequence, header.closed_at); + out.push(DecodedLedger { + ledger_sequence: header.sequence as u64, + closed_at_unix_seconds: header.closed_at, + event_groups, + }); + } + Ok(out) + } +} + +fn walk_ledger_events( + meta: &LedgerCloseMeta, + ledger_seq: u32, + closed_at: i64, +) -> Vec> { + let mut by_group: HashMap<(String, String), Vec> = HashMap::new(); + + for (tx_hash, tx_meta) in iter_tx_metas(meta) { + for evt in extract_events(tx_meta, &tx_hash, ledger_seq, closed_at) { + if !matches!(evt.source, EventSource::TxLevel | EventSource::PerOp) { + continue; + } + let Some(contract_id) = evt.contract_id.clone() else { + continue; + }; + let topics = match evt.topics { + serde_json::Value::Array(arr) => arr.iter().map(json_to_tagged).collect(), + _ => Vec::new(), + }; + let row = SorobanEventRow { + contract_id: contract_id.clone(), + transaction_id: evt.transaction_hash.clone(), + ledger_sequence: ledger_seq as u64, + 
event_index: evt.event_index, + topics, + data: json_to_tagged(&evt.data), + }; + by_group + .entry((evt.transaction_hash, contract_id)) + .or_default() + .push(row); + } + } + + // Stable order within each group: by event_index. Order across + // groups is HashMap-iteration order, which is acceptable because + // dispatch is per-group and the bucketer is commutative for + // distinct (timestamp, asset_id, source) keys. + let mut groups: Vec> = by_group.into_values().collect(); + for g in groups.iter_mut() { + g.sort_by_key(|r| r.event_index); + } + groups +} + +fn iter_tx_metas(meta: &LedgerCloseMeta) -> Vec<(String, &TransactionMeta)> { + match meta { + LedgerCloseMeta::V0(v) => v + .tx_processing + .iter() + .map(|p| { + ( + hex::encode(p.result.transaction_hash.0), + &p.tx_apply_processing, + ) + }) + .collect(), + LedgerCloseMeta::V1(v) => v + .tx_processing + .iter() + .map(|p| { + ( + hex::encode(p.result.transaction_hash.0), + &p.tx_apply_processing, + ) + }) + .collect(), + LedgerCloseMeta::V2(v) => v + .tx_processing + .iter() + .map(|p| { + ( + hex::encode(p.result.transaction_hash.0), + &p.tx_apply_processing, + ) + }) + .collect(), + } +} + +/// Convert one `{"type": "...", "value": ...}` tagged JSON node into a +/// `TaggedValue`. Types we don't yet handle (bool, u32, bytes, error, …) +/// collapse to `Null` — the kernel's Phoenix XYK extractor only inspects +/// sym/address/i128/vec/map shapes, so this is sufficient for the +/// extractors wired in by task 0037. Unsupported types become visible +/// to future extractors as `Null` and will need adapter extensions. 
+pub(crate) fn json_to_tagged(v: &serde_json::Value) -> TaggedValue { + let Some(obj) = v.as_object() else { + return TaggedValue::Null; + }; + let type_name = obj.get("type").and_then(|t| t.as_str()).unwrap_or(""); + let value = obj.get("value").unwrap_or(&serde_json::Value::Null); + match type_name { + "sym" => value + .as_str() + .map(|s| TaggedValue::Symbol(s.to_string())) + .unwrap_or(TaggedValue::Null), + "string" => value + .as_str() + .map(|s| TaggedValue::String(s.to_string())) + .unwrap_or(TaggedValue::Null), + "address" => value + .as_str() + .map(|s| TaggedValue::Address(s.to_string())) + .unwrap_or(TaggedValue::Null), + "i128" => value + .as_str() + .and_then(|s| s.parse::().ok()) + .map(TaggedValue::I128) + .unwrap_or(TaggedValue::Null), + "u128" => value + .as_str() + .and_then(|s| s.parse::().ok()) + .and_then(|u| i128::try_from(u).ok()) + .map(TaggedValue::I128) + .unwrap_or(TaggedValue::Null), + "vec" => match value.as_array() { + Some(arr) => TaggedValue::Vec(arr.iter().map(json_to_tagged).collect()), + None => TaggedValue::Null, + }, + "map" => match value.as_array() { + Some(arr) => TaggedValue::Map( + arr.iter() + .filter_map(|e| { + let k = e.get("key")?; + let v = e.get("value")?; + Some((json_to_tagged(k), json_to_tagged(v))) + }) + .collect(), + ), + None => TaggedValue::Null, + }, + _ => TaggedValue::Null, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn sym_address_i128_roundtrip() { + assert_eq!( + json_to_tagged(&json!({"type": "sym", "value": "swap"})), + TaggedValue::Symbol("swap".into()) + ); + assert_eq!( + json_to_tagged(&json!({"type": "address", "value": "CABCDEF"})), + TaggedValue::Address("CABCDEF".into()) + ); + assert_eq!( + json_to_tagged(&json!({"type": "i128", "value": "-12345"})), + TaggedValue::I128(-12_345) + ); + assert_eq!( + json_to_tagged(&json!({"type": "u128", "value": "12345"})), + TaggedValue::I128(12_345) + ); + } + + #[test] + fn vec_recursively_adapts() { + let v = 
json!({ + "type": "vec", + "value": [ + {"type": "sym", "value": "swap"}, + {"type": "address", "value": "CPOOL"}, + {"type": "address", "value": "CTRADER"}, + ], + }); + match json_to_tagged(&v) { + TaggedValue::Vec(items) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], TaggedValue::Symbol("swap".into())); + assert_eq!(items[1], TaggedValue::Address("CPOOL".into())); + } + other => panic!("expected Vec, got {other:?}"), + } + } + + #[test] + fn map_adapts_key_value_pairs() { + let v = json!({ + "type": "map", + "value": [ + {"key": {"type": "sym", "value": "token_in"}, + "value": {"type": "address", "value": "CXLM"}}, + {"key": {"type": "sym", "value": "amount_in"}, + "value": {"type": "i128", "value": "1000"}}, + ], + }); + match json_to_tagged(&v) { + TaggedValue::Map(pairs) => { + assert_eq!(pairs.len(), 2); + assert_eq!(pairs[0].0, TaggedValue::Symbol("token_in".into())); + assert_eq!(pairs[0].1, TaggedValue::Address("CXLM".into())); + assert_eq!(pairs[1].1, TaggedValue::I128(1000)); + } + other => panic!("expected Map, got {other:?}"), + } + } + + #[test] + fn nested_map_in_vec() { + let v = json!({ + "type": "vec", + "value": [{ + "type": "map", + "value": [{ + "key": {"type": "sym", "value": "k"}, + "value": {"type": "i128", "value": "1"}, + }], + }], + }); + let out = json_to_tagged(&v); + if let TaggedValue::Vec(items) = out { + assert!(matches!(items[0], TaggedValue::Map(_))); + } else { + panic!("expected outer Vec"); + } + } + + #[test] + fn unsupported_type_falls_back_to_null() { + assert_eq!( + json_to_tagged(&json!({"type": "bool", "value": true})), + TaggedValue::Null + ); + assert_eq!( + json_to_tagged(&json!({"type": "bytes", "value": "deadbeef"})), + TaggedValue::Null + ); + } + + #[test] + fn malformed_i128_falls_back_to_null() { + assert_eq!( + json_to_tagged(&json!({"type": "i128", "value": "not a number"})), + TaggedValue::Null + ); + } + + #[test] + fn missing_type_field_is_null() { + assert_eq!(json_to_tagged(&json!({"value": 
"x"})), TaggedValue::Null); + assert_eq!(json_to_tagged(&json!(null)), TaggedValue::Null); + assert_eq!(json_to_tagged(&json!("bare-string")), TaggedValue::Null); + } +} diff --git a/packages/prices-ledger-processor/src/lib.rs b/packages/prices-ledger-processor/src/lib.rs index fbd8a19..e459ca3 100644 --- a/packages/prices-ledger-processor/src/lib.rs +++ b/packages/prices-ledger-processor/src/lib.rs @@ -1,5 +1,6 @@ pub mod bucket; pub mod cursor; +pub mod decode; pub mod galexie_key; pub mod object_fetcher; pub mod reconcile; diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs index 70289de..04a19ad 100644 --- a/packages/prices-ledger-processor/src/main.rs +++ b/packages/prices-ledger-processor/src/main.rs @@ -1,111 +1,111 @@ +//! Lambda entrypoint — SQS doorbell handler. +//! +//! Mirrors BE's indexer cold-start shape (eager config validation, +//! structured JSON tracing, single shared state passed by reference to +//! every invocation). The SQS message body is **ignored**; each +//! invocation just runs the reconcile loop. +//! +//! Phase 2 prototype: the fetcher / cursor / sink are still the +//! local-disk stubs. The Lambda mode exists to prove the +//! `lambda_runtime` event-loop wires up cleanly — a `cargo lambda +//! invoke` against a stub doorbell event runs end-to-end. 
+ use std::path::PathBuf; +use std::sync::Arc; -use clap::{Parser, ValueEnum}; +use aws_lambda_events::sqs::{SqsBatchResponse, SqsEvent}; use extractors_core::VenueRegistry; +use lambda_runtime::{Error, LambdaEvent, service_fn}; use phoenix_extractor::PhoenixPoolRegistry; use prices_ledger_processor::{ - cursor::{Cursor, StubFileCursor}, - object_fetcher::LocalDiskFetcher, - reconcile::{DecodedLedger, LedgerDecoder, Reconciler}, - sink::{SqlFileSink, StdoutJsonSink}, + cursor::StubFileCursor, decode::XdrLedgerDecoder, object_fetcher::LocalDiskFetcher, + reconcile::Reconciler, sink::StdoutJsonSink, }; -use tracing::info; - -#[derive(Parser, Debug)] -#[command( - name = "prices-ledger-processor", - about = "Local-only prototype of the Prices Ledger Processor Lambda (task 0038)" -)] -struct Args { - /// Initial cursor value (ledger sequence the run starts AFTER). - /// Always overwrites the cursor file before the run. - #[arg(long)] - cursor: u64, - - /// Maximum reconcile iterations per invocation. - #[arg(long, default_value_t = 16)] - max_iterations: usize, - - /// Sink selection. - #[arg(long, value_enum, default_value_t = SinkKind::Stdout)] - sink: SinkKind, - - /// Local fixture root — keys derived by `ledger_s3_key` are joined onto this. - #[arg(long, default_value = "fixtures/ledgers")] - fixtures_dir: PathBuf, - - /// Where the cursor file lives. - #[arg(long, default_value = "out/cursor.txt")] - cursor_file: PathBuf, - - /// Where SQL-file sink output lands. - #[arg(long, default_value = "out")] - out_dir: PathBuf, -} - -#[derive(Copy, Clone, Debug, ValueEnum)] -enum SinkKind { - Stdout, - SqlFile, -} +use tracing::{error, info}; -/// Phase-1 no-op decoder. Returns an empty ledger list regardless of input, -/// so the loop exercises cursor / fetcher / sink wiring without a real -/// xdr-parser integration. Phase 2 replaces this with the real walk. 
-struct NoopDecoder; +const ENV_FIXTURES_DIR: &str = "FIXTURES_DIR"; +const ENV_CURSOR_FILE: &str = "CURSOR_FILE"; +const ENV_MAX_ITERATIONS: &str = "MAX_ITERATIONS"; +const DEFAULT_FIXTURES_DIR: &str = "fixtures/ledgers"; +const DEFAULT_CURSOR_FILE: &str = "out/cursor.txt"; +const DEFAULT_MAX_ITERATIONS: usize = 16; -impl LedgerDecoder for NoopDecoder { - async fn decode(&self, _bytes: &[u8]) -> Result, String> { - Ok(Vec::new()) - } -} +type R = Reconciler; #[tokio::main] -async fn main() -> Result<(), Box> { +async fn main() -> Result<(), Error> { tracing_subscriber::fmt() .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) .json() .init(); - let args = Args::parse(); + let fixtures_dir = std::env::var(ENV_FIXTURES_DIR) + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(DEFAULT_FIXTURES_DIR)); + let cursor_file = std::env::var(ENV_CURSOR_FILE) + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(DEFAULT_CURSOR_FILE)); + let max_iterations: usize = std::env::var(ENV_MAX_ITERATIONS) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_MAX_ITERATIONS); - let cursor = StubFileCursor::new(&args.cursor_file); - cursor.write(args.cursor).await?; + info!( + fixtures_dir = %fixtures_dir.display(), + cursor_file = %cursor_file.display(), + max_iterations, + "prices-ledger-processor cold start" + ); - let fetcher = LocalDiskFetcher::new(&args.fixtures_dir); + let reconciler: Arc = Arc::new(Reconciler { + fetcher: LocalDiskFetcher::new(&fixtures_dir), + cursor: StubFileCursor::new(&cursor_file), + sink: StdoutJsonSink, + decoder: XdrLedgerDecoder, + venue_registry: VenueRegistry::new(), + phoenix_registry: PhoenixPoolRegistry::default(), + }); + + lambda_runtime::run(service_fn(move |event: LambdaEvent| { + let r = reconciler.clone(); + async move { handler(event, r, max_iterations).await } + })) + .await +} - let stats = match args.sink { - SinkKind::Stdout => { - let reconciler = Reconciler { - fetcher, - cursor, - sink: StdoutJsonSink, 
- decoder: NoopDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - }; - reconciler.run(args.max_iterations).await? - } - SinkKind::SqlFile => { - let reconciler = Reconciler { - fetcher, - cursor, - sink: SqlFileSink::new(&args.out_dir), - decoder: NoopDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - }; - reconciler.run(args.max_iterations).await? +async fn handler( + event: LambdaEvent, + reconciler: Arc, + max_iterations: usize, +) -> Result { + let (payload, _ctx) = event.into_parts(); + let mut batch_item_failures = Vec::new(); + + for msg in &payload.records { + let message_id = msg.message_id.clone().unwrap_or_default(); + match reconciler.run(max_iterations).await { + Ok(stats) => info!( + message_id = %message_id, + start = stats.start_cursor, + end = stats.end_cursor, + persisted = stats.ledgers_persisted, + rows = stats.rows_emitted, + "doorbell processed" + ), + Err(e) => { + error!( + message_id = %message_id, + error = %e, + "reconcile failed — will redeliver doorbell" + ); + batch_item_failures.push(aws_lambda_events::sqs::BatchItemFailure { + item_identifier: message_id, + }); + } } - }; - - info!( - start = stats.start_cursor, - end = stats.end_cursor, - persisted = stats.ledgers_persisted, - rows = stats.rows_emitted, - "reconcile complete" - ); + } - Ok(()) + Ok(SqsBatchResponse { + batch_item_failures, + }) } From b61d6ff21cb3213b24bbdb68d61b2d7be4069d21 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Mon, 8 Jun 2026 18:39:53 +0200 Subject: [PATCH 06/17] chore(lore-0038): block task pending BE cross-team meeting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local-prototype deliverables shipped on branch feat/0038_prices-ledger-processor-lambda (PR #34): the design spec, Phase 1 scaffolding, and Phase 2 real XDR decode. 
Task now blocks on the cross-team conversation that resolves Part C of notes/G-local-prototype-spec.md — SQS notification ownership, env-var injection contract, xdr-parser pinning + semver, db-clickhouse::mtls reuse, Caddyfile CN_USER_MAP, and cert issuance. Original engineering gates (BE 0227, task 0047) also remain open. Unblocks when the meeting answers Part C and either gating event clears. --- .../README.md | 20 ++++++++++++++++++- .../notes/G-local-prototype-spec.md | 0 2 files changed, 19 insertions(+), 1 deletion(-) rename lore/1-tasks/{active => blocked}/0038_FEATURE_prices-ledger-processor-lambda/README.md (93%) rename lore/1-tasks/{active => blocked}/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md (100%) diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md b/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/README.md similarity index 93% rename from lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md rename to lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/README.md index 9b711e9..72d33ee 100644 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md +++ b/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -2,7 +2,7 @@ id: "0038" title: "Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv" type: FEATURE -status: active +status: blocked related_adr: ["0001", "0003", "0004", "0005", "0006", "0007"] related_tasks: ["0011", "0037", "0045", "0047", "0048"] tags: [layer-indexing, priority-high, effort-large, milestone-M1, stream-1, lambda, ingestion, rust, aws, clickhouse, hetzner] @@ -94,6 +94,24 @@ history: react to before the gating events clear — see the forthcoming G-note on local-prototype scope under `notes/G-local-prototype-spec.md`. 
+ - date: 2026-06-08 + status: blocked + who: oski + note: > + Local-prototype scope shipped: spec G-note + runnable Phase 1 + scaffolding + Phase 2 real XDR decode against + BE-sourced fixtures (commits f17353f, 1137464, bd2ea9d, + 10b60a3, fb57196 on branch feat/0038_prices-ledger-processor-lambda; + PR #34). Task moves back to blocked pending the cross-team + meeting with the BE team — the Part C asks in + `notes/G-local-prototype-spec.md` are the agenda + (SQS notification ownership, env-var injection vs SSM-at-runtime, + xdr-parser tag-pinning + semver, `db-clickhouse::mtls` reuse, + Caddyfile `CLICKHOUSE_CN_USER_MAP` for `prices-api-{env}`, + mTLS cert issuance). Original engineering gates + (BE 0227 + task 0047) also remain open. Unblocks: after the + meeting answers Part C, and either gating engineering event + clears. --- # Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md b/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md similarity index 100% rename from lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md rename to lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md From 2fe8208820c9a498cf1568ae34de9f1b38a329a7 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 10 Jun 2026 17:02:30 +0200 Subject: [PATCH 07/17] docs(lore-0038): record SNS fan-out decision, unblock task The 2026-06-10 cross-team meeting resolved Part C.1 of the local-prototype spec (SQS notification ownership) in favour of SNS fan-out: BE repoints its bucket notification from S3->SQS to S3->SNS->SQS, and prices-api owns its own prices-ingest queue + DLQ subscribing to BE's topic. The doorbell-cursor reconcile loop is unaffected (the Lambda ignores the message body), so this is a documentation + narrative change only. 
Move task 0038 blocked -> active, add the decision history entry, resolve spec C.1 inline with the final topology, and add the new /platform/{env}/ledger-events-topic-arn handshake key to C.2. --- .../README.md | 96 ++++++++++++++++++- .../notes/G-local-prototype-spec.md | 50 ++++++++++ packages/prices-ledger-processor/src/main.rs | 8 ++ 3 files changed, 149 insertions(+), 5 deletions(-) rename lore/1-tasks/{blocked => active}/0038_FEATURE_prices-ledger-processor-lambda/README.md (73%) rename lore/1-tasks/{blocked => active}/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md (92%) diff --git a/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/README.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md similarity index 73% rename from lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/README.md rename to lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md index 72d33ee..eec9ecc 100644 --- a/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/README.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -2,9 +2,9 @@ id: "0038" title: "Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv" type: FEATURE -status: blocked +status: active related_adr: ["0001", "0003", "0004", "0005", "0006", "0007"] -related_tasks: ["0011", "0037", "0045", "0047", "0048"] +related_tasks: ["0011", "0037", "0045", "0047", "0048", "0050"] tags: [layer-indexing, priority-high, effort-large, milestone-M1, stream-1, lambda, ingestion, rust, aws, clickhouse, hetzner] milestone: 1 links: @@ -112,6 +112,27 @@ history: (BE 0227 + task 0047) also remain open. Unblocks: after the meeting answers Part C, and either gating engineering event clears. + - date: 2026-06-10 + status: active + who: oski + note: > + Cross-team meeting held. **Part C.1 RESOLVED: SNS fan-out** + (not a second direct S3→SQS notification). 
BE will refactor + their bucket-side notification from `S3 → SQS` to + `S3 → SNS → SQS` (`SnsDestination` + `rawMessageDelivery: true` + so their indexer's S3-event parser is unchanged); prices-api + owns its **own** `prices-ingest-{env}` SQS queue + DLQ + subscribing to the BE SNS topic, plus its own Lambda. Failure + isolation preserved (a prices-side backlog/DLQ never pressures + BE's indexer queue). The doorbell-cursor design is unaffected + by the transport choice — the Lambda ignores the message body + regardless of SNS-vs-SQS — so no reconcile-loop change; only + doc/comment narrative and the (gated) CDK wiring change. + Decision recorded inline in `notes/G-local-prototype-spec.md` + §C.1. The SNS-topic ownership + cross-account subscription is + the cross-team artefact tracked by task 0050. Moved back to + active for continued local-scope work; the production AWS + wiring (Part E) stays gated on BE 0227 + task 0047. --- # Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv @@ -133,9 +154,14 @@ general-overview doc; the historical half lives in ADR 0001 Per the general-overview doc §2.1 (Components Hosted by Prices API) and §5.2 (Prices Ledger Processor (Rust)), the live ingestion path -is a Rust Lambda registered as a **second S3 event notification -target** on Block Explorer's existing `stellar-ledger-data/` bucket -(the first target is BE's own Ledger Processor). Per ADR 0001 §4 +is a Rust Lambda driven by a **content-free SQS doorbell**. Per the +2026-06-10 cross-team decision (history above; spec §C.1), BE's +`stellar-ledger-data/` bucket fans out object-created events via +**SNS** (`S3 → SNS → SQS`); prices-api owns its **own** +`prices-ingest-{env}` SQS queue + DLQ subscribed to that topic, so a +prices-side backlog can never pressure BE's indexer queue. (BE's own +queue subscribes to the same topic with `rawMessageDelivery: true`, +leaving their indexer's S3-event parser unchanged.)
Per ADR 0001 §4 (Decision point 4 — "Live go-forward Soroban AMM ingestion does NOT depend on CH"), this Lambda is the system of record for live Soroban AMM swaps once Stream 1 has landed its one-shot historical @@ -304,3 +330,63 @@ In `infra/aws-cdk/` (created by 0011): - The 1-min UPSERT contract is shared with both backfill streams; keep the merge SQL in a shared `packages/ohlcv-writer` module (or similar) so live + backfill writers stay in sync. + +## Implementation Notes + +> The `## Implementation Plan` above predates ADR 0007 and still +> describes the retired RDS/sqlx/VPC shape. The authoritative design +> is `notes/G-local-prototype-spec.md` (CH + mTLS + no-VPC + +> SNS-doorbell). What was actually built: + +**Local prototype (Phase 1–2, branch `feat/0038_…`, PR #34).** +`packages/prices-ledger-processor` mirrors BE's indexer structure +with three production swap-seams (`ObjectFetcher`, `Cursor`, +`OhlcvSink`). The doorbell-cursor reconcile loop (`src/reconcile.rs`) +reads the cursor, derives the Galexie S3 key for `cursor+1`, fetches, +decodes, dispatches via the 0037 kernel, buckets to 1-min OHLCV, and +**advances the cursor last** — the ordering barrier. Runs against +local fixtures; `cargo check -p prices-ledger-processor` green. + +**SNS decision + CDK ingest wiring (2026-06-10).** Folded the live +ingest wiring into `infra/src/lib/stacks/compute-stack.ts`: +prices-owned `prices-ingest-{env}` SQS + `prices-ingest-dlq-{env}` +DLQ (`maxReceiveCount=10`), an SNS subscription to BE's imported +`ledger-events` topic (`rawMessageDelivery`), the ledger-processor +`lambda.Function` (ARM64 / `provided.al2023`, `reservedConcurrency=1`, +`batchSize=1`, `timeout+60s` visibility), the event-source-mapping, +and IAM (S3 read on BE's bucket, CloudWatch lag metric, X-Ray). +Env-var contract sourced from `/platform/{env}/*` SSM at deploy +(spec §C.2, incl. the new `ledger-events-topic-arn` key). `nx build` ++ `cdk synth Prices-production-Compute` both pass. 
**Prepare-only — +no deploy** (gated on BE 0227 + task 0047 + BE publishing the SSM +keys/topic). + +## Design Decisions + +### Emerged + +1. **Ingest wiring lives in `ComputeStack`, not a separate + `IngestStack`.** First drafted as a standalone stack consuming + ComputeStack's `ledgerProcessorRole`; this created a + CloudFormation **dependency cycle** — the event-source-mapping and + the queue/bucket grants mutate the role's policy with the other + stack's ARNs, so Compute↔Ingest depend on each other. Co-locating + role + queue + Function in one stack removes the cycle and matches + BE's single-`compute-stack.ts` shape. (`ingest-stack.ts` moved to + `.trash/`.) +2. **`lambda.Function` + `Code.fromAsset`, not `RustFunction`.** The + prices infra doesn't carry `cargo-lambda-cdk`; rather than add an + uninstalled dependency, the Function consumes the pre-built + `cargo lambda build` bootstrap. Adopting `RustFunction` (synth-time + build, exactly BE's shape) is a follow-up once the dep lands. +3. **`reservedConcurrency` pinned to exactly 1 in `validateConfig`.** + Not a tunable — serial execution is the ordering guarantee, so the + config validator rejects any other value rather than letting a + typo silently break ordering at deploy. + +## Future Work + +- Adopt `cargo-lambda-cdk` `RustFunction` (drop the `fromAsset` seam). +- Production-rewrite punch-list — see spec Part E (gated on BE 0227 + + task 0047): S3-client `ObjectFetcher`, CH-backed cursor (spec D.1), + mTLS CH `OhlcvSink`, CW lag alarm, end-to-end smoke. 
diff --git a/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md similarity index 92% rename from lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md rename to lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md index a2c0ba4..2c8420b 100644 --- a/lore/1-tasks/blocked/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md @@ -447,6 +447,55 @@ indexer queue. diagnostic exports), we'd want a prefix filter on our subscription so we don't process them. +> **✅ RESOLVED — 2026-06-10 cross-team meeting → SNS fan-out.** +> +> BE and prices-api agreed to move the bucket-side to **SNS** rather +> than wire a second direct `S3 → SQS` notification. Final shape: +> +> ``` +> ledger PutObject → S3 ObjectCreated +> → SNS topic (BE-owned, on stellar-ledger-data) +> ├─ SQS ledger-ingest-{env} (BE) — rawMessageDelivery=true +> └─ SQS prices-ingest-{env} (prices-api) + its own DLQ +> → prices Lambda (this task) +> ``` +> +> **Ownership split (the user's words):** *"BE will refactor the code +> to use SNS; prices-api does its own SQS with DLQ and Lambda."* +> +> - **BE side:** repoint the existing notification to +> `SnsDestination(topic)` (was `SqsDestination(ingestQueue)`) and +> re-subscribe their own queue to the topic with +> `rawMessageDelivery: true` so the SQS body stays byte-identical +> to today and their indexer's S3-event parser is unchanged. BE +> needs no cross-account topic policy (both teams share one AWS +> account); prices-api subscribes under its own `sns:Subscribe` IAM.
+> - **prices-api side:** own `prices-ingest-{env}` SQS + DLQ +> (`maxReceiveCount = 10`, `visibilityTimeout = lambdaTimeout + 60s`, +> per §C.8), subscribe it to the BE topic (same account), and a +> queue policy permitting the topic to deliver. This is the +> prices-side CDK in the Part E punch-list (gated on BE 0227 + +> task 0047). +> +> **Why SNS over a second direct notification:** failure isolation +> *and* extensibility — a third/fourth consumer (asset-discovery, +> analytics) just adds a subscription with no further change to BE's +> bucket. EventBridge was considered (lighter for BE — additive bus +> toggle, their `S3 → SQS` untouched) but SNS was chosen for lowest +> latency and because the cross-team contract is being negotiated +> around a topic; topic ownership + subscription is tracked by +> **task 0050**. +> +> **Impact on this Lambda's code: none to the reconcile loop.** The +> doorbell is content-free — the handler ignores the SQS message body +> whether it arrives raw or SNS-wrapped — so the doorbell-cursor +> mechanism (`src/reconcile.rs`) is unaffected. Only doc/comment +> narrative and the (gated) CDK wiring carry the SNS shape. +> +> Sub-question (2) (prefix filter): deferred — `.xdr.zst` suffix +> remains sufficient; BE has no plans for other object types on the +> bucket. Revisit only if that changes. + ### C.2 — Env-var injection contract (NOT SSM-at-runtime) **Correction to the earlier draft.** I previously proposed @@ -463,6 +512,7 @@ into Lambda env vars** at deploy. We mirror that.
| `/platform/{env}/stellar-ledger-data-bucket-arn` | String | (CDK-side, for IAM grant) | | `/platform/{env}/ch-domain` | String | `CH_DOMAIN` (Caddy host) | | `/platform/{env}/stellar-network-passphrase` | String | `STELLAR_NETWORK_PASSPHRASE` (xdr-parser cache init) | +| `/platform/{env}/ledger-events-topic-arn` | String | (CDK-side, SNS topic the prices queue subscribes to — added by the §C.1 SNS decision) | **Why this changes the contract.** No prices-api Lambda runtime reads from SSM. The Lambda only sees env vars. SSM is the diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs index 04a19ad..5e1a6cd 100644 --- a/packages/prices-ledger-processor/src/main.rs +++ b/packages/prices-ledger-processor/src/main.rs @@ -5,6 +5,14 @@ //! every invocation). The SQS message body is **ignored**; each //! invocation just runs the reconcile loop. //! +//! Doorbell transport (2026-06-10 cross-team decision, spec §C.1): +//! production doorbells reach this Lambda via **SNS fan-out** — +//! `S3 ObjectCreated → SNS (BE-owned) → prices-ingest-{env} SQS + DLQ +//! → this Lambda`. Because the body is ignored, the handler is +//! identical whether the message is raw or SNS-wrapped; the `SqsEvent` +//! envelope is all we deserialise. Failure isolation: the prices queue +//! is prices-owned, so a backlog here never pressures BE's indexer. +//! //! Phase 2 prototype: the fetcher / cursor / sink are still the //! local-disk stubs. The Lambda mode exists to prove the //! 
`lambda_runtime` event-loop wires up cleanly — a `cargo lambda From 3c1ad158e695a8faa55719d4999dcfaaf1f3a7d9 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 10 Jun 2026 17:03:48 +0200 Subject: [PATCH 08/17] feat(lore-0038): add prices-owned SQS+DLQ+SNS ingest CDK wiring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the live Prices Ledger Processor Lambda and its doorbell source into ComputeStack, mirroring BE's compute-stack.ts: a prices-owned prices-ingest-{env} SQS queue + DLQ subscribed to BE's ledger-events SNS topic (rawMessageDelivery), the ledger-processor Lambda (ARM64/provided.al2023, reservedConcurrency=1, batchSize=1, visibility=timeout+60s, maxReceiveCount=10), the event-source-mapping, and IAM (S3 read on BE's bucket, CloudWatch lag metric, X-Ray). The env-var contract is sourced from /platform/{env}/* SSM at deploy, including the new ledger-events-topic-arn key from the SNS decision. The wiring lives in ComputeStack rather than a separate stack: the ESM and queue/bucket grants mutate the Lambda role's policy, so a split creates a CloudFormation dependency cycle (BE keeps the same single-stack shape). Uses lambda.Function + Code.fromAsset since the infra does not carry cargo-lambda-cdk; RustFunction is a follow-up. Adds a ledgerProcessor config block to EnvironmentConfig with reservedConcurrency pinned to exactly 1 in validateConfig — serial execution is the ordering guarantee, not a tunable. Prepare-only: deploy is gated on BE 0227 + task 0047 + BE publishing the platform SSM keys/topic. nx build + cdk synth both pass. 
--- infra/envs/production.json | 7 + infra/src/lib/stacks/compute-stack.ts | 317 ++++++++++++++++++++++++-- infra/src/lib/types.ts | 63 +++++ 3 files changed, 363 insertions(+), 24 deletions(-) diff --git a/infra/envs/production.json b/infra/envs/production.json index ced2b18..c22b0a7 100644 --- a/infra/envs/production.json +++ b/infra/envs/production.json @@ -9,5 +9,12 @@ "oracleWatcher": "rate(5 minutes)", "assetDiscovery": "rate(1 hour)", "cleanup": "cron(0 3 * * ? *)" + }, + "ledgerProcessor": { + "memoryMb": 512, + "timeoutSeconds": 60, + "reservedConcurrency": 1, + "sqsBatchSize": 1, + "maxReceiveCount": 10 } } diff --git a/infra/src/lib/stacks/compute-stack.ts b/infra/src/lib/stacks/compute-stack.ts index 189e888..0e776e3 100644 --- a/infra/src/lib/stacks/compute-stack.ts +++ b/infra/src/lib/stacks/compute-stack.ts @@ -1,7 +1,14 @@ import * as cdk from 'aws-cdk-lib'; -import type * as iam from 'aws-cdk-lib/aws-iam'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as lambda from 'aws-cdk-lib/aws-lambda'; +import * as lambdaEventSources from 'aws-cdk-lib/aws-lambda-event-sources'; import * as logs from 'aws-cdk-lib/aws-logs'; +import * as s3 from 'aws-cdk-lib/aws-s3'; import type * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; +import * as sns from 'aws-cdk-lib/aws-sns'; +import * as snsSubscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; +import * as sqs from 'aws-cdk-lib/aws-sqs'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; import type { Construct } from 'constructs'; import type { EnvironmentConfig } from '../types.js'; @@ -10,7 +17,53 @@ import { PRICES_LAMBDA_LOG_RETENTION, createPricesLambdaRole, lambdaLogGroupName, + pricesLambdaDefaults, } from '../lambda-baseline.js'; +import { secretsManagerLayerArn } from '../mtls.js'; + +const DLQ_RETENTION_DAYS = 14; + +/** + * Cargo-lambda build output for the `prices-ledger-processor` binary. 
+ * + * BE defines the equivalent Lambda with `cargo-lambda-cdk`'s + * `RustFunction`, which shells out to `cargo lambda build` at synth. + * The prices-api infra does not (yet) carry that dependency, so this + * stack consumes the pre-built `provided.al2023` bootstrap via + * `Code.fromAsset`. Build it first: + * + * cargo lambda build -p prices-ledger-processor --release --arm64 + * + * which writes `target/lambda/prices-ledger-processor/bootstrap`. + * Production follow-up: add `cargo-lambda-cdk` and swap this for + * `RustFunction` so synth builds the binary (mirrors BE exactly). + * Override with `LEDGER_PROCESSOR_ASSET_DIR` if needed. + */ +const LEDGER_PROCESSOR_ASSET_DIR = + process.env['LEDGER_PROCESSOR_ASSET_DIR'] ?? + '../target/lambda/prices-ledger-processor'; + +/** + * SSM keys the BE team publishes under the platform namespace and the + * prices-api CDK reads **at deploy time** (NOT at Lambda runtime — the + * Lambda only ever sees env vars; SSM is the deploy handshake). See + * task 0038 spec §C.2. + * + * `ledgerEventsTopicArn` is the artefact of the 2026-06-10 SNS + * decision (§C.1): BE moves the bucket notification to an SNS topic + * and publishes its ARN here for prices-api to subscribe to. Topic + * ownership + the cross-team handshake is tracked by task 0050. 
+ */ +function platformSsmKeys(envName: string) { + const base = `/platform/${envName}`; + return { + ledgerBucketName: `${base}/stellar-ledger-data-bucket-name`, + ledgerBucketArn: `${base}/stellar-ledger-data-bucket-arn`, + chDomain: `${base}/ch-domain`, + networkPassphrase: `${base}/stellar-network-passphrase`, + ledgerEventsTopicArn: `${base}/ledger-events-topic-arn`, + }; +} export interface ComputeStackProps extends cdk.StackProps { readonly config: EnvironmentConfig; @@ -19,31 +72,62 @@ export interface ComputeStackProps extends cdk.StackProps { } /** - * Compute layer for prices-api: per-Lambda IAM roles + LogGroups - * for the two anchor Lambdas, with no actual Function definitions - * yet. Downstream tasks attach `RustFunction` constructs to these - * roles + log groups: + * Compute layer for prices-api. + * + * Owns the per-Lambda IAM roles + LogGroups for the two anchor + * Lambdas, plus the full wiring of the live **Prices Ledger Processor** + * Lambda (task 0038): its SQS doorbell queue + DLQ, the SNS fan-out + * subscription, the Function itself, and the event-source-mapping. + * + * - `ledgerProcessorRole` / `ledgerProcessorLogGroup` + + * `ledgerProcessorFunction` — task 0038 (live S3-event-driven + * ingest). Role + queue + function are deliberately co-located in + * one stack: the event-source-mapping and the queue/bucket grants + * mutate the role's policy, so splitting them across stacks creates + * a CloudFormation dependency cycle. (BE keeps the same shape in a + * single `compute-stack.ts`.) + * - `apiHandlerRole` / `apiHandlerLogGroup` — consumed by task 0040 + * (axum REST handlers behind API Gateway); no Function yet. * - * - `ledgerProcessorRole` / `ledgerProcessorLogGroup` — consumed by - * task 0038 (live S3-event-driven ingest Lambda). - * - `apiHandlerRole` / `apiHandlerLogGroup` — consumed by task - * 0040 (axum REST handlers behind API Gateway). 
+ * The four periodic-worker roles (task 0039) and the backfill-status + * role (task 0055) are NOT pre-created here — those Lambdas are + * coupled to the EventBridge Scheduler rules (0039) and API Gateway + * routes (0055) defined alongside them; each calls + * `createPricesLambdaRole` from `lib/lambda-baseline.ts`. * - * The four periodic-worker roles (task 0039: price updater, oracle - * watcher, asset discovery, cleanup) and the backfill-status role - * (task 0055) are NOT pre-created here — those Lambdas are - * closely coupled to the EventBridge Scheduler rules (0039) and - * API Gateway routes (0055) defined alongside them. Each of those - * tasks calls `createPricesLambdaRole` from `lib/lambda-baseline.ts` - * to construct a baseline role and then extends it with - * stack-specific permissions. + * Ingest topology (2026-06-10 cross-team decision, spec §C.1 — SNS + * fan-out): * - * No VPC. Per ADR 0007 §3.6, Lambdas reach the Hetzner Caddy - * address over the public internet; gating is mTLS at Caddy. + * ledger PutObject → S3 ObjectCreated (BE's stellar-ledger-data) + * → SNS topic (BE-owned) + * ├─ SQS ledger-ingest-{env} (BE indexer) + * └─ SQS prices-ingest-{env} (this stack) + DLQ + * → ledger-processor Lambda + * + * prices-api owns its **own** queue + DLQ subscribed to BE's topic, so + * a prices-side backlog never pressures BE's indexer queue (failure + * isolation). BE and prices share one AWS account, so the SNS→SQS + * subscription is same-account — no cross-account topic policy is + * required, only the queue resource policy `SqsSubscription` adds. 
+ * + * Mirrors BE's `compute-stack.ts`: `reservedConcurrentExecutions = 1` + * and `batchSize = 1` are load-bearing for ordering (the doorbell- + * cursor reconcile loop races the cursor under concurrency), not perf + * knobs; `maxReceiveCount = 10` absorbs the ESM over-poll/throttle + * churn that concurrency=1 induces; `visibilityTimeout = timeout + 60s` + * so SQS never redelivers a doorbell still being processed. + * + * No VPC (ADR 0007 §3.6); identity to the Hetzner Caddy/CH endpoint is + * mTLS, the bundle sourced via the Parameters and Secrets extension + * layer. Deploy is gated on BE 0227 + task 0047 + BE publishing the + * platform SSM keys; this ingest wiring is authored prepare-only. */ export class ComputeStack extends cdk.Stack { public readonly ledgerProcessorRole: iam.Role; public readonly ledgerProcessorLogGroup: logs.LogGroup; + public readonly ledgerProcessorFunction: lambda.Function; + public readonly ingestQueue: sqs.Queue; + public readonly ingestDlq: sqs.Queue; public readonly apiHandlerRole: iam.Role; public readonly apiHandlerLogGroup: logs.LogGroup; @@ -51,9 +135,15 @@ export class ComputeStack extends cdk.Stack { super(scope, id, props); const { config, mtlsCertSecret, mtlsKeySecret } = props; + const { envName, awsRegion } = config; + const lp = config.ledgerProcessor; const accountId = cdk.Stack.of(this).account; const ctx = { config, accountId, mtlsCertSecret, mtlsKeySecret }; + const keys = platformSsmKeys(envName); + // --------------------------------------------------------------- + // Ledger Processor: baseline role + log group + // --------------------------------------------------------------- this.ledgerProcessorRole = createPricesLambdaRole( this, 'LedgerProcessorRole', @@ -63,30 +153,209 @@ export class ComputeStack extends cdk.Stack { this, 'LedgerProcessorLogGroup', { - logGroupName: lambdaLogGroupName(config.envName, 'ledger-processor'), + logGroupName: lambdaLogGroupName(envName, 'ledger-processor'), retention: 
PRICES_LAMBDA_LOG_RETENTION, removalPolicy: PRICES_LAMBDA_LOG_REMOVAL_POLICY, }, ); + // --------------------------------------------------------------- + // Deploy-time SSM reads (BE-published platform identifiers). + // valueForStringParameter resolves via a CFN parameter at deploy; + // the Lambda only ever sees the resulting env-var values. + // --------------------------------------------------------------- + const ledgerBucketName = ssm.StringParameter.valueForStringParameter( + this, + keys.ledgerBucketName, + ); + const ledgerBucketArn = ssm.StringParameter.valueForStringParameter( + this, + keys.ledgerBucketArn, + ); + const chDomain = ssm.StringParameter.valueForStringParameter( + this, + keys.chDomain, + ); + const networkPassphrase = ssm.StringParameter.valueForStringParameter( + this, + keys.networkPassphrase, + ); + const ledgerEventsTopicArn = ssm.StringParameter.valueForStringParameter( + this, + keys.ledgerEventsTopicArn, + ); + + // --------------------------------------------------------------- + // SQS DLQ + prices ingest queue (prices-owned doorbell source) + // --------------------------------------------------------------- + this.ingestDlq = new sqs.Queue(this, 'PricesIngestDlq', { + queueName: `prices-ingest-dlq-${envName}`, + retentionPeriod: cdk.Duration.days(DLQ_RETENTION_DAYS), + }); + + this.ingestQueue = new sqs.Queue(this, 'PricesIngestQueue', { + queueName: `prices-ingest-${envName}`, + // MUST be >= the Lambda timeout, else SQS redelivers a doorbell + // the Lambda is still legitimately draining. timeout + 60s margin. 
+ visibilityTimeout: cdk.Duration.seconds(lp.timeoutSeconds + 60), + retentionPeriod: cdk.Duration.days(DLQ_RETENTION_DAYS), + deadLetterQueue: { + queue: this.ingestDlq, + maxReceiveCount: lp.maxReceiveCount, + }, + }); + + // --------------------------------------------------------------- + // SNS fan-out subscription — BE-owned topic → our queue + // --------------------------------------------------------------- + // Import the BE topic by ARN (published under the platform SSM key + // per the SNS decision). Adding the SqsSubscription creates the + // AWS::SNS::Subscription in THIS stack and attaches the queue + // resource policy that lets the topic deliver — prices owns the + // subscription side, BE owns the topic. rawMessageDelivery keeps + // the body the bare S3 event (our Lambda ignores it regardless). + const ledgerEventsTopic = sns.Topic.fromTopicArn( + this, + 'BeLedgerEventsTopic', + ledgerEventsTopicArn, + ); + ledgerEventsTopic.addSubscription( + new snsSubscriptions.SqsSubscription(this.ingestQueue, { + rawMessageDelivery: true, + }), + ); + + // --------------------------------------------------------------- + // Secrets Manager extension layer (mTLS bundle fetch at cold start) + // --------------------------------------------------------------- + const secretsExtensionLayer = lambda.LayerVersion.fromLayerVersionArn( + this, + 'SecretsExtensionLayer', + secretsManagerLayerArn(awsRegion), + ); + + // --------------------------------------------------------------- + // Ledger Processor Lambda + SQS event-source-mapping + // --------------------------------------------------------------- + this.ledgerProcessorFunction = new lambda.Function( + this, + 'LedgerProcessorFunction', + { + ...pricesLambdaDefaults, // ARM64 + PROVIDED_AL2023 (ADR 0006/0007) + functionName: `prices-${envName}-ledger-processor`, + // cargo-lambda emits a single self-contained `bootstrap` binary; + // PROVIDED_AL2023 custom runtimes always use the `bootstrap` + // handler 
name. + handler: 'bootstrap', + code: lambda.Code.fromAsset(LEDGER_PROCESSOR_ASSET_DIR), + role: this.ledgerProcessorRole, + logGroup: this.ledgerProcessorLogGroup, + memorySize: lp.memoryMb, + timeout: cdk.Duration.seconds(lp.timeoutSeconds), + // Load-bearing: serial execution is the ordering guarantee. The + // reconcile loop reads a cursor and advances it; two concurrent + // invocations would race it. validateConfig pins this to 1. + reservedConcurrentExecutions: lp.reservedConcurrency, + tracing: lambda.Tracing.ACTIVE, + layers: [secretsExtensionLayer], + environment: { + ENV_NAME: envName, + RUST_LOG: 'info', + // Source bucket for ledger XDR objects. The Lambda derives S3 + // keys from ledger numbers (Galexie scheme) and HEAD/GETs this + // bucket; it does NOT parse the SQS doorbell body. + BUCKET_NAME: ledgerBucketName, + // mTLS endpoint (Caddy host on the Hetzner box). + CH_DOMAIN: chDomain, + // Required by xdr-parser's network-id cache (SAC derivation). + STELLAR_NETWORK_PASSPHRASE: networkPassphrase, + // prices-api uses two separate mTLS secrets (cert + key) per + // ADR 0007 §3.5; names match SecretsStack. Task 0052's + // clickhouse client crate reads these via the extension. + MTLS_CERT_SECRET_NAME: `prices/${envName}/clickhouse-mtls-cert`, + MTLS_KEY_SECRET_NAME: `prices/${envName}/clickhouse-mtls-key`, + // In-memory caching in the secrets extension — repeat reads in + // one execution environment hit RAM, not Secrets Manager. + PARAMETERS_SECRETS_EXTENSION_CACHE_ENABLED: 'true', + }, + }, + ); + + // batchSize 1 mirrors BE (the doorbell body is ignored, so larger + // batches buy nothing under concurrency=1). reportBatchItemFailures + // lets the handler fail just the offending doorbell; SQS redelivers + // it up to maxReceiveCount, then it lands in the DLQ. 
+ this.ledgerProcessorFunction.addEventSource( + new lambdaEventSources.SqsEventSource(this.ingestQueue, { + batchSize: lp.sqsBatchSize, + reportBatchItemFailures: true, + }), + ); + + // --------------------------------------------------------------- + // IAM — S3 read on BE's bucket (same-account → plain IAM grant, no + // bucket policy from BE) + CloudWatch lag metric + X-Ray. + // --------------------------------------------------------------- + const ledgerBucket = s3.Bucket.fromBucketAttributes(this, 'LedgerBucket', { + bucketArn: ledgerBucketArn, + bucketName: ledgerBucketName, + }); + ledgerBucket.grantRead(this.ledgerProcessorRole); + + this.ledgerProcessorRole.addToPrincipalPolicy( + new iam.PolicyStatement({ + sid: 'PublishLagMetric', + actions: ['cloudwatch:PutMetricData'], + resources: ['*'], + conditions: { + StringEquals: { 'cloudwatch:namespace': 'PricesApi/LedgerProcessor' }, + }, + }), + ); + this.ledgerProcessorRole.addToPrincipalPolicy( + new iam.PolicyStatement({ + sid: 'XRayWrite', + actions: ['xray:PutTraceSegments', 'xray:PutTelemetryRecords'], + resources: ['*'], + }), + ); + + // --------------------------------------------------------------- + // API Handler: baseline role + log group (Function lands in 0040) + // --------------------------------------------------------------- this.apiHandlerRole = createPricesLambdaRole(this, 'ApiHandlerRole', ctx); this.apiHandlerLogGroup = new logs.LogGroup(this, 'ApiHandlerLogGroup', { - logGroupName: lambdaLogGroupName(config.envName, 'api-handler'), + logGroupName: lambdaLogGroupName(envName, 'api-handler'), retention: PRICES_LAMBDA_LOG_RETENTION, removalPolicy: PRICES_LAMBDA_LOG_REMOVAL_POLICY, }); + // --------------------------------------------------------------- + // Outputs + // --------------------------------------------------------------- new cdk.CfnOutput(this, 'LedgerProcessorRoleArn', { value: this.ledgerProcessorRole.roleArn, - description: `Ledger Processor Lambda execution role ARN 
(${config.envName})`, + description: `Ledger Processor Lambda execution role ARN (${envName})`, + }); + new cdk.CfnOutput(this, 'LedgerProcessorFunctionArn', { + value: this.ledgerProcessorFunction.functionArn, + description: `Prices Ledger Processor Lambda ARN (${envName})`, + }); + new cdk.CfnOutput(this, 'PricesIngestQueueUrl', { + value: this.ingestQueue.queueUrl, + description: `Prices ingest queue URL (${envName})`, + }); + new cdk.CfnOutput(this, 'PricesIngestDlqUrl', { + value: this.ingestDlq.queueUrl, + description: `Prices ingest DLQ URL (${envName})`, }); new cdk.CfnOutput(this, 'ApiHandlerRoleArn', { value: this.apiHandlerRole.roleArn, - description: `API Handler Lambda execution role ARN (${config.envName})`, + description: `API Handler Lambda execution role ARN (${envName})`, }); cdk.Tags.of(this).add('Project', 'stellar-prices-api'); cdk.Tags.of(this).add('ManagedBy', 'cdk'); - cdk.Tags.of(this).add('Environment', config.envName); + cdk.Tags.of(this).add('Environment', envName); } } diff --git a/infra/src/lib/types.ts b/infra/src/lib/types.ts index a3bca48..b955e99 100644 --- a/infra/src/lib/types.ts +++ b/infra/src/lib/types.ts @@ -62,6 +62,35 @@ export interface EnvironmentConfig { /** Old-data partition drop (ALTER TABLE … DROP PARTITION). */ readonly cleanup: string; }; + + // Ledger Processor ingest (consumed by ComputeStack — task 0038) + + /** + * Sizing + SQS-source tuning for the live Prices Ledger Processor + * Lambda. The Lambda is a content-free SQS "doorbell" consumer; per + * the 2026-06-10 cross-team decision (task 0038 §C.1) the doorbells + * arrive via SNS fan-out off BE's `stellar-ledger-data` bucket + * (`S3 → SNS → prices-ingest SQS + DLQ → Lambda`). + * + * Mirrors BE's indexer knobs (`compute-stack.ts`): `batchSize = 1` + * and `reservedConcurrency = 1` are **load-bearing for ordering** + * — two concurrent invocations would race the cursor — not perf + * preferences.
`maxReceiveCount = 10` (vs the usual 3) absorbs the + * ESM over-poll/throttle churn that `concurrency = 1` induces so a + * processable doorbell is never false-DLQ'd. + */ + readonly ledgerProcessor: { + /** Lambda memory (MB). */ + readonly memoryMb: number; + /** Lambda timeout (seconds). The SQS visibility timeout is set to this + 60s. */ + readonly timeoutSeconds: number; + /** Reserved concurrency. MUST be 1 — serial execution is the ordering guarantee. */ + readonly reservedConcurrency: number; + /** SQS event-source batch size. 1 mirrors BE (doorbell, body ignored). */ + readonly sqsBatchSize: number; + /** SQS redrive threshold before a message lands in the DLQ. */ + readonly maxReceiveCount: number; + }; } /** @@ -129,6 +158,40 @@ export function validateConfig(config: EnvironmentConfig): void { } } + const lp = config.ledgerProcessor; + if (!lp || typeof lp !== 'object') { + errors.push('ledgerProcessor missing or not an object'); + } else { + if (!Number.isInteger(lp.memoryMb) || lp.memoryMb < 128) { + errors.push( + `ledgerProcessor.memoryMb must be an integer >= 128, got: ${lp.memoryMb}`, + ); + } + if (!Number.isInteger(lp.timeoutSeconds) || lp.timeoutSeconds < 1) { + errors.push( + `ledgerProcessor.timeoutSeconds must be a positive integer, got: ${lp.timeoutSeconds}`, + ); + } + // Ordering correctness depends on serial execution — reject anything + // but 1. Two concurrent invocations would race the cursor (BE's + // load-bearing `reservedConcurrentExecutions = 1`, mirrored here). 
+ if (lp.reservedConcurrency !== 1) { + errors.push( + `ledgerProcessor.reservedConcurrency must be exactly 1 (serial execution is the ordering guarantee), got: ${lp.reservedConcurrency}`, + ); + } + if (!Number.isInteger(lp.sqsBatchSize) || lp.sqsBatchSize < 1) { + errors.push( + `ledgerProcessor.sqsBatchSize must be a positive integer, got: ${lp.sqsBatchSize}`, + ); + } + if (!Number.isInteger(lp.maxReceiveCount) || lp.maxReceiveCount < 1) { + errors.push( + `ledgerProcessor.maxReceiveCount must be a positive integer, got: ${lp.maxReceiveCount}`, + ); + } + } + if (errors.length > 0) { throw new Error( `Invalid EnvironmentConfig for "${config.envName}":\n - ${errors.join( From ac7223454369fbbda8328f3b8cac1ca0b81bc475 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 10 Jun 2026 18:48:16 +0200 Subject: [PATCH 09/17] docs(lore-0050): add ready-to-implement BE SNS fan-out ask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 2026-06-10 meeting resolved the SNS-vs-direct-notification question (task 0038 §C.1) in favour of SNS fan-out. Add a BE-facing note that specs item 1 of 0050 concretely: the stellar-ledger-data compute-stack.ts change (topic + repoint to SnsDestination + own-queue re-subscribe with rawMessageDelivery), the canonical /platform/{env}/ledger-events-topic-arn SSM key, the same-account "no cross-account policy / no prices queue ARN" notes, and a cutover plan. Grounded in BE's compute-stack.ts and 0038's CDK (PR #34). Convert 0050 to directory form, add a history entry, and flag that the SNS item can ship independently of BE 0227 / task 0047. 
--- .../README.md} | 33 +++- .../notes/G-be-sns-fanout-ask.md | 173 ++++++++++++++++++ 2 files changed, 198 insertions(+), 8 deletions(-) rename lore/1-tasks/backlog/{0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning.md => 0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/README.md} (83%) create mode 100644 lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-ask.md diff --git a/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning.md b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/README.md similarity index 83% rename from lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning.md rename to lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/README.md index 502e7c1..755a0a8 100644 --- a/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning.md +++ b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/README.md @@ -8,12 +8,12 @@ related_tasks: ["0045", "0047", "0011", "0038"] tags: [layer-infra, priority-high, effort-medium, milestone-M1, cross-team, block-explorer, hetzner, clickhouse, mtls, sns] milestone: 1 links: - - "../../../docs/prices-api-general-overview.md" - - "../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" - - "../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" - - "./0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" - - "./0011_FEATURE_bootstrap-cdk-with-ssm-platform-lookups.md" - - "../blocked/0038_FEATURE_prices-ledger-processor-lambda.md" + - "../../../../docs/prices-api-general-overview.md" + - "../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" + - "../../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" + - 
"../0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" + - "../0011_FEATURE_bootstrap-cdk-with-ssm-platform-lookups.md" + - "../../active/0038_FEATURE_prices-ledger-processor-lambda/README.md" history: - date: 2026-05-21 status: backlog @@ -26,6 +26,21 @@ history: require BE-side CDK + operator action. Capturing them as one task makes the cross-team hand-off trackable; without it these slip into informal Slack threads and gate everything downstream. + - date: 2026-06-10 + status: backlog + who: oski + note: > + Converted to directory form. The 2026-06-10 cross-team meeting + resolved the SNS-vs-direct-notification question (task 0038 + §C.1) in favour of **SNS fan-out**; item 1 now has a + ready-to-implement BE ask at + `notes/G-be-sns-fanout-ask.md` (topic + repoint with + `rawMessageDelivery` + the `/platform/{env}/ledger-events-topic-arn` + SSM key, grounded in BE's compute-stack.ts and 0038's CDK in + PR #34). Note: the SNS item can ship independently of BE 0227 / + task 0047 (it's S3+SNS+SSM, not Hetzner-CH) — only the mTLS-cert + and `prices.*`-DB items stay gated. Task stays backlog pending + BE scheduling. --- # BE-side prep — SNS fan-out + mTLS client cert + prices DB provisioning @@ -86,8 +101,10 @@ Produce a short written checklist (1–2 pages) under to deliver for each of the three items, with success signals prices-api can verify independently: -- SNS topic ARN per env, published to a known SSM key (e.g. - `/platform/{env}/stellar-ledger-data-sns-arn`). +- SNS topic ARN per env, published to a known SSM key + (`/platform/{env}/ledger-events-topic-arn` — canonical, per the + SNS-fan-out ask in `notes/G-be-sns-fanout-ask.md`, which is the + ready-to-implement spec for this item post the 2026-06-10 meeting). - Per-env mTLS cert + key handed off via a secure channel; CA certificate exported as a static asset checked into prices-api for trust-chain verification. 
diff --git a/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-ask.md b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-ask.md new file mode 100644 index 0000000..ec97a4b --- /dev/null +++ b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-ask.md @@ -0,0 +1,173 @@ +--- +id: "G-be-sns-fanout-ask" +title: "BE-side ask — SNS fan-out on stellar-ledger-data (ready-to-implement)" +type: G +task: "0050" +status: mature +spawned_from: [] +spawns: [] +related_notes: [] +links: + - "../../../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" + - "../../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" +--- + +# BE-side ask — SNS fan-out on `stellar-ledger-data` + +> **Audience:** BE team (soroban-block-explorer infra). +> **Status:** agreed at the 2026-06-10 cross-team meeting; this is the +> concrete implementation ask. Scopes **item 1** of task 0050 (the SNS +> fan-out); the mTLS-cert and `prices.*`-DB items are unchanged and +> live in the parent README. +> **Why now:** prices-api task 0038 has authored its side (PR #34) — +> its CDK already imports the topic ARN, subscribes its own queue, and +> reads the platform SSM keys below. It is **prepare-only** until BE +> lands this. + +--- + +## TL;DR + +Move the `stellar-ledger-data` bucket notification from +`S3 → SQS` to `S3 → SNS → SQS`, so a second tenant (prices-api) can +subscribe its own queue. One topic per env, BE's existing indexer +queue re-subscribed with **`rawMessageDelivery: true`** (so the +indexer's S3-event parser is byte-for-byte unchanged), and the topic +ARN published to one SSM key. Same AWS account, so prices owns its +own subscription — BE does **not** need the prices queue ARN or a +cross-account policy.
+ +Target end-state: + +``` +ledger PutObject → S3 ObjectCreated (.xdr.zst) + → SNS topic {env}-ledger-events (BE-owned, NEW) + ├─ SQS {env}-ledger-ingest (BE) rawMessageDelivery=true + └─ SQS prices-ingest-{env} (prices, already in PR #34) +``` + +--- + +## The change in BE's `infra/.../compute-stack.ts` + +Today (BE `compute-stack.ts`): + +```ts +// ~L278 — direct S3 → SQS +ledgerBucket.addEventNotification( + s3.EventType.OBJECT_CREATED, + new s3n.SqsDestination(ingestQueue), + { suffix: '.xdr.zst' }, +); +``` + +Proposed: + +```ts +import * as sns from 'aws-cdk-lib/aws-sns'; +import * as subs from 'aws-cdk-lib/aws-sns-subscriptions'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; + +// NEW — topic the bucket fans out to +const ledgerEventsTopic = new sns.Topic(this, 'LedgerEventsTopic', { + topicName: `${config.envName}-ledger-events`, +}); + +// CHANGED — S3 → SNS (was SqsDestination(ingestQueue)). One destination +// per overlapping event+suffix, so this REPLACES the direct wiring. +ledgerBucket.addEventNotification( + s3.EventType.OBJECT_CREATED, + new s3n.SnsDestination(ledgerEventsTopic), + { suffix: '.xdr.zst' }, +); + +// NEW — BE's own indexer queue re-subscribes. rawMessageDelivery keeps +// the SQS body identical to today, so the indexer's S3-event parser is +// UNCHANGED. (Without it, SNS wraps the event in an envelope and the +// parser breaks on every ledger.) +ledgerEventsTopic.addSubscription( + new subs.SqsSubscription(ingestQueue, { rawMessageDelivery: true }), +); + +// NEW — publish the topic ARN for prices-api's CDK to read at deploy. +new ssm.StringParameter(this, 'LedgerEventsTopicArnParam', { + parameterName: `/platform/${config.envName}/ledger-events-topic-arn`, + stringValue: ledgerEventsTopic.topicArn, +}); +``` + +`SnsDestination` auto-adds the topic policy letting S3 publish; CDK +handles that. 
The `dlq`, `ingestQueue` (visibility, `maxReceiveCount`), +and the indexer's SQS event-source-mapping are all **unchanged** — only +the *source* of the doorbell moves. + +--- + +## SSM keys BE publishes under `/platform/{env}/*` + +prices-api's CDK (PR #34) reads these **at deploy time** (never at +Lambda runtime). Canonical names — these are what 0038's stack already +references: + +| SSM key | Value | Status | +|---|---|---| +| `/platform/{env}/ledger-events-topic-arn` | the new SNS topic ARN | **NEW (this ask)** | +| `/platform/{env}/stellar-ledger-data-bucket-name` | bucket name | confirm published | +| `/platform/{env}/stellar-ledger-data-bucket-arn` | bucket ARN | confirm published | +| `/platform/{env}/ch-domain` | Caddy/CH host | confirm published | +| `/platform/{env}/stellar-network-passphrase` | mainnet/testnet passphrase | confirm published | + +> Supersedes the older `/platform/{env}/stellar-ledger-data-sns-arn` +> name floated in the 0050 README Step 1 — use +> `ledger-events-topic-arn` to match 0038's CDK. + +--- + +## What BE does NOT need to do + +- **No prices queue ARN.** Same AWS account, and prices owns the + subscription side — prices-api's CDK subscribes `prices-ingest-{env}` + to the topic itself (already in PR #34). +- **No cross-account topic policy.** Same account → prices' deploy + role subscribes via its own `sns:Subscribe` IAM. *Only* confirm BE's + topic policy doesn't explicitly restrict subscribers to BE + principals (a default CDK `sns.Topic` does not). +- **No DLQ / consumer changes.** prices owns its own DLQ + Lambda. + +--- + +## Cutover (BE's critical path — handle with care) + +Because S3 allows one destination per overlapping `event + suffix`, +this is a **replace** of the live notification, not an add: + +1. Deploy to a non-prod env first; confirm BE's indexer keeps draining + (this is the `rawMessageDelivery` check — if the indexer starts + failing to parse, raw delivery wasn't applied). +2. 
On prod, deploy during a low-write window if possible; the + `PutBucketNotificationConfiguration` swap is near-atomic but it is + the one path that must never silently drop ledgers. +3. prices-api subscribes after the topic + SSM key exist. + +> **Alternative considered:** EventBridge (additive bucket toggle, +> leaves BE's `S3 → SQS` untouched — lower BE effort/risk). The meeting +> chose **SNS** for lowest latency and because the cross-team contract +> is built around a topic. Recorded here so the trade-off is on file. + +--- + +## Verification (joint) + +- A new `.xdr.zst` PutObject delivers to **both** queues independently. +- BE indexer continues processing post-cutover (no parser errors). +- prices subscribes a throwaway queue to the topic ARN and observes a + delivery — capture the envelope as a fixture for 0038 (per the 0050 + README Step 3 note; still wanted even though the prices Lambda ignores + the body). + +## Gating + +Topic + SSM publish can land **independently of BE 0227 / task 0047** +(they're S3+SNS+SSM, not Hetzner-CH). The mTLS-cert and `prices.*`-DB +items of 0050 remain gated on BE 0227; this SNS item does not — it can +ship in Week 1 to unblock 0038's deploy prep. From e854145cf258a1da473c97ea8d586ab592e66872 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Thu, 18 Jun 2026 15:44:16 +0200 Subject: [PATCH 10/17] docs(lore-0050): add step-by-step BE SNS fan-out handoff runbook Self-contained, copy-pasteable runbook for the BE team to implement the S3 -> SNS -> SQS fan-out on stellar-ledger-data. Grounded in the current soroban-block-explorer compute-stack.ts: real line refs (notification at L386-389), the three missing imports, and the /platform/{env}/* SSM keys framed as net-new (BE publishes none today). Cross-links the existing G-be-sns-fanout-ask rationale note rather than duplicating it. 
--- .../notes/G-be-sns-fanout-handoff.md | 227 ++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md diff --git a/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md new file mode 100644 index 0000000..c6e0a8c --- /dev/null +++ b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md @@ -0,0 +1,227 @@ +--- +id: "G-be-sns-fanout-handoff" +title: "BE handoff — SNS fan-out implementation runbook (step-by-step)" +type: G +task: "0050" +status: mature +spawned_from: ["G-be-sns-fanout-ask"] +spawns: [] +related_notes: ["G-be-sns-fanout-ask"] +links: + - "../../../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" + - "../../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" +--- + +# BE handoff — SNS fan-out on `stellar-ledger-data` + +> **Audience:** BE team (soroban-block-explorer infra). +> **Status:** agreed at the 2026-06-10 cross-team meeting. This is a +> self-contained, copy-pasteable runbook — hand it straight to whoever +> owns `soroban-block-explorer/infra`. +> **Grounded in:** the *current* `infra/src/lib/stacks/compute-stack.ts` +> on `develop` (verified 2026-06-18). Line numbers below are real. +> **Why now:** prices-api's side is already authored (PR #34) — its CDK +> imports the topic ARN, subscribes its own queue, reads the SSM keys +> below, and is prepare-only until you land this. This item is **not** +> gated on BE 0227 or throughput verification (it's pure S3+SNS+SSM), so +> it can ship now. + +--- + +## TL;DR + +Today the `stellar-ledger-data` bucket fires `S3 → SQS` straight at the +indexer's `ingestQueue`.
We need a **second tenant** (prices-api) to +receive the same `ObjectCreated` doorbells on its own queue. S3 allows +only **one** destination per overlapping `event + suffix`, so the clean +fan-out is to insert an **SNS topic**: + +``` +ledger PutObject → S3 ObjectCreated (.xdr.zst) + → SNS topic {env}-ledger-events (BE-owned, NEW) + ├─ SQS {env}-ledger-ingest (BE) rawMessageDelivery=true ← unchanged behaviour + └─ SQS prices-ingest-{env} (prices-api, already in PR #34) +``` + +The whole change is **one file** (`compute-stack.ts`) plus publishing a +few SSM keys. Same AWS account as prices-api, so **you do not need any +prices ARN, and no cross-account policy.** + +--- + +## Scope of the change + +- **One file:** `infra/src/lib/stacks/compute-stack.ts`. +- **Net-new SSM keys** under a `/platform/{env}/*` namespace (none exist + today — these are created, not "confirmed"). +- **No change** to: the `dlq`, the `ingestQueue` config (visibility, + `maxReceiveCount`), the indexer's SQS event-source-mapping, or the + indexer Rust code. Only the *source* of the doorbell moves, and raw + delivery keeps the SQS body byte-identical. + +--- + +## Step-by-step + +### Step 1 — add the three imports + +`compute-stack.ts` currently imports `cdk, s3, s3n, sqs` (L1–9). Add: + +```ts +import * as sns from 'aws-cdk-lib/aws-sns'; +import * as subs from 'aws-cdk-lib/aws-sns-subscriptions'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +``` + +(`ssm` is already a workspace dep — `hetzner-dns-stack.ts` uses it.) + +### Step 2 — create the topic + +Put this just **above** the bucket-notification block (currently L386). +`config.envName` is the same field used for the queue names +(`${config.envName}-ledger-ingest`): + +```ts +// NEW — fan-out topic the bucket publishes to. One per env. 
+const ledgerEventsTopic = new sns.Topic(this, 'LedgerEventsTopic', { + topicName: `${config.envName}-ledger-events`, +}); +``` + +### Step 3 — repoint the bucket notification (S3 → SNS) + +Replace the existing block at **L386–389**: + +```ts +// ── BEFORE (current L386-389) ── +ledgerBucket.addEventNotification( + s3.EventType.OBJECT_CREATED, + new s3n.SqsDestination(ingestQueue), + { suffix: '.xdr.zst' } +); +``` + +```ts +// ── AFTER ── +// S3 → SNS (was SqsDestination(ingestQueue)). S3 allows one destination +// per overlapping event+suffix, so this REPLACES the direct wiring; the +// indexer now receives doorbells via its SNS subscription (Step 4). +ledgerBucket.addEventNotification( + s3.EventType.OBJECT_CREATED, + new s3n.SnsDestination(ledgerEventsTopic), + { suffix: '.xdr.zst' } +); +``` + +`SnsDestination` auto-adds the topic policy letting S3 publish — CDK +handles that for you. + +### Step 4 — re-subscribe the indexer queue (⚠️ `rawMessageDelivery: true`) + +This is the one detail that keeps the indexer untouched. With raw +delivery the SQS body is byte-for-byte identical to today's direct +`S3 → SQS` event; without it, SNS wraps the event in an envelope and the +indexer's S3-event parser breaks on **every** ledger. + +```ts +// NEW — the indexer's own queue subscribes to the topic. rawMessageDelivery +// keeps the SQS message body identical to the old direct S3→SQS shape, so the +// indexer's event-source-mapping and parser are UNCHANGED. +ledgerEventsTopic.addSubscription( + new subs.SqsSubscription(ingestQueue, { rawMessageDelivery: true }), +); +``` + +Leave the existing `processorFunction.addEventSource(new SqsEventSource( +ingestQueue, …))` (L399) and `ingestQueue.grantConsumeMessages(…)` (L411) +exactly as they are — they still drain `ingestQueue`. + +### Step 5 — publish the topic ARN to SSM + +prices-api's CDK reads this **at deploy time** (never at Lambda runtime): + +```ts +// NEW — hand the topic ARN to prices-api's CDK via SSM. 
+new ssm.StringParameter(this, 'LedgerEventsTopicArnParam', { + parameterName: `/platform/${config.envName}/ledger-events-topic-arn`, + stringValue: ledgerEventsTopic.topicArn, +}); +``` + +### Step 6 — publish the remaining `/platform/{env}/*` keys (net-new) + +These do **not** exist in your infra today (you only publish +`EcrRepoUriParam` and read the Hetzner CH IP). prices-api's CDK consumes +all of them at deploy. The bucket name/arn are already in hand inside +`ComputeStack` (props `ledgerBucketName` / `ledgerBucketArn`, L20–21), so +publishing them is a one-liner each; `ch-domain` and the network +passphrase come from wherever you keep them today. + +| SSM key (String) | Value | Source in your code | +|---|---|---| +| `/platform/{env}/ledger-events-topic-arn` | new SNS topic ARN | Step 5 | +| `/platform/{env}/stellar-ledger-data-bucket-name` | bucket name | `props.ledgerBucketName` | +| `/platform/{env}/stellar-ledger-data-bucket-arn` | bucket ARN | `props.ledgerBucketArn` | +| `/platform/{env}/ch-domain` | Caddy/ClickHouse host | your Hetzner CH domain | +| `/platform/{env}/stellar-network-passphrase` | mainnet/testnet passphrase | indexer env config | + +> If any of these already live under a different key name, just tell us +> the names and we'll point prices-api's CDK at them instead — the table +> above is the canonical set our stack references. + +### Step 7 — confirm the topic policy isn't subscriber-restricted + +Same AWS account, so prices-api subscribes `prices-ingest-{env}` to the +topic via its **own** deploy-role `sns:Subscribe` IAM — no cross-account +policy needed from you. Only confirm your topic policy doesn't explicitly +restrict subscribers to BE principals. A default CDK `sns.Topic` (as +above) does **not**, so there's normally nothing to do here. + +--- + +## What BE does NOT need to do + +- ❌ No prices queue ARN (same account; prices owns the subscribe side). +- ❌ No cross-account topic policy. 
+- ❌ No DLQ / consumer / event-source-mapping changes. +- ❌ No indexer Rust changes (raw delivery → body unchanged). + +--- + +## Cutover (the one path that must never drop ledgers) + +Because S3 permits one destination per overlapping `event + suffix`, +Step 3 is a **replace** of the live notification, not an add: + +1. **Deploy to a non-prod env first.** Confirm the indexer keeps draining + `ingestQueue`. If it suddenly fails to parse messages, raw delivery + wasn't applied (Step 4) — fix before touching prod. +2. **On prod, deploy in a low-write window if possible.** The + `PutBucketNotificationConfiguration` swap is near-atomic, but it's the + single path that must not silently drop a ledger. +3. **prices-api subscribes after** the topic + SSM key exist. + +--- + +## Joint verification + +- A new `.xdr.zst` PutObject delivers to **both** queues independently. +- BE indexer continues processing post-cutover with **no parser errors**. +- prices-api subscribes a throwaway queue to the topic ARN and observes a + delivery (captures the envelope as a test fixture). + +--- + +## Alternative considered (on the record) + +EventBridge was weighed — additive bucket toggle, leaves your `S3 → SQS` +untouched, lower BE effort/risk. The meeting chose **SNS** for lowest +latency and because the cross-team contract is built around a topic. +Recorded so the trade-off is on file. + +--- + +*Deeper rationale + meeting provenance: [[G-be-sns-fanout-ask]] and the +0038 spec `notes/G-local-prototype-spec.md` §C.1. 
Topic ownership is +tracked by this task (0050); prices-api's subscriber-side CDK is in +PR #34 (task 0038).* From fed74bc6578d7c5bacaf88333d72047e07a29f5e Mon Sep 17 00:00:00 2001 From: karczuRF Date: Tue, 23 Jun 2026 12:06:26 +0200 Subject: [PATCH 11/17] fix(lore-0038): use single mTLS bundle secret for CH client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ledger-processor Lambda set MTLS_CERT_SECRET_NAME + MTLS_KEY_SECRET_NAME, but the shipped client (prices-clickhouse mtls.rs) reads a single MTLS_SECRET_NAME holding a {cert,key,ca} JSON bundle — so the Lambda would fail cold start with MtlsError::MissingEnv. Reconcile the CDK to the single-bundle, two-identity model decided in task 0063, mirroring BE: - compute-stack: ledger Function uses MTLS_SECRET_NAME (writer bundle); each role granted read on only its own secret. - secrets-stack: stop creating the cert/key secrets; publish the two bundle names to SSM (operator issues/creates the material out-of-band, like BE). - mtls helpers: mtlsClientCn / mtlsSecretName / mtlsSecretArnFromParts as the single source of truth for the env-suffixed CN + secret name. - lambda-baseline / app / index: thread the single secret name. CNs are env-suffixed (prices-ingestion-production / prices-api-production) to stay unique in BE's shared CA namespace. 
--- infra/src/index.ts | 14 +++- infra/src/lib/app.ts | 16 ++-- infra/src/lib/lambda-baseline.ts | 20 +++-- infra/src/lib/mtls.ts | 57 ++++++++++++- infra/src/lib/stacks/compute-stack.ts | 52 +++++++++--- infra/src/lib/stacks/secrets-stack.ts | 111 ++++++++++++-------------- 6 files changed, 177 insertions(+), 93 deletions(-) diff --git a/infra/src/index.ts b/infra/src/index.ts index faa858e..52a01b1 100644 --- a/infra/src/index.ts +++ b/infra/src/index.ts @@ -30,5 +30,15 @@ export type { BaselineLambdaContext } from './lib/lambda-baseline.js'; // mTLS helpers — used by downstream stacks that attach Lambdas // requiring the AWS Parameters and Secrets Lambda Extension layer -// and per-service mTLS Secret ARN grants. -export { secretsManagerLayerArn, mtlsSecretArn } from './lib/mtls.js'; +// and per-service mTLS Secret ARN grants. `mtlsClientCn` / +// `mtlsSecretName` are the single source of truth for the env-suffixed +// CN + bundle secret name (0063); 0038/0040 set `MTLS_SECRET_NAME` from +// the `ComputeStack` props derived from them. +export { + secretsManagerLayerArn, + mtlsSecretArn, + mtlsSecretArnFromParts, + mtlsClientCn, + mtlsSecretName, +} from './lib/mtls.js'; +export type { MtlsRole } from './lib/mtls.js'; diff --git a/infra/src/lib/app.ts b/infra/src/lib/app.ts index a6ef189..902b9cf 100644 --- a/infra/src/lib/app.ts +++ b/infra/src/lib/app.ts @@ -23,15 +23,13 @@ export function createApp({ config }: CreateAppOptions): void { const prefix = `Prices-${config.envName}`; - const secrets = new SecretsStack(app, `${prefix}-Secrets`, { env, config }); - - const compute = new ComputeStack(app, `${prefix}-Compute`, { - env, - config, - mtlsCertSecret: secrets.mtlsCertSecret, - mtlsKeySecret: secrets.mtlsKeySecret, - }); - compute.addDependency(secrets); + // SecretsStack only publishes the mTLS bundle secret NAMES to SSM — it does + // not create the secrets (operator-issued out-of-band; BE-mirroring). 
So + // ComputeStack derives its own secret names from the shared `mtlsSecretName` + // helper and needs no cross-stack reference / dependency on SecretsStack. + new SecretsStack(app, `${prefix}-Secrets`, { env, config }); + + new ComputeStack(app, `${prefix}-Compute`, { env, config }); // ApiGatewayStack is independent of ComputeStack in the skeleton // (no Lambda integration yet — task 0040 wires the cross-stack diff --git a/infra/src/lib/lambda-baseline.ts b/infra/src/lib/lambda-baseline.ts index 7f61148..f6af3be 100644 --- a/infra/src/lib/lambda-baseline.ts +++ b/infra/src/lib/lambda-baseline.ts @@ -2,16 +2,23 @@ import * as cdk from 'aws-cdk-lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as lambda from 'aws-cdk-lib/aws-lambda'; import * as logs from 'aws-cdk-lib/aws-logs'; -import type * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import type { Construct } from 'constructs'; import type { EnvironmentConfig } from './types.js'; +import { mtlsSecretArnFromParts } from './mtls.js'; export interface BaselineLambdaContext { readonly config: EnvironmentConfig; readonly accountId: string; - readonly mtlsCertSecret: secretsmanager.ISecret; - readonly mtlsKeySecret: secretsmanager.ISecret; + /** + * Secrets Manager NAME of the single `{cert,key,ca}` bundle this Lambda + * reads (its `MTLS_SECRET_NAME`). Each Lambda is granted read on only its + * own bundle — least privilege, mirroring BE's per-service grant. The + * secret is created out-of-band by the operator (see `SecretsStack`); the + * grant is on the by-name wildcard ARN, so it does not require the secret + * to exist at synth time. Derive with `mtlsSecretName(envName, role)`. + */ + readonly mtlsSecretName: string; } /** @@ -20,7 +27,8 @@ export interface BaselineLambdaContext { * * 1. CloudWatch Logs write — via AWSLambdaBasicExecutionRole managed * policy (attached separately at role construction time). - * 2. Read the two mTLS material secrets from Secrets Manager. + * 2. 
Read its own mTLS bundle secret from Secrets Manager (one secret, + * by-name wildcard ARN — BE-mirroring; the operator creates the value). * 3. Read both SSM namespaces — /platform/{env}/* (BE-published) and * /prices/{env}/* (prices-api-published). Read-only here; the * deploy role (CicdStack) is the only principal that writes. @@ -41,7 +49,9 @@ export function baselineLambdaPolicyStatements( new iam.PolicyStatement({ sid: 'ReadMtlsMaterial', actions: ['secretsmanager:GetSecretValue'], - resources: [ctx.mtlsCertSecret.secretArn, ctx.mtlsKeySecret.secretArn], + resources: [ + mtlsSecretArnFromParts(region, accountId, ctx.mtlsSecretName), + ], }), new iam.PolicyStatement({ sid: 'ReadSsmNamespaces', diff --git a/infra/src/lib/mtls.ts b/infra/src/lib/mtls.ts index 70c2ae6..0711108 100644 --- a/infra/src/lib/mtls.ts +++ b/infra/src/lib/mtls.ts @@ -45,11 +45,51 @@ export function secretsManagerLayerArn(region: string): string { return arn; } +/** + * The two mTLS client identities prices-api presents to BE's Hetzner + * ClickHouse, mirroring BE's per-service Lambda model: + * + * - `ingestion` → CH user `prices_writer` (ledger processor + periodic + * workers; `SELECT, INSERT, OPTIMIZE ON prices.*`). + * - `api` → CH user `prices_reader` (axum read handlers; + * `SELECT ON prices.*`). + */ +export type MtlsRole = 'ingestion' | 'api'; + +/** + * Canonical mTLS client-cert CN for a role, env-suffixed to mirror BE + * (`lambda-ingestion-production`). prices-api shares BE's CA, so these CNs + * live in BE's CA namespace and must stay globally unique there — hence the + * `-${envName}` suffix. The CN is the single thread tying the cert subject, + * the Caddy `CLICKHOUSE_CN_USER_MAP` key, the CH user, and the secret name + * together; keep all four derived from this one string. 
+ * + * mtlsClientCn('production', 'ingestion') === 'prices-ingestion-production' + */ +export function mtlsClientCn(envName: string, role: MtlsRole): string { + return `prices-${role}-${envName}`; +} + +/** + * Secrets Manager secret name holding the single `{cert,key,ca}` JSON bundle + * for a role — the value `MTLS_SECRET_NAME` resolves to at Lambda runtime + * (see `packages/prices-clickhouse/src/mtls.rs`). The secret is created + * out-of-band by the operator (BE-mirroring: CDK does NOT manage the material; + * see `SecretsStack`), so this name must match the `--secret-id` the issuance + * runbook uploads to (0063 `notes/G-provisioning-plan.md` §5). + * + * mtlsSecretName('production', 'ingestion') + * === 'prices/production/clickhouse-mtls-prices-ingestion-production' + */ +export function mtlsSecretName(envName: string, role: MtlsRole): string { + return `prices/${envName}/clickhouse-mtls-${mtlsClientCn(envName, role)}`; +} + /** * Build the wildcard-suffixed Secrets Manager ARN for a secret name. * * AWS Secrets Manager appends a random 6-char suffix to every secret - * ARN (e.g. `…secret:prices/production/mtls/ledger-processor-production-aBcDeF`). + * ARN (e.g. `…secret:prices/production/clickhouse-mtls-prices-api-production-aBcDeF`). * IAM grants must use a wildcard to match. Returns the ARN form * `arn:aws:secretsmanager:::secret:-*`. * @@ -57,6 +97,19 @@ export function secretsManagerLayerArn(region: string): string { * works in both per-account synth and assumed-role deploys. */ export function mtlsSecretArn(scope: cdk.Stack, secretName: string): string { + return mtlsSecretArnFromParts(scope.region, scope.account, secretName); +} + +/** + * Scope-free variant of {@link mtlsSecretArn} for callers that already hold + * the region + account (e.g. `lambda-baseline` builds the grant from the + * resolved `EnvironmentConfig.awsRegion` + account id, without a `Stack`). 
+ */ +export function mtlsSecretArnFromParts( + region: string, + account: string, + secretName: string, +): string { // Reject IAM-meaningful wildcards in the secret name. The function builds // an ARN like `…:secret:${secretName}-*` for IAM grants; an unexpected `*` // or `?` inside `secretName` would silently widen the grant beyond the @@ -67,7 +120,5 @@ export function mtlsSecretArn(scope: cdk.Stack, secretName: string): string { `those characters widen the IAM grant beyond a single secret.`, ); } - const region = scope.region; - const account = scope.account; return `arn:aws:secretsmanager:${region}:${account}:secret:${secretName}-*`; } diff --git a/infra/src/lib/stacks/compute-stack.ts b/infra/src/lib/stacks/compute-stack.ts index 0e776e3..df5fa16 100644 --- a/infra/src/lib/stacks/compute-stack.ts +++ b/infra/src/lib/stacks/compute-stack.ts @@ -4,7 +4,6 @@ import * as lambda from 'aws-cdk-lib/aws-lambda'; import * as lambdaEventSources from 'aws-cdk-lib/aws-lambda-event-sources'; import * as logs from 'aws-cdk-lib/aws-logs'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import type * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import * as sns from 'aws-cdk-lib/aws-sns'; import * as snsSubscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; import * as sqs from 'aws-cdk-lib/aws-sqs'; @@ -19,7 +18,7 @@ import { lambdaLogGroupName, pricesLambdaDefaults, } from '../lambda-baseline.js'; -import { secretsManagerLayerArn } from '../mtls.js'; +import { mtlsSecretName, secretsManagerLayerArn } from '../mtls.js'; const DLQ_RETENTION_DAYS = 14; @@ -67,8 +66,6 @@ function platformSsmKeys(envName: string) { export interface ComputeStackProps extends cdk.StackProps { readonly config: EnvironmentConfig; - readonly mtlsCertSecret: secretsmanager.ISecret; - readonly mtlsKeySecret: secretsmanager.ISecret; } /** @@ -130,24 +127,47 @@ export class ComputeStack extends cdk.Stack { public readonly ingestDlq: sqs.Queue; public readonly apiHandlerRole: iam.Role; public 
readonly apiHandlerLogGroup: logs.LogGroup; + /** + * `MTLS_SECRET_NAME` the ledger-processor (writer) Lambda reads — the + * single `{cert,key,ca}` bundle for CH user `prices_writer`. Set on the + * Function env below; the role is granted read on exactly this secret. + */ + public readonly ledgerProcessorMtlsSecretName: string; + /** + * `MTLS_SECRET_NAME` the api-handler (reader) Lambda must read — the + * single bundle for CH user `prices_reader`. Set on the Function env in + * task 0040; the role is already granted read on exactly this secret. + */ + public readonly apiHandlerMtlsSecretName: string; constructor(scope: Construct, id: string, props: ComputeStackProps) { super(scope, id, props); - const { config, mtlsCertSecret, mtlsKeySecret } = props; + const { config } = props; const { envName, awsRegion } = config; const lp = config.ledgerProcessor; const accountId = cdk.Stack.of(this).account; - const ctx = { config, accountId, mtlsCertSecret, mtlsKeySecret }; const keys = platformSsmKeys(envName); + // Two mTLS identities, mirroring BE's per-service split: the ledger + // processor writes as `prices_writer` (ingestion bundle); the api handler + // reads as `prices_reader` (api bundle). Each role is granted read on ONLY + // its own secret (least privilege). The secrets are created out-of-band by + // the operator (see SecretsStack); CDK only names + grants + sets the env. 
+ this.ledgerProcessorMtlsSecretName = mtlsSecretName(envName, 'ingestion'); + this.apiHandlerMtlsSecretName = mtlsSecretName(envName, 'api'); + // --------------------------------------------------------------- // Ledger Processor: baseline role + log group // --------------------------------------------------------------- this.ledgerProcessorRole = createPricesLambdaRole( this, 'LedgerProcessorRole', - ctx, + { + config, + accountId, + mtlsSecretName: this.ledgerProcessorMtlsSecretName, + }, ); this.ledgerProcessorLogGroup = new logs.LogGroup( this, @@ -269,11 +289,13 @@ export class ComputeStack extends cdk.Stack { CH_DOMAIN: chDomain, // Required by xdr-parser's network-id cache (SAC derivation). STELLAR_NETWORK_PASSPHRASE: networkPassphrase, - // prices-api uses two separate mTLS secrets (cert + key) per - // ADR 0007 §3.5; names match SecretsStack. Task 0052's - // clickhouse client crate reads these via the extension. - MTLS_CERT_SECRET_NAME: `prices/${envName}/clickhouse-mtls-cert`, - MTLS_KEY_SECRET_NAME: `prices/${envName}/clickhouse-mtls-key`, + // Single {cert,key,ca} bundle secret (task 0052/0063). Task 0052's + // clickhouse client crate reads exactly this one env var + // (`MTLS_SECRET_NAME`) and parses the JSON bundle via the extension + // — see packages/prices-clickhouse/src/mtls.rs:233. Name is derived + // from the shared mtlsSecretName helper, so it can't drift from the + // SecretsStack publication or the operator's create-secret. + MTLS_SECRET_NAME: this.ledgerProcessorMtlsSecretName, // In-memory caching in the secrets extension — repeat reads in // one execution environment hit RAM, not Secrets Manager. 
PARAMETERS_SECRETS_EXTENSION_CACHE_ENABLED: 'true', @@ -323,7 +345,11 @@ export class ComputeStack extends cdk.Stack { // --------------------------------------------------------------- // API Handler: baseline role + log group (Function lands in 0040) // --------------------------------------------------------------- - this.apiHandlerRole = createPricesLambdaRole(this, 'ApiHandlerRole', ctx); + this.apiHandlerRole = createPricesLambdaRole(this, 'ApiHandlerRole', { + config, + accountId, + mtlsSecretName: this.apiHandlerMtlsSecretName, + }); this.apiHandlerLogGroup = new logs.LogGroup(this, 'ApiHandlerLogGroup', { logGroupName: lambdaLogGroupName(envName, 'api-handler'), retention: PRICES_LAMBDA_LOG_RETENTION, diff --git a/infra/src/lib/stacks/secrets-stack.ts b/infra/src/lib/stacks/secrets-stack.ts index e37c812..a7cc9e1 100644 --- a/infra/src/lib/stacks/secrets-stack.ts +++ b/infra/src/lib/stacks/secrets-stack.ts @@ -1,96 +1,85 @@ import * as cdk from 'aws-cdk-lib'; -import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import * as ssm from 'aws-cdk-lib/aws-ssm'; import type { Construct } from 'constructs'; import type { EnvironmentConfig } from '../types.js'; +import { mtlsSecretName } from '../mtls.js'; export interface SecretsStackProps extends cdk.StackProps { readonly config: EnvironmentConfig; } /** - * Secrets Manager slots for the mTLS material that prices-api uses - * to connect to BE's Hetzner ClickHouse over HTTPS-mTLS. + * Publishes the canonical Secrets Manager **names** for the prices-api mTLS + * bundles. It deliberately does NOT create the secrets. * - * Per ADR 0007 §3.5: two secrets per env (cert + key, separately). 
- * BE's per-AWS-service issuance script (task 0050) produces the real - * PEMs; an operator uploads them post-deploy via: + * ## Why no `new secretsmanager.Secret` (BE-mirroring) * - * aws secretsmanager put-secret-value \ - * --secret-id prices/{env}/clickhouse-mtls-cert \ - * --secret-string "$(cat .pem)" + * BE never CDK-manages the mTLS material for its Lambdas: `compute-stack.ts` + * builds the secret *name*, grants `secretsmanager:GetSecretValue` on the + * by-name ARN, sets `MTLS_SECRET_NAME`, and the operator creates the secret + * out-of-band (`infra-hetzner/ca/issue-client-cert.sh` → `aws secretsmanager + * create-secret`). We mirror that exactly: * - * aws secretsmanager put-secret-value \ - * --secret-id prices/{env}/clickhouse-mtls-key \ - * --secret-string "$(cat .pem)" + * - The secret holds the **single `{cert,key,ca}` JSON bundle** that + * `packages/prices-clickhouse/src/mtls.rs` parses at runtime — NOT the old + * two-secret cert/key split this stack used to create. The CA private key + * never enters CDK; cert/key bytes are operator-issued and uploaded. + * - Letting CloudFormation own the secret would (a) require a random + * placeholder that the runtime client cannot parse as a bundle, and (b) + * collide with the operator's `create-secret` (CFN refuses to create a name + * that already exists). Naming-only avoids both. * - * The CDK template intentionally does NOT contain the PEM values — - * `generateSecretString` creates a random placeholder on first - * deploy; subsequent `cdk deploy` invocations do not re-randomize as - * long as the generator parameters are unchanged. Re-running deploy - * after the operator upload leaves the real PEMs intact. + * Per the SSM key contract, only the prices-owned secret **names** are + * published to `/prices/{env}/*` (identifiers, never trust material) so the + * issuance runbook and any out-of-band tooling read one source of truth. 
The + * names themselves come from {@link mtlsSecretName} — the same helper + * `ComputeStack` uses for the IAM grant + `MTLS_SECRET_NAME`, so the two can + * never drift (the failure mode we found in BE's own README-vs-CDK). * - * The Secret ARNs are published to SSM under the prices-api-owned - * namespace (`/prices/{env}/mtls-{cert,key}-secret-arn`) so task - * 0052's `clickhouse-client` crate can read them at Lambda init. + * Two identities (0063 decision, env-suffixed CNs): + * - `prices/{env}/clickhouse-mtls-prices-ingestion-{env}` → `prices_writer` + * - `prices/{env}/clickhouse-mtls-prices-api-{env}` → `prices_reader` */ export class SecretsStack extends cdk.Stack { - public readonly mtlsCertSecret: secretsmanager.ISecret; - public readonly mtlsKeySecret: secretsmanager.ISecret; + /** Secrets Manager name of the ingestion (writer) `{cert,key,ca}` bundle. */ + public readonly ingestionSecretName: string; + /** Secrets Manager name of the api (reader) `{cert,key,ca}` bundle. */ + public readonly apiSecretName: string; constructor(scope: Construct, id: string, props: SecretsStackProps) { super(scope, id, props); const { envName } = props.config; - this.mtlsCertSecret = new secretsmanager.Secret(this, 'MtlsCertSecret', { - secretName: `prices/${envName}/clickhouse-mtls-cert`, - description: - `mTLS client certificate (PEM) for prices-api → BE Hetzner ClickHouse, ${envName}. ` + - `Initial value is a CDK-generated random placeholder; operator replaces with the real ` + - `cert via 'aws secretsmanager put-secret-value' after BE task 0050 issuance.`, - generateSecretString: { - passwordLength: 64, - excludePunctuation: true, - }, - removalPolicy: cdk.RemovalPolicy.RETAIN, - }); - - this.mtlsKeySecret = new secretsmanager.Secret(this, 'MtlsKeySecret', { - secretName: `prices/${envName}/clickhouse-mtls-key`, - description: - `mTLS client private key (PEM) for prices-api → BE Hetzner ClickHouse, ${envName}. 
` + - `Initial value is a CDK-generated random placeholder; operator replaces with the real ` + - `key via 'aws secretsmanager put-secret-value' after BE task 0050 issuance.`, - generateSecretString: { - passwordLength: 64, - excludePunctuation: true, - }, - removalPolicy: cdk.RemovalPolicy.RETAIN, - }); + this.ingestionSecretName = mtlsSecretName(envName, 'ingestion'); + this.apiSecretName = mtlsSecretName(envName, 'api'); - new ssm.StringParameter(this, 'MtlsCertSecretArnParam', { - parameterName: `/prices/${envName}/mtls-cert-secret-arn`, - stringValue: this.mtlsCertSecret.secretArn, + new ssm.StringParameter(this, 'MtlsIngestionSecretNameParam', { + parameterName: `/prices/${envName}/mtls-ingestion-secret-name`, + stringValue: this.ingestionSecretName, description: - 'Secrets Manager ARN holding the prices-api mTLS client cert PEM', + 'Secrets Manager NAME of the prices-api ingestion (writer) mTLS ' + + '{cert,key,ca} bundle. Operator creates the secret out-of-band; ' + + 'CDK only names + grants. Value = MTLS_SECRET_NAME for writer Lambdas.', }); - new ssm.StringParameter(this, 'MtlsKeySecretArnParam', { - parameterName: `/prices/${envName}/mtls-key-secret-arn`, - stringValue: this.mtlsKeySecret.secretArn, + new ssm.StringParameter(this, 'MtlsApiSecretNameParam', { + parameterName: `/prices/${envName}/mtls-api-secret-name`, + stringValue: this.apiSecretName, description: - 'Secrets Manager ARN holding the prices-api mTLS client key PEM', + 'Secrets Manager NAME of the prices-api api (reader) mTLS ' + + '{cert,key,ca} bundle. Operator creates the secret out-of-band; ' + + 'CDK only names + grants. 
Value = MTLS_SECRET_NAME for reader Lambdas.', }); - new cdk.CfnOutput(this, 'MtlsCertSecretArn', { - value: this.mtlsCertSecret.secretArn, - description: `mTLS cert Secrets Manager ARN for ${envName}`, + new cdk.CfnOutput(this, 'MtlsIngestionSecretName', { + value: this.ingestionSecretName, + description: `mTLS ingestion (writer) bundle secret name for ${envName}`, }); - new cdk.CfnOutput(this, 'MtlsKeySecretArn', { - value: this.mtlsKeySecret.secretArn, - description: `mTLS key Secrets Manager ARN for ${envName}`, + new cdk.CfnOutput(this, 'MtlsApiSecretName', { + value: this.apiSecretName, + description: `mTLS api (reader) bundle secret name for ${envName}`, }); cdk.Tags.of(this).add('Project', 'stellar-prices-api'); From b2054c665be0197e026f094a8e45b59d1cd52d40 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Tue, 23 Jun 2026 13:14:13 +0200 Subject: [PATCH 12/17] fix(lore-0038): pass Soroswap registry to dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The develop merge added a 4th parameter (SoroswapPoolRegistry) to ledger_processor::dispatch, but prices-ledger-processor still called it with 3 args — breaking the build (E0061). Add an empty soroswap_registry to Reconciler and thread it through the reconcile loop + all construction sites (main, cli, e2e tests), matching the existing empty venue/phoenix registry pattern. Soroswap pools yield no trades until pool discovery populates the registry (a separate concern). Cargo.lock is regenerated: the new soroswap-extractor edge plus the already-required aws_lambda_events deps and the develop enrichment-worker removal, reconciling the lockfile with the branch's Cargo.toml. 
--- Cargo.lock | 113 +++++++++++++++--- packages/prices-ledger-processor/Cargo.toml | 1 + .../prices-ledger-processor/src/bin/cli.rs | 3 + packages/prices-ledger-processor/src/main.rs | 2 + .../prices-ledger-processor/src/reconcile.rs | 10 +- .../tests/reconcile_e2e.rs | 5 + 6 files changed, 116 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8debf04..cfc4e90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,18 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "aws_lambda_events" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144ec7565561115498a288850cc6a42b279e09b6c4b88f623eecb9c8ca96c08c" +dependencies = [ + "base64", + "serde", + "serde_json", + "serde_with", +] + [[package]] name = "base64" version = "0.22.1" @@ -538,22 +550,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] -name = "enrichment-worker" -version = "0.1.0" -dependencies = [ - "clap", - "clickhouse", - "lambda_runtime", - "prices-clickhouse", - "rust_decimal", - "serde", - "serde_json", - "tempfile", - "thiserror 2.0.18", - "tokio", - "tracing", - "tracing-subscriber", -] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "equivalent" @@ -1030,6 +1030,8 @@ checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", "hashbrown 0.17.1", + "serde", + "serde_core", ] [[package]] @@ -1352,6 +1354,29 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "prices-ledger-processor" +version = "0.1.0" +dependencies = [ + "aws_lambda_events", + "clap", + "extractors-core", + "hex", + "lambda_runtime", + "ledger-processor", + "phoenix-extractor", + "serde", + "serde_json", + "soroswap-extractor", + 
"stellar-xdr", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-subscriber", + "xdr-parser", +] + [[package]] name = "proc-macro-crate" version = "3.5.0" @@ -1687,6 +1712,30 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1812,6 +1861,38 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a5c54c7310e7b8b9577c286d7e399ddd876c3e12b3ed917a8aabc4b96e9e8c" +dependencies = [ + "base64", + "bs58", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.14.0", + "schemars 0.9.0", + "schemars 1.2.1", + "serde_core", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d57bc0c8b9a17920c178daa6bb924850d54a9c97ab45194bb8c17ad66bb660" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "sha2" version = "0.10.9" diff --git a/packages/prices-ledger-processor/Cargo.toml b/packages/prices-ledger-processor/Cargo.toml index 512dc6d..cad8deb 100644 --- a/packages/prices-ledger-processor/Cargo.toml +++ b/packages/prices-ledger-processor/Cargo.toml @@ -20,6 
+20,7 @@ path = "src/bin/cli.rs" extractors-core = { path = "../extractors-core" } ledger-processor = { path = "../ledger-processor" } phoenix-extractor = { path = "../phoenix-extractor" } +soroswap-extractor = { path = "../soroswap-extractor" } stellar-xdr = { workspace = true } xdr-parser = { workspace = true } diff --git a/packages/prices-ledger-processor/src/bin/cli.rs b/packages/prices-ledger-processor/src/bin/cli.rs index 4dd2ece..e404cad 100644 --- a/packages/prices-ledger-processor/src/bin/cli.rs +++ b/packages/prices-ledger-processor/src/bin/cli.rs @@ -10,6 +10,7 @@ use prices_ledger_processor::{ reconcile::Reconciler, sink::{SqlFileSink, StdoutJsonSink}, }; +use soroswap_extractor::SoroswapPoolRegistry; use tracing::info; #[derive(Parser, Debug)] @@ -73,6 +74,7 @@ async fn main() -> Result<(), Box> { decoder: XdrLedgerDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }; reconciler.run(args.max_iterations).await? } @@ -84,6 +86,7 @@ async fn main() -> Result<(), Box> { decoder: XdrLedgerDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }; reconciler.run(args.max_iterations).await? 
} diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs index 5e1a6cd..705e153 100644 --- a/packages/prices-ledger-processor/src/main.rs +++ b/packages/prices-ledger-processor/src/main.rs @@ -29,6 +29,7 @@ use prices_ledger_processor::{ cursor::StubFileCursor, decode::XdrLedgerDecoder, object_fetcher::LocalDiskFetcher, reconcile::Reconciler, sink::StdoutJsonSink, }; +use soroswap_extractor::SoroswapPoolRegistry; use tracing::{error, info}; const ENV_FIXTURES_DIR: &str = "FIXTURES_DIR"; @@ -72,6 +73,7 @@ async fn main() -> Result<(), Error> { decoder: XdrLedgerDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }); lambda_runtime::run(service_fn(move |event: LambdaEvent| { diff --git a/packages/prices-ledger-processor/src/reconcile.rs b/packages/prices-ledger-processor/src/reconcile.rs index c32e62e..ddc60cd 100644 --- a/packages/prices-ledger-processor/src/reconcile.rs +++ b/packages/prices-ledger-processor/src/reconcile.rs @@ -14,6 +14,7 @@ use std::future::Future; use extractors_core::{SorobanEventRow, VenueRegistry}; use ledger_processor::dispatch::{DispatchError, dispatch}; use phoenix_extractor::PhoenixPoolRegistry; +use soroswap_extractor::SoroswapPoolRegistry; use tracing::{info, warn}; use crate::bucket::Bucketer; @@ -67,6 +68,7 @@ pub struct Reconciler { pub decoder: D, pub venue_registry: VenueRegistry, pub phoenix_registry: PhoenixPoolRegistry, + pub soroswap_registry: SoroswapPoolRegistry, } impl Reconciler @@ -104,8 +106,12 @@ where let mut max_seq = current; for ledger in ledgers { for group in &ledger.event_groups { - let trades = match dispatch(group, &self.venue_registry, &self.phoenix_registry) - { + let trades = match dispatch( + group, + &self.venue_registry, + &self.phoenix_registry, + &self.soroswap_registry, + ) { Ok(t) => t, Err(DispatchError::VenueNotImplemented { venue, contract_id }) => { 
warn!(?venue, %contract_id, "venue extractor not yet implemented — skipping"); diff --git a/packages/prices-ledger-processor/tests/reconcile_e2e.rs b/packages/prices-ledger-processor/tests/reconcile_e2e.rs index a73367e..5d6a81b 100644 --- a/packages/prices-ledger-processor/tests/reconcile_e2e.rs +++ b/packages/prices-ledger-processor/tests/reconcile_e2e.rs @@ -10,6 +10,7 @@ use prices_ledger_processor::{ reconcile::{DecodedLedger, LedgerDecoder, Reconciler}, sink::{OhlcvSink, SinkError}, }; +use soroswap_extractor::SoroswapPoolRegistry; use tempfile::tempdir; struct CaptureSink { @@ -83,6 +84,7 @@ async fn empty_fixture_dir_no_op_returns_zero_persisted() { decoder: EmptyDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }; let stats = reconciler.run(8).await.unwrap(); @@ -119,6 +121,7 @@ async fn contiguous_run_advances_cursor_until_gap() { decoder: EmptyDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }; let stats = reconciler.run(8).await.unwrap(); @@ -156,6 +159,7 @@ async fn unknown_contract_dispatch_does_not_fail() { decoder: SingleEmptyGroupDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }; let stats = reconciler.run(2).await.unwrap(); @@ -189,6 +193,7 @@ async fn idempotent_on_re_run_from_same_cursor() { decoder: EmptyDecoder, venue_registry: VenueRegistry::new(), phoenix_registry: PhoenixPoolRegistry::default(), + soroswap_registry: SoroswapPoolRegistry::new(), }; reconciler.run(8).await.unwrap() }; From da87008ba80bf778675c1ef83bee4a4a343178c2 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 24 Jun 2026 14:48:55 +0200 Subject: [PATCH 13/17] feat(lore-0038): reuse tested ingest core for live ledger processor Extract packages/prices-ingest-core 
(decode/canonical/price/tick/bucket/ filter/soroban + a transport-agnostic OhlcvWriter) out of sdex-backfill and repoint both crates at it, so the live Lambda and the backfill emit byte-identical prices.price_ohlcv_1m rows (shared surrogate asset ids, SAC->classic collapse, preferred-quote orientation, Decimal/version). The prototype's hand-rolled decode/bucket diverged from the tested code (String ids, lexicographic orientation, f64) and would split liquidity once written to the shared table. Refactor the Lambda onto the core and add the two data-plane seams that 0052/0063 unblocked: an S3 ObjectFetcher (aws-sdk-s3, lambda feature) and a ClickHouseSink over prices-clickhouse::mtls (aws-mtls feature, the task-0052 client). Default build stays lean (no rustls/lambda); --features lambda compiles the full SQS-doorbell + S3 + mTLS path. Prepare-only -- no deploy, no prod writes (Part E still gated on BE 0227 + task 0047). Tests: 13 core + 5 sdex (regression gate) + 15 lambda-unit + 3 real-fixture e2e; fmt + clippy clean. Spawns 0064/0065/0066. 
--- .../0038-lambda-prototype}/bucket.rs | 0 .../0038-lambda-prototype}/decode.rs | 0 .../0038-lambda-prototype}/sink/sql_file.rs | 0 .../0038-lambda-prototype}/sink/stdout.rs | 0 Cargo.lock | 1275 ++++++++++++++++- Cargo.toml | 9 + .../README.md | 144 +- ...E_ch-backed-cursor-for-ledger-processor.md | 45 + ...65_FEATURE_periodic-ohlcv-reaggregation.md | 49 + ...r-processor-rustfunction-and-lag-metric.md | 46 + packages/prices-ingest-core/Cargo.toml | 28 + .../src/bucket.rs | 0 .../src/canonical.rs | 0 packages/prices-ingest-core/src/decode.rs | 29 + packages/prices-ingest-core/src/error.rs | 18 + .../src/filter.rs | 0 packages/prices-ingest-core/src/lib.rs | 43 + .../src/price.rs | 0 .../src/soroban.rs | 2 +- .../src/tick.rs | 0 packages/prices-ingest-core/src/writer.rs | 256 ++++ packages/prices-ledger-processor/Cargo.toml | 38 +- packages/prices-ledger-processor/README.md | 91 ++ .../prices-ledger-processor/src/bin/cli.rs | 97 +- packages/prices-ledger-processor/src/lib.rs | 20 +- packages/prices-ledger-processor/src/main.rs | 97 +- .../src/object_fetcher/mod.rs | 6 + .../src/object_fetcher/s3.rs | 64 + .../prices-ledger-processor/src/reconcile.rs | 199 +-- .../prices-ledger-processor/src/sink/mod.rs | 162 ++- .../tests/reconcile_e2e.rs | 254 ++-- packages/sdex-backfill/Cargo.toml | 6 +- packages/sdex-backfill/src/error.rs | 3 + packages/sdex-backfill/src/ingest.rs | 9 +- packages/sdex-backfill/src/main.rs | 6 - packages/sdex-backfill/src/run.rs | 4 +- packages/sdex-backfill/src/sink.rs | 264 +--- 37 files changed, 2584 insertions(+), 680 deletions(-) rename {packages/prices-ledger-processor/src => .trash/0038-lambda-prototype}/bucket.rs (100%) rename {packages/prices-ledger-processor/src => .trash/0038-lambda-prototype}/decode.rs (100%) rename {packages/prices-ledger-processor/src => .trash/0038-lambda-prototype}/sink/sql_file.rs (100%) rename {packages/prices-ledger-processor/src => .trash/0038-lambda-prototype}/sink/stdout.rs (100%) create mode 100644 
lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md create mode 100644 lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md create mode 100644 lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md create mode 100644 packages/prices-ingest-core/Cargo.toml rename packages/{sdex-backfill => prices-ingest-core}/src/bucket.rs (100%) rename packages/{sdex-backfill => prices-ingest-core}/src/canonical.rs (100%) create mode 100644 packages/prices-ingest-core/src/decode.rs create mode 100644 packages/prices-ingest-core/src/error.rs rename packages/{sdex-backfill => prices-ingest-core}/src/filter.rs (100%) create mode 100644 packages/prices-ingest-core/src/lib.rs rename packages/{sdex-backfill => prices-ingest-core}/src/price.rs (100%) rename packages/{sdex-backfill => prices-ingest-core}/src/soroban.rs (99%) rename packages/{sdex-backfill => prices-ingest-core}/src/tick.rs (100%) create mode 100644 packages/prices-ingest-core/src/writer.rs create mode 100644 packages/prices-ledger-processor/README.md create mode 100644 packages/prices-ledger-processor/src/object_fetcher/s3.rs diff --git a/packages/prices-ledger-processor/src/bucket.rs b/.trash/0038-lambda-prototype/bucket.rs similarity index 100% rename from packages/prices-ledger-processor/src/bucket.rs rename to .trash/0038-lambda-prototype/bucket.rs diff --git a/packages/prices-ledger-processor/src/decode.rs b/.trash/0038-lambda-prototype/decode.rs similarity index 100% rename from packages/prices-ledger-processor/src/decode.rs rename to .trash/0038-lambda-prototype/decode.rs diff --git a/packages/prices-ledger-processor/src/sink/sql_file.rs b/.trash/0038-lambda-prototype/sink/sql_file.rs similarity index 100% rename from packages/prices-ledger-processor/src/sink/sql_file.rs rename to .trash/0038-lambda-prototype/sink/sql_file.rs diff --git a/packages/prices-ledger-processor/src/sink/stdout.rs b/.trash/0038-lambda-prototype/sink/stdout.rs similarity index 
100% rename from packages/prices-ledger-processor/src/sink/stdout.rs rename to .trash/0038-lambda-prototype/sink/stdout.rs diff --git a/Cargo.lock b/Cargo.lock index cfc4e90..9b59690 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,6 +22,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -88,6 +94,15 @@ dependencies = [ "extractors-core", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -128,6 +143,49 @@ version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" +[[package]] +name = "aws-config" +version = "1.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33f815b73a3899c03b380d543532e5865f230dce9678d108dc10732a8682275" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.0", + "sha1 0.10.6", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] 
+ [[package]] name = "aws-lc-rs" version = "1.17.0" @@ -150,6 +208,414 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "aws-runtime" +version = "1.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c9b9de216a988dd54b754a82a7660cfe14cee4f6782ae4524470972fa0ccb39" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.137.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd7213994e2ff9382ff100403b78c30d1b74cdfcd8fa9d0d1dc3a94a5c4874" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.0", + "http-body 1.0.1", + "lru", + "percent-encoding", + "regex-lite", + "sha2 0.11.0", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c82b3ac19f1431854f7ace3a7531674633e286bfdde21976893bfee36fd493b" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + 
"tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.104.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321000d2b4c5519ee573f73167f612efd7329322d9b26969ad1979f0427f1913" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.107.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0d328ba962af23ecfa3c9f23b98d3d35e325fa218d7f13d17a6bf522f8a560" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bae38512beae0ffee7010fc24e7a8a123c53efdfef42a61e80fda4882418dc71" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint", + "form_urlencoded", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "sha2 0.11.0", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] 
+name = "aws-smithy-checksums" +version = "0.64.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e8e65f4f81fcccdeb6c3eca2af17ac21d421a1786a26a394aecf421d616d3a" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "md-5", + "pin-project-lite", + "sha1 0.11.0", + "sha2 0.11.0", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78d8391e65fcea47c586a22e1a41f173b38615b112b2c6b7a44e80cec3e6b706" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3ef8931ad1c98aa6a55b4256f847f3116090819844e0dd41ea682cac5dd2d3" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.15", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.9.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.9", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.40", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower 0.5.3", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "701a947f4797e52a911e114a898667c746c39feea467bbd1abd7b3721f702ffa" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9db177daa6ba8afb9ee1aefcf548c907abcf52065e394ee11a92780057fe0e8c" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api-macros", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "aws-smithy-schema" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 1.4.0", +] + +[[package]] +name = "aws-smithy-types" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b42fcf341259d85ca10fac9a2f6448a8ec691c6955a18e45bc3b71a85fab85" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "rustc_version", + "tracing", +] + [[package]] name = "aws_lambda_events" version = "0.16.1" @@ -162,12 +628,34 @@ dependencies = [ "serde_with", ] +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" 
+version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bitflags" version = "2.11.1" @@ -195,6 +683,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" +dependencies = [ + "hybrid-array", +] + [[package]] name = "borsh" version = "1.6.1" @@ -271,6 +768,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "cc" version = "1.2.62" @@ -379,7 +886,7 @@ dependencies = [ "futures", "futures-channel", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "lz4_flex", "quanta", @@ -413,12 +920,40 @@ dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a" + [[package]] name = "colorchoice" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -434,6 +969,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crate-git-revision" version = "0.0.6" @@ -445,12 +989,43 @@ dependencies = [ "serde_json", ] +[[package]] +name = "crc-fast" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e75b2483e97a5a7da73ac68a05b629f9c53cff58d8ed1c77866079e18b00dba5" +dependencies = [ + "digest 0.10.7", + "spin", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core", + "subtle", + "zeroize", +] + [[package]] name = "crypto-common" version = "0.1.7" 
@@ -461,6 +1036,24 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "darling" version = "0.23.0" @@ -501,6 +1094,17 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid 0.9.6", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.8" @@ -517,8 +1121,22 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", + "subtle", +] + +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.1", + "const-oid 0.10.2", + "crypto-common 0.2.2", + "ctutils", ] [[package]] @@ -555,6 +1173,46 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ecdsa" +version = "0.16.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest 0.10.7", + "elliptic-curve", + "rfc6979", + "signature", + "spki", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pem-rfc7468", + "pkcs8", + "rand_core", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -596,12 +1254,34 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -719,6 +1399,7 @@ checksum = 
"85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -744,6 +1425,55 @@ dependencies = [ "wasip2", ] +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb093c84e8bd9b188d4c4a8cb6579fc016968d14c99882163cd3ff402a4f155" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -753,6 +1483,17 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hashbrown" version = "0.17.1" @@ -771,6 +1512,35 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" 
+dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.3", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -781,6 +1551,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -788,7 +1569,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -799,8 +1580,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -810,15 +1591,54 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f056c8559e3757392c8d091e796416e4649d8e49e88b8d76df6c002f05027fd" dependencies = [ - "http", - "serde", + "http 1.4.0", + "serde", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", ] [[package]] -name = "httparse" -version = "1.10.1" +name = "hyper" +version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] [[package]] name = "hyper" @@ -830,8 +1650,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "http", - "http-body", + "h2 0.4.15", + "http 1.4.0", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -840,18 +1661,34 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http", - "hyper", + "http 1.4.0", + "hyper 1.9.0", "hyper-util", - "rustls", + "rustls 0.23.40", + "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", ] @@ -865,14 +1702,14 @@ dependencies = [ "bytes", "futures-channel", 
"futures-util", - "http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.9.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -1084,11 +1921,11 @@ dependencies = [ "base64", "bytes", "futures", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "http-serde", - "hyper", + "hyper 1.9.0", "hyper-util", "lambda_runtime_api_client", "pin-project", @@ -1111,10 +1948,10 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "tokio", "tower 0.4.13", @@ -1173,6 +2010,15 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + [[package]] name = "lz4_flex" version = "0.11.6" @@ -1188,6 +2034,16 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "md-5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" +dependencies = [ + "cfg-if", + "digest 0.11.3", +] + [[package]] name = "memchr" version = "2.8.0" @@ -1220,6 +2076,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" 
version = "0.2.19" @@ -1241,6 +2106,30 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "p256" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2 0.10.9", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -1264,6 +2153,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1304,6 +2202,22 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.33" @@ -1339,10 +2253,10 @@ name = 
"prices-clickhouse" version = "0.1.0" dependencies = [ "clickhouse", - "hyper-rustls", + "hyper-rustls 0.27.9", "hyper-util", "reqwest", - "rustls", + "rustls 0.23.40", "rustls-pemfile", "rustls-pki-types", "serde", @@ -1355,26 +2269,54 @@ dependencies = [ ] [[package]] -name = "prices-ledger-processor" +name = "prices-ingest-core" version = "0.1.0" dependencies = [ - "aws_lambda_events", - "clap", + "aquarius-extractor", + "clickhouse", "extractors-core", - "hex", - "lambda_runtime", "ledger-processor", "phoenix-extractor", + "prices-clickhouse", + "rust_decimal", "serde", "serde_json", + "sha2 0.10.9", "soroswap-extractor", + "stellar-strkey", "stellar-xdr", + "thiserror 2.0.18", + "tracing", + "xdr-parser", +] + +[[package]] +name = "prices-ledger-processor" +version = "0.1.0" +dependencies = [ + "aws-config", + "aws-sdk-s3", + "aws_lambda_events", + "clap", + "lambda_runtime", + "prices-clickhouse", + "prices-ingest-core", + "serde", + "serde_json", "tempfile", "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", - "xdr-parser", +] + +[[package]] +name = "primeorder" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" +dependencies = [ + "elliptic-curve", ] [[package]] @@ -1530,6 +2472,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.10" @@ -1560,10 +2508,10 @@ dependencies = [ "base64", "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "js-sys", "log", @@ -1583,6 +2531,16 @@ dependencies = [ "web-sys", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac 0.12.1", + "subtle", +] + [[package]] name = "ring" version = "0.17.14" @@ -1643,6 +2601,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.4" @@ -1656,6 +2623,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.40" @@ -1665,11 +2644,23 @@ dependencies = [ "aws-lc-rs", "once_cell", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -1688,6 +2679,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.13" @@ -1712,6 +2713,15 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "schemars" version = "0.9.0" @@ -1742,22 +2752,28 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "sdex-backfill" version = "0.1.0" dependencies = [ - "aquarius-extractor", "clap", "clickhouse", - "extractors-core", - "ledger-processor", - "phoenix-extractor", "prices-clickhouse", + "prices-ingest-core", "rust_decimal", "serde", "serde_json", - "sha2", - "soroswap-extractor", + "sha2 0.10.9", "stellar-strkey", "stellar-xdr", "thiserror 2.0.18", @@ -1784,6 +2800,49 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -1893,6 +2952,28 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1900,8 +2981,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", - "digest", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", ] [[package]] @@ -1929,6 +3021,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", + "rand_core", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -1947,6 +3049,16 @@ version = "1.15.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.3" @@ -1964,6 +3076,22 @@ dependencies = [ "extractors-core", ] +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -1997,7 +3125,7 @@ dependencies = [ "escape-bytes", "ethnum", "hex", - "sha2", + "sha2 0.10.9", "stellar-strkey", ] @@ -2191,7 +3319,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -2207,13 +3335,23 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.40", "tokio", ] @@ -2228,6 +3366,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml_datetime" version = "1.1.1+spec-1.1.0" @@ -2297,8 +3448,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", "tower 0.5.3", "tower-layer", @@ -2429,6 +3580,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -2463,6 +3620,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "want" version = "0.3.1" @@ -2765,12 +3928,18 @@ dependencies = [ "hex", "serde", "serde_json", - "sha2", + "sha2 0.10.9", "stellar-xdr", "tracing", "zstd", ] +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "yoke" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 79a4690..a124b6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "packages/soroswap-extractor", "packages/aquarius-extractor", "packages/ledger-processor", + "packages/prices-ingest-core", "packages/prices-ledger-processor", ] @@ -25,3 +26,11 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" rust_decimal = { version = "1", 
features = ["serde-with-str"] } thiserror = "2" + +# AWS / Lambda — used only by the live Prices Ledger Processor (task 0038), +# behind its `lambda` / `aws-mtls` cargo features so default builds stay lean. +lambda_runtime = "0.13" +aws-config = "1" +aws-sdk-s3 = "1" +aws_lambda_events = { version = "0.16", default-features = false, features = ["sqs"] } +tempfile = "3" diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md index eec9ecc..0cac8ad 100644 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -133,6 +133,30 @@ history: the cross-team artefact tracked by task 0050. Moved back to active for continued local-scope work; the production AWS wiring (Part E) stays gated on BE 0227 + task 0047. + - date: 2026-06-24 + status: active + who: oski + note: > + Refactored the Lambda onto the **shared, tested ingestion core** + and landed the two production data-plane seams that 0052/0063 unblocked. + The prototype's hand-rolled decode/bucket/canonicalisation diverged + from the tested `sdex-backfill` (String asset ids + lexicographic + orientation + f64, vs the real `price_ohlcv_1m`'s UInt32 surrogate + ids + SAC→classic collapse + Decimal/version) — so writing it to the + **shared** `prices.price_ohlcv_1m` would split liquidity. Extracted + `packages/prices-ingest-core` (canonical/price/tick/bucket/filter/ + soroban + the transport-agnostic `OhlcvWriter`) out of sdex-backfill + and repointed both crates at it, so live + backfill now emit + byte-identical rows. Replaced the prototype `bucket.rs`/`decode.rs`/ + stdout+sql_file sinks (→ `.trash/`) with: a core-backed reconcile + loop, an `S3Fetcher` (`aws-sdk-s3`, `lambda` feature), and a + `ClickHouseSink` over `prices-clickhouse::mtls` (`aws-mtls` feature, + the task-0052 client).
Default build stays lean (no rustls/lambda); + `--features lambda` compiles the full SQS-doorbell + S3 + mTLS path. + Tests: 13 core + 5 sdex (regression gate green) + 15 lambda-unit + 3 + real-fixture e2e (decode→bucket→cursor, gap-stop, idempotent). fmt + + clippy clean. **Prepare-only — no deploy, no prod writes** (Part E + deploy/cert/Caddy still gated on BE 0227 + task 0047). Stays active. --- # Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv @@ -275,26 +299,34 @@ In `infra/aws-cdk/` (created by 0011): ## Acceptance Criteria -- [ ] `packages/prices-ledger-processor` binary builds against - `provided.al2` (cargo lambda or equivalent). -- [ ] Lambda is registered as a second S3 notification target on - BE's `stellar-ledger-data/` bucket via CDK; no conflict - with BE's own Ledger Processor registration. -- [ ] Given a recorded `LedgerCloseMeta` containing ≥1 Soroban - AMM swap and ≥1 SDEX trade, the binary writes the expected - 1-min `price_ohlcv` rows via UPSERT with the ADR 0003 PK - shape and ADR 0004 multi-source columns. -- [ ] Re-invoking with the same ledger event is idempotent: row - counts and column values unchanged (incremental-merge - preserves `open`, refreshes `close`, etc.). -- [ ] `prices.ledger_processor.lag_seconds` metric published to - CloudWatch; alarm wired up to fire on >60s sustained lag. -- [ ] Integration test covers: S3 event → fetched object → - decoded XDR → dispatched extract → UPSERTed row against - a local Postgres mirroring the 0011 schema. -- [ ] Docs: README in `packages/prices-ledger-processor` - describing the S3 event contract, the BE-coordination - step for bucket notifications, and the SSM keys consumed. +> Criteria below are the post-ADR-0007 (CH + mTLS + ReplacingMergeTree) +> shape; the original RDS/UPSERT wording is superseded. + +- [x] `packages/prices-ledger-processor` builds; the `lambda`-feature + binary compiles the full `provided.al2023` path (S3 + mTLS + + `lambda_runtime`). 
`cargo lambda` ZIP packaging is the deploy step. +- [x] Decode → extract → bucket → write reuses the **tested** + `prices-ingest-core` (same code as `sdex-backfill`), so live rows + match the backfill: ADR 0003 PK (`asset_id, quote_asset_id, + source, timestamp`), ADR 0004 multi-source columns, UInt32 + surrogate ids with SAC→classic collapse, `Decimal(38,14)`. +- [x] Re-invoking from the same cursor is idempotent — proven by the + `idempotent_on_re_run_from_same_cursor` e2e test (deterministic + candle set + `version` → ReplacingMergeTree collapses re-inserts). +- [x] Real-fixture integration test: S3-equivalent object → + `decode_object` → dispatch/extract → bucketed candles → cursor + advance, gap-stop, idempotency (`tests/reconcile_e2e.rs`, + self-skips when fixtures absent). +- [x] mTLS sink goes through `prices-clickhouse::mtls` (task 0052), not + reinvented; CH error bodies redacted via `safe_log` before logging. +- [x] Docs: `packages/prices-ledger-processor/README.md` — S3/SNS event + contract, BE-coordination (task 0050), env-var/SSM keys consumed. +- [x] Lambda registered as the prices SNS→SQS doorbell target via CDK + (`infra/.../compute-stack.ts`, prepare-only — 2026-06-10). +- [ ] `prices.ledger_processor.lag_seconds` CloudWatch metric + >60s + alarm — **deferred** (CW emit is a deploy concern; spec Part E). +- [ ] Live mTLS write against the Hetzner `prices` DB — **deferred** + (prepare-not-deploy; transport already proven by task 0052's smoke). ## Blocked on @@ -361,6 +393,44 @@ Env-var contract sourced from `/platform/{env}/*` SSM at deploy no deploy** (gated on BE 0227 + task 0047 + BE publishing the SSM keys/topic). +**Shared-core refactor + data-plane seams (2026-06-24).** The +prototype reimplemented decode/bucket/canonicalisation by hand and it +diverged from the tested `sdex-backfill` — fatal once a real sink +writes to the *shared* `prices.price_ohlcv_1m` (different asset ids + +orientation → split liquidity). 
Fixed by extracting +`packages/prices-ingest-core` (the tested `canonical`/`price`/`tick`/ +`bucket`/`filter`/`soroban` modules + a transport-agnostic +`OhlcvWriter` split out of the backfill `Sink`) and repointing **both** +`sdex-backfill` and this Lambda at it. The Lambda now keeps only its +transport shell: +- `src/reconcile.rs` — doorbell-cursor loop calling `prices_ingest_core` + (`extract_trades` + `process_ledger` → `CandleAccumulator`), warm + `AssetRegistry` + `Registries` loaded from `prices.assets` at cold + start, accumulate across the contiguous run, flush + advance cursor + **last**. +- `src/object_fetcher/s3.rs` — `S3Fetcher` (`aws-sdk-s3` GetObject; + `NoSuchKey`→gap), `lambda` feature. +- `src/sink/mod.rs` — `ClickHouseSink` over the shared `OhlcvWriter`; + `plaintext` (local) and `from_lambda_env` (mTLS via + `prices-clickhouse::mtls`, `aws-mtls` feature); writes retried via + `retry.rs`, CH errors redacted via `safe_log`. +- `src/bin/cli.rs` — local fixture runner (`--dry-run` counts; else + writes to local plaintext CH). +- `src/main.rs` — SQS-doorbell entrypoint (`lambda` feature, eager + cold-start init). + +Retired to `.trash/0038-lambda-prototype/`: `bucket.rs`, `decode.rs`, +`sink/{sql_file,stdout}.rs`. Feature matrix: `default` lean (no +rustls/lambda), `aws-mtls`, `lambda` (= `aws-mtls` + runtime + S3). +Tests: 13 core + 5 sdex (regression gate) + 15 lambda-unit + 3 +real-fixture e2e. fmt + clippy clean. + +**Broken/modified tests:** `tests/reconcile_e2e.rs` rewritten — the old +synthetic-`LedgerDecoder` fakes are gone (the decode seam was removed in +favour of the shared `decode_object`); it now drives the real pipeline +over the three bundled fixture ledgers (62460540–542) and self-skips +when fixtures are absent. Intentional, not a regression. + ## Design Decisions ### Emerged @@ -383,10 +453,36 @@ keys/topic). 
Not a tunable — serial execution is the ordering guarantee, so the config validator rejects any other value rather than letting a typo silently break ordering at deploy. +4. **Refactor onto the shared core instead of keeping the prototype's + own decode/bucket (2026-06-24).** The user-confirmed call: a real + sink writing the prototype's String-id/f64/lexicographic rows to the + *shared* `prices.price_ohlcv_1m` would not match the backfill → + split liquidity. Resolved by extracting `prices-ingest-core` and + reusing it (partial "reconcile" of the live path onto the tested + code), not by reconciling ids inside the sink. Realises the task's + own Notes ask ("keep the merge SQL in a shared module so live + + backfill writers stay in sync"). +5. **`OhlcvWriter` takes a `clickhouse::Client`, not a URL.** Lets the + one writer serve both the plaintext local client and the task-0052 + mTLS client (both are `clickhouse::Client`) — the audit rule that + every remote CH access goes through `prices-clickhouse::mtls` holds. +6. **Candles accumulate across the whole contiguous run, flushed once.** + Matches the backfill's per-chunk accumulation so intra-run minutes + aggregate. A minute split across two *separate* invocations lands as + two `version`-keyed rows (RMT keeps the latest) — the same + characteristic the backfill has across partition boundaries; the fix + is a periodic re-aggregation (spawned as backlog). ## Future Work -- Adopt `cargo-lambda-cdk` `RustFunction` (drop the `fromAsset` seam). -- Production-rewrite punch-list — see spec Part E (gated on BE 0227 + - task 0047): S3-client `ObjectFetcher`, CH-backed cursor (spec D.1), - mTLS CH `OhlcvSink`, CW lag alarm, end-to-end smoke. +> Each item below is spawned as a backlog task (don't leave as prose). + +- **0064** — CH-backed cursor (replace `StubFileCursor`; spec D.1). +- **0065** — periodic OHLCV re-aggregation for cross-invocation / + cross-chunk intra-minute candles (live + backfill share the gap). 
+- **0066** — `cargo-lambda-cdk` `RustFunction` + CloudWatch + `lag_seconds` metric/alarm, and unify the dual rustls (0.21 from + `aws-sdk-s3` vs 0.23 from mTLS) to shrink the Lambda ZIP. +- Production deploy + live end-to-end smoke — spec Part E, still gated + on BE 0227 + task 0047 (not a standalone backlog item; unblocks with + those gates). diff --git a/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md b/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md new file mode 100644 index 0000000..2881286 --- /dev/null +++ b/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md @@ -0,0 +1,45 @@ +--- +id: "0064" +title: "ClickHouse-backed cursor for the Prices Ledger Processor" +type: FEATURE +status: backlog +related_adr: ["0007"] +related_tasks: ["0038"] +tags: [layer-indexing, priority-medium, effort-small, lambda, clickhouse, cursor] +links: + - "../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" +history: + - date: 2026-06-24 + status: backlog + who: oski + note: "Spawned from 0038 future work (spec Part D.1)." +--- + +# ClickHouse-backed cursor for the Prices Ledger Processor + +## Summary + +Replace the Lambda's `StubFileCursor` (a `/tmp` file, lost on cold start) +with a durable cursor read from / written to ClickHouse, so the +doorbell-cursor reconcile loop resumes correctly across container churn. + +## Context + +Task 0038 ships with `StubFileCursor` as a placeholder. The production +cursor design is the open question in `G-local-prototype-spec.md` Part D.1. +BE's cursor is `max(sequence) FROM default.ledgers`; we only persist +pricing-relevant ledgers, so `max(...) FROM prices.price_ohlcv_1m` undercounts. + +## Implementation + +- Lean: own single-row `prices.processed_ledgers` (ReplacingMergeTree, + updated last per run — D.1 option 1). +- Implement `Cursor` over `prices-clickhouse` (mTLS client); wire into + `main.rs` in place of `StubFileCursor`. 
+- Decide seed-on-empty behaviour (env `INITIAL_CURSOR` vs first-S3-probe). + +## Acceptance Criteria + +- [ ] `prices.processed_ledgers` (or chosen design) added to the schema. +- [ ] CH `Cursor` impl; reconcile resumes from CH across cold starts. +- [ ] Idempotent: re-run from the persisted cursor is a no-op past the tip. diff --git a/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md b/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md new file mode 100644 index 0000000..ad32664 --- /dev/null +++ b/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md @@ -0,0 +1,49 @@ +--- +id: "0065" +title: "Periodic OHLCV re-aggregation for cross-chunk intra-minute candles" +type: FEATURE +status: backlog +related_adr: ["0004", "0007"] +related_tasks: ["0038", "0039"] +tags: [layer-indexing, priority-medium, effort-medium, clickhouse, ohlcv] +links: + - "../archive/0048_RESEARCH_soroban-events-pricing-decoder-spec/notes/G-soroban-events-pricing-decoder.md" +history: + - date: 2026-06-24 + status: backlog + who: oski + note: "Spawned from 0038 future work (cross-invocation intra-minute merge gap)." +--- + +# Periodic OHLCV re-aggregation for cross-chunk intra-minute candles + +## Summary + +Close the intra-minute aggregation gap shared by **both** writers: the live +Lambda (per contiguous run) and the backfill (per partition) accumulate +candles in memory and flush per chunk. When a single minute spans two +chunks/invocations, two rows land with the same PK but different `version`, +and `ReplacingMergeTree(version)` keeps only the latest — dropping the other +chunk's trades for that minute. + +## Context + +`price_ohlcv_1m` is `ReplacingMergeTree(version)` keyed by +`(asset_id, quote_asset_id, source, timestamp)`. RMT **replaces**, it does +not sum — so per-chunk partial candles for a boundary minute don't merge. +Negligible-but-real (one minute per chunk boundary). 
Same root cause for live +and backfill since both now use `prices-ingest-core`'s `CandleAccumulator`. + +## Implementation (options to evaluate) + +- A periodic worker (task 0039 family) that re-reads raw trades/`_1m FINAL` + and rewrites boundary minutes with a higher `version`; OR +- An `AggregatingMergeTree` / SummingMergeTree variant for the write path so + partial candles combine on merge; OR +- Emit candles keyed to include a chunk discriminator and re-roll at read. + +## Acceptance Criteria + +- [ ] A minute split across two runs/chunks aggregates to one correct candle. +- [ ] Fix applies to both live (0038) and backfill writers (shared core). +- [ ] Regression test with a deliberately split-minute fixture. diff --git a/lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md b/lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md new file mode 100644 index 0000000..b8cc792 --- /dev/null +++ b/lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md @@ -0,0 +1,46 @@ +--- +id: "0066" +title: "Ledger Processor: RustFunction CDK + lag metric + rustls dedup" +type: FEATURE +status: backlog +related_adr: ["0006", "0007"] +related_tasks: ["0038"] +tags: [layer-indexing, priority-low, effort-small, lambda, cdk, observability] +links: + - "../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" +history: + - date: 2026-06-24 + status: backlog + who: oski + note: "Spawned from 0038 future work (packaging + observability + dep hygiene)." +--- + +# Ledger Processor: RustFunction CDK + lag metric + rustls dedup + +## Summary + +Three small production-hardening items for the Prices Ledger Processor that +are out of scope for the deploy-deferred build. + +## Context + +Spawned from task 0038. The Lambda is code-complete behind the `lambda` +feature; these are deploy/observability/dep-hygiene polish. 
+ +## Implementation + +- **`cargo-lambda-cdk` `RustFunction`**: drop the `Code.fromAsset` seam in + `infra/.../compute-stack.ts` for synth-time builds (BE's exact shape) once + the dep is added. +- **`prices.ledger_processor.lag_seconds`**: emit `now() - ledger.closed_at` + per invocation to CloudWatch (namespace `prices/lambda`) + a >60s-sustained + alarm (spec §9.6 / §C.8). +- **rustls dedup**: `aws-sdk-s3` pulls rustls 0.21 (older smithy) alongside + our 0.23.40 (mTLS). Unify to one version to shrink the `provided.al2023` + ZIP — investigate aws-smithy-http-client TLS feature selection. + +## Acceptance Criteria + +- [ ] `RustFunction` synth-time build wired (no pre-built asset seam). +- [ ] `lag_seconds` metric + alarm present in the CDK synth. +- [ ] `cargo tree --features lambda` shows a single rustls version. diff --git a/packages/prices-ingest-core/Cargo.toml b/packages/prices-ingest-core/Cargo.toml new file mode 100644 index 0000000..3f61b63 --- /dev/null +++ b/packages/prices-ingest-core/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "prices-ingest-core" +version = "0.1.0" +edition = "2024" +description = "Shared ledger→OHLCV ingestion core (decode, extract, canonicalise, bucket, write) used by both the SDEX backfill CLI and the live Prices Ledger Processor Lambda" + +[lib] +name = "prices_ingest_core" +path = "src/lib.rs" + +[dependencies] +stellar-xdr = { workspace = true } +stellar-strkey = { workspace = true } +sha2 = { workspace = true } +xdr-parser = { workspace = true } +clickhouse = { workspace = true } +rust_decimal = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tracing = { workspace = true } +thiserror = { workspace = true } + +prices-clickhouse = { path = "../prices-clickhouse" } +extractors-core = { path = "../extractors-core" } +phoenix-extractor = { path = "../phoenix-extractor" } +soroswap-extractor = { path = "../soroswap-extractor" } +aquarius-extractor = { path = "../aquarius-extractor" } 
+ledger-processor = { path = "../ledger-processor" } diff --git a/packages/sdex-backfill/src/bucket.rs b/packages/prices-ingest-core/src/bucket.rs similarity index 100% rename from packages/sdex-backfill/src/bucket.rs rename to packages/prices-ingest-core/src/bucket.rs diff --git a/packages/sdex-backfill/src/canonical.rs b/packages/prices-ingest-core/src/canonical.rs similarity index 100% rename from packages/sdex-backfill/src/canonical.rs rename to packages/prices-ingest-core/src/canonical.rs diff --git a/packages/prices-ingest-core/src/decode.rs b/packages/prices-ingest-core/src/decode.rs new file mode 100644 index 0000000..2f00648 --- /dev/null +++ b/packages/prices-ingest-core/src/decode.rs @@ -0,0 +1,29 @@ +//! `*.xdr.zst` object bytes → `Vec<LedgerCloseMeta>`. +//! +//! Wraps BE's `xdr-parser` (`decompress_zstd` + `deserialize_batch`) — the same +//! two calls the SDEX backfill makes per ledger file (`sdex-backfill::ingest`). +//! A Galexie object is a zstd-compressed `LedgerCloseMetaBatch`; with +//! `ledgers_per_file = 1` the returned vec is usually a single ledger, but the +//! batch shape is honoured so a multi-ledger file decodes correctly too. + +use stellar_xdr::curr::LedgerCloseMeta; + +use crate::error::IngestError; + +/// Decompress + deserialize one Galexie `*.xdr.zst` object into its ledgers. +pub fn decode_object(compressed: &[u8]) -> Result<Vec<LedgerCloseMeta>, IngestError> { + let xdr_bytes = xdr_parser::decompress_zstd(compressed)?; + let batch = xdr_parser::deserialize_batch(&xdr_bytes)?; + Ok(batch.ledger_close_metas.to_vec()) +} + +/// The ledger sequence number of a `LedgerCloseMeta` (all protocol versions). +/// The live Lambda uses this to advance its doorbell cursor to the highest +/// ledger actually processed in a run.
+pub fn ledger_sequence(lcm: &LedgerCloseMeta) -> u32 { + match lcm { + LedgerCloseMeta::V0(v) => v.ledger_header.header.ledger_seq, + LedgerCloseMeta::V1(v) => v.ledger_header.header.ledger_seq, + LedgerCloseMeta::V2(v) => v.ledger_header.header.ledger_seq, + } +} diff --git a/packages/prices-ingest-core/src/error.rs b/packages/prices-ingest-core/src/error.rs new file mode 100644 index 0000000..35eb587 --- /dev/null +++ b/packages/prices-ingest-core/src/error.rs @@ -0,0 +1,18 @@ +//! Shared error type for the ingestion core. + +/// Errors raised while decoding ledgers or reading/writing ClickHouse. The +/// SDEX backfill wraps this in its own `BackfillError` (which adds the +/// S3-partition-sync variants); the Lambda surfaces it through its reconcile +/// error. Keeping the shared variants here means both binaries classify +/// transient ClickHouse failures the same way. +#[derive(Debug, thiserror::Error)] +pub enum IngestError { + #[error("io: {0}")] + Io(#[from] std::io::Error), + + #[error("xdr parse: {0}")] + Parse(#[from] xdr_parser::ParseError), + + #[error("clickhouse: {0}")] + Clickhouse(#[from] clickhouse::error::Error), +} diff --git a/packages/sdex-backfill/src/filter.rs b/packages/prices-ingest-core/src/filter.rs similarity index 100% rename from packages/sdex-backfill/src/filter.rs rename to packages/prices-ingest-core/src/filter.rs diff --git a/packages/prices-ingest-core/src/lib.rs b/packages/prices-ingest-core/src/lib.rs new file mode 100644 index 0000000..bd7a95c --- /dev/null +++ b/packages/prices-ingest-core/src/lib.rs @@ -0,0 +1,43 @@ +//! prices ingestion core — the shared ledger→OHLCV pipeline. +//! +//! This crate owns the *tested* decode → extract → canonicalise → bucket → +//! write pipeline that was first written for the SDEX historical backfill +//! (`sdex-backfill`) and is now reused verbatim by the live **Prices Ledger +//! Processor Lambda** (`prices-ledger-processor`, task 0038). Both writers go +//! 
through the same modules so live and backfill produce **identical** +//! `prices.price_ohlcv_1m` rows (same surrogate `asset_id`s via the +//! [`AssetRegistry`], same SAC→classic collapse, same preferred-quote +//! orientation, same `Decimal(38,14)` scaling, same `version`). Splitting this +//! into its own crate is what prevents the two paths from drifting. +//! +//! Layers, in pipeline order: +//! - [`filter`] — classic SDEX trades from `LedgerCloseMeta` operation results. +//! - [`soroban`] — Soroban AMM trades + oracle samples from contract events. +//! - [`canonical`] — asset identity, the [`AssetRegistry`] surrogate-id store, +//! and `(base, quote)` canonicalisation. +//! - [`price`] / [`tick`] — per-trade price + the [`TradeTick`] the bucketer eats. +//! - [`bucket`] — 1-minute OHLCV accumulation ([`CandleAccumulator`]). +//! - [`writer`] — the transport-agnostic ClickHouse [`OhlcvWriter`] (works with a +//! plaintext local client *or* the task-0052 mTLS client — both are a +//! `clickhouse::Client`). +//! - [`decode`] — `*.xdr.zst` object bytes → `Vec<LedgerCloseMeta>`.
+ +pub mod bucket; +pub mod canonical; +pub mod decode; +pub mod error; +pub mod filter; +pub mod price; +pub mod soroban; +pub mod tick; +pub mod writer; + +pub use bucket::{CandleAccumulator, OhlcvCandle}; +pub use canonical::{AssetIdentity, AssetRegistry, CanonicalPair, canonicalise}; +pub use decode::{decode_object, ledger_sequence}; +pub use error::IngestError; +pub use filter::{RawTrade, extract_trades}; +pub use price::{compute_price, stroops_to_decimal}; +pub use soroban::{LedgerSoroban, Registries, process_ledger}; +pub use tick::{TradeTick, raw_trade_to_tick}; +pub use writer::{OhlcvWriter, OracleSample}; diff --git a/packages/sdex-backfill/src/price.rs b/packages/prices-ingest-core/src/price.rs similarity index 100% rename from packages/sdex-backfill/src/price.rs rename to packages/prices-ingest-core/src/price.rs diff --git a/packages/sdex-backfill/src/soroban.rs b/packages/prices-ingest-core/src/soroban.rs similarity index 99% rename from packages/sdex-backfill/src/soroban.rs rename to packages/prices-ingest-core/src/soroban.rs index 962ebd9..e6f2601 100644 --- a/packages/sdex-backfill/src/soroban.rs +++ b/packages/prices-ingest-core/src/soroban.rs @@ -24,8 +24,8 @@ use xdr_parser::extract_events; use xdr_parser::types::EventSource; use crate::canonical::{AssetIdentity, AssetRegistry, USDC_ISSUER, USDT_ISSUER, canonicalise}; -use crate::sink::OracleSample; use crate::tick::TradeTick; +use crate::writer::OracleSample; /// AMM token amounts are treated as 7-decimal (Stellar SAC convention). Token /// decimals vary; this is a documented sizing-measurement approximation. 
diff --git a/packages/sdex-backfill/src/tick.rs b/packages/prices-ingest-core/src/tick.rs similarity index 100% rename from packages/sdex-backfill/src/tick.rs rename to packages/prices-ingest-core/src/tick.rs diff --git a/packages/prices-ingest-core/src/writer.rs b/packages/prices-ingest-core/src/writer.rs new file mode 100644 index 0000000..3f733be --- /dev/null +++ b/packages/prices-ingest-core/src/writer.rs @@ -0,0 +1,256 @@ +//! Transport-agnostic ClickHouse writer for `prices.*`. +//! +//! Holds a `clickhouse::Client` and knows how to write the shared row shapes +//! (`price_ohlcv_1m`, `assets`, `oracle_prices`) and load the asset registry. +//! It does **not** care how the client was built: a plaintext local-dev client +//! ([`OhlcvWriter::plaintext`]) and the task-0052 mTLS client (passed to +//! [`OhlcvWriter::new`]) are both just a `clickhouse::Client`, so the same +//! writer serves the local backfill and the live Lambda's remote mTLS sink. +//! +//! Backfill-only bookkeeping (`backfill_sdex_ledgers` resume set) is **not** +//! here — it lives in `sdex-backfill`'s thin wrapper, since the live Lambda +//! uses its own doorbell cursor instead. + +use clickhouse::Client; +use rust_decimal::Decimal; +use serde::{Deserialize, Serialize}; +use tracing::info; + +use crate::bucket::OhlcvCandle; +use crate::canonical::{AssetIdentity, AssetRegistry}; +use crate::error::IngestError; + +/// Convert a `Decimal` to the `i128` mantissa ClickHouse expects for a +/// `Decimal(38, 14)` column. Saturates rather than panicking: AMM +/// amounts/prices are i128-derived and can exceed the 38-digit budget, and an +/// out-of-range value should clamp, not abort the whole run. +pub fn decimal_to_i128(d: Decimal) -> i128 { + let d = d.round_dp(14); + let factor = 10i128.pow(14 - d.scale()); + d.mantissa().saturating_mul(factor) +} + +/// A ClickHouse writer over `prices.*`. Not `Clone` itself (only the inner client is).
+pub struct OhlcvWriter { + client: Client, +} + +impl OhlcvWriter { + /// Wrap an already-built client (e.g. the mTLS client from + /// `prices_clickhouse::mtls::client_from_lambda_env`). + pub fn new(client: Client) -> Self { + Self { client } + } + + /// Build a plaintext client for local-dev / Docker ClickHouse. + pub fn plaintext(url: &str) -> Self { + Self { + client: Client::default().with_url(url), + } + } + + /// Borrow the underlying client (e.g. for backfill-only resume queries). + pub fn client(&self) -> &Client { + &self.client + } + + /// Cheap connectivity probe (`SELECT 1`). + pub async fn preflight(&self) -> Result<(), IngestError> { + self.client.query("SELECT 1").execute().await?; + Ok(()) + } + + /// Load the existing `prices.assets` rows as `(asset_id, identity)` so a + /// run reuses surrogate ids rather than reassigning them. + pub async fn load_assets(&self) -> Result<Vec<(u32, AssetIdentity)>, IngestError> { + let rows = self + .client + .query( + "SELECT asset_id, asset_code, issuer_address, contract_address FROM prices.assets", + ) + .fetch_all::<ExistingAssetRow>() + .await?; + + let assets: Vec<(u32, AssetIdentity)> = rows + .into_iter() + .map(|r| { + let identity = if !r.contract_address.is_empty() { + AssetIdentity::Contract(r.contract_address) + } else if r.asset_code == "XLM" && r.issuer_address.is_empty() { + AssetIdentity::Native + } else { + AssetIdentity::Credit { + code: r.asset_code, + issuer: r.issuer_address, + } + }; + (r.asset_id, identity) + }) + .collect(); + + info!( + existing_assets = assets.len(), + "loaded asset registry from ClickHouse" + ); + Ok(assets) + } + + /// Write a batch of candles for one `source` into `prices.price_ohlcv_1m`.
+ pub async fn write_candles( + &self, + candles: &[OhlcvCandle], + source: &str, + ) -> Result<(), IngestError> { + if candles.is_empty() { + return Ok(()); + } + + let mut insert = self.client.insert("prices.price_ohlcv_1m")?; + + for candle in candles { + insert + .write(&OhlcvRow { + timestamp: candle.minute_start, + asset_id: candle.asset_id, + quote_asset_id: candle.quote_asset_id, + source: source.to_string(), + open: decimal_to_i128(candle.open), + high: decimal_to_i128(candle.high), + low: decimal_to_i128(candle.low), + close: decimal_to_i128(candle.close), + volume_base: decimal_to_i128(candle.volume_base), + volume_quote: decimal_to_i128(candle.volume_quote), + // DEFAULT 0 — the 0026 enrichment Lambda fills this + // (volume_quote_usd = oracle_price * volume_quote). + volume_quote_usd: 0, + // DEFAULT 0 — the enrichment pass fills this (task 0061, + // close_usd = oracle_price * close), same as volume_quote_usd. + close_usd: 0, + vwap: decimal_to_i128(candle.vwap), + trade_count: candle.trade_count, + version: candle.version, + }) + .await?; + } + insert.end().await?; + Ok(()) + } + + /// Write the asset registry into `prices.assets` (idempotent via + /// ReplacingMergeTree on the asset sort key). + pub async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), IngestError> { + let mut insert = self.client.insert("prices.assets")?; + + for (identity, &id) in registry.assets() { + let (asset_code, asset_type, issuer_address, contract_address) = match identity { + AssetIdentity::Native => { + ("XLM".to_string(), "classic", String::new(), String::new()) + } + AssetIdentity::Credit { code, issuer } => { + (code.clone(), "classic", issuer.clone(), String::new()) + } + AssetIdentity::Contract(addr) => { + (String::new(), "soroban", String::new(), addr.clone()) + } + }; + // The SAC that wraps this classic asset (§12.4) — '' for a pure + // Soroban token. Lets a read-time consumer resolve a SAC-wrapped leg. 
+ let sac_address = registry.sac_address_of(identity).unwrap_or_default(); + + insert + .write(&AssetRow { + asset_id: id, + asset_code, + asset_type: asset_type.to_string(), + issuer_address, + contract_address, + sac_address, + home_domain: String::new(), + is_active: 1, + }) + .await?; + } + insert.end().await?; + Ok(()) + } + + /// Write decoded oracle price samples into `prices.oracle_prices`. + pub async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), IngestError> { + if samples.is_empty() { + return Ok(()); + } + let mut insert = self.client.insert("prices.oracle_prices")?; + for s in samples { + insert + .write(&OracleRow { + timestamp: s.timestamp, + asset_id: s.asset_id, + oracle_name: s.oracle_name.clone(), + price_usd: s.price_usd, + raw_data: s.raw_data.clone(), + }) + .await?; + } + insert.end().await?; + Ok(()) + } +} + +#[derive(Debug, Serialize, clickhouse::Row)] +struct OhlcvRow { + timestamp: u32, + asset_id: u32, + quote_asset_id: u32, + source: String, + open: i128, + high: i128, + low: i128, + close: i128, + volume_base: i128, + volume_quote: i128, + volume_quote_usd: i128, + close_usd: i128, + vwap: i128, + trade_count: u32, + version: u64, +} + +#[derive(Debug, Serialize, clickhouse::Row)] +struct AssetRow { + asset_id: u32, + asset_code: String, + asset_type: String, + issuer_address: String, + contract_address: String, + sac_address: String, + home_domain: String, + is_active: u8, +} + +#[derive(Debug, Deserialize, clickhouse::Row)] +struct ExistingAssetRow { + asset_id: u32, + asset_code: String, + issuer_address: String, + contract_address: String, +} + +/// One decoded oracle price sample, ready for `prices.oracle_prices`. +#[derive(Debug, Clone)] +pub struct OracleSample { + pub timestamp: u32, + pub asset_id: u32, + pub oracle_name: String, + /// price_usd scaled to 14 decimals (matches Decimal(38,14)). 
+ pub price_usd: i128, + pub raw_data: String, +} + +#[derive(Debug, Serialize, clickhouse::Row)] +struct OracleRow { + timestamp: u32, + asset_id: u32, + oracle_name: String, + price_usd: i128, + raw_data: String, +} diff --git a/packages/prices-ledger-processor/Cargo.toml b/packages/prices-ledger-processor/Cargo.toml index cad8deb..8e61d31 100644 --- a/packages/prices-ledger-processor/Cargo.toml +++ b/packages/prices-ledger-processor/Cargo.toml @@ -2,40 +2,50 @@ name = "prices-ledger-processor" version = "0.1.0" edition = "2024" -description = "Prices Ledger Processor — local-only prototype (task 0038) of the live ingestion Lambda" +description = "Prices Ledger Processor — live S3/SNS-doorbell ingestion into prices.price_ohlcv_1m over mTLS (task 0038)" [lib] name = "prices_ledger_processor" path = "src/lib.rs" +# The SQS-doorbell Lambda entrypoint. Behind `lambda` so a default `cargo build` +# stays lean (no lambda_runtime / aws-sdk-s3 / rustls mTLS stack); build it with +# `cargo build -p prices-ledger-processor --features lambda` (→ `cargo lambda` for +# the provided.al2023 ZIP, ADR 0006). [[bin]] name = "prices-ledger-processor" path = "src/main.rs" +required-features = ["lambda"] +# Local fixture runner — drives the same reconcile loop against local-disk +# fixtures and a local (plaintext) ClickHouse. Always builds. [[bin]] name = "prices-cli" path = "src/bin/cli.rs" -[dependencies] -extractors-core = { path = "../extractors-core" } -ledger-processor = { path = "../ledger-processor" } -phoenix-extractor = { path = "../phoenix-extractor" } -soroswap-extractor = { path = "../soroswap-extractor" } +[features] +default = [] +# Remote ClickHouse over mTLS via the task-0052 prices-clickhouse::mtls client. +aws-mtls = ["prices-clickhouse/aws-mtls"] +# Full Lambda: SQS doorbell runtime + S3 object fetch + the mTLS sink. 
+lambda = ["aws-mtls", "dep:lambda_runtime", "dep:aws_lambda_events", "dep:aws-sdk-s3", "dep:aws-config"] -stellar-xdr = { workspace = true } -xdr-parser = { workspace = true } +[dependencies] +prices-ingest-core = { path = "../prices-ingest-core" } +prices-clickhouse = { path = "../prices-clickhouse" } +tokio = { workspace = true } clap = { workspace = true } serde = { workspace = true } -serde_json = "1" +serde_json = { workspace = true } thiserror = { workspace = true } -tokio = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } -lambda_runtime = "0.13" -aws_lambda_events = { version = "0.16", default-features = false, features = ["sqs"] } -hex = "0.4" +lambda_runtime = { workspace = true, optional = true } +aws_lambda_events = { workspace = true, optional = true } +aws-sdk-s3 = { workspace = true, optional = true } +aws-config = { workspace = true, optional = true } [dev-dependencies] -tempfile = "3" +tempfile = { workspace = true } diff --git a/packages/prices-ledger-processor/README.md b/packages/prices-ledger-processor/README.md new file mode 100644 index 0000000..051f879 --- /dev/null +++ b/packages/prices-ledger-processor/README.md @@ -0,0 +1,91 @@ +# prices-ledger-processor + +Live ingestion of Stellar ledgers into `prices.price_ohlcv_1m` (task 0038). + +An SQS **doorbell** triggers a **doorbell-cursor reconcile loop** (mirroring BE's +production indexer): read cursor → derive the next Galexie S3 key → fetch → +decode → extract + bucket → write OHLCV candles to the shared Hetzner ClickHouse +over mTLS → advance the cursor **last**. + +## What it reuses (no drift) + +The decode → extract → canonicalise → bucket → write pipeline is **not** +reimplemented here — it is [`prices-ingest-core`](../prices-ingest-core), the +same tested code the SDEX backfill (`sdex-backfill`) runs. 
Live and backfill +therefore emit byte-identical `prices.price_ohlcv_1m` rows: same surrogate +`asset_id`s (via the shared `AssetRegistry`, with SAC→classic collapse), same +preferred-quote orientation, same `Decimal(38,14)` scaling, same `version`. + +This crate owns only the **transport seams**: + +| Seam | Local (default) | Production (`lambda` feature) | +|------|-----------------|-------------------------------| +| `object_fetcher` | `LocalDiskFetcher` (fixtures) | `S3Fetcher` (`aws-sdk-s3` GetObject) | +| `sink` | `ClickHouseSink::plaintext` / `CountingSink` | `ClickHouseSink::from_lambda_env` (mTLS via [`prices-clickhouse::mtls`](../prices-clickhouse), task 0052) | +| `cursor` | `StubFileCursor` | `StubFileCursor` (CH-backed cursor is a follow-up — G-note Part D.1) | + +## Cargo features + +- `default` — lean: the local fixture runner only. No rustls / lambda / aws SDK. +- `aws-mtls` — the remote ClickHouse-over-mTLS sink. +- `lambda` — full Lambda: `lambda_runtime` SQS runtime + `aws-sdk-s3` fetch + + `aws-mtls`. Build: `cargo build -p prices-ledger-processor --features lambda` + (then `cargo lambda` for the `provided.al2023` ZIP, ADR 0006). + +## Run it locally + +```bash +# parse + bucket only, no DB (uses bundled fixtures) +cargo run -p prices-ledger-processor --bin prices-cli -- --cursor 62460539 --dry-run + +# write into a local Docker ClickHouse (apply the prices schema first via +# `prices-clickhouse-init`) +CLICKHOUSE_URL=http://localhost:8123 \ + cargo run -p prices-ledger-processor --bin prices-cli -- --cursor 62460539 +``` + +Fixtures live under `fixtures/ledgers/` and are **gitignored** +(large binary Galexie objects copied locally); the integration test self-skips +when they are absent. 
+ +## Event contract (production) + +Doorbells reach the Lambda via **SNS fan-out** (2026-06-10 cross-team decision): + +``` +ledger PutObject → S3 ObjectCreated + → SNS topic (BE-owned, on stellar-ledger-data) + ├─ SQS ledger-ingest-{env} (BE) + └─ SQS prices-ingest-{env} (prices-api) + DLQ → this Lambda +``` + +The SQS message **body is ignored** — order comes from the cursor + S3 contents, +not delivery order (so no FIFO needed). `reservedConcurrency = 1` (CDK) keeps +runs serial, which is the ordering guarantee. Adding the prices SNS subscription +on BE's bucket is a cross-team change (tracked by task 0050); the CDK wiring is +already in `infra/` (prepare-only). + +## Environment variables + +Injected by CDK at deploy from `/platform/{env}/*` SSM (deploy-time handshake — +the Lambda reads only env vars, never SSM at runtime): + +| Var | Used by | Meaning | +|-----|---------|---------| +| `BUCKET_NAME` | `S3Fetcher` | BE's `stellar-ledger-data` bucket | +| `CH_DOMAIN` | `prices-clickhouse::mtls` | Caddy host fronting the Hetzner cluster | +| `MTLS_SECRET_NAME` | `prices-clickhouse::mtls` | Secrets Manager bundle (cert+key+ca) name | +| `CURSOR_FILE` / `INITIAL_CURSOR` | `StubFileCursor` | cursor checkpoint path / cold-start seed | +| `MAX_ITERATIONS` | reconcile loop | max contiguous ledgers per invocation (default 16) | +| `CLICKHOUSE_URL` | local CLI only | plaintext local ClickHouse endpoint | + +## Known follow-ups + +- **Cross-invocation intra-minute aggregation.** Candles aggregate across one + contiguous run; a minute split across two separate runs lands as two + `version`-keyed rows (ReplacingMergeTree keeps the latest). Same characteristic + the backfill has across partition boundaries; a periodic re-aggregation / + AggregatingMergeTree is the fix. +- **CH-backed cursor** (G-note Part D.1) — replace the file cursor. +- **rustls dedup** — `aws-sdk-s3` pulls an older rustls 0.21 alongside our + 0.23.40; unify to shrink the Lambda ZIP. 
diff --git a/packages/prices-ledger-processor/src/bin/cli.rs b/packages/prices-ledger-processor/src/bin/cli.rs index e404cad..0ba4fb9 100644 --- a/packages/prices-ledger-processor/src/bin/cli.rs +++ b/packages/prices-ledger-processor/src/bin/cli.rs @@ -1,38 +1,47 @@ +//! Local fixture runner for the Prices Ledger Processor (task 0038). +//! +//! Drives the *same* reconcile loop the Lambda runs, but against local-disk +//! fixtures and a local (plaintext) ClickHouse — no AWS, no mTLS. Use it to +//! exercise the full decode → extract → bucket → write pipeline end-to-end: +//! +//! ```bash +//! # write into local Docker ClickHouse (apply prices schema first) +//! CLICKHOUSE_URL=http://localhost:8123 cargo run -p prices-ledger-processor \ +//! --bin prices-cli -- --cursor 62460539 --max-iterations 16 +//! +//! # parse + bucket only, no DB writes +//! cargo run -p prices-ledger-processor --bin prices-cli -- \ +//! --cursor 62460539 --dry-run +//! ``` + use std::path::PathBuf; -use clap::{Parser, ValueEnum}; -use extractors_core::VenueRegistry; -use phoenix_extractor::PhoenixPoolRegistry; +use clap::Parser; +use prices_ingest_core::{AssetRegistry, Registries}; use prices_ledger_processor::{ cursor::{Cursor, StubFileCursor}, - decode::XdrLedgerDecoder, object_fetcher::LocalDiskFetcher, - reconcile::Reconciler, - sink::{SqlFileSink, StdoutJsonSink}, + reconcile::{Reconciler, RunStats}, + sink::{ClickHouseSink, CountingSink}, }; -use soroswap_extractor::SoroswapPoolRegistry; use tracing::info; #[derive(Parser, Debug)] #[command( name = "prices-cli", - about = "Local CLI driver for the Prices Ledger Processor prototype (task 0038)" + about = "Local fixture runner for the Prices Ledger Processor (task 0038)" )] struct Args { - /// Initial cursor value (ledger sequence the run starts AFTER). - /// Always overwrites the cursor file before the run. + /// Initial cursor (the run starts at this ledger + 1). Overwrites the + /// cursor file before the run. 
#[arg(long)] cursor: u64, - /// Maximum reconcile iterations per invocation. + /// Maximum reconcile iterations (contiguous ledgers) per run. #[arg(long, default_value_t = 16)] max_iterations: usize, - /// Sink selection. - #[arg(long, value_enum, default_value_t = SinkKind::Stdout)] - sink: SinkKind, - - /// Local fixture root — keys derived by `ledger_s3_key` are joined onto this. + /// Local fixture root — derived Galexie keys are joined onto this. #[arg(long, default_value = "fixtures/ledgers")] fixtures_dir: PathBuf, @@ -40,15 +49,13 @@ struct Args { #[arg(long, default_value = "out/cursor.txt")] cursor_file: PathBuf, - /// Where SQL-file sink output lands. - #[arg(long, default_value = "out")] - out_dir: PathBuf, -} + /// Local ClickHouse endpoint (plaintext). Ignored with --dry-run. + #[arg(long, env = "CLICKHOUSE_URL", default_value = "http://localhost:8123")] + clickhouse_url: String, -#[derive(Copy, Clone, Debug, ValueEnum)] -enum SinkKind { - Stdout, - SqlFile, + /// Parse + bucket only; do not write to ClickHouse (counts rows). + #[arg(long, default_value_t = false)] + dry_run: bool, } #[tokio::main] @@ -62,34 +69,23 @@ async fn main() -> Result<(), Box> { let cursor = StubFileCursor::new(&args.cursor_file); cursor.write(args.cursor).await?; - let fetcher = LocalDiskFetcher::new(&args.fixtures_dir); - let stats = match args.sink { - SinkKind::Stdout => { - let reconciler = Reconciler { - fetcher, - cursor, - sink: StdoutJsonSink, - decoder: XdrLedgerDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }; - reconciler.run(args.max_iterations).await? 
- } - SinkKind::SqlFile => { - let reconciler = Reconciler { - fetcher, - cursor, - sink: SqlFileSink::new(&args.out_dir), - decoder: XdrLedgerDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }; - reconciler.run(args.max_iterations).await? - } + let stats: RunStats = if args.dry_run { + let reconciler = Reconciler::new( + fetcher, + cursor, + CountingSink::default(), + AssetRegistry::from_existing(Vec::new()), + Registries::new(), + ); + reconciler.run(args.max_iterations).await? + } else { + let sink = ClickHouseSink::plaintext(&args.clickhouse_url); + sink.preflight().await?; + let registry = sink.load_registry().await?; + let reconciler = Reconciler::new(fetcher, cursor, sink, registry, Registries::new()); + reconciler.run(args.max_iterations).await? }; info!( @@ -97,6 +93,7 @@ async fn main() -> Result<(), Box> { end = stats.end_cursor, persisted = stats.ledgers_persisted, rows = stats.rows_emitted, + dry_run = args.dry_run, "reconcile complete" ); diff --git a/packages/prices-ledger-processor/src/lib.rs b/packages/prices-ledger-processor/src/lib.rs index e459ca3..66b613c 100644 --- a/packages/prices-ledger-processor/src/lib.rs +++ b/packages/prices-ledger-processor/src/lib.rs @@ -1,6 +1,22 @@ -pub mod bucket; +//! Prices Ledger Processor — live ingestion of Stellar ledgers into +//! `prices.price_ohlcv_1m` (task 0038). +//! +//! Shape mirrors BE's production indexer: an SQS **doorbell** triggers a +//! **doorbell-cursor reconcile loop** ([`reconcile`]) that walks contiguous +//! ledgers from S3, decodes + extracts + buckets them, writes OHLCV candles to +//! the shared Hetzner ClickHouse over mTLS, and advances its cursor last. +//! +//! The decode → extract → canonicalise → bucket → write pipeline is **not** +//! reimplemented here: it is `prices_ingest_core`, the same tested code the SDEX +//! 
backfill uses, so live and backfill rows are identical (same surrogate +//! `asset_id`s, SAC collapse, orientation, `Decimal`/`version`). This crate owns +//! only the *transport* seams: +//! - [`object_fetcher`] — local-disk (fixtures/tests) vs S3 (`lambda` feature). +//! - [`cursor`] — the ledger-sequence checkpoint. +//! - [`sink`] — the ClickHouse writer (plaintext local vs `aws-mtls` remote). +//! - [`galexie_key`] / [`retry`] / [`safe_log`] — key derivation, backoff, log redaction. + pub mod cursor; -pub mod decode; pub mod galexie_key; pub mod object_fetcher; pub mod reconcile; diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs index 705e153..2208ea5 100644 --- a/packages/prices-ledger-processor/src/main.rs +++ b/packages/prices-ledger-processor/src/main.rs @@ -1,45 +1,39 @@ -//! Lambda entrypoint — SQS doorbell handler. +//! Lambda entrypoint — SQS doorbell handler (built only with `--features lambda`). //! -//! Mirrors BE's indexer cold-start shape (eager config validation, -//! structured JSON tracing, single shared state passed by reference to -//! every invocation). The SQS message body is **ignored**; each -//! invocation just runs the reconcile loop. +//! Cold start mirrors BE's indexer: eager config + connectivity validation, then +//! one shared [`Reconciler`] reused across invocations. The SQS message body is +//! **ignored** — production doorbells arrive via SNS fan-out +//! (`S3 ObjectCreated → SNS (BE-owned) → prices-ingest-{env} SQS + DLQ → here`, +//! 2026-06-10 cross-team decision); raw or SNS-wrapped, the handler just runs +//! the doorbell-cursor reconcile loop. `reservedConcurrency = 1` (set in CDK) +//! keeps runs serial, which is the ordering guarantee. //! -//! Doorbell transport (2026-06-10 cross-team decision, spec §C.1): -//! production doorbells reach this Lambda via **SNS fan-out** — -//! `S3 ObjectCreated → SNS (BE-owned) → prices-ingest-{env} SQS + DLQ -//! → this Lambda`. 
Because the body is ignored, the handler is -//! identical whether the message is raw or SNS-wrapped; the `SqsEvent` -//! envelope is all we deserialise. Failure isolation: the prices queue -//! is prices-owned, so a backlog here never pressures BE's indexer. -//! -//! Phase 2 prototype: the fetcher / cursor / sink are still the -//! local-disk stubs. The Lambda mode exists to prove the -//! `lambda_runtime` event-loop wires up cleanly — a `cargo lambda -//! invoke` against a stub doorbell event runs end-to-end. +//! Transport here is production: S3 object fetch + ClickHouse over mTLS (task +//! 0052). The cursor is still a file checkpoint (`CURSOR_FILE`, seeded from +//! `INITIAL_CURSOR`) pending the CH-backed cursor decision (G-note Part D.1). use std::path::PathBuf; use std::sync::Arc; -use aws_lambda_events::sqs::{SqsBatchResponse, SqsEvent}; -use extractors_core::VenueRegistry; -use lambda_runtime::{Error, LambdaEvent, service_fn}; -use phoenix_extractor::PhoenixPoolRegistry; +use aws_lambda_events::sqs::{BatchItemFailure, SqsBatchResponse, SqsEvent}; +use lambda_runtime::{Error, LambdaEvent, run, service_fn}; +use prices_ingest_core::Registries; use prices_ledger_processor::{ - cursor::StubFileCursor, decode::XdrLedgerDecoder, object_fetcher::LocalDiskFetcher, - reconcile::Reconciler, sink::StdoutJsonSink, + cursor::{Cursor, StubFileCursor}, + object_fetcher::S3Fetcher, + reconcile::Reconciler, + sink::ClickHouseSink, }; -use soroswap_extractor::SoroswapPoolRegistry; use tracing::{error, info}; -const ENV_FIXTURES_DIR: &str = "FIXTURES_DIR"; +const ENV_BUCKET: &str = "BUCKET_NAME"; const ENV_CURSOR_FILE: &str = "CURSOR_FILE"; +const ENV_INITIAL_CURSOR: &str = "INITIAL_CURSOR"; const ENV_MAX_ITERATIONS: &str = "MAX_ITERATIONS"; -const DEFAULT_FIXTURES_DIR: &str = "fixtures/ledgers"; -const DEFAULT_CURSOR_FILE: &str = "out/cursor.txt"; +const DEFAULT_CURSOR_FILE: &str = "/tmp/prices-cursor.txt"; const DEFAULT_MAX_ITERATIONS: usize = 16; -type R = Reconciler; 
+type R = Reconciler<S3Fetcher, StubFileCursor, ClickHouseSink>; #[tokio::main] async fn main() -> Result<(), Error> { @@ -48,9 +42,10 @@ async fn main() -> Result<(), Error> { .json() .init(); - let fixtures_dir = std::env::var(ENV_FIXTURES_DIR) - .map(PathBuf::from) - .unwrap_or_else(|_| PathBuf::from(DEFAULT_FIXTURES_DIR)); + // Eager cold-start init — a missing env / unreachable cluster should be a + // Lambda Init error, not a per-event panic. + let bucket = std::env::var(ENV_BUCKET) + .map_err(|_| Error::from(format!("{ENV_BUCKET} env var is required")))?; let cursor_file = std::env::var(ENV_CURSOR_FILE) .map(PathBuf::from) .unwrap_or_else(|_| PathBuf::from(DEFAULT_CURSOR_FILE)); @@ -59,24 +54,38 @@ async fn main() -> Result<(), Error> { .and_then(|s| s.parse().ok()) .unwrap_or(DEFAULT_MAX_ITERATIONS); + let cursor = StubFileCursor::new(&cursor_file); + // Seed the cursor on a fresh container if it has no checkpoint yet. + if cursor.read().await.is_err() + && let Some(seed) = std::env::var(ENV_INITIAL_CURSOR) + .ok() + .and_then(|s| s.parse::<u64>().ok()) + { + cursor.write(seed).await?; + info!(seed, "seeded cursor from INITIAL_CURSOR"); + } + + let fetcher = S3Fetcher::from_env(&bucket).await; + let sink = ClickHouseSink::from_lambda_env().await?; + sink.preflight().await?; + let registry = sink.load_registry().await?; + info!( - fixtures_dir = %fixtures_dir.display(), + %bucket, cursor_file = %cursor_file.display(), max_iterations, - "prices-ledger-processor cold start" + "prices-ledger-processor cold start ready" ); - let reconciler: Arc<R> = Arc::new(Reconciler { - fetcher: LocalDiskFetcher::new(&fixtures_dir), - cursor: StubFileCursor::new(&cursor_file), - sink: StdoutJsonSink, - decoder: XdrLedgerDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }); + let reconciler: Arc<R> = Arc::new(Reconciler::new( + fetcher, + cursor, + sink, + registry, + Registries::new(), + )); -
lambda_runtime::run(service_fn(move |event: LambdaEvent| { + run(service_fn(move |event: LambdaEvent| { let r = reconciler.clone(); async move { handler(event, r, max_iterations).await } })) @@ -108,7 +117,7 @@ async fn handler( error = %e, "reconcile failed — will redeliver doorbell" ); - batch_item_failures.push(aws_lambda_events::sqs::BatchItemFailure { + batch_item_failures.push(BatchItemFailure { item_identifier: message_id, }); } diff --git a/packages/prices-ledger-processor/src/object_fetcher/mod.rs b/packages/prices-ledger-processor/src/object_fetcher/mod.rs index 3ec7f13..3621903 100644 --- a/packages/prices-ledger-processor/src/object_fetcher/mod.rs +++ b/packages/prices-ledger-processor/src/object_fetcher/mod.rs @@ -4,8 +4,12 @@ use std::future::Future; pub mod local_disk; +#[cfg(feature = "lambda")] +pub mod s3; pub use local_disk::LocalDiskFetcher; +#[cfg(feature = "lambda")] +pub use s3::S3Fetcher; #[derive(Debug, thiserror::Error)] pub enum FetchError { @@ -15,6 +19,8 @@ pub enum FetchError { #[source] source: std::io::Error, }, + #[error("object-store error fetching {key}: {detail}")] + Backend { key: String, detail: String }, } pub trait ObjectFetcher { diff --git a/packages/prices-ledger-processor/src/object_fetcher/s3.rs b/packages/prices-ledger-processor/src/object_fetcher/s3.rs new file mode 100644 index 0000000..d2daaf7 --- /dev/null +++ b/packages/prices-ledger-processor/src/object_fetcher/s3.rs @@ -0,0 +1,64 @@ +//! S3-backed [`ObjectFetcher`] — the production fetch path. +//! +//! Reads Galexie `*.xdr.zst` objects from BE's `stellar-ledger-data` bucket by +//! their derived key. A `NoSuchKey` is mapped to `Ok(None)` (a gap → the +//! reconcile loop stops cleanly), every other S3 error to `Err`. Bucket name +//! arrives via env var (CDK injects it from `/platform/{env}/…` SSM at deploy). 
+ +use aws_sdk_s3::Client; + +use super::{FetchError, ObjectFetcher}; + +pub struct S3Fetcher { + client: Client, + bucket: String, +} + +impl S3Fetcher { + pub fn new(client: Client, bucket: impl Into<String>) -> Self { + Self { + client, + bucket: bucket.into(), + } + } + + /// Build from the ambient AWS config (Lambda execution role). + pub async fn from_env(bucket: impl Into<String>) -> Self { + let cfg = aws_config::defaults(aws_config::BehaviorVersion::latest()) + .load() + .await; + Self::new(Client::new(&cfg), bucket) + } +} + +impl ObjectFetcher for S3Fetcher { + async fn fetch(&self, key: &str) -> Result<Option<Vec<u8>>, FetchError> { + match self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + { + Ok(out) => { + let data = out.body.collect().await.map_err(|e| FetchError::Backend { + key: key.to_string(), + detail: e.to_string(), + })?; + Ok(Some(data.into_bytes().to_vec())) + } + Err(err) => { + let svc = err.into_service_error(); + if svc.is_no_such_key() { + Ok(None) + } else { + Err(FetchError::Backend { + key: key.to_string(), + detail: svc.to_string(), + }) + } + } + } + } +} diff --git a/packages/prices-ledger-processor/src/reconcile.rs b/packages/prices-ledger-processor/src/reconcile.rs index ddc60cd..e686c53 100644 --- a/packages/prices-ledger-processor/src/reconcile.rs +++ b/packages/prices-ledger-processor/src/reconcile.rs @@ -1,27 +1,33 @@ //! Doorbell-cursor reconcile loop. //! -//! Mirrors BE's indexer (`crates/indexer/src/handler/mod.rs:201`): -//! read cursor, derive next S3 key, fetch, decode, dispatch, bucket, -//! sink, advance cursor last. Stops at the first gap or -//! `max_iterations`. The cursor write is the **ordering barrier** — -//! a crash before it leaves the cursor unchanged and the next -//! invocation re-processes the same ledger (idempotent via the -//! ReplacingMergeTree / merge semantics in production; via the -//! pure-function bucketer in the prototype).
- -use std::future::Future; - -use extractors_core::{SorobanEventRow, VenueRegistry}; -use ledger_processor::dispatch::{DispatchError, dispatch}; -use phoenix_extractor::PhoenixPoolRegistry; -use soroswap_extractor::SoroswapPoolRegistry; -use tracing::{info, warn}; - -use crate::bucket::Bucketer; +//! Mirrors BE's indexer: read cursor, derive the next S3 key, fetch, decode, +//! extract+bucket, write, advance the cursor **last**. Stops at the first gap or +//! `max_iterations`. The cursor write is the ordering barrier — a crash before +//! it leaves the cursor unchanged and the next invocation re-processes the run +//! (idempotent: ReplacingMergeTree collapses re-inserts by `version`). +//! +//! The decode→extract→canonicalise→bucket step is `prices_ingest_core` — the +//! same code the SDEX backfill runs — so live candles are byte-identical to +//! backfilled ones. Candles accumulate across the whole contiguous run and are +//! flushed once at the end, so all ledgers sharing a minute aggregate into one +//! candle (matching the backfill's per-chunk accumulation). The only residual +//! is a minute split across two separate invocations/runs; that is the same +//! `version`-keyed characteristic the backfill has across partition boundaries, +//! and a periodic re-aggregation is tracked as a follow-up. 
+ +use std::collections::HashMap; + +use prices_ingest_core::{ + AssetRegistry, CandleAccumulator, OracleSample, Registries, decode_object, extract_trades, + ledger_sequence, process_ledger, raw_trade_to_tick, +}; +use tokio::sync::Mutex; +use tracing::info; + use crate::cursor::{Cursor, CursorError}; use crate::galexie_key::ledger_s3_key; use crate::object_fetcher::{FetchError, ObjectFetcher}; -use crate::sink::{OhlcvSink, SinkError}; +use crate::sink::{CandleSink, SinkError}; #[derive(Debug, thiserror::Error)] pub enum ReconcileError { @@ -31,28 +37,10 @@ pub enum ReconcileError { Fetch(#[from] FetchError), #[error("decode error: {0}")] Decode(String), - #[error("dispatch error: {0}")] - Dispatch(String), #[error("sink error: {0}")] Sink(#[from] SinkError), } -#[derive(Debug, Clone)] -pub struct DecodedLedger { - pub ledger_sequence: u64, - pub closed_at_unix_seconds: i64, - /// Soroban events grouped by `(transaction_id, contract_id)` — the - /// shape the kernel from task 0037 dispatches on. - pub event_groups: Vec>, -} - -pub trait LedgerDecoder { - fn decode( - &self, - bytes: &[u8], - ) -> impl Future, String>> + Send; -} - #[derive(Debug, Clone, Default)] pub struct RunStats { pub start_cursor: u64, @@ -61,28 +49,58 @@ pub struct RunStats { pub rows_emitted: u64, } -pub struct Reconciler { - pub fetcher: F, - pub cursor: C, - pub sink: S, - pub decoder: D, - pub venue_registry: VenueRegistry, - pub phoenix_registry: PhoenixPoolRegistry, - pub soroswap_registry: SoroswapPoolRegistry, +/// Warm per-container processing state: the surrogate-id registry (loaded from +/// `prices.assets` at cold start) and the incrementally-grown AMM venue/pool +/// registries. Persisting these across invocations lets a warm Lambda resolve +/// pools discovered earlier in its lifetime. 
+pub struct ProcessingState { + pub assets: AssetRegistry, + pub registries: Registries, +} + +pub struct Reconciler<F, C, S> { + fetcher: F, + cursor: C, + sink: S, + state: Mutex<ProcessingState>, } -impl<F, C, S, D> Reconciler<F, C, S, D> +impl<F, C, S> Reconciler<F, C, S> where F: ObjectFetcher + Sync, C: Cursor + Sync, - S: OhlcvSink + Sync, - D: LedgerDecoder + Sync, + S: CandleSink + Sync, { + pub fn new( + fetcher: F, + cursor: C, + sink: S, + assets: AssetRegistry, + registries: Registries, + ) -> Self { + Self { + fetcher, + cursor, + sink, + state: Mutex::new(ProcessingState { assets, registries }), + } + } + pub async fn run(&self, max_iterations: usize) -> Result<RunStats, ReconcileError> { + let mut st = self.state.lock().await; + // Deref the guard once so `registries` and `assets` can be borrowed as + // disjoint fields (a borrow through the guard's DerefMut each time would + // conflict). + let state = &mut *st; + let start = self.cursor.read().await?; let mut current = start; let mut persisted = 0u64; - let mut rows_emitted = 0u64; + + // Accumulate across the whole contiguous run, flush once at the end.
+ let mut sdex = CandleAccumulator::new(); + let mut amm: HashMap<&'static str, CandleAccumulator> = HashMap::new(); + let mut oracle: Vec = Vec::new(); for _ in 0..max_iterations { let next = current + 1; @@ -96,47 +114,62 @@ where break; }; - let ledgers = self - .decoder - .decode(&bytes) - .await - .map_err(ReconcileError::Decode)?; - - let mut bucketer = Bucketer::new(); - let mut max_seq = current; - for ledger in ledgers { - for group in &ledger.event_groups { - let trades = match dispatch( - group, - &self.venue_registry, - &self.phoenix_registry, - &self.soroswap_registry, - ) { - Ok(t) => t, - Err(DispatchError::VenueNotImplemented { venue, contract_id }) => { - warn!(?venue, %contract_id, "venue extractor not yet implemented — skipping"); - Vec::new() - } - Err(e) => return Err(ReconcileError::Dispatch(e.to_string())), - }; - for trade in &trades { - bucketer.ingest(ledger.closed_at_unix_seconds, trade); - } + let lcms = decode_object(&bytes).map_err(|e| ReconcileError::Decode(e.to_string()))?; + let mut obj_max = current; + for lcm in &lcms { + // Classic SDEX trades from operation results. + for trade in extract_trades(lcm) { + sdex.merge(&raw_trade_to_tick(&trade, &mut state.assets)); } - if ledger.ledger_sequence > max_seq { - max_seq = ledger.ledger_sequence; + // Soroban AMM trades + oracle samples. 
+ let sob = process_ledger(lcm, &mut state.registries, &mut state.assets); + for (source, tick) in &sob.amm_ticks { + amm.entry(source) + .or_insert_with(CandleAccumulator::new) + .merge(tick); } + oracle.extend(sob.oracle); + obj_max = obj_max.max(ledger_sequence(lcm) as u64); } - let rows = bucketer.drain(); - rows_emitted += rows.len() as u64; - self.sink.write(&rows).await?; - self.cursor.write(max_seq).await?; - info!(ledger = max_seq, rows = rows.len(), "ledger persisted"); - current = max_seq; + current = obj_max.max(next); persisted += 1; } + if persisted == 0 { + return Ok(RunStats { + start_cursor: start, + end_cursor: start, + ledgers_persisted: 0, + rows_emitted: 0, + }); + } + + // Flush + write, then advance the cursor LAST (ordering barrier). + let mut rows_emitted = 0u64; + + let sdex_candles = sdex.flush_all(); + rows_emitted += sdex_candles.len() as u64; + self.sink.write_candles(&sdex_candles, "sdex").await?; + + for (source, mut acc) in amm { + let candles = acc.flush_all(); + rows_emitted += candles.len() as u64; + self.sink.write_candles(&candles, source).await?; + } + + self.sink.write_oracle(&oracle).await?; + self.sink.write_assets(&state.assets).await?; + self.cursor.write(current).await?; + + info!( + start, + end = current, + persisted, + rows = rows_emitted, + "reconcile run complete" + ); + Ok(RunStats { start_cursor: start, end_cursor: current, diff --git a/packages/prices-ledger-processor/src/sink/mod.rs b/packages/prices-ledger-processor/src/sink/mod.rs index d1614f2..c3e17a9 100644 --- a/packages/prices-ledger-processor/src/sink/mod.rs +++ b/packages/prices-ledger-processor/src/sink/mod.rs @@ -1,16 +1,19 @@ -//! OHLCV sink trait — the production-swap seam where prototype-mode -//! stdout / SQL-file writes become a `clickhouse::Client` insert against -//! `prices.price_ohlcv` (per ADRs 0003, 0004, 0007). +//! OHLCV sink — the seam that turns bucketed candles into ClickHouse rows. +//! +//! 
The real sink ([`ClickHouseSink`]) wraps the shared +//! [`prices_ingest_core::OhlcvWriter`], so it writes the exact same +//! `prices.price_ohlcv_1m` rows as the SDEX backfill. It is transport-agnostic: +//! [`ClickHouseSink::plaintext`] talks to a local Docker ClickHouse, and +//! (with the `aws-mtls` feature) [`ClickHouseSink::from_lambda_env`] talks to the +//! shared Hetzner cluster over mTLS via the task-0052 client. Tests use the +//! in-memory [`CountingSink`]. use std::future::Future; -use crate::bucket::OhlcvRow; +use prices_ingest_core::{AssetRegistry, OhlcvCandle, OhlcvWriter, OracleSample}; -pub mod sql_file; -pub mod stdout; - -pub use sql_file::SqlFileSink; -pub use stdout::StdoutJsonSink; +use crate::retry::{DEFAULT_BACKOFF_MS, retry_with_backoff}; +use crate::safe_log::safe_response_token; #[derive(Debug, thiserror::Error)] pub enum SinkError { @@ -18,6 +21,143 @@ pub enum SinkError { Write(String), } -pub trait OhlcvSink { - fn write(&self, rows: &[OhlcvRow]) -> impl Future> + Send; +/// Writes the three shared `prices.*` outputs of a reconcile run. Candle writes +/// are idempotent (ReplacingMergeTree keyed by `version`), so the sink may be +/// retried freely. +pub trait CandleSink { + fn write_candles( + &self, + candles: &[OhlcvCandle], + source: &str, + ) -> impl Future> + Send; + + fn write_oracle( + &self, + samples: &[OracleSample], + ) -> impl Future> + Send; + + fn write_assets( + &self, + registry: &AssetRegistry, + ) -> impl Future> + Send; +} + +/// ClickHouse sink backed by the shared [`OhlcvWriter`]. Works against either a +/// plaintext local client or the mTLS remote client — both are a +/// `clickhouse::Client`. +pub struct ClickHouseSink { + writer: OhlcvWriter, +} + +impl ClickHouseSink { + /// Local / Docker ClickHouse over plain HTTP (no TLS). Used by the CLI + /// fixture runner and the local integration test. 
+ pub fn plaintext(url: &str) -> Self { + Self { + writer: OhlcvWriter::plaintext(url), + } + } + + /// Remote Hetzner ClickHouse over mTLS, built from the Lambda's + /// `MTLS_SECRET_NAME` / `CH_DOMAIN` env vars via the task-0052 client. + #[cfg(feature = "aws-mtls")] + pub async fn from_lambda_env() -> Result { + let client = + prices_clickhouse::mtls::client_from_lambda_env(prices_clickhouse::PROD_DATABASE) + .await + .map_err(|e| SinkError::Write(format!("mtls client init: {e}")))?; + Ok(Self { + writer: OhlcvWriter::new(client), + }) + } + + /// Probe connectivity (`SELECT 1`). Call once at cold start so an + /// unreachable cluster surfaces as a Lambda Init error, not per-event. + pub async fn preflight(&self) -> Result<(), SinkError> { + self.writer.preflight().await.map_err(redact) + } + + /// Load the existing asset registry from `prices.assets` so surrogate ids + /// are reused (not reassigned) across cold starts — the load-bearing + /// guarantee that live ids match the backfill's. + pub async fn load_registry(&self) -> Result { + let existing = self.writer.load_assets().await.map_err(redact)?; + Ok(AssetRegistry::from_existing(existing)) + } +} + +impl CandleSink for ClickHouseSink { + async fn write_candles(&self, candles: &[OhlcvCandle], source: &str) -> Result<(), SinkError> { + // Idempotent (RMT by version) → retry every failure as transient. + // Finer permanent-vs-transient classification is a follow-up. 
+ retry_with_backoff( + &DEFAULT_BACKOFF_MS, + |_| true, + || async { + self.writer + .write_candles(candles, source) + .await + .map_err(redact) + }, + ) + .await + .map(|_| ()) + } + + async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), SinkError> { + retry_with_backoff( + &DEFAULT_BACKOFF_MS, + |_| true, + || async { self.writer.write_oracle(samples).await.map_err(redact) }, + ) + .await + .map(|_| ()) + } + + async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), SinkError> { + retry_with_backoff( + &DEFAULT_BACKOFF_MS, + |_| true, + || async { self.writer.write_assets(registry).await.map_err(redact) }, + ) + .await + .map(|_| ()) + } +} + +/// Map an ingest error into a sink error WITHOUT leaking row data: a ClickHouse +/// `BadResponse` body can echo offending values, so only the leading +/// `Code: NNN` token survives (see [`safe_response_token`]). +fn redact(e: prices_ingest_core::IngestError) -> SinkError { + match &e { + prices_ingest_core::IngestError::Clickhouse(err) => { + SinkError::Write(safe_response_token(&err.to_string())) + } + other => SinkError::Write(other.to_string()), + } +} + +/// In-memory sink for tests and `--dry-run`: counts rows, touches no network. 
+#[derive(Default)] +pub struct CountingSink { + pub candles: std::sync::atomic::AtomicU64, + pub oracle: std::sync::atomic::AtomicU64, +} + +impl CandleSink for CountingSink { + async fn write_candles(&self, candles: &[OhlcvCandle], _source: &str) -> Result<(), SinkError> { + self.candles + .fetch_add(candles.len() as u64, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + + async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), SinkError> { + self.oracle + .fetch_add(samples.len() as u64, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + + async fn write_assets(&self, _registry: &AssetRegistry) -> Result<(), SinkError> { + Ok(()) + } } diff --git a/packages/prices-ledger-processor/tests/reconcile_e2e.rs b/packages/prices-ledger-processor/tests/reconcile_e2e.rs index 5d6a81b..73b84a3 100644 --- a/packages/prices-ledger-processor/tests/reconcile_e2e.rs +++ b/packages/prices-ledger-processor/tests/reconcile_e2e.rs @@ -1,207 +1,123 @@ -use std::sync::Mutex; - -use extractors_core::{SorobanEventRow, VenueRegistry}; -use phoenix_extractor::PhoenixPoolRegistry; +//! End-to-end reconcile test against the bundled real Galexie fixtures. +//! +//! Drives the production pipeline (`prices_ingest_core` decode → extract → +//! bucket) over the three contiguous fixture ledgers 62460540–62460542 using a +//! local-disk fetcher and an in-memory counting sink (no ClickHouse). Proves the +//! doorbell-cursor loop decodes real XDR, advances the cursor to the last +//! contiguous ledger, stops at the gap, and is idempotent on re-run. +//! +//! Fixtures are gitignored (large binary Galexie objects, copied locally), so +//! each test **self-skips** when they are absent — matching the repo's +//! self-skipping integration-test convention (`prices-clickhouse` mtls smoke). 
+ +use std::path::PathBuf; + +use prices_ingest_core::{AssetRegistry, Registries}; use prices_ledger_processor::{ - bucket::OhlcvRow, cursor::{Cursor, StubFileCursor}, - galexie_key::ledger_s3_key, object_fetcher::LocalDiskFetcher, - reconcile::{DecodedLedger, LedgerDecoder, Reconciler}, - sink::{OhlcvSink, SinkError}, + reconcile::Reconciler, + sink::CountingSink, }; -use soroswap_extractor::SoroswapPoolRegistry; use tempfile::tempdir; -struct CaptureSink { - rows: Mutex>, -} +const FIRST_FIXTURE: u64 = 62_460_540; +const LAST_FIXTURE: u64 = 62_460_542; -impl OhlcvSink for CaptureSink { - async fn write(&self, rows: &[OhlcvRow]) -> Result<(), SinkError> { - self.rows.lock().unwrap().extend_from_slice(rows); - Ok(()) - } +fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/ledgers") } -/// Returns one empty `DecodedLedger` for each fetched object, with the -/// ledger sequence parsed back out of the `--{seq}.xdr.zst` suffix in -/// the bytes (so the test can wire decode to fixture content trivially). -/// No event groups → no trades → no rows. Tests cursor + fetcher + loop. -struct EmptyDecoder; - -impl LedgerDecoder for EmptyDecoder { - async fn decode(&self, bytes: &[u8]) -> Result, String> { - let seq: u64 = std::str::from_utf8(bytes) - .map_err(|e| e.to_string())? - .trim() - .parse() - .map_err(|e: std::num::ParseIntError| e.to_string())?; - Ok(vec![DecodedLedger { - ledger_sequence: seq, - closed_at_unix_seconds: 1_700_000_000, - event_groups: Vec::new(), - }]) - } +/// The first fixture file must be present, else the test self-skips. +fn fixtures_present() -> bool { + let key = format!("FC47D9FF--62400000-62463999/FC46ED83--{FIRST_FIXTURE}.xdr.zst"); + fixtures_dir().join(key).exists() } -/// Returns one `DecodedLedger` with one event group whose first contract -/// is not in any registry → dispatch returns `Ok(vec![])`. Still no -/// trades, but proves the dispatch path executes. 
-struct SingleEmptyGroupDecoder; - -impl LedgerDecoder for SingleEmptyGroupDecoder { - async fn decode(&self, bytes: &[u8]) -> Result, String> { - let seq: u64 = std::str::from_utf8(bytes).unwrap().trim().parse().unwrap(); - Ok(vec![DecodedLedger { - ledger_sequence: seq, - closed_at_unix_seconds: 1_700_000_000, - event_groups: vec![vec![SorobanEventRow { - contract_id: "C-unknown".into(), - transaction_id: "T".into(), - ledger_sequence: seq, - event_index: 0, - topics: Vec::new(), - data: extractors_core::TaggedValue::Null, - }]], - }]) - } +macro_rules! skip_if_no_fixtures { + () => { + if !fixtures_present() { + eprintln!( + "skipping: no local fixtures under packages/prices-ledger-processor/fixtures/" + ); + return; + } + }; } -#[tokio::test] -async fn empty_fixture_dir_no_op_returns_zero_persisted() { - let dir = tempdir().unwrap(); - let cursor_path = dir.path().join("cursor.txt"); - let cursor = StubFileCursor::new(&cursor_path); - cursor.write(99).await.unwrap(); - - let reconciler = Reconciler { - fetcher: LocalDiskFetcher::new(dir.path().join("nope")), +fn reconciler( + fixtures: PathBuf, + cursor: StubFileCursor, +) -> Reconciler { + Reconciler::new( + LocalDiskFetcher::new(fixtures), cursor, - sink: CaptureSink { - rows: Mutex::new(Vec::new()), - }, - decoder: EmptyDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }; - - let stats = reconciler.run(8).await.unwrap(); - assert_eq!(stats.start_cursor, 99); - assert_eq!(stats.end_cursor, 99); - assert_eq!(stats.ledgers_persisted, 0); - assert_eq!(stats.rows_emitted, 0); + CountingSink::default(), + AssetRegistry::from_existing(Vec::new()), + Registries::new(), + ) } #[tokio::test] -async fn contiguous_run_advances_cursor_until_gap() { +async fn contiguous_run_decodes_real_fixtures_and_advances_cursor() { + skip_if_no_fixtures!(); let dir = tempdir().unwrap(); - let fixtures = dir.path().join("ledgers"); - 
// Seed three contiguous "ledgers" 100, 101, 102, then a gap at 103. - for seq in [100u64, 101, 102] { - let key = ledger_s3_key(seq as i64); - let path = fixtures.join(&key); - tokio::fs::create_dir_all(path.parent().unwrap()) - .await - .unwrap(); - tokio::fs::write(&path, format!("{seq}")).await.unwrap(); - } - - let cursor_path = dir.path().join("cursor.txt"); - let cursor = StubFileCursor::new(&cursor_path); - cursor.write(99).await.unwrap(); - - let reconciler = Reconciler { - fetcher: LocalDiskFetcher::new(&fixtures), - cursor, - sink: CaptureSink { - rows: Mutex::new(Vec::new()), - }, - decoder: EmptyDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }; - - let stats = reconciler.run(8).await.unwrap(); - assert_eq!(stats.start_cursor, 99); - assert_eq!(stats.end_cursor, 102); - assert_eq!(stats.ledgers_persisted, 3); - assert_eq!(stats.rows_emitted, 0); + let cursor = StubFileCursor::new(dir.path().join("cursor.txt")); + cursor.write(FIRST_FIXTURE - 1).await.unwrap(); - // Cursor file ends up at 102 — next invocation resumes here. - let cursor = StubFileCursor::new(&cursor_path); - assert_eq!(cursor.read().await.unwrap(), 102); + let stats = reconciler(fixtures_dir(), cursor) + .run(16) + .await + .expect("real-fixture reconcile run should succeed"); + + assert_eq!(stats.start_cursor, FIRST_FIXTURE - 1); + assert_eq!( + stats.end_cursor, LAST_FIXTURE, + "cursor should advance to the last contiguous fixture ledger" + ); + assert_eq!( + stats.ledgers_persisted, 3, + "all three contiguous fixtures should be processed" + ); + + // Cursor file persisted at the last ledger → next invocation resumes here. 
+ let resumed = StubFileCursor::new(dir.path().join("cursor.txt")); + assert_eq!(resumed.read().await.unwrap(), LAST_FIXTURE); } #[tokio::test] -async fn unknown_contract_dispatch_does_not_fail() { +async fn gap_stop_when_no_new_ledger() { + skip_if_no_fixtures!(); let dir = tempdir().unwrap(); - let fixtures = dir.path().join("ledgers"); - let key = ledger_s3_key(200); - let path = fixtures.join(&key); - tokio::fs::create_dir_all(path.parent().unwrap()) - .await - .unwrap(); - tokio::fs::write(&path, "200").await.unwrap(); + let cursor = StubFileCursor::new(dir.path().join("cursor.txt")); + // Start past the last fixture → next key (62460543) is a miss → gap stop. + cursor.write(LAST_FIXTURE).await.unwrap(); - let cursor_path = dir.path().join("cursor.txt"); - let cursor = StubFileCursor::new(&cursor_path); - cursor.write(199).await.unwrap(); + let stats = reconciler(fixtures_dir(), cursor).run(16).await.unwrap(); - let reconciler = Reconciler { - fetcher: LocalDiskFetcher::new(&fixtures), - cursor, - sink: CaptureSink { - rows: Mutex::new(Vec::new()), - }, - decoder: SingleEmptyGroupDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }; - - let stats = reconciler.run(2).await.unwrap(); - assert_eq!(stats.ledgers_persisted, 1); - assert_eq!(stats.end_cursor, 200); + assert_eq!(stats.ledgers_persisted, 0); + assert_eq!(stats.end_cursor, LAST_FIXTURE); + assert_eq!(stats.rows_emitted, 0); } #[tokio::test] async fn idempotent_on_re_run_from_same_cursor() { - let dir = tempdir().unwrap(); - let fixtures = dir.path().join("ledgers"); - let key = ledger_s3_key(50); - let path = fixtures.join(&key); - tokio::fs::create_dir_all(path.parent().unwrap()) - .await - .unwrap(); - tokio::fs::write(&path, "50").await.unwrap(); - - let cursor_path = dir.path().join("cursor.txt"); - + skip_if_no_fixtures!(); let run = || async { - let cursor = StubFileCursor::new(&cursor_path); - 
cursor.write(49).await.unwrap(); - let sink = CaptureSink { - rows: Mutex::new(Vec::new()), - }; - let reconciler = Reconciler { - fetcher: LocalDiskFetcher::new(&fixtures), - cursor, - sink, - decoder: EmptyDecoder, - venue_registry: VenueRegistry::new(), - phoenix_registry: PhoenixPoolRegistry::default(), - soroswap_registry: SoroswapPoolRegistry::new(), - }; - reconciler.run(8).await.unwrap() + let dir = tempdir().unwrap(); + let cursor = StubFileCursor::new(dir.path().join("cursor.txt")); + cursor.write(FIRST_FIXTURE - 1).await.unwrap(); + reconciler(fixtures_dir(), cursor).run(16).await.unwrap() }; let first = run().await; let second = run().await; + assert_eq!(first.start_cursor, second.start_cursor); assert_eq!(first.end_cursor, second.end_cursor); assert_eq!(first.ledgers_persisted, second.ledgers_persisted); - assert_eq!(first.rows_emitted, second.rows_emitted); + assert_eq!( + first.rows_emitted, second.rows_emitted, + "row count must be deterministic across identical runs" + ); } diff --git a/packages/sdex-backfill/Cargo.toml b/packages/sdex-backfill/Cargo.toml index 24e880f..e29b915 100644 --- a/packages/sdex-backfill/Cargo.toml +++ b/packages/sdex-backfill/Cargo.toml @@ -22,9 +22,5 @@ serde = { workspace = true } serde_json = { workspace = true } rust_decimal = { workspace = true } thiserror = { workspace = true } +prices-ingest-core = { path = "../prices-ingest-core" } prices-clickhouse = { path = "../prices-clickhouse" } -extractors-core = { path = "../extractors-core" } -phoenix-extractor = { path = "../phoenix-extractor" } -soroswap-extractor = { path = "../soroswap-extractor" } -aquarius-extractor = { path = "../aquarius-extractor" } -ledger-processor = { path = "../ledger-processor" } diff --git a/packages/sdex-backfill/src/error.rs b/packages/sdex-backfill/src/error.rs index aba758f..ed9d74a 100644 --- a/packages/sdex-backfill/src/error.rs +++ b/packages/sdex-backfill/src/error.rs @@ -38,4 +38,7 @@ pub enum BackfillError { #[error("clickhouse: 
{0}")] Clickhouse(#[from] clickhouse::error::Error), + + #[error("ingest: {0}")] + Ingest(#[from] prices_ingest_core::IngestError), } diff --git a/packages/sdex-backfill/src/ingest.rs b/packages/sdex-backfill/src/ingest.rs index 0a6b67e..6ac6638 100644 --- a/packages/sdex-backfill/src/ingest.rs +++ b/packages/sdex-backfill/src/ingest.rs @@ -4,14 +4,13 @@ use std::time::{Duration, Instant}; use tracing::{info, warn}; -use crate::bucket::CandleAccumulator; -use crate::canonical::AssetRegistry; +use prices_ingest_core::{ + AssetRegistry, CandleAccumulator, Registries, extract_trades, process_ledger, raw_trade_to_tick, +}; + use crate::error::BackfillError; -use crate::filter::extract_trades; use crate::partition::Partition; use crate::sink::{OracleSample, Sink}; -use crate::soroban::{Registries, process_ledger}; -use crate::tick::raw_trade_to_tick; const ORACLE_FLUSH_THRESHOLD: usize = 50_000; diff --git a/packages/sdex-backfill/src/main.rs b/packages/sdex-backfill/src/main.rs index 3721a9d..a9ba3ee 100644 --- a/packages/sdex-backfill/src/main.rs +++ b/packages/sdex-backfill/src/main.rs @@ -1,17 +1,11 @@ -mod bucket; -mod canonical; mod cli; mod error; -mod filter; mod ingest; mod obs; mod partition; -mod price; mod run; mod sink; -mod soroban; mod sync; -mod tick; use clap::Parser; diff --git a/packages/sdex-backfill/src/run.rs b/packages/sdex-backfill/src/run.rs index 9a8bd2c..8ff00cf 100644 --- a/packages/sdex-backfill/src/run.rs +++ b/packages/sdex-backfill/src/run.rs @@ -5,12 +5,12 @@ use tokio::process::Command; use tokio::task::JoinHandle; use tracing::{info, warn}; -use crate::canonical::AssetRegistry; +use prices_ingest_core::{AssetRegistry, Registries}; + use crate::error::BackfillError; use crate::ingest::{PartitionStats, index_partition}; use crate::partition::{Partition, partitions_for_range}; use crate::sink::Sink; -use crate::soroban::Registries; use crate::sync::{SyncOutcome, sync_partition}; pub async fn execute( diff --git 
a/packages/sdex-backfill/src/sink.rs b/packages/sdex-backfill/src/sink.rs index 536b414..54be549 100644 --- a/packages/sdex-backfill/src/sink.rs +++ b/packages/sdex-backfill/src/sink.rs @@ -1,47 +1,74 @@ +//! Backfill ClickHouse sink — a thin wrapper over the shared +//! [`prices_ingest_core::OhlcvWriter`]. +//! +//! The candle / asset / oracle writes (and the asset-registry load) are shared +//! with the live Lambda and live in the core writer, so both paths emit +//! byte-identical `prices.*` rows. This wrapper adds only the **backfill-only** +//! resume bookkeeping against `prices.backfill_sdex_ledgers` (the live Lambda +//! uses its own doorbell cursor instead). `OracleSample` is re-exported so the +//! rest of the backfill keeps importing it from `crate::sink`. + use std::collections::HashSet; -use clickhouse::Client; -use rust_decimal::Decimal; -use serde::{Deserialize, Serialize}; +use clickhouse::Row; +use prices_ingest_core::canonical::AssetIdentity; +use prices_ingest_core::{AssetRegistry, OhlcvCandle, OhlcvWriter}; +use serde::Serialize; use tracing::info; -use crate::bucket::OhlcvCandle; -use crate::canonical::{AssetIdentity, AssetRegistry}; -use crate::error::BackfillError; +pub use prices_ingest_core::OracleSample; -fn decimal_to_i128(d: Decimal) -> i128 { - let d = d.round_dp(14); - // `Decimal(38,14)` holds at most 38 significant digits. AMM amounts/prices - // are i128-derived and can be far larger than SDEX stroops, so a naive - // `mantissa * 10^(14-scale)` can overflow i128 and panic. Saturate instead: - // an out-of-range value is clamped to the representable bound rather than - // aborting the whole backfill. 
- let factor = 10i128.pow(14 - d.scale()); - d.mantissa().saturating_mul(factor) -} +use crate::error::BackfillError; pub struct Sink { - client: Client, + writer: OhlcvWriter, } impl Sink { pub fn new(url: &str) -> Self { - let client = Client::default().with_url(url); - Self { client } + Self { + writer: OhlcvWriter::plaintext(url), + } } pub async fn preflight(&self) -> Result<(), BackfillError> { - self.client.query("SELECT 1").execute().await?; + self.writer.preflight().await?; + Ok(()) + } + + pub async fn load_assets(&self) -> Result, BackfillError> { + Ok(self.writer.load_assets().await?) + } + + pub async fn write_candles( + &self, + candles: &[OhlcvCandle], + source: &str, + ) -> Result<(), BackfillError> { + self.writer.write_candles(candles, source).await?; + Ok(()) + } + + pub async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), BackfillError> { + self.writer.write_assets(registry).await?; + Ok(()) + } + + pub async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), BackfillError> { + self.writer.write_oracle(samples).await?; Ok(()) } + // --- backfill-only resume bookkeeping (prices.backfill_sdex_ledgers) --- + pub async fn load_completed( &self, start: u32, end: u32, ) -> Result, BackfillError> { let rows = self - .client + .writer + .client() .query( "SELECT sequence FROM prices.backfill_sdex_ledgers \ WHERE sequence BETWEEN ? 
AND ?", @@ -61,140 +88,14 @@ impl Sink { Ok(set) } - pub async fn load_assets(&self) -> Result, BackfillError> { - let rows = self - .client - .query( - "SELECT asset_id, asset_code, issuer_address, contract_address FROM prices.assets", - ) - .fetch_all::() - .await?; - - let assets: Vec<(u32, AssetIdentity)> = rows - .into_iter() - .map(|r| { - let identity = if !r.contract_address.is_empty() { - AssetIdentity::Contract(r.contract_address) - } else if r.asset_code == "XLM" && r.issuer_address.is_empty() { - AssetIdentity::Native - } else { - AssetIdentity::Credit { - code: r.asset_code, - issuer: r.issuer_address, - } - }; - (r.asset_id, identity) - }) - .collect(); - - info!( - existing_assets = assets.len(), - "loaded asset registry from ClickHouse" - ); - Ok(assets) - } - - pub async fn write_candles( - &self, - candles: &[OhlcvCandle], - source: &str, - ) -> Result<(), BackfillError> { - if candles.is_empty() { - return Ok(()); - } - - let mut insert = self.client.insert("prices.price_ohlcv_1m")?; - - for candle in candles { - insert - .write(&OhlcvRow { - timestamp: candle.minute_start, - asset_id: candle.asset_id, - quote_asset_id: candle.quote_asset_id, - source: source.to_string(), - open: decimal_to_i128(candle.open), - high: decimal_to_i128(candle.high), - low: decimal_to_i128(candle.low), - close: decimal_to_i128(candle.close), - volume_base: decimal_to_i128(candle.volume_base), - volume_quote: decimal_to_i128(candle.volume_quote), - // DEFAULT 0 — the 0026 enrichment Lambda fills this - // (volume_quote_usd = oracle_price * volume_quote). - volume_quote_usd: 0, - // DEFAULT 0 — the enrichment pass fills this (task 0061, - // close_usd = oracle_price * close), same as volume_quote_usd. 
- close_usd: 0, - vwap: decimal_to_i128(candle.vwap), - trade_count: candle.trade_count, - version: candle.version, - }) - .await?; - } - insert.end().await?; - Ok(()) - } - - pub async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), BackfillError> { - let mut insert = self.client.insert("prices.assets")?; - - for (identity, &id) in registry.assets() { - let (asset_code, asset_type, issuer_address, contract_address) = match identity { - AssetIdentity::Native => { - ("XLM".to_string(), "classic", String::new(), String::new()) - } - AssetIdentity::Credit { code, issuer } => { - (code.clone(), "classic", issuer.clone(), String::new()) - } - AssetIdentity::Contract(addr) => { - (String::new(), "soroban", String::new(), addr.clone()) - } - }; - // The SAC that wraps this classic asset (§12.4) — '' for a pure - // Soroban token. Lets a read-time consumer resolve a SAC-wrapped leg. - let sac_address = registry.sac_address_of(identity).unwrap_or_default(); - - insert - .write(&AssetRow { - asset_id: id, - asset_code, - asset_type: asset_type.to_string(), - issuer_address, - contract_address, - sac_address, - home_domain: String::new(), - is_active: 1, - }) - .await?; - } - insert.end().await?; - Ok(()) - } - - pub async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), BackfillError> { - if samples.is_empty() { - return Ok(()); - } - let mut insert = self.client.insert("prices.oracle_prices")?; - for s in samples { - insert - .write(&OracleRow { - timestamp: s.timestamp, - asset_id: s.asset_id, - oracle_name: s.oracle_name.clone(), - price_usd: s.price_usd, - raw_data: s.raw_data.clone(), - }) - .await?; - } - insert.end().await?; - Ok(()) - } - pub async fn write_completed_ledgers(&self, sequences: &[u32]) -> Result<(), BackfillError> { if sequences.is_empty() { return Ok(()); } - let mut insert = self.client.insert("prices.backfill_sdex_ledgers")?; + let mut insert = self + .writer + .client() + .insert("prices.backfill_sdex_ledgers")?; for 
&seq in sequences { insert.write(&LedgerRow { sequence: seq }).await?; } @@ -203,66 +104,7 @@ impl Sink { } } -#[derive(Debug, Serialize, clickhouse::Row)] -struct OhlcvRow { - timestamp: u32, - asset_id: u32, - quote_asset_id: u32, - source: String, - open: i128, - high: i128, - low: i128, - close: i128, - volume_base: i128, - volume_quote: i128, - volume_quote_usd: i128, - close_usd: i128, - vwap: i128, - trade_count: u32, - version: u64, -} - -#[derive(Debug, Serialize, clickhouse::Row)] -struct AssetRow { - asset_id: u32, - asset_code: String, - asset_type: String, - issuer_address: String, - contract_address: String, - sac_address: String, - home_domain: String, - is_active: u8, -} - -#[derive(Debug, Serialize, clickhouse::Row)] +#[derive(Debug, Serialize, Row)] struct LedgerRow { sequence: u32, } - -#[derive(Debug, Deserialize, clickhouse::Row)] -struct ExistingAssetRow { - asset_id: u32, - asset_code: String, - issuer_address: String, - contract_address: String, -} - -/// One decoded oracle price sample, ready for `prices.oracle_prices`. -#[derive(Debug, Clone)] -pub struct OracleSample { - pub timestamp: u32, - pub asset_id: u32, - pub oracle_name: String, - /// price_usd scaled to 14 decimals (matches Decimal(38,14)). 
- pub price_usd: i128, - pub raw_data: String, -} - -#[derive(Debug, Serialize, clickhouse::Row)] -struct OracleRow { - timestamp: u32, - asset_id: u32, - oracle_name: String, - price_usd: i128, - raw_data: String, -} From 673f7759b51a248d9b8563fa2dede4632ed898cc Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 24 Jun 2026 18:42:46 +0200 Subject: [PATCH 14/17] fix(lore-0038): close ledger-processor review findings Address the safe-set findings from the PR #34 review of the live ledger processor: - wire INITIAL_CURSOR (from prices-owned SSM), CURSOR_FILE and MAX_ITERATIONS into the Lambda env so a fresh container can seed its cursor instead of erroring on cursor.read() and DLQ'ing every doorbell - grant kms:Decrypt when ledgerProcessor.bucketKmsKeyArn is set, so an SSE-KMS source bucket does not 403 every GetObject (which the fetcher maps to a hard error, not a gap) - cap the SQS event source at maxConcurrency 2 to curb the reservedConcurrency=1 over-poll/throttle churn that can false-DLQ a processable doorbell - redact ClickHouse BadResponse bodies at the error source in prices-ingest-core so the backfill writer path is leak-safe too, not only the Lambda sink - join the S3 and mTLS client builds and drop the redundant cold-start preflight (load_registry is already the connectivity probe) Findings #1/#3/#5 (minute-split flush, durable cursor, version namespace) are left to follow-up tasks 0064/0065 by design. 
--- infra/envs/production.json | 3 +- infra/src/lib/stacks/compute-stack.ts | 48 +++++++++++++++++++ infra/src/lib/types.ts | 31 ++++++++++++ packages/prices-ingest-core/src/error.rs | 6 ++- packages/prices-ingest-core/src/lib.rs | 2 + .../src/safe_log.rs | 15 +++++- packages/prices-ledger-processor/src/lib.rs | 4 +- packages/prices-ledger-processor/src/main.rs | 15 ++++-- .../prices-ledger-processor/src/sink/mod.rs | 15 ++---- 9 files changed, 121 insertions(+), 18 deletions(-) rename packages/{prices-ledger-processor => prices-ingest-core}/src/safe_log.rs (72%) diff --git a/infra/envs/production.json b/infra/envs/production.json index c22b0a7..09ea7b8 100644 --- a/infra/envs/production.json +++ b/infra/envs/production.json @@ -15,6 +15,7 @@ "timeoutSeconds": 60, "reservedConcurrency": 1, "sqsBatchSize": 1, - "maxReceiveCount": 10 + "maxReceiveCount": 10, + "maxIterations": 16 } } diff --git a/infra/src/lib/stacks/compute-stack.ts b/infra/src/lib/stacks/compute-stack.ts index df5fa16..11a01e1 100644 --- a/infra/src/lib/stacks/compute-stack.ts +++ b/infra/src/lib/stacks/compute-stack.ts @@ -204,6 +204,18 @@ export class ComputeStack extends cdk.Stack { this, keys.ledgerEventsTopicArn, ); + // Bootstrap ledger for the doorbell cursor (`INITIAL_CURSOR`). The + // reconcile loop's `/tmp` cursor file is empty on a fresh container, so + // without a seed `cursor.read()` errors and every doorbell DLQs — the + // Lambda can never start. Sourced from the prices-owned SSM namespace + // (the operator seeds "live ingestion starts here" at deploy prep, like + // the mTLS secrets) rather than committed config, so it is never a + // stale magic number. One-time bootstrap; superseded by the durable + // CH-backed cursor (task 0064). 
+ const initialCursor = ssm.StringParameter.valueForStringParameter( + this, + `/prices/${envName}/ledger-processor/initial-cursor`, + ); // --------------------------------------------------------------- // SQS DLQ + prices ingest queue (prices-owned doorbell source) @@ -296,6 +308,18 @@ export class ComputeStack extends cdk.Stack { // from the shared mtlsSecretName helper, so it can't drift from the // SecretsStack publication or the operator's create-secret. MTLS_SECRET_NAME: this.ledgerProcessorMtlsSecretName, + // Bootstrap cursor seed. main.rs writes the `/tmp` cursor file + // from this on a fresh container; without it `cursor.read()` errors + // and every doorbell DLQs (the Lambda never starts). + INITIAL_CURSOR: initialCursor, + // Explicit cursor checkpoint path. `/tmp` is the only writable + // Lambda filesystem; matches the Rust default but pinned here so + // the runtime contract is visible. (Per-container ephemeral — + // durable cursor is task 0064.) + CURSOR_FILE: '/tmp/prices-cursor.txt', + // Max contiguous ledgers per reconcile run (bounds fetch+decode + // against the Lambda timeout). + MAX_ITERATIONS: String(lp.maxIterations), // In-memory caching in the secrets extension — repeat reads in // one execution environment hit RAM, not Secrets Manager. PARAMETERS_SECRETS_EXTENSION_CACHE_ENABLED: 'true', @@ -307,10 +331,19 @@ export class ComputeStack extends cdk.Stack { // batches buy nothing under concurrency=1). reportBatchItemFailures // lets the handler fail just the offending doorbell; SQS redelivers // it up to maxReceiveCount, then it lands in the DLQ. + // + // maxConcurrency caps the event-source's poller scaling. By default the + // ESM scales to 5 concurrent batches; with reservedConcurrency=1 the + // other 4 are throttle-rejected and their messages re-enqueue, each + // incrementing receiveCount — under a burst a processable doorbell can + // hit maxReceiveCount and false-DLQ before it is ever handled. 
2 is the + // ESM minimum (it cannot equal the reserved 1), so this shrinks the + // over-poll window from 5→2 to complement maxReceiveCount. this.ledgerProcessorFunction.addEventSource( new lambdaEventSources.SqsEventSource(this.ingestQueue, { batchSize: lp.sqsBatchSize, reportBatchItemFailures: true, + maxConcurrency: 2, }), ); @@ -324,6 +357,21 @@ export class ComputeStack extends cdk.Stack { }); ledgerBucket.grantRead(this.ledgerProcessorRole); + // grantRead on a bucket imported by attributes (no `encryptionKey`) + // cannot infer an SSE-KMS key, so it adds no kms:Decrypt. If BE's + // bucket is KMS-encrypted, every GetObject would 403 (AccessDenied) — + // which S3Fetcher maps to a hard error that DLQ's the doorbell, not a + // gap. Grant decrypt explicitly when the key ARN is configured. + if (lp.bucketKmsKeyArn) { + this.ledgerProcessorRole.addToPrincipalPolicy( + new iam.PolicyStatement({ + sid: 'DecryptLedgerObjects', + actions: ['kms:Decrypt'], + resources: [lp.bucketKmsKeyArn], + }), + ); + } + this.ledgerProcessorRole.addToPrincipalPolicy( new iam.PolicyStatement({ sid: 'PublishLagMetric', diff --git a/infra/src/lib/types.ts b/infra/src/lib/types.ts index b955e99..a520c16 100644 --- a/infra/src/lib/types.ts +++ b/infra/src/lib/types.ts @@ -90,6 +90,23 @@ export interface EnvironmentConfig { readonly sqsBatchSize: number; /** SQS redrive threshold before a message lands in the DLQ. */ readonly maxReceiveCount: number; + /** + * Max contiguous ledgers walked per reconcile run (`MAX_ITERATIONS`). + * Bounds one invocation's S3 fetch + decode budget against the Lambda + * timeout; the Rust default is 16. + */ + readonly maxIterations: number; + /** + * KMS key ARN protecting BE's `stellar-ledger-data` bucket, if it is + * SSE-KMS encrypted. 
When set, the ledger-processor role is granted + * `kms:Decrypt` on this key — `grantRead` on a bucket imported by + * attributes (no `encryptionKey`) does NOT add it, so without this a + * KMS-encrypted bucket returns `AccessDenied` on every `GetObject` + * (which the fetcher maps to a hard error that DLQ's the doorbell, not + * a gap). Leave unset for an SSE-S3 / unencrypted bucket. Confirm with + * BE (task 0038 §C.2). + */ + readonly bucketKmsKeyArn?: string; }; } @@ -190,6 +207,20 @@ export function validateConfig(config: EnvironmentConfig): void { `ledgerProcessor.maxReceiveCount must be a positive integer, got: ${lp.maxReceiveCount}`, ); } + if (!Number.isInteger(lp.maxIterations) || lp.maxIterations < 1) { + errors.push( + `ledgerProcessor.maxIterations must be a positive integer, got: ${lp.maxIterations}`, + ); + } + if ( + lp.bucketKmsKeyArn !== undefined && + (typeof lp.bucketKmsKeyArn !== 'string' || + !lp.bucketKmsKeyArn.startsWith('arn:aws:kms:')) + ) { + errors.push( + `ledgerProcessor.bucketKmsKeyArn, when set, must be a KMS key ARN, got: ${lp.bucketKmsKeyArn}`, + ); + } } if (errors.length > 0) { diff --git a/packages/prices-ingest-core/src/error.rs b/packages/prices-ingest-core/src/error.rs index 35eb587..8a177c1 100644 --- a/packages/prices-ingest-core/src/error.rs +++ b/packages/prices-ingest-core/src/error.rs @@ -13,6 +13,10 @@ pub enum IngestError { #[error("xdr parse: {0}")] Parse(#[from] xdr_parser::ParseError), - #[error("clickhouse: {0}")] + // Self-redacting: a ClickHouse `BadResponse` body can echo offending row + // values, so the `Display` emits only the leading `Code: NNN` / status + // token, never the raw body. Applying it on the shared error means every + // consumer of the writer (live Lambda + SDEX backfill) is leak-safe. 
+ #[error("clickhouse: {}", crate::safe_log::redact_clickhouse(.0))] Clickhouse(#[from] clickhouse::error::Error), } diff --git a/packages/prices-ingest-core/src/lib.rs b/packages/prices-ingest-core/src/lib.rs index bd7a95c..ec80b75 100644 --- a/packages/prices-ingest-core/src/lib.rs +++ b/packages/prices-ingest-core/src/lib.rs @@ -28,6 +28,7 @@ pub mod decode; pub mod error; pub mod filter; pub mod price; +pub mod safe_log; pub mod soroban; pub mod tick; pub mod writer; @@ -38,6 +39,7 @@ pub use decode::{decode_object, ledger_sequence}; pub use error::IngestError; pub use filter::{RawTrade, extract_trades}; pub use price::{compute_price, stroops_to_decimal}; +pub use safe_log::safe_response_token; pub use soroban::{LedgerSoroban, Registries, process_ledger}; pub use tick::{TradeTick, raw_trade_to_tick}; pub use writer::{OhlcvWriter, OracleSample}; diff --git a/packages/prices-ledger-processor/src/safe_log.rs b/packages/prices-ingest-core/src/safe_log.rs similarity index 72% rename from packages/prices-ledger-processor/src/safe_log.rs rename to packages/prices-ingest-core/src/safe_log.rs index 8f8aea3..9c87f5a 100644 --- a/packages/prices-ledger-processor/src/safe_log.rs +++ b/packages/prices-ingest-core/src/safe_log.rs @@ -1,4 +1,4 @@ -//! Error redaction wrappers for log emission. +//! Error redaction for log emission. //! //! Mirrors BE's `safe_error_message` / `safe_bad_response_token` //! (`crates/indexer/src/handler/mod.rs:436-485`). @@ -8,6 +8,12 @@ //! example — its body echoes offending row values into the message). //! Emit fixed labels plus, for HTTP/CH responses, only the leading //! `Code: NNN` or HTTP status token. +//! +//! This lives in the shared core (not the Lambda crate) so the redaction +//! is applied at the *source*: [`IngestError`](crate::IngestError)'s own +//! `Display` routes the ClickHouse variant through here, so every consumer +//! of the shared [`OhlcvWriter`](crate::OhlcvWriter) — the live Lambda +//! 
*and* the SDEX backfill — is leak-safe without each re-implementing it. /// Extract ONLY the leading code/status token from a wire-error body. /// Returns `"Code: NNN"` for a CH exception body, `"HTTP NNN"` for a @@ -27,6 +33,13 @@ pub fn safe_response_token(msg: &str) -> String { "detail suppressed".to_string() } +/// Redact a ClickHouse client error into a leak-safe label. Used by +/// [`IngestError`](crate::IngestError)'s `Display` so the offending-row +/// body a `BadResponse` echoes never reaches a log line. +pub fn redact_clickhouse(err: &clickhouse::error::Error) -> String { + safe_response_token(&err.to_string()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/packages/prices-ledger-processor/src/lib.rs b/packages/prices-ledger-processor/src/lib.rs index 66b613c..7bb7f2f 100644 --- a/packages/prices-ledger-processor/src/lib.rs +++ b/packages/prices-ledger-processor/src/lib.rs @@ -14,12 +14,12 @@ //! - [`object_fetcher`] — local-disk (fixtures/tests) vs S3 (`lambda` feature). //! - [`cursor`] — the ledger-sequence checkpoint. //! - [`sink`] — the ClickHouse writer (plaintext local vs `aws-mtls` remote). -//! - [`galexie_key`] / [`retry`] / [`safe_log`] — key derivation, backoff, log redaction. +//! - [`galexie_key`] / [`retry`] — key derivation, backoff. (Log redaction now +//! lives at the error source in `prices_ingest_core::safe_log`.) 
pub mod cursor; pub mod galexie_key; pub mod object_fetcher; pub mod reconcile; pub mod retry; -pub mod safe_log; pub mod sink; diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs index 2208ea5..9d390a8 100644 --- a/packages/prices-ledger-processor/src/main.rs +++ b/packages/prices-ledger-processor/src/main.rs @@ -65,9 +65,18 @@ async fn main() -> Result<(), Error> { info!(seed, "seeded cursor from INITIAL_CURSOR"); } - let fetcher = S3Fetcher::from_env(&bucket).await; - let sink = ClickHouseSink::from_lambda_env().await?; - sink.preflight().await?; + // Build the S3 fetcher and the mTLS sink concurrently — they are + // independent (ambient AWS config load vs. Secrets-extension fetch + + // mTLS handshake), so joining them shaves their latency off cold start. + let (fetcher, sink) = tokio::join!( + S3Fetcher::from_env(&bucket), + ClickHouseSink::from_lambda_env() + ); + let sink = sink?; + // `load_registry` is the first ClickHouse round-trip, so it already + // surfaces an unreachable cluster as a Lambda Init error — a separate + // preflight `SELECT 1` would just be a redundant extra round-trip on the + // cold path. 
let registry = sink.load_registry().await?; info!( diff --git a/packages/prices-ledger-processor/src/sink/mod.rs b/packages/prices-ledger-processor/src/sink/mod.rs index c3e17a9..f985478 100644 --- a/packages/prices-ledger-processor/src/sink/mod.rs +++ b/packages/prices-ledger-processor/src/sink/mod.rs @@ -13,7 +13,6 @@ use std::future::Future; use prices_ingest_core::{AssetRegistry, OhlcvCandle, OhlcvWriter, OracleSample}; use crate::retry::{DEFAULT_BACKOFF_MS, retry_with_backoff}; -use crate::safe_log::safe_response_token; #[derive(Debug, thiserror::Error)] pub enum SinkError { @@ -125,16 +124,12 @@ impl CandleSink for ClickHouseSink { } } -/// Map an ingest error into a sink error WITHOUT leaking row data: a ClickHouse -/// `BadResponse` body can echo offending values, so only the leading -/// `Code: NNN` token survives (see [`safe_response_token`]). +/// Map an ingest error into a sink error. `IngestError`'s `Display` is already +/// leak-safe — its ClickHouse variant redacts the `BadResponse` body down to the +/// leading `Code: NNN` / status token (see +/// [`prices_ingest_core::safe_response_token`]) — so this is a plain string map. fn redact(e: prices_ingest_core::IngestError) -> SinkError { - match &e { - prices_ingest_core::IngestError::Clickhouse(err) => { - SinkError::Write(safe_response_token(&err.to_string())) - } - other => SinkError::Write(other.to_string()), - } + SinkError::Write(e.to_string()) } /// In-memory sink for tests and `--dry-run`: counts rows, touches no network. 
From 64f3a150de9e33495a0a6b4e8345d7fb327a46f6 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 24 Jun 2026 18:48:14 +0200 Subject: [PATCH 15/17] docs(lore-0038): record PR #34 review findings on 0064/0065 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture the review-confirmed nuances of the deferred findings on their owning follow-up tasks: - 0064 (durable cursor) — finding #3: cold-start rewind/reprocessing and the bootstrap DLQ failure mode, plus the interim INITIAL_CURSOR SSM seed shipped in PR #34 that this task supersedes. - 0065 (re-aggregation) — finding #1: the live path flushes per invocation, so the minute-split is ~one boundary minute per run, not the "negligible" per-partition rate; finding #5: the version = ledger*1000+op scheme can invert across ledgers (AMM op = first_event_index & 0xFFFF up to 65535), which constrains any "higher version wins" merge fix. --- ...E_ch-backed-cursor-for-ledger-processor.md | 22 ++++++++++++++ ...65_FEATURE_periodic-ohlcv-reaggregation.md | 29 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md b/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md index 2881286..c08aaaf 100644 --- a/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md +++ b/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md @@ -13,6 +13,10 @@ history: status: backlog who: oski note: "Spawned from 0038 future work (spec Part D.1)." + - date: 2026-06-24 + status: backlog + who: claude + note: "Added PR #34 review context for finding #3 (cold-start rewind + bootstrap; interim INITIAL_CURSOR SSM seed shipped)." --- # ClickHouse-backed cursor for the Prices Ledger Processor @@ -30,6 +34,24 @@ cursor design is the open question in `G-local-prototype-spec.md` Part D.1. 
BE's cursor is `max(sequence) FROM default.ledgers`; we only persist pricing-relevant ledgers, so `max(...) FROM prices.price_ohlcv_1m` undercounts. +## Review findings (PR #34 review, 2026-06-24) + +Finding #3 (durable cursor) was confirmed in the PR #34 review, with two +concrete failure modes this task removes: + +- **Cold-start rewind / reprocessing.** `/tmp` is per-container ephemeral. On + every container recycle the cursor is lost and re-seeded from the *static* + `INITIAL_CURSOR`, so the loop rewinds to a fixed ledger and re-walks the + whole `INITIAL_CURSOR..tip` span. Idempotent (RMT), but the redundant S3 + fetch + decode + write is paid on every cold start; if the seed is far + behind it can blow the Lambda timeout and livelock the doorbell. +- **Bootstrap.** Without a seed the loop errors on `cursor.read()` and DLQs + every doorbell. Interim mitigation already shipped in PR #34: `main.rs` + seeds from `INITIAL_CURSOR`, wired in CDK from the prices-owned SSM param + `/prices/{env}/ledger-processor/initial-cursor` (`compute-stack.ts`). This + task supersedes that stop-gap with the durable CH cursor and should retire + the static seed (or keep it only as a genuine first-run bootstrap). + ## Implementation - Lean: own single-row `prices.processed_ledgers` (ReplacingMergeTree, diff --git a/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md b/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md index ad32664..4df882f 100644 --- a/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md +++ b/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md @@ -13,6 +13,10 @@ history: status: backlog who: oski note: "Spawned from 0038 future work (cross-invocation intra-minute merge gap)." + - date: 2026-06-24 + status: backlog + who: claude + note: "Added PR #34 review context: finding #1 (live-path frequency correction) and finding #5 (version-namespace overflow caveat for the merge fix)." 
--- # Periodic OHLCV re-aggregation for cross-chunk intra-minute candles @@ -34,6 +38,31 @@ not sum — so per-chunk partial candles for a boundary minute don't merge. Negligible-but-real (one minute per chunk boundary). Same root cause for live and backfill since both now use `prices-ingest-core`'s `CandleAccumulator`. +## Review findings (PR #34 review, 2026-06-24) + +**Finding #1 — the live-path frequency is NOT negligible.** "One minute per +chunk boundary" holds for the backfill (large partitions), but the live Lambda +calls `flush_all()` every invocation (`reconcile.rs`), and with +`MAX_ITERATIONS=16` a run spans ~80-96s of ledgers — so a minute boundary +falls inside essentially *every* invocation. That is roughly one corrupted +(under-counted volume / wrong `open`) boundary minute per run in the live path, +not a rare edge. The in-code comment equating it with the backfill's partition +boundaries understates it; the fix is materially more impactful for 0038 than +the "negligible" framing suggests. + +**Finding #5 — the `version` scheme can invert across ledgers, which +constrains the fix.** `version = ledger_seq*1000 + operation_index` +(`bucket.rs`) assumes `operation_index < 1000`, but the AMM path sets it to +`first_event_index & 0xFFFF` (0..65535; `first_event_index` is `u32` in +`extractors-core`). A tx emitting ≥1000 events overflows the per-ledger +namespace, so a *later* ledger's candle can carry a *lower* `version` than an +earlier one. Any re-aggregation that relies on "higher version wins" must not +assume `version` is monotonic in ledger order — either widen the multiplier / +pack `(ledger, event_index)` without truncation, or make the merge +order-independent (Summing/Aggregating engine). Note: changing the version +formula also touches already-written backfill rows, so it needs a migration +decision. 
+ ## Implementation (options to evaluate) - A periodic worker (task 0039 family) that re-reads raw trades/`_1m FINAL` From 2c3980ec5eba8aba489e4c5e0bbb6fe3918c4a93 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Wed, 24 Jun 2026 19:15:03 +0200 Subject: [PATCH 16/17] docs(lore-0038): add deploy-prep checklist (bootstrap cursor SSM + KMS) Document the operator prerequisites surfaced by the PR #34 review: - create SSM param /prices/{env}/ledger-processor/initial-cursor as the one-time bootstrap cursor seed (value = last ledger accounted for; loop resumes at value+1; not 0) - confirm whether the source bucket is SSE-KMS and set ledgerProcessor.bucketKmsKeyArn if so Synth fails fast without the SSM param, so a half-configured Lambda can't ship silently. --- .../README.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md index 0cac8ad..6b0bbfd 100644 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -157,6 +157,17 @@ history: real-fixture e2e (decode→bucket→cursor, gap-stop, idempotent). fmt + clippy clean. **Prepare-only — no deploy, no prod writes** (Part E deploy/cert/Caddy still gated on BE 0227 + task 0047). Stays active. + - date: 2026-06-24 + status: active + who: claude + note: > + Applied the safe-set fixes from the PR #34 review (commit 673f775): + wired INITIAL_CURSOR (prices SSM) / CURSOR_FILE / MAX_ITERATIONS into + the Lambda env, optional kms:Decrypt grant, SQS maxConcurrency=2, + BadResponse redaction moved to the core error source, concurrent + cold-start init. Added a Deploy prerequisites checklist (bootstrap + cursor SSM param + source-bucket KMS confirmation). Findings #1/#3/#5 + annotated on follow-ups 0064/0065. Stays active. 
--- # Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv @@ -337,6 +348,27 @@ In `infra/aws-cdk/` (created by 0011): the `SwapExtractor` trait surface. Without 0037, this task has no extraction primitive to call. +## Deploy prerequisites (operator) + +> Prepare-only items the operator must complete before / at deploy. Synth +> fails fast if the SSM param below is absent, so a half-configured Lambda +> can't ship silently. + +- [ ] **Bootstrap cursor** — create SSM param + `/prices/{env}/ledger-processor/initial-cursor` (type `String`) with the + ledger live ingestion should resume *after*. The reconcile loop seeds its + cursor from this on first start and begins at `value + 1`, so set it to the + **last ledger already accounted for**: the SDEX backfill's + `max(sequence) FROM prices.backfill_sdex_ledgers` for a seamless handoff, + or `currentTip − 1` for a forward-only start. Do **not** use `0` (an + empty-table sentinel — would walk from genesis and never catch up). Wired + into the Lambda env from this key in `compute-stack.ts`; one-time + bootstrap, retired by task 0064. (PR #34 review, findings #2/#3.) +- [ ] **Source-bucket KMS** — confirm with BE whether `stellar-ledger-data` + is SSE-KMS encrypted. If so, set `ledgerProcessor.bucketKmsKeyArn` in + `infra/envs/{env}.json` so the role gets `kms:Decrypt`; otherwise every + `GetObject` 403s and the doorbell DLQs. (PR #34 review, finding #4.)
+ ## Out of scope - SDEX trade extractor body — 0037's skeleton stubs it; the real From cfd0bb66b8ea45238d59b6a8f6056a66d75ba469 Mon Sep 17 00:00:00 2001 From: karczuRF Date: Thu, 25 Jun 2026 09:35:30 +0200 Subject: [PATCH 17/17] =?UTF-8?q?docs(lore-0038):=20resolve=20deploy-prep?= =?UTF-8?q?=20KMS=20item=20=E2=80=94=20bucket=20is=20SSE-S3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BE's production-stellar-ledger-data is SSE-S3 (AES256), not SSE-KMS, per the BE repo ledger-bucket-stack.ts (S3_MANAGED) and BE task 0278, which dropped KMS for SSE-S3 to cut per-object KMS request cost on the public-XDR ingest pipeline. So the ledger-processor leaves bucketKmsKeyArn unset — grantRead's plain S3 perms suffice, no kms:Decrypt and no GetObject 403/DLQ risk. Ticks PR #34 review finding #4 on the deploy prerequisites checklist. --- .../README.md | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md index 6b0bbfd..bbc790c 100644 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -168,6 +168,17 @@ history: cold-start init. Added a Deploy prerequisites checklist (bootstrap cursor SSM param + source-bucket KMS confirmation). Findings #1/#3/#5 annotated on follow-ups 0064/0065. Stays active. + - date: 2026-06-25 + status: active + who: claude + note: > + Resolved the deploy-prep source-bucket KMS item (finding #4): BE's + production-stellar-ledger-data is SSE-S3 (AES256), not SSE-KMS — + confirmed in the BE repo ledger-bucket-stack.ts (S3_MANAGED) and BE + task 0278 (dropped KMS for SSE-S3 to cut per-object KMS request cost + on the public XDR). So bucketKmsKeyArn stays unset; grantRead's plain + S3 perms suffice, no kms:Decrypt / 403 risk. 
Checklist item ticked. + Stays active. --- # Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv @@ -364,10 +375,19 @@ In `infra/aws-cdk/` (created by 0011): empty-table sentinel — would walk from genesis and never catch up). Wired into the Lambda env from this key in `compute-stack.ts`; one-time bootstrap, retired by task 0064. (PR #34 review, findings #2/#3.) -- [ ] **Source-bucket KMS** — confirm with BE whether `stellar-ledger-data` - is SSE-KMS encrypted. If so, set `ledgerProcessor.bucketKmsKeyArn` in - `infra/envs/{env}.json` so the role gets `kms:Decrypt`; otherwise every - `GetObject` 403s and the doorbell DLQs. (PR #34 review, finding #4.) +- [x] **Source-bucket KMS** — **resolved: not SSE-KMS, leave + `bucketKmsKeyArn` unset.** BE's `production-stellar-ledger-data` is + **SSE-S3 (AES256)**, confirmed in the BE repo + (`infra/src/lib/stacks/ledger-bucket-stack.ts:36`, + `s3.BucketEncryption.S3_MANAGED`) and BE task **0278** (completed + 2026-06-02): KMS was dropped for SSE-S3 to kill the per-object KMS + request cost on the high-volume ingest pipeline, since the XDR is public + on-chain data. So `grantRead`'s plain S3 perms suffice — no `kms:Decrypt` + needed and no `GetObject` 403/DLQ risk. Pre-0278 objects keep their old + SSE-KMS under the AWS-managed `aws/s3` key, still readable by a + same-account principal with S3 access (no explicit `kms:*` IAM required). + Our `bucketKmsKeyArn` slot is for a *customer-managed* KMS key, which this + bucket does not use. (PR #34 review, finding #4.) ## Out of scope