diff --git a/.trash/0038-lambda-prototype/bucket.rs b/.trash/0038-lambda-prototype/bucket.rs
new file mode 100644
index 0000000..d1e6cdb
--- /dev/null
+++ b/.trash/0038-lambda-prototype/bucket.rs
@@ -0,0 +1,307 @@
+//! 1-minute OHLCV bucketing.
+//!
+//! Per ADR 0004 §Decision: incremental-merge update preserves `open`,
+//! overwrites `close`, takes `GREATEST(high)` / `LEAST(low)`, sums
+//! `volume_base` / `volume_quote` / `trade_count`, and accumulates
+//! VWAP numerator/denominator pairs.
+//!
+//! Prototype simplifications (flagged for the BE meeting):
+//! - Canonical `(base, quote)` is the lexicographically smaller /
+//!   larger of `(token_in, token_out)`. Production policy may differ.
+//! - Prices are `f64`. Production may want a fixed-point or rational
+//!   representation; the merge formula is identical either way.
+
+use std::collections::HashMap;
+
+use extractors_core::{TradeRow, Venue};
+use serde::Serialize;
+
+const GRANULARITY_ONE_MINUTE: &str = "1m";
+const ONE_MINUTE_SECS: i64 = 60;
+
+/// Identity of one OHLCV bucket: minute, canonical pair, granularity and source.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
+pub struct OhlcvKey {
+    pub timestamp_minute: i64,
+    pub asset_id: String,
+    pub granularity: String,
+    pub quote_asset_id: String,
+    pub source: String,
+}
+
+/// One candle: prices, summed volumes, trade count and VWAP accumulators.
+#[derive(Debug, Clone, Serialize)]
+pub struct OhlcvRow {
+    pub key: OhlcvKey,
+    pub open: f64,
+    pub high: f64,
+    pub low: f64,
+    pub close: f64,
+    pub volume_base: i128,
+    pub volume_quote: i128,
+    pub trade_count: u64,
+    pub vwap_numerator: f64,
+    pub vwap_denominator: i128,
+}
+
+impl OhlcvRow {
+    /// Folds one more trade into the candle: `open` is kept, `close` is overwritten,
+    /// `high`/`low` widen, sums saturate; VWAP terms are weighted by `volume_quote`.
+    fn merge(&mut self, price: f64, volume_base: i128, volume_quote: i128) {
+        if price > self.high {
+            self.high = price;
+        }
+        if price < self.low {
+            self.low = price;
+        }
+        self.close = price;
+        self.volume_base = self.volume_base.saturating_add(volume_base);
+        self.volume_quote = self.volume_quote.saturating_add(volume_quote);
+        self.trade_count = self.trade_count.saturating_add(1);
+        self.vwap_numerator += price * (volume_quote as f64);
+        self.vwap_denominator = self.vwap_denominator.saturating_add(volume_quote);
+    }
+
+    /// Returns `vwap_numerator / vwap_denominator`, or `None` if the denominator is 0.
+    pub fn vwap(&self) -> Option<f64> {
+        if self.vwap_denominator == 0 {
+            None
+        } else {
+            Some(self.vwap_numerator / (self.vwap_denominator as f64))
+        }
+    }
+}
+
+/// In-memory accumulator of candles, keyed by [`OhlcvKey`].
+pub struct Bucketer {
+    by_key: HashMap<OhlcvKey, OhlcvRow>,
+}
+
+impl Bucketer {
+    /// Creates an empty bucketer.
+    pub fn new() -> Self {
+        Self {
+            by_key: HashMap::new(),
+        }
+    }
+
+    /// Adds `trade` to the candle of the minute containing `closed_at_unix_seconds`.
+    /// Trades whose canonical base amount is 0 are skipped (no price can be derived).
+    pub fn ingest(&mut self, closed_at_unix_seconds: i64, trade: &TradeRow) {
+        let (asset_id, quote_asset_id, amount_base, amount_quote) = canonical_pair(
+            &trade.token_in,
+            &trade.token_out,
+            trade.amount_in,
+            trade.amount_out,
+        );
+        if amount_base == 0 {
+            return;
+        }
+        let price = (amount_quote as f64) / (amount_base as f64);
+        let key = OhlcvKey {
+            timestamp_minute: floor_to_minute(closed_at_unix_seconds),
+            asset_id,
+            granularity: GRANULARITY_ONE_MINUTE.to_string(),
+            quote_asset_id,
+            source: venue_to_source(&trade.venue).to_string(),
+        };
+        self.by_key
+            .entry(key.clone())
+            .and_modify(|row| row.merge(price, amount_base, amount_quote))
+            .or_insert_with(|| OhlcvRow {
+                key,
+                open: price,
+                high: price,
+                low: price,
+                close: price,
+                volume_base: amount_base,
+                volume_quote: amount_quote,
+                trade_count: 1,
+                vwap_numerator: price * (amount_quote as f64),
+                vwap_denominator: amount_quote,
+            });
+    }
+
+    /// Removes and returns every candle, sorted by minute, base, quote, then source.
+    pub fn drain(&mut self) -> Vec<OhlcvRow> {
+        let mut rows: Vec<OhlcvRow> = self.by_key.drain().map(|(_, v)| v).collect();
+        rows.sort_by(|a, b| {
+            a.key
+                .timestamp_minute
+                .cmp(&b.key.timestamp_minute)
+                .then_with(|| a.key.asset_id.cmp(&b.key.asset_id))
+                .then_with(|| a.key.quote_asset_id.cmp(&b.key.quote_asset_id))
+                .then_with(|| a.key.source.cmp(&b.key.source))
+        });
+        rows
+    }
+}
+
+impl Default for Bucketer {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Floors `unix_seconds` to the start of its minute. Euclidean division makes
+/// pre-epoch (negative) timestamps round down instead of toward zero.
+fn floor_to_minute(unix_seconds: i64) -> i64 {
+    unix_seconds.div_euclid(ONE_MINUTE_SECS) * ONE_MINUTE_SECS
+}
+
+/// Orders the pair so the lexicographically smaller token is the base and the
+/// larger the quote; the amounts are swapped together with the tokens.
+fn canonical_pair(
+    token_in: &str,
+    token_out: &str,
+    amount_in: i128,
+    amount_out: i128,
+) -> (String, String, i128, i128) {
+    if token_in <= token_out {
+        (
+            token_in.to_string(),
+            token_out.to_string(),
+            amount_in,
+            amount_out,
+        )
+    } else {
+        (
+            token_out.to_string(),
+            token_in.to_string(),
+            amount_out,
+            amount_in,
+        )
+    }
+}
+
+/// Maps a venue to the value stored in [`OhlcvKey::source`].
+fn venue_to_source(v: &Venue) -> &'static str {
+    match v {
+        Venue::Soroswap => "soroswap",
+        Venue::Aquarius => "aquarius",
+        Venue::Phoenix => "phoenix",
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use extractors_core::Venue;
+
+    fn trade(
+        venue: Venue,
+        token_in: &str,
+        token_out: &str,
+        amount_in: i128,
+        amount_out: i128,
+    ) -> TradeRow {
+        TradeRow {
+            venue,
+            contract_id: "C".into(),
+            transaction_id: "T".into(),
+            ledger_sequence: 1,
+            first_event_index: 0,
+            token_in: token_in.into(),
+            token_out: token_out.into(),
+            amount_in,
+            amount_out,
+            fee: None,
+            trader: None,
+        }
+    }
+
+    #[test]
+    fn floor_to_minute_rounds_down() {
+        assert_eq!(floor_to_minute(0), 0);
+        assert_eq!(floor_to_minute(59), 0);
+        assert_eq!(floor_to_minute(60), 60);
+        assert_eq!(floor_to_minute(125), 120);
+        assert_eq!(floor_to_minute(-1), -60);
+        assert_eq!(floor_to_minute(-60), -60);
+    }
+
+    #[test]
+    fn canonical_pair_orders_lexicographically() {
+        let (b, q, ab, aq) = canonical_pair("USDC", "XLM", 100, 200);
+        assert_eq!((b.as_str(), q.as_str(), ab, aq), ("USDC", "XLM", 100, 200));
+
+        let (b, q, ab, aq) = canonical_pair("XLM", "USDC", 100, 200);
+        assert_eq!((b.as_str(), q.as_str(), ab, aq), ("USDC", "XLM", 200, 100));
+    }
+
+    #[test]
+    fn single_trade_seeds_open_high_low_close_equal() {
+        let mut b = Bucketer::new();
+        b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50));
+        let rows = b.drain();
+        assert_eq!(rows.len(), 1);
+        let r = &rows[0];
+        assert_eq!(r.open, 5.0);
+        assert_eq!(r.high, 5.0);
+        assert_eq!(r.low, 5.0);
+        assert_eq!(r.close, 5.0);
+        assert_eq!(r.volume_base, 10);
+        assert_eq!(r.volume_quote, 50);
+        assert_eq!(r.trade_count, 1);
+        assert_eq!(r.vwap(), Some(5.0));
+    }
+
+    #[test]
+    fn merges_two_trades_same_bucket() {
+        let mut b = Bucketer::new();
+        b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50)); // price=5
+        b.ingest(1_700_000_030, &trade(Venue::Phoenix, "USDC", "XLM", 5, 30)); // price=6
+        let rows = b.drain();
+        assert_eq!(rows.len(), 1);
+        let r = &rows[0];
+        assert_eq!(r.open, 5.0);
+        assert_eq!(r.close, 6.0);
+        assert_eq!(r.high, 6.0);
+        assert_eq!(r.low, 5.0);
+        assert_eq!(r.volume_base, 15);
+        assert_eq!(r.volume_quote, 80);
+        assert_eq!(r.trade_count, 2);
+        // VWAP = (5*50 + 6*30) / (50+30) = (250 + 180) / 80 = 430/80 = 5.375
+        let vwap = r.vwap().unwrap();
+        assert!((vwap - 5.375).abs() < 1e-9);
+    }
+
+    #[test]
+    fn different_minute_separate_buckets() {
+        let mut b = Bucketer::new();
+        b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50));
+        b.ingest(1_700_000_090, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50));
+        assert_eq!(b.drain().len(), 2);
+    }
+
+    #[test]
+    fn reverse_direction_same_pair_merges() {
+        // A swap USDC→XLM at price 5, then a swap XLM→USDC at amount_in=30, amount_out=5
+        // → canonical pair is still (USDC, XLM) but with flipped base/quote on the input.
+        let mut b = Bucketer::new();
+        b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50));
+        b.ingest(1_700_000_030, &trade(Venue::Phoenix, "XLM", "USDC", 30, 5));
+        let rows = b.drain();
+        assert_eq!(rows.len(), 1, "reverse direction must collapse to one key");
+        let r = &rows[0];
+        assert_eq!(r.trade_count, 2);
+    }
+
+    #[test]
+    fn different_source_separate_buckets() {
+        let mut b = Bucketer::new();
+        b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 10, 50));
+        b.ingest(
+            1_700_000_000,
+            &trade(Venue::Soroswap, "USDC", "XLM", 10, 50),
+        );
+        assert_eq!(b.drain().len(), 2);
+    }
+
+    #[test]
+    fn zero_amount_in_skipped() {
+        let mut b = Bucketer::new();
+        b.ingest(1_700_000_000, &trade(Venue::Phoenix, "USDC", "XLM", 0, 50));
+        assert_eq!(b.drain().len(), 0);
+    }
+}
diff --git a/.trash/0038-lambda-prototype/decode.rs b/.trash/0038-lambda-prototype/decode.rs
new file mode 100644
index 0000000..b703342
--- /dev/null
+++ b/.trash/0038-lambda-prototype/decode.rs
@@ -0,0 +1,293 @@
+//! XDR ledger decode + Soroban event walk.
+//!
+//! Wraps BE's `xdr_parser` crate: `decompress_zstd` → deserialize
+//! `LedgerCloseMetaBatch` → per-ledger `extract_ledger` + per-tx
+//! `extract_events`. The adapter converts xdr-parser's tagged JSON
+//! representation of ScVal into the kernel's `TaggedValue` enum.
+//!
+//! Filter policy: only `EventSource::TxLevel` and `EventSource::PerOp`
+//! (Protocol 23+) events are kept. Diagnostic events are dropped —
+//! they can include byte-identical Contract-typed mirrors of consensus
+//! events (BE task 0182), which would double-count.
+
+use std::collections::HashMap;
+
+use extractors_core::{SorobanEventRow, TaggedValue};
+use stellar_xdr::curr::{LedgerCloseMeta, LedgerCloseMetaBatch, Limits, ReadXdr, TransactionMeta};
+use xdr_parser::{decompress_zstd, extract_events, extract_ledger, types::EventSource};
+
+use crate::reconcile::{DecodedLedger, LedgerDecoder};
+
+pub struct XdrLedgerDecoder;
+
+impl LedgerDecoder for XdrLedgerDecoder {
+    async fn decode(&self, bytes: &[u8]) -> Result<Vec<DecodedLedger>, String> {
+        let decompressed = decompress_zstd(bytes).map_err(|e| format!("decompress: {e:?}"))?;
+        let batch = LedgerCloseMetaBatch::from_xdr(decompressed.as_slice(), Limits::none())
+            .map_err(|e| format!("deserialize batch: {e}"))?;
+        let mut out = Vec::with_capacity(batch.ledger_close_metas.len());
+        for meta in batch.ledger_close_metas.iter() {
+            let header = extract_ledger(meta);
+            let event_groups = walk_ledger_events(meta, header.sequence, header.closed_at);
+            out.push(DecodedLedger {
+                ledger_sequence: u64::from(header.sequence),
+                closed_at_unix_seconds: header.closed_at,
+                event_groups,
+            });
+        }
+        Ok(out)
+    }
+}
+
+/// Collects the kept (TxLevel / PerOp, contract-bearing) events of one ledger into
+/// (transaction hash, contract id) groups, returned in the order each is first seen.
+fn walk_ledger_events(
+    meta: &LedgerCloseMeta,
+    ledger_seq: u32,
+    closed_at: i64,
+) -> Vec<Vec<SorobanEventRow>> {
+    let mut slot_of: HashMap<(String, String), usize> = HashMap::new();
+    let mut groups: Vec<Vec<SorobanEventRow>> = Vec::new();
+    for (tx_hash, tx_meta) in iter_tx_metas(meta) {
+        for evt in extract_events(tx_meta, &tx_hash, ledger_seq, closed_at) {
+            if !matches!(evt.source, EventSource::TxLevel | EventSource::PerOp) {
+                continue;
+            }
+            let Some(contract_id) = evt.contract_id.clone() else {
+                continue;
+            };
+            let topics = match evt.topics {
+                serde_json::Value::Array(arr) => arr.iter().map(json_to_tagged).collect(),
+                _ => Vec::new(),
+            };
+            let row = SorobanEventRow {
+                contract_id: contract_id.clone(),
+                transaction_id: evt.transaction_hash.clone(),
+                ledger_sequence: u64::from(ledger_seq),
+                event_index: evt.event_index,
+                topics,
+                data: json_to_tagged(&evt.data),
+            };
+            let key = (evt.transaction_hash, contract_id);
+            let slot = *slot_of.entry(key).or_insert(groups.len());
+            groups.resize_with(groups.len().max(slot + 1), Vec::new);
+            groups[slot].push(row);
+        }
+    }
+    // Groups stay in first-seen order, i.e. the order `iter_tx_metas` yields
+    // transactions, which keeps the output deterministic. HashMap iteration
+    // order is not: two trades from different groups landing in the same
+    // OHLCV bucket would swap `open`/`close` between runs.
+    // Within a group, events are ordered by event_index.
+    for g in groups.iter_mut() {
+        g.sort_by_key(|r| r.event_index);
+    }
+    groups
+}
+
+/// Pairs each transaction's hex-encoded hash with its `tx_apply_processing` metadata.
+fn iter_tx_metas(meta: &LedgerCloseMeta) -> Vec<(String, &TransactionMeta)> {
+    match meta {
+        LedgerCloseMeta::V0(v) => v
+            .tx_processing
+            .iter()
+            .map(|p| {
+                (
+                    hex::encode(p.result.transaction_hash.0),
+                    &p.tx_apply_processing,
+                )
+            })
+            .collect(),
+        LedgerCloseMeta::V1(v) => v
+            .tx_processing
+            .iter()
+            .map(|p| {
+                (
+                    hex::encode(p.result.transaction_hash.0),
+                    &p.tx_apply_processing,
+                )
+            })
+            .collect(),
+        LedgerCloseMeta::V2(v) => v
+            .tx_processing
+            .iter()
+            .map(|p| {
+                (
+                    hex::encode(p.result.transaction_hash.0),
+                    &p.tx_apply_processing,
+                )
+            })
+            .collect(),
+    }
+}
+
+/// Convert one `{"type": "...", "value": ...}` tagged JSON node into a
+/// `TaggedValue`. Types we don't yet handle (bool, u32, bytes, error, …)
+/// collapse to `Null` — the kernel's Phoenix XYK extractor only inspects
+/// sym/address/i128/vec/map shapes, so this is sufficient for the
+/// extractors wired in by task 0037. Unsupported types become visible
+/// to future extractors as `Null` and will need adapter extensions.
+pub(crate) fn json_to_tagged(v: &serde_json::Value) -> TaggedValue {
+    let Some(obj) = v.as_object() else {
+        return TaggedValue::Null;
+    };
+    let type_name = obj.get("type").and_then(|t| t.as_str()).unwrap_or("");
+    let value = obj.get("value").unwrap_or(&serde_json::Value::Null);
+    match type_name {
+        "sym" => value
+            .as_str()
+            .map(|s| TaggedValue::Symbol(s.to_string()))
+            .unwrap_or(TaggedValue::Null),
+        "string" => value
+            .as_str()
+            .map(|s| TaggedValue::String(s.to_string()))
+            .unwrap_or(TaggedValue::Null),
+        "address" => value
+            .as_str()
+            .map(|s| TaggedValue::Address(s.to_string()))
+            .unwrap_or(TaggedValue::Null),
+        "i128" => value
+            .as_str()
+            .and_then(|s| s.parse::<i128>().ok())
+            .map(TaggedValue::I128)
+            .unwrap_or(TaggedValue::Null),
+        "u128" => value
+            .as_str()
+            .and_then(|s| s.parse::<u128>().ok())
+            .and_then(|u| i128::try_from(u).ok())
+            .map(TaggedValue::I128)
+            .unwrap_or(TaggedValue::Null),
+        "vec" => match value.as_array() {
+            Some(arr) => TaggedValue::Vec(arr.iter().map(json_to_tagged).collect()),
+            None => TaggedValue::Null,
+        },
+        "map" => match value.as_array() {
+            Some(arr) => TaggedValue::Map(
+                arr.iter()
+                    .filter_map(|e| {
+                        let k = e.get("key")?;
+                        let v = e.get("value")?;
+                        Some((json_to_tagged(k), json_to_tagged(v)))
+                    })
+                    .collect(),
+            ),
+            None => TaggedValue::Null,
+        },
+        _ => TaggedValue::Null,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    #[test]
+    fn sym_address_i128_roundtrip() {
+        assert_eq!(
+            json_to_tagged(&json!({"type": "sym", "value": "swap"})),
+            TaggedValue::Symbol("swap".into())
+        );
+        assert_eq!(
+            json_to_tagged(&json!({"type": "address", "value": "CABCDEF"})),
+            TaggedValue::Address("CABCDEF".into())
+        );
+        assert_eq!(
+            json_to_tagged(&json!({"type": "i128", "value": "-12345"})),
+            TaggedValue::I128(-12_345)
+        );
+        assert_eq!(
+            json_to_tagged(&json!({"type": "u128", "value": "12345"})),
+            TaggedValue::I128(12_345)
+        );
+    }
+
+    #[test]
+    fn vec_recursively_adapts() {
+        let v = json!({
+            "type": "vec",
+            "value": [
+                {"type": "sym", "value": "swap"},
+                {"type": "address", "value": "CPOOL"},
+                {"type": "address", "value": "CTRADER"},
+            ],
+        });
+        match json_to_tagged(&v) {
+            TaggedValue::Vec(items) => {
+                assert_eq!(items.len(), 3);
+                assert_eq!(items[0], TaggedValue::Symbol("swap".into()));
+                assert_eq!(items[1], TaggedValue::Address("CPOOL".into()));
+            }
+            other => panic!("expected Vec, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn map_adapts_key_value_pairs() {
+        let v = json!({
+            "type": "map",
+            "value": [
+                {"key": {"type": "sym", "value": "token_in"},
+                 "value": {"type": "address", "value": "CXLM"}},
+                {"key": {"type": "sym", "value": "amount_in"},
+                 "value": {"type": "i128", "value": "1000"}},
+            ],
+        });
+        match json_to_tagged(&v) {
+            TaggedValue::Map(pairs) => {
+                assert_eq!(pairs.len(), 2);
+                assert_eq!(pairs[0].0, TaggedValue::Symbol("token_in".into()));
+                assert_eq!(pairs[0].1, TaggedValue::Address("CXLM".into()));
+                assert_eq!(pairs[1].1, TaggedValue::I128(1000));
+            }
+            other => panic!("expected Map, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn nested_map_in_vec() {
+        let v = json!({
+            "type": "vec",
+            "value": [{
+                "type": "map",
+                "value": [{
+                    "key": {"type": "sym", "value": "k"},
+                    "value": {"type": "i128", "value": "1"},
+                }],
+            }],
+        });
+        let out = json_to_tagged(&v);
+        if let TaggedValue::Vec(items) = out {
+            assert!(matches!(items[0], TaggedValue::Map(_)));
+        } else {
+            panic!("expected outer Vec");
+        }
+    }
+
+    #[test]
+    fn unsupported_type_falls_back_to_null() {
+        assert_eq!(
+            json_to_tagged(&json!({"type": "bool", "value": true})),
+            TaggedValue::Null
+        );
+        assert_eq!(
+            json_to_tagged(&json!({"type": "bytes", "value": "deadbeef"})),
+            TaggedValue::Null
+        );
+    }
+
+    #[test]
+    fn malformed_i128_falls_back_to_null() {
+        assert_eq!(
+            json_to_tagged(&json!({"type": "i128", "value": "not a number"})),
+            TaggedValue::Null
+        );
+    }
+
+    #[test]
+    fn missing_type_field_is_null() {
+        assert_eq!(json_to_tagged(&json!({"value": "x"})), TaggedValue::Null);
+        assert_eq!(json_to_tagged(&json!(null)), TaggedValue::Null);
+        assert_eq!(json_to_tagged(&json!("bare-string")), TaggedValue::Null);
+    }
+}
diff --git a/.trash/0038-lambda-prototype/sink/sql_file.rs b/.trash/0038-lambda-prototype/sink/sql_file.rs
new file mode 100644
index 0000000..5e24fb7
--- /dev/null
+++ b/.trash/0038-lambda-prototype/sink/sql_file.rs
@@ -0,0 +1,76 @@
+use std::path::{Path, PathBuf};
+
+use super::{OhlcvSink, SinkError};
+use crate::bucket::OhlcvRow;
+
+/// Emits one `INSERT INTO prices.price_ohlcv ...` statement per row.
+/// Production replaces this with a single `INSERT INTO prices.price_ohlcv ...
+/// VALUES (...)` batched through `clickhouse::Client::insert`; the per-row
+/// form here is what BE reads in the meeting to confirm the column shape.
+pub struct SqlFileSink {
+    out_dir: PathBuf,
+}
+
+impl SqlFileSink {
+    /// Creates a sink that writes into `out_dir`; the directory is created on first write.
+    pub fn new(out_dir: impl AsRef<Path>) -> Self {
+        Self {
+            out_dir: out_dir.as_ref().to_path_buf(),
+        }
+    }
+}
+
+impl OhlcvSink for SqlFileSink {
+    /// Writes `rows` to a new `ohlcv-<unix millis>.sql` file; an empty slice writes nothing.
+    /// NOTE(review): two writes within the same millisecond resolve to the same path.
+    async fn write(&self, rows: &[OhlcvRow]) -> Result<(), SinkError> {
+        if rows.is_empty() {
+            return Ok(());
+        }
+        tokio::fs::create_dir_all(&self.out_dir)
+            .await
+            .map_err(|e| SinkError::Write(e.to_string()))?;
+        let path = self.out_dir.join(format!(
+            "ohlcv-{}.sql",
+            std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .map(|d| d.as_millis())
+                .unwrap_or(0)
+        ));
+        let mut sql = String::new();
+        for row in rows {
+            sql.push_str(&row_to_insert(row));
+            sql.push('\n');
+        }
+        tokio::fs::write(&path, sql)
+            .await
+            .map_err(|e| SinkError::Write(e.to_string()))
+    }
+}
+
+/// Renders one row as a single-line `INSERT`; string columns go through
+/// `sql_escape` so a quote in an id cannot terminate the literal early.
+fn row_to_insert(r: &OhlcvRow) -> String {
+    format!(
+        "INSERT INTO prices.price_ohlcv (timestamp, asset_id, granularity, quote_asset_id, source, open, high, low, close, volume_base, volume_quote, trade_count, vwap_num, vwap_den) VALUES ({ts}, '{aid}', '{gr}', '{qaid}', '{src}', {o}, {h}, {l}, {c}, {vb}, {vq}, {tc}, {vn}, {vd});",
+        ts = r.key.timestamp_minute,
+        aid = sql_escape(&r.key.asset_id),
+        gr = sql_escape(&r.key.granularity),
+        qaid = sql_escape(&r.key.quote_asset_id),
+        src = sql_escape(&r.key.source),
+        o = r.open,
+        h = r.high,
+        l = r.low,
+        c = r.close,
+        vb = r.volume_base,
+        vq = r.volume_quote,
+        tc = r.trade_count,
+        vn = r.vwap_numerator,
+        vd = r.vwap_denominator,
+    )
+}
+
+/// Backslash-escapes `\` and `'` for use inside a single-quoted SQL string literal.
+fn sql_escape(s: &str) -> String {
+    s.replace('\\', "\\\\").replace('\'', "\\'")
+}
diff --git a/.trash/0038-lambda-prototype/sink/stdout.rs b/.trash/0038-lambda-prototype/sink/stdout.rs
new file mode 100644
index 0000000..cdc87cf
--- /dev/null
+++ b/.trash/0038-lambda-prototype/sink/stdout.rs
@@ -0,0 +1,15 @@
+use super::{OhlcvSink, SinkError};
+use crate::bucket::OhlcvRow;
+
+/// Sink that prints each row as one JSON line on stdout.
+pub struct StdoutJsonSink;
+
+impl OhlcvSink for StdoutJsonSink {
+    async fn write(&self, rows: &[OhlcvRow]) -> Result<(), SinkError> {
+        for row in rows {
+            let line = serde_json::to_string(row).map_err(|e| SinkError::Write(e.to_string()))?;
+            println!("{line}");
+        }
+        Ok(())
+    }
+}
diff --git a/Cargo.lock b/Cargo.lock index c85138d..9b59690 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,6 +22,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -88,6 +94,15 @@ dependencies = [ "extractors-core", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -128,6 +143,49 @@ version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" +[[package]] +name = "aws-config" +version = "1.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33f815b73a3899c03b380d543532e5865f230dce9678d108dc10732a8682275"
+dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.0", + "sha1 0.10.6", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + [[package]] name = "aws-lc-rs" version = "1.17.0" @@ -150,12 +208,454 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "aws-runtime" +version = "1.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c9b9de216a988dd54b754a82a7660cfe14cee4f6782ae4524470972fa0ccb39" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-s3" +version = "1.137.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd7213994e2ff9382ff100403b78c30d1b74cdfcd8fa9d0d1dc3a94a5c4874" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + 
"bytes", + "fastrand", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.0", + "http-body 1.0.1", + "lru", + "percent-encoding", + "regex-lite", + "sha2 0.11.0", + "tracing", + "url", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c82b3ac19f1431854f7ace3a7531674633e286bfdde21976893bfee36fd493b" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.104.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321000d2b4c5519ee573f73167f612efd7329322d9b26969ad1979f0427f1913" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.107.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0d328ba962af23ecfa3c9f23b98d3d35e325fa218d7f13d17a6bf522f8a560" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "bae38512beae0ffee7010fc24e7a8a123c53efdfef42a61e80fda4882418dc71" +dependencies = [ + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint", + "form_urlencoded", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.0", + "p256", + "percent-encoding", + "sha2 0.11.0", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-checksums" +version = "0.64.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e8e65f4f81fcccdeb6c3eca2af17ac21d421a1786a26a394aecf421d616d3a" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "md-5", + "pin-project-lite", + "sha1 0.11.0", + "sha2 0.11.0", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78d8391e65fcea47c586a22e1a41f173b38615b112b2c6b7a44e80cec3e6b706" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = 
"1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3ef8931ad1c98aa6a55b4256f847f3116090819844e0dd41ea682cac5dd2d3" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.15", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.9.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.9", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.40", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower 0.5.3", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "701a947f4797e52a911e114a898667c746c39feea467bbd1abd7b3721f702ffa" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + 
"tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9db177daa6ba8afb9ee1aefcf548c907abcf52065e394ee11a92780057fe0e8c" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api-macros", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "aws-smithy-schema" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 1.4.0", +] + +[[package]] +name = "aws-smithy-types" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b42fcf341259d85ca10fac9a2f6448a8ec691c6955a18e45bc3b71a85fab85" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531" +dependencies = [ + 
"aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "aws_lambda_events" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144ec7565561115498a288850cc6a42b279e09b6c4b88f623eecb9c8ca96c08c" +dependencies = [ + "base64", + "serde", + "serde_json", + "serde_with", +] + +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bitflags" version = "2.11.1" @@ -183,6 +683,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2f6c7dbe95a6ed67ad9f18e57daf93a2f034c524b99fd2b76d18fdfeb6660aa" +dependencies = [ + "hybrid-array", +] + [[package]] name = "borsh" version = "1.6.1" @@ -207,6 +716,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "bs58" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bstr" version = "1.12.1" @@ 
-250,6 +768,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "cc" version = "1.2.62" @@ -358,7 +886,7 @@ dependencies = [ "futures", "futures-channel", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "lz4_flex", "quanta", @@ -392,12 +920,40 @@ dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a" + [[package]] name = "colorchoice" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -408,43 +964,156 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" name = "cpufeatures" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crate-git-revision" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c521bf1f43d31ed2f73441775ed31935d77901cb3451e44b38a1c1612fcbaf98" +dependencies = [ + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "crc-fast" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e75b2483e97a5a7da73ac68a05b629f9c53cff58d8ed1c77866079e18b00dba5" +dependencies = [ + "digest 0.10.7", + "spin", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "crypto-common" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" dependencies = [ - "libc", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.117", ] [[package]] -name = "crate-git-revision" -version = "0.0.6" +name = "darling_macro" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c521bf1f43d31ed2f73441775ed31935d77901cb3451e44b38a1c1612fcbaf98" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ - "serde", - "serde_derive", - "serde_json", + "darling_core", + "quote", + "syn 2.0.117", ] [[package]] -name = "crossbeam-utils" -version = "0.8.21" +name = "data-encoding" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" [[package]] -name = "crypto-common" -version = "0.1.7" +name = "der" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +checksum = 
"e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ - "generic-array", - "typenum", + "const-oid 0.9.6", + "pem-rfc7468", + "zeroize", ] [[package]] -name = "data-encoding" -version = "2.11.0" +name = "deranged" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] [[package]] name = "digest" @@ -452,8 +1121,22 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", + "subtle", +] + +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.1", + "const-oid 0.10.2", + "crypto-common 0.2.2", + "ctutils", ] [[package]] @@ -485,21 +1168,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] -name = "enrichment-worker" -version = "0.1.0" +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" dependencies = [ - "clap", - "clickhouse", - "lambda_runtime", - "prices-clickhouse", - "rust_decimal", - "serde", - "serde_json", - "tempfile", - "thiserror 2.0.18", - "tokio", - "tracing", 
- "tracing-subscriber", + "der", + "digest 0.10.7", + "elliptic-curve", + "rfc6979", + "signature", + "spki", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pem-rfc7468", + "pkcs8", + "rand_core", + "sec1", + "subtle", + "zeroize", ] [[package]] @@ -543,12 +1254,34 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -666,6 +1399,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -691,6 +1425,55 @@ dependencies = [ 
"wasip2", ] +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb093c84e8bd9b188d4c4a8cb6579fc016968d14c99882163cd3ff402a4f155" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap 2.14.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -700,6 +1483,17 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hashbrown" version = "0.17.1" @@ -718,6 +1512,35 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.3", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -728,6 +1551,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -735,7 +1569,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -746,8 +1580,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -757,7 +1591,7 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f056c8559e3757392c8d091e796416e4649d8e49e88b8d76df6c002f05027fd" dependencies = [ - "http", + "http 1.4.0", "serde", ] @@ -767,6 +1601,45 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.9.0" @@ -777,8 +1650,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "http", - "http-body", + "h2 0.4.15", + "http 1.4.0", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -787,18 +1661,34 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http", - "hyper", + "http 1.4.0", + "hyper 1.9.0", "hyper-util", - "rustls", + "rustls 0.23.40", + "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", ] @@ -812,14 +1702,14 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.9.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -931,6 +1821,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = 
"ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.1.0" @@ -952,6 +1848,17 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + [[package]] name = "indexmap" version = "2.14.0" @@ -960,6 +1867,8 @@ checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", "hashbrown 0.17.1", + "serde", + "serde_core", ] [[package]] @@ -1012,11 +1921,11 @@ dependencies = [ "base64", "bytes", "futures", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "http-serde", - "hyper", + "hyper 1.9.0", "hyper-util", "lambda_runtime_api_client", "pin-project", @@ -1039,10 +1948,10 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "tokio", "tower 0.4.13", @@ -1101,6 +2010,15 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +dependencies = [ + "hashbrown 0.16.1", +] + [[package]] name = "lz4_flex" version = "0.11.6" @@ -1116,6 +2034,16 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "md-5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" 
+dependencies = [ + "cfg-if", + "digest 0.11.3", +] + [[package]] name = "memchr" version = "2.8.0" @@ -1142,6 +2070,21 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-conv" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1163,6 +2106,30 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "p256" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2 0.10.9", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -1186,6 +2153,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1226,6 +2202,22 @@ version = "0.2.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.33" @@ -1241,6 +2233,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1255,10 +2253,10 @@ name = "prices-clickhouse" version = "0.1.0" dependencies = [ "clickhouse", - "hyper-rustls", + "hyper-rustls 0.27.9", "hyper-util", "reqwest", - "rustls", + "rustls 0.23.40", "rustls-pemfile", "rustls-pki-types", "serde", @@ -1270,6 +2268,57 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "prices-ingest-core" +version = "0.1.0" +dependencies = [ + "aquarius-extractor", + "clickhouse", + "extractors-core", + "ledger-processor", + "phoenix-extractor", + "prices-clickhouse", + "rust_decimal", + "serde", + "serde_json", + "sha2 0.10.9", + "soroswap-extractor", + "stellar-strkey", + "stellar-xdr", + "thiserror 2.0.18", + "tracing", + "xdr-parser", +] + +[[package]] +name = "prices-ledger-processor" +version = "0.1.0" +dependencies = [ + "aws-config", + "aws-sdk-s3", + "aws_lambda_events", + "clap", + "lambda_runtime", + "prices-clickhouse", + "prices-ingest-core", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "primeorder" +version = "0.13.6" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" +dependencies = [ + "elliptic-curve", +] + [[package]] name = "proc-macro-crate" version = "3.5.0" @@ -1392,6 +2441,26 @@ dependencies = [ "bitflags", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "regex-automata" version = "0.4.14" @@ -1403,6 +2472,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.10" @@ -1433,10 +2508,10 @@ dependencies = [ "base64", "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.9.0", "hyper-util", "js-sys", "log", @@ -1456,6 +2531,16 @@ dependencies = [ "web-sys", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac 0.12.1", + "subtle", +] + [[package]] name = "ring" version = "0.17.14" @@ -1516,6 +2601,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" 
+dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.4" @@ -1529,6 +2623,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.40" @@ -1538,11 +2644,23 @@ dependencies = [ "aws-lc-rs", "once_cell", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -1561,6 +2679,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.13" @@ -1585,28 +2713,67 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + 
"dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "sdex-backfill" version = "0.1.0" dependencies = [ - "aquarius-extractor", "clap", "clickhouse", - "extractors-core", - "ledger-processor", - "phoenix-extractor", "prices-clickhouse", + "prices-ingest-core", "rust_decimal", "serde", "serde_json", - "sha2", - "soroswap-extractor", + "sha2 0.10.9", "stellar-strkey", "stellar-xdr", "thiserror 2.0.18", @@ -1633,6 +2800,49 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -1710,6 +2920,60 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "3.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a5c54c7310e7b8b9577c286d7e399ddd876c3e12b3ed917a8aabc4b96e9e8c" +dependencies = [ + "base64", + "bs58", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.14.0", + "schemars 0.9.0", + "schemars 1.2.1", + "serde_core", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d57bc0c8b9a17920c178daa6bb924850d54a9c97ab45194bb8c17ad66bb660" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1717,8 +2981,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", - "digest", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = 
"sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", ] [[package]] @@ -1746,6 +3021,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", + "rand_core", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -1764,6 +3049,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.3" @@ -1781,6 +3076,22 @@ dependencies = [ "extractors-core", ] +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -1814,7 +3125,7 @@ dependencies = [ "escape-bytes", "ethnum", "hex", - "sha2", + "sha2 0.10.9", "stellar-strkey", ] @@ -1940,6 +3251,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.3" @@ -1977,7 +3319,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -1993,13 +3335,23 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.40", "tokio", ] @@ -2014,6 +3366,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml_datetime" version = "1.1.1+spec-1.1.0" @@ -2029,7 +3394,7 @@ version = "0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ - "indexmap", + "indexmap 2.14.0", "toml_datetime", "toml_parser", "winnow", @@ -2083,8 +3448,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", "tower 0.5.3", "tower-layer", @@ -2215,6 +3580,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -2249,6 +3620,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "want" version = "0.3.1" @@ -2551,12 +3928,18 @@ dependencies = [ "hex", "serde", "serde_json", - "sha2", + "sha2 0.10.9", "stellar-xdr", "tracing", "zstd", ] +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "yoke" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index a3e94d3..a124b6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,8 @@ members = [ "packages/soroswap-extractor", "packages/aquarius-extractor", "packages/ledger-processor", - "packages/enrichment-worker", + "packages/prices-ingest-core", + "packages/prices-ledger-processor", ] [workspace.dependencies] @@ -25,3 +26,11 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" rust_decimal = { version = "1", features = ["serde-with-str"] } thiserror = "2" + +# AWS / Lambda — used only by 
the live Prices Ledger Processor (task 0038), +# behind its `lambda` / `aws-mtls` cargo features so default builds stay lean. +lambda_runtime = "0.13" +aws-config = "1" +aws-sdk-s3 = "1" +aws_lambda_events = { version = "0.16", default-features = false, features = ["sqs"] } +tempfile = "3" diff --git a/infra/envs/production.json b/infra/envs/production.json index ced2b18..09ea7b8 100644 --- a/infra/envs/production.json +++ b/infra/envs/production.json @@ -9,5 +9,13 @@ "oracleWatcher": "rate(5 minutes)", "assetDiscovery": "rate(1 hour)", "cleanup": "cron(0 3 * * ? *)" + }, + "ledgerProcessor": { + "memoryMb": 512, + "timeoutSeconds": 60, + "reservedConcurrency": 1, + "sqsBatchSize": 1, + "maxReceiveCount": 10, + "maxIterations": 16 } } diff --git a/infra/src/index.ts b/infra/src/index.ts index faa858e..52a01b1 100644 --- a/infra/src/index.ts +++ b/infra/src/index.ts @@ -30,5 +30,15 @@ export type { BaselineLambdaContext } from './lib/lambda-baseline.js'; // mTLS helpers — used by downstream stacks that attach Lambdas // requiring the AWS Parameters and Secrets Lambda Extension layer -// and per-service mTLS Secret ARN grants. -export { secretsManagerLayerArn, mtlsSecretArn } from './lib/mtls.js'; +// and per-service mTLS Secret ARN grants. `mtlsClientCn` / +// `mtlsSecretName` are the single source of truth for the env-suffixed +// CN + bundle secret name (0063); 0038/0040 set `MTLS_SECRET_NAME` from +// the `ComputeStack` props derived from them. 
+export { + secretsManagerLayerArn, + mtlsSecretArn, + mtlsSecretArnFromParts, + mtlsClientCn, + mtlsSecretName, +} from './lib/mtls.js'; +export type { MtlsRole } from './lib/mtls.js'; diff --git a/infra/src/lib/app.ts b/infra/src/lib/app.ts index a6ef189..902b9cf 100644 --- a/infra/src/lib/app.ts +++ b/infra/src/lib/app.ts @@ -23,15 +23,13 @@ export function createApp({ config }: CreateAppOptions): void { const prefix = `Prices-${config.envName}`; - const secrets = new SecretsStack(app, `${prefix}-Secrets`, { env, config }); - - const compute = new ComputeStack(app, `${prefix}-Compute`, { - env, - config, - mtlsCertSecret: secrets.mtlsCertSecret, - mtlsKeySecret: secrets.mtlsKeySecret, - }); - compute.addDependency(secrets); + // SecretsStack only publishes the mTLS bundle secret NAMES to SSM — it does + // not create the secrets (operator-issued out-of-band; BE-mirroring). So + // ComputeStack derives its own secret names from the shared `mtlsSecretName` + // helper and needs no cross-stack reference / dependency on SecretsStack. 
+ new SecretsStack(app, `${prefix}-Secrets`, { env, config }); + + new ComputeStack(app, `${prefix}-Compute`, { env, config }); // ApiGatewayStack is independent of ComputeStack in the skeleton // (no Lambda integration yet — task 0040 wires the cross-stack diff --git a/infra/src/lib/lambda-baseline.ts b/infra/src/lib/lambda-baseline.ts index 7f61148..f6af3be 100644 --- a/infra/src/lib/lambda-baseline.ts +++ b/infra/src/lib/lambda-baseline.ts @@ -2,16 +2,23 @@ import * as cdk from 'aws-cdk-lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as lambda from 'aws-cdk-lib/aws-lambda'; import * as logs from 'aws-cdk-lib/aws-logs'; -import type * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import type { Construct } from 'constructs'; import type { EnvironmentConfig } from './types.js'; +import { mtlsSecretArnFromParts } from './mtls.js'; export interface BaselineLambdaContext { readonly config: EnvironmentConfig; readonly accountId: string; - readonly mtlsCertSecret: secretsmanager.ISecret; - readonly mtlsKeySecret: secretsmanager.ISecret; + /** + * Secrets Manager NAME of the single `{cert,key,ca}` bundle this Lambda + * reads (its `MTLS_SECRET_NAME`). Each Lambda is granted read on only its + * own bundle — least privilege, mirroring BE's per-service grant. The + * secret is created out-of-band by the operator (see `SecretsStack`); the + * grant is on the by-name wildcard ARN, so it does not require the secret + * to exist at synth time. Derive with `mtlsSecretName(envName, role)`. + */ + readonly mtlsSecretName: string; } /** @@ -20,7 +27,8 @@ export interface BaselineLambdaContext { * * 1. CloudWatch Logs write — via AWSLambdaBasicExecutionRole managed * policy (attached separately at role construction time). - * 2. Read the two mTLS material secrets from Secrets Manager. + * 2. Read its own mTLS bundle secret from Secrets Manager (one secret, + * by-name wildcard ARN — BE-mirroring; the operator creates the value). * 3. 
Read both SSM namespaces — /platform/{env}/* (BE-published) and * /prices/{env}/* (prices-api-published). Read-only here; the * deploy role (CicdStack) is the only principal that writes. @@ -41,7 +49,9 @@ export function baselineLambdaPolicyStatements( new iam.PolicyStatement({ sid: 'ReadMtlsMaterial', actions: ['secretsmanager:GetSecretValue'], - resources: [ctx.mtlsCertSecret.secretArn, ctx.mtlsKeySecret.secretArn], + resources: [ + mtlsSecretArnFromParts(region, accountId, ctx.mtlsSecretName), + ], }), new iam.PolicyStatement({ sid: 'ReadSsmNamespaces', diff --git a/infra/src/lib/mtls.ts b/infra/src/lib/mtls.ts index 70c2ae6..0711108 100644 --- a/infra/src/lib/mtls.ts +++ b/infra/src/lib/mtls.ts @@ -45,11 +45,51 @@ export function secretsManagerLayerArn(region: string): string { return arn; } +/** + * The two mTLS client identities prices-api presents to BE's Hetzner + * ClickHouse, mirroring BE's per-service Lambda model: + * + * - `ingestion` → CH user `prices_writer` (ledger processor + periodic + * workers; `SELECT, INSERT, OPTIMIZE ON prices.*`). + * - `api` → CH user `prices_reader` (axum read handlers; + * `SELECT ON prices.*`). + */ +export type MtlsRole = 'ingestion' | 'api'; + +/** + * Canonical mTLS client-cert CN for a role, env-suffixed to mirror BE + * (`lambda-ingestion-production`). prices-api shares BE's CA, so these CNs + * live in BE's CA namespace and must stay globally unique there — hence the + * `-${envName}` suffix. The CN is the single thread tying the cert subject, + * the Caddy `CLICKHOUSE_CN_USER_MAP` key, the CH user, and the secret name + * together; keep all four derived from this one string. 
+ * + * mtlsClientCn('production', 'ingestion') === 'prices-ingestion-production' + */ +export function mtlsClientCn(envName: string, role: MtlsRole): string { + return `prices-${role}-${envName}`; +} + +/** + * Secrets Manager secret name holding the single `{cert,key,ca}` JSON bundle + * for a role — the value `MTLS_SECRET_NAME` resolves to at Lambda runtime + * (see `packages/prices-clickhouse/src/mtls.rs`). The secret is created + * out-of-band by the operator (BE-mirroring: CDK does NOT manage the material; + * see `SecretsStack`), so this name must match the `--secret-id` the issuance + * runbook uploads to (0063 `notes/G-provisioning-plan.md` §5). + * + * mtlsSecretName('production', 'ingestion') + * === 'prices/production/clickhouse-mtls-prices-ingestion-production' + */ +export function mtlsSecretName(envName: string, role: MtlsRole): string { + return `prices/${envName}/clickhouse-mtls-${mtlsClientCn(envName, role)}`; +} + /** * Build the wildcard-suffixed Secrets Manager ARN for a secret name. * * AWS Secrets Manager appends a random 6-char suffix to every secret - * ARN (e.g. `…secret:prices/production/mtls/ledger-processor-production-aBcDeF`). + * ARN (e.g. `…secret:prices/production/clickhouse-mtls-prices-api-production-aBcDeF`). * IAM grants must use a wildcard to match. Returns the ARN form * `arn:aws:secretsmanager:::secret:-*`. * @@ -57,6 +97,19 @@ export function secretsManagerLayerArn(region: string): string { * works in both per-account synth and assumed-role deploys. */ export function mtlsSecretArn(scope: cdk.Stack, secretName: string): string { + return mtlsSecretArnFromParts(scope.region, scope.account, secretName); +} + +/** + * Scope-free variant of {@link mtlsSecretArn} for callers that already hold + * the region + account (e.g. `lambda-baseline` builds the grant from the + * resolved `EnvironmentConfig.awsRegion` + account id, without a `Stack`). 
+ */ +export function mtlsSecretArnFromParts( + region: string, + account: string, + secretName: string, +): string { // Reject IAM-meaningful wildcards in the secret name. The function builds // an ARN like `…:secret:${secretName}-*` for IAM grants; an unexpected `*` // or `?` inside `secretName` would silently widen the grant beyond the @@ -67,7 +120,5 @@ export function mtlsSecretArn(scope: cdk.Stack, secretName: string): string { `those characters widen the IAM grant beyond a single secret.`, ); } - const region = scope.region; - const account = scope.account; return `arn:aws:secretsmanager:${region}:${account}:secret:${secretName}-*`; } diff --git a/infra/src/lib/stacks/compute-stack.ts b/infra/src/lib/stacks/compute-stack.ts index 189e888..11a01e1 100644 --- a/infra/src/lib/stacks/compute-stack.ts +++ b/infra/src/lib/stacks/compute-stack.ts @@ -1,7 +1,13 @@ import * as cdk from 'aws-cdk-lib'; -import type * as iam from 'aws-cdk-lib/aws-iam'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as lambda from 'aws-cdk-lib/aws-lambda'; +import * as lambdaEventSources from 'aws-cdk-lib/aws-lambda-event-sources'; import * as logs from 'aws-cdk-lib/aws-logs'; -import type * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import * as sns from 'aws-cdk-lib/aws-sns'; +import * as snsSubscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; +import * as sqs from 'aws-cdk-lib/aws-sqs'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; import type { Construct } from 'constructs'; import type { EnvironmentConfig } from '../types.js'; @@ -10,83 +16,420 @@ import { PRICES_LAMBDA_LOG_RETENTION, createPricesLambdaRole, lambdaLogGroupName, + pricesLambdaDefaults, } from '../lambda-baseline.js'; +import { mtlsSecretName, secretsManagerLayerArn } from '../mtls.js'; + +const DLQ_RETENTION_DAYS = 14; + +/** + * Cargo-lambda build output for the `prices-ledger-processor` binary. 
+ * + * BE defines the equivalent Lambda with `cargo-lambda-cdk`'s + * `RustFunction`, which shells out to `cargo lambda build` at synth. + * The prices-api infra does not (yet) carry that dependency, so this + * stack consumes the pre-built `provided.al2023` bootstrap via + * `Code.fromAsset`. Build it first: + * + * cargo lambda build -p prices-ledger-processor --release --arm64 + * + * which writes `target/lambda/prices-ledger-processor/bootstrap`. + * Production follow-up: add `cargo-lambda-cdk` and swap this for + * `RustFunction` so synth builds the binary (mirrors BE exactly). + * Override with `LEDGER_PROCESSOR_ASSET_DIR` if needed. + */ +const LEDGER_PROCESSOR_ASSET_DIR = + process.env['LEDGER_PROCESSOR_ASSET_DIR'] ?? + '../target/lambda/prices-ledger-processor'; + +/** + * SSM keys the BE team publishes under the platform namespace and the + * prices-api CDK reads **at deploy time** (NOT at Lambda runtime — the + * Lambda only ever sees env vars; SSM is the deploy handshake). See + * task 0038 spec §C.2. + * + * `ledgerEventsTopicArn` is the artefact of the 2026-06-10 SNS + * decision (§C.1): BE moves the bucket notification to an SNS topic + * and publishes its ARN here for prices-api to subscribe to. Topic + * ownership + the cross-team handshake is tracked by task 0050. 
+ */ +function platformSsmKeys(envName: string) { + const base = `/platform/${envName}`; + return { + ledgerBucketName: `${base}/stellar-ledger-data-bucket-name`, + ledgerBucketArn: `${base}/stellar-ledger-data-bucket-arn`, + chDomain: `${base}/ch-domain`, + networkPassphrase: `${base}/stellar-network-passphrase`, + ledgerEventsTopicArn: `${base}/ledger-events-topic-arn`, + }; +} export interface ComputeStackProps extends cdk.StackProps { readonly config: EnvironmentConfig; - readonly mtlsCertSecret: secretsmanager.ISecret; - readonly mtlsKeySecret: secretsmanager.ISecret; } /** - * Compute layer for prices-api: per-Lambda IAM roles + LogGroups - * for the two anchor Lambdas, with no actual Function definitions - * yet. Downstream tasks attach `RustFunction` constructs to these - * roles + log groups: + * Compute layer for prices-api. + * + * Owns the per-Lambda IAM roles + LogGroups for the two anchor + * Lambdas, plus the full wiring of the live **Prices Ledger Processor** + * Lambda (task 0038): its SQS doorbell queue + DLQ, the SNS fan-out + * subscription, the Function itself, and the event-source-mapping. + * + * - `ledgerProcessorRole` / `ledgerProcessorLogGroup` + + * `ledgerProcessorFunction` — task 0038 (live S3-event-driven + * ingest). Role + queue + function are deliberately co-located in + * one stack: the event-source-mapping and the queue/bucket grants + * mutate the role's policy, so splitting them across stacks creates + * a CloudFormation dependency cycle. (BE keeps the same shape in a + * single `compute-stack.ts`.) + * - `apiHandlerRole` / `apiHandlerLogGroup` — consumed by task 0040 + * (axum REST handlers behind API Gateway); no Function yet. 
+ * + * The four periodic-worker roles (task 0039) and the backfill-status + * role (task 0055) are NOT pre-created here — those Lambdas are + * coupled to the EventBridge Scheduler rules (0039) and API Gateway + * routes (0055) defined alongside them; each calls + * `createPricesLambdaRole` from `lib/lambda-baseline.ts`. + * + * Ingest topology (2026-06-10 cross-team decision, spec §C.1 — SNS + * fan-out): * - * - `ledgerProcessorRole` / `ledgerProcessorLogGroup` — consumed by - * task 0038 (live S3-event-driven ingest Lambda). - * - `apiHandlerRole` / `apiHandlerLogGroup` — consumed by task - * 0040 (axum REST handlers behind API Gateway). + * ledger PutObject → S3 ObjectCreated (BE's stellar-ledger-data) + * → SNS topic (BE-owned) + * ├─ SQS ledger-ingest-{env} (BE indexer) + * └─ SQS prices-ingest-{env} (this stack) + DLQ + * → ledger-processor Lambda * - * The four periodic-worker roles (task 0039: price updater, oracle - * watcher, asset discovery, cleanup) and the backfill-status role - * (task 0055) are NOT pre-created here — those Lambdas are - * closely coupled to the EventBridge Scheduler rules (0039) and - * API Gateway routes (0055) defined alongside them. Each of those - * tasks calls `createPricesLambdaRole` from `lib/lambda-baseline.ts` - * to construct a baseline role and then extends it with - * stack-specific permissions. + * prices-api owns its **own** queue + DLQ subscribed to BE's topic, so + * a prices-side backlog never pressures BE's indexer queue (failure + * isolation). BE and prices share one AWS account, so the SNS→SQS + * subscription is same-account — no cross-account topic policy is + * required, only the queue resource policy `SqsSubscription` adds. * - * No VPC. Per ADR 0007 §3.6, Lambdas reach the Hetzner Caddy - * address over the public internet; gating is mTLS at Caddy. 
+ * Mirrors BE's `compute-stack.ts`: `reservedConcurrentExecutions = 1` + * and `batchSize = 1` are load-bearing for ordering (the doorbell- + * cursor reconcile loop races the cursor under concurrency), not perf + * knobs; `maxReceiveCount = 10` absorbs the ESM over-poll/throttle + * churn that concurrency=1 induces; `visibilityTimeout = timeout + 60s` + * so SQS never redelivers a doorbell still being processed. + * + * No VPC (ADR 0007 §3.6); identity to the Hetzner Caddy/CH endpoint is + * mTLS, the bundle sourced via the Parameters and Secrets extension + * layer. Deploy is gated on BE 0227 + task 0047 + BE publishing the + * platform SSM keys; this ingest wiring is authored prepare-only. */ export class ComputeStack extends cdk.Stack { public readonly ledgerProcessorRole: iam.Role; public readonly ledgerProcessorLogGroup: logs.LogGroup; + public readonly ledgerProcessorFunction: lambda.Function; + public readonly ingestQueue: sqs.Queue; + public readonly ingestDlq: sqs.Queue; public readonly apiHandlerRole: iam.Role; public readonly apiHandlerLogGroup: logs.LogGroup; + /** + * `MTLS_SECRET_NAME` the ledger-processor (writer) Lambda reads — the + * single `{cert,key,ca}` bundle for CH user `prices_writer`. Set on the + * Function env below; the role is granted read on exactly this secret. + */ + public readonly ledgerProcessorMtlsSecretName: string; + /** + * `MTLS_SECRET_NAME` the api-handler (reader) Lambda must read — the + * single bundle for CH user `prices_reader`. Set on the Function env in + * task 0040; the role is already granted read on exactly this secret. 
+ */ + public readonly apiHandlerMtlsSecretName: string; constructor(scope: Construct, id: string, props: ComputeStackProps) { super(scope, id, props); - const { config, mtlsCertSecret, mtlsKeySecret } = props; + const { config } = props; + const { envName, awsRegion } = config; + const lp = config.ledgerProcessor; const accountId = cdk.Stack.of(this).account; - const ctx = { config, accountId, mtlsCertSecret, mtlsKeySecret }; + const keys = platformSsmKeys(envName); + + // Two mTLS identities, mirroring BE's per-service split: the ledger + // processor writes as `prices_writer` (ingestion bundle); the api handler + // reads as `prices_reader` (api bundle). Each role is granted read on ONLY + // its own secret (least privilege). The secrets are created out-of-band by + // the operator (see SecretsStack); CDK only names + grants + sets the env. + this.ledgerProcessorMtlsSecretName = mtlsSecretName(envName, 'ingestion'); + this.apiHandlerMtlsSecretName = mtlsSecretName(envName, 'api'); + // --------------------------------------------------------------- + // Ledger Processor: baseline role + log group + // --------------------------------------------------------------- this.ledgerProcessorRole = createPricesLambdaRole( this, 'LedgerProcessorRole', - ctx, + { + config, + accountId, + mtlsSecretName: this.ledgerProcessorMtlsSecretName, + }, ); this.ledgerProcessorLogGroup = new logs.LogGroup( this, 'LedgerProcessorLogGroup', { - logGroupName: lambdaLogGroupName(config.envName, 'ledger-processor'), + logGroupName: lambdaLogGroupName(envName, 'ledger-processor'), retention: PRICES_LAMBDA_LOG_RETENTION, removalPolicy: PRICES_LAMBDA_LOG_REMOVAL_POLICY, }, ); - this.apiHandlerRole = createPricesLambdaRole(this, 'ApiHandlerRole', ctx); + // --------------------------------------------------------------- + // Deploy-time SSM reads (BE-published platform identifiers). 
+ // valueForStringParameter resolves via a CFN parameter at deploy; + // the Lambda only ever sees the resulting env-var values. + // --------------------------------------------------------------- + const ledgerBucketName = ssm.StringParameter.valueForStringParameter( + this, + keys.ledgerBucketName, + ); + const ledgerBucketArn = ssm.StringParameter.valueForStringParameter( + this, + keys.ledgerBucketArn, + ); + const chDomain = ssm.StringParameter.valueForStringParameter( + this, + keys.chDomain, + ); + const networkPassphrase = ssm.StringParameter.valueForStringParameter( + this, + keys.networkPassphrase, + ); + const ledgerEventsTopicArn = ssm.StringParameter.valueForStringParameter( + this, + keys.ledgerEventsTopicArn, + ); + // Bootstrap ledger for the doorbell cursor (`INITIAL_CURSOR`). The + // reconcile loop's `/tmp` cursor file is empty on a fresh container, so + // without a seed `cursor.read()` errors and every doorbell DLQs — the + // Lambda can never start. Sourced from the prices-owned SSM namespace + // (the operator seeds "live ingestion starts here" at deploy prep, like + // the mTLS secrets) rather than committed config, so it is never a + // stale magic number. One-time bootstrap; superseded by the durable + // CH-backed cursor (task 0064). 
+ const initialCursor = ssm.StringParameter.valueForStringParameter( + this, + `/prices/${envName}/ledger-processor/initial-cursor`, + ); + + // --------------------------------------------------------------- + // SQS DLQ + prices ingest queue (prices-owned doorbell source) + // --------------------------------------------------------------- + this.ingestDlq = new sqs.Queue(this, 'PricesIngestDlq', { + queueName: `prices-ingest-dlq-${envName}`, + retentionPeriod: cdk.Duration.days(DLQ_RETENTION_DAYS), + }); + + this.ingestQueue = new sqs.Queue(this, 'PricesIngestQueue', { + queueName: `prices-ingest-${envName}`, + // MUST be >= the Lambda timeout, else SQS redelivers a doorbell + // the Lambda is still legitimately draining. timeout + 60s margin. + visibilityTimeout: cdk.Duration.seconds(lp.timeoutSeconds + 60), + retentionPeriod: cdk.Duration.days(DLQ_RETENTION_DAYS), + deadLetterQueue: { + queue: this.ingestDlq, + maxReceiveCount: lp.maxReceiveCount, + }, + }); + + // --------------------------------------------------------------- + // SNS fan-out subscription — BE-owned topic → our queue + // --------------------------------------------------------------- + // Import the BE topic by ARN (published under the platform SSM key + // per the SNS decision). Adding the SqsSubscription creates the + // AWS::SNS::Subscription in THIS stack and attaches the queue + // resource policy that lets the topic deliver — prices owns the + // subscription side, BE owns the topic. rawMessageDelivery keeps + // the body the bare S3 event (our Lambda ignores it regardless). 
+ const ledgerEventsTopic = sns.Topic.fromTopicArn( + this, + 'BeLedgerEventsTopic', + ledgerEventsTopicArn, + ); + ledgerEventsTopic.addSubscription( + new snsSubscriptions.SqsSubscription(this.ingestQueue, { + rawMessageDelivery: true, + }), + ); + + // --------------------------------------------------------------- + // Secrets Manager extension layer (mTLS bundle fetch at cold start) + // --------------------------------------------------------------- + const secretsExtensionLayer = lambda.LayerVersion.fromLayerVersionArn( + this, + 'SecretsExtensionLayer', + secretsManagerLayerArn(awsRegion), + ); + + // --------------------------------------------------------------- + // Ledger Processor Lambda + SQS event-source-mapping + // --------------------------------------------------------------- + this.ledgerProcessorFunction = new lambda.Function( + this, + 'LedgerProcessorFunction', + { + ...pricesLambdaDefaults, // ARM64 + PROVIDED_AL2023 (ADR 0006/0007) + functionName: `prices-${envName}-ledger-processor`, + // cargo-lambda emits a single self-contained `bootstrap` binary; + // PROVIDED_AL2023 custom runtimes always use the `bootstrap` + // handler name. + handler: 'bootstrap', + code: lambda.Code.fromAsset(LEDGER_PROCESSOR_ASSET_DIR), + role: this.ledgerProcessorRole, + logGroup: this.ledgerProcessorLogGroup, + memorySize: lp.memoryMb, + timeout: cdk.Duration.seconds(lp.timeoutSeconds), + // Load-bearing: serial execution is the ordering guarantee. The + // reconcile loop reads a cursor and advances it; two concurrent + // invocations would race it. validateConfig pins this to 1. + reservedConcurrentExecutions: lp.reservedConcurrency, + tracing: lambda.Tracing.ACTIVE, + layers: [secretsExtensionLayer], + environment: { + ENV_NAME: envName, + RUST_LOG: 'info', + // Source bucket for ledger XDR objects. The Lambda derives S3 + // keys from ledger numbers (Galexie scheme) and HEAD/GETs this + // bucket; it does NOT parse the SQS doorbell body. 
+ BUCKET_NAME: ledgerBucketName, + // mTLS endpoint (Caddy host on the Hetzner box). + CH_DOMAIN: chDomain, + // Required by xdr-parser's network-id cache (SAC derivation). + STELLAR_NETWORK_PASSPHRASE: networkPassphrase, + // Single {cert,key,ca} bundle secret (task 0052/0063). Task 0052's + // clickhouse client crate reads exactly this one env var + // (`MTLS_SECRET_NAME`) and parses the JSON bundle via the extension + // — see packages/prices-clickhouse/src/mtls.rs:233. Name is derived + // from the shared mtlsSecretName helper, so it can't drift from the + // SecretsStack publication or the operator's create-secret. + MTLS_SECRET_NAME: this.ledgerProcessorMtlsSecretName, + // Bootstrap cursor seed. main.rs writes the `/tmp` cursor file + // from this on a fresh container; without it `cursor.read()` errors + // and every doorbell DLQs (the Lambda never starts). + INITIAL_CURSOR: initialCursor, + // Explicit cursor checkpoint path. `/tmp` is the only writable + // Lambda filesystem; matches the Rust default but pinned here so + // the runtime contract is visible. (Per-container ephemeral — + // durable cursor is task 0064.) + CURSOR_FILE: '/tmp/prices-cursor.txt', + // Max contiguous ledgers per reconcile run (bounds fetch+decode + // against the Lambda timeout). + MAX_ITERATIONS: String(lp.maxIterations), + // In-memory caching in the secrets extension — repeat reads in + // one execution environment hit RAM, not Secrets Manager. + PARAMETERS_SECRETS_EXTENSION_CACHE_ENABLED: 'true', + }, + }, + ); + + // batchSize 1 mirrors BE (the doorbell body is ignored, so larger + // batches buy nothing under concurrency=1). reportBatchItemFailures + // lets the handler fail just the offending doorbell; SQS redelivers + // it up to maxReceiveCount, then it lands in the DLQ. + // + // maxConcurrency caps the event-source's poller scaling. 
By default the + // ESM scales to 5 concurrent batches; with reservedConcurrency=1 the + // other 4 are throttle-rejected and their messages re-enqueue, each + // incrementing receiveCount — under a burst a processable doorbell can + // hit maxReceiveCount and false-DLQ before it is ever handled. 2 is the + // ESM minimum (it cannot equal the reserved 1), so this shrinks the + // over-poll window from 5→2 to complement maxReceiveCount. + this.ledgerProcessorFunction.addEventSource( + new lambdaEventSources.SqsEventSource(this.ingestQueue, { + batchSize: lp.sqsBatchSize, + reportBatchItemFailures: true, + maxConcurrency: 2, + }), + ); + + // --------------------------------------------------------------- + // IAM — S3 read on BE's bucket (same-account → plain IAM grant, no + // bucket policy from BE) + CloudWatch lag metric + X-Ray. + // --------------------------------------------------------------- + const ledgerBucket = s3.Bucket.fromBucketAttributes(this, 'LedgerBucket', { + bucketArn: ledgerBucketArn, + bucketName: ledgerBucketName, + }); + ledgerBucket.grantRead(this.ledgerProcessorRole); + + // grantRead on a bucket imported by attributes (no `encryptionKey`) + // cannot infer an SSE-KMS key, so it adds no kms:Decrypt. If BE's + // bucket is KMS-encrypted, every GetObject would 403 (AccessDenied) — + // which S3Fetcher maps to a hard error that DLQ's the doorbell, not a + // gap. Grant decrypt explicitly when the key ARN is configured. 
+ if (lp.bucketKmsKeyArn) { + this.ledgerProcessorRole.addToPrincipalPolicy( + new iam.PolicyStatement({ + sid: 'DecryptLedgerObjects', + actions: ['kms:Decrypt'], + resources: [lp.bucketKmsKeyArn], + }), + ); + } + + this.ledgerProcessorRole.addToPrincipalPolicy( + new iam.PolicyStatement({ + sid: 'PublishLagMetric', + actions: ['cloudwatch:PutMetricData'], + resources: ['*'], + conditions: { + StringEquals: { 'cloudwatch:namespace': 'PricesApi/LedgerProcessor' }, + }, + }), + ); + this.ledgerProcessorRole.addToPrincipalPolicy( + new iam.PolicyStatement({ + sid: 'XRayWrite', + actions: ['xray:PutTraceSegments', 'xray:PutTelemetryRecords'], + resources: ['*'], + }), + ); + + // --------------------------------------------------------------- + // API Handler: baseline role + log group (Function lands in 0040) + // --------------------------------------------------------------- + this.apiHandlerRole = createPricesLambdaRole(this, 'ApiHandlerRole', { + config, + accountId, + mtlsSecretName: this.apiHandlerMtlsSecretName, + }); this.apiHandlerLogGroup = new logs.LogGroup(this, 'ApiHandlerLogGroup', { - logGroupName: lambdaLogGroupName(config.envName, 'api-handler'), + logGroupName: lambdaLogGroupName(envName, 'api-handler'), retention: PRICES_LAMBDA_LOG_RETENTION, removalPolicy: PRICES_LAMBDA_LOG_REMOVAL_POLICY, }); + // --------------------------------------------------------------- + // Outputs + // --------------------------------------------------------------- new cdk.CfnOutput(this, 'LedgerProcessorRoleArn', { value: this.ledgerProcessorRole.roleArn, - description: `Ledger Processor Lambda execution role ARN (${config.envName})`, + description: `Ledger Processor Lambda execution role ARN (${envName})`, + }); + new cdk.CfnOutput(this, 'LedgerProcessorFunctionArn', { + value: this.ledgerProcessorFunction.functionArn, + description: `Prices Ledger Processor Lambda ARN (${envName})`, + }); + new cdk.CfnOutput(this, 'PricesIngestQueueUrl', { + value: 
this.ingestQueue.queueUrl, + description: `Prices ingest queue URL (${envName})`, + }); + new cdk.CfnOutput(this, 'PricesIngestDlqUrl', { + value: this.ingestDlq.queueUrl, + description: `Prices ingest DLQ URL (${envName})`, }); new cdk.CfnOutput(this, 'ApiHandlerRoleArn', { value: this.apiHandlerRole.roleArn, - description: `API Handler Lambda execution role ARN (${config.envName})`, + description: `API Handler Lambda execution role ARN (${envName})`, }); cdk.Tags.of(this).add('Project', 'stellar-prices-api'); cdk.Tags.of(this).add('ManagedBy', 'cdk'); - cdk.Tags.of(this).add('Environment', config.envName); + cdk.Tags.of(this).add('Environment', envName); } } diff --git a/infra/src/lib/stacks/secrets-stack.ts b/infra/src/lib/stacks/secrets-stack.ts index e37c812..a7cc9e1 100644 --- a/infra/src/lib/stacks/secrets-stack.ts +++ b/infra/src/lib/stacks/secrets-stack.ts @@ -1,96 +1,85 @@ import * as cdk from 'aws-cdk-lib'; -import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import * as ssm from 'aws-cdk-lib/aws-ssm'; import type { Construct } from 'constructs'; import type { EnvironmentConfig } from '../types.js'; +import { mtlsSecretName } from '../mtls.js'; export interface SecretsStackProps extends cdk.StackProps { readonly config: EnvironmentConfig; } /** - * Secrets Manager slots for the mTLS material that prices-api uses - * to connect to BE's Hetzner ClickHouse over HTTPS-mTLS. + * Publishes the canonical Secrets Manager **names** for the prices-api mTLS + * bundles. It deliberately does NOT create the secrets. * - * Per ADR 0007 §3.5: two secrets per env (cert + key, separately). 
- * BE's per-AWS-service issuance script (task 0050) produces the real - * PEMs; an operator uploads them post-deploy via: + * ## Why no `new secretsmanager.Secret` (BE-mirroring) * - * aws secretsmanager put-secret-value \ - * --secret-id prices/{env}/clickhouse-mtls-cert \ - * --secret-string "$(cat .pem)" + * BE never CDK-manages the mTLS material for its Lambdas: `compute-stack.ts` + * builds the secret *name*, grants `secretsmanager:GetSecretValue` on the + * by-name ARN, sets `MTLS_SECRET_NAME`, and the operator creates the secret + * out-of-band (`infra-hetzner/ca/issue-client-cert.sh` → `aws secretsmanager + * create-secret`). We mirror that exactly: * - * aws secretsmanager put-secret-value \ - * --secret-id prices/{env}/clickhouse-mtls-key \ - * --secret-string "$(cat .pem)" + * - The secret holds the **single `{cert,key,ca}` JSON bundle** that + * `packages/prices-clickhouse/src/mtls.rs` parses at runtime — NOT the old + * two-secret cert/key split this stack used to create. The CA private key + * never enters CDK; cert/key bytes are operator-issued and uploaded. + * - Letting CloudFormation own the secret would (a) require a random + * placeholder that the runtime client cannot parse as a bundle, and (b) + * collide with the operator's `create-secret` (CFN refuses to create a name + * that already exists). Naming-only avoids both. * - * The CDK template intentionally does NOT contain the PEM values — - * `generateSecretString` creates a random placeholder on first - * deploy; subsequent `cdk deploy` invocations do not re-randomize as - * long as the generator parameters are unchanged. Re-running deploy - * after the operator upload leaves the real PEMs intact. + * Per the SSM key contract, only the prices-owned secret **names** are + * published to `/prices/{env}/*` (identifiers, never trust material) so the + * issuance runbook and any out-of-band tooling read one source of truth. 
The + * names themselves come from {@link mtlsSecretName} — the same helper + * `ComputeStack` uses for the IAM grant + `MTLS_SECRET_NAME`, so the two can + * never drift (the failure mode we found in BE's own README-vs-CDK). * - * The Secret ARNs are published to SSM under the prices-api-owned - * namespace (`/prices/{env}/mtls-{cert,key}-secret-arn`) so task - * 0052's `clickhouse-client` crate can read them at Lambda init. + * Two identities (0063 decision, env-suffixed CNs): + * - `prices/{env}/clickhouse-mtls-prices-ingestion-{env}` → `prices_writer` + * - `prices/{env}/clickhouse-mtls-prices-api-{env}` → `prices_reader` */ export class SecretsStack extends cdk.Stack { - public readonly mtlsCertSecret: secretsmanager.ISecret; - public readonly mtlsKeySecret: secretsmanager.ISecret; + /** Secrets Manager name of the ingestion (writer) `{cert,key,ca}` bundle. */ + public readonly ingestionSecretName: string; + /** Secrets Manager name of the api (reader) `{cert,key,ca}` bundle. */ + public readonly apiSecretName: string; constructor(scope: Construct, id: string, props: SecretsStackProps) { super(scope, id, props); const { envName } = props.config; - this.mtlsCertSecret = new secretsmanager.Secret(this, 'MtlsCertSecret', { - secretName: `prices/${envName}/clickhouse-mtls-cert`, - description: - `mTLS client certificate (PEM) for prices-api → BE Hetzner ClickHouse, ${envName}. ` + - `Initial value is a CDK-generated random placeholder; operator replaces with the real ` + - `cert via 'aws secretsmanager put-secret-value' after BE task 0050 issuance.`, - generateSecretString: { - passwordLength: 64, - excludePunctuation: true, - }, - removalPolicy: cdk.RemovalPolicy.RETAIN, - }); - - this.mtlsKeySecret = new secretsmanager.Secret(this, 'MtlsKeySecret', { - secretName: `prices/${envName}/clickhouse-mtls-key`, - description: - `mTLS client private key (PEM) for prices-api → BE Hetzner ClickHouse, ${envName}. 
` + - `Initial value is a CDK-generated random placeholder; operator replaces with the real ` + - `key via 'aws secretsmanager put-secret-value' after BE task 0050 issuance.`, - generateSecretString: { - passwordLength: 64, - excludePunctuation: true, - }, - removalPolicy: cdk.RemovalPolicy.RETAIN, - }); + this.ingestionSecretName = mtlsSecretName(envName, 'ingestion'); + this.apiSecretName = mtlsSecretName(envName, 'api'); - new ssm.StringParameter(this, 'MtlsCertSecretArnParam', { - parameterName: `/prices/${envName}/mtls-cert-secret-arn`, - stringValue: this.mtlsCertSecret.secretArn, + new ssm.StringParameter(this, 'MtlsIngestionSecretNameParam', { + parameterName: `/prices/${envName}/mtls-ingestion-secret-name`, + stringValue: this.ingestionSecretName, description: - 'Secrets Manager ARN holding the prices-api mTLS client cert PEM', + 'Secrets Manager NAME of the prices-api ingestion (writer) mTLS ' + + '{cert,key,ca} bundle. Operator creates the secret out-of-band; ' + + 'CDK only names + grants. Value = MTLS_SECRET_NAME for writer Lambdas.', }); - new ssm.StringParameter(this, 'MtlsKeySecretArnParam', { - parameterName: `/prices/${envName}/mtls-key-secret-arn`, - stringValue: this.mtlsKeySecret.secretArn, + new ssm.StringParameter(this, 'MtlsApiSecretNameParam', { + parameterName: `/prices/${envName}/mtls-api-secret-name`, + stringValue: this.apiSecretName, description: - 'Secrets Manager ARN holding the prices-api mTLS client key PEM', + 'Secrets Manager NAME of the prices-api api (reader) mTLS ' + + '{cert,key,ca} bundle. Operator creates the secret out-of-band; ' + + 'CDK only names + grants. 
Value = MTLS_SECRET_NAME for reader Lambdas.', }); - new cdk.CfnOutput(this, 'MtlsCertSecretArn', { - value: this.mtlsCertSecret.secretArn, - description: `mTLS cert Secrets Manager ARN for ${envName}`, + new cdk.CfnOutput(this, 'MtlsIngestionSecretName', { + value: this.ingestionSecretName, + description: `mTLS ingestion (writer) bundle secret name for ${envName}`, }); - new cdk.CfnOutput(this, 'MtlsKeySecretArn', { - value: this.mtlsKeySecret.secretArn, - description: `mTLS key Secrets Manager ARN for ${envName}`, + new cdk.CfnOutput(this, 'MtlsApiSecretName', { + value: this.apiSecretName, + description: `mTLS api (reader) bundle secret name for ${envName}`, }); cdk.Tags.of(this).add('Project', 'stellar-prices-api'); diff --git a/infra/src/lib/types.ts b/infra/src/lib/types.ts index a3bca48..a520c16 100644 --- a/infra/src/lib/types.ts +++ b/infra/src/lib/types.ts @@ -62,6 +62,52 @@ export interface EnvironmentConfig { /** Old-data partition drop (ALTER TABLE … DROP PARTITION). */ readonly cleanup: string; }; + + // Ledger Processor ingest (consumed by IngestStack — task 0038) + + /** + * Sizing + SQS-source tuning for the live Prices Ledger Processor + * Lambda. The Lambda is a content-free SQS "doorbell" consumer; per + * the 2026-06-10 cross-team decision (task 0038 §C.1) the doorbells + * arrive via SNS fan-out off BE's `stellar-ledger-data` bucket + * (`S3 → SNS → prices-ingest SQS + DLQ → Lambda`). + * + * Mirrors BE's indexer knobs (`compute-stack.ts`): `batchSize = 1` + * and `reservedConcurrency = 1` are **load-bearing for ordering** + * — two concurrent invocations would race the cursor — not perf + * preferences. `maxReceiveCount = 10` (vs the usual 3) absorbs the + * ESM over-poll/throttle churn that `concurrency = 1` induces so a + * processable doorbell is never false-DLQ'd. + */ + readonly ledgerProcessor: { + /** Lambda memory (MB). */ + readonly memoryMb: number; + /** Lambda timeout (seconds). The SQS visibility timeout is set to this + 60s. 
*/ + readonly timeoutSeconds: number; + /** Reserved concurrency. MUST be 1 — serial execution is the ordering guarantee. */ + readonly reservedConcurrency: number; + /** SQS event-source batch size. 1 mirrors BE (doorbell, body ignored). */ + readonly sqsBatchSize: number; + /** SQS redrive threshold before a message lands in the DLQ. */ + readonly maxReceiveCount: number; + /** + * Max contiguous ledgers walked per reconcile run (`MAX_ITERATIONS`). + * Bounds one invocation's S3 fetch + decode budget against the Lambda + * timeout; the Rust default is 16. + */ + readonly maxIterations: number; + /** + * KMS key ARN protecting BE's `stellar-ledger-data` bucket, if it is + * SSE-KMS encrypted. When set, the ledger-processor role is granted + * `kms:Decrypt` on this key — `grantRead` on a bucket imported by + * attributes (no `encryptionKey`) does NOT add it, so without this a + * KMS-encrypted bucket returns `AccessDenied` on every `GetObject` + * (which the fetcher maps to a hard error that DLQ's the doorbell, not + * a gap). Leave unset for an SSE-S3 / unencrypted bucket. Confirm with + * BE (task 0038 §C.2). + */ + readonly bucketKmsKeyArn?: string; + }; } /** @@ -129,6 +175,54 @@ export function validateConfig(config: EnvironmentConfig): void { } } + const lp = config.ledgerProcessor; + if (!lp || typeof lp !== 'object') { + errors.push('ledgerProcessor missing or not an object'); + } else { + if (!Number.isInteger(lp.memoryMb) || lp.memoryMb < 128) { + errors.push( + `ledgerProcessor.memoryMb must be an integer >= 128, got: ${lp.memoryMb}`, + ); + } + if (!Number.isInteger(lp.timeoutSeconds) || lp.timeoutSeconds < 1) { + errors.push( + `ledgerProcessor.timeoutSeconds must be a positive integer, got: ${lp.timeoutSeconds}`, + ); + } + // Ordering correctness depends on serial execution — reject anything + // but 1. Two concurrent invocations would race the cursor (BE's + // load-bearing `reservedConcurrentExecutions = 1`, mirrored here). 
+ if (lp.reservedConcurrency !== 1) { + errors.push( + `ledgerProcessor.reservedConcurrency must be exactly 1 (serial execution is the ordering guarantee), got: ${lp.reservedConcurrency}`, + ); + } + if (!Number.isInteger(lp.sqsBatchSize) || lp.sqsBatchSize < 1) { + errors.push( + `ledgerProcessor.sqsBatchSize must be a positive integer, got: ${lp.sqsBatchSize}`, + ); + } + if (!Number.isInteger(lp.maxReceiveCount) || lp.maxReceiveCount < 1) { + errors.push( + `ledgerProcessor.maxReceiveCount must be a positive integer, got: ${lp.maxReceiveCount}`, + ); + } + if (!Number.isInteger(lp.maxIterations) || lp.maxIterations < 1) { + errors.push( + `ledgerProcessor.maxIterations must be a positive integer, got: ${lp.maxIterations}`, + ); + } + if ( + lp.bucketKmsKeyArn !== undefined && + (typeof lp.bucketKmsKeyArn !== 'string' || + !lp.bucketKmsKeyArn.startsWith('arn:aws:kms:')) + ) { + errors.push( + `ledgerProcessor.bucketKmsKeyArn, when set, must be a KMS key ARN, got: ${lp.bucketKmsKeyArn}`, + ); + } + } + if (errors.length > 0) { throw new Error( `Invalid EnvironmentConfig for "${config.envName}":\n - ${errors.join( diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda.md deleted file mode 100644 index f545a1c..0000000 --- a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda.md +++ /dev/null @@ -1,288 +0,0 @@ ---- -id: "0038" -title: "Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv" -type: FEATURE -status: active -related_adr: ["0001", "0003", "0004", "0005", "0006", "0007"] -related_tasks: ["0011", "0037", "0045", "0047", "0048"] -tags: [layer-indexing, priority-high, effort-large, milestone-M1, stream-1, lambda, ingestion, rust, aws, clickhouse, hetzner] -milestone: 1 -links: - - "../../../docs/prices-api-general-overview.md" - - "../../2-adrs/0001_stream1-clickhouse-sourced-amm-backfill.md" - - 
"../../2-adrs/0003_price-ohlcv-pk-includes-quote-asset-id.md" - - "../../2-adrs/0004_price-ohlcv-multi-source-merge-columns.md" - - "../../2-adrs/0005_stream2-sdex-local-workstation-backfill.md" - - "../../2-adrs/0006_runtime-framework-rust-axum.md" - - "../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" - - "../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" - - "../backlog/0011_FEATURE_bootstrap-cdk-with-ssm-platform-lookups.md" - - "../backlog/0037_FEATURE_tranche1-ledger-processor-skeleton.md" - - "../backlog/0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" -history: - - date: 2026-05-18 - status: backlog - who: oski - note: > - Drafted to fill the gap between 0011 (CDK + RDS bootstrap) and - 0037 (Rust extractor skeleton). The general-overview §5.2 and - ADR 0001 §4 (point 4 of the Decision) both name a live - "Prices Ledger Processor Lambda" but no task represented it - end-to-end. This task wires the 0037 dispatch kernel into a - Lambda binary that consumes S3 PutObject events on BE's - `stellar-ledger-data/` bucket and writes 1-min OHLCV rows - into the prices RDS provisioned by 0011. - - date: 2026-05-18 - status: blocked - who: oski - by: ["0011", "0037"] - note: > - Moved to blocked/ — 0011 provides the RDS + Lambda CDK - stack scaffolding; 0037 provides the extractor kernel and - dispatch surface. Both are themselves backlog as of this - date. - - date: 2026-05-18 - status: blocked - who: okarcz - note: > - Redesign pending. Task 0044's research (synthesis §3) and - ADR 0007 (proposed) call for major rewrite of this task — - sqlx → `clickhouse` crate; PG UPSERT with ADR 0004 merge - formula → ReplacingMergeTree per-source rows; VPC Lambda - → no-VPC + mTLS. Hold rewrite until both gating events - clear: (1) BE Hetzner CH ships, (2) ADR 0007 accepted - (gated on task 0045). Do not start implementation against - this spec. 
- - date: 2026-05-19 - status: blocked - who: okarcz - note: > - Decoder + Lambda E2E spec landed in task 0048's G-note - (`lore/1-tasks/active/0048_…/notes/G-soroban-events-pricing-decoder.md`). - Spec is grounded in a 10k uniform sample of the local - backfill CH and aligned with ADR 0007. When the - gating events clear, the rewrite implements 0048 directly - (not the §Implementation Plan in this file, which - assumed the RDS/VPC shape). - - date: 2026-05-20 - status: blocked - who: okarcz - note: > - ADR 0007 accepted (PR closing task 0045 lands today; BE - agreement record at G-be-agreement-record.md is the - authoritative cross-team contract). The architectural - uncertainty is resolved; this task's blockers are now - strictly engineering: (a) BE 0227 ships the Hetzner mTLS - endpoint, (b) task 0047 verifies cross-tenant throughput - GREEN/YELLOW (a RED outcome supersedes ADR 0007 to the - sidecar-CH variant — same rewrite shape, different host). - Task stays blocked; rewrite begins once (a) and (b) clear. - - date: 2026-06-08 - status: active - who: oski - note: > - Activated with scope reduction. Original engineering blockers - ((a) BE 0227 mTLS endpoint, (b) task 0047 cross-tenant - throughput verification) remain unresolved, so this activation - is **local-only**: build a runnable local Lambda binary, - exercise it against recorded fixtures, and produce a written - design document for cross-team discussion with the BE team. - Out-of-scope for this activation: any AWS deploy, S3 - notification registration on BE's bucket, SSM platform-key - consumption, CDK stack apply, or live RDS/CH writes. The - goal is to give BE something concrete (binary + spec) to - react to before the gating events clear — see the - forthcoming G-note on local-prototype scope under - `notes/G-local-prototype-spec.md`. 
---- - -# Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv - -## Summary - -Build the production Lambda function that drives **live go-forward -ingestion** for prices-api: an S3-PutObject-triggered Rust binary -that consumes one `LedgerCloseMeta` XDR file per invocation, -extracts SDEX trades and Soroban AMM swaps via the -`packages/ledger-processor::dispatch` kernel from task 0037, and -UPSERTs 1-minute OHLCV rows into the cloud RDS `price_ohlcv` table -using the PK shape mandated by ADR 0003. This is the on-tip half -of the data ingestion layer described in §1.2 / §5.2 of the -general-overview doc; the historical half lives in ADR 0001 -(Stream 1) and ADR 0005 (Stream 2). - -## Context - -Per the general-overview doc §2.1 (Components Hosted by Prices API) -and §5.2 (Prices Ledger Processor (Rust)), the live ingestion path -is a Rust Lambda registered as a **second S3 event notification -target** on Block Explorer's existing `stellar-ledger-data/` bucket -(the first target is BE's own Ledger Processor). Per ADR 0001 §4 -(Decision point 4 — "Live go-forward Soroban AMM ingestion does -NOT depend on CH"), this Lambda is the system of record for live -Soroban AMM swaps once Stream 1 has landed its one-shot historical -push; CH is bounded to the historical window. - -As of 2026-05-18: - -- 0011 (CDK bootstrap) provisions the RDS, the Lambda execution - role, and the per-env CDK stack scaffolding — but does not yet - create the Prices Ledger Processor function itself. -- 0037 (Tranche 1 Ledger Processor skeleton) lands the workspace - layout, the per-venue `SwapExtractor` trait, the Phoenix pool - registry, and a stub `dispatch()` function — but no Lambda - packaging, no S3 client, no RDS writer, no XDR decode. - -This task fills the gap: it takes the kernel from 0037, wraps it -in `lambda_runtime`, and wires the S3-event → XDR-decode → -extract → bucket → UPSERT loop end-to-end. 
- -## Implementation Plan - -### Step 1: Lambda binary crate - -Add `packages/prices-ledger-processor` (binary crate) inside the -workspace established by 0037. Depend on: - -- `lambda_runtime` — `provided.al2` custom runtime entrypoint - (per overview §8 Tech Stack and ADR 0006 §Decision). -- `aws_sdk_s3` — to GET the ledger object referenced by the event. -- `xdr-parser` — BE-authored crate consumed as a git Cargo dep - per ADR 0005 §3; decodes `LedgerCloseMeta` and the - `SorobanTransactionMeta.events` / `OperationResult` shapes. -- `packages/ledger-processor::dispatch` — the kernel from 0037. -- `sqlx` (Postgres, async, compile-time queries) — per ADR 0006. - -`main` should: deserialize an `S3Event`, fetch the object, -zstd-decompress, parse as `LedgerCloseMeta`, hand the parsed -ledger to `dispatch()`, bucket the returned trades into 1-min -OHLCV candles, and UPSERT into `price_ohlcv`. - -### Step 2: S3-event handler - -For each S3 record in the incoming batch: - -1. GET the object from the bucket/key in the event. -2. Decompress (`zstd`); the Galexie output is `*.xdr.zstd` - (§5.1). -3. Parse via `xdr-parser` into `LedgerCloseMeta`. -4. Pass to `dispatch()` — the kernel does Soroban AMM extraction - today (0037) and SDEX trade extraction once that extractor - is wired in (see ADR 0002 / task 0022's spec for SDEX - trade-shaped op types and `ClaimAtom` → `TradeTick`). - -### Step 3: 1-minute OHLCV bucketing + UPSERT - -Per overview §5.2 "Write semantics — UPSERT, not INSERT": - -- Group extracted trades by `(floor_minute(closed_at), asset_id, - '1m', source)`. -- For each bucket, emit one `INSERT ... ON CONFLICT (timestamp, - asset_id, granularity) DO UPDATE` with the **incremental-merge** - update expression (preserve `open`, overwrite `close`, - `GREATEST(high)`, `LEAST(low)`, sum `volume_base` / - `volume_quote_usd` / `trade_count`, recompute `vwap`). 
-- PK is the ADR 0003 shape (`timestamp, asset_id, granularity`) - — quote_asset_id participates per that ADR if/when the column - is added; this task follows whatever PK shape the 0011 schema - migration lands. -- `source` column is set to `'sdex' | 'soroswap' | 'aquarius' | - 'phoenix'` per overview §3.2 examples and ADR 0004's - multi-source merge columns. - -### Step 4: CDK Lambda stack wiring (depends on 0011) - -In `infra/aws-cdk/` (created by 0011): - -- New `LedgerProcessorStack` (or extension of an existing Lambda - stack) defining the `prices-ledger-processor` function: - runtime `provided.al2`, memory 512–1024 MB (size at impl - time), timeout 60s, VPC attachment to BE's VPC (per §11.1, - via the 0011 SSM lookups). -- IAM role: read on BE's `stellar-ledger-data/` bucket, write - on RDS via Secrets Manager DB credentials. -- **S3 notification registration**: add the Lambda as a second - notification target on BE's bucket. Per §5.1 this requires - coordination with the BE team (the bucket is BE-owned). - Document the SSM key BE must publish (e.g. - `/platform/{env}/stellar-ledger-data-bucket-arn`) and any - BE-side stack change required. -- CloudWatch alarms: invocation errors, duration p95, DLQ depth - (deferred to follow-up if DLQ design is non-trivial). - -### Step 5: Tests - -- Unit: feed a recorded `LedgerCloseMeta` fixture through - `dispatch()` + bucketing and assert the emitted UPSERT rows - (use sqlx test-tx pattern). -- Integration: spin up a local Postgres (Docker) with the 0011 - schema applied, run the binary against a recorded S3 event - + fixture, assert rows land with the expected PK and - incremental-merge semantics. - -### Step 6: Observability - -- Structured logs (JSON) per invocation: ledger sequence, - decode time, trade count by source, UPSERT count, total - duration. CloudWatch Logs + X-Ray per §2.1. 
-- Metric: `prices.ledger_processor.lag_seconds` = - `now() - ledger.closed_at` at invocation time; alarms if - >60s sustained (matches the §5.1 Galexie lag alarm shape). - -## Acceptance Criteria - -- [ ] `packages/prices-ledger-processor` binary builds against - `provided.al2` (cargo lambda or equivalent). -- [ ] Lambda is registered as a second S3 notification target on - BE's `stellar-ledger-data/` bucket via CDK; no conflict - with BE's own Ledger Processor registration. -- [ ] Given a recorded `LedgerCloseMeta` containing ≥1 Soroban - AMM swap and ≥1 SDEX trade, the binary writes the expected - 1-min `price_ohlcv` rows via UPSERT with the ADR 0003 PK - shape and ADR 0004 multi-source columns. -- [ ] Re-invoking with the same ledger event is idempotent: row - counts and column values unchanged (incremental-merge - preserves `open`, refreshes `close`, etc.). -- [ ] `prices.ledger_processor.lag_seconds` metric published to - CloudWatch; alarm wired up to fire on >60s sustained lag. -- [ ] Integration test covers: S3 event → fetched object → - decoded XDR → dispatched extract → UPSERTed row against - a local Postgres mirroring the 0011 schema. -- [ ] Docs: README in `packages/prices-ledger-processor` - describing the S3 event contract, the BE-coordination - step for bucket notifications, and the SSM keys consumed. - -## Blocked on - -- **0011** — RDS + CDK Lambda stack scaffolding + SSM platform - lookups (VPC, BE bucket ARN). Without 0011, this Lambda has - no target DB and no CDK stack to live in. -- **0037** — `packages/ledger-processor::dispatch` kernel and - the `SwapExtractor` trait surface. Without 0037, this task - has no extraction primitive to call. - -## Out of scope - -- SDEX trade extractor body — 0037's skeleton stubs it; the real - body is task 0022's spec landed under a separate FEATURE task - (not yet spawned). This Lambda just calls `dispatch()` and - uses whatever extractors exist at the time. 
-- Historical backfill — Stream 1 (ADR 0001) and Stream 2 - (ADR 0005) are separate paths; this task is **live only**. -- Asset registry maintenance — handled by 0039's Asset Discovery - worker. -- Current-price aggregation across sources — handled by 0039's - Current Price Updater worker. - -## Notes - -- This is the first Rust Lambda in the project per ADR 0006 - §Decision. Conventions for `cargo lambda` packaging, CI build - caching, and `provided.al2` ZIP layout established here will - be reused by 0039 and 0040. -- Coordinate the S3 notification registration with the BE team - early — adding a second notification target to a bucket BE - owns is a cross-team change, not a unilateral one. -- The 1-min UPSERT contract is shared with both backfill - streams; keep the merge SQL in a shared `packages/ohlcv-writer` - module (or similar) so live + backfill writers stay in sync. diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md new file mode 100644 index 0000000..bbc790c --- /dev/null +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/README.md @@ -0,0 +1,540 @@ +--- +id: "0038" +title: "Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv" +type: FEATURE +status: active +related_adr: ["0001", "0003", "0004", "0005", "0006", "0007"] +related_tasks: ["0011", "0037", "0045", "0047", "0048", "0050"] +tags: [layer-indexing, priority-high, effort-large, milestone-M1, stream-1, lambda, ingestion, rust, aws, clickhouse, hetzner] +milestone: 1 +links: + - "../../../../docs/prices-api-general-overview.md" + - "../../../2-adrs/0001_stream1-clickhouse-sourced-amm-backfill.md" + - "../../../2-adrs/0003_price-ohlcv-pk-includes-quote-asset-id.md" + - "../../../2-adrs/0004_price-ohlcv-multi-source-merge-columns.md" + - "../../../2-adrs/0005_stream2-sdex-local-workstation-backfill.md" + - 
"../../../2-adrs/0006_runtime-framework-rust-axum.md" + - "../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" + - "../../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" + - "../../archive/0011_FEATURE_bootstrap-cdk-with-ssm-platform-lookups.md" + - "../../archive/0037_FEATURE_tranche1-ledger-processor-skeleton.md" + - "../../backlog/0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" +history: + - date: 2026-05-18 + status: backlog + who: oski + note: > + Drafted to fill the gap between 0011 (CDK + RDS bootstrap) and + 0037 (Rust extractor skeleton). The general-overview §5.2 and + ADR 0001 §4 (point 4 of the Decision) both name a live + "Prices Ledger Processor Lambda" but no task represented it + end-to-end. This task wires the 0037 dispatch kernel into a + Lambda binary that consumes S3 PutObject events on BE's + `stellar-ledger-data/` bucket and writes 1-min OHLCV rows + into the prices RDS provisioned by 0011. + - date: 2026-05-18 + status: blocked + who: oski + by: ["0011", "0037"] + note: > + Moved to blocked/ — 0011 provides the RDS + Lambda CDK + stack scaffolding; 0037 provides the extractor kernel and + dispatch surface. Both are themselves backlog as of this + date. + - date: 2026-05-18 + status: blocked + who: okarcz + note: > + Redesign pending. Task 0044's research (synthesis §3) and + ADR 0007 (proposed) call for major rewrite of this task — + sqlx → `clickhouse` crate; PG UPSERT with ADR 0004 merge + formula → ReplacingMergeTree per-source rows; VPC Lambda + → no-VPC + mTLS. Hold rewrite until both gating events + clear: (1) BE Hetzner CH ships, (2) ADR 0007 accepted + (gated on task 0045). Do not start implementation against + this spec. + - date: 2026-05-19 + status: blocked + who: okarcz + note: > + Decoder + Lambda E2E spec landed in task 0048's G-note + (`lore/1-tasks/active/0048_…/notes/G-soroban-events-pricing-decoder.md`). 
+ Spec is grounded in a 10k uniform sample of the local + backfill CH and aligned with ADR 0007. When the + gating events clear, the rewrite implements 0048 directly + (not the §Implementation Plan in this file, which + assumed the RDS/VPC shape). + - date: 2026-05-20 + status: blocked + who: okarcz + note: > + ADR 0007 accepted (PR closing task 0045 lands today; BE + agreement record at G-be-agreement-record.md is the + authoritative cross-team contract). The architectural + uncertainty is resolved; this task's blockers are now + strictly engineering: (a) BE 0227 ships the Hetzner mTLS + endpoint, (b) task 0047 verifies cross-tenant throughput + GREEN/YELLOW (a RED outcome supersedes ADR 0007 to the + sidecar-CH variant — same rewrite shape, different host). + Task stays blocked; rewrite begins once (a) and (b) clear. + - date: 2026-06-08 + status: active + who: oski + note: > + Activated with scope reduction. Original engineering blockers + ((a) BE 0227 mTLS endpoint, (b) task 0047 cross-tenant + throughput verification) remain unresolved, so this activation + is **local-only**: build a runnable local Lambda binary, + exercise it against recorded fixtures, and produce a written + design document for cross-team discussion with the BE team. + Out-of-scope for this activation: any AWS deploy, S3 + notification registration on BE's bucket, SSM platform-key + consumption, CDK stack apply, or live RDS/CH writes. The + goal is to give BE something concrete (binary + spec) to + react to before the gating events clear — see the + forthcoming G-note on local-prototype scope under + `notes/G-local-prototype-spec.md`. + - date: 2026-06-08 + status: blocked + who: oski + note: > + Local-prototype scope shipped: spec G-note + runnable Phase 1 + scaffolding + Phase 2 real XDR decode against + BE-sourced fixtures (commits f17353f, 1137464, bd2ea9d, + 10b60a3, fb57196 on branch feat/0038_prices-ledger-processor-lambda; + PR #34). 
Task moves back to blocked pending the cross-team + meeting with the BE team — the Part C asks in + `notes/G-local-prototype-spec.md` are the agenda + (SQS notification ownership, env-var injection vs SSM-at-runtime, + xdr-parser tag-pinning + semver, `db-clickhouse::mtls` reuse, + Caddyfile `CLICKHOUSE_CN_USER_MAP` for `prices-api-{env}`, + mTLS cert issuance). Original engineering gates + (BE 0227 + task 0047) also remain open. Unblocks: after the + meeting answers Part C, and either gating engineering event + clears. + - date: 2026-06-10 + status: active + who: oski + note: > + Cross-team meeting held. **Part C.1 RESOLVED: SNS fan-out** + (not a second direct S3→SQS notification). BE will refactor + their bucket-side notification from `S3 → SQS` to + `S3 → SNS → SQS` (`SnsDestination` + `rawMessageDelivery: true` + so their indexer's S3-event parser is unchanged); prices-api + owns its **own** `prices-ingest-{env}` SQS queue + DLQ + subscribing to the BE SNS topic, plus its own Lambda. Failure + isolation preserved (a prices-side backlog/DLQ never pressures + BE's indexer queue). The doorbell-cursor design is unaffected + by the transport choice — the Lambda ignores the message body + regardless of SNS-vs-SQS — so no reconcile-loop change; only + doc/comment narrative and the (gated) CDK wiring change. + Decision recorded inline in `notes/G-local-prototype-spec.md` + §C.1. The SNS-topic ownership + cross-account subscription is + the cross-team artefact tracked by task 0050. Moved back to + active for continued local-scope work; the production AWS + wiring (Part E) stays gated on BE 0227 + task 0047. + - date: 2026-06-24 + status: active + who: oski + note: > + Refactored the Lambda onto the **shared, tested ingestion core** + and landed the two production data-plane seams 0052/0063 unblocked. 
+ The prototype's hand-rolled decode/bucket/canonicalisation diverged + from the tested `sdex-backfill` (String asset ids + lexicographic + orientation + f64, vs the real `price_ohlcv_1m`'s UInt32 surrogate + ids + SAC→classic collapse + Decimal/version) — so writing it to the + **shared** `prices.price_ohlcv_1m` would split liquidity. Extracted + `packages/prices-ingest-core` (canonical/price/tick/bucket/filter/ + soroban + the transport-agnostic `OhlcvWriter`) out of sdex-backfill + and repointed both crates at it, so live + backfill now emit + byte-identical rows. Replaced the prototype `bucket.rs`/`decode.rs`/ + stdout+sql_file sinks (→ `.trash/`) with: a core-backed reconcile + loop, an `S3Fetcher` (`aws-sdk-s3`, `lambda` feature), and a + `ClickHouseSink` over `prices-clickhouse::mtls` (`aws-mtls` feature, + the task-0052 client). Default build stays lean (no rustls/lambda); + `--features lambda` compiles the full SQS-doorbell + S3 + mTLS path. + Tests: 13 core + 5 sdex (regression gate green) + 15 lambda-unit + 3 + real-fixture e2e (decode→bucket→cursor, gap-stop, idempotent). fmt + + clippy clean. **Prepare-only — no deploy, no prod writes** (Part E + deploy/cert/Caddy still gated on BE 0227 + task 0047). Stays active. + - date: 2026-06-24 + status: active + who: claude + note: > + Applied the safe-set fixes from the PR #34 review (commit 673f775): + wired INITIAL_CURSOR (prices SSM) / CURSOR_FILE / MAX_ITERATIONS into + the Lambda env, optional kms:Decrypt grant, SQS maxConcurrency=2, + BadResponse redaction moved to the core error source, concurrent + cold-start init. Added a Deploy prerequisites checklist (bootstrap + cursor SSM param + source-bucket KMS confirmation). Findings #1/#3/#5 + annotated on follow-ups 0064/0065. Stays active. 
+ - date: 2026-06-25 + status: active + who: claude + note: > + Resolved the deploy-prep source-bucket KMS item (finding #4): BE's + production-stellar-ledger-data is SSE-S3 (AES256), not SSE-KMS — + confirmed in the BE repo ledger-bucket-stack.ts (S3_MANAGED) and BE + task 0278 (dropped KMS for SSE-S3 to cut per-object KMS request cost + on the public XDR). So bucketKmsKeyArn stays unset; grantRead's plain + S3 perms suffice, no kms:Decrypt / 403 risk. Checklist item ticked. + Stays active. +--- + +# Prices Ledger Processor Lambda — live S3-event-driven ingestion into price_ohlcv + +## Summary + +Build the production Lambda function that drives **live go-forward +ingestion** for prices-api: an S3-PutObject-triggered Rust binary +that consumes one `LedgerCloseMeta` XDR file per invocation, +extracts SDEX trades and Soroban AMM swaps via the +`packages/ledger-processor::dispatch` kernel from task 0037, and +UPSERTs 1-minute OHLCV rows into the cloud RDS `price_ohlcv` table +using the PK shape mandated by ADR 0003. This is the on-tip half +of the data ingestion layer described in §1.2 / §5.2 of the +general-overview doc; the historical half lives in ADR 0001 +(Stream 1) and ADR 0005 (Stream 2). + +## Context + +Per the general-overview doc §2.1 (Components Hosted by Prices API) +and §5.2 (Prices Ledger Processor (Rust)), the live ingestion path +is a Rust Lambda driven by a **content-free SQS doorbell**. Per the +2026-06-10 cross-team decision (history below; spec §C.1), BE's +`stellar-ledger-data/` bucket fans out object-created events via +**SNS** (`S3 → SNS → SQS`); prices-api owns its **own** +`prices-ingest-{env}` SQS queue + DLQ subscribed to that topic, so a +prices-side backlog can never pressure BE's indexer queue. (BE's own +queue subscribes to the same topic with `rawMessageDelivery: true`, +leaving their indexer's S3-event parser unchanged.) 
Per ADR 0001 §4 +(Decision point 4 — "Live go-forward Soroban AMM ingestion does +NOT depend on CH"), this Lambda is the system of record for live +Soroban AMM swaps once Stream 1 has landed its one-shot historical +push; CH is bounded to the historical window. + +As of 2026-05-18: + +- 0011 (CDK bootstrap) provisions the RDS, the Lambda execution + role, and the per-env CDK stack scaffolding — but does not yet + create the Prices Ledger Processor function itself. +- 0037 (Tranche 1 Ledger Processor skeleton) lands the workspace + layout, the per-venue `SwapExtractor` trait, the Phoenix pool + registry, and a stub `dispatch()` function — but no Lambda + packaging, no S3 client, no RDS writer, no XDR decode. + +This task fills the gap: it takes the kernel from 0037, wraps it +in `lambda_runtime`, and wires the S3-event → XDR-decode → +extract → bucket → UPSERT loop end-to-end. + +## Implementation Plan + +### Step 1: Lambda binary crate + +Add `packages/prices-ledger-processor` (binary crate) inside the +workspace established by 0037. Depend on: + +- `lambda_runtime` — `provided.al2` custom runtime entrypoint + (per overview §8 Tech Stack and ADR 0006 §Decision). +- `aws_sdk_s3` — to GET the ledger object referenced by the event. +- `xdr-parser` — BE-authored crate consumed as a git Cargo dep + per ADR 0005 §3; decodes `LedgerCloseMeta` and the + `SorobanTransactionMeta.events` / `OperationResult` shapes. +- `packages/ledger-processor::dispatch` — the kernel from 0037. +- `sqlx` (Postgres, async, compile-time queries) — per ADR 0006. + +`main` should: deserialize an `S3Event`, fetch the object, +zstd-decompress, parse as `LedgerCloseMeta`, hand the parsed +ledger to `dispatch()`, bucket the returned trades into 1-min +OHLCV candles, and UPSERT into `price_ohlcv`. + +### Step 2: S3-event handler + +For each S3 record in the incoming batch: + +1. GET the object from the bucket/key in the event. +2. Decompress (`zstd`); the Galexie output is `*.xdr.zstd` + (§5.1). +3. 
Parse via `xdr-parser` into `LedgerCloseMeta`. +4. Pass to `dispatch()` — the kernel does Soroban AMM extraction + today (0037) and SDEX trade extraction once that extractor + is wired in (see ADR 0002 / task 0022's spec for SDEX + trade-shaped op types and `ClaimAtom` → `TradeTick`). + +### Step 3: 1-minute OHLCV bucketing + UPSERT + +Per overview §5.2 "Write semantics — UPSERT, not INSERT": + +- Group extracted trades by `(floor_minute(closed_at), asset_id, + '1m', source)`. +- For each bucket, emit one `INSERT ... ON CONFLICT (timestamp, + asset_id, granularity) DO UPDATE` with the **incremental-merge** + update expression (preserve `open`, overwrite `close`, + `GREATEST(high)`, `LEAST(low)`, sum `volume_base` / + `volume_quote_usd` / `trade_count`, recompute `vwap`). +- PK is the ADR 0003 shape (`timestamp, asset_id, granularity`) + — quote_asset_id participates per that ADR if/when the column + is added; this task follows whatever PK shape the 0011 schema + migration lands. +- `source` column is set to `'sdex' | 'soroswap' | 'aquarius' | + 'phoenix'` per overview §3.2 examples and ADR 0004's + multi-source merge columns. + +### Step 4: CDK Lambda stack wiring (depends on 0011) + +In `infra/aws-cdk/` (created by 0011): + +- New `LedgerProcessorStack` (or extension of an existing Lambda + stack) defining the `prices-ledger-processor` function: + runtime `provided.al2`, memory 512–1024 MB (size at impl + time), timeout 60s, VPC attachment to BE's VPC (per §11.1, + via the 0011 SSM lookups). +- IAM role: read on BE's `stellar-ledger-data/` bucket, write + on RDS via Secrets Manager DB credentials. +- **S3 notification registration**: add the Lambda as a second + notification target on BE's bucket. Per §5.1 this requires + coordination with the BE team (the bucket is BE-owned). + Document the SSM key BE must publish (e.g. + `/platform/{env}/stellar-ledger-data-bucket-arn`) and any + BE-side stack change required. 
+- CloudWatch alarms: invocation errors, duration p95, DLQ depth + (deferred to follow-up if DLQ design is non-trivial). + +### Step 5: Tests + +- Unit: feed a recorded `LedgerCloseMeta` fixture through + `dispatch()` + bucketing and assert the emitted UPSERT rows + (use sqlx test-tx pattern). +- Integration: spin up a local Postgres (Docker) with the 0011 + schema applied, run the binary against a recorded S3 event + + fixture, assert rows land with the expected PK and + incremental-merge semantics. + +### Step 6: Observability + +- Structured logs (JSON) per invocation: ledger sequence, + decode time, trade count by source, UPSERT count, total + duration. CloudWatch Logs + X-Ray per §2.1. +- Metric: `prices.ledger_processor.lag_seconds` = + `now() - ledger.closed_at` at invocation time; alarms if + >60s sustained (matches the §5.1 Galexie lag alarm shape). + +## Acceptance Criteria + +> Criteria below are the post-ADR-0007 (CH + mTLS + ReplacingMergeTree) +> shape; the original RDS/UPSERT wording is superseded. + +- [x] `packages/prices-ledger-processor` builds; the `lambda`-feature + binary compiles the full `provided.al2023` path (S3 + mTLS + + `lambda_runtime`). `cargo lambda` ZIP packaging is the deploy step. +- [x] Decode → extract → bucket → write reuses the **tested** + `prices-ingest-core` (same code as `sdex-backfill`), so live rows + match the backfill: ADR 0003 PK (`asset_id, quote_asset_id, + source, timestamp`), ADR 0004 multi-source columns, UInt32 + surrogate ids with SAC→classic collapse, `Decimal(38,14)`. +- [x] Re-invoking from the same cursor is idempotent — proven by the + `idempotent_on_re_run_from_same_cursor` e2e test (deterministic + candle set + `version` → ReplacingMergeTree collapses re-inserts). +- [x] Real-fixture integration test: S3-equivalent object → + `decode_object` → dispatch/extract → bucketed candles → cursor + advance, gap-stop, idempotency (`tests/reconcile_e2e.rs`, + self-skips when fixtures absent). 
+- [x] mTLS sink goes through `prices-clickhouse::mtls` (task 0052), not + reinvented; CH error bodies redacted via `safe_log` before logging. +- [x] Docs: `packages/prices-ledger-processor/README.md` — S3/SNS event + contract, BE-coordination (task 0050), env-var/SSM keys consumed. +- [x] Lambda registered as the prices SNS→SQS doorbell target via CDK + (`infra/.../compute-stack.ts`, prepare-only — 2026-06-10). +- [ ] `prices.ledger_processor.lag_seconds` CloudWatch metric + >60s + alarm — **deferred** (CW emit is a deploy concern; spec Part E). +- [ ] Live mTLS write against the Hetzner `prices` DB — **deferred** + (prepare-not-deploy; transport already proven by task 0052's smoke). + +## Blocked on + +- **0011** — RDS + CDK Lambda stack scaffolding + SSM platform + lookups (VPC, BE bucket ARN). Without 0011, this Lambda has + no target DB and no CDK stack to live in. +- **0037** — `packages/ledger-processor::dispatch` kernel and + the `SwapExtractor` trait surface. Without 0037, this task + has no extraction primitive to call. + +## Deploy prerequisites (operator) + +> Prepare-only items the operator must complete before / at deploy. Synth +> fails fast if the SSM param below is absent, so a half-configured Lambda +> can't ship silently. + +- [ ] **Bootstrap cursor** — create SSM param + `/prices/{env}/ledger-processor/initial-cursor` (type `String`) with the + ledger live ingestion should resume from. The reconcile loop seeds its + cursor from this on first start and begins at `value + 1`, so set it to the + **last ledger already accounted for**: the SDEX backfill's + `max(sequence) FROM prices.backfill_sdex_ledgers` for a seamless handoff, + or `currentTip − 1` for a forward-only start. Do **not** use `0` (an + empty-table sentinel — would walk from genesis and never catch up). Wired + into the Lambda env from this key in `compute-stack.ts`; one-time + bootstrap, retired by task 0064. (PR #34 review, findings #2/#3.) 
+- [x] **Source-bucket KMS** — **resolved: not SSE-KMS, leave + `bucketKmsKeyArn` unset.** BE's `production-stellar-ledger-data` is + **SSE-S3 (AES256)**, confirmed in the BE repo + (`infra/src/lib/stacks/ledger-bucket-stack.ts:36`, + `s3.BucketEncryption.S3_MANAGED`) and BE task **0278** (completed + 2026-06-02): KMS was dropped for SSE-S3 to kill the per-object KMS + request cost on the high-volume ingest pipeline, since the XDR is public + on-chain data. So `grantRead`'s plain S3 perms suffice — no `kms:Decrypt` + needed and no `GetObject` 403/DLQ risk. Pre-0278 objects keep their old + SSE-KMS under the AWS-managed `aws/s3` key, still readable by a + same-account principal with S3 access (no explicit `kms:*` IAM required). + Our `bucketKmsKeyArn` slot is for a *customer* KMS key, which this bucket + has none of. (PR #34 review, finding #4.) + +## Out of scope + +- SDEX trade extractor body — 0037's skeleton stubs it; the real + body is task 0022's spec landed under a separate FEATURE task + (not yet spawned). This Lambda just calls `dispatch()` and + uses whatever extractors exist at the time. +- Historical backfill — Stream 1 (ADR 0001) and Stream 2 + (ADR 0005) are separate paths; this task is **live only**. +- Asset registry maintenance — handled by 0039's Asset Discovery + worker. +- Current-price aggregation across sources — handled by 0039's + Current Price Updater worker. + +## Notes + +- This is the first Rust Lambda in the project per ADR 0006 + §Decision. Conventions for `cargo lambda` packaging, CI build + caching, and `provided.al2` ZIP layout established here will + be reused by 0039 and 0040. +- Coordinate the S3 notification registration with the BE team + early — adding a second notification target to a bucket BE + owns is a cross-team change, not a unilateral one. 
+- The 1-min UPSERT contract is shared with both backfill + streams; keep the merge SQL in a shared `packages/ohlcv-writer` + module (or similar) so live + backfill writers stay in sync. + +## Implementation Notes + +> The `## Implementation Plan` above predates ADR 0007 and still +> describes the retired RDS/sqlx/VPC shape. The authoritative design +> is `notes/G-local-prototype-spec.md` (CH + mTLS + no-VPC + +> SNS-doorbell). What was actually built: + +**Local prototype (Phase 1–2, branch `feat/0038_…`, PR #34).** +`packages/prices-ledger-processor` mirrors BE's indexer structure +with three production swap-seams (`ObjectFetcher`, `Cursor`, +`OhlcvSink`). The doorbell-cursor reconcile loop (`src/reconcile.rs`) +reads the cursor, derives the Galexie S3 key for `cursor+1`, fetches, +decodes, dispatches via the 0037 kernel, buckets to 1-min OHLCV, and +**advances the cursor last** — the ordering barrier. Runs against +local fixtures; `cargo check -p prices-ledger-processor` green. + +**SNS decision + CDK ingest wiring (2026-06-10).** Folded the live +ingest wiring into `infra/src/lib/stacks/compute-stack.ts`: +prices-owned `prices-ingest-{env}` SQS + `prices-ingest-dlq-{env}` +DLQ (`maxReceiveCount=10`), an SNS subscription to BE's imported +`ledger-events` topic (`rawMessageDelivery`), the ledger-processor +`lambda.Function` (ARM64 / `provided.al2023`, `reservedConcurrency=1`, +`batchSize=1`, `timeout+60s` visibility), the event-source-mapping, +and IAM (S3 read on BE's bucket, CloudWatch lag metric, X-Ray). +Env-var contract sourced from `/platform/{env}/*` SSM at deploy +(spec §C.2, incl. the new `ledger-events-topic-arn` key). `nx build` ++ `cdk synth Prices-production-Compute` both pass. **Prepare-only — +no deploy** (gated on BE 0227 + task 0047 + BE publishing the SSM +keys/topic). 
+ +**Shared-core refactor + data-plane seams (2026-06-24).** The +prototype reimplemented decode/bucket/canonicalisation by hand and it +diverged from the tested `sdex-backfill` — fatal once a real sink +writes to the *shared* `prices.price_ohlcv_1m` (different asset ids + +orientation → split liquidity). Fixed by extracting +`packages/prices-ingest-core` (the tested `canonical`/`price`/`tick`/ +`bucket`/`filter`/`soroban` modules + a transport-agnostic +`OhlcvWriter` split out of the backfill `Sink`) and repointing **both** +`sdex-backfill` and this Lambda at it. The Lambda now keeps only its +transport shell: +- `src/reconcile.rs` — doorbell-cursor loop calling `prices_ingest_core` + (`extract_trades` + `process_ledger` → `CandleAccumulator`), warm + `AssetRegistry` + `Registries` loaded from `prices.assets` at cold + start, accumulate across the contiguous run, flush + advance cursor + **last**. +- `src/object_fetcher/s3.rs` — `S3Fetcher` (`aws-sdk-s3` GetObject; + `NoSuchKey`→gap), `lambda` feature. +- `src/sink/mod.rs` — `ClickHouseSink` over the shared `OhlcvWriter`; + `plaintext` (local) and `from_lambda_env` (mTLS via + `prices-clickhouse::mtls`, `aws-mtls` feature); writes retried via + `retry.rs`, CH errors redacted via `safe_log`. +- `src/bin/cli.rs` — local fixture runner (`--dry-run` counts; else + writes to local plaintext CH). +- `src/main.rs` — SQS-doorbell entrypoint (`lambda` feature, eager + cold-start init). + +Retired to `.trash/0038-lambda-prototype/`: `bucket.rs`, `decode.rs`, +`sink/{sql_file,stdout}.rs`. Feature matrix: `default` lean (no +rustls/lambda), `aws-mtls`, `lambda` (= `aws-mtls` + runtime + S3). +Tests: 13 core + 5 sdex (regression gate) + 15 lambda-unit + 3 +real-fixture e2e. fmt + clippy clean. 
+ +**Broken/modified tests:** `tests/reconcile_e2e.rs` rewritten — the old +synthetic-`LedgerDecoder` fakes are gone (the decode seam was removed in +favour of the shared `decode_object`); it now drives the real pipeline +over the three bundled fixture ledgers (62460540–542) and self-skips +when fixtures are absent. Intentional, not a regression. + +## Design Decisions + +### Emerged + +1. **Ingest wiring lives in `ComputeStack`, not a separate + `IngestStack`.** First drafted as a standalone stack consuming + ComputeStack's `ledgerProcessorRole`; this created a + CloudFormation **dependency cycle** — the event-source-mapping and + the queue/bucket grants mutate the role's policy with the other + stack's ARNs, so Compute↔Ingest depend on each other. Co-locating + role + queue + Function in one stack removes the cycle and matches + BE's single-`compute-stack.ts` shape. (`ingest-stack.ts` moved to + `.trash/`.) +2. **`lambda.Function` + `Code.fromAsset`, not `RustFunction`.** The + prices infra doesn't carry `cargo-lambda-cdk`; rather than add an + uninstalled dependency, the Function consumes the pre-built + `cargo lambda build` bootstrap. Adopting `RustFunction` (synth-time + build, exactly BE's shape) is a follow-up once the dep lands. +3. **`reservedConcurrency` pinned to exactly 1 in `validateConfig`.** + Not a tunable — serial execution is the ordering guarantee, so the + config validator rejects any other value rather than letting a + typo silently break ordering at deploy. +4. **Refactor onto the shared core instead of keeping the prototype's + own decode/bucket (2026-06-24).** The user-confirmed call: a real + sink writing the prototype's String-id/f64/lexicographic rows to the + *shared* `prices.price_ohlcv_1m` would not match the backfill → + split liquidity. Resolved by extracting `prices-ingest-core` and + reusing it (partial "reconcile" of the live path onto the tested + code), not by reconciling ids inside the sink. 
Realises the task's + own Notes ask ("keep the merge SQL in a shared module so live + + backfill writers stay in sync"). +5. **`OhlcvWriter` takes a `clickhouse::Client`, not a URL.** Lets the + one writer serve both the plaintext local client and the task-0052 + mTLS client (both are `clickhouse::Client`) — the audit rule that + every remote CH access goes through `prices-clickhouse::mtls` holds. +6. **Candles accumulate across the whole contiguous run, flushed once.** + Matches the backfill's per-chunk accumulation so intra-run minutes + aggregate. A minute split across two *separate* invocations lands as + two `version`-keyed rows (RMT keeps the latest) — the same + characteristic the backfill has across partition boundaries; the fix + is a periodic re-aggregation (spawned as backlog). + +## Future Work + +> Each item below is spawned as a backlog task (don't leave as prose). + +- **0064** — CH-backed cursor (replace `StubFileCursor`; spec D.1). +- **0065** — periodic OHLCV re-aggregation for cross-invocation / + cross-chunk intra-minute candles (live + backfill share the gap). +- **0066** — `cargo-lambda-cdk` `RustFunction` + CloudWatch + `lag_seconds` metric/alarm, and unify the dual rustls (0.21 from + `aws-sdk-s3` vs 0.23 from mTLS) to shrink the Lambda ZIP. +- Production deploy + live end-to-end smoke — spec Part E, still gated + on BE 0227 + task 0047 (not a standalone backlog item; unblocks with + those gates). 
diff --git a/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md new file mode 100644 index 0000000..2c8420b --- /dev/null +++ b/lore/1-tasks/active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md @@ -0,0 +1,810 @@ +--- +id: "G-local-prototype-spec" +title: "Local-prototype scope + BE cross-team contract for the Prices Ledger Processor Lambda" +type: G +task: "0038" +status: developing +spawned_from: [] +spawns: [] +related_notes: [] +links: + - "../../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" + - "../../../../2-adrs/0006_runtime-framework-rust-axum.md" + - "../../../../2-adrs/0005_stream2-sdex-local-workstation-backfill.md" + - "../../../../2-adrs/0003_price-ohlcv-pk-includes-quote-asset-id.md" + - "../../../../2-adrs/0004_price-ohlcv-multi-source-merge-columns.md" + - "../../../archive/0048_RESEARCH_soroban-events-pricing-decoder-spec/notes/G-soroban-events-pricing-decoder.md" + - "../../../archive/0045_RESEARCH_cross-team-bundle-with-be-on-hetzner-ch-tenancy/notes/G-be-agreement-record.md" + - "../../../archive/0037_FEATURE_tranche1-ledger-processor-skeleton.md" + - "../../../backlog/0047_RESEARCH_cross-tenant-throughput-verification-on-shared-hetzner-ch.md" + - "../../../../../docs/prices-api-general-overview.md" +--- + +# Local-prototype spec + BE cross-team contract + +> **Audience:** prices-api implementer (Part A), BE team reviewers (Part C). +> **Status:** draft for cross-team discussion. Revised 2026-06-08 +> after reading BE's production indexer crate. +> **Why this note exists:** task 0038's 2026-06-08 activation history +> entry promised a "local-only binary + design document" deliverable +> while the original engineering blockers (BE 0227 mTLS endpoint, +> task 0047 cross-tenant throughput) remain open. This document is +> that design. + +--- + +## 0. 
TL;DR + +We are building a **local-only** Rust Lambda binary that mirrors +the **shape** of BE's production indexer (`crates/indexer/` in the +soroban-block-explorer repo) — same doorbell-cursor pattern, same +S3 → SQS → Lambda trigger model, same mTLS-to-Hetzner-CH sink — +but exercised against local fixtures + a stub cursor instead of +real S3 / real CH. It does NOT deploy to AWS, does NOT consume +real SQS messages, does NOT write to Hetzner ClickHouse. + +The prototype's value is twofold: + +1. **De-risk the binary shape** — prove the kernel from task 0037 + composes correctly with BE's reusable building blocks + (`xdr-parser`, `db-clickhouse::mtls`, Galexie key derivation) + and the doorbell-cursor reconcile loop adapts cleanly to our + narrower extraction surface. +2. **Ground the BE meeting** — Part C of this note is the concrete + list of cross-team commitments the production Lambda needs. + The big questions are dependency distribution (`xdr-parser` is + currently an internal workspace path-dep at BE, not a published + crate) and ownership of the new SQS queue between our bucket + notifier and our Lambda. + +When the gating events clear (BE 0227 ships; task 0047 verifies +throughput), the prototype's interior survives: only the cursor +store, the S3 client, and the CDK packaging swap from stub +implementations to production wiring. + +--- + +## 1. Reference: BE production indexer (the model we mirror) + +Reading `soroban-block-explorer/crates/indexer/` is **prerequisite +context for the meeting** — the shape we propose IS BE's shape, +modulo a different extraction surface (Soroban swaps for price +discovery, not the 17 RMT tables BE writes) and a different +target database (`prices.*` per ADR 0007, not `default.*`). 
+ +### 1.1 Patterns we MUST inherit (load-bearing, not preference) + +| Pattern | BE source | Why load-bearing | +|---|---|---| +| `reservedConcurrentExecutions = 1` | `compute-stack.ts:260` | Two concurrent invocations would race the CH cursor. Ordering correctness depends on serial execution. | +| Doorbell-cursor reconcile (ignore SQS body; read `max()` from CH) | `handler/mod.rs:160-251` | Order comes from the cursor + S3 contents, not SQS delivery order. Removes any need for FIFO. | +| Last-row-wins commit ordering per ledger | BE: `ledgers` row written last; us: equivalent "cursor advance" written last | A crash mid-ledger resumes cleanly from the unchanged cursor; partial writes get superseded by `ReplacingMergeTree` on next merge. | +| Lambdas outside the VPC, mTLS only | `compute-stack.ts:32-36` (task 0239) | The shared Caddy/Hetzner-CH path is mTLS-terminated; no SG or VPC peering. Putting our Lambda in a VPC would also need a NAT GW for S3. | +| `safe_error_message` redaction | `handler/mod.rs:416-485` | CH `BadResponse` bodies can echo offending row values; their `Display` would leak data into CW Logs. We need the same redactor. | + +### 1.2 Patterns we CHOOSE to inherit (sensible defaults, not absolutes) + +- **Retry backoff `[50, 200, 800] ms`** (`handler/mod.rs:113`) — three retries, four wire calls total, only on transient errors (network / timeout / 5xx). +- **Partial-batch-failure SQS response** (`handler/mod.rs:64-75, 160-189`) — fail just the offending message, ack the rest. +- **Eager init at cold start** — surface missing env / unreachable extension as a Lambda Init Errors entry, not a per-event panic (`main.rs:40, 50-67`). +- **Structured JSON tracing-subscriber** with `EnvFilter::from_default_env()` driven by `RUST_LOG`. 
+- **`maxReceiveCount = 10` on the SQS source** (`compute-stack.ts:147`) — higher than the usual 3 because with `concurrency = 1` the ESM over-polls and gets throttled; the queue absorbs that without false-DLQ'ing a processable record. +- **`visibilityTimeout = lambdaTimeout + 60s`** (`compute-stack.ts:139`). + +### 1.3 Patterns we DO NOT inherit + +- **`default` CH database.** Per ADR 0007 we live in our own `prices.*` database on the same Hetzner cluster. +- **One cursor table named `ledgers`.** BE persists every ledger they see; we only persist ledgers containing pricing-relevant trades. Cursor design is open (Part D.1). +- **Enrichment SQS fan-out.** BE has a separate `enrichment-worker` Lambda fed from the indexer. We don't need that pattern in scope of 0038 — Soroswap/Aquarius asset-discovery is task 0039's job. +- **17 RMT tables.** Our write surface is just `prices.price_ohlcv` (and possibly a small `prices.processed_ledgers` cursor table — see Part D.1). + +--- + +## Part A — Local prototype scope + +### A.1 What the binary does + +A single Rust binary, `prices-ledger-processor`, that on each +invocation runs the doorbell-cursor reconcile loop locally: + +1. Reads its **cursor** — for the prototype, a `--cursor <ledger-seq>` CLI + arg (production: a CH-table read). +2. Computes the deterministic S3 key for ledger `cursor + 1` using + the **same Galexie key derivation as BE** (one's-complement + prefixes, `.xdr.zst` extension — see §A.3 below). +3. Resolves that key via an `ObjectFetcher` trait — wired in + prototype mode to a local-disk impl that maps the derived key + to `fixtures/ledgers/`. Misses → "no new ledger yet, stop" + (gap-stop is normal; future doorbell resumes). +4. Hits → `zstd`-decompresses + calls + `xdr_parser::deserialize_batch()` → iterates the + `LedgerCloseMeta` batch. +5.
Per ledger: extracts Soroban contract events via the + `xdr-parser` walk, normalises into the `SorobanEventRow` shape + consumed by `dispatch()` from task 0037, groups by + `(transaction_id, contract_id)`, calls `dispatch()`, collects + `TradeRow`s. +6. **Buckets** the trades into 1-min OHLCV candles in-process per + the ADR 0004 merge formula (preserve `open`, overwrite `close`, + `GREATEST(high)`, `LEAST(low)`, sum `volume_base` / + `volume_quote_usd` / `trade_count`, recompute `vwap`). +7. **Writes** to a stub sink (see A.7) — no network egress. +8. **Advances the cursor** (writes new value to the prototype + stub: `out/cursor.txt`) **last** — the equivalent of BE's + "ledgers row written last" ordering barrier. +9. Loops back to step 2 until a gap, the in-process time budget, + or `--max-iterations` is hit. + +### A.2 Workspace placement + trait seams + +``` +packages/ +├── extractors-core/ # existing (from 0037) +├── ledger-processor/ # existing (from 0037) +├── phoenix-extractor/ # existing (from 0037) +├── soroswap-extractor/ # existing (from 0037, stub) +├── aquarius-extractor/ # existing (from 0037, stub) +├── sdex-backfill/ # existing +└── prices-ledger-processor/ # NEW — this prototype + ├── Cargo.toml + ├── src/ + │ ├── main.rs # lambda_runtime entrypoint + CLI mode + │ ├── reconcile.rs # doorbell-cursor loop + │ ├── decode.rs # xdr-parser walk → SorobanEventRow + │ ├── bucket.rs # 1-min OHLCV merge (ADR 0004) + │ ├── galexie_key.rs # ledger# → S3 key (copy of BE's) + │ ├── retry.rs # [50,200,800]ms backoff + │ ├── safe_log.rs # redaction wrappers (mirrors BE) + │ ├── object_fetcher/ # input abstraction + │ │ ├── mod.rs # trait `ObjectFetcher` + │ │ └── local_disk.rs # fixtures/ledgers/ + │ ├── cursor/ # cursor abstraction + │ │ ├── mod.rs # trait `Cursor` + │ │ └── stub_file.rs # out/cursor.txt + │ └── sink/ # writer abstraction + │ ├── mod.rs # trait `OhlcvSink` + │ ├── stdout.rs # JSON-lines to stdout + │ └── sql_file.rs # ALTER-friendly SQL dump + 
├── fixtures/ # gitignored sample ledger files + └── tests/ + └── reconcile_e2e.rs # one full loop through fixtures +``` + +The three trait seams (`ObjectFetcher`, `Cursor`, `OhlcvSink`) are +the **production swap points**. In the production rewrite: + +- `LocalDiskFetcher` → `aws_sdk_s3::Client::get_object` +- `StubFileCursor` → CH-backed cursor (see Part D.1) +- `StdoutJsonSink` / `SqlFileSink` → `clickhouse::Client` over + mTLS, via `db_clickhouse::mtls::client_from_lambda_env` + +Everything else — the reconcile loop, the decode, the +bucketing, the redaction, the retry — survives. + +### A.3 Inputs — fixtures, not S3 events + +For the prototype, fixtures are real Galexie outputs copied locally, +indexed by their **derived** key (so the same `galexie_key.rs` +function we ship works in both modes): + +``` +packages/prices-ledger-processor/fixtures/ +└── ledgers/ + ├── FC45E5FF--62528000-62591999/ + │ ├── FC45E5C4--62528059.xdr.zst # known Phoenix swap + │ ├── FC45E5C3--62528060.xdr.zst # empty + │ └── FC45E5C2--62528061.xdr.zst # multi-venue + └── ... +``` + +The operator picks fixtures from the 10k uniform sample analysed +in tasks 0046 / 0048 — same evidence base as the decoder spec, so +expected outputs are pre-known. Filling `fixtures/` is a one-time +manual step (`aws s3 cp` against the dev bucket, after which the +prototype is offline-runnable). + +**No `S3Event` JSON fixtures.** The doorbell pattern means the +SQS message body would be ignored anyway — fabricating S3-event +JSONs gains us nothing and falsely suggests the Lambda parses +them. + +### A.4 Decode boundary — `xdr-parser` + +**Significant cross-team item.** BE's `xdr-parser` is a workspace +**path dep** at `soroban-block-explorer/crates/xdr-parser/`, not a +published crate. The prototype needs decisions on: + +**Option 1 — Vendor a snapshot** into +`packages/prices-ledger-processor/vendored/xdr-parser/`. Pros: +zero BE coordination; clean Cargo build. 
Cons: drifts on every +Stellar protocol upgrade; explicit re-sync ceremony. + +**Option 2 — Git submodule** of the BE repo, with a Cargo +`path = "../../soroban-block-explorer/crates/xdr-parser"` dep. +Pros: pinned commit, simple update. Cons: weird workspace layout; +breaks `cargo publish` (irrelevant for us) and `nx`-only mental +models. + +**Option 3 — Git Cargo dep** against the BE GitHub repo. Pros: +clean Cargo idiom. Cons: requires BE to keep `xdr-parser` a +**top-level package in their workspace** (it already is) and accept +that prices-api pins against specific commits. Stellar-XDR major +bumps still require coordinated PRs. + +**Option 4 — Ask BE to publish to a private cargo registry** +(e.g. CodeArtifact). Most disruptive; only justifies itself if +multiple downstream consumers exist. + +**Prototype recommendation: Option 3.** It is the cheapest +"works today" option that doesn't impose on BE — we just pin a +commit sha: + +```toml +[dependencies] +xdr-parser = { git = "ssh://git@github.com/rumblefishdev/soroban-block-explorer.git", rev = "<pinned-commit-sha>", package = "xdr-parser" } +stellar-xdr = "<matching-version>" # transitively required +``` + +**Production rewrite item:** lock to a tagged release (e.g. +`xdr-parser-v0.4.0`) and agree on a semver discipline (Part C.4). + +What we need from the crate (all already exposed per the indexer's +usage at `handler/mod.rs:313-316, 327`): + +- `xdr_parser::decompress_zstd(&[u8]) -> Result<Vec<u8>, ParseError>` +- `xdr_parser::deserialize_batch(&[u8]) -> Result<Batch, ParseError>` where `Batch` has `.ledger_close_metas: Vec<LedgerCloseMeta>` +- A walk of `SorobanTransactionMeta.events` that yields the `(transaction_id, contract_id, event_index, topics, data)` tuples the dispatcher expects (BE's `handler/process::parse_ledger` does this; we may not need the full parse, just the events walk — Part C.4 sub-question). + +### A.5 Kernel boundary — `dispatch()` + +The prototype does NOT re-implement extraction.
It calls the +existing kernel surface from task 0037: + +```rust +use ledger_processor::dispatch::dispatch; +use extractors_core::{SorobanEventRow, TradeRow, VenueRegistry}; +use phoenix_extractor::PhoenixPoolRegistry; + +let trades: Vec<TradeRow> = dispatch(&rows, &venue_registry, &phoenix_registry)?; +``` + +Today the kernel implements Phoenix XYK only. Soroswap and +Aquarius extractors return `VenueNotImplemented`. The prototype +tolerates that variant — counts those rows in an +`unimplemented_venue` metric and continues, exactly like the +production Lambda should until those extractors land. + +**Implication for the BE meeting:** Soroswap and Aquarius live +ingestion is **gated on extractor work outside this task** +(separate FEATURE tasks, not yet spawned). The Lambda shape is +complete without them; the venues just yield empty output until +their extractors arrive. + +### A.6 OHLCV bucketing + +In-process, no DB round-trip. Pseudocode: + +```rust +let bucket_key = |t: &TradeRow| OhlcvKey { + timestamp: floor_to_minute(t.closed_at), + asset_id: t.base_asset(), + granularity: Granularity::OneMinute, + quote_asset_id: t.quote_asset(), // ADR 0003 + source: t.venue.into(), // ADR 0004 +}; + +let mut candles: HashMap<OhlcvKey, OhlcvRow> = HashMap::new(); +for trade in trades { + candles.entry(bucket_key(&trade)) + .and_modify(|c| c.merge(&trade)) // ADR 0004 merge formula + .or_insert_with(|| OhlcvRow::from_first_trade(&trade)); +} +``` + +The `merge` impl is the canonical place to keep the incremental- +merge logic: it gets tested in-process and the production rewrite +can either reuse the in-memory merge or translate it to a CH +`AggregatingMergeTree` materialised view (per task 0048's +recommendation §6.3). + +### A.7 Sinks + cursor — stub only + +Three prototype-side stubs, all pure-local: + +1. **`StdoutJsonSink`** — emits one JSON line per `OhlcvRow` to + stdout. Tail-friendly, grep-able, diff-able across runs. +2.
**`SqlFileSink`** — writes one `.sql` file per invocation + under `out/` containing the `INSERT INTO prices.price_ohlcv ...` + statements the production writer would emit. Hand to BE in + the meeting; they can read it and tell us whether the column + shape lines up with what `prices.*` will host. +3. **`StubFileCursor`** — reads/writes `out/cursor.txt` (a single + `u64`). Production replaces with a CH-table read (see Part D.1). + +**Explicitly out of prototype scope:** + +- No `clickhouse::Client` connection (no Hetzner reachability yet). +- No `aws_sdk_s3` client (no AWS reachability). +- No `aws_sdk_sqs` client (no real queue). +- No CloudWatch metric / log emit (stdout structured-JSON is + enough; CW is a deployment concern). + +### A.8 Operator invocation surface + +Two modes the operator on a local machine can use: + +```bash +# Mode 1: lambda_runtime via cargo-lambda (closer to production) +cargo lambda invoke prices-ledger-processor \ + --data '{"Records":[{"messageId":"local-doorbell","body":"ignored"}]}' + +# Mode 2: direct cargo run (faster iteration) +cargo run -p prices-ledger-processor -- \ + --cursor 62528058 \ + --max-iterations 16 \ + --sink stdout +``` + +Mode 2 is the inner-loop. Mode 1 proves the `provided.al2` +runtime shape works locally and exercises the full doorbell event +deserialise path (even though the body is ignored — same as BE). + +### A.9 Prototype acceptance + +- [ ] `cargo build -p prices-ledger-processor --release` succeeds. +- [ ] `cargo lambda invoke` against a stub doorbell event, with a + fixtures dir containing the known Phoenix-swap ledger + 62528059, emits the expected `OhlcvRow` for that swap and + advances `out/cursor.txt` to 62528059. +- [ ] `cargo run -- --cursor 62528058 --max-iterations 16 --sink sql_file` + walks contiguous fixtures, produces a `.sql` file whose + `INSERT ... 
ON CONFLICT ...` statements use the PK shape + from ADR 0003 (`timestamp, asset_id, granularity, + quote_asset_id`) and the merge columns from ADR 0004. +- [ ] Re-running the same invocation from the same starting + cursor is bit-identical (idempotent; proves the merge is + deterministic). +- [ ] Hitting a missing fixture is logged as `"reached gap on S3 + — contiguous run done"` (mirrors BE's wording for the same + condition) and exits cleanly without advancing past the gap. +- [ ] One `tests/reconcile_e2e.rs` test, runnable on a clean + clone with `nx test prices-ledger-processor`, that covers + the whole pipeline against three fixture ledgers (swap, + empty, gap-stop). +- [ ] This G-note's Part C reviewed by BE; their answers captured + below the questions inline (or as a follow-up G-note). + +No deployment, no AWS calls, no live network. + +--- + +## Part B — Out of prototype scope (explicit non-goals) + +Listed so the meeting doesn't accidentally extend scope: + +- **CDK stack.** No `infra/aws-cdk/` changes. The original + Implementation Plan Step 4 in this task's README is deferred to + the production-rewrite task (see Part E). +- **Real S3 → SQS wiring.** No notification configuration on BE's + bucket; no SQS queue creation. +- **Lambda execution role / IAM.** All AWS-side; deferred. +- **mTLS cert issuance.** No CA call, no Secrets Manager write, + no Caddyfile change. +- **CloudWatch alarms, X-Ray traces.** Observability is + prototype-side stdout JSON only. +- **DLQ.** No `aws_sdk_sqs::Client`, no DLQ behaviour modelled. +- **Soroswap / Aquarius extractor bodies.** The prototype tolerates + `VenueNotImplemented`; those bodies are separate tasks. +- **SDEX trade extraction.** The 0037 kernel currently dispatches + Soroban-only; classic SDEX ops travel a different path that the + Lambda inherits when 0022's extractor lands. +- **xdr-parser republishing.** Prototype consumes via git Cargo + dep against the BE repo on a pinned commit. 
Tag-pinning and + semver discipline are Part C.4 items, not prototype work. + +--- + +## Part C — Cross-team contract (BE meeting agenda) + +Each item is phrased as a concrete decision we need from BE, with +the prices-api position pre-staked so the meeting is about +confirming or pushing back, not co-designing from scratch. + +### C.1 — SQS queue ownership + S3 → SQS notification + +**Background.** Post-task-0241 (BE), the indexer is triggered by +**SQS doorbells**, not direct S3 → Lambda. The flow is: + +``` +ledger object PutObject → S3 ObjectCreated event + → SQS message ("doorbell", body ignored) + → Lambda invocation (batchSize=1, concurrency=1) +``` + +Our Lambda follows the same shape — a separate SQS queue with its +own doorbells, fed from the same `ObjectCreated` events on the +same bucket. + +**The ask:** add a **second** event notification on BE's +`stellar-ledger-data` bucket targeting a **prices-api-owned SQS +queue** (`prices-ingest-queue-{env}`), filtered to `.xdr.zst` +suffix (same filter BE uses — `compute-stack.ts:278`). + +Why a prices-api-owned queue, not a shared one: failure isolation. +A backlog or DLQ-spam on the prices side mustn't pressure BE's +indexer queue. + +**Open sub-questions for the meeting:** + +1. SNS-fan-out vs two direct notifications. BE today wires the + bucket directly to their SQS queue. Adding our queue as a + second target on the same bucket is supported by S3, but if BE + anticipates a third or fourth consumer they may prefer to + move the bucket-side to SNS and let everyone subscribe. +2. Notification filter precision. `.xdr.zst` is bucket-wide; + ledgers don't have a separate prefix today. If BE plans to + add other object types to the bucket (snapshot dumps, + diagnostic exports), we'd want a prefix filter on our + subscription so we don't process them. 
+ +> **✅ RESOLVED — 2026-06-10 cross-team meeting → SNS fan-out.** +> +> BE and prices-api agreed to move the bucket-side to **SNS** rather +> than wire a second direct `S3 → SQS` notification. Final shape: +> +> ``` +> ledger PutObject → S3 ObjectCreated +> → SNS topic (BE-owned, on stellar-ledger-data) +> ├─ SQS ledger-ingest-{env} (BE) — rawMessageDelivery=true +> └─ SQS prices-ingest-{env} (prices-api) + its own DLQ +> → prices Lambda (this task) +> ``` +> +> **Ownership split (the user's words):** *"BE will refactor the code +> to use SNS; prices-api does its own SQS with DLQ and Lambda."* +> +> - **BE side:** repoint the existing notification to +> `SnsDestination(topic)` (was `SqsDestination(ingestQueue)`) and +> re-subscribe their own queue to the topic with +> `rawMessageDelivery: true` so the SQS body stays byte-identical +> to today and their indexer's S3-event parser is unchanged. BE +> adds a topic resource policy permitting the prices-api account +> to `sns:Subscribe`. +> - **prices-api side:** own `prices-ingest-{env}` SQS + DLQ +> (`maxReceiveCount = 10`, `visibilityTimeout = lambdaTimeout + 60s`, +> per §C.8), subscribe it to the BE topic (cross-account), and a +> queue policy permitting the topic to deliver. This is the +> prices-side CDK in the Part E punch-list (gated on BE 0227 + +> task 0047). +> +> **Why SNS over a second direct notification:** failure isolation +> *and* extensibility — a third/fourth consumer (asset-discovery, +> analytics) just adds a subscription with no further change to BE's +> bucket. EventBridge was considered (lighter for BE — additive bus +> toggle, their `S3 → SQS` untouched) but SNS was chosen for lowest +> latency and because the cross-team contract is being negotiated +> around a topic; topic ownership + subscription is tracked by +> **task 0050**. 
+> +> **Impact on this Lambda's code: none to the reconcile loop.** The +> doorbell is content-free — the handler ignores the SQS message body +> whether it arrives raw or SNS-wrapped — so the doorbell-cursor +> mechanism (`src/reconcile.rs`) is unaffected. Only doc/comment +> narrative and the (gated) CDK wiring carry the SNS shape. +> +> Sub-question (2) (prefix filter): deferred — `.xdr.zst` suffix +> remains sufficient; BE has no plans for other object types on the +> bucket. Revisit only if that changes. + +### C.2 — Env-var injection contract (NOT SSM-at-runtime) + +**Correction to the earlier draft.** I previously proposed +`/platform/{env}/*` SSM keys read at Lambda runtime. **BE's actual +pattern (compute-stack.ts:261-267) is CDK-time SSM reads baked +into Lambda env vars** at deploy. We mirror that. + +**The ask:** BE publishes the following identifiers under +`/platform/{env}/*` for our CDK to consume at deploy time: + +| SSM key | Type | Consumed at deploy → injected as env var | +|---|---|---| +| `/platform/{env}/stellar-ledger-data-bucket-name` | String | `BUCKET_NAME` | +| `/platform/{env}/stellar-ledger-data-bucket-arn` | String | (CDK-side, for IAM grant) | +| `/platform/{env}/ch-domain` | String | `CH_DOMAIN` (Caddy host) | +| `/platform/{env}/stellar-network-passphrase` | String | `STELLAR_NETWORK_PASSPHRASE` (xdr-parser cache init) | +| `/platform/{env}/ledger-events-topic-arn` | String | (CDK-side, SNS topic the prices queue subscribes to — added by the §C.1 SNS decision) | + +**Why this changes the contract.** No prices-api Lambda runtime +reads from SSM. The Lambda only sees env vars. SSM is the +deploy-time handshake, not a runtime dependency. + +**Open sub-question:** + +- Does BE already publish a `STELLAR_NETWORK_PASSPHRASE` SSM key + (mainnet vs testnet)? BE's indexer reads it from env; if their + CDK reads it from SSM at deploy, point us at the key. 
+ +### C.3 — IAM principal authorisation + +Lighter than first draft because Caddy's CN mapping (C.6) does +most of the data-plane auth. The remaining IAM grants: + +**The ask:** BE's bucket policy + KMS key policy (if SSE-KMS) +explicitly trusts the prices-api Lambda execution role ARN for: + +- `s3:GetObject`, `s3:HeadObject` on the bucket +- `kms:Decrypt` on the bucket's KMS key (if any) + +The role ARN will be exported from the prices-api CDK stack and +published under `/prices/{env}/lambda-ledger-processor-role-arn` +for BE to consume in their own CDK. + +This is the standard cross-stack handshake — contract is "BE +agrees to wire this once our CDK stack lands." + +### C.4 — `xdr-parser` distribution model + +**The biggest single item in this meeting.** Today +`xdr-parser` is a workspace path-dep in +`soroban-block-explorer/crates/xdr-parser/`, not a published +crate. The prototype runs against a git-source Cargo dep pinned +to a commit (Option 3 in A.4). The production Lambda needs a +sturdier dependency contract. + +**The ask:** + +1. BE keeps `xdr-parser` as a **top-level workspace package** + (already true; just confirming nobody intends to fold it into + the indexer binary). +2. BE publishes **tagged releases** of `xdr-parser` + (`xdr-parser-vMAJOR.MINOR.PATCH`). Prices-api pins to a tag + in production, not `main` or a sha. +3. BE commits to **semver discipline** on the public surface + (the `decompress_zstd` / `deserialize_batch` / `parse_ledger` + functions and the public types they return). Payload-shape + changes get a MAJOR bump; additions get MINOR; bug fixes PATCH. + We don't need an SLA on cadence, just on semver. +4. BE exposes (if not already) the `SorobanTransactionMeta` events + walk as a **public library function** distinct from + `parse_ledger`. We don't need the full BE parse — we only need + the events stream + `(tx_id, contract_id, event_index, topics, + data)` tuples. 
If `parse_ledger` is the only entrypoint + today, we'd ride that (paying the cost of fields we discard); + if BE is willing to factor the events walk out, that's + cleaner. + +**Open sub-questions:** + +- Cargo registry vs git tag: would BE prefer to publish to + CodeArtifact (or similar)? Git tags work fine for now; flag + in case BE has a preference. +- `stellar-xdr` version pin. The prototype must use the **same** + `stellar-xdr` version as `xdr-parser` (Rust ABI). Today BE's + workspace pins it in the root `Cargo.toml`. Whose pin wins + when both repos drift? Proposal: prices-api pins to whatever + the `xdr-parser` tag we depend on transitively requires; we + follow BE on `stellar-xdr` updates within `xdr-parser` semver. + +### C.5 — Reuse of `db-clickhouse::mtls` + +**Background.** BE's `db-clickhouse` crate contains +`mtls::client_from_lambda_env(database: &str) -> Result` +which fetches `{cert, key, ca}` from Secrets Manager via the +Parameters and Secrets Lambda Extension on `localhost:2773`, +parses the PEM bundle, assembles a `rustls::ClientConfig`, and +returns a ready `clickhouse::Client` (`db-clickhouse/src/mtls.rs`). +This is exactly what our Lambda needs. + +**The ask:** + +- BE is willing to let prices-api depend on **just the `mtls` + module** of `db-clickhouse`, exposed as a smaller crate (e.g. + `db-clickhouse-mtls` or `clickhouse-mtls-aws`) — OR +- BE is willing to let prices-api depend on the **whole + `db-clickhouse` crate** (path `db-clickhouse = { ..., features = ["aws-mtls"] }`), + accepting we pull in their schema / persist code as dead + weight in our binary (Cargo dead-code-strips, so wire-size + impact ≈ zero) — OR +- BE is fine with prices-api **vendoring `mtls.rs` verbatim** + with a clear "synced from BE rev X" comment. + +**Position:** Option 2 (depend on the whole crate) is the +lowest-friction. Cargo's dead-code-elimination handles the unused +modules; we get the helper "for free" and inherit fixes when BE +ships them. 
If BE prefers we don't carry the dependency, Option 3 +(vendor) is acceptable; Option 1 (factor a smaller crate) is the +most disruptive on BE's side. + +**Open sub-question:** + +- The `mtls::client_from_lambda_env` reads `MTLS_SECRET_NAME` and + `CH_DOMAIN` env vars. Are those names canonical, or should + prices-api use a different prefix to avoid clashing if both + Lambdas ever share a process (they won't, but the env-var + name is in the public API of the helper)? + +### C.6 — Caddyfile `CLICKHOUSE_CN_USER_MAP` for prices-api + +**Background.** Per BE's mTLS design +(`db-clickhouse/src/mtls.rs` module docs and task 0240), Caddy +**strips** any client-supplied `X-ClickHouse-User` and re-applies +the user mapped from the certificate's CN via +`CLICKHOUSE_CN_USER_MAP`. The client never sets a user; Caddy +decides. + +**The ask:** BE adds two CN → CH-user mappings to the production +Caddy config: + +- `prices-api-dev` → `prices_writer_dev` (CH user) +- `prices-api-prod` → `prices_writer` (CH user) + +…and provisions the corresponding CH users with `INSERT`, `ALTER`, +`OPTIMIZE`, `SELECT` grants on the **`prices.*`** database only +(no access to `default.*`). The CN values match the issued cert +CNs (Part C.7). + +**Open sub-question:** + +- Does BE want prices-api to draft the `CREATE USER` DDL itself + (per ADR 0007's announcement-not-approval norm), or do they + prefer to author it? Lean: we draft, they apply, we land the + SQL in `lore/3-wiki/` for traceability. + +### C.7 — mTLS cert issuance for `prices-api-{env}` + +**Background.** BE operates the CA and the per-service cert +issuance procedure (`infra-hetzner/ca/README.md`). + +**The ask (production-only, surfaced now for awareness):** + +- BE-operated CA issues two prices-api certs (`prices-api-dev`, + `prices-api-prod`) with the CNs from C.6. +- Per ADR 0007 Cluster C: per-env, 1-year manual rotation, + CA-rotation revocation. 
+- Bundle uploaded to Secrets Manager under + `${mtlsSecretNamePrefix}/lambda-prices-ledger-processor-{env}` + (matches BE's naming convention from `compute-stack.ts:251, + 305`); prices-api Lambda role granted Secrets Manager read. + +**Gating:** blocked behind BE 0227 (Hetzner CH ships) and task +0047 (cross-tenant throughput verification). In this spec to +confirm the **shape** of the eventual contract, not to schedule +it. + +### C.8 — DLQ + lag-alarm coordination + +**The ask:** + +- prices-api owns its own DLQ for the prices-ingest queue + (`prices-ingest-dlq-{env}`). `maxReceiveCount = 10` matches + BE's value for the same reason: with `concurrency = 1` the + ESM over-polls and gets throttled, which absorbs without + false-DLQ'ing a processable doorbell. +- Lag alarm: `prices.ledger_processor.lag_seconds` = + `now() - ledger.closed_at` at invocation time, alarm if >60s + sustained. Flagged here so BE doesn't see our alarm and + assume their pipeline is broken — our alarm fires on **our** + Lambda being behind, not on Galexie being behind. + +--- + +## Part D — Open questions for the meeting + +Not commitments; questions where we want BE's input but haven't +pre-staked a position. + +### D.1 — Cursor source + +BE's cursor is `max(sequence) FROM default.ledgers` — they +persist every ledger they see. We only persist ledgers +containing pricing-relevant trades, so `max(...) FROM +prices.price_ohlcv` is an UNDER-COUNT, not the cursor we need. + +**Three options:** + +1. **Own cursor table `prices.processed_ledgers`** — single-row, + updated last per invocation per ADR 0007's last-row-wins + convention. Pros: independent of BE. Cons: yet another + `ReplacingMergeTree` to operate. +2. **Cross-DB read of `default.ledgers.max(sequence)`** as our + ceiling, processed-up-to stored on our side as a small file + or table. Pros: no parallel state. Cons: couples our cursor + to BE's persist pipeline; if BE pauses (`indexerLambdaConcurrency + = 0`), we'd also stall. +3. 
**Driven purely from S3** — HEAD-probe forward from the last + confirmed key, keep no cursor in CH. Pros: stateless. Cons: + restart cost on cold start (scan to find the floor). + +**Lean: Option 1.** Independence > parallel-state savings. Worth +~5 minutes of meeting time to confirm BE is fine with us adding +one tiny RMT table to `prices.*`. + +### D.2 — OHLCV column shape + +ADR 0003 puts `quote_asset_id` in the PK. ADR 0004 adds the +`volume_quote_usd` merge column. Both are prices-api decisions, +but if BE expects to read `prices.price_ohlcv` for any reason +(BE-side analytics, board, debugging), the column shape is a +soft coordination item. + +### D.3 — Retention on `prices.*` + +prices-api's empirical footprint from task 0046 is ~0.45 GB/yr. +BE's retention policy on the shared cluster — does our database +inherit BE's TTLs, or do we set our own? Lean: own (separate DB +→ separate retention). + +### D.4 — Backfill / live coexistence + +Stream 1 (ADR 0001) and Stream 2 (ADR 0005) backfill writers +will eventually also write to `prices.*`. The 1-min UPSERT +contract is shared with the live Lambda. Sequencing question: +backfill before live, or backfill into a side table and +`INSERT ... SELECT` into the live table once live tip is healthy? + +### D.5 — Empty-ledger optimisation + +Task 0048's 10k sample showed most ledgers contain zero +pricing-relevant events. Worth asking BE if they're willing to +pre-tag at the bucket level (e.g. an additional notification on +`*.has-soroban-events.zst` if their pipeline tags such ledgers), +or if we eat the no-op invocations. Likely answer: eat them — +the Lambda no-op path is cheap. + +### D.6 — Batch size + +BE uses `batchSize = 1` because their concurrency = 1 makes +larger batches pointless. Should we do the same, or — given +that most prices-relevant ledgers cluster and we expect long +gaps — increase to (say) 5 to amortise cold-start over multiple +doorbells? Probably not worth complexity; mirror BE at 1. 
+ +--- + +## Part E — Production rewrite punch list (when gates clear) + +Surfaced here so the meeting can react to the **full sequence**. +These items are NOT in scope for this activation; they spawn as +separate backlog tasks when (a) BE 0227 lands and (b) task 0047 +verifies throughput. + +| # | Item | Est. days | +|---|---|---| +| 1 | Replace `LocalDiskFetcher` with `aws_sdk_s3` GetObject + HeadObject. | 1 | +| 2 | Replace `StubFileCursor` with the cursor strategy chosen in D.1. | 1 | +| 3 | Replace `StdoutJsonSink` / `SqlFileSink` with `db_clickhouse::mtls`-backed `clickhouse::Client` + ADR 0004 merge SQL. | 2 | +| 4 | CDK stack — Lambda function, role, SQS queue + DLQ, S3 notification on BE's bucket, env vars from `/platform/{env}/*` SSM reads, CW alarms. | 3 | +| 5 | mTLS cert issuance + Caddy `CN_USER_MAP` change with BE + cert upload to Secrets Manager. | 1 | +| 6 | Cross-stack handshake — publish Lambda role ARN under `/prices/{env}/...`, BE consumes in their CDK. | 0.5 | +| 7 | Pin `xdr-parser` from commit-sha to first tagged release. | 0.5 | +| 8 | Lag-alarm wiring + dashboard. | 1 | +| 9 | End-to-end smoke from a real `dev`-bucket doorbell. 
| 1 | + +**Total once gates clear: ~11 engineering days.** + +--- + +## Appendix — references + +### Code in BE repo (`soroban-block-explorer/`) +- `crates/indexer/src/main.rs` — cold-start shape, env-var contract +- `crates/indexer/src/handler/mod.rs` — doorbell-cursor reconcile loop +- `crates/indexer/src/handler/process.rs` — `parse_ledger` walk +- `crates/xdr-parser/` — XDR decode crate we'll depend on +- `crates/db-clickhouse/src/mtls.rs` — reusable mTLS client builder +- `infra/src/lib/stacks/compute-stack.ts` — Lambda + SQS + DLQ CDK wiring +- `infra-hetzner/Caddyfile` — `CLICKHOUSE_CN_USER_MAP` +- `infra-hetzner/ca/README.md` — cert issuance procedure + +### Local docs +- General overview §5.2 — Prices Ledger Processor (Rust) +- ADR 0001 — Stream 1 historical backfill (CH-sourced) +- ADR 0003 — `price_ohlcv` PK shape with `quote_asset_id` +- ADR 0004 — multi-source merge columns +- ADR 0005 — Stream 2 backfill; xdr-parser as git Cargo dep +- ADR 0006 — runtime framework Rust/axum +- ADR 0007 — live data sink on shared Hetzner ClickHouse +- Task 0037 — Tranche 1 Ledger Processor skeleton (the kernel) +- Task 0048 — Soroban events pricing decoder spec +- Task 0045 — BE agreement record (G-note) +- Task 0047 — cross-tenant throughput verification (gating) diff --git a/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md new file mode 100644 index 0000000..c6e0a8c --- /dev/null +++ b/lore/1-tasks/backlog/0050_FEATURE_be-side-prep-sns-mtls-prices-db-provisioning/notes/G-be-sns-fanout-handoff.md @@ -0,0 +1,227 @@ +--- +id: "G-be-sns-fanout-handoff" +title: "BE handoff — SNS fan-out implementation runbook (step-by-step)" +type: G +task: "0050" +status: mature +spawned_from: ["G-be-sns-fanout-ask"] +spawns: [] +related_notes: ["G-be-sns-fanout-ask"] +links: + - 
"../../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" + - "../../../2-adrs/0007_live-data-sink-on-shared-hetzner-clickhouse.md" +--- + +# BE handoff — SNS fan-out on `stellar-ledger-data` + +> **Audience:** BE team (soroban-block-explorer infra). +> **Status:** agreed at the 2026-06-10 cross-team meeting. This is a +> self-contained, copy-pasteable runbook — hand it straight to whoever +> owns `soroban-block-explorer/infra`. +> **Grounded in:** the *current* `infra/src/lib/stacks/compute-stack.ts` +> on `develop` (verified 2026-06-18). Line numbers below are real. +> **Why now:** prices-api's side is already authored (PR #34) — its CDK +> imports the topic ARN, subscribes its own queue, reads the SSM keys +> below, and is prepare-only until you land this. This item is **not** +> gated on BE 0227 or throughput verification (it's pure S3+SNS+SSM), so +> it can ship now. + +--- + +## TL;DR + +Today the `stellar-ledger-data` bucket fires `S3 → SQS` straight at the +indexer's `ingestQueue`. We need a **second tenant** (prices-api) to +receive the same `ObjectCreated` doorbells on its own queue. S3 allows +only **one** destination per overlapping `event + suffix`, so the clean +fan-out is to insert an **SNS topic**: + +``` +ledger PutObject → S3 ObjectCreated (.xdr.zst) + → SNS topic {env}-ledger-events (BE-owned, NEW) + ├─ SQS {env}-ledger-ingest (BE) rawMessageDelivery=true ← unchanged behaviour + └─ SQS prices-ingest-{env} (prices-api, already in PR #34) +``` + +The whole change is **one file** (`compute-stack.ts`) plus publishing a +few SSM keys. Same AWS account as prices-api, so **you do not need any +prices ARN, and no cross-account policy.** + +--- + +## Scope of the change + +- **One file:** `infra/src/lib/stacks/compute-stack.ts`. +- **Net-new SSM keys** under a `/platform/{env}/*` namespace (none exist + today — these are created, not "confirmed"). 
+- **No change** to: the `dlq`, the `ingestQueue` config (visibility, + `maxReceiveCount`), the indexer's SQS event-source-mapping, or the + indexer Rust code. Only the *source* of the doorbell moves, and raw + delivery keeps the SQS body byte-identical. + +--- + +## Step-by-step + +### Step 1 — add the three imports + +`compute-stack.ts` currently imports `cdk, s3, s3n, sqs` (L1–9). Add: + +```ts +import * as sns from 'aws-cdk-lib/aws-sns'; +import * as subs from 'aws-cdk-lib/aws-sns-subscriptions'; +import * as ssm from 'aws-cdk-lib/aws-ssm'; +``` + +(`ssm` is already a workspace dep — `hetzner-dns-stack.ts` uses it.) + +### Step 2 — create the topic + +Put this just **above** the bucket-notification block (currently L386). +`config.envName` is the same field used for the queue names +(`${config.envName}-ledger-ingest`): + +```ts +// NEW — fan-out topic the bucket publishes to. One per env. +const ledgerEventsTopic = new sns.Topic(this, 'LedgerEventsTopic', { + topicName: `${config.envName}-ledger-events`, +}); +``` + +### Step 3 — repoint the bucket notification (S3 → SNS) + +Replace the existing block at **L386–389**: + +```ts +// ── BEFORE (current L386-389) ── +ledgerBucket.addEventNotification( + s3.EventType.OBJECT_CREATED, + new s3n.SqsDestination(ingestQueue), + { suffix: '.xdr.zst' } +); +``` + +```ts +// ── AFTER ── +// S3 → SNS (was SqsDestination(ingestQueue)). S3 allows one destination +// per overlapping event+suffix, so this REPLACES the direct wiring; the +// indexer now receives doorbells via its SNS subscription (Step 4). +ledgerBucket.addEventNotification( + s3.EventType.OBJECT_CREATED, + new s3n.SnsDestination(ledgerEventsTopic), + { suffix: '.xdr.zst' } +); +``` + +`SnsDestination` auto-adds the topic policy letting S3 publish — CDK +handles that for you. + +### Step 4 — re-subscribe the indexer queue (⚠️ `rawMessageDelivery: true`) + +This is the one detail that keeps the indexer untouched. 
With raw +delivery the SQS body is byte-for-byte identical to today's direct +`S3 → SQS` event; without it, SNS wraps the event in an envelope and the +indexer's S3-event parser breaks on **every** ledger. + +```ts +// NEW — the indexer's own queue subscribes to the topic. rawMessageDelivery +// keeps the SQS message body identical to the old direct S3→SQS shape, so the +// indexer's event-source-mapping and parser are UNCHANGED. +ledgerEventsTopic.addSubscription( + new subs.SqsSubscription(ingestQueue, { rawMessageDelivery: true }), +); +``` + +Leave the existing `processorFunction.addEventSource(new SqsEventSource( +ingestQueue, …))` (L399) and `ingestQueue.grantConsumeMessages(…)` (L411) +exactly as they are — they still drain `ingestQueue`. + +### Step 5 — publish the topic ARN to SSM + +prices-api's CDK reads this **at deploy time** (never at Lambda runtime): + +```ts +// NEW — hand the topic ARN to prices-api's CDK via SSM. +new ssm.StringParameter(this, 'LedgerEventsTopicArnParam', { + parameterName: `/platform/${config.envName}/ledger-events-topic-arn`, + stringValue: ledgerEventsTopic.topicArn, +}); +``` + +### Step 6 — publish the remaining `/platform/{env}/*` keys (net-new) + +These do **not** exist in your infra today (you only publish +`EcrRepoUriParam` and read the Hetzner CH IP). prices-api's CDK consumes +all of them at deploy. The bucket name/arn are already in hand inside +`ComputeStack` (props `ledgerBucketName` / `ledgerBucketArn`, L20–21), so +publishing them is a one-liner each; `ch-domain` and the network +passphrase come from wherever you keep them today. 
+ +| SSM key (String) | Value | Source in your code | +|---|---|---| +| `/platform/{env}/ledger-events-topic-arn` | new SNS topic ARN | Step 5 | +| `/platform/{env}/stellar-ledger-data-bucket-name` | bucket name | `props.ledgerBucketName` | +| `/platform/{env}/stellar-ledger-data-bucket-arn` | bucket ARN | `props.ledgerBucketArn` | +| `/platform/{env}/ch-domain` | Caddy/ClickHouse host | your Hetzner CH domain | +| `/platform/{env}/stellar-network-passphrase` | mainnet/testnet passphrase | indexer env config | + +> If any of these already live under a different key name, just tell us +> the names and we'll point prices-api's CDK at them instead — the table +> above is the canonical set our stack references. + +### Step 7 — confirm the topic policy isn't subscriber-restricted + +Same AWS account, so prices-api subscribes `prices-ingest-{env}` to the +topic via its **own** deploy-role `sns:Subscribe` IAM — no cross-account +policy needed from you. Only confirm your topic policy doesn't explicitly +restrict subscribers to BE principals. A default CDK `sns.Topic` (as +above) does **not**, so there's normally nothing to do here. + +--- + +## What BE does NOT need to do + +- ❌ No prices queue ARN (same account; prices owns the subscribe side). +- ❌ No cross-account topic policy. +- ❌ No DLQ / consumer / event-source-mapping changes. +- ❌ No indexer Rust changes (raw delivery → body unchanged). + +--- + +## Cutover (the one path that must never drop ledgers) + +Because S3 permits one destination per overlapping `event + suffix`, +Step 3 is a **replace** of the live notification, not an add: + +1. **Deploy to a non-prod env first.** Confirm the indexer keeps draining + `ingestQueue`. If it suddenly fails to parse messages, raw delivery + wasn't applied (Step 4) — fix before touching prod. +2. **On prod, deploy in a low-write window if possible.** The + `PutBucketNotificationConfiguration` swap is near-atomic, but it's the + single path that must not silently drop a ledger. 
+3. **prices-api subscribes after** the topic + SSM key exist. + +--- + +## Joint verification + +- A new `.xdr.zst` PutObject delivers to **both** queues independently. +- BE indexer continues processing post-cutover with **no parser errors**. +- prices-api subscribes a throwaway queue to the topic ARN and observes a + delivery (captures the envelope as a test fixture). + +--- + +## Alternative considered (on the record) + +EventBridge was weighed — additive bucket toggle, leaves your `S3 → SQS` +untouched, lower BE effort/risk. The meeting chose **SNS** for lowest +latency and because the cross-team contract is built around a topic. +Recorded so the trade-off is on file. + +--- + +*Deeper rationale + meeting provenance: [[G-be-sns-fanout-ask]] and the +0038 spec `notes/G-local-prototype-spec.md` §C.1. Topic ownership is +tracked by this task (0050); prices-api's subscriber-side CDK is in +PR #34 (task 0038).* diff --git a/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md b/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md new file mode 100644 index 0000000..c08aaaf --- /dev/null +++ b/lore/1-tasks/backlog/0064_FEATURE_ch-backed-cursor-for-ledger-processor.md @@ -0,0 +1,67 @@ +--- +id: "0064" +title: "ClickHouse-backed cursor for the Prices Ledger Processor" +type: FEATURE +status: backlog +related_adr: ["0007"] +related_tasks: ["0038"] +tags: [layer-indexing, priority-medium, effort-small, lambda, clickhouse, cursor] +links: + - "../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" +history: + - date: 2026-06-24 + status: backlog + who: oski + note: "Spawned from 0038 future work (spec Part D.1)." + - date: 2026-06-24 + status: backlog + who: claude + note: "Added PR #34 review context for finding #3 (cold-start rewind + bootstrap; interim INITIAL_CURSOR SSM seed shipped)." 
+--- + +# ClickHouse-backed cursor for the Prices Ledger Processor + +## Summary + +Replace the Lambda's `StubFileCursor` (a `/tmp` file, lost on cold start) +with a durable cursor read from / written to ClickHouse, so the +doorbell-cursor reconcile loop resumes correctly across container churn. + +## Context + +Task 0038 ships with `StubFileCursor` as a placeholder. The production +cursor design is the open question in `G-local-prototype-spec.md` Part D.1. +BE's cursor is `max(sequence) FROM default.ledgers`; we only persist +pricing-relevant ledgers, so `max(...) FROM prices.price_ohlcv_1m` undercounts. + +## Review findings (PR #34 review, 2026-06-24) + +Finding #3 (durable cursor) was confirmed in the PR #34 review, with two +concrete failure modes this task removes: + +- **Cold-start rewind / reprocessing.** `/tmp` is per-container ephemeral. On + every container recycle the cursor is lost and re-seeded from the *static* + `INITIAL_CURSOR`, so the loop rewinds to a fixed ledger and re-walks the + whole `INITIAL_CURSOR..tip` span. Idempotent (RMT), but the redundant S3 + fetch + decode + write is paid on every cold start; if the seed is far + behind it can blow the Lambda timeout and livelock the doorbell. +- **Bootstrap.** Without a seed the loop errors on `cursor.read()` and DLQs + every doorbell. Interim mitigation already shipped in PR #34: `main.rs` + seeds from `INITIAL_CURSOR`, wired in CDK from the prices-owned SSM param + `/prices/{env}/ledger-processor/initial-cursor` (`compute-stack.ts`). This + task supersedes that stop-gap with the durable CH cursor and should retire + the static seed (or keep it only as a genuine first-run bootstrap). + +## Implementation + +- Lean: own single-row `prices.processed_ledgers` (ReplacingMergeTree, + updated last per run — D.1 option 1). +- Implement `Cursor` over `prices-clickhouse` (mTLS client); wire into + `main.rs` in place of `StubFileCursor`. 
+- Decide seed-on-empty behaviour (env `INITIAL_CURSOR` vs first-S3-probe). + +## Acceptance Criteria + +- [ ] `prices.processed_ledgers` (or chosen design) added to the schema. +- [ ] CH `Cursor` impl; reconcile resumes from CH across cold starts. +- [ ] Idempotent: re-run from the persisted cursor is a no-op past the tip. diff --git a/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md b/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md new file mode 100644 index 0000000..4df882f --- /dev/null +++ b/lore/1-tasks/backlog/0065_FEATURE_periodic-ohlcv-reaggregation.md @@ -0,0 +1,78 @@ +--- +id: "0065" +title: "Periodic OHLCV re-aggregation for cross-chunk intra-minute candles" +type: FEATURE +status: backlog +related_adr: ["0004", "0007"] +related_tasks: ["0038", "0039"] +tags: [layer-indexing, priority-medium, effort-medium, clickhouse, ohlcv] +links: + - "../archive/0048_RESEARCH_soroban-events-pricing-decoder-spec/notes/G-soroban-events-pricing-decoder.md" +history: + - date: 2026-06-24 + status: backlog + who: oski + note: "Spawned from 0038 future work (cross-invocation intra-minute merge gap)." + - date: 2026-06-24 + status: backlog + who: claude + note: "Added PR #34 review context: finding #1 (live-path frequency correction) and finding #5 (version-namespace overflow caveat for the merge fix)." +--- + +# Periodic OHLCV re-aggregation for cross-chunk intra-minute candles + +## Summary + +Close the intra-minute aggregation gap shared by **both** writers: the live +Lambda (per contiguous run) and the backfill (per partition) accumulate +candles in memory and flush per chunk. When a single minute spans two +chunks/invocations, two rows land with the same PK but different `version`, +and `ReplacingMergeTree(version)` keeps only the latest — dropping the other +chunk's trades for that minute. + +## Context + +`price_ohlcv_1m` is `ReplacingMergeTree(version)` keyed by +`(asset_id, quote_asset_id, source, timestamp)`. 
RMT **replaces**, it does +not sum — so per-chunk partial candles for a boundary minute don't merge. +Negligible-but-real (one minute per chunk boundary). Same root cause for live +and backfill since both now use `prices-ingest-core`'s `CandleAccumulator`. + +## Review findings (PR #34 review, 2026-06-24) + +**Finding #1 — the live-path frequency is NOT negligible.** "One minute per +chunk boundary" holds for the backfill (large partitions), but the live Lambda +calls `flush_all()` every invocation (`reconcile.rs`), and with +`MAX_ITERATIONS=16` a run spans ~80-96s of ledgers — so a minute boundary +falls inside essentially *every* invocation. That is roughly one corrupted +(under-counted volume / wrong `open`) boundary minute per run in the live path, +not a rare edge. The in-code comment equating it with the backfill's partition +boundaries understates it; the fix is materially more impactful for 0038 than +the "negligible" framing suggests. + +**Finding #5 — the `version` scheme can invert across ledgers, which +constrains the fix.** `version = ledger_seq*1000 + operation_index` +(`bucket.rs`) assumes `operation_index < 1000`, but the AMM path sets it to +`first_event_index & 0xFFFF` (0..65535; `first_event_index` is `u32` in +`extractors-core`). A tx emitting ≥1000 events overflows the per-ledger +namespace, so a *later* ledger's candle can carry a *lower* `version` than an +earlier one. Any re-aggregation that relies on "higher version wins" must not +assume `version` is monotonic in ledger order — either widen the multiplier / +pack `(ledger, event_index)` without truncation, or make the merge +order-independent (Summing/Aggregating engine). Note: changing the version +formula also touches already-written backfill rows, so it needs a migration +decision. 
+ +## Implementation (options to evaluate) + +- A periodic worker (task 0039 family) that re-reads raw trades/`_1m FINAL` + and rewrites boundary minutes with a higher `version`; OR +- An `AggregatingMergeTree` / SummingMergeTree variant for the write path so + partial candles combine on merge; OR +- Emit candles keyed to include a chunk discriminator and re-roll at read. + +## Acceptance Criteria + +- [ ] A minute split across two runs/chunks aggregates to one correct candle. +- [ ] Fix applies to both live (0038) and backfill writers (shared core). +- [ ] Regression test with a deliberately split-minute fixture. diff --git a/lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md b/lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md new file mode 100644 index 0000000..b8cc792 --- /dev/null +++ b/lore/1-tasks/backlog/0066_FEATURE_ledger-processor-rustfunction-and-lag-metric.md @@ -0,0 +1,46 @@ +--- +id: "0066" +title: "Ledger Processor: RustFunction CDK + lag metric + rustls dedup" +type: FEATURE +status: backlog +related_adr: ["0006", "0007"] +related_tasks: ["0038"] +tags: [layer-indexing, priority-low, effort-small, lambda, cdk, observability] +links: + - "../active/0038_FEATURE_prices-ledger-processor-lambda/notes/G-local-prototype-spec.md" +history: + - date: 2026-06-24 + status: backlog + who: oski + note: "Spawned from 0038 future work (packaging + observability + dep hygiene)." +--- + +# Ledger Processor: RustFunction CDK + lag metric + rustls dedup + +## Summary + +Three small production-hardening items for the Prices Ledger Processor that +are out of scope for the deploy-deferred build. + +## Context + +Spawned from task 0038. The Lambda is code-complete behind the `lambda` +feature; these are deploy/observability/dep-hygiene polish. 
+ +## Implementation + +- **`cargo-lambda-cdk` `RustFunction`**: drop the `Code.fromAsset` seam in + `infra/.../compute-stack.ts` for synth-time builds (BE's exact shape) once + the dep is added. +- **`prices.ledger_processor.lag_seconds`**: emit `now() - ledger.closed_at` + per invocation to CloudWatch (namespace `prices/lambda`) + a >60s-sustained + alarm (spec §9.6 / §C.8). +- **rustls dedup**: `aws-sdk-s3` pulls rustls 0.21 (older smithy) alongside + our 0.23.40 (mTLS). Unify to one version to shrink the `provided.al2023` + ZIP — investigate aws-smithy-http-client TLS feature selection. + +## Acceptance Criteria + +- [ ] `RustFunction` synth-time build wired (no pre-built asset seam). +- [ ] `lag_seconds` metric + alarm present in the CDK synth. +- [ ] `cargo tree --features lambda` shows a single rustls version. diff --git a/packages/prices-ingest-core/Cargo.toml b/packages/prices-ingest-core/Cargo.toml new file mode 100644 index 0000000..3f61b63 --- /dev/null +++ b/packages/prices-ingest-core/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "prices-ingest-core" +version = "0.1.0" +edition = "2024" +description = "Shared ledger→OHLCV ingestion core (decode, extract, canonicalise, bucket, write) used by both the SDEX backfill CLI and the live Prices Ledger Processor Lambda" + +[lib] +name = "prices_ingest_core" +path = "src/lib.rs" + +[dependencies] +stellar-xdr = { workspace = true } +stellar-strkey = { workspace = true } +sha2 = { workspace = true } +xdr-parser = { workspace = true } +clickhouse = { workspace = true } +rust_decimal = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tracing = { workspace = true } +thiserror = { workspace = true } + +prices-clickhouse = { path = "../prices-clickhouse" } +extractors-core = { path = "../extractors-core" } +phoenix-extractor = { path = "../phoenix-extractor" } +soroswap-extractor = { path = "../soroswap-extractor" } +aquarius-extractor = { path = "../aquarius-extractor" } 
+ledger-processor = { path = "../ledger-processor" } diff --git a/packages/sdex-backfill/src/bucket.rs b/packages/prices-ingest-core/src/bucket.rs similarity index 100% rename from packages/sdex-backfill/src/bucket.rs rename to packages/prices-ingest-core/src/bucket.rs diff --git a/packages/sdex-backfill/src/canonical.rs b/packages/prices-ingest-core/src/canonical.rs similarity index 100% rename from packages/sdex-backfill/src/canonical.rs rename to packages/prices-ingest-core/src/canonical.rs diff --git a/packages/prices-ingest-core/src/decode.rs b/packages/prices-ingest-core/src/decode.rs new file mode 100644 index 0000000..2f00648 --- /dev/null +++ b/packages/prices-ingest-core/src/decode.rs @@ -0,0 +1,29 @@ +//! `*.xdr.zst` object bytes → `Vec`. +//! +//! Wraps BE's `xdr-parser` (`decompress_zstd` + `deserialize_batch`) — the same +//! two calls the SDEX backfill makes per ledger file (`sdex-backfill::ingest`). +//! A Galexie object is a zstd-compressed `LedgerCloseMetaBatch`; with +//! `ledgers_per_file = 1` the returned vec is usually a single ledger, but the +//! batch shape is honoured so a multi-ledger file decodes correctly too. + +use stellar_xdr::curr::LedgerCloseMeta; + +use crate::error::IngestError; + +/// Decompress + deserialize one Galexie `*.xdr.zst` object into its ledgers. +pub fn decode_object(compressed: &[u8]) -> Result, IngestError> { + let xdr_bytes = xdr_parser::decompress_zstd(compressed)?; + let batch = xdr_parser::deserialize_batch(&xdr_bytes)?; + Ok(batch.ledger_close_metas.to_vec()) +} + +/// The ledger sequence number of a `LedgerCloseMeta` (all protocol versions). +/// The live Lambda uses this to advance its doorbell cursor to the highest +/// ledger actually processed in a run. 
+pub fn ledger_sequence(lcm: &LedgerCloseMeta) -> u32 { + match lcm { + LedgerCloseMeta::V0(v) => v.ledger_header.header.ledger_seq, + LedgerCloseMeta::V1(v) => v.ledger_header.header.ledger_seq, + LedgerCloseMeta::V2(v) => v.ledger_header.header.ledger_seq, + } +} diff --git a/packages/prices-ingest-core/src/error.rs b/packages/prices-ingest-core/src/error.rs new file mode 100644 index 0000000..8a177c1 --- /dev/null +++ b/packages/prices-ingest-core/src/error.rs @@ -0,0 +1,22 @@ +//! Shared error type for the ingestion core. + +/// Errors raised while decoding ledgers or reading/writing ClickHouse. The +/// SDEX backfill wraps this in its own `BackfillError` (which adds the +/// S3-partition-sync variants); the Lambda surfaces it through its reconcile +/// error. Keeping the shared variants here means both binaries classify +/// transient ClickHouse failures the same way. +#[derive(Debug, thiserror::Error)] +pub enum IngestError { + #[error("io: {0}")] + Io(#[from] std::io::Error), + + #[error("xdr parse: {0}")] + Parse(#[from] xdr_parser::ParseError), + + // Self-redacting: a ClickHouse `BadResponse` body can echo offending row + // values, so the `Display` emits only the leading `Code: NNN` / status + // token, never the raw body. Applying it on the shared error means every + // consumer of the writer (live Lambda + SDEX backfill) is leak-safe. + #[error("clickhouse: {}", crate::safe_log::redact_clickhouse(.0))] + Clickhouse(#[from] clickhouse::error::Error), +} diff --git a/packages/sdex-backfill/src/filter.rs b/packages/prices-ingest-core/src/filter.rs similarity index 100% rename from packages/sdex-backfill/src/filter.rs rename to packages/prices-ingest-core/src/filter.rs diff --git a/packages/prices-ingest-core/src/lib.rs b/packages/prices-ingest-core/src/lib.rs new file mode 100644 index 0000000..ec80b75 --- /dev/null +++ b/packages/prices-ingest-core/src/lib.rs @@ -0,0 +1,45 @@ +//! prices ingestion core — the shared ledger→OHLCV pipeline. +//! +//! 
This crate owns the *tested* decode → extract → canonicalise → bucket → +//! write pipeline that was first written for the SDEX historical backfill +//! (`sdex-backfill`) and is now reused verbatim by the live **Prices Ledger +//! Processor Lambda** (`prices-ledger-processor`, task 0038). Both writers go +//! through the same modules so live and backfill produce **identical** +//! `prices.price_ohlcv_1m` rows (same surrogate `asset_id`s via the +//! [`AssetRegistry`], same SAC→classic collapse, same preferred-quote +//! orientation, same `Decimal(38,14)` scaling, same `version`). Splitting this +//! into its own crate is what prevents the two paths from drifting. +//! +//! Layers, in pipeline order: +//! - [`filter`] — classic SDEX trades from `LedgerCloseMeta` operation results. +//! - [`soroban`] — Soroban AMM trades + oracle samples from contract events. +//! - [`canonical`] — asset identity, the [`AssetRegistry`] surrogate-id store, +//! and `(base, quote)` canonicalisation. +//! - [`price`] / [`tick`] — per-trade price + the [`TradeTick`] the bucketer eats. +//! - [`bucket`] — 1-minute OHLCV accumulation ([`CandleAccumulator`]). +//! - [`writer`] — the transport-agnostic ClickHouse [`OhlcvWriter`] (works with a +//! plaintext local client *or* the task-0052 mTLS client — both are a +//! `clickhouse::Client`). +//! - [`decode`] — `*.xdr.zst` object bytes → `Vec`. 
+ +pub mod bucket; +pub mod canonical; +pub mod decode; +pub mod error; +pub mod filter; +pub mod price; +pub mod safe_log; +pub mod soroban; +pub mod tick; +pub mod writer; + +pub use bucket::{CandleAccumulator, OhlcvCandle}; +pub use canonical::{AssetIdentity, AssetRegistry, CanonicalPair, canonicalise}; +pub use decode::{decode_object, ledger_sequence}; +pub use error::IngestError; +pub use filter::{RawTrade, extract_trades}; +pub use price::{compute_price, stroops_to_decimal}; +pub use safe_log::safe_response_token; +pub use soroban::{LedgerSoroban, Registries, process_ledger}; +pub use tick::{TradeTick, raw_trade_to_tick}; +pub use writer::{OhlcvWriter, OracleSample}; diff --git a/packages/sdex-backfill/src/price.rs b/packages/prices-ingest-core/src/price.rs similarity index 100% rename from packages/sdex-backfill/src/price.rs rename to packages/prices-ingest-core/src/price.rs diff --git a/packages/prices-ingest-core/src/safe_log.rs b/packages/prices-ingest-core/src/safe_log.rs new file mode 100644 index 0000000..9c87f5a --- /dev/null +++ b/packages/prices-ingest-core/src/safe_log.rs @@ -0,0 +1,75 @@ +//! Error redaction for log emission. +//! +//! Mirrors BE's `safe_error_message` / `safe_bad_response_token` +//! (`crates/indexer/src/handler/mod.rs:436-485`). +//! +//! Logging policy: NEVER stringify an upstream error whose `Display` +//! could embed row data (ClickHouse `BadResponse` is the canonical +//! example — its body echoes offending row values into the message). +//! Emit fixed labels plus, for HTTP/CH responses, only the leading +//! `Code: NNN` or HTTP status token. +//! +//! This lives in the shared core (not the Lambda crate) so the redaction +//! is applied at the *source*: [`IngestError`](crate::IngestError)'s own +//! `Display` routes the ClickHouse variant through here, so every consumer +//! of the shared [`OhlcvWriter`](crate::OhlcvWriter) — the live Lambda +//! *and* the SDEX backfill — is leak-safe without each re-implementing it. 
+ +/// Extract ONLY the leading code/status token from a wire-error body. +/// Returns `"Code: NNN"` for a CH exception body, `"HTTP NNN"` for a +/// plain HTTP status line, or `"detail suppressed"` for anything else +/// where we cannot prove the remainder is data-free. +pub fn safe_response_token(msg: &str) -> String { + if let Some(rest) = msg.strip_prefix("Code: ") { + let digits: String = rest.chars().take_while(char::is_ascii_digit).collect(); + if !digits.is_empty() { + return format!("Code: {digits}"); + } + } + let leading: String = msg.chars().take_while(char::is_ascii_digit).collect(); + if leading.len() == 3 { + return format!("HTTP {leading}"); + } + "detail suppressed".to_string() +} + +/// Redact a ClickHouse client error into a leak-safe label. Used by +/// [`IngestError`](crate::IngestError)'s `Display` so the offending-row +/// body a `BadResponse` echoes never reaches a log line. +pub fn redact_clickhouse(err: &clickhouse::error::Error) -> String { + safe_response_token(&err.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ch_exception_extracts_code() { + assert_eq!( + safe_response_token("Code: 241. 
DB::Exception: foo bar=12345"), + "Code: 241" + ); + } + + #[test] + fn http_status_extracts_three_digits() { + assert_eq!( + safe_response_token("503 Service Unavailable: backend timeout"), + "HTTP 503" + ); + } + + #[test] + fn proxy_html_suppresses_everything() { + assert_eq!( + safe_response_token("Bad Gateway"), + "detail suppressed" + ); + } + + #[test] + fn malformed_code_prefix_suppresses() { + assert_eq!(safe_response_token("Code: abc."), "detail suppressed"); + } +} diff --git a/packages/sdex-backfill/src/soroban.rs b/packages/prices-ingest-core/src/soroban.rs similarity index 99% rename from packages/sdex-backfill/src/soroban.rs rename to packages/prices-ingest-core/src/soroban.rs index 962ebd9..e6f2601 100644 --- a/packages/sdex-backfill/src/soroban.rs +++ b/packages/prices-ingest-core/src/soroban.rs @@ -24,8 +24,8 @@ use xdr_parser::extract_events; use xdr_parser::types::EventSource; use crate::canonical::{AssetIdentity, AssetRegistry, USDC_ISSUER, USDT_ISSUER, canonicalise}; -use crate::sink::OracleSample; use crate::tick::TradeTick; +use crate::writer::OracleSample; /// AMM token amounts are treated as 7-decimal (Stellar SAC convention). Token /// decimals vary; this is a documented sizing-measurement approximation. diff --git a/packages/sdex-backfill/src/tick.rs b/packages/prices-ingest-core/src/tick.rs similarity index 100% rename from packages/sdex-backfill/src/tick.rs rename to packages/prices-ingest-core/src/tick.rs diff --git a/packages/prices-ingest-core/src/writer.rs b/packages/prices-ingest-core/src/writer.rs new file mode 100644 index 0000000..3f733be --- /dev/null +++ b/packages/prices-ingest-core/src/writer.rs @@ -0,0 +1,256 @@ +//! Transport-agnostic ClickHouse writer for `prices.*`. +//! +//! Holds a `clickhouse::Client` and knows how to write the shared row shapes +//! (`price_ohlcv_1m`, `assets`, `oracle_prices`) and load the asset registry. +//! It does **not** care how the client was built: a plaintext local-dev client +//! 
([`OhlcvWriter::plaintext`]) and the task-0052 mTLS client (passed to +//! [`OhlcvWriter::new`]) are both just a `clickhouse::Client`, so the same +//! writer serves the local backfill and the live Lambda's remote mTLS sink. +//! +//! Backfill-only bookkeeping (`backfill_sdex_ledgers` resume set) is **not** +//! here — it lives in `sdex-backfill`'s thin wrapper, since the live Lambda +//! uses its own doorbell cursor instead. + +use clickhouse::Client; +use rust_decimal::Decimal; +use serde::{Deserialize, Serialize}; +use tracing::info; + +use crate::bucket::OhlcvCandle; +use crate::canonical::{AssetIdentity, AssetRegistry}; +use crate::error::IngestError; + +/// Convert a `Decimal` to the `i128` mantissa ClickHouse expects for a +/// `Decimal(38, 14)` column. Saturates rather than panicking: AMM +/// amounts/prices are i128-derived and can exceed the 38-digit budget, and an +/// out-of-range value should clamp, not abort the whole run. +pub fn decimal_to_i128(d: Decimal) -> i128 { + let d = d.round_dp(14); + let factor = 10i128.pow(14 - d.scale()); + d.mantissa().saturating_mul(factor) +} + +/// A ClickHouse writer over `prices.*`. Cheap to clone (the client is). +pub struct OhlcvWriter { + client: Client, +} + +impl OhlcvWriter { + /// Wrap an already-built client (e.g. the mTLS client from + /// `prices_clickhouse::mtls::client_from_lambda_env`). + pub fn new(client: Client) -> Self { + Self { client } + } + + /// Build a plaintext client for local-dev / Docker ClickHouse. + pub fn plaintext(url: &str) -> Self { + Self { + client: Client::default().with_url(url), + } + } + + /// Borrow the underlying client (e.g. for backfill-only resume queries). + pub fn client(&self) -> &Client { + &self.client + } + + /// Cheap connectivity probe (`SELECT 1`). 
+ pub async fn preflight(&self) -> Result<(), IngestError> { + self.client.query("SELECT 1").execute().await?; + Ok(()) + } + + /// Load the existing `prices.assets` rows as `(asset_id, identity)` so a + /// run reuses surrogate ids rather than reassigning them. + pub async fn load_assets(&self) -> Result, IngestError> { + let rows = self + .client + .query( + "SELECT asset_id, asset_code, issuer_address, contract_address FROM prices.assets", + ) + .fetch_all::() + .await?; + + let assets: Vec<(u32, AssetIdentity)> = rows + .into_iter() + .map(|r| { + let identity = if !r.contract_address.is_empty() { + AssetIdentity::Contract(r.contract_address) + } else if r.asset_code == "XLM" && r.issuer_address.is_empty() { + AssetIdentity::Native + } else { + AssetIdentity::Credit { + code: r.asset_code, + issuer: r.issuer_address, + } + }; + (r.asset_id, identity) + }) + .collect(); + + info!( + existing_assets = assets.len(), + "loaded asset registry from ClickHouse" + ); + Ok(assets) + } + + /// Write a batch of candles for one `source` into `prices.price_ohlcv_1m`. + pub async fn write_candles( + &self, + candles: &[OhlcvCandle], + source: &str, + ) -> Result<(), IngestError> { + if candles.is_empty() { + return Ok(()); + } + + let mut insert = self.client.insert("prices.price_ohlcv_1m")?; + + for candle in candles { + insert + .write(&OhlcvRow { + timestamp: candle.minute_start, + asset_id: candle.asset_id, + quote_asset_id: candle.quote_asset_id, + source: source.to_string(), + open: decimal_to_i128(candle.open), + high: decimal_to_i128(candle.high), + low: decimal_to_i128(candle.low), + close: decimal_to_i128(candle.close), + volume_base: decimal_to_i128(candle.volume_base), + volume_quote: decimal_to_i128(candle.volume_quote), + // DEFAULT 0 — the 0026 enrichment Lambda fills this + // (volume_quote_usd = oracle_price * volume_quote). 
+ volume_quote_usd: 0, + // DEFAULT 0 — the enrichment pass fills this (task 0061, + // close_usd = oracle_price * close), same as volume_quote_usd. + close_usd: 0, + vwap: decimal_to_i128(candle.vwap), + trade_count: candle.trade_count, + version: candle.version, + }) + .await?; + } + insert.end().await?; + Ok(()) + } + + /// Write the asset registry into `prices.assets` (idempotent via + /// ReplacingMergeTree on the asset sort key). + pub async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), IngestError> { + let mut insert = self.client.insert("prices.assets")?; + + for (identity, &id) in registry.assets() { + let (asset_code, asset_type, issuer_address, contract_address) = match identity { + AssetIdentity::Native => { + ("XLM".to_string(), "classic", String::new(), String::new()) + } + AssetIdentity::Credit { code, issuer } => { + (code.clone(), "classic", issuer.clone(), String::new()) + } + AssetIdentity::Contract(addr) => { + (String::new(), "soroban", String::new(), addr.clone()) + } + }; + // The SAC that wraps this classic asset (§12.4) — '' for a pure + // Soroban token. Lets a read-time consumer resolve a SAC-wrapped leg. + let sac_address = registry.sac_address_of(identity).unwrap_or_default(); + + insert + .write(&AssetRow { + asset_id: id, + asset_code, + asset_type: asset_type.to_string(), + issuer_address, + contract_address, + sac_address, + home_domain: String::new(), + is_active: 1, + }) + .await?; + } + insert.end().await?; + Ok(()) + } + + /// Write decoded oracle price samples into `prices.oracle_prices`. 
+ pub async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), IngestError> { + if samples.is_empty() { + return Ok(()); + } + let mut insert = self.client.insert("prices.oracle_prices")?; + for s in samples { + insert + .write(&OracleRow { + timestamp: s.timestamp, + asset_id: s.asset_id, + oracle_name: s.oracle_name.clone(), + price_usd: s.price_usd, + raw_data: s.raw_data.clone(), + }) + .await?; + } + insert.end().await?; + Ok(()) + } +} + +#[derive(Debug, Serialize, clickhouse::Row)] +struct OhlcvRow { + timestamp: u32, + asset_id: u32, + quote_asset_id: u32, + source: String, + open: i128, + high: i128, + low: i128, + close: i128, + volume_base: i128, + volume_quote: i128, + volume_quote_usd: i128, + close_usd: i128, + vwap: i128, + trade_count: u32, + version: u64, +} + +#[derive(Debug, Serialize, clickhouse::Row)] +struct AssetRow { + asset_id: u32, + asset_code: String, + asset_type: String, + issuer_address: String, + contract_address: String, + sac_address: String, + home_domain: String, + is_active: u8, +} + +#[derive(Debug, Deserialize, clickhouse::Row)] +struct ExistingAssetRow { + asset_id: u32, + asset_code: String, + issuer_address: String, + contract_address: String, +} + +/// One decoded oracle price sample, ready for `prices.oracle_prices`. +#[derive(Debug, Clone)] +pub struct OracleSample { + pub timestamp: u32, + pub asset_id: u32, + pub oracle_name: String, + /// price_usd scaled to 14 decimals (matches Decimal(38,14)). 
+ pub price_usd: i128, + pub raw_data: String, +} + +#[derive(Debug, Serialize, clickhouse::Row)] +struct OracleRow { + timestamp: u32, + asset_id: u32, + oracle_name: String, + price_usd: i128, + raw_data: String, +} diff --git a/packages/prices-ledger-processor/.gitignore b/packages/prices-ledger-processor/.gitignore new file mode 100644 index 0000000..0c1fa53 --- /dev/null +++ b/packages/prices-ledger-processor/.gitignore @@ -0,0 +1,2 @@ +fixtures/ +out/ diff --git a/packages/prices-ledger-processor/Cargo.toml b/packages/prices-ledger-processor/Cargo.toml new file mode 100644 index 0000000..8e61d31 --- /dev/null +++ b/packages/prices-ledger-processor/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "prices-ledger-processor" +version = "0.1.0" +edition = "2024" +description = "Prices Ledger Processor — live S3/SNS-doorbell ingestion into prices.price_ohlcv_1m over mTLS (task 0038)" + +[lib] +name = "prices_ledger_processor" +path = "src/lib.rs" + +# The SQS-doorbell Lambda entrypoint. Behind `lambda` so a default `cargo build` +# stays lean (no lambda_runtime / aws-sdk-s3 / rustls mTLS stack); build it with +# `cargo build -p prices-ledger-processor --features lambda` (→ `cargo lambda` for +# the provided.al2023 ZIP, ADR 0006). +[[bin]] +name = "prices-ledger-processor" +path = "src/main.rs" +required-features = ["lambda"] + +# Local fixture runner — drives the same reconcile loop against local-disk +# fixtures and a local (plaintext) ClickHouse. Always builds. +[[bin]] +name = "prices-cli" +path = "src/bin/cli.rs" + +[features] +default = [] +# Remote ClickHouse over mTLS via the task-0052 prices-clickhouse::mtls client. +aws-mtls = ["prices-clickhouse/aws-mtls"] +# Full Lambda: SQS doorbell runtime + S3 object fetch + the mTLS sink. 
+lambda = ["aws-mtls", "dep:lambda_runtime", "dep:aws_lambda_events", "dep:aws-sdk-s3", "dep:aws-config"] + +[dependencies] +prices-ingest-core = { path = "../prices-ingest-core" } +prices-clickhouse = { path = "../prices-clickhouse" } + +tokio = { workspace = true } +clap = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } + +lambda_runtime = { workspace = true, optional = true } +aws_lambda_events = { workspace = true, optional = true } +aws-sdk-s3 = { workspace = true, optional = true } +aws-config = { workspace = true, optional = true } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/packages/prices-ledger-processor/README.md b/packages/prices-ledger-processor/README.md new file mode 100644 index 0000000..051f879 --- /dev/null +++ b/packages/prices-ledger-processor/README.md @@ -0,0 +1,91 @@ +# prices-ledger-processor + +Live ingestion of Stellar ledgers into `prices.price_ohlcv_1m` (task 0038). + +An SQS **doorbell** triggers a **doorbell-cursor reconcile loop** (mirroring BE's +production indexer): read cursor → derive the next Galexie S3 key → fetch → +decode → extract + bucket → write OHLCV candles to the shared Hetzner ClickHouse +over mTLS → advance the cursor **last**. + +## What it reuses (no drift) + +The decode → extract → canonicalise → bucket → write pipeline is **not** +reimplemented here — it is [`prices-ingest-core`](../prices-ingest-core), the +same tested code the SDEX backfill (`sdex-backfill`) runs. Live and backfill +therefore emit byte-identical `prices.price_ohlcv_1m` rows: same surrogate +`asset_id`s (via the shared `AssetRegistry`, with SAC→classic collapse), same +preferred-quote orientation, same `Decimal(38,14)` scaling, same `version`. 
+ +This crate owns only the **transport seams**: + +| Seam | Local (default) | Production (`lambda` feature) | +|------|-----------------|-------------------------------| +| `object_fetcher` | `LocalDiskFetcher` (fixtures) | `S3Fetcher` (`aws-sdk-s3` GetObject) | +| `sink` | `ClickHouseSink::plaintext` / `CountingSink` | `ClickHouseSink::from_lambda_env` (mTLS via [`prices-clickhouse::mtls`](../prices-clickhouse), task 0052) | +| `cursor` | `StubFileCursor` | `StubFileCursor` (CH-backed cursor is a follow-up — G-note Part D.1) | + +## Cargo features + +- `default` — lean: the local fixture runner only. No rustls / lambda / aws SDK. +- `aws-mtls` — the remote ClickHouse-over-mTLS sink. +- `lambda` — full Lambda: `lambda_runtime` SQS runtime + `aws-sdk-s3` fetch + + `aws-mtls`. Build: `cargo build -p prices-ledger-processor --features lambda` + (then `cargo lambda` for the `provided.al2023` ZIP, ADR 0006). + +## Run it locally + +```bash +# parse + bucket only, no DB (uses bundled fixtures) +cargo run -p prices-ledger-processor --bin prices-cli -- --cursor 62460539 --dry-run + +# write into a local Docker ClickHouse (apply the prices schema first via +# `prices-clickhouse-init`) +CLICKHOUSE_URL=http://localhost:8123 \ + cargo run -p prices-ledger-processor --bin prices-cli -- --cursor 62460539 +``` + +Fixtures live under `fixtures/ledgers/` and are **gitignored** +(large binary Galexie objects copied locally); the integration test self-skips +when they are absent. + +## Event contract (production) + +Doorbells reach the Lambda via **SNS fan-out** (2026-06-10 cross-team decision): + +``` +ledger PutObject → S3 ObjectCreated + → SNS topic (BE-owned, on stellar-ledger-data) + ├─ SQS ledger-ingest-{env} (BE) + └─ SQS prices-ingest-{env} (prices-api) + DLQ → this Lambda +``` + +The SQS message **body is ignored** — order comes from the cursor + S3 contents, +not delivery order (so no FIFO needed). 
`reservedConcurrency = 1` (CDK) keeps +runs serial, which is the ordering guarantee. Adding the prices SNS subscription +on BE's bucket is a cross-team change (tracked by task 0050); the CDK wiring is +already in `infra/` (prepare-only). + +## Environment variables + +Injected by CDK at deploy from `/platform/{env}/*` SSM (deploy-time handshake — +the Lambda reads only env vars, never SSM at runtime): + +| Var | Used by | Meaning | +|-----|---------|---------| +| `BUCKET_NAME` | `S3Fetcher` | BE's `stellar-ledger-data` bucket | +| `CH_DOMAIN` | `prices-clickhouse::mtls` | Caddy host fronting the Hetzner cluster | +| `MTLS_SECRET_NAME` | `prices-clickhouse::mtls` | Secrets Manager bundle (cert+key+ca) name | +| `CURSOR_FILE` / `INITIAL_CURSOR` | `StubFileCursor` | cursor checkpoint path / cold-start seed | +| `MAX_ITERATIONS` | reconcile loop | max contiguous ledgers per invocation (default 16) | +| `CLICKHOUSE_URL` | local CLI only | plaintext local ClickHouse endpoint | + +## Known follow-ups + +- **Cross-invocation intra-minute aggregation.** Candles aggregate across one + contiguous run; a minute split across two separate runs lands as two + `version`-keyed rows (ReplacingMergeTree keeps the latest). Same characteristic + the backfill has across partition boundaries; a periodic re-aggregation / + AggregatingMergeTree is the fix. +- **CH-backed cursor** (G-note Part D.1) — replace the file cursor. +- **rustls dedup** — `aws-sdk-s3` pulls an older rustls 0.21 alongside our + 0.23.40; unify to shrink the Lambda ZIP. diff --git a/packages/prices-ledger-processor/src/bin/cli.rs b/packages/prices-ledger-processor/src/bin/cli.rs new file mode 100644 index 0000000..0ba4fb9 --- /dev/null +++ b/packages/prices-ledger-processor/src/bin/cli.rs @@ -0,0 +1,101 @@ +//! Local fixture runner for the Prices Ledger Processor (task 0038). +//! +//! Drives the *same* reconcile loop the Lambda runs, but against local-disk +//! 
fixtures and a local (plaintext) ClickHouse — no AWS, no mTLS. Use it to +//! exercise the full decode → extract → bucket → write pipeline end-to-end: +//! +//! ```bash +//! # write into local Docker ClickHouse (apply prices schema first) +//! CLICKHOUSE_URL=http://localhost:8123 cargo run -p prices-ledger-processor \ +//! --bin prices-cli -- --cursor 62460539 --max-iterations 16 +//! +//! # parse + bucket only, no DB writes +//! cargo run -p prices-ledger-processor --bin prices-cli -- \ +//! --cursor 62460539 --dry-run +//! ``` + +use std::path::PathBuf; + +use clap::Parser; +use prices_ingest_core::{AssetRegistry, Registries}; +use prices_ledger_processor::{ + cursor::{Cursor, StubFileCursor}, + object_fetcher::LocalDiskFetcher, + reconcile::{Reconciler, RunStats}, + sink::{ClickHouseSink, CountingSink}, +}; +use tracing::info; + +#[derive(Parser, Debug)] +#[command( + name = "prices-cli", + about = "Local fixture runner for the Prices Ledger Processor (task 0038)" +)] +struct Args { + /// Initial cursor (the run starts at this ledger + 1). Overwrites the + /// cursor file before the run. + #[arg(long)] + cursor: u64, + + /// Maximum reconcile iterations (contiguous ledgers) per run. + #[arg(long, default_value_t = 16)] + max_iterations: usize, + + /// Local fixture root — derived Galexie keys are joined onto this. + #[arg(long, default_value = "fixtures/ledgers")] + fixtures_dir: PathBuf, + + /// Where the cursor file lives. + #[arg(long, default_value = "out/cursor.txt")] + cursor_file: PathBuf, + + /// Local ClickHouse endpoint (plaintext). Ignored with --dry-run. + #[arg(long, env = "CLICKHOUSE_URL", default_value = "http://localhost:8123")] + clickhouse_url: String, + + /// Parse + bucket only; do not write to ClickHouse (counts rows). 
+ #[arg(long, default_value_t = false)] + dry_run: bool, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .json() + .init(); + + let args = Args::parse(); + + let cursor = StubFileCursor::new(&args.cursor_file); + cursor.write(args.cursor).await?; + let fetcher = LocalDiskFetcher::new(&args.fixtures_dir); + + let stats: RunStats = if args.dry_run { + let reconciler = Reconciler::new( + fetcher, + cursor, + CountingSink::default(), + AssetRegistry::from_existing(Vec::new()), + Registries::new(), + ); + reconciler.run(args.max_iterations).await? + } else { + let sink = ClickHouseSink::plaintext(&args.clickhouse_url); + sink.preflight().await?; + let registry = sink.load_registry().await?; + let reconciler = Reconciler::new(fetcher, cursor, sink, registry, Registries::new()); + reconciler.run(args.max_iterations).await? + }; + + info!( + start = stats.start_cursor, + end = stats.end_cursor, + persisted = stats.ledgers_persisted, + rows = stats.rows_emitted, + dry_run = args.dry_run, + "reconcile complete" + ); + + Ok(()) +} diff --git a/packages/prices-ledger-processor/src/cursor/mod.rs b/packages/prices-ledger-processor/src/cursor/mod.rs new file mode 100644 index 0000000..117185f --- /dev/null +++ b/packages/prices-ledger-processor/src/cursor/mod.rs @@ -0,0 +1,24 @@ +//! Cursor trait — the production-swap seam for ledger-sequence state. +//! In prod this reads from / writes to a ClickHouse cursor table +//! (see G-note Part D.1 for the design question). 
+ +use std::future::Future; + +pub mod stub_file; + +pub use stub_file::StubFileCursor; + +#[derive(Debug, thiserror::Error)] +pub enum CursorError { + #[error("cursor read failed: {0}")] + Read(String), + #[error("cursor write failed: {0}")] + Write(String), + #[error("cursor value malformed: {0}")] + Parse(String), +} + +pub trait Cursor { + fn read(&self) -> impl Future> + Send; + fn write(&self, value: u64) -> impl Future> + Send; +} diff --git a/packages/prices-ledger-processor/src/cursor/stub_file.rs b/packages/prices-ledger-processor/src/cursor/stub_file.rs new file mode 100644 index 0000000..125851d --- /dev/null +++ b/packages/prices-ledger-processor/src/cursor/stub_file.rs @@ -0,0 +1,58 @@ +use std::path::{Path, PathBuf}; + +use super::{Cursor, CursorError}; + +pub struct StubFileCursor { + path: PathBuf, +} + +impl StubFileCursor { + pub fn new(path: impl AsRef) -> Self { + Self { + path: path.as_ref().to_path_buf(), + } + } +} + +impl Cursor for StubFileCursor { + async fn read(&self) -> Result { + let raw = tokio::fs::read_to_string(&self.path) + .await + .map_err(|e| CursorError::Read(e.to_string()))?; + raw.trim() + .parse::() + .map_err(|e| CursorError::Parse(e.to_string())) + } + + async fn write(&self, value: u64) -> Result<(), CursorError> { + if let Some(parent) = self.path.parent() { + tokio::fs::create_dir_all(parent) + .await + .map_err(|e| CursorError::Write(e.to_string()))?; + } + tokio::fs::write(&self.path, format!("{value}\n")) + .await + .map_err(|e| CursorError::Write(e.to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[tokio::test] + async fn write_then_read_roundtrips() { + let dir = tempdir().unwrap(); + let c = StubFileCursor::new(dir.path().join("cursor.txt")); + c.write(62_528_059).await.unwrap(); + assert_eq!(c.read().await.unwrap(), 62_528_059); + } + + #[tokio::test] + async fn missing_file_errors() { + let dir = tempdir().unwrap(); + let c = 
/// Number of ledger files Galexie groups under one partition prefix.
const FILES_PER_PARTITION: i64 = 64_000;

/// Builds the object key of the Galexie file that holds `ledger`.
///
/// The key has the shape
/// `{part_prefix}--{part_start}-{part_end}/{file_prefix}--{ledger}.xdr.zst`,
/// where `part_start..=part_end` is the 64 000-ledger partition containing
/// `ledger`, and each prefix is `0xFFFF_FFFF` minus the corresponding sequence,
/// rendered as eight upper-case hex digits.
pub fn ledger_s3_key(ledger: i64) -> String {
    // Round down (towards zero) to the first ledger of the partition.
    let part_start = ledger - ledger % FILES_PER_PARTITION;
    let part_end = part_start + FILES_PER_PARTITION - 1;

    // `as u32` keeps only the low 32 bits of the sequence; subtracting from
    // `u32::MAX` therefore cannot underflow.
    let part_prefix = u32::MAX - (part_start as u32);
    let file_prefix = u32::MAX - (ledger as u32);

    format!("{part_prefix:08X}--{part_start}-{part_end}/{file_prefix:08X}--{ledger}.xdr.zst")
}
assert!(prefix_next.ends_with("--128000-191999")); + } +} diff --git a/packages/prices-ledger-processor/src/lib.rs b/packages/prices-ledger-processor/src/lib.rs new file mode 100644 index 0000000..7bb7f2f --- /dev/null +++ b/packages/prices-ledger-processor/src/lib.rs @@ -0,0 +1,25 @@ +//! Prices Ledger Processor — live ingestion of Stellar ledgers into +//! `prices.price_ohlcv_1m` (task 0038). +//! +//! Shape mirrors BE's production indexer: an SQS **doorbell** triggers a +//! **doorbell-cursor reconcile loop** ([`reconcile`]) that walks contiguous +//! ledgers from S3, decodes + extracts + buckets them, writes OHLCV candles to +//! the shared Hetzner ClickHouse over mTLS, and advances its cursor last. +//! +//! The decode → extract → canonicalise → bucket → write pipeline is **not** +//! reimplemented here: it is `prices_ingest_core`, the same tested code the SDEX +//! backfill uses, so live and backfill rows are identical (same surrogate +//! `asset_id`s, SAC collapse, orientation, `Decimal`/`version`). This crate owns +//! only the *transport* seams: +//! - [`object_fetcher`] — local-disk (fixtures/tests) vs S3 (`lambda` feature). +//! - [`cursor`] — the ledger-sequence checkpoint. +//! - [`sink`] — the ClickHouse writer (plaintext local vs `aws-mtls` remote). +//! - [`galexie_key`] / [`retry`] — key derivation, backoff. (Log redaction now +//! lives at the error source in `prices_ingest_core::safe_log`.) + +pub mod cursor; +pub mod galexie_key; +pub mod object_fetcher; +pub mod reconcile; +pub mod retry; +pub mod sink; diff --git a/packages/prices-ledger-processor/src/main.rs b/packages/prices-ledger-processor/src/main.rs new file mode 100644 index 0000000..9d390a8 --- /dev/null +++ b/packages/prices-ledger-processor/src/main.rs @@ -0,0 +1,139 @@ +//! Lambda entrypoint — SQS doorbell handler (built only with `--features lambda`). +//! +//! Cold start mirrors BE's indexer: eager config + connectivity validation, then +//! 
one shared [`Reconciler`] reused across invocations. The SQS message body is +//! **ignored** — production doorbells arrive via SNS fan-out +//! (`S3 ObjectCreated → SNS (BE-owned) → prices-ingest-{env} SQS + DLQ → here`, +//! 2026-06-10 cross-team decision); raw or SNS-wrapped, the handler just runs +//! the doorbell-cursor reconcile loop. `reservedConcurrency = 1` (set in CDK) +//! keeps runs serial, which is the ordering guarantee. +//! +//! Transport here is production: S3 object fetch + ClickHouse over mTLS (task +//! 0052). The cursor is still a file checkpoint (`CURSOR_FILE`, seeded from +//! `INITIAL_CURSOR`) pending the CH-backed cursor decision (G-note Part D.1). + +use std::path::PathBuf; +use std::sync::Arc; + +use aws_lambda_events::sqs::{BatchItemFailure, SqsBatchResponse, SqsEvent}; +use lambda_runtime::{Error, LambdaEvent, run, service_fn}; +use prices_ingest_core::Registries; +use prices_ledger_processor::{ + cursor::{Cursor, StubFileCursor}, + object_fetcher::S3Fetcher, + reconcile::Reconciler, + sink::ClickHouseSink, +}; +use tracing::{error, info}; + +const ENV_BUCKET: &str = "BUCKET_NAME"; +const ENV_CURSOR_FILE: &str = "CURSOR_FILE"; +const ENV_INITIAL_CURSOR: &str = "INITIAL_CURSOR"; +const ENV_MAX_ITERATIONS: &str = "MAX_ITERATIONS"; +const DEFAULT_CURSOR_FILE: &str = "/tmp/prices-cursor.txt"; +const DEFAULT_MAX_ITERATIONS: usize = 16; + +type R = Reconciler; + +#[tokio::main] +async fn main() -> Result<(), Error> { + tracing_subscriber::fmt() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .json() + .init(); + + // Eager cold-start init — a missing env / unreachable cluster should be a + // Lambda Init error, not a per-event panic. 
+ let bucket = std::env::var(ENV_BUCKET) + .map_err(|_| Error::from(format!("{ENV_BUCKET} env var is required")))?; + let cursor_file = std::env::var(ENV_CURSOR_FILE) + .map(PathBuf::from) + .unwrap_or_else(|_| PathBuf::from(DEFAULT_CURSOR_FILE)); + let max_iterations: usize = std::env::var(ENV_MAX_ITERATIONS) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_MAX_ITERATIONS); + + let cursor = StubFileCursor::new(&cursor_file); + // Seed the cursor on a fresh container if it has no checkpoint yet. + if cursor.read().await.is_err() + && let Some(seed) = std::env::var(ENV_INITIAL_CURSOR) + .ok() + .and_then(|s| s.parse::().ok()) + { + cursor.write(seed).await?; + info!(seed, "seeded cursor from INITIAL_CURSOR"); + } + + // Build the S3 fetcher and the mTLS sink concurrently — they are + // independent (ambient AWS config load vs. Secrets-extension fetch + + // mTLS handshake), so joining them shaves their latency off cold start. + let (fetcher, sink) = tokio::join!( + S3Fetcher::from_env(&bucket), + ClickHouseSink::from_lambda_env() + ); + let sink = sink?; + // `load_registry` is the first ClickHouse round-trip, so it already + // surfaces an unreachable cluster as a Lambda Init error — a separate + // preflight `SELECT 1` would just be a redundant extra round-trip on the + // cold path. 
+ let registry = sink.load_registry().await?; + + info!( + %bucket, + cursor_file = %cursor_file.display(), + max_iterations, + "prices-ledger-processor cold start ready" + ); + + let reconciler: Arc = Arc::new(Reconciler::new( + fetcher, + cursor, + sink, + registry, + Registries::new(), + )); + + run(service_fn(move |event: LambdaEvent| { + let r = reconciler.clone(); + async move { handler(event, r, max_iterations).await } + })) + .await +} + +async fn handler( + event: LambdaEvent, + reconciler: Arc, + max_iterations: usize, +) -> Result { + let (payload, _ctx) = event.into_parts(); + let mut batch_item_failures = Vec::new(); + + for msg in &payload.records { + let message_id = msg.message_id.clone().unwrap_or_default(); + match reconciler.run(max_iterations).await { + Ok(stats) => info!( + message_id = %message_id, + start = stats.start_cursor, + end = stats.end_cursor, + persisted = stats.ledgers_persisted, + rows = stats.rows_emitted, + "doorbell processed" + ), + Err(e) => { + error!( + message_id = %message_id, + error = %e, + "reconcile failed — will redeliver doorbell" + ); + batch_item_failures.push(BatchItemFailure { + item_identifier: message_id, + }); + } + } + } + + Ok(SqsBatchResponse { + batch_item_failures, + }) +} diff --git a/packages/prices-ledger-processor/src/object_fetcher/local_disk.rs b/packages/prices-ledger-processor/src/object_fetcher/local_disk.rs new file mode 100644 index 0000000..dedb268 --- /dev/null +++ b/packages/prices-ledger-processor/src/object_fetcher/local_disk.rs @@ -0,0 +1,56 @@ +use std::path::{Path, PathBuf}; + +use super::{FetchError, ObjectFetcher}; + +pub struct LocalDiskFetcher { + root: PathBuf, +} + +impl LocalDiskFetcher { + pub fn new(root: impl AsRef) -> Self { + Self { + root: root.as_ref().to_path_buf(), + } + } +} + +impl ObjectFetcher for LocalDiskFetcher { + async fn fetch(&self, key: &str) -> Result>, FetchError> { + let path = self.root.join(key); + match tokio::fs::read(&path).await { + Ok(bytes) => 
Ok(Some(bytes)), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(source) => Err(FetchError::Io { + key: key.to_string(), + source, + }), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[tokio::test] + async fn hit_returns_bytes() { + let dir = tempdir().unwrap(); + let key = "subdir/file.bin"; + tokio::fs::create_dir_all(dir.path().join("subdir")) + .await + .unwrap(); + tokio::fs::write(dir.path().join(key), b"hello") + .await + .unwrap(); + let f = LocalDiskFetcher::new(dir.path()); + assert_eq!(f.fetch(key).await.unwrap(), Some(b"hello".to_vec())); + } + + #[tokio::test] + async fn miss_returns_none() { + let dir = tempdir().unwrap(); + let f = LocalDiskFetcher::new(dir.path()); + assert_eq!(f.fetch("nope").await.unwrap(), None); + } +} diff --git a/packages/prices-ledger-processor/src/object_fetcher/mod.rs b/packages/prices-ledger-processor/src/object_fetcher/mod.rs new file mode 100644 index 0000000..3621903 --- /dev/null +++ b/packages/prices-ledger-processor/src/object_fetcher/mod.rs @@ -0,0 +1,30 @@ +//! Object-source trait — the production-swap seam where prototype-mode +//! local-disk reads become `aws_sdk_s3::Client::get_object` calls. + +use std::future::Future; + +pub mod local_disk; +#[cfg(feature = "lambda")] +pub mod s3; + +pub use local_disk::LocalDiskFetcher; +#[cfg(feature = "lambda")] +pub use s3::S3Fetcher; + +#[derive(Debug, thiserror::Error)] +pub enum FetchError { + #[error("i/o error fetching {key}: {source}")] + Io { + key: String, + #[source] + source: std::io::Error, + }, + #[error("object-store error fetching {key}: {detail}")] + Backend { key: String, detail: String }, +} + +pub trait ObjectFetcher { + /// `Ok(Some(bytes))` on hit, `Ok(None)` on miss (treat as a gap and + /// stop the reconcile run), `Err(...)` on a hard error. 
+ fn fetch(&self, key: &str) -> impl Future>, FetchError>> + Send; +} diff --git a/packages/prices-ledger-processor/src/object_fetcher/s3.rs b/packages/prices-ledger-processor/src/object_fetcher/s3.rs new file mode 100644 index 0000000..d2daaf7 --- /dev/null +++ b/packages/prices-ledger-processor/src/object_fetcher/s3.rs @@ -0,0 +1,64 @@ +//! S3-backed [`ObjectFetcher`] — the production fetch path. +//! +//! Reads Galexie `*.xdr.zst` objects from BE's `stellar-ledger-data` bucket by +//! their derived key. A `NoSuchKey` is mapped to `Ok(None)` (a gap → the +//! reconcile loop stops cleanly), every other S3 error to `Err`. Bucket name +//! arrives via env var (CDK injects it from `/platform/{env}/…` SSM at deploy). + +use aws_sdk_s3::Client; + +use super::{FetchError, ObjectFetcher}; + +pub struct S3Fetcher { + client: Client, + bucket: String, +} + +impl S3Fetcher { + pub fn new(client: Client, bucket: impl Into) -> Self { + Self { + client, + bucket: bucket.into(), + } + } + + /// Build from the ambient AWS config (Lambda execution role). 
+ pub async fn from_env(bucket: impl Into) -> Self { + let cfg = aws_config::defaults(aws_config::BehaviorVersion::latest()) + .load() + .await; + Self::new(Client::new(&cfg), bucket) + } +} + +impl ObjectFetcher for S3Fetcher { + async fn fetch(&self, key: &str) -> Result>, FetchError> { + match self + .client + .get_object() + .bucket(&self.bucket) + .key(key) + .send() + .await + { + Ok(out) => { + let data = out.body.collect().await.map_err(|e| FetchError::Backend { + key: key.to_string(), + detail: e.to_string(), + })?; + Ok(Some(data.into_bytes().to_vec())) + } + Err(err) => { + let svc = err.into_service_error(); + if svc.is_no_such_key() { + Ok(None) + } else { + Err(FetchError::Backend { + key: key.to_string(), + detail: svc.to_string(), + }) + } + } + } + } +} diff --git a/packages/prices-ledger-processor/src/reconcile.rs b/packages/prices-ledger-processor/src/reconcile.rs new file mode 100644 index 0000000..e686c53 --- /dev/null +++ b/packages/prices-ledger-processor/src/reconcile.rs @@ -0,0 +1,180 @@ +//! Doorbell-cursor reconcile loop. +//! +//! Mirrors BE's indexer: read cursor, derive the next S3 key, fetch, decode, +//! extract+bucket, write, advance the cursor **last**. Stops at the first gap or +//! `max_iterations`. The cursor write is the ordering barrier — a crash before +//! it leaves the cursor unchanged and the next invocation re-processes the run +//! (idempotent: ReplacingMergeTree collapses re-inserts by `version`). +//! +//! The decode→extract→canonicalise→bucket step is `prices_ingest_core` — the +//! same code the SDEX backfill runs — so live candles are byte-identical to +//! backfilled ones. Candles accumulate across the whole contiguous run and are +//! flushed once at the end, so all ledgers sharing a minute aggregate into one +//! candle (matching the backfill's per-chunk accumulation). The only residual +//! is a minute split across two separate invocations/runs; that is the same +//! 
`version`-keyed characteristic the backfill has across partition boundaries, +//! and a periodic re-aggregation is tracked as a follow-up. + +use std::collections::HashMap; + +use prices_ingest_core::{ + AssetRegistry, CandleAccumulator, OracleSample, Registries, decode_object, extract_trades, + ledger_sequence, process_ledger, raw_trade_to_tick, +}; +use tokio::sync::Mutex; +use tracing::info; + +use crate::cursor::{Cursor, CursorError}; +use crate::galexie_key::ledger_s3_key; +use crate::object_fetcher::{FetchError, ObjectFetcher}; +use crate::sink::{CandleSink, SinkError}; + +#[derive(Debug, thiserror::Error)] +pub enum ReconcileError { + #[error("cursor error: {0}")] + Cursor(#[from] CursorError), + #[error("fetch error: {0}")] + Fetch(#[from] FetchError), + #[error("decode error: {0}")] + Decode(String), + #[error("sink error: {0}")] + Sink(#[from] SinkError), +} + +#[derive(Debug, Clone, Default)] +pub struct RunStats { + pub start_cursor: u64, + pub end_cursor: u64, + pub ledgers_persisted: u64, + pub rows_emitted: u64, +} + +/// Warm per-container processing state: the surrogate-id registry (loaded from +/// `prices.assets` at cold start) and the incrementally-grown AMM venue/pool +/// registries. Persisting these across invocations lets a warm Lambda resolve +/// pools discovered earlier in its lifetime. 
+pub struct ProcessingState { + pub assets: AssetRegistry, + pub registries: Registries, +} + +pub struct Reconciler { + fetcher: F, + cursor: C, + sink: S, + state: Mutex, +} + +impl Reconciler +where + F: ObjectFetcher + Sync, + C: Cursor + Sync, + S: CandleSink + Sync, +{ + pub fn new( + fetcher: F, + cursor: C, + sink: S, + assets: AssetRegistry, + registries: Registries, + ) -> Self { + Self { + fetcher, + cursor, + sink, + state: Mutex::new(ProcessingState { assets, registries }), + } + } + + pub async fn run(&self, max_iterations: usize) -> Result { + let mut st = self.state.lock().await; + // Deref the guard once so `registries` and `assets` can be borrowed as + // disjoint fields (a borrow through the guard's DerefMut each time would + // conflict). + let state = &mut *st; + + let start = self.cursor.read().await?; + let mut current = start; + let mut persisted = 0u64; + + // Accumulate across the whole contiguous run, flush once at the end. + let mut sdex = CandleAccumulator::new(); + let mut amm: HashMap<&'static str, CandleAccumulator> = HashMap::new(); + let mut oracle: Vec = Vec::new(); + + for _ in 0..max_iterations { + let next = current + 1; + let key = ledger_s3_key(next as i64); + let Some(bytes) = self.fetcher.fetch(&key).await? else { + if persisted == 0 { + info!(next, "no new contiguous ledger — nothing to do"); + } else { + info!(next, persisted, "reached gap on S3 — contiguous run done"); + } + break; + }; + + let lcms = decode_object(&bytes).map_err(|e| ReconcileError::Decode(e.to_string()))?; + let mut obj_max = current; + for lcm in &lcms { + // Classic SDEX trades from operation results. + for trade in extract_trades(lcm) { + sdex.merge(&raw_trade_to_tick(&trade, &mut state.assets)); + } + // Soroban AMM trades + oracle samples. 
+ let sob = process_ledger(lcm, &mut state.registries, &mut state.assets); + for (source, tick) in &sob.amm_ticks { + amm.entry(source) + .or_insert_with(CandleAccumulator::new) + .merge(tick); + } + oracle.extend(sob.oracle); + obj_max = obj_max.max(ledger_sequence(lcm) as u64); + } + + current = obj_max.max(next); + persisted += 1; + } + + if persisted == 0 { + return Ok(RunStats { + start_cursor: start, + end_cursor: start, + ledgers_persisted: 0, + rows_emitted: 0, + }); + } + + // Flush + write, then advance the cursor LAST (ordering barrier). + let mut rows_emitted = 0u64; + + let sdex_candles = sdex.flush_all(); + rows_emitted += sdex_candles.len() as u64; + self.sink.write_candles(&sdex_candles, "sdex").await?; + + for (source, mut acc) in amm { + let candles = acc.flush_all(); + rows_emitted += candles.len() as u64; + self.sink.write_candles(&candles, source).await?; + } + + self.sink.write_oracle(&oracle).await?; + self.sink.write_assets(&state.assets).await?; + self.cursor.write(current).await?; + + info!( + start, + end = current, + persisted, + rows = rows_emitted, + "reconcile run complete" + ); + + Ok(RunStats { + start_cursor: start, + end_cursor: current, + ledgers_persisted: persisted, + rows_emitted, + }) + } +} diff --git a/packages/prices-ledger-processor/src/retry.rs b/packages/prices-ledger-processor/src/retry.rs new file mode 100644 index 0000000..856c056 --- /dev/null +++ b/packages/prices-ledger-processor/src/retry.rs @@ -0,0 +1,117 @@ +//! Retry-with-backoff envelope mirroring BE's indexer +//! (`crates/indexer/src/handler/mod.rs:113`). +//! +//! `[50, 200, 800] ms` cadence — three retries, four wire calls total. +//! Only the caller knows which errors are transient; pass a classifier. + +use std::time::Duration; + +pub const DEFAULT_BACKOFF_MS: [u64; 3] = [50, 200, 800]; + +/// Returns `Ok(attempts)` where `attempts` is the retry count (0 = first +/// attempt succeeded). Errors classified as non-transient short-circuit. 
+pub async fn retry_with_backoff( + backoff_ms: &[u64], + is_transient: P, + mut attempt: F, +) -> Result<(T, u32), E> +where + F: FnMut() -> Fut, + Fut: std::future::Future>, + P: Fn(&E) -> bool, +{ + let mut tries: u32 = 0; + loop { + match attempt().await { + Ok(v) => return Ok((v, tries)), + Err(e) => { + if !is_transient(&e) || tries as usize >= backoff_ms.len() { + return Err(e); + } + tokio::time::sleep(Duration::from_millis(backoff_ms[tries as usize])).await; + tries += 1; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::Cell; + + #[derive(Debug, PartialEq)] + enum FakeErr { + Transient, + Permanent, + } + + #[tokio::test] + async fn succeeds_first_try() { + let (v, tries) = retry_with_backoff( + &[1, 1, 1], + |_: &FakeErr| true, + || async { Ok::(42) }, + ) + .await + .unwrap(); + assert_eq!(v, 42); + assert_eq!(tries, 0); + } + + #[tokio::test] + async fn retries_transient_then_succeeds() { + let calls = Cell::new(0u32); + let (v, tries) = retry_with_backoff( + &[1, 1, 1], + |e: &FakeErr| matches!(e, FakeErr::Transient), + || async { + let n = calls.get(); + calls.set(n + 1); + if n < 2 { + Err(FakeErr::Transient) + } else { + Ok::(7) + } + }, + ) + .await + .unwrap(); + assert_eq!(v, 7); + assert_eq!(tries, 2); + } + + #[tokio::test] + async fn permanent_error_short_circuits() { + let calls = Cell::new(0u32); + let err = retry_with_backoff( + &[1, 1, 1], + |e: &FakeErr| matches!(e, FakeErr::Transient), + || async { + calls.set(calls.get() + 1); + Err::(FakeErr::Permanent) + }, + ) + .await + .unwrap_err(); + assert_eq!(err, FakeErr::Permanent); + assert_eq!(calls.get(), 1); + } + + #[tokio::test] + async fn exhausts_backoff_then_fails() { + let calls = Cell::new(0u32); + let err = retry_with_backoff( + &[1, 1, 1], + |e: &FakeErr| matches!(e, FakeErr::Transient), + || async { + calls.set(calls.get() + 1); + Err::(FakeErr::Transient) + }, + ) + .await + .unwrap_err(); + assert_eq!(err, FakeErr::Transient); + 
assert_eq!(calls.get(), 4); // 1 initial + 3 retries + } +} diff --git a/packages/prices-ledger-processor/src/sink/mod.rs b/packages/prices-ledger-processor/src/sink/mod.rs new file mode 100644 index 0000000..f985478 --- /dev/null +++ b/packages/prices-ledger-processor/src/sink/mod.rs @@ -0,0 +1,158 @@ +//! OHLCV sink — the seam that turns bucketed candles into ClickHouse rows. +//! +//! The real sink ([`ClickHouseSink`]) wraps the shared +//! [`prices_ingest_core::OhlcvWriter`], so it writes the exact same +//! `prices.price_ohlcv_1m` rows as the SDEX backfill. It is transport-agnostic: +//! [`ClickHouseSink::plaintext`] talks to a local Docker ClickHouse, and +//! (with the `aws-mtls` feature) [`ClickHouseSink::from_lambda_env`] talks to the +//! shared Hetzner cluster over mTLS via the task-0052 client. Tests use the +//! in-memory [`CountingSink`]. + +use std::future::Future; + +use prices_ingest_core::{AssetRegistry, OhlcvCandle, OhlcvWriter, OracleSample}; + +use crate::retry::{DEFAULT_BACKOFF_MS, retry_with_backoff}; + +#[derive(Debug, thiserror::Error)] +pub enum SinkError { + #[error("sink write failed: {0}")] + Write(String), +} + +/// Writes the three shared `prices.*` outputs of a reconcile run. Candle writes +/// are idempotent (ReplacingMergeTree keyed by `version`), so the sink may be +/// retried freely. +pub trait CandleSink { + fn write_candles( + &self, + candles: &[OhlcvCandle], + source: &str, + ) -> impl Future> + Send; + + fn write_oracle( + &self, + samples: &[OracleSample], + ) -> impl Future> + Send; + + fn write_assets( + &self, + registry: &AssetRegistry, + ) -> impl Future> + Send; +} + +/// ClickHouse sink backed by the shared [`OhlcvWriter`]. Works against either a +/// plaintext local client or the mTLS remote client — both are a +/// `clickhouse::Client`. +pub struct ClickHouseSink { + writer: OhlcvWriter, +} + +impl ClickHouseSink { + /// Local / Docker ClickHouse over plain HTTP (no TLS). 
Used by the CLI + /// fixture runner and the local integration test. + pub fn plaintext(url: &str) -> Self { + Self { + writer: OhlcvWriter::plaintext(url), + } + } + + /// Remote Hetzner ClickHouse over mTLS, built from the Lambda's + /// `MTLS_SECRET_NAME` / `CH_DOMAIN` env vars via the task-0052 client. + #[cfg(feature = "aws-mtls")] + pub async fn from_lambda_env() -> Result { + let client = + prices_clickhouse::mtls::client_from_lambda_env(prices_clickhouse::PROD_DATABASE) + .await + .map_err(|e| SinkError::Write(format!("mtls client init: {e}")))?; + Ok(Self { + writer: OhlcvWriter::new(client), + }) + } + + /// Probe connectivity (`SELECT 1`). Call once at cold start so an + /// unreachable cluster surfaces as a Lambda Init error, not per-event. + pub async fn preflight(&self) -> Result<(), SinkError> { + self.writer.preflight().await.map_err(redact) + } + + /// Load the existing asset registry from `prices.assets` so surrogate ids + /// are reused (not reassigned) across cold starts — the load-bearing + /// guarantee that live ids match the backfill's. + pub async fn load_registry(&self) -> Result { + let existing = self.writer.load_assets().await.map_err(redact)?; + Ok(AssetRegistry::from_existing(existing)) + } +} + +impl CandleSink for ClickHouseSink { + async fn write_candles(&self, candles: &[OhlcvCandle], source: &str) -> Result<(), SinkError> { + // Idempotent (RMT by version) → retry every failure as transient. + // Finer permanent-vs-transient classification is a follow-up. 
+ retry_with_backoff( + &DEFAULT_BACKOFF_MS, + |_| true, + || async { + self.writer + .write_candles(candles, source) + .await + .map_err(redact) + }, + ) + .await + .map(|_| ()) + } + + async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), SinkError> { + retry_with_backoff( + &DEFAULT_BACKOFF_MS, + |_| true, + || async { self.writer.write_oracle(samples).await.map_err(redact) }, + ) + .await + .map(|_| ()) + } + + async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), SinkError> { + retry_with_backoff( + &DEFAULT_BACKOFF_MS, + |_| true, + || async { self.writer.write_assets(registry).await.map_err(redact) }, + ) + .await + .map(|_| ()) + } +} + +/// Map an ingest error into a sink error. `IngestError`'s `Display` is already +/// leak-safe — its ClickHouse variant redacts the `BadResponse` body down to the +/// leading `Code: NNN` / status token (see +/// [`prices_ingest_core::safe_response_token`]) — so this is a plain string map. +fn redact(e: prices_ingest_core::IngestError) -> SinkError { + SinkError::Write(e.to_string()) +} + +/// In-memory sink for tests and `--dry-run`: counts rows, touches no network. 
+#[derive(Default)] +pub struct CountingSink { + pub candles: std::sync::atomic::AtomicU64, + pub oracle: std::sync::atomic::AtomicU64, +} + +impl CandleSink for CountingSink { + async fn write_candles(&self, candles: &[OhlcvCandle], _source: &str) -> Result<(), SinkError> { + self.candles + .fetch_add(candles.len() as u64, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + + async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), SinkError> { + self.oracle + .fetch_add(samples.len() as u64, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + + async fn write_assets(&self, _registry: &AssetRegistry) -> Result<(), SinkError> { + Ok(()) + } +} diff --git a/packages/prices-ledger-processor/tests/reconcile_e2e.rs b/packages/prices-ledger-processor/tests/reconcile_e2e.rs new file mode 100644 index 0000000..73b84a3 --- /dev/null +++ b/packages/prices-ledger-processor/tests/reconcile_e2e.rs @@ -0,0 +1,123 @@ +//! End-to-end reconcile test against the bundled real Galexie fixtures. +//! +//! Drives the production pipeline (`prices_ingest_core` decode → extract → +//! bucket) over the three contiguous fixture ledgers 62460540–62460542 using a +//! local-disk fetcher and an in-memory counting sink (no ClickHouse). Proves the +//! doorbell-cursor loop decodes real XDR, advances the cursor to the last +//! contiguous ledger, stops at the gap, and is idempotent on re-run. +//! +//! Fixtures are gitignored (large binary Galexie objects, copied locally), so +//! each test **self-skips** when they are absent — matching the repo's +//! self-skipping integration-test convention (`prices-clickhouse` mtls smoke). 
+ +use std::path::PathBuf; + +use prices_ingest_core::{AssetRegistry, Registries}; +use prices_ledger_processor::{ + cursor::{Cursor, StubFileCursor}, + object_fetcher::LocalDiskFetcher, + reconcile::Reconciler, + sink::CountingSink, +}; +use tempfile::tempdir; + +const FIRST_FIXTURE: u64 = 62_460_540; +const LAST_FIXTURE: u64 = 62_460_542; + +fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("fixtures/ledgers") +} + +/// The first fixture file must be present, else the test self-skips. +fn fixtures_present() -> bool { + let key = format!("FC47D9FF--62400000-62463999/FC46ED83--{FIRST_FIXTURE}.xdr.zst"); + fixtures_dir().join(key).exists() +} + +macro_rules! skip_if_no_fixtures { + () => { + if !fixtures_present() { + eprintln!( + "skipping: no local fixtures under packages/prices-ledger-processor/fixtures/" + ); + return; + } + }; +} + +fn reconciler( + fixtures: PathBuf, + cursor: StubFileCursor, +) -> Reconciler { + Reconciler::new( + LocalDiskFetcher::new(fixtures), + cursor, + CountingSink::default(), + AssetRegistry::from_existing(Vec::new()), + Registries::new(), + ) +} + +#[tokio::test] +async fn contiguous_run_decodes_real_fixtures_and_advances_cursor() { + skip_if_no_fixtures!(); + let dir = tempdir().unwrap(); + let cursor = StubFileCursor::new(dir.path().join("cursor.txt")); + cursor.write(FIRST_FIXTURE - 1).await.unwrap(); + + let stats = reconciler(fixtures_dir(), cursor) + .run(16) + .await + .expect("real-fixture reconcile run should succeed"); + + assert_eq!(stats.start_cursor, FIRST_FIXTURE - 1); + assert_eq!( + stats.end_cursor, LAST_FIXTURE, + "cursor should advance to the last contiguous fixture ledger" + ); + assert_eq!( + stats.ledgers_persisted, 3, + "all three contiguous fixtures should be processed" + ); + + // Cursor file persisted at the last ledger → next invocation resumes here. 
+ let resumed = StubFileCursor::new(dir.path().join("cursor.txt")); + assert_eq!(resumed.read().await.unwrap(), LAST_FIXTURE); +} + +#[tokio::test] +async fn gap_stop_when_no_new_ledger() { + skip_if_no_fixtures!(); + let dir = tempdir().unwrap(); + let cursor = StubFileCursor::new(dir.path().join("cursor.txt")); + // Start past the last fixture → next key (62460543) is a miss → gap stop. + cursor.write(LAST_FIXTURE).await.unwrap(); + + let stats = reconciler(fixtures_dir(), cursor).run(16).await.unwrap(); + + assert_eq!(stats.ledgers_persisted, 0); + assert_eq!(stats.end_cursor, LAST_FIXTURE); + assert_eq!(stats.rows_emitted, 0); +} + +#[tokio::test] +async fn idempotent_on_re_run_from_same_cursor() { + skip_if_no_fixtures!(); + let run = || async { + let dir = tempdir().unwrap(); + let cursor = StubFileCursor::new(dir.path().join("cursor.txt")); + cursor.write(FIRST_FIXTURE - 1).await.unwrap(); + reconciler(fixtures_dir(), cursor).run(16).await.unwrap() + }; + + let first = run().await; + let second = run().await; + + assert_eq!(first.start_cursor, second.start_cursor); + assert_eq!(first.end_cursor, second.end_cursor); + assert_eq!(first.ledgers_persisted, second.ledgers_persisted); + assert_eq!( + first.rows_emitted, second.rows_emitted, + "row count must be deterministic across identical runs" + ); +} diff --git a/packages/sdex-backfill/Cargo.toml b/packages/sdex-backfill/Cargo.toml index 24e880f..e29b915 100644 --- a/packages/sdex-backfill/Cargo.toml +++ b/packages/sdex-backfill/Cargo.toml @@ -22,9 +22,5 @@ serde = { workspace = true } serde_json = { workspace = true } rust_decimal = { workspace = true } thiserror = { workspace = true } +prices-ingest-core = { path = "../prices-ingest-core" } prices-clickhouse = { path = "../prices-clickhouse" } -extractors-core = { path = "../extractors-core" } -phoenix-extractor = { path = "../phoenix-extractor" } -soroswap-extractor = { path = "../soroswap-extractor" } -aquarius-extractor = { path = "../aquarius-extractor" 
} -ledger-processor = { path = "../ledger-processor" } diff --git a/packages/sdex-backfill/src/error.rs b/packages/sdex-backfill/src/error.rs index aba758f..ed9d74a 100644 --- a/packages/sdex-backfill/src/error.rs +++ b/packages/sdex-backfill/src/error.rs @@ -38,4 +38,7 @@ pub enum BackfillError { #[error("clickhouse: {0}")] Clickhouse(#[from] clickhouse::error::Error), + + #[error("ingest: {0}")] + Ingest(#[from] prices_ingest_core::IngestError), } diff --git a/packages/sdex-backfill/src/ingest.rs b/packages/sdex-backfill/src/ingest.rs index 0a6b67e..6ac6638 100644 --- a/packages/sdex-backfill/src/ingest.rs +++ b/packages/sdex-backfill/src/ingest.rs @@ -4,14 +4,13 @@ use std::time::{Duration, Instant}; use tracing::{info, warn}; -use crate::bucket::CandleAccumulator; -use crate::canonical::AssetRegistry; +use prices_ingest_core::{ + AssetRegistry, CandleAccumulator, Registries, extract_trades, process_ledger, raw_trade_to_tick, +}; + use crate::error::BackfillError; -use crate::filter::extract_trades; use crate::partition::Partition; use crate::sink::{OracleSample, Sink}; -use crate::soroban::{Registries, process_ledger}; -use crate::tick::raw_trade_to_tick; const ORACLE_FLUSH_THRESHOLD: usize = 50_000; diff --git a/packages/sdex-backfill/src/main.rs b/packages/sdex-backfill/src/main.rs index 3721a9d..a9ba3ee 100644 --- a/packages/sdex-backfill/src/main.rs +++ b/packages/sdex-backfill/src/main.rs @@ -1,17 +1,11 @@ -mod bucket; -mod canonical; mod cli; mod error; -mod filter; mod ingest; mod obs; mod partition; -mod price; mod run; mod sink; -mod soroban; mod sync; -mod tick; use clap::Parser; diff --git a/packages/sdex-backfill/src/run.rs b/packages/sdex-backfill/src/run.rs index 9a8bd2c..8ff00cf 100644 --- a/packages/sdex-backfill/src/run.rs +++ b/packages/sdex-backfill/src/run.rs @@ -5,12 +5,12 @@ use tokio::process::Command; use tokio::task::JoinHandle; use tracing::{info, warn}; -use crate::canonical::AssetRegistry; +use prices_ingest_core::{AssetRegistry, 
Registries}; + use crate::error::BackfillError; use crate::ingest::{PartitionStats, index_partition}; use crate::partition::{Partition, partitions_for_range}; use crate::sink::Sink; -use crate::soroban::Registries; use crate::sync::{SyncOutcome, sync_partition}; pub async fn execute( diff --git a/packages/sdex-backfill/src/sink.rs b/packages/sdex-backfill/src/sink.rs index 536b414..54be549 100644 --- a/packages/sdex-backfill/src/sink.rs +++ b/packages/sdex-backfill/src/sink.rs @@ -1,47 +1,74 @@ +//! Backfill ClickHouse sink — a thin wrapper over the shared +//! [`prices_ingest_core::OhlcvWriter`]. +//! +//! The candle / asset / oracle writes (and the asset-registry load) are shared +//! with the live Lambda and live in the core writer, so both paths emit +//! byte-identical `prices.*` rows. This wrapper adds only the **backfill-only** +//! resume bookkeeping against `prices.backfill_sdex_ledgers` (the live Lambda +//! uses its own doorbell cursor instead). `OracleSample` is re-exported so the +//! rest of the backfill keeps importing it from `crate::sink`. + use std::collections::HashSet; -use clickhouse::Client; -use rust_decimal::Decimal; -use serde::{Deserialize, Serialize}; +use clickhouse::Row; +use prices_ingest_core::canonical::AssetIdentity; +use prices_ingest_core::{AssetRegistry, OhlcvCandle, OhlcvWriter}; +use serde::Serialize; use tracing::info; -use crate::bucket::OhlcvCandle; -use crate::canonical::{AssetIdentity, AssetRegistry}; -use crate::error::BackfillError; +pub use prices_ingest_core::OracleSample; -fn decimal_to_i128(d: Decimal) -> i128 { - let d = d.round_dp(14); - // `Decimal(38,14)` holds at most 38 significant digits. AMM amounts/prices - // are i128-derived and can be far larger than SDEX stroops, so a naive - // `mantissa * 10^(14-scale)` can overflow i128 and panic. Saturate instead: - // an out-of-range value is clamped to the representable bound rather than - // aborting the whole backfill. 
- let factor = 10i128.pow(14 - d.scale()); - d.mantissa().saturating_mul(factor) -} +use crate::error::BackfillError; pub struct Sink { - client: Client, + writer: OhlcvWriter, } impl Sink { pub fn new(url: &str) -> Self { - let client = Client::default().with_url(url); - Self { client } + Self { + writer: OhlcvWriter::plaintext(url), + } } pub async fn preflight(&self) -> Result<(), BackfillError> { - self.client.query("SELECT 1").execute().await?; + self.writer.preflight().await?; + Ok(()) + } + + pub async fn load_assets(&self) -> Result, BackfillError> { + Ok(self.writer.load_assets().await?) + } + + pub async fn write_candles( + &self, + candles: &[OhlcvCandle], + source: &str, + ) -> Result<(), BackfillError> { + self.writer.write_candles(candles, source).await?; + Ok(()) + } + + pub async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), BackfillError> { + self.writer.write_assets(registry).await?; + Ok(()) + } + + pub async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), BackfillError> { + self.writer.write_oracle(samples).await?; Ok(()) } + // --- backfill-only resume bookkeeping (prices.backfill_sdex_ledgers) --- + pub async fn load_completed( &self, start: u32, end: u32, ) -> Result, BackfillError> { let rows = self - .client + .writer + .client() .query( "SELECT sequence FROM prices.backfill_sdex_ledgers \ WHERE sequence BETWEEN ? 
AND ?", @@ -61,140 +88,14 @@ impl Sink { Ok(set) } - pub async fn load_assets(&self) -> Result, BackfillError> { - let rows = self - .client - .query( - "SELECT asset_id, asset_code, issuer_address, contract_address FROM prices.assets", - ) - .fetch_all::() - .await?; - - let assets: Vec<(u32, AssetIdentity)> = rows - .into_iter() - .map(|r| { - let identity = if !r.contract_address.is_empty() { - AssetIdentity::Contract(r.contract_address) - } else if r.asset_code == "XLM" && r.issuer_address.is_empty() { - AssetIdentity::Native - } else { - AssetIdentity::Credit { - code: r.asset_code, - issuer: r.issuer_address, - } - }; - (r.asset_id, identity) - }) - .collect(); - - info!( - existing_assets = assets.len(), - "loaded asset registry from ClickHouse" - ); - Ok(assets) - } - - pub async fn write_candles( - &self, - candles: &[OhlcvCandle], - source: &str, - ) -> Result<(), BackfillError> { - if candles.is_empty() { - return Ok(()); - } - - let mut insert = self.client.insert("prices.price_ohlcv_1m")?; - - for candle in candles { - insert - .write(&OhlcvRow { - timestamp: candle.minute_start, - asset_id: candle.asset_id, - quote_asset_id: candle.quote_asset_id, - source: source.to_string(), - open: decimal_to_i128(candle.open), - high: decimal_to_i128(candle.high), - low: decimal_to_i128(candle.low), - close: decimal_to_i128(candle.close), - volume_base: decimal_to_i128(candle.volume_base), - volume_quote: decimal_to_i128(candle.volume_quote), - // DEFAULT 0 — the 0026 enrichment Lambda fills this - // (volume_quote_usd = oracle_price * volume_quote). - volume_quote_usd: 0, - // DEFAULT 0 — the enrichment pass fills this (task 0061, - // close_usd = oracle_price * close), same as volume_quote_usd. 
- close_usd: 0, - vwap: decimal_to_i128(candle.vwap), - trade_count: candle.trade_count, - version: candle.version, - }) - .await?; - } - insert.end().await?; - Ok(()) - } - - pub async fn write_assets(&self, registry: &AssetRegistry) -> Result<(), BackfillError> { - let mut insert = self.client.insert("prices.assets")?; - - for (identity, &id) in registry.assets() { - let (asset_code, asset_type, issuer_address, contract_address) = match identity { - AssetIdentity::Native => { - ("XLM".to_string(), "classic", String::new(), String::new()) - } - AssetIdentity::Credit { code, issuer } => { - (code.clone(), "classic", issuer.clone(), String::new()) - } - AssetIdentity::Contract(addr) => { - (String::new(), "soroban", String::new(), addr.clone()) - } - }; - // The SAC that wraps this classic asset (§12.4) — '' for a pure - // Soroban token. Lets a read-time consumer resolve a SAC-wrapped leg. - let sac_address = registry.sac_address_of(identity).unwrap_or_default(); - - insert - .write(&AssetRow { - asset_id: id, - asset_code, - asset_type: asset_type.to_string(), - issuer_address, - contract_address, - sac_address, - home_domain: String::new(), - is_active: 1, - }) - .await?; - } - insert.end().await?; - Ok(()) - } - - pub async fn write_oracle(&self, samples: &[OracleSample]) -> Result<(), BackfillError> { - if samples.is_empty() { - return Ok(()); - } - let mut insert = self.client.insert("prices.oracle_prices")?; - for s in samples { - insert - .write(&OracleRow { - timestamp: s.timestamp, - asset_id: s.asset_id, - oracle_name: s.oracle_name.clone(), - price_usd: s.price_usd, - raw_data: s.raw_data.clone(), - }) - .await?; - } - insert.end().await?; - Ok(()) - } - pub async fn write_completed_ledgers(&self, sequences: &[u32]) -> Result<(), BackfillError> { if sequences.is_empty() { return Ok(()); } - let mut insert = self.client.insert("prices.backfill_sdex_ledgers")?; + let mut insert = self + .writer + .client() + .insert("prices.backfill_sdex_ledgers")?; for 
&seq in sequences { insert.write(&LedgerRow { sequence: seq }).await?; } @@ -203,66 +104,7 @@ impl Sink { } } -#[derive(Debug, Serialize, clickhouse::Row)] -struct OhlcvRow { - timestamp: u32, - asset_id: u32, - quote_asset_id: u32, - source: String, - open: i128, - high: i128, - low: i128, - close: i128, - volume_base: i128, - volume_quote: i128, - volume_quote_usd: i128, - close_usd: i128, - vwap: i128, - trade_count: u32, - version: u64, -} - -#[derive(Debug, Serialize, clickhouse::Row)] -struct AssetRow { - asset_id: u32, - asset_code: String, - asset_type: String, - issuer_address: String, - contract_address: String, - sac_address: String, - home_domain: String, - is_active: u8, -} - -#[derive(Debug, Serialize, clickhouse::Row)] +#[derive(Debug, Serialize, Row)] struct LedgerRow { sequence: u32, } - -#[derive(Debug, Deserialize, clickhouse::Row)] -struct ExistingAssetRow { - asset_id: u32, - asset_code: String, - issuer_address: String, - contract_address: String, -} - -/// One decoded oracle price sample, ready for `prices.oracle_prices`. -#[derive(Debug, Clone)] -pub struct OracleSample { - pub timestamp: u32, - pub asset_id: u32, - pub oracle_name: String, - /// price_usd scaled to 14 decimals (matches Decimal(38,14)). - pub price_usd: i128, - pub raw_data: String, -} - -#[derive(Debug, Serialize, clickhouse::Row)] -struct OracleRow { - timestamp: u32, - asset_id: u32, - oracle_name: String, - price_usd: i128, - raw_data: String, -}