Skip to content

Commit 5eea4b9

Browse files
Copilot and Steake authored
Implement real-time metrics collection for admin dashboard (#89)
* Initial plan

* Add missing metrics to node's MetricsRegistry
  - Add messages_sent and messages_received counters
  - Add average_trust_score gauge (stored as fixed-point for atomic operations)
  - Add slashing_events_total counter
  - Update export_prometheus to include all new metrics
  - Add comprehensive tests for new metrics

  Co-authored-by: Steake <530040+Steake@users.noreply.github.com>

* Integrate metrics collection in network and tournament managers
  - Add message_sent counter increments in broadcast_block and broadcast_transaction
  - Add message_received counter increments in handle_incoming_block and handle_incoming_transaction
  - Update tournament manager to calculate and report average trust score
  - Track slashing events when negative evidence is recorded
  - All metrics now populated with real data from node operations

  Co-authored-by: Steake <530040+Steake@users.noreply.github.com>

* Address code review feedback
  - Add DEFAULT_TRUST_SCORE constant instead of magic number 0.85
  - Use evidence_type.is_negative() method instead of hardcoded match
  - Add bounds checking to trust score fixed-point conversion with clamp(0.0, 1000.0)
  - Add clarifying comment about trust score precision range

  Co-authored-by: Steake <530040+Steake@users.noreply.github.com>

* Fix trust score clamp and remove unnecessary allow(dead_code)
  - Change trust score clamp upper bound from 1000.0 to 1.0 (trust scores are in range [0.0, 1.0])
  - Remove #[allow(dead_code)] attribute from avg_trust_score field as it's now actively used

  Co-authored-by: Steake <530040+Steake@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Steake <530040+Steake@users.noreply.github.com>
1 parent ecbb4bb commit 5eea4b9

3 files changed

Lines changed: 145 additions & 3 deletions

File tree

crates/bitcell-node/src/monitoring/mod.rs

Lines changed: 111 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ pub struct MetricsRegistry {
1919
peer_count: Arc<AtomicUsize>,
2020
bytes_sent: Arc<AtomicU64>,
2121
bytes_received: Arc<AtomicU64>,
22+
messages_sent: Arc<AtomicU64>,
23+
messages_received: Arc<AtomicU64>,
2224

2325
// Transaction pool metrics
2426
pending_txs: Arc<AtomicUsize>,
@@ -33,8 +35,8 @@ pub struct MetricsRegistry {
3335
// EBSL metrics
3436
active_miners: Arc<AtomicUsize>,
3537
banned_miners: Arc<AtomicUsize>,
36-
#[allow(dead_code)]
3738
avg_trust_score: Arc<AtomicU64>, // Stored as fixed-point * 1000
39+
slashing_events: Arc<AtomicU64>,
3840

3941
// DHT metrics
4042
dht_peer_count: Arc<AtomicUsize>,
@@ -48,6 +50,8 @@ impl MetricsRegistry {
4850
peer_count: Arc::new(AtomicUsize::new(0)),
4951
bytes_sent: Arc::new(AtomicU64::new(0)),
5052
bytes_received: Arc::new(AtomicU64::new(0)),
53+
messages_sent: Arc::new(AtomicU64::new(0)),
54+
messages_received: Arc::new(AtomicU64::new(0)),
5155
pending_txs: Arc::new(AtomicUsize::new(0)),
5256
total_txs_processed: Arc::new(AtomicU64::new(0)),
5357
proofs_generated: Arc::new(AtomicU64::new(0)),
@@ -57,6 +61,7 @@ impl MetricsRegistry {
5761
active_miners: Arc::new(AtomicUsize::new(0)),
5862
banned_miners: Arc::new(AtomicUsize::new(0)),
5963
avg_trust_score: Arc::new(AtomicU64::new(0)),
64+
slashing_events: Arc::new(AtomicU64::new(0)),
6065
dht_peer_count: Arc::new(AtomicUsize::new(0)),
6166
}
6267
}
@@ -103,6 +108,22 @@ impl MetricsRegistry {
103108
self.bytes_received.load(Ordering::Relaxed)
104109
}
105110

111+
pub fn add_message_sent(&self) {
112+
self.messages_sent.fetch_add(1, Ordering::Relaxed);
113+
}
114+
115+
pub fn add_message_received(&self) {
116+
self.messages_received.fetch_add(1, Ordering::Relaxed);
117+
}
118+
119+
pub fn get_messages_sent(&self) -> u64 {
120+
self.messages_sent.load(Ordering::Relaxed)
121+
}
122+
123+
pub fn get_messages_received(&self) -> u64 {
124+
self.messages_received.load(Ordering::Relaxed)
125+
}
126+
106127
// Transaction pool metrics
107128
pub fn set_pending_txs(&self, count: usize) {
108129
self.pending_txs.store(count, Ordering::Relaxed);
@@ -162,6 +183,28 @@ impl MetricsRegistry {
162183
self.banned_miners.load(Ordering::Relaxed)
163184
}
164185

186+
pub fn set_average_trust_score(&self, score: f64) {
187+
// Store as fixed-point * 1000 for atomic operations
188+
// Trust scores are typically in range [0.0, 1.0], so this provides
189+
// 3 decimal places of precision without overflow risk
190+
let clamped_score = score.clamp(0.0, 1.0);
191+
let fixed_point = (clamped_score * 1000.0) as u64;
192+
self.avg_trust_score.store(fixed_point, Ordering::Relaxed);
193+
}
194+
195+
pub fn get_average_trust_score(&self) -> f64 {
196+
let fixed_point = self.avg_trust_score.load(Ordering::Relaxed);
197+
fixed_point as f64 / 1000.0
198+
}
199+
200+
pub fn inc_slashing_events(&self) {
201+
self.slashing_events.fetch_add(1, Ordering::Relaxed);
202+
}
203+
204+
pub fn get_slashing_events(&self) -> u64 {
205+
self.slashing_events.load(Ordering::Relaxed)
206+
}
207+
165208
// DHT metrics
166209
pub fn set_dht_peer_count(&self, count: usize) {
167210
self.dht_peer_count.store(count, Ordering::Relaxed);
@@ -198,6 +241,14 @@ impl MetricsRegistry {
198241
# TYPE bitcell_bytes_received_total counter\n\
199242
bitcell_bytes_received_total {}\n\
200243
\n\
244+
# HELP bitcell_messages_sent_total Total messages sent\n\
245+
# TYPE bitcell_messages_sent_total counter\n\
246+
bitcell_messages_sent_total {}\n\
247+
\n\
248+
# HELP bitcell_messages_received_total Total messages received\n\
249+
# TYPE bitcell_messages_received_total counter\n\
250+
bitcell_messages_received_total {}\n\
251+
\n\
201252
# HELP bitcell_pending_txs Number of pending transactions\n\
202253
# TYPE bitcell_pending_txs gauge\n\
203254
bitcell_pending_txs {}\n\
@@ -220,19 +271,31 @@ impl MetricsRegistry {
220271
\n\
221272
# HELP bitcell_banned_miners Number of banned miners\n\
222273
# TYPE bitcell_banned_miners gauge\n\
223-
bitcell_banned_miners {}\n",
274+
bitcell_banned_miners {}\n\
275+
\n\
276+
# HELP bitcell_average_trust_score Average trust score of miners\n\
277+
# TYPE bitcell_average_trust_score gauge\n\
278+
bitcell_average_trust_score {}\n\
279+
\n\
280+
# HELP bitcell_slashing_events_total Total slashing events\n\
281+
# TYPE bitcell_slashing_events_total counter\n\
282+
bitcell_slashing_events_total {}\n",
224283
self.get_chain_height(),
225284
self.get_sync_progress(),
226285
self.get_peer_count(),
227286
self.get_dht_peer_count(),
228287
self.get_bytes_sent(),
229288
self.get_bytes_received(),
289+
self.get_messages_sent(),
290+
self.get_messages_received(),
230291
self.get_pending_txs(),
231292
self.get_total_txs_processed(),
232293
self.get_proofs_generated(),
233294
self.get_proofs_verified(),
234295
self.get_active_miners(),
235296
self.get_banned_miners(),
297+
self.get_average_trust_score(),
298+
self.get_slashing_events(),
236299
)
237300
}
238301
}
@@ -270,4 +333,50 @@ mod tests {
270333
assert!(export.contains("bitcell_chain_height 42"));
271334
assert!(export.contains("bitcell_peer_count 3"));
272335
}
336+
337+
#[test]
338+
fn test_new_metrics() {
339+
let metrics = MetricsRegistry::new();
340+
341+
// Test message counters
342+
metrics.add_message_sent();
343+
metrics.add_message_sent();
344+
metrics.add_message_sent();
345+
assert_eq!(metrics.get_messages_sent(), 3);
346+
347+
metrics.add_message_received();
348+
assert_eq!(metrics.get_messages_received(), 1);
349+
350+
// Test trust score
351+
metrics.set_average_trust_score(0.85);
352+
assert!((metrics.get_average_trust_score() - 0.85).abs() < 0.001);
353+
354+
metrics.set_average_trust_score(0.923);
355+
assert!((metrics.get_average_trust_score() - 0.923).abs() < 0.001);
356+
357+
// Test slashing events
358+
metrics.inc_slashing_events();
359+
metrics.inc_slashing_events();
360+
assert_eq!(metrics.get_slashing_events(), 2);
361+
}
362+
363+
#[test]
364+
fn test_new_metrics_in_prometheus_export() {
365+
let metrics = MetricsRegistry::new();
366+
367+
// Set new metrics
368+
metrics.add_message_sent();
369+
metrics.add_message_sent();
370+
metrics.add_message_received();
371+
metrics.set_average_trust_score(0.875);
372+
metrics.inc_slashing_events();
373+
374+
let export = metrics.export_prometheus();
375+
376+
// Verify new metrics are in export
377+
assert!(export.contains("bitcell_messages_sent_total 2"));
378+
assert!(export.contains("bitcell_messages_received_total 1"));
379+
assert!(export.contains("bitcell_average_trust_score 0.875"));
380+
assert!(export.contains("bitcell_slashing_events_total 1"));
381+
}
273382
}

crates/bitcell-node/src/network.rs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -584,6 +584,10 @@ impl NetworkManager {
584584
}
585585

586586
self.metrics.add_bytes_sent(block_size * peer_ids.len() as u64);
587+
// Update message counter for each peer we sent to
588+
for _ in &peer_ids {
589+
self.metrics.add_message_sent();
590+
}
587591

588592
// Broadcast via Gossipsub using compact blocks for bandwidth efficiency
589593
let dht_opt = {
@@ -623,6 +627,10 @@ impl NetworkManager {
623627
}
624628

625629
self.metrics.add_bytes_sent(tx_size * peer_ids.len() as u64);
630+
// Update message counter for each peer we sent to
631+
for _ in &peer_ids {
632+
self.metrics.add_message_sent();
633+
}
626634

627635
// Broadcast via Gossipsub
628636
let dht_opt = {
@@ -653,6 +661,7 @@ impl NetworkManager {
653661
pub async fn handle_incoming_block(&self, block: Block) -> Result<()> {
654662
let block_size = bincode::serialize(&block).unwrap_or_default().len() as u64;
655663
self.metrics.add_bytes_received(block_size);
664+
self.metrics.add_message_received();
656665

657666
// Forward to block processing channel
658667
let tx_opt = {
@@ -670,6 +679,7 @@ impl NetworkManager {
670679
pub async fn handle_incoming_transaction(&self, tx: Transaction) -> Result<()> {
671680
let tx_size = bincode::serialize(&tx).unwrap_or_default().len() as u64;
672681
self.metrics.add_bytes_received(tx_size);
682+
self.metrics.add_message_received();
673683

674684
// Forward to transaction processing channel
675685
let sender_opt = {

crates/bitcell-node/src/tournament.rs

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,9 @@ const COMMIT_PHASE_SECS: u64 = 5;
1515
const REVEAL_PHASE_SECS: u64 = 5;
1616
const BATTLE_PHASE_SECS: u64 = 5;
1717

18+
/// Default trust score for new miners or when no miners exist
19+
const DEFAULT_TRUST_SCORE: f64 = 0.85;
20+
1821
/// Tournament manager
1922
pub struct TournamentManager {
2023
/// Current tournament
@@ -164,6 +167,11 @@ impl TournamentManager {
164167
// Add evidence with current block height
165168
let height = *self.current_height.read().unwrap();
166169
counters.add_evidence(bitcell_ebsl::Evidence::new(evidence_type, 0, height));
170+
171+
// Track slashing events (negative evidence)
172+
if evidence_type.is_negative() {
173+
self.metrics.inc_slashing_events();
174+
}
167175
} // Drop write lock here
168176

169177
// Update metrics (acquires read lock)
@@ -208,19 +216,34 @@ impl TournamentManager {
208216

209217
let mut active_count = 0;
210218
let mut banned_count = 0;
219+
let mut total_trust_score = 0.0;
220+
let mut miner_count = 0;
211221

212222
for (_miner, counters) in evidence_map.iter() {
213223
let trust = TrustScore::from_evidence(counters, &self.ebsl_params);
224+
let trust_value = trust.value();
225+
226+
total_trust_score += trust_value;
227+
miner_count += 1;
214228

215229
if trust.is_eligible(&self.ebsl_params) {
216230
active_count += 1;
217-
} else if trust.value() < self.ebsl_params.t_kill {
231+
} else if trust_value < self.ebsl_params.t_kill {
218232
banned_count += 1;
219233
}
220234
}
221235

222236
self.metrics.set_active_miners(active_count);
223237
self.metrics.set_banned_miners(banned_count);
238+
239+
// Calculate average trust score
240+
if miner_count > 0 {
241+
let avg_trust = total_trust_score / miner_count as f64;
242+
self.metrics.set_average_trust_score(avg_trust);
243+
} else {
244+
// Use default trust score when no miners
245+
self.metrics.set_average_trust_score(DEFAULT_TRUST_SCORE);
246+
}
224247
}
225248
}
226249

0 commit comments

Comments
 (0)