openzeppelin_monitor/utils/metrics/
mod.rs

1//! Metrics module for the application.
2//!
3//! - This module contains the global Prometheus registry.
4//! - Defines specific metrics for the application.
5
6pub mod server;
7use lazy_static::lazy_static;
8use prometheus::{
9	CounterVec, Encoder, Gauge, GaugeVec, HistogramOpts, HistogramVec, Opts, Registry, TextEncoder,
10};
11use sysinfo::{Disks, System};
12
13lazy_static! {
14	/// Global Prometheus registry.
15	///
16	/// This registry holds all metrics defined in this module and is used
17	/// to gather metrics for exposure via the metrics endpoint.
18	pub static ref REGISTRY: Registry = Registry::new();
19
20	/// Gauge for CPU usage percentage.
21	///
22	/// Tracks the current CPU usage as a percentage (0-100) across all cores.
23	pub static ref CPU_USAGE: Gauge = {
24	  let gauge = Gauge::new("cpu_usage_percentage", "Current CPU usage percentage").unwrap();
25	  REGISTRY.register(Box::new(gauge.clone())).unwrap();
26	  gauge
27	};
28
29	/// Gauge for memory usage percentage.
30	///
31	/// Tracks the percentage (0-100) of total system memory currently in use.
32	pub static ref MEMORY_USAGE_PERCENT: Gauge = {
33	  let gauge = Gauge::new("memory_usage_percentage", "Memory usage percentage").unwrap();
34	  REGISTRY.register(Box::new(gauge.clone())).unwrap();
35	  gauge
36	};
37
38	/// Gauge for memory usage in bytes.
39	///
40	/// Tracks the absolute amount of memory currently in use by the system in bytes.
41	pub static ref MEMORY_USAGE: Gauge = {
42		let gauge = Gauge::new("memory_usage_bytes", "Memory usage in bytes").unwrap();
43		REGISTRY.register(Box::new(gauge.clone())).unwrap();
44		gauge
45	};
46
47	/// Gauge for total memory in bytes.
48	///
49	/// Tracks the total amount of physical memory available on the system in bytes.
50	pub static ref TOTAL_MEMORY: Gauge = {
51	  let gauge = Gauge::new("total_memory_bytes", "Total memory in bytes").unwrap();
52	  REGISTRY.register(Box::new(gauge.clone())).unwrap();
53	  gauge
54	};
55
56	/// Gauge for available memory in bytes.
57	///
58	/// Tracks the amount of memory currently available for allocation in bytes.
59	pub static ref AVAILABLE_MEMORY: Gauge = {
60		let gauge = Gauge::new("available_memory_bytes", "Available memory in bytes").unwrap();
61		REGISTRY.register(Box::new(gauge.clone())).unwrap();
62		gauge
63	};
64
65	/// Gauge for used disk space in bytes.
66	///
67	/// Tracks the total amount of disk space currently in use across all mounted filesystems in bytes.
68	pub static ref DISK_USAGE: Gauge = {
69	  let gauge = Gauge::new("disk_usage_bytes", "Used disk space in bytes").unwrap();
70	  REGISTRY.register(Box::new(gauge.clone())).unwrap();
71	  gauge
72	};
73
74	/// Gauge for disk usage percentage.
75	///
76	/// Tracks the percentage (0-100) of total disk space currently in use across all mounted filesystems.
77	pub static ref DISK_USAGE_PERCENT: Gauge = {
78	  let gauge = Gauge::new("disk_usage_percentage", "Disk usage percentage").unwrap();
79	  REGISTRY.register(Box::new(gauge.clone())).unwrap();
80	  gauge
81	};
82
83	/// Gauge for total number of monitors (active and paused).
84	///
85	/// Tracks the total count of all configured monitors in the system, regardless of their active state.
86	pub static ref MONITORS_TOTAL: Gauge = {
87		let gauge = Gauge::new("monitors_total", "Total number of configured monitors").unwrap();
88		REGISTRY.register(Box::new(gauge.clone())).unwrap();
89		gauge
90	};
91
92	/// Gauge for number of active monitors (not paused).
93	///
94	/// Tracks the count of monitors that are currently active (not in paused state).
95	pub static ref MONITORS_ACTIVE: Gauge = {
96		let gauge = Gauge::new("monitors_active", "Number of active monitors").unwrap();
97		REGISTRY.register(Box::new(gauge.clone())).unwrap();
98		gauge
99	};
100
101	/// Gauge for total number of triggers.
102	///
103	/// Tracks the total count of all configured triggers in the system.
104	pub static ref TRIGGERS_TOTAL: Gauge = {
105		let gauge = Gauge::new("triggers_total", "Total number of configured triggers").unwrap();
106		REGISTRY.register(Box::new(gauge.clone())).unwrap();
107		gauge
108	};
109
110	/// Gauge for total number of contracts being monitored (across all monitors).
111	///
112	/// Tracks the total count of unique contracts (network + address combinations) being monitored.
113	pub static ref CONTRACTS_MONITORED: Gauge = {
114		let gauge = Gauge::new("contracts_monitored", "Total number of contracts being monitored").unwrap();
115		REGISTRY.register(Box::new(gauge.clone())).unwrap();
116		gauge
117	};
118
119	/// Gauge for total number of networks being monitored.
120	///
121	/// Tracks the count of unique blockchain networks that have at least one active monitor.
122	pub static ref NETWORKS_MONITORED: Gauge = {
123		let gauge = Gauge::new("networks_monitored", "Total number of networks being monitored").unwrap();
124		REGISTRY.register(Box::new(gauge.clone())).unwrap();
125		gauge
126	};
127
128	/// Gauge Vector for per-network metrics.
129	///
130	/// Tracks the number of active monitors for each network, with the network name as a label.
131	pub static ref NETWORK_MONITORS: GaugeVec = {
132		let gauge = GaugeVec::new(
133			Opts::new("network_monitors", "Number of monitors per network"),
134			&["network"]
135		).unwrap();
136		REGISTRY.register(Box::new(gauge.clone())).unwrap();
137		gauge
138	};
139
140	// ============================================================
141	// RPC Operational Metrics
142	// ============================================================
143
144	/// Counter for total RPC requests.
145	///
146	/// Tracks the total number of RPC requests made, labeled by network and method.
147	pub static ref RPC_REQUESTS_TOTAL: CounterVec = {
148		let counter = CounterVec::new(
149			Opts::new("rpc_requests_total", "Total number of RPC requests"),
150			&["network", "method"]
151		).unwrap();
152		REGISTRY.register(Box::new(counter.clone())).unwrap();
153		counter
154	};
155
156	/// Counter for RPC request errors.
157	///
158	/// Tracks the total number of failed RPC requests, labeled by network, HTTP status code, and error type.
159	pub static ref RPC_REQUEST_ERRORS_TOTAL: CounterVec = {
160		let counter = CounterVec::new(
161			Opts::new("rpc_request_errors_total", "Total number of RPC request errors"),
162			&["network", "status_code", "error_type"]
163		).unwrap();
164		REGISTRY.register(Box::new(counter.clone())).unwrap();
165		counter
166	};
167
168	/// Histogram for RPC request duration.
169	///
170	/// Tracks the duration of successful RPC requests in seconds, labeled by network.
171	pub static ref RPC_REQUEST_DURATION_SECONDS: HistogramVec = {
172		let histogram = HistogramVec::new(
173			HistogramOpts::new("rpc_request_duration_seconds", "RPC request duration in seconds")
174				.buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]),
175			&["network"]
176		).unwrap();
177		REGISTRY.register(Box::new(histogram.clone())).unwrap();
178		histogram
179	};
180
181	/// Counter for RPC endpoint rotations.
182	///
183	/// Tracks the total number of endpoint rotations, labeled by network and reason.
184	pub static ref RPC_ENDPOINT_ROTATIONS_TOTAL: CounterVec = {
185		let counter = CounterVec::new(
186			Opts::new("rpc_endpoint_rotations_total", "Total number of RPC endpoint rotations"),
187			&["network", "reason"]
188		).unwrap();
189		REGISTRY.register(Box::new(counter.clone())).unwrap();
190		counter
191	};
192
193	/// Counter for RPC rate limit responses (HTTP 429).
194	///
195	/// Tracks the total number of rate limit responses received, labeled by network and endpoint.
196	pub static ref RPC_RATE_LIMITS_TOTAL: CounterVec = {
197		let counter = CounterVec::new(
198			Opts::new("rpc_rate_limits_total", "Total number of RPC rate limit responses (HTTP 429)"),
199			&["network", "endpoint"]
200		).unwrap();
201		REGISTRY.register(Box::new(counter.clone())).unwrap();
202		counter
203	};
204}
205
206/// Gather all metrics and encode into the provided format.
207pub fn gather_metrics() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
208	let encoder = TextEncoder::new();
209	let metric_families = REGISTRY.gather();
210	let mut buffer = Vec::new();
211	encoder.encode(&metric_families, &mut buffer)?;
212	Ok(buffer)
213}
214
215/// Updates the system metrics for CPU and memory usage.
216pub fn update_system_metrics() {
217	let mut sys = System::new_all();
218	sys.refresh_all();
219
220	// Overall CPU usage.
221	let cpu_usage = sys.global_cpu_usage();
222	CPU_USAGE.set(cpu_usage as f64);
223
224	// Total memory (in bytes).
225	let total_memory = sys.total_memory();
226	TOTAL_MEMORY.set(total_memory as f64);
227
228	// Available memory (in bytes).
229	let available_memory = sys.available_memory();
230	AVAILABLE_MEMORY.set(available_memory as f64);
231
232	// Used memory (in bytes).
233	let memory_usage = sys.used_memory();
234	MEMORY_USAGE.set(memory_usage as f64);
235
236	// Calculate memory usage percentage
237	let memory_percentage = if total_memory > 0 {
238		(memory_usage as f64 / total_memory as f64) * 100.0
239	} else {
240		0.0
241	};
242	MEMORY_USAGE_PERCENT.set(memory_percentage);
243
244	// Calculate disk usage:
245	// Sum total space and available space across all disks.
246	let disks = Disks::new_with_refreshed_list();
247	let mut total_disk_space: u64 = 0;
248	let mut total_disk_available: u64 = 0;
249	for disk in disks.list() {
250		total_disk_space += disk.total_space();
251		total_disk_available += disk.available_space();
252	}
253	// Used disk space is total minus available ( in bytes).
254	let used_disk_space = total_disk_space.saturating_sub(total_disk_available);
255	DISK_USAGE.set(used_disk_space as f64);
256
257	// Calculate disk usage percentage.
258	let disk_percentage = if total_disk_space > 0 {
259		(used_disk_space as f64 / total_disk_space as f64) * 100.0
260	} else {
261		0.0
262	};
263	DISK_USAGE_PERCENT.set(disk_percentage);
264}
265
266/// Updates metrics related to monitors, triggers, networks, and contracts.
267pub fn update_monitoring_metrics(
268	monitors: &std::collections::HashMap<String, crate::models::Monitor>,
269	triggers: &std::collections::HashMap<String, crate::models::Trigger>,
270	networks: &std::collections::HashMap<String, crate::models::Network>,
271) {
272	// Track total and active monitors
273	let total_monitors = monitors.len();
274	let active_monitors = monitors.values().filter(|m| !m.paused).count();
275
276	MONITORS_TOTAL.set(total_monitors as f64);
277	MONITORS_ACTIVE.set(active_monitors as f64);
278
279	// Track total triggers
280	TRIGGERS_TOTAL.set(triggers.len() as f64);
281
282	// Count unique contracts across all monitors
283	let mut unique_contracts = std::collections::HashSet::new();
284	for monitor in monitors.values() {
285		for address in &monitor.addresses {
286			// Create a unique identifier for each contract (network + address)
287			for network in &monitor.networks {
288				// Verify the network exists in our network repository
289				if networks.contains_key(network) {
290					unique_contracts.insert(format!("{}:{}", network, address.address));
291				}
292			}
293		}
294	}
295	CONTRACTS_MONITORED.set(unique_contracts.len() as f64);
296
297	// Count networks being monitored (those with active monitors)
298	let mut networks_with_monitors = std::collections::HashSet::new();
299	for monitor in monitors.values().filter(|m| !m.paused) {
300		for network in &monitor.networks {
301			// Only count networks that exist in our repository
302			if networks.contains_key(network) {
303				networks_with_monitors.insert(network.clone());
304			}
305		}
306	}
307	NETWORKS_MONITORED.set(networks_with_monitors.len() as f64);
308
309	// Reset all network-specific metrics
310	NETWORK_MONITORS.reset();
311
312	// Set per-network monitor counts (only for networks that exist)
313	let mut network_monitor_counts = std::collections::HashMap::<String, usize>::new();
314	for monitor in monitors.values().filter(|m| !m.paused) {
315		for network in &monitor.networks {
316			if networks.contains_key(network) {
317				*network_monitor_counts.entry(network.clone()).or_insert(0) += 1;
318			}
319		}
320	}
321
322	for (network, count) in network_monitor_counts {
323		NETWORK_MONITORS
324			.with_label_values(&[&network])
325			.set(count as f64);
326	}
327}
328
329// ============================================================
330// RPC Metrics Helper Functions
331// ============================================================
332
333/// Records an RPC request.
334///
335/// # Arguments
336/// * `network` - The network slug (e.g., "ethereum", "polygon")
337/// * `method` - The RPC method name (e.g., "eth_getBlockByNumber")
338pub fn record_rpc_request(network: &str, method: &str) {
339	RPC_REQUESTS_TOTAL
340		.with_label_values(&[network, method])
341		.inc();
342}
343
344/// Records an RPC request error.
345///
346/// # Arguments
347/// * `network` - The network slug
348/// * `status_code` - The HTTP status code as a string (e.g., "429", "500", or "0" for network errors)
349/// * `error_type` - The type of error (e.g., "http", "network", "timeout")
350pub fn record_rpc_error(network: &str, status_code: &str, error_type: &str) {
351	RPC_REQUEST_ERRORS_TOTAL
352		.with_label_values(&[network, status_code, error_type])
353		.inc();
354}
355
356/// Observes the duration of an RPC request.
357///
358/// # Arguments
359/// * `network` - The network slug
360/// * `duration_secs` - The request duration in seconds
361pub fn observe_rpc_duration(network: &str, duration_secs: f64) {
362	RPC_REQUEST_DURATION_SECONDS
363		.with_label_values(&[network])
364		.observe(duration_secs);
365}
366
367/// Records an RPC endpoint rotation event.
368///
369/// # Arguments
370/// * `network` - The network slug
371/// * `reason` - The reason for rotation (e.g., "rate_limit", "network_error")
372pub fn record_endpoint_rotation(network: &str, reason: &str) {
373	RPC_ENDPOINT_ROTATIONS_TOTAL
374		.with_label_values(&[network, reason])
375		.inc();
376}
377
378/// Records an RPC rate limit response (HTTP 429).
379///
380/// # Arguments
381/// * `network` - The network slug
382/// * `endpoint` - The endpoint URL that returned the rate limit
383pub fn record_rate_limit(network: &str, endpoint: &str) {
384	RPC_RATE_LIMITS_TOTAL
385		.with_label_values(&[network, endpoint])
386		.inc();
387}
388
389/// Initializes RPC metrics for a network so they appear in Prometheus output with 0 values.
390///
391/// This should be called when a transport client is created for a network.
392/// Ensures metrics are visible in dashboards even before any errors occur.
393///
394/// # Arguments
395/// * `network` - The network slug
396pub fn init_rpc_metrics_for_network(network: &str) {
397	// Initialize error counters with common status codes
398	// These will show as 0 until actual errors occur
399	RPC_REQUEST_ERRORS_TOTAL
400		.with_label_values(&[network, "429", "http"])
401		.inc_by(0.0);
402	RPC_REQUEST_ERRORS_TOTAL
403		.with_label_values(&[network, "500", "http"])
404		.inc_by(0.0);
405	RPC_REQUEST_ERRORS_TOTAL
406		.with_label_values(&[network, "0", "network"])
407		.inc_by(0.0);
408
409	// Initialize rotation counters
410	RPC_ENDPOINT_ROTATIONS_TOTAL
411		.with_label_values(&[network, "rate_limit"])
412		.inc_by(0.0);
413	RPC_ENDPOINT_ROTATIONS_TOTAL
414		.with_label_values(&[network, "network_error"])
415		.inc_by(0.0);
416}
417
#[cfg(test)]
mod tests {
	use super::*;
	use crate::{
		models::{BlockChainType, Monitor, Network, TransactionStatus, Trigger},
		utils::tests::builders::{
			evm::monitor::MonitorBuilder, network::NetworkBuilder, trigger::TriggerBuilder,
		},
	};
	use std::collections::HashMap;
	use std::sync::{Mutex, MutexGuard};

	// The metrics are process-wide statics, so tests that modify them must
	// not run at the same time.
	lazy_static! {
		static ref TEST_MUTEX: Mutex<()> = Mutex::new(());
	}

	/// Takes the shared test lock.
	///
	/// A poisoned lock is recovered instead of unwrapped: the mutex guards no
	/// data, and unwrapping would turn one failing test into a failure of
	/// every test that runs after it.
	fn lock_metrics() -> MutexGuard<'static, ()> {
		TEST_MUTEX
			.lock()
			.unwrap_or_else(|poisoned| poisoned.into_inner())
	}

	/// Resets all metrics to a known state.
	fn reset_all_metrics() {
		// System metrics
		CPU_USAGE.set(0.0);
		MEMORY_USAGE.set(0.0);
		MEMORY_USAGE_PERCENT.set(0.0);
		TOTAL_MEMORY.set(0.0);
		AVAILABLE_MEMORY.set(0.0);
		DISK_USAGE.set(0.0);
		DISK_USAGE_PERCENT.set(0.0);

		// Monitoring metrics
		MONITORS_TOTAL.set(0.0);
		MONITORS_ACTIVE.set(0.0);
		TRIGGERS_TOTAL.set(0.0);
		CONTRACTS_MONITORED.set(0.0);
		NETWORKS_MONITORED.set(0.0);
		NETWORK_MONITORS.reset();

		// RPC metrics
		RPC_REQUESTS_TOTAL.reset();
		RPC_REQUEST_ERRORS_TOTAL.reset();
		RPC_REQUEST_DURATION_SECONDS.reset();
		RPC_ENDPOINT_ROTATIONS_TOTAL.reset();
		RPC_RATE_LIMITS_TOTAL.reset();
	}

	/// Builds a test EVM network.
	fn create_test_network(slug: &str, name: &str, chain_id: u64) -> Network {
		NetworkBuilder::new()
			.name(name)
			.slug(slug)
			.network_type(BlockChainType::EVM)
			.chain_id(chain_id)
			.rpc_url(&format!("https://{}.example.com", slug))
			.block_time_ms(15000)
			.confirmation_blocks(12)
			.cron_schedule("*/15 * * * * *")
			.max_past_blocks(1000)
			.store_blocks(true)
			.build()
	}

	/// Builds a test monitor.
	fn create_test_monitor(
		name: &str,
		networks: Vec<String>,
		addresses: Vec<String>,
		paused: bool,
	) -> Monitor {
		MonitorBuilder::new()
			.name(name)
			.networks(networks)
			.paused(paused)
			.addresses(addresses)
			.function("transfer(address,uint256)", None)
			.transaction(TransactionStatus::Success, None)
			.build()
	}

	/// Builds a test email trigger.
	fn create_test_trigger(name: &str) -> Trigger {
		TriggerBuilder::new()
			.name(name)
			.email(
				"smtp.example.com",
				"user@example.com",
				"password123",
				"alerts@example.com",
				vec!["user@example.com"],
			)
			.message("Alert", "Something happened!")
			.build()
	}

	/// Converts string slices into owned strings for the builders.
	fn strings(values: &[&str]) -> Vec<String> {
		values.iter().map(|value| value.to_string()).collect()
	}

	/// Builds a slug-keyed network map from `(slug, name, chain_id)` tuples.
	fn networks_from(specs: &[(&str, &str, u64)]) -> HashMap<String, Network> {
		specs
			.iter()
			.map(|&(slug, name, chain_id)| {
				(slug.to_string(), create_test_network(slug, name, chain_id))
			})
			.collect()
	}

	#[test]
	fn test_gather_metrics_contains_expected_names() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Initialize all metrics with non-zero values to ensure they appear in output
		CPU_USAGE.set(50.0);
		MEMORY_USAGE_PERCENT.set(60.0);
		MEMORY_USAGE.set(1024.0);
		TOTAL_MEMORY.set(2048.0);
		AVAILABLE_MEMORY.set(1024.0);
		DISK_USAGE.set(512.0);
		DISK_USAGE_PERCENT.set(25.0);
		MONITORS_TOTAL.set(5.0);
		MONITORS_ACTIVE.set(3.0);
		TRIGGERS_TOTAL.set(2.0);
		CONTRACTS_MONITORED.set(4.0);
		NETWORKS_MONITORED.set(2.0);
		NETWORK_MONITORS.with_label_values(&["test"]).set(1.0);

		// Initialize RPC metrics
		RPC_REQUESTS_TOTAL
			.with_label_values(&["ethereum", "eth_getBlockByNumber"])
			.inc();
		RPC_REQUEST_ERRORS_TOTAL
			.with_label_values(&["ethereum", "429", "http"])
			.inc();
		RPC_REQUEST_DURATION_SECONDS
			.with_label_values(&["ethereum"])
			.observe(0.5);
		RPC_ENDPOINT_ROTATIONS_TOTAL
			.with_label_values(&["ethereum", "rate_limit"])
			.inc();
		RPC_RATE_LIMITS_TOTAL
			.with_label_values(&["ethereum", "https://rpc.example.com"])
			.inc();

		let metrics = gather_metrics().expect("failed to gather metrics");
		let output = String::from_utf8(metrics).expect("metrics output is not valid UTF-8");

		let expected_names = [
			// System metrics
			"cpu_usage_percentage",
			"memory_usage_percentage",
			"memory_usage_bytes",
			"total_memory_bytes",
			"available_memory_bytes",
			"disk_usage_bytes",
			"disk_usage_percentage",
			// Monitoring metrics
			"monitors_total",
			"monitors_active",
			"triggers_total",
			"contracts_monitored",
			"networks_monitored",
			"network_monitors",
			// RPC metrics
			"rpc_requests_total",
			"rpc_request_errors_total",
			"rpc_request_duration_seconds",
			"rpc_endpoint_rotations_total",
			"rpc_rate_limits_total",
		];
		for name in expected_names.iter() {
			assert!(output.contains(name), "missing metric: {}", name);
		}
	}

	#[test]
	fn test_system_metrics_update() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Update metrics
		update_system_metrics();

		// Verify metrics were updated with reasonable values
		let cpu_usage = CPU_USAGE.get();
		assert!((0.0..=100.0).contains(&cpu_usage));

		let memory_usage = MEMORY_USAGE.get();
		assert!(memory_usage >= 0.0);

		let memory_percent = MEMORY_USAGE_PERCENT.get();
		assert!((0.0..=100.0).contains(&memory_percent));

		let total_memory = TOTAL_MEMORY.get();
		assert!(total_memory > 0.0);

		// Same arithmetic as the implementation, so exact equality is expected.
		let expected_percentage = if total_memory > 0.0 {
			(memory_usage / total_memory) * 100.0
		} else {
			0.0
		};
		assert_eq!(memory_percent, expected_percentage);

		let available_memory = AVAILABLE_MEMORY.get();
		assert!(available_memory >= 0.0);

		let disk_usage = DISK_USAGE.get();
		assert!(disk_usage >= 0.0);

		let disk_percent = DISK_USAGE_PERCENT.get();
		assert!((0.0..=100.0).contains(&disk_percent));

		// Verify that memory usage doesn't exceed total memory
		assert!(memory_usage <= total_memory);

		// Verify that available memory doesn't exceed total memory
		assert!(available_memory <= total_memory);
	}

	#[test]
	fn test_monitoring_metrics_update() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Create test data
		let networks = networks_from(&[
			("ethereum", "Ethereum", 1),
			("polygon", "Polygon", 137),
			("arbitrum", "Arbitrum", 42161),
		]);
		let triggers = HashMap::new();
		let mut monitors = HashMap::new();

		// Active monitor on ethereum with one address
		monitors.insert(
			"monitor1".to_string(),
			create_test_monitor(
				"Test Monitor 1",
				strings(&["ethereum"]),
				strings(&["0x1234567890123456789012345678901234567890"]),
				false,
			),
		);

		// Paused monitor on polygon and ethereum with one address
		monitors.insert(
			"monitor2".to_string(),
			create_test_monitor(
				"Test Monitor 2",
				strings(&["polygon", "ethereum"]),
				strings(&["0x0987654321098765432109876543210987654321"]),
				true,
			),
		);

		// Active monitor on arbitrum with two addresses
		monitors.insert(
			"monitor3".to_string(),
			create_test_monitor(
				"Test Monitor 3",
				strings(&["arbitrum"]),
				strings(&[
					"0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
					"0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
				]),
				false,
			),
		);

		// Update metrics
		update_monitoring_metrics(&monitors, &triggers, &networks);

		// Verify metrics
		assert_eq!(MONITORS_TOTAL.get(), 3.0);
		assert_eq!(MONITORS_ACTIVE.get(), 2.0);
		assert_eq!(TRIGGERS_TOTAL.get(), 0.0);
		assert_eq!(CONTRACTS_MONITORED.get(), 5.0);
		assert_eq!(NETWORKS_MONITORED.get(), 2.0);

		// Check network-specific metrics
		let ethereum_monitors = NETWORK_MONITORS
			.get_metric_with_label_values(&["ethereum"])
			.unwrap();
		assert_eq!(ethereum_monitors.get(), 1.0);

		let polygon_monitors = NETWORK_MONITORS
			.get_metric_with_label_values(&["polygon"])
			.unwrap();
		assert_eq!(polygon_monitors.get(), 0.0);

		let arbitrum_monitors = NETWORK_MONITORS
			.get_metric_with_label_values(&["arbitrum"])
			.unwrap();
		assert_eq!(arbitrum_monitors.get(), 1.0);
	}

	#[test]
	fn test_nonexistent_networks_are_ignored() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Create test data with a monitor referencing a non-existent network
		let networks = networks_from(&[("ethereum", "Ethereum", 1)]);
		let triggers = HashMap::new();
		let mut monitors = HashMap::new();

		monitors.insert(
			"monitor1".to_string(),
			create_test_monitor(
				"Test Monitor 1",
				strings(&["ethereum", "nonexistent_network"]),
				strings(&["0x1234567890123456789012345678901234567890"]),
				false,
			),
		);

		// Update metrics
		update_monitoring_metrics(&monitors, &triggers, &networks);

		// Verify metrics
		assert_eq!(NETWORKS_MONITORED.get(), 1.0);
		assert_eq!(CONTRACTS_MONITORED.get(), 1.0);

		// The nonexistent network should not have a metric
		let nonexistent = NETWORK_MONITORS.get_metric_with_label_values(&["nonexistent_network"]);
		assert!(nonexistent.is_err() || nonexistent.unwrap().get() == 0.0);
	}

	#[test]
	fn test_multiple_monitors_same_network() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Create test data with multiple monitors on the same network
		let networks = networks_from(&[("ethereum", "Ethereum", 1)]);
		let triggers = HashMap::new();
		let mut monitors = HashMap::new();

		// Add three monitors all watching Ethereum
		monitors.insert(
			"monitor1".to_string(),
			create_test_monitor(
				"Test Monitor 1",
				strings(&["ethereum"]),
				strings(&["0x1111111111111111111111111111111111111111"]),
				false,
			),
		);

		monitors.insert(
			"monitor2".to_string(),
			create_test_monitor(
				"Test Monitor 2",
				strings(&["ethereum"]),
				strings(&["0x2222222222222222222222222222222222222222"]),
				false,
			),
		);

		monitors.insert(
			"monitor3".to_string(),
			create_test_monitor(
				"Test Monitor 3",
				strings(&["ethereum"]),
				strings(&["0x3333333333333333333333333333333333333333"]),
				true, // This one is paused
			),
		);

		// Update metrics
		update_monitoring_metrics(&monitors, &triggers, &networks);

		// Verify metrics
		assert_eq!(MONITORS_TOTAL.get(), 3.0);
		assert_eq!(MONITORS_ACTIVE.get(), 2.0);
		assert_eq!(NETWORKS_MONITORED.get(), 1.0);

		// Check network-specific metrics
		let ethereum_monitors = NETWORK_MONITORS
			.get_metric_with_label_values(&["ethereum"])
			.unwrap();
		assert_eq!(ethereum_monitors.get(), 2.0);
	}

	#[test]
	fn test_multiple_contracts_per_monitor() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Create test data with a monitor watching multiple contracts
		let networks = networks_from(&[("ethereum", "Ethereum", 1)]);
		let triggers = HashMap::new();
		let mut monitors = HashMap::new();

		// Add a monitor watching multiple contracts
		monitors.insert(
			"monitor1".to_string(),
			create_test_monitor(
				"Test Monitor 1",
				strings(&["ethereum"]),
				strings(&[
					"0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
					"0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
					"0xcccccccccccccccccccccccccccccccccccccccc",
				]),
				false,
			),
		);

		// Update metrics
		update_monitoring_metrics(&monitors, &triggers, &networks);

		// Verify metrics
		assert_eq!(CONTRACTS_MONITORED.get(), 3.0);
	}

	#[test]
	fn test_triggers_count() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Create test data with triggers
		let monitors = HashMap::new();
		let networks = HashMap::new();
		let mut triggers = HashMap::new();

		// Add some triggers
		triggers.insert("trigger1".to_string(), create_test_trigger("trigger1"));
		triggers.insert("trigger2".to_string(), create_test_trigger("trigger2"));
		triggers.insert("trigger3".to_string(), create_test_trigger("trigger3"));

		// Update metrics
		update_monitoring_metrics(&monitors, &triggers, &networks);

		// Verify metrics
		let total_triggers = TRIGGERS_TOTAL.get();
		assert_eq!(total_triggers, 3.0);

		// Verify other metrics are zero since we have no monitors or networks
		assert_eq!(MONITORS_TOTAL.get(), 0.0);
		assert_eq!(MONITORS_ACTIVE.get(), 0.0);
		assert_eq!(CONTRACTS_MONITORED.get(), 0.0);
		assert_eq!(NETWORKS_MONITORED.get(), 0.0);
	}

	#[test]
	fn test_empty_collections() {
		let _lock = lock_metrics();

		// Test with empty collections
		let monitors = HashMap::new();
		let networks = HashMap::new();
		let triggers = HashMap::new();

		// Start from non-zero values so the update has something to clear
		MONITORS_TOTAL.set(10.0);
		MONITORS_ACTIVE.set(5.0);
		TRIGGERS_TOTAL.set(3.0);
		CONTRACTS_MONITORED.set(7.0);
		NETWORKS_MONITORED.set(2.0);
		NETWORK_MONITORS.reset();

		// Set a value for a network that doesn't exist
		NETWORK_MONITORS.with_label_values(&["test"]).set(3.0);

		// Update metrics
		update_monitoring_metrics(&monitors, &triggers, &networks);

		// Verify all metrics are reset to zero
		assert_eq!(MONITORS_TOTAL.get(), 0.0);
		assert_eq!(MONITORS_ACTIVE.get(), 0.0);
		assert_eq!(TRIGGERS_TOTAL.get(), 0.0);
		assert_eq!(CONTRACTS_MONITORED.get(), 0.0);
		assert_eq!(NETWORKS_MONITORED.get(), 0.0);

		// The test network should have been reset
		let test_network = NETWORK_MONITORS
			.get_metric_with_label_values(&["test"])
			.unwrap();
		assert_eq!(test_network.get(), 0.0);
	}

	#[test]
	fn test_rpc_metrics_helper_functions() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Test record_rpc_request
		record_rpc_request("ethereum", "eth_getBlockByNumber");
		record_rpc_request("ethereum", "eth_getBlockByNumber");
		record_rpc_request("polygon", "eth_call");

		let eth_requests = RPC_REQUESTS_TOTAL
			.get_metric_with_label_values(&["ethereum", "eth_getBlockByNumber"])
			.unwrap();
		assert_eq!(eth_requests.get(), 2.0);

		let polygon_requests = RPC_REQUESTS_TOTAL
			.get_metric_with_label_values(&["polygon", "eth_call"])
			.unwrap();
		assert_eq!(polygon_requests.get(), 1.0);

		// Test record_rpc_error
		record_rpc_error("ethereum", "429", "http");
		record_rpc_error("ethereum", "500", "http");
		record_rpc_error("ethereum", "0", "network");

		let rate_limit_errors = RPC_REQUEST_ERRORS_TOTAL
			.get_metric_with_label_values(&["ethereum", "429", "http"])
			.unwrap();
		assert_eq!(rate_limit_errors.get(), 1.0);

		let server_errors = RPC_REQUEST_ERRORS_TOTAL
			.get_metric_with_label_values(&["ethereum", "500", "http"])
			.unwrap();
		assert_eq!(server_errors.get(), 1.0);

		let network_errors = RPC_REQUEST_ERRORS_TOTAL
			.get_metric_with_label_values(&["ethereum", "0", "network"])
			.unwrap();
		assert_eq!(network_errors.get(), 1.0);

		// Test observe_rpc_duration
		observe_rpc_duration("ethereum", 0.5);
		observe_rpc_duration("ethereum", 1.5);

		let duration_histogram = RPC_REQUEST_DURATION_SECONDS
			.get_metric_with_label_values(&["ethereum"])
			.unwrap();
		assert_eq!(duration_histogram.get_sample_count(), 2);

		// Test record_endpoint_rotation
		record_endpoint_rotation("ethereum", "rate_limit");
		record_endpoint_rotation("ethereum", "network_error");
		record_endpoint_rotation("polygon", "rate_limit");

		let eth_rate_limit_rotations = RPC_ENDPOINT_ROTATIONS_TOTAL
			.get_metric_with_label_values(&["ethereum", "rate_limit"])
			.unwrap();
		assert_eq!(eth_rate_limit_rotations.get(), 1.0);

		let eth_network_rotations = RPC_ENDPOINT_ROTATIONS_TOTAL
			.get_metric_with_label_values(&["ethereum", "network_error"])
			.unwrap();
		assert_eq!(eth_network_rotations.get(), 1.0);

		// Test record_rate_limit
		record_rate_limit("ethereum", "https://rpc1.example.com");
		record_rate_limit("ethereum", "https://rpc1.example.com");
		record_rate_limit("ethereum", "https://rpc2.example.com");

		let rpc1_rate_limits = RPC_RATE_LIMITS_TOTAL
			.get_metric_with_label_values(&["ethereum", "https://rpc1.example.com"])
			.unwrap();
		assert_eq!(rpc1_rate_limits.get(), 2.0);

		let rpc2_rate_limits = RPC_RATE_LIMITS_TOTAL
			.get_metric_with_label_values(&["ethereum", "https://rpc2.example.com"])
			.unwrap();
		assert_eq!(rpc2_rate_limits.get(), 1.0);
	}

	#[test]
	fn test_init_rpc_metrics_for_network() {
		let _lock = lock_metrics();
		reset_all_metrics();

		// Initialize metrics for a new network
		init_rpc_metrics_for_network("arbitrum");

		// Verify error counters are initialized with 0
		let http_429 = RPC_REQUEST_ERRORS_TOTAL
			.get_metric_with_label_values(&["arbitrum", "429", "http"])
			.unwrap();
		assert_eq!(http_429.get(), 0.0);

		let http_500 = RPC_REQUEST_ERRORS_TOTAL
			.get_metric_with_label_values(&["arbitrum", "500", "http"])
			.unwrap();
		assert_eq!(http_500.get(), 0.0);

		let network_error = RPC_REQUEST_ERRORS_TOTAL
			.get_metric_with_label_values(&["arbitrum", "0", "network"])
			.unwrap();
		assert_eq!(network_error.get(), 0.0);

		// Verify rotation counters are initialized with 0
		let rate_limit_rotation = RPC_ENDPOINT_ROTATIONS_TOTAL
			.get_metric_with_label_values(&["arbitrum", "rate_limit"])
			.unwrap();
		assert_eq!(rate_limit_rotation.get(), 0.0);

		let network_rotation = RPC_ENDPOINT_ROTATIONS_TOTAL
			.get_metric_with_label_values(&["arbitrum", "network_error"])
			.unwrap();
		assert_eq!(network_rotation.get(), 0.0);

		// Verify metrics appear in gathered output
		let metrics = gather_metrics().expect("failed to gather metrics");
		let output = String::from_utf8(metrics).expect("metrics output is not valid UTF-8");

		assert!(output.contains("rpc_request_errors_total"));
		assert!(output.contains("arbitrum"));
		assert!(output.contains("rpc_endpoint_rotations_total"));
	}
}