diff --git a/Cargo.lock b/Cargo.lock index a22bcd4..c93c457 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1918,11 +1918,11 @@ dependencies = [ "ethereum_ssz_derive", "ethlambda-crypto", "ethlambda-fork-choice", + "ethlambda-metrics", "ethlambda-state-transition", "ethlambda-storage", "ethlambda-types", "hex", - "prometheus", "serde", "serde_json", "spawned-concurrency", @@ -1952,6 +1952,14 @@ dependencies = [ "ethlambda-types", ] +[[package]] +name = "ethlambda-metrics" +version = "0.1.0" +dependencies = [ + "prometheus", + "thiserror 2.0.17", +] + [[package]] name = "ethlambda-p2p" version = "0.1.0" @@ -1960,6 +1968,7 @@ dependencies = [ "ethereum_ssz", "ethereum_ssz_derive", "ethlambda-blockchain", + "ethlambda-metrics", "ethlambda-storage", "ethlambda-types", "ethrex-common", @@ -1967,7 +1976,6 @@ dependencies = [ "ethrex-rlp", "hex", "libp2p", - "prometheus", "sha2", "snap", "ssz_types", @@ -1982,11 +1990,10 @@ name = "ethlambda-rpc" version = "0.1.0" dependencies = [ "axum", + "ethlambda-metrics", "ethlambda-storage", - "prometheus", "serde", "serde_json", - "thiserror 2.0.17", "tokio", "tracing", ] @@ -1996,9 +2003,9 @@ name = "ethlambda-state-transition" version = "0.1.0" dependencies = [ "datatest-stable 0.3.3", + "ethlambda-metrics", "ethlambda-types", "hex", - "prometheus", "serde", "serde_json", "thiserror 2.0.17", diff --git a/Cargo.toml b/Cargo.toml index fa3812d..f9b521a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/blockchain/fork_choice", "crates/blockchain/state_transition", "crates/common/crypto", + "crates/common/metrics", "crates/common/types", "crates/net/p2p", "crates/net/rpc", @@ -28,6 +29,7 @@ ethlambda-blockchain = { path = "crates/blockchain" } ethlambda-fork-choice = { path = "crates/blockchain/fork_choice" } ethlambda-state-transition = { path = "crates/blockchain/state_transition" } ethlambda-crypto = { path = "crates/common/crypto" } +ethlambda-metrics = { path = "crates/common/metrics" } ethlambda-types = { path = "crates/common/types" } ethlambda-p2p = { path = "crates/net/p2p" } ethlambda-rpc = { path = "crates/net/rpc" } diff --git a/crates/blockchain/Cargo.toml b/crates/blockchain/Cargo.toml index 800987e..440d45d 100644 --- a/crates/blockchain/Cargo.toml +++ b/crates/blockchain/Cargo.toml @@ -18,6 +18,7 @@ ethlambda-storage.workspace = true ethlambda-state-transition.workspace = true ethlambda-fork-choice.workspace = true ethlambda-crypto.workspace = true +ethlambda-metrics.workspace = true ethlambda-types.workspace = true spawned-concurrency.workspace = true @@ -27,7 +28,6 @@ tokio.workspace = true thiserror.workspace = true tracing.workspace = true -prometheus.workspace = true hex.workspace = true [dev-dependencies] diff --git a/crates/blockchain/src/metrics.rs b/crates/blockchain/src/metrics.rs index e151780..751d02a 100644 --- a/crates/blockchain/src/metrics.rs +++ b/crates/blockchain/src/metrics.rs @@ -1,78 +1,71 @@ //! Prometheus metrics for the blockchain module. +use ethlambda_metrics::*; + pub fn update_head_slot(slot: u64) { - static LEAN_HEAD_SLOT: std::sync::LazyLock = - std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!("lean_head_slot", "Latest slot of the lean chain") - .unwrap() - }); + static LEAN_HEAD_SLOT: std::sync::LazyLock = std::sync::LazyLock::new(|| { + register_int_gauge!("lean_head_slot", "Latest slot of the lean chain").unwrap() + }); LEAN_HEAD_SLOT.set(slot.try_into().unwrap()); } pub fn update_latest_justified_slot(slot: u64) { - static LEAN_LATEST_JUSTIFIED_SLOT: std::sync::LazyLock = + static LEAN_LATEST_JUSTIFIED_SLOT: std::sync::LazyLock = std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!("lean_latest_justified_slot", "Latest justified slot") - .unwrap() + register_int_gauge!("lean_latest_justified_slot", "Latest justified slot").unwrap() }); LEAN_LATEST_JUSTIFIED_SLOT.set(slot.try_into().unwrap()); } pub fn update_latest_finalized_slot(slot: u64) { - static LEAN_LATEST_FINALIZED_SLOT: std::sync::LazyLock = + static LEAN_LATEST_FINALIZED_SLOT: std::sync::LazyLock = std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!("lean_latest_finalized_slot", "Latest finalized slot") - .unwrap() + register_int_gauge!("lean_latest_finalized_slot", "Latest finalized slot").unwrap() }); LEAN_LATEST_FINALIZED_SLOT.set(slot.try_into().unwrap()); } pub fn update_current_slot(slot: u64) { - static LEAN_CURRENT_SLOT: std::sync::LazyLock = - std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!("lean_current_slot", "Current slot of the lean chain") - .unwrap() - }); + static LEAN_CURRENT_SLOT: std::sync::LazyLock = std::sync::LazyLock::new(|| { + register_int_gauge!("lean_current_slot", "Current slot of the lean chain").unwrap() + }); LEAN_CURRENT_SLOT.set(slot.try_into().unwrap()); } pub fn update_validators_count(count: u64) { - static LEAN_VALIDATORS_COUNT: std::sync::LazyLock = - std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!( - "lean_validators_count", - "Number of validators managed by a node" - ) - .unwrap() - }); + static LEAN_VALIDATORS_COUNT: std::sync::LazyLock = std::sync::LazyLock::new(|| { + register_int_gauge!( + "lean_validators_count", + "Number of validators managed by a node" + ) + .unwrap() + }); LEAN_VALIDATORS_COUNT.set(count.try_into().unwrap()); } pub fn update_safe_target_slot(slot: u64) { - static LEAN_SAFE_TARGET_SLOT: std::sync::LazyLock = - std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!("lean_safe_target_slot", "Safe target slot").unwrap() - }); + static LEAN_SAFE_TARGET_SLOT: std::sync::LazyLock = std::sync::LazyLock::new(|| { + register_int_gauge!("lean_safe_target_slot", "Safe target slot").unwrap() + }); LEAN_SAFE_TARGET_SLOT.set(slot.try_into().unwrap()); } pub fn set_node_info(name: &str, version: &str) { - static LEAN_NODE_INFO: std::sync::LazyLock = - std::sync::LazyLock::new(|| { - prometheus::register_int_gauge_vec!( - "lean_node_info", - "Node information (always 1)", - &["name", "version"] - ) - .unwrap() - }); + static LEAN_NODE_INFO: std::sync::LazyLock = std::sync::LazyLock::new(|| { + register_int_gauge_vec!( + "lean_node_info", + "Node information (always 1)", + &["name", "version"] + ) + .unwrap() + }); LEAN_NODE_INFO.with_label_values(&[name, version]).set(1); } pub fn set_node_start_time() { - static LEAN_NODE_START_TIME_SECONDS: std::sync::LazyLock = + static LEAN_NODE_START_TIME_SECONDS: std::sync::LazyLock = std::sync::LazyLock::new(|| { - prometheus::register_int_gauge!( + register_int_gauge!( "lean_node_start_time_seconds", "Timestamp when node started" ) @@ -87,9 +80,9 @@ pub fn set_node_start_time() { /// Increment the valid attestations counter. pub fn inc_attestations_valid(source: &str) { - static LEAN_ATTESTATIONS_VALID_TOTAL: std::sync::LazyLock = + static LEAN_ATTESTATIONS_VALID_TOTAL: std::sync::LazyLock = std::sync::LazyLock::new(|| { - prometheus::register_int_counter_vec!( + register_int_counter_vec!( "lean_attestations_valid_total", "Count of valid attestations", &["source"] @@ -103,9 +96,9 @@ pub fn inc_attestations_valid(source: &str) { /// Increment the invalid attestations counter. pub fn inc_attestations_invalid(source: &str) { - static LEAN_ATTESTATIONS_INVALID_TOTAL: std::sync::LazyLock = + static LEAN_ATTESTATIONS_INVALID_TOTAL: std::sync::LazyLock = std::sync::LazyLock::new(|| { - prometheus::register_int_counter_vec!( + register_int_counter_vec!( "lean_attestations_invalid_total", "Count of invalid attestations", &["source"] @@ -119,9 +112,9 @@ pub fn inc_attestations_invalid(source: &str) { /// Increment the fork choice reorgs counter. pub fn inc_fork_choice_reorgs() { - static LEAN_FORK_CHOICE_REORGS_TOTAL: std::sync::LazyLock = + static LEAN_FORK_CHOICE_REORGS_TOTAL: std::sync::LazyLock = std::sync::LazyLock::new(|| { - prometheus::register_int_counter!( + register_int_counter!( "lean_fork_choice_reorgs_total", "Count of fork choice reorganizations" ) @@ -129,3 +122,31 @@ pub fn inc_fork_choice_reorgs() { }); LEAN_FORK_CHOICE_REORGS_TOTAL.inc(); } + +/// Start timing fork choice block processing. Records duration when the guard is dropped. +pub fn time_fork_choice_block_processing() -> TimingGuard { + static LEAN_FORK_CHOICE_BLOCK_PROCESSING_TIME_SECONDS: std::sync::LazyLock = + std::sync::LazyLock::new(|| { + register_histogram!( + "lean_fork_choice_block_processing_time_seconds", + "Duration to process a block", + vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0] + ) + .unwrap() + }); + TimingGuard::new(&LEAN_FORK_CHOICE_BLOCK_PROCESSING_TIME_SECONDS) +} + +/// Start timing attestation validation. Records duration when the guard is dropped. +pub fn time_attestation_validation() -> TimingGuard { + static LEAN_ATTESTATION_VALIDATION_TIME_SECONDS: std::sync::LazyLock = + std::sync::LazyLock::new(|| { + register_histogram!( + "lean_attestation_validation_time_seconds", + "Duration to validate an attestation", + vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0] + ) + .unwrap() + }); + TimingGuard::new(&LEAN_ATTESTATION_VALIDATION_TIME_SECONDS) +} diff --git a/crates/blockchain/src/store.rs b/crates/blockchain/src/store.rs index 42b891d..de17767 100644 --- a/crates/blockchain/src/store.rs +++ b/crates/blockchain/src/store.rs @@ -70,6 +70,7 @@ fn update_safe_target(store: &mut Store) { /// 2. A vote cannot span backwards in time (source > target). /// 3. A vote cannot be for a future slot. fn validate_attestation(store: &Store, attestation: &Attestation) -> Result<(), StoreError> { + let _timing = metrics::time_attestation_validation(); let data = &attestation.data; // Availability Check - We cannot count a vote if we haven't seen the blocks involved. @@ -285,6 +286,8 @@ pub fn on_block( store: &mut Store, signed_block: SignedBlockWithAttestation, ) -> Result<(), StoreError> { + let _timing = metrics::time_fork_choice_block_processing(); + // Unpack block components let block = signed_block.message.block.clone(); let proposer_attestation = signed_block.message.proposer_attestation.clone(); @@ -1027,6 +1030,6 @@ fn is_reorg(old_head: H256, new_head: H256, store: &Store) -> bool { } // Couldn't walk back far enough (missing blocks in chain) - // Conservative: assume no reorg if we can't determine + // Assume the ancestor is behind the latest finalized block false } diff --git a/crates/blockchain/state_transition/Cargo.toml b/crates/blockchain/state_transition/Cargo.toml index 94ee8cb..01d06b0 100644 --- a/crates/blockchain/state_transition/Cargo.toml +++ b/crates/blockchain/state_transition/Cargo.toml @@ -11,9 +11,9 @@ version.workspace = true [dependencies] ethlambda-types.workspace = true +ethlambda-metrics.workspace = true thiserror.workspace = true -prometheus.workspace = true [dev-dependencies] serde.workspace = true diff --git a/crates/blockchain/state_transition/src/metrics.rs b/crates/blockchain/state_transition/src/metrics.rs index 3e3c8fb..8df3090 100644 --- a/crates/blockchain/state_transition/src/metrics.rs +++ b/crates/blockchain/state_transition/src/metrics.rs @@ -1,11 +1,8 @@ //! Prometheus metrics for state transition. use std::sync::LazyLock; -use std::time::Instant; -use prometheus::{ - Histogram, IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec, -}; +use ethlambda_metrics::*; static LEAN_STATE_TRANSITION_SLOTS_PROCESSED_TOTAL: LazyLock = LazyLock::new(|| { register_int_counter!( @@ -49,7 +46,7 @@ pub fn inc_finalizations(result: &str) { } static LEAN_STATE_TRANSITION_TIME_SECONDS: LazyLock = LazyLock::new(|| { - prometheus::register_histogram!( + register_histogram!( "lean_state_transition_time_seconds", "Duration of the entire state transition", vec![0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 2.0, 2.5, 3.0, 4.0] @@ -59,7 +56,7 @@ static LEAN_STATE_TRANSITION_TIME_SECONDS: LazyLock = LazyLock::new(| static LEAN_STATE_TRANSITION_SLOTS_PROCESSING_TIME_SECONDS: LazyLock = LazyLock::new(|| { - prometheus::register_histogram!( + register_histogram!( "lean_state_transition_slots_processing_time_seconds", "Duration to process slots", vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0] @@ -69,7 +66,7 @@ static LEAN_STATE_TRANSITION_SLOTS_PROCESSING_TIME_SECONDS: LazyLock static LEAN_STATE_TRANSITION_BLOCK_PROCESSING_TIME_SECONDS: LazyLock = LazyLock::new(|| { - prometheus::register_histogram!( + register_histogram!( "lean_state_transition_block_processing_time_seconds", "Duration to process a block in state transition", vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0] @@ -79,7 +76,7 @@ static LEAN_STATE_TRANSITION_BLOCK_PROCESSING_TIME_SECONDS: LazyLock static LEAN_STATE_TRANSITION_ATTESTATIONS_PROCESSING_TIME_SECONDS: LazyLock = LazyLock::new(|| { - prometheus::register_histogram!( + register_histogram!( "lean_state_transition_attestations_processing_time_seconds", "Duration to process attestations", vec![0.005, 0.01, 0.025, 0.05, 0.1, 1.0] @@ -87,27 +84,6 @@ static LEAN_STATE_TRANSITION_ATTESTATIONS_PROCESSING_TIME_SECONDS: LazyLock Self { - Self { - histogram, - start: Instant::now(), - } - } -} - -impl Drop for TimingGuard { - fn drop(&mut self) { - self.histogram.observe(self.start.elapsed().as_secs_f64()); - } -} - /// Start timing state transition. Records duration when the guard is dropped. pub fn time_state_transition() -> TimingGuard { TimingGuard::new(&LEAN_STATE_TRANSITION_TIME_SECONDS) diff --git a/crates/common/metrics/Cargo.toml b/crates/common/metrics/Cargo.toml new file mode 100644 index 0000000..57e4c86 --- /dev/null +++ b/crates/common/metrics/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "ethlambda-metrics" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true + +[dependencies] +prometheus.workspace = true +thiserror.workspace = true diff --git a/crates/common/metrics/src/gather.rs b/crates/common/metrics/src/gather.rs new file mode 100644 index 0000000..c4e727e --- /dev/null +++ b/crates/common/metrics/src/gather.rs @@ -0,0 +1,29 @@ +//! Utilities for gathering and encoding metrics. + +use thiserror::Error; + +use crate::{Encoder, PrometheusError, TextEncoder, gather}; + +#[derive(Debug, Error)] +pub enum GatherError { + #[error("Prometheus error: {0}")] + Prometheus(#[from] PrometheusError), + #[error("UTF-8 conversion error: {0}")] + FromUtf8(#[from] std::string::FromUtf8Error), +} + +/// Returns all metrics currently registered in Prometheus' default registry. +/// +/// Both profiling and RPC metrics register with this default registry, and the +/// metrics API surfaces them by calling this helper. +pub fn gather_default_metrics() -> Result { + let encoder = TextEncoder::new(); + let metric_families = gather(); + + let mut buffer = Vec::new(); + encoder.encode(&metric_families, &mut buffer)?; + + let res = String::from_utf8(buffer)?; + + Ok(res) +} diff --git a/crates/common/metrics/src/lib.rs b/crates/common/metrics/src/lib.rs new file mode 100644 index 0000000..9e508bb --- /dev/null +++ b/crates/common/metrics/src/lib.rs @@ -0,0 +1,15 @@ +//! Metrics utilities and prometheus re-exports for ethlambda. + +pub mod gather; +pub mod timing; + +// Re-export prometheus types and macros we use +pub use prometheus::{ + Encoder, Error as PrometheusError, Histogram, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, + TextEncoder, gather, register_histogram, register_int_counter, register_int_counter_vec, + register_int_gauge, register_int_gauge_vec, +}; + +// Re-export commonly used items +pub use gather::{GatherError, gather_default_metrics}; +pub use timing::TimingGuard; diff --git a/crates/common/metrics/src/timing.rs b/crates/common/metrics/src/timing.rs new file mode 100644 index 0000000..a3baa94 --- /dev/null +++ b/crates/common/metrics/src/timing.rs @@ -0,0 +1,26 @@ +//! Timing utilities for histogram metrics. + +use std::time::Instant; + +use crate::Histogram; + +/// A guard that records elapsed time to a histogram when dropped. +pub struct TimingGuard { + histogram: &'static Histogram, + start: Instant, +} + +impl TimingGuard { + pub fn new(histogram: &'static Histogram) -> Self { + Self { + histogram, + start: Instant::now(), + } + } +} + +impl Drop for TimingGuard { + fn drop(&mut self) { + self.histogram.observe(self.start.elapsed().as_secs_f64()); + } +} diff --git a/crates/net/p2p/Cargo.toml b/crates/net/p2p/Cargo.toml index 29af2f7..6ed7e3a 100644 --- a/crates/net/p2p/Cargo.toml +++ b/crates/net/p2p/Cargo.toml @@ -12,6 +12,7 @@ version.workspace = true [dependencies] ethlambda-blockchain.workspace = true ethlambda-storage.workspace = true +ethlambda-metrics.workspace = true ethlambda-types.workspace = true async-trait = "0.1" @@ -37,7 +38,5 @@ tree_hash_derive = "0.12.0" sha2 = "0.10" -prometheus.workspace = true - [dev-dependencies] hex.workspace = true diff --git a/crates/net/p2p/src/metrics.rs b/crates/net/p2p/src/metrics.rs index ba96a63..9f24f99 100644 --- a/crates/net/p2p/src/metrics.rs +++ b/crates/net/p2p/src/metrics.rs @@ -5,12 +5,12 @@ use std::{ sync::{LazyLock, RwLock}, }; +use ethlambda_metrics::*; use ethlambda_types::primitives::H256; use libp2p::{ PeerId, identity::{Keypair, secp256k1}, }; -use prometheus::{IntCounterVec, IntGaugeVec, register_int_counter_vec, register_int_gauge_vec}; static NODE_NAME_REGISTRY: LazyLock>> = LazyLock::new(|| RwLock::new(HashMap::new())); diff --git a/crates/net/rpc/Cargo.toml b/crates/net/rpc/Cargo.toml index 6053906..a40c721 100644 --- a/crates/net/rpc/Cargo.toml +++ b/crates/net/rpc/Cargo.toml @@ -12,8 +12,7 @@ version.workspace = true [dependencies] axum = "0.8.1" tokio.workspace = true -prometheus.workspace = true -thiserror.workspace = true +ethlambda-metrics.workspace = true tracing.workspace = true ethlambda-storage.workspace = true serde.workspace = true diff --git a/crates/net/rpc/src/metrics.rs b/crates/net/rpc/src/metrics.rs index 2af9545..c3edac6 100644 --- a/crates/net/rpc/src/metrics.rs +++ b/crates/net/rpc/src/metrics.rs @@ -1,5 +1,5 @@ use axum::{Router, http::HeaderValue, response::IntoResponse, routing::get}; -use thiserror::Error; +use ethlambda_metrics::gather_default_metrics; use tracing::warn; pub fn start_prometheus_metrics_api() -> Router { @@ -23,29 +23,3 @@ pub(crate) async fn get_metrics() -> impl IntoResponse { response.headers_mut().insert("content-type", content_type); response } - -#[derive(Debug, Error)] -pub enum Error { - #[error("Prometheus error: {0}")] - Prometheus(#[from] prometheus::Error), - #[error("UTF-8 conversion error: {0}")] - FromUtf8(#[from] std::string::FromUtf8Error), -} - -/// Returns all metrics currently registered in Prometheus' default registry. -/// -/// Both profiling and RPC metrics register with this default registry, and the -/// metrics API surfaces them by calling this helper. -pub fn gather_default_metrics() -> Result { - use prometheus::{Encoder, TextEncoder}; - - let encoder = TextEncoder::new(); - let metric_families = prometheus::gather(); - - let mut buffer = Vec::new(); - encoder.encode(&metric_families, &mut buffer)?; - - let res = String::from_utf8(buffer)?; - - Ok(res) -}