notecrumbs

a nostr opengraph server built on nostrdb and egui
git clone git://jb55.com/notecrumbs

commit e014b1dbc567b1eb7e43c810a664dc5661157ec3
parent b7db84af605f112f326480ebfb6775a93226ecff
Author: alltheseas <alltheseas@users.noreply.github.com>
Date:   Wed, 22 Oct 2025 10:48:26 -0500

Improve relay resiliency with metrics and discovery

Diffstat:
M src/main.rs | 13 ++++++-------
M src/relay_pool.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/src/main.rs b/src/main.rs
@@ -31,7 +31,6 @@ mod nip19;
 mod pfp;
 mod relay_pool;
 mod render;
-mod timeout;
 
 use crate::secp256k1::XOnlyPublicKey;
 use relay_pool::RelayPool;
@@ -41,7 +40,7 @@ type ImageCache = LruCache<XOnlyPublicKey, egui::TextureHandle>;
 #[derive(Clone)]
 pub struct Notecrumbs {
     pub ndb: Ndb,
-    keys: Keys,
+    _keys: Keys,
     relay_pool: Arc<RelayPool>,
     font_data: egui::FontData,
     _img_cache: Arc<ImageCache>,
@@ -246,7 +245,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
     let cfg = Config::new();
     let ndb = Ndb::new(".", &cfg).expect("ndb failed to open");
     let keys = Keys::generate();
-    let timeout = timeout::get_env_timeout();
+    let timeout = get_env_timeout();
     let prometheus_handle = metrics_exporter_prometheus::PrometheusBuilder::new()
         .install_recorder()
         .expect("install prometheus recorder");
@@ -266,14 +265,14 @@
     let app = Notecrumbs {
         ndb,
-        keys,
+        _keys: keys,
         relay_pool,
-        font_data,
+        _timeout: timeout,
         _img_cache: img_cache,
-        default_pfp,
         background,
+        font_data,
+        default_pfp,
         prometheus_handle,
-        _timeout: timeout,
     };
 
     // We start a loop to continuously accept incoming connections
diff --git a/src/relay_pool.rs b/src/relay_pool.rs
@@ -5,7 +5,15 @@ use std::collections::HashSet;
 use std::sync::Arc;
 use tokio::sync::Mutex;
 use tokio::time::Duration;
-use tracing::{debug, warn};
+use tracing::{debug, info, warn};
+
+#[derive(Clone, Copy, Debug, Default)]
+pub struct RelayStats {
+    pub ensure_calls: u64,
+    pub relays_added: u64,
+    pub connect_successes: u64,
+    pub connect_failures: u64,
+}
 
 /// Persistent relay pool responsible for maintaining long-lived connections.
 #[derive(Clone)]
@@ -14,6 +22,7 @@ pub struct RelayPool {
     known_relays: Arc<Mutex<HashSet<String>>>,
     default_relays: Arc<[RelayUrl]>,
     connect_timeout: Duration,
+    stats: Arc<Mutex<RelayStats>>,
 }
 
@@ -40,6 +49,7 @@ impl RelayPool {
             known_relays: Arc::new(Mutex::new(HashSet::new())),
             default_relays: default_relays.clone(),
             connect_timeout,
+            stats: Arc::new(Mutex::new(RelayStats::default())),
         };
 
         pool.ensure_relays(pool.default_relays().iter().cloned())
@@ -58,6 +68,7 @@
     {
         let mut new_relays = Vec::new();
         let mut had_new = false;
+        let mut relays_added = 0u64;
         {
             let mut guard = self.known_relays.lock().await;
             for relay in relays {
@@ -65,10 +76,13 @@
                 if guard.insert(key) {
                     new_relays.push(relay);
                     had_new = true;
+                    relays_added += 1;
                 }
             }
         }
 
+        let mut connect_success = 0u64;
+        let mut connect_failure = 0u64;
         for relay in new_relays {
             debug!("adding relay {}", relay);
             self.client
@@ -77,11 +91,39 @@
                 .map_err(|err| Error::Generic(format!("failed to add relay {relay}: {err}")))?;
             if let Err(err) = self.client.connect_relay(relay.clone()).await {
                 warn!("failed to connect relay {}: {}", relay, err);
+                connect_failure += 1;
+            } else {
+                connect_success += 1;
             }
         }
 
         if had_new {
             self.client.connect_with_timeout(self.connect_timeout).await;
+
+            let mut stats = self.stats.lock().await;
+            stats.ensure_calls += 1;
+            stats.relays_added += relays_added;
+            stats.connect_successes += connect_success;
+            stats.connect_failures += connect_failure;
+            let snapshot = *stats;
+            drop(stats);
+
+            let tracked = {
+                let guard = self.known_relays.lock().await;
+                guard.len()
+            };
+
+            info!(
+                total_relays = tracked,
+                ensure_calls = snapshot.ensure_calls,
+                relays_added = relays_added,
+                connect_successes = connect_success,
+                connect_failures = connect_failure,
+                "relay pool health update"
+            );
+        } else {
+            let mut stats = self.stats.lock().await;
+            stats.ensure_calls += 1;
         }
 
         Ok(())
@@ -103,4 +145,13 @@
             .await?)
         }
     }
+
+    pub async fn relay_stats(&self) -> (RelayStats, usize) {
+        let stats = { *self.stats.lock().await };
+        let tracked = {
+            let guard = self.known_relays.lock().await;
+            guard.len()
+        };
+        (stats, tracked)
+    }
 }