notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck

commit 3e39cf785b8d98bde8e58c53bf1de1abe7cf16ac
parent 1244be4481c51e82cda12f302c7c647c1c55dd4b
Author: kernelkind <kernelkind@gmail.com>
Date:   Thu, 23 Oct 2025 21:10:47 -0400

feat(mime-cache): upgrade UrlMimes

1. More performant: no more deserialization every frame
2. Employs a TTL so the cache doesn't grow unbounded
3. Exponential backoff when retrying after an error (see the sketch below)

Signed-off-by: kernelkind <kernelkind@gmail.com>
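The retry policy in point 3 pairs a one-week TTL for successful lookups with capped exponential backoff for failures. Below is a minimal, self-contained sketch of that policy; the constants and the backoff arithmetic mirror `failure_backoff_duration` in the diff further down, while the `main` driver is purely illustrative and not part of the commit.

use std::time::Duration;

// Constants as introduced in the diff below.
const MIME_TTL: Duration = Duration::from_secs(60 * 60 * 24 * 7); // one week for successful lookups
const FAILURE_BACKOFF_BASE: Duration = Duration::from_secs(4);
const FAILURE_BACKOFF_MAX: Duration = Duration::from_secs(60 * 60 * 6);
const FAILURE_BACKOFF_EXPONENT_LIMIT: u32 = 10;

// Mirrors failure_backoff_duration from the diff: the delay doubles with each
// consecutive failure and is clamped so it never exceeds FAILURE_BACKOFF_MAX.
fn failure_backoff_duration(count: u32) -> Duration {
    if count == 0 {
        return FAILURE_BACKOFF_BASE;
    }

    let exponent = count.saturating_sub(1).min(FAILURE_BACKOFF_EXPONENT_LIMIT);
    let delay_secs = FAILURE_BACKOFF_BASE
        .as_secs()
        .max(1)
        .saturating_mul(1u64 << exponent);

    Duration::from_secs(delay_secs.min(FAILURE_BACKOFF_MAX.as_secs()))
}

fn main() {
    println!("successful lookups expire after {:?}", MIME_TTL);
    for count in 1..=12 {
        // 4s, 8s, 16s, ... until the exponent clamp kicks in.
        println!("failure #{}: retry in {:?}", count, failure_backoff_duration(count));
    }
}

With these constants the schedule runs 4 s, 8 s, 16 s, ... and levels off at 4096 s once the exponent clamp of 10 is reached, comfortably under the 6-hour ceiling.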

Diffstat:
M crates/notedeck/src/urls.rs | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 318 insertions(+), 51 deletions(-)

diff --git a/crates/notedeck/src/urls.rs b/crates/notedeck/src/urls.rs
@@ -4,23 +4,90 @@ use std::{
     io::{Read, Write},
     path::PathBuf,
     sync::{Arc, RwLock},
-    time::{Duration, SystemTime},
+    time::{Duration, SystemTime, UNIX_EPOCH},
 };
 
-use egui::TextBuffer;
+use mime_guess::Mime;
 use poll_promise::Promise;
+use serde::{Deserialize, Serialize};
+use tracing::trace;
 use url::Url;
 
 use crate::{Error, MediaCacheType};
 
 const FILE_NAME: &str = "urls.bin";
 const SAVE_INTERVAL: Duration = Duration::from_secs(60);
+const MIME_TTL: Duration = Duration::from_secs(60 * 60 * 24 * 7); // one week
+const FAILURE_BACKOFF_BASE: Duration = Duration::from_secs(4);
+const FAILURE_BACKOFF_MAX: Duration = Duration::from_secs(60 * 60 * 6);
+const FAILURE_BACKOFF_EXPONENT_LIMIT: u32 = 10;
 
-type UrlsToMime = HashMap<String, String>;
+type UrlsToMime = HashMap<String, StoredMimeEntry>;
+
+#[derive(Clone, Serialize, Deserialize)]
+struct StoredMimeEntry {
+    entry: MimeEntry,
+    last_updated_secs: u64,
+}
+
+#[derive(Clone, Serialize, Deserialize, Debug)]
+enum MimeEntry {
+    Mime(String),
+    Fail { count: u32 },
+}
+
+impl StoredMimeEntry {
+    fn new_mime(mime: String, last_updated: SystemTime) -> Self {
+        Self {
+            entry: MimeEntry::Mime(mime),
+            last_updated_secs: system_time_to_secs(last_updated),
+        }
+    }
+
+    fn new_failure(count: u32, last_updated: SystemTime) -> Self {
+        Self {
+            entry: MimeEntry::Fail { count },
+            last_updated_secs: system_time_to_secs(last_updated),
+        }
+    }
+
+    fn last_updated(&self) -> SystemTime {
+        UNIX_EPOCH + Duration::from_secs(self.last_updated_secs)
+    }
+
+    fn expires_at(&self) -> SystemTime {
+        let ttl = match &self.entry {
+            MimeEntry::Mime(_) => MIME_TTL,
+            MimeEntry::Fail { count } => failure_backoff_duration(*count),
+        };
+
+        self.last_updated()
+            .checked_add(ttl)
+            .unwrap_or(SystemTime::UNIX_EPOCH)
+    }
+
+    fn is_expired(&self, now: SystemTime) -> bool {
+        self.expires_at() <= now
+    }
+
+    fn failure_count(&self) -> Option<u32> {
+        match &self.entry {
+            MimeEntry::Fail { count } => Some(*count),
+            _ => None,
+        }
+    }
+}
+
+#[derive(Clone)]
+struct CachedMime {
+    mime: Option<Mime>,
+    expires_at: SystemTime,
+}
 
 /// caches mime type for a URL. saves to disk on interval [`SAVE_INTERVAL`]
 pub struct UrlCache {
     last_saved: SystemTime,
+    last_pruned: SystemTime,
     path: PathBuf,
     cache: Arc<RwLock<UrlsToMime>>,
     from_disk_promise: Option<Promise<Option<UrlsToMime>>>,
@@ -34,19 +101,29 @@ impl UrlCache {
     pub fn new(path: PathBuf) -> Self {
         Self {
             last_saved: SystemTime::now(),
+            last_pruned: SystemTime::now(),
             path: path.clone(),
             cache: Default::default(),
            from_disk_promise: Some(read_from_disk(path)),
         }
     }
 
-    pub fn get_type(&self, url: &str) -> Option<String> {
+    fn get_entry(&self, url: &str) -> Option<StoredMimeEntry> {
         self.cache.read().ok()?.get(url).cloned()
     }
 
-    pub fn set_type(&mut self, url: String, mime_type: String) {
+    fn set_entry(&mut self, url: String, entry: StoredMimeEntry) {
+        if url.is_empty() {
+            return;
+        }
         if let Ok(mut locked_cache) = self.cache.write() {
-            locked_cache.insert(url, mime_type);
+            locked_cache.insert(url, entry);
+        }
+    }
+
+    fn remove(&mut self, url: &str) {
+        if let Ok(mut locked_cache) = self.cache.write() {
+            locked_cache.remove(url);
         }
     }
 
@@ -67,6 +144,13 @@ impl UrlCache {
                 self.last_saved = SystemTime::now();
             }
         }
+
+        if let Ok(cur_duration) = SystemTime::now().duration_since(self.last_pruned) {
+            if cur_duration >= SAVE_INTERVAL {
+                self.purge_expired(SystemTime::now());
+                self.last_pruned = SystemTime::now();
+            }
+        }
     }
 
     pub fn clear(&mut self) {
@@ -79,10 +163,22 @@ impl UrlCache {
             });
         }
     }
+
+    fn purge_expired(&self, now: SystemTime) {
+        let cache = self.cache.clone();
+        std::thread::spawn(move || {
+            if let Ok(mut locked_cache) = cache.write() {
+                locked_cache.retain(|_, entry| !entry.is_expired(now));
+            }
+        });
+    }
 }
 
-fn merge_cache(cur_cache: Arc<RwLock<UrlsToMime>>, from_disk: UrlsToMime) {
+fn merge_cache(cur_cache: Arc<RwLock<UrlsToMime>>, mut from_disk: UrlsToMime) {
     std::thread::spawn(move || {
+        let now = SystemTime::now();
+        from_disk.retain(|_, entry| !entry.is_expired(now));
+
         if let Ok(mut locked_cache) = cur_cache.write() {
             locked_cache.extend(from_disk);
         }
@@ -97,9 +193,28 @@ fn read_from_disk(path: PathBuf) -> Promise<Option<UrlsToMime>> {
             let mut file = File::open(path)?;
             let mut buffer = Vec::new();
             file.read_to_end(&mut buffer)?;
-            let data: UrlsToMime =
-                bincode::deserialize(&buffer).map_err(|e| Error::Generic(e.to_string()))?;
-            Ok(data)
+            if buffer.is_empty() {
+                return Ok(Default::default());
+            }
+
+            match bincode::deserialize::<UrlsToMime>(&buffer) {
+                Ok(data) => {
+                    trace!("Got {} mime entries", data.len());
+                    Ok(data)
+                }
+                Err(err) => {
+                    tracing::debug!("Unable to deserialize UrlMimes with new format: {err}. Attempting legacy fallback.");
+                    let legacy: HashMap<String, String> =
+                        bincode::deserialize(&buffer).map_err(|e| Error::Generic(e.to_string()))?;
+                    trace!("legacy fallback has {} entries", legacy.len());
+                    let now = SystemTime::now();
+                    let migrated = legacy
+                        .into_iter()
+                        .map(|(url, mime)| (url, StoredMimeEntry::new_mime(mime, now)))
+                        .collect();
+                    Ok(migrated)
+                }
+            }
         })();
 
         match result {
@@ -119,12 +234,13 @@ fn save_to_disk(path: PathBuf, cache: Arc<RwLock<UrlsToMime>>) {
         let result: Result<(), Error> = (|| {
            if let Ok(cache) = cache.read() {
                let cache = &*cache;
+                let num_items = cache.len();
                 let encoded =
                     bincode::serialize(cache).map_err(|e| Error::Generic(e.to_string()))?;
                 let mut file = File::create(&path)?;
                 file.write_all(&encoded)?;
                 file.sync_all()?;
-                tracing::debug!("Saved UrlCache to disk.");
+                tracing::debug!("Saved UrlCache with {num_items} mimes to disk.");
                 Ok(())
             } else {
                 Err(Error::Generic(
@@ -139,6 +255,26 @@ fn save_to_disk(path: PathBuf, cache: Arc<RwLock<UrlsToMime>>) {
     });
 }
 
+fn system_time_to_secs(time: SystemTime) -> u64 {
+    time.duration_since(UNIX_EPOCH)
+        .unwrap_or_else(|_| Duration::from_secs(0))
+        .as_secs()
+}
+
+fn failure_backoff_duration(count: u32) -> Duration {
+    if count == 0 {
+        return FAILURE_BACKOFF_BASE;
+    }
+
+    let exponent = count.saturating_sub(1).min(FAILURE_BACKOFF_EXPONENT_LIMIT);
+    let base_secs = FAILURE_BACKOFF_BASE.as_secs().max(1);
+    let multiplier = 1u64 << exponent;
+    let delay_secs = base_secs.saturating_mul(multiplier);
+    let max_secs = FAILURE_BACKOFF_MAX.as_secs();
+
+    Duration::from_secs(delay_secs.min(max_secs))
+}
+
 fn ehttp_get_mime_type(url: &str, sender: poll_promise::Sender<MimeResult>) {
     let request = ehttp::Request::head(url);
 
@@ -181,6 +317,7 @@ fn extract_mime_type(content_type: &str) -> &str {
 pub struct UrlMimes {
     pub cache: UrlCache,
     in_flight: HashMap<String, Promise<MimeResult>>,
+    mime_cache: HashMap<String, CachedMime>,
 }
 
 impl UrlMimes {
@@ -188,41 +325,169 @@
         Self {
             cache: url_cache,
             in_flight: Default::default(),
+            mime_cache: Default::default(),
         }
     }
 
-    pub fn get(&mut self, url: &str) -> Option<String> {
-        if let Some(mime_type) = self.cache.get_type(url) {
-            Some(mime_type)
-        } else if let Some(promise) = self.in_flight.get_mut(url) {
-            if let Some(mime_result) = promise.ready_mut() {
-                match mime_result {
-                    Ok(mime_type) => {
-                        let mime_type = mime_type.take();
-                        self.cache.set_type(url.to_owned(), mime_type.clone());
-                        self.in_flight.remove(url);
-                        Some(mime_type)
-                    }
-                    Err(HttpError::HttpFailure) => {
-                        // allow retrying
-                        //self.in_flight.remove(url);
+    pub fn get_or_fetch(&mut self, url: &str) -> Option<&Mime> {
+        let now = SystemTime::now();
+
+        if let Some(cached) = self.mime_cache.get(url) {
+            if cached.expires_at > now {
+                return self
+                    .mime_cache
+                    .get(url)
+                    .and_then(|cached| cached.mime.as_ref());
+            }
+
+            tracing::trace!("mime {:?} at url {url} has expired", cached.mime);
+
+            self.mime_cache.remove(url);
+        }
+
+        let stored_entry = self.cache.get_entry(url);
+        let previous_failure_count = stored_entry
+            .as_ref()
+            .and_then(|entry| entry.failure_count())
+            .unwrap_or(0);
+
+        if let Some(entry) = stored_entry.as_ref() {
+            if !entry.is_expired(now) {
+                return match &entry.entry {
+                    MimeEntry::Mime(mime_string) => match mime_string.parse::<Mime>() {
+                        Ok(mime) => {
+                            let expires_at = entry.expires_at();
+                            trace!("inserted {mime:?} in mime cache for {url}");
+                            self.mime_cache.insert(
+                                url.to_owned(),
+                                CachedMime {
+                                    mime: Some(mime),
+                                    expires_at,
+                                },
+                            );
+                            self.mime_cache
+                                .get(url)
+                                .and_then(|cached| cached.mime.as_ref())
+                        }
+                        Err(err) => {
+                            tracing::warn!("Failed to parse mime '{mime_string}' for {url}: {err}");
+                            self.record_failure(
+                                url,
+                                previous_failure_count.saturating_add(1),
+                                SystemTime::now(),
+                            );
+                            None
+                        }
+                    },
+                    MimeEntry::Fail { .. } => {
+                        trace!("Read failure from storage for {url}, wrote None to cache");
+
+                        let expires_at = entry.expires_at();
+                        self.mime_cache.insert(
+                            url.to_owned(),
+                            CachedMime {
+                                mime: None,
+                                expires_at,
+                            },
+                        );
                         None
                     }
-                    Err(HttpError::MissingHeader) => {
-                        // response was malformed, don't retry
+                };
+            }
+
+            if !matches!(entry.entry, MimeEntry::Fail { count: _ }) {
+                self.cache.remove(url);
+            }
+        }
+
+        let Some(promise) = self.in_flight.get_mut(url) else {
+            if Url::parse(url).is_err() {
+                trace!("Found invalid url: {url}");
+                self.mime_cache.insert(
+                    url.to_owned(),
+                    CachedMime {
+                        mime: None,
+                        expires_at: SystemTime::UNIX_EPOCH + Duration::from_secs(u64::MAX / 2), // never expire...
+                    },
+                );
+            }
+            let (sender, promise) = Promise::new();
+            ehttp_get_mime_type(url, sender);
+            self.in_flight.insert(url.to_owned(), promise);
+            return None;
+        };
+
+        let Ok(mime_type) = promise.ready_mut()? else {
+            self.in_flight.remove(url);
+            self.record_failure(
+                url,
+                previous_failure_count.saturating_add(1),
+                SystemTime::now(),
+            );
+            return None;
+        };
+
+        let mime_string = std::mem::take(mime_type);
+        self.in_flight.remove(url);
+
+        match mime_string.parse::<Mime>() {
+            Ok(mime) => {
+                let fetched_at = SystemTime::now();
+                let prev_entry = stored_entry;
+                let entry = StoredMimeEntry::new_mime(mime_string, fetched_at);
+                let expires_at = entry.expires_at();
+                if let Some(Some(failed_count)) = prev_entry.map(|p| {
+                    if let MimeEntry::Fail { count } = p.entry {
+                        Some(count)
+                    } else {
                         None
                     }
+                }) {
+                    trace!("found {mime:?} for {url}, inserting in cache & storage AFTER FAILING {failed_count} TIMES");
+                } else {
+                    trace!("found {mime:?} for {url}, inserting in cache & storage");
                 }
-            } else {
+                self.cache.set_entry(url.to_owned(), entry);
+                self.mime_cache.insert(
+                    url.to_owned(),
+                    CachedMime {
+                        mime: Some(mime),
+                        expires_at,
+                    },
+                );
+                self.mime_cache
+                    .get(url)
+                    .and_then(|cached| cached.mime.as_ref())
+            }
+            Err(err) => {
+                tracing::warn!("Unable to parse mime type returned for {url}: {err}");
+                self.record_failure(
+                    url,
+                    previous_failure_count.saturating_add(1),
+                    SystemTime::now(),
+                );
                 None
             }
-        } else {
-            let (sender, promise) = Promise::new();
-            ehttp_get_mime_type(url, sender);
-            self.in_flight.insert(url.to_owned(), promise);
-            None
         }
     }
+
+    fn record_failure(&mut self, url: &str, count: u32, timestamp: SystemTime) {
+        let count = count.max(1);
+        let entry = StoredMimeEntry::new_failure(count, timestamp);
+        let expires_at = entry.expires_at();
+        trace!(
+            "failed to get mime for {url} {count} times. next request in {:?}",
+            failure_backoff_duration(count)
+        );
+        self.cache.set_entry(url.to_owned(), entry);
+        self.mime_cache.insert(
+            url.to_owned(),
+            CachedMime {
+                mime: None,
+                expires_at,
+            },
+        );
+    }
 }
 
 #[derive(Debug)]
@@ -258,11 +523,15 @@ impl SupportedMimeType {
     }
 
     pub fn to_cache_type(&self) -> MediaCacheType {
-        if self.mime == mime_guess::mime::IMAGE_GIF {
-            MediaCacheType::Gif
-        } else {
-            MediaCacheType::Image
-        }
+        mime_to_cache_type(&self.mime)
+    }
+}
+
+fn mime_to_cache_type(mime: &Mime) -> MediaCacheType {
+    if *mime == mime_guess::mime::IMAGE_GIF {
+        MediaCacheType::Gif
+    } else {
+        MediaCacheType::Image
    }
 }
 
@@ -297,18 +566,16 @@ fn url_has_supported_mime(url: &str) -> MimeHostedAtUrl {
 
 #[profiling::function]
 pub fn supported_mime_hosted_at_url(urls: &mut UrlMimes, url: &str) -> Option<MediaCacheType> {
-    match url_has_supported_mime(url) {
-        MimeHostedAtUrl::Yes(cache_type) => Some(cache_type),
-        MimeHostedAtUrl::Maybe => urls
-            .get(url)
-            .and_then(|s| s.parse::<mime_guess::mime::Mime>().ok())
-            .and_then(|mime: mime_guess::mime::Mime| {
-                SupportedMimeType::from_mime(mime)
-                    .ok()
-                    .map(|s| s.to_cache_type())
-            }),
-        MimeHostedAtUrl::No => None,
-    }
+    let Some(mime) = urls.get_or_fetch(url) else {
+        return match url_has_supported_mime(url) {
+            MimeHostedAtUrl::Yes(media_cache_type) => Some(media_cache_type),
+            MimeHostedAtUrl::Maybe | MimeHostedAtUrl::No => None,
+        };
+    };
+
+    Some(mime)
+        .filter(|mime| is_mime_supported(mime))
+        .map(mime_to_cache_type)
 }
 
 enum MimeHostedAtUrl {