commit 7037555516f0f34d648b1920f07605bbbabcb5f2
parent 19ed990c5782af2c6668ed788d2b140e6721a253
Author: Greg Heartsfield <scsibug@imap.cc>
Date: Wed, 5 Jan 2022 17:33:53 -0500
improvement: add indexed tag queries
Diffstat:
2 files changed, 90 insertions(+), 20 deletions(-)
diff --git a/src/event.rs b/src/event.rs
@@ -9,6 +9,8 @@ use secp256k1::{schnorr, Secp256k1, VerifyOnly, XOnlyPublicKey};
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::value::Value;
use serde_json::Number;
+use std::collections::HashMap;
+use std::collections::HashSet;
use std::str::FromStr;
use std::time::SystemTime;
@@ -35,6 +37,9 @@ pub struct Event {
pub(crate) tags: Vec<Vec<String>>,
pub(crate) content: String,
pub(crate) sig: String,
+ // Optimization for tag search, built on demand
+ #[serde(skip)]
+ pub(crate) tagidx: Option<HashMap<String, HashSet<String>>>,
}
/// Simple tag type for array of array of strings.
@@ -56,7 +61,9 @@ impl From<EventCmd> for Result<Event> {
if ec.cmd != "EVENT" {
Err(CommandUnknownError)
} else if ec.event.is_valid() {
- Ok(ec.event)
+ let mut e = ec.event;
+ e.build_index();
+ Ok(e)
} else {
Err(EventInvalid)
}
@@ -72,6 +79,30 @@ fn unix_time() -> u64 {
}
impl Event {
+ /// Build an event tag index
+ fn build_index(&mut self) {
+ // if there are no tags; just leave the index as None
+ if self.tags.is_empty() {
+ return;
+ }
+ // otherwise, build an index
+ let mut idx: HashMap<String, HashSet<String>> = HashMap::new();
+ // iterate over tags that have at least 2 elements
+ for t in self.tags.iter().filter(|x| x.len() > 1) {
+ let tagname = t.get(0).unwrap();
+ let tagval = t.get(1).unwrap();
+ // ensure a vector exists for this tag
+ if !idx.contains_key(tagname) {
+ idx.insert(tagname.clone(), HashSet::new());
+ }
+ // get the tag vec and insert entry
+ let tidx = idx.get_mut(tagname).expect("could not get tag vector");
+ tidx.insert(tagval.clone());
+ }
+ // save the tag structure
+ self.tagidx = Some(idx);
+ }
+
/// Create a short event identifier, suitable for logging.
pub fn get_event_id_prefix(&self) -> String {
self.id.chars().take(8).collect()
@@ -193,6 +224,34 @@ impl Event {
pub fn pubkey_tag_match(&self, pubkey: &str) -> bool {
self.get_pubkey_tags().contains(&pubkey)
}
+
+ /// Generic tag match
+ pub fn generic_tag_match(&self, tagname: &str, tagvalue: &str) -> bool {
+ match &self.tagidx {
+ Some(idx) => {
+ // get the set of values for this tag
+ match idx.get(tagname) {
+ Some(valset) => valset.contains(tagvalue),
+ None => false,
+ }
+ }
+ None => false,
+ }
+ }
+
+ /// Determine if the given tag and value set intersect with tags in this event.
+ pub fn generic_tag_val_intersect(&self, tagname: &str, check: &HashSet<String>) -> bool {
+ match &self.tagidx {
+ Some(idx) => match idx.get(tagname) {
+ Some(valset) => {
+ let common = valset.intersection(check);
+ common.count() > 0
+ }
+ None => false,
+ },
+ None => false,
+ }
+ }
}
#[cfg(test)]
diff --git a/src/subscription.rs b/src/subscription.rs
@@ -34,6 +34,12 @@ pub struct ReqFilter {
pub until: Option<u64>,
/// List of author public keys
pub authors: Option<Vec<String>>,
+ /// Set of event tags, for quick indexing
+ #[serde(skip)]
+ event_tag_set: Option<HashSet<String>>,
+ /// Set of pubkey tags, for quick indexing
+ #[serde(skip)]
+ pubkey_tag_set: Option<HashSet<String>>,
}
impl<'de> Deserialize<'de> for Subscription {
@@ -76,8 +82,10 @@ impl<'de> Deserialize<'de> for Subscription {
let mut filters = vec![];
for fv in i {
- let f: ReqFilter = serde_json::from_value(fv.take())
+ let mut f: ReqFilter = serde_json::from_value(fv.take())
.map_err(|_| serde::de::Error::custom("could not parse filter"))?;
+ // create indexes
+ f.update_tag_indexes();
filters.push(f);
}
Ok(Subscription {
@@ -105,6 +113,15 @@ impl Subscription {
}
impl ReqFilter {
+ /// Update pubkey and event indexes
+ fn update_tag_indexes(&mut self) {
+ if let Some(event_vec) = &self.events {
+ self.event_tag_set = Some(HashSet::from_iter(event_vec.iter().cloned()));
+ }
+ if let Some(pubkey_vec) = &self.pubkeys {
+ self.pubkey_tag_set = Some(HashSet::from_iter(pubkey_vec.iter().cloned()));
+ }
+ }
/// Check for a match within the authors list.
fn ids_match(&self, event: &Event) -> bool {
self.ids
@@ -119,33 +136,27 @@ impl ReqFilter {
.map(|vs| vs.contains(&event.pubkey.to_owned()))
.unwrap_or(true)
}
+
/// Check if this filter either matches, or does not care about the event tags.
fn event_match(&self, event: &Event) -> bool {
- // This needs to be analyzed for performance; building these
- // hash sets for each active subscription isn't great.
- if let Some(es) = &self.events {
- let event_refs =
- HashSet::<_>::from_iter(event.get_event_tags().iter().map(|x| x.to_owned()));
- let filter_refs = HashSet::<_>::from_iter(es.iter().map(|x| &x[..]));
- let cardinality = event_refs.intersection(&filter_refs).count();
- cardinality > 0
+ // an event match is performed by looking at the ReqFilter events field, and sending a hashset to the event to intersect with.
+ if let Some(es) = &self.event_tag_set {
+ // if there exists event tags in this filter, find if any intersect.
+ event.generic_tag_val_intersect("e", es)
} else {
+ // if no event tags were requested in a filter, we do match
true
}
}
- /// Check if this filter either matches, or does not care about
- /// the pubkey/petname tags.
+ /// Check if this filter either matches, or does not care about the event tags.
fn pubkey_match(&self, event: &Event) -> bool {
- // This needs to be analyzed for performance; building these
- // hash sets for each active subscription isn't great.
- if let Some(ps) = &self.pubkeys {
- let pubkey_refs =
- HashSet::<_>::from_iter(event.get_pubkey_tags().iter().map(|x| x.to_owned()));
- let filter_refs = HashSet::<_>::from_iter(ps.iter().map(|x| &x[..]));
- let cardinality = pubkey_refs.intersection(&filter_refs).count();
- cardinality > 0
+ // an event match is performed by looking at the ReqFilter events field, and sending a hashset to the event to intersect with.
+ if let Some(ps) = &self.pubkey_tag_set {
+ // if there exists event tags in this filter, find if any intersect.
+ event.generic_tag_val_intersect("p", ps)
} else {
+ // if no event tags were requested in a filter, we do match
true
}
}