commit af453548ee60f060c49902a9fe83ea94566e1d70
parent df251c821c31787ab9e5b0b7118e5799a3484ea9
Author: Greg Heartsfield <scsibug@imap.cc>
Date: Tue, 25 Jan 2022 18:21:43 -0600
feat: allow author and event id prefix search
This is an experimental non-NIP feature that allows a subscription
filter to include a prefix for authors and events.
Diffstat:
M | src/db.rs | | | 229 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- |
M | src/event.rs | | | 28 | +++++++++++----------------- |
M | src/subscription.rs | | | 67 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- |
3 files changed, 274 insertions(+), 50 deletions(-)
diff --git a/src/db.rs b/src/db.rs
@@ -383,6 +383,80 @@ fn is_hex(s: &str) -> bool {
s.chars().all(|x| char::is_ascii_hexdigit(&x))
}
+/// Check if a string contains only f chars
+fn is_all_fs(s: &str) -> bool {
+ s.chars().all(|x| x == 'f' || x == 'F')
+}
+
+#[derive(PartialEq, Debug, Clone)]
+enum HexSearch {
+ // when no range is needed, exact 32-byte
+ Exact(Vec<u8>),
+ // lower (inclusive) and upper range (exclusive)
+ Range(Vec<u8>, Vec<u8>),
+ // lower bound only, upper bound is MAX inclusive
+ LowerOnly(Vec<u8>),
+}
+
+/// Find the next hex sequence greater than the argument.
+fn hex_range(s: &str) -> Option<HexSearch> {
+ // handle special cases
+ if !is_hex(s) || s.len() > 64 {
+ return None;
+ }
+ if s.len() == 64 {
+ return Some(HexSearch::Exact(hex::decode(s).ok()?));
+ }
+ // if s is odd, add a zero
+ let mut hash_base = s.to_owned();
+ let mut odd = hash_base.len() % 2 != 0;
+ if odd {
+ // extend the string to make it even
+ hash_base.push('0');
+ }
+ let base = hex::decode(hash_base).ok()?;
+ // check for all ff's
+ if is_all_fs(s) {
+ // there is no higher bound, we only want to search for blobs greater than this.
+ return Some(HexSearch::LowerOnly(base));
+ }
+
+ // return a range
+ let mut upper = base.clone();
+ let mut byte_len = upper.len();
+
+ // for odd strings, we made them longer, but we want to increment the upper char (+16).
+ // we know we can do this without overflowing because we explicitly set the bottom half to 0's.
+ while byte_len > 0 {
+ byte_len -= 1;
+ // check if byte can be incremented, or if we need to carry.
+ let b = upper[byte_len];
+ if b == u8::MAX {
+ // reset and carry
+ upper[byte_len] = 0;
+ } else if odd {
+ // check if first char in this byte is NOT 'f'
+ if b < 240 {
+ upper[byte_len] = b + 16; // bump up the first character in this byte
+ // increment done, stop iterating through the vec
+ break;
+ } else {
+ // if it is 'f', reset the byte to 0 and do a carry
+ // reset and carry
+ upper[byte_len] = 0;
+ }
+ // done with odd logic, so don't repeat this
+ odd = false;
+ } else {
+ // bump up the first character in this byte
+ upper[byte_len] = b + 1;
+ // increment done, stop iterating
+ break;
+ }
+ }
+ Some(HexSearch::Range(base, upper))
+}
+
fn repeat_vars(count: usize) -> String {
if count == 0 {
return "".to_owned();
@@ -409,17 +483,33 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
for f in sub.filters.iter() {
// individual filter components
let mut filter_components: Vec<String> = Vec::new();
- // Query for "authors"
- if f.authors.is_some() {
- let authors_escaped: Vec<String> = f
- .authors
- .as_ref()
- .unwrap()
- .iter()
- .filter(|&x| is_hex(x))
- .map(|x| format!("x'{}'", x))
- .collect();
- let authors_clause = format!("author IN ({})", authors_escaped.join(", "));
+ // Query for "authors", allowing prefix matches
+ if let Some(authvec) = &f.authors {
+ // take each author and convert to a hexsearch
+ let mut auth_searches: Vec<String> = vec![];
+ for auth in authvec {
+ match hex_range(auth) {
+ Some(HexSearch::Exact(ex)) => {
+ info!("Exact match for author");
+ auth_searches.push("author=?".to_owned());
+ params.push(Box::new(ex));
+ }
+ Some(HexSearch::Range(lower, upper)) => {
+ auth_searches.push("(author>? AND author<?)".to_owned());
+ params.push(Box::new(lower));
+ params.push(Box::new(upper));
+ }
+ Some(HexSearch::LowerOnly(lower)) => {
+ // info!("{:?} => lower; {:?} ", auth, hex::encode(lower));
+ auth_searches.push("author>?".to_owned());
+ params.push(Box::new(lower));
+ }
+ None => {
+ info!("Could not parse hex range from {:?}", auth);
+ }
+ }
+ }
+ let authors_clause = format!("({})", auth_searches.join(" OR "));
filter_components.push(authors_clause);
}
// Query for Kind
@@ -429,17 +519,31 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
let kind_clause = format!("kind IN ({})", str_kinds.join(", "));
filter_components.push(kind_clause);
}
- // Query for event
- if f.ids.is_some() {
- let ids_escaped: Vec<String> = f
- .ids
- .as_ref()
- .unwrap()
- .iter()
- .filter(|&x| is_hex(x))
- .map(|x| format!("x'{}'", x))
- .collect();
- let id_clause = format!("event_hash IN ({})", ids_escaped.join(", "));
+ // Query for event, allowing prefix matches
+ if let Some(idvec) = &f.ids {
+ // take each author and convert to a hexsearch
+ let mut id_searches: Vec<String> = vec![];
+ for id in idvec {
+ match hex_range(id) {
+ Some(HexSearch::Exact(ex)) => {
+ id_searches.push("event_hash=?".to_owned());
+ params.push(Box::new(ex));
+ }
+ Some(HexSearch::Range(lower, upper)) => {
+ id_searches.push("(event_hash>? AND event_hash<?)".to_owned());
+ params.push(Box::new(lower));
+ params.push(Box::new(upper));
+ }
+ Some(HexSearch::LowerOnly(lower)) => {
+ id_searches.push("event_hash>?".to_owned());
+ params.push(Box::new(lower));
+ }
+ None => {
+ info!("Could not parse hex range from {:?}", id);
+ }
+ }
+ }
+ let id_clause = format!("({})", id_searches.join(" OR "));
filter_components.push(id_clause);
}
// Query for tags
@@ -553,3 +657,84 @@ pub async fn db_query(
ok
});
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn hex_range_exact() -> Result<()> {
+ let hex = "abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00";
+ let r = hex_range(hex);
+ assert_eq!(
+ r,
+ Some(HexSearch::Exact(hex::decode(hex).expect("invalid hex")))
+ );
+ Ok(())
+ }
+ #[test]
+ fn hex_full_range() -> Result<()> {
+ //let hex = "abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00abcdef00";
+ let hex = "aaaa";
+ let hex_upper = "aaab";
+ let r = hex_range(hex);
+ assert_eq!(
+ r,
+ Some(HexSearch::Range(
+ hex::decode(hex).expect("invalid hex"),
+ hex::decode(hex_upper).expect("invalid hex")
+ ))
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn hex_full_range_odd() -> Result<()> {
+ let r = hex_range("abc");
+ assert_eq!(
+ r,
+ Some(HexSearch::Range(
+ hex::decode("abc0").expect("invalid hex"),
+ hex::decode("abd0").expect("invalid hex")
+ ))
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn hex_full_range_odd_end_f() -> Result<()> {
+ let r = hex_range("abf");
+ assert_eq!(
+ r,
+ Some(HexSearch::Range(
+ hex::decode("abf0").expect("invalid hex"),
+ hex::decode("ac00").expect("invalid hex")
+ ))
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn hex_no_upper() -> Result<()> {
+ let r = hex_range("ffff");
+ assert_eq!(
+ r,
+ Some(HexSearch::LowerOnly(
+ hex::decode("ffff").expect("invalid hex")
+ ))
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn hex_no_upper_odd() -> Result<()> {
+ let r = hex_range("fff");
+ assert_eq!(
+ r,
+ Some(HexSearch::LowerOnly(
+ hex::decode("fff0").expect("invalid hex")
+ ))
+ );
+ Ok(())
+ }
+}
diff --git a/src/event.rs b/src/event.rs
@@ -145,6 +145,7 @@ impl Event {
return false;
}
// * validate the message digest (sig) using the pubkey & computed sha256 message hash.
+
let sig = schnorr::Signature::from_str(&self.sig).unwrap();
if let Ok(msg) = secp256k1::Message::from_slice(digest.as_ref()) {
let pubkey = XOnlyPublicKey::from_str(&self.pubkey).unwrap();
@@ -193,21 +194,6 @@ impl Event {
serde_json::Value::Array(tags)
}
- /// Generic tag match
- // TODO: is this used anywhere?
- pub fn generic_tag_match(&self, tagname: &str, tagvalue: &str) -> bool {
- match &self.tagidx {
- Some(idx) => {
- // get the set of values for this tag
- match idx.get(tagname) {
- Some(valset) => valset.contains(tagvalue),
- None => false,
- }
- }
- None => false,
- }
- }
-
/// Determine if the given tag and value set intersect with tags in this event.
pub fn generic_tag_val_intersect(&self, tagname: &str, check: &HashSet<String>) -> bool {
match &self.tagidx {
@@ -258,7 +244,8 @@ mod tests {
#[test]
fn empty_event_tag_match() -> Result<()> {
let event = simple_event();
- assert!(!event.event_tag_match("foo"));
+ assert!(!event
+ .generic_tag_val_intersect("e", &HashSet::from(["foo".to_owned(), "bar".to_owned()])));
Ok(())
}
@@ -266,7 +253,14 @@ mod tests {
fn single_event_tag_match() -> Result<()> {
let mut event = simple_event();
event.tags = vec![vec!["e".to_owned(), "foo".to_owned()]];
- assert!(event.event_tag_match("foo"));
+ event.build_index();
+ assert_eq!(
+ event.generic_tag_val_intersect(
+ "e",
+ &HashSet::from(["foo".to_owned(), "bar".to_owned()])
+ ),
+ true
+ );
Ok(())
}
diff --git a/src/subscription.rs b/src/subscription.rs
@@ -160,19 +160,28 @@ impl Subscription {
}
}
+fn prefix_match(prefixes: &[String], target: &str) -> bool {
+ for prefix in prefixes {
+ if target.starts_with(prefix) {
+ return true;
+ }
+ }
+ // none matched
+ false
+}
+
impl ReqFilter {
- /// Check for a match within the authors list.
fn ids_match(&self, event: &Event) -> bool {
self.ids
.as_ref()
- .map(|vs| vs.contains(&event.id.to_owned()))
+ .map(|vs| prefix_match(vs, &event.id))
.unwrap_or(true)
}
fn authors_match(&self, event: &Event) -> bool {
self.authors
.as_ref()
- .map(|vs| vs.contains(&event.pubkey.to_owned()))
+ .map(|vs| prefix_match(vs, &event.pubkey))
.unwrap_or(true)
}
@@ -259,10 +268,46 @@ mod tests {
}
#[test]
- fn interest_id_nomatch() -> Result<()> {
+ fn interest_author_prefix_match() -> Result<()> {
+ // subscription with a filter for ID
+ let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"authors": ["abc"]}]"#)?;
+ let e = Event {
+ id: "foo".to_owned(),
+ pubkey: "abcd".to_owned(),
+ created_at: 0,
+ kind: 0,
+ tags: Vec::new(),
+ content: "".to_owned(),
+ sig: "".to_owned(),
+ tagidx: None,
+ };
+ assert!(s.interested_in_event(&e));
+ Ok(())
+ }
+
+ #[test]
+ fn interest_id_prefix_match() -> Result<()> {
// subscription with a filter for ID
let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"ids": ["abc"]}]"#)?;
let e = Event {
+ id: "abcd".to_owned(),
+ pubkey: "".to_owned(),
+ created_at: 0,
+ kind: 0,
+ tags: Vec::new(),
+ content: "".to_owned(),
+ sig: "".to_owned(),
+ tagidx: None,
+ };
+ assert!(s.interested_in_event(&e));
+ Ok(())
+ }
+
+ #[test]
+ fn interest_id_nomatch() -> Result<()> {
+ // subscription with a filter for ID
+ let s: Subscription = serde_json::from_str(r#"["REQ","xyz",{"ids": ["xyz"]}]"#)?;
+ let e = Event {
id: "abcde".to_owned(),
pubkey: "".to_owned(),
created_at: 0,
@@ -272,7 +317,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), false);
+ assert!(!s.interested_in_event(&e));
Ok(())
}
@@ -291,7 +336,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), false);
+ assert!(!s.interested_in_event(&e));
Ok(())
}
@@ -309,7 +354,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), true);
+ assert!(s.interested_in_event(&e));
Ok(())
}
@@ -327,7 +372,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), true);
+ assert!(s.interested_in_event(&e));
Ok(())
}
@@ -345,7 +390,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), true);
+ assert!(s.interested_in_event(&e));
Ok(())
}
#[test]
@@ -363,7 +408,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), true);
+ assert!(s.interested_in_event(&e));
Ok(())
}
@@ -381,7 +426,7 @@ mod tests {
sig: "".to_owned(),
tagidx: None,
};
- assert_eq!(s.interested_in_event(&e), false);
+ assert!(!s.interested_in_event(&e));
Ok(())
}
}