notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck
Log | Files | Refs | README | LICENSE

commit 6647e7dc3f1413aad52348a462a276aff3995bb9
parent ba8ac18de76e173704e433bf26d59e3f256a57ab
Author: William Casarin <jb55@jb55.com>
Date:   Tue, 14 Jan 2025 10:10:45 -0800

Merge Hashtag parsing

Daniel Saxton (5):
      Add t tags for hashtags
      Use HashSet, lowercase, and add emoji tests
      Add test and format
      Fix emoji hashtags
      Handle punctuation better

Link: https://github.com/damus-io/notedeck/pull/592

Diffstat:
M crates/notedeck_columns/src/post.rs | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
@@ -20,12 +20,15 @@ impl NewPost {
     }
 
     pub fn to_note(&self, seckey: &[u8; 32]) -> Note {
-        add_client_tag(NoteBuilder::new())
+        let mut builder = add_client_tag(NoteBuilder::new())
             .kind(1)
-            .content(&self.content)
-            .sign(seckey)
-            .build()
-            .expect("note should be ok")
+            .content(&self.content);
+
+        for hashtag in Self::extract_hashtags(&self.content) {
+            builder = builder.start_tag().tag_str("t").tag_str(&hashtag);
+        }
+
+        builder.sign(seckey).build().expect("note should be ok")
     }
 
     pub fn to_reply(&self, seckey: &[u8; 32], replying_to: &Note) -> Note {
@@ -106,9 +109,13 @@ impl NewPost {
             enostr::NoteId::new(*quoting.id()).to_bech().unwrap()
         );
 
-        NoteBuilder::new()
-            .kind(1)
-            .content(&new_content)
+        let mut builder = NoteBuilder::new().kind(1).content(&new_content);
+
+        for hashtag in Self::extract_hashtags(&self.content) {
+            builder = builder.start_tag().tag_str("t").tag_str(&hashtag);
+        }
+
+        builder
             .start_tag()
             .tag_str("q")
             .tag_str(&hex::encode(quoting.id()))
@@ -119,4 +126,52 @@ impl NewPost {
             .build()
             .expect("expected build to work")
     }
+
+    fn extract_hashtags(content: &str) -> HashSet<String> {
+        let mut hashtags = HashSet::new();
+        for word in
+            content.split(|c: char| c.is_whitespace() || (c.is_ascii_punctuation() && c != '#'))
+        {
+            if word.starts_with('#') && word.len() > 1 {
+                let tag = word[1..].to_lowercase();
+                if !tag.is_empty() {
+                    hashtags.insert(tag);
+                }
+            }
+        }
+        hashtags
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extract_hashtags() {
+        let test_cases = vec![
+            ("Hello #world", vec!["world"]),
+            ("Multiple #tags #in #one post", vec!["tags", "in", "one"]),
+            ("No hashtags here", vec![]),
+            ("#tag1 with #tag2!", vec!["tag1", "tag2"]),
+            ("Ignore # empty", vec![]),
+            ("Testing emoji #🍌banana", vec!["🍌banana"]),
+            ("Testing emoji #🍌", vec!["🍌"]),
+            ("Duplicate #tag #tag #tag", vec!["tag"]),
+            ("Mixed case #TaG #tag #TAG", vec!["tag"]),
+            (
+                "#tag1, #tag2, #tag3 with commas",
+                vec!["tag1", "tag2", "tag3"],
+            ),
+            ("Separated by commas #tag1,#tag2", vec!["tag1", "tag2"]),
+            ("Separated by periods #tag1.#tag2", vec!["tag1", "tag2"]),
+            ("Separated by semicolons #tag1;#tag2", vec!["tag1", "tag2"]),
+        ];
+
+        for (input, expected) in test_cases {
+            let result = NewPost::extract_hashtags(input);
+            let expected: HashSet<String> = expected.into_iter().map(String::from).collect();
+            assert_eq!(result, expected, "Failed for input: {}", input);
+        }
+    }
 }