Merge Hashtag parsing - notedeck - One damus client to rule them all

commit 6647e7dc3f1413aad52348a462a276aff3995bb9
parent ba8ac18de76e173704e433bf26d59e3f256a57ab
Author: William Casarin <jb55@jb55.com>
Date:   Tue, 14 Jan 2025 10:10:45 -0800

Merge Hashtag parsing

Daniel Saxton (5):
      Add t tags for hashtags
      Use HashSet, lowercase, and add emoji tests
      Add test and format
      Fix emoji hashtags
      Handle punctuation better

Link: https://github.com/damus-io/notedeck/pull/592

Diffstat:
M crates/notedeck_columns/src/post.rs  | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------

1 file changed, 63 insertions(+), 8 deletions(-)
diff --git a/crates/notedeck_columns/src/post.rs b/crates/notedeck_columns/src/post.rs
@@ -20,12 +20,15 @@ impl NewPost {
     }
 
     pub fn to_note(&self, seckey: &[u8; 32]) -> Note {
-        add_client_tag(NoteBuilder::new())
+        let mut builder = add_client_tag(NoteBuilder::new())
             .kind(1)
-            .content(&self.content)
-            .sign(seckey)
-            .build()
-            .expect("note should be ok")
+            .content(&self.content);
+
+        for hashtag in Self::extract_hashtags(&self.content) {
+            builder = builder.start_tag().tag_str("t").tag_str(&hashtag);
+        }
+
+        builder.sign(seckey).build().expect("note should be ok")
     }
 
     pub fn to_reply(&self, seckey: &[u8; 32], replying_to: &Note) -> Note {
@@ -106,9 +109,13 @@ impl NewPost {
             enostr::NoteId::new(*quoting.id()).to_bech().unwrap()
         );
 
-        NoteBuilder::new()
-            .kind(1)
-            .content(&new_content)
+        let mut builder = NoteBuilder::new().kind(1).content(&new_content);
+
+        for hashtag in Self::extract_hashtags(&self.content) {
+            builder = builder.start_tag().tag_str("t").tag_str(&hashtag);
+        }
+
+        builder
             .start_tag()
             .tag_str("q")
             .tag_str(&hex::encode(quoting.id()))
@@ -119,4 +126,52 @@ impl NewPost {
             .build()
             .expect("expected build to work")
     }
+
+    /// Collects the distinct hashtags mentioned in `content`.
+    ///
+    /// The text is split on whitespace and on every ASCII punctuation
+    /// character except `#`. A resulting word counts as a hashtag only when
+    /// it *starts* with `#`, so fragments such as `page#section` are ignored.
+    /// Tags are lowercased and returned without the leading `#`; the set
+    /// removes duplicates such as `#Tag` and `#tag`.
+    ///
+    /// Runs like `##tag` or `#a#b` are broken at each `#`, so no returned tag
+    /// ever contains a `#` or is empty.
+    fn extract_hashtags(content: &str) -> HashSet<String> {
+        content
+            .split(|c: char| c.is_whitespace() || (c.is_ascii_punctuation() && c != '#'))
+            // Only words that begin with '#' are hashtags.
+            .filter_map(|word| word.strip_prefix('#'))
+            // '#' is not a delimiter above, so a word may still hold several
+            // of them; treat each one as the start of a new tag.
+            .flat_map(|rest| rest.split('#'))
+            .filter(|tag| !tag.is_empty())
+            .map(str::to_lowercase)
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Table-driven check of `NewPost::extract_hashtags`: each entry pairs a
+    /// post body with the exact set of tags expected from it.
+    #[test]
+    fn test_extract_hashtags() {
+        let cases: Vec<(&str, Vec<&str>)> = vec![
+            // Plain ASCII tags and the no-tag / bare-'#' cases.
+            ("Hello #world", vec!["world"]),
+            ("Multiple #tags #in #one post", vec!["tags", "in", "one"]),
+            ("No hashtags here", vec![]),
+            ("#tag1 with #tag2!", vec!["tag1", "tag2"]),
+            ("Ignore # empty", vec![]),
+            // Non-ASCII content must survive intact.
+            ("Testing emoji #🍌banana", vec!["🍌banana"]),
+            ("Testing emoji #🍌", vec!["🍌"]),
+            // Deduplication, including across letter case.
+            ("Duplicate #tag #tag #tag", vec!["tag"]),
+            ("Mixed case #TaG #tag #TAG", vec!["tag"]),
+            // Punctuation acts as a separator between tags.
+            (
+                "#tag1, #tag2, #tag3 with commas",
+                vec!["tag1", "tag2", "tag3"],
+            ),
+            ("Separated by commas #tag1,#tag2", vec!["tag1", "tag2"]),
+            ("Separated by periods #tag1.#tag2", vec!["tag1", "tag2"]),
+            ("Separated by semicolons #tag1;#tag2", vec!["tag1", "tag2"]),
+        ];
+
+        for (content, tags) in cases {
+            let want: HashSet<String> = tags.iter().map(|tag| tag.to_string()).collect();
+            let got = NewPost::extract_hashtags(content);
+            assert_eq!(got, want, "Failed for input: {}", content);
+        }
+    }
 }

	notedeck One damus client to rule them all
	git clone git://jb55.com/notedeck
	Log | Files | Refs | README | LICENSE