commit dc21b6139c21a3115ad7692b07068d5af8484fe5
parent aae97c5cb7a085f703099a2c5998421ee299eb6a
Author: cr0bar <cr0bar@cr0.bar>
Date: Tue, 11 Jul 2023 07:21:56 -0700
Add support for multilingual hashtags
Changelog-Added: Add support for multilingual hashtags
Reviewed-by: William Casarin <jb55@jb55.com>
Closes: https://github.com/damus-io/damus/issues/949
Diffstat:
6 files changed, 92 insertions(+), 45 deletions(-)
diff --git a/damus-c/cursor.h b/damus-c/cursor.h
@@ -432,7 +432,7 @@ static inline int is_whitespace(char c) {
}
static inline int is_boundary(char c) {
- return !isalnum(c);
+ return is_whitespace(c) || ispunct(c);
}
static inline int is_invalid_url_ending(char c) {
diff --git a/damus.xcodeproj/project.pbxproj b/damus.xcodeproj/project.pbxproj
@@ -53,6 +53,7 @@
4C198DF229F88C6B004C165C /* BlurHashDecode.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C198DEE29F88C6B004C165C /* BlurHashDecode.swift */; };
4C198DF529F88D2E004C165C /* ImageMetadata.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C198DF429F88D2E004C165C /* ImageMetadata.swift */; };
4C19AE512A5CEF7C00C90DB7 /* NostrScript.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C19AE4C2A5CEF7C00C90DB7 /* NostrScript.swift */; };
+ 4C19AE552A5D977400C90DB7 /* HashtagTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C19AE542A5D977400C90DB7 /* HashtagTests.swift */; };
4C1A9A1A29DCA17E00516EAC /* ReplyCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C1A9A1929DCA17E00516EAC /* ReplyCounter.swift */; };
4C1A9A1D29DDCF9B00516EAC /* NotificationSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C1A9A1C29DDCF9B00516EAC /* NotificationSettingsView.swift */; };
4C1A9A1F29DDD24B00516EAC /* AppearanceSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C1A9A1E29DDD24B00516EAC /* AppearanceSettingsView.swift */; };
@@ -495,6 +496,7 @@
4C19AE4B2A5CEF7C00C90DB7 /* primal.ts */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.typescript; path = primal.ts; sourceTree = "<group>"; };
4C19AE4C2A5CEF7C00C90DB7 /* NostrScript.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = NostrScript.swift; sourceTree = "<group>"; };
4C19AE502A5CEF7C00C90DB7 /* nostr.ts */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.typescript; path = nostr.ts; sourceTree = "<group>"; };
+ 4C19AE542A5D977400C90DB7 /* HashtagTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HashtagTests.swift; sourceTree = "<group>"; };
4C1A9A1929DCA17E00516EAC /* ReplyCounter.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReplyCounter.swift; sourceTree = "<group>"; };
4C1A9A1C29DDCF9B00516EAC /* NotificationSettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NotificationSettingsView.swift; sourceTree = "<group>"; };
4C1A9A1E29DDD24B00516EAC /* AppearanceSettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppearanceSettingsView.swift; sourceTree = "<group>"; };
@@ -1510,6 +1512,7 @@
3A5E47C62A4A76C800C0D090 /* TrieTests.swift */,
3A90B1822A4EA3C600000D94 /* UserSearchCacheTests.swift */,
4C4F14A62A2A61A30045A0B9 /* NostrScriptTests.swift */,
+ 4C19AE542A5D977400C90DB7 /* HashtagTests.swift */,
);
path = damusTests;
sourceTree = "<group>";
@@ -2114,6 +2117,7 @@
files = (
5019CADD2A0FB0A9000069E1 /* ProfileDatabaseTests.swift in Sources */,
3A90B1832A4EA3C600000D94 /* UserSearchCacheTests.swift in Sources */,
+ 4C19AE552A5D977400C90DB7 /* HashtagTests.swift in Sources */,
3A3040ED29A5CB86008A0F29 /* ReplyDescriptionTests.swift in Sources */,
4C8D00D429E3C5D40036AF10 /* NIP19Tests.swift in Sources */,
3A30410129AB12AA008A0F29 /* EventGroupViewTests.swift in Sources */,
@@ -2405,7 +2409,7 @@
CLANG_ENABLE_MODULES = YES;
CODE_SIGN_ENTITLEMENTS = damus/damus.entitlements;
CODE_SIGN_STYLE = Automatic;
- CURRENT_PROJECT_VERSION = 1;
+ CURRENT_PROJECT_VERSION = 2;
DEVELOPMENT_ASSET_PATHS = "\"damus/Preview Content\"";
DEVELOPMENT_TEAM = XK7H4JAB3D;
ENABLE_PREVIEWS = YES;
@@ -2454,7 +2458,7 @@
CLANG_ENABLE_MODULES = YES;
CODE_SIGN_ENTITLEMENTS = damus/damus.entitlements;
CODE_SIGN_STYLE = Automatic;
- CURRENT_PROJECT_VERSION = 1;
+ CURRENT_PROJECT_VERSION = 2;
DEVELOPMENT_ASSET_PATHS = "\"damus/Preview Content\"";
DEVELOPMENT_TEAM = XK7H4JAB3D;
ENABLE_PREVIEWS = YES;
diff --git a/damus/Models/Mentions.swift b/damus/Models/Mentions.swift
@@ -415,7 +415,7 @@ func parse_while(_ p: Parser, match: (Character) -> Bool) -> String? {
}
func is_hashtag_char(_ c: Character) -> Bool {
- return c.isLetter || c.isNumber
+    // Whitespace and punctuation always terminate a hashtag. Letters and
+    // numbers of any script are allowed, as are non-ASCII characters such as
+    // emoji. Remaining ASCII characters (symbols like `$`, `+`, `=` and
+    // control characters) are rejected rather than becoming part of the tag.
+    return !c.isWhitespace && !c.isPunctuation && (c.isLetter || c.isNumber || !c.isASCII)
}
func prev_char(_ p: Parser, n: Int) -> Character? {
diff --git a/damus/Nostr/NostrLink.swift b/damus/Nostr/NostrLink.swift
@@ -116,8 +116,12 @@ func decode_nostr_uri(_ s: String) -> NostrLink? {
return decode_universal_link(s)
}
- var uri = s.replacingOccurrences(of: "nostr://", with: "")
+ var uri = s
+ uri = uri.replacingOccurrences(of: "nostr://", with: "")
uri = uri.replacingOccurrences(of: "nostr:", with: "")
+
+ // Fix for non-latin characters resulting in second colon being encoded
+ uri = uri.replacingOccurrences(of: "damus:t%3A", with: "t:")
uri = uri.replacingOccurrences(of: "damus://", with: "")
uri = uri.replacingOccurrences(of: "damus:", with: "")
diff --git a/damusTests/HashtagTests.swift b/damusTests/HashtagTests.swift
@@ -0,0 +1,79 @@
+//
+// HashtagTests.swift
+// damusTests
+//
+// Created by William Casarin on 2023-07-11.
+//
+
+import XCTest
+@testable import damus
+
+final class HashtagTests: XCTestCase {
+
+    // Every test parses a short note with `parse_mentions` and checks the
+    // resulting block sequence. The block count is verified with a `guard`
+    // before any subscripting, so a parser regression is reported as a test
+    // failure instead of trapping on an out-of-range index.
+
+    /// A hashtag in the middle of a sentence yields text / hashtag / text.
+    func testParseHashtag() {
+        let parsed = parse_mentions(content: "some hashtag #bitcoin derp", tags: []).blocks
+
+        guard parsed.count == 3 else {
+            return XCTFail("expected 3 blocks, got \(parsed.count)")
+        }
+        XCTAssertEqual(parsed[0].is_text, "some hashtag ")
+        XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
+        XCTAssertEqual(parsed[2].is_text, " derp")
+    }
+
+    /// A trailing comma ends the hashtag and stays in the following text block.
+    func testHashtagWithComma() {
+        let parsed = parse_mentions(content: "some hashtag #bitcoin, cool", tags: []).blocks
+
+        guard parsed.count == 3 else {
+            return XCTFail("expected 3 blocks, got \(parsed.count)")
+        }
+        XCTAssertEqual(parsed[0].is_text, "some hashtag ")
+        XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
+        XCTAssertEqual(parsed[2].is_text, ", cool")
+    }
+
+    /// An emoji directly after the word is part of the hashtag.
+    func testHashtagWithEmoji() {
+        let parsed = parse_mentions(content: "some hashtag #bitcoin☕️ cool", tags: []).blocks
+
+        guard parsed.count == 3 else {
+            return XCTFail("expected 3 blocks, got \(parsed.count)")
+        }
+        XCTAssertEqual(parsed[0].is_text, "some hashtag ")
+        XCTAssertEqual(parsed[1].is_hashtag, "bitcoin☕️")
+        XCTAssertEqual(parsed[2].is_text, " cool")
+    }
+
+    /// Accented Latin letters do not terminate the hashtag.
+    func testHashtagWithAccents() {
+        let parsed = parse_mentions(content: "hello from #türkiye", tags: []).blocks
+
+        guard parsed.count == 2 else {
+            return XCTFail("expected 2 blocks, got \(parsed.count)")
+        }
+        XCTAssertEqual(parsed[0].is_text, "hello from ")
+        XCTAssertEqual(parsed[1].is_hashtag, "türkiye")
+    }
+
+    /// Non-Latin scripts (here Hangul) are parsed as a single hashtag.
+    func testHashtagWithNonLatinCharacters() {
+        let parsed = parse_mentions(content: "this is a #시험 hope it works", tags: []).blocks
+
+        guard parsed.count == 3 else {
+            return XCTFail("expected 3 blocks, got \(parsed.count)")
+        }
+        XCTAssertEqual(parsed[0].is_text, "this is a ")
+        XCTAssertEqual(parsed[1].is_hashtag, "시험")
+        XCTAssertEqual(parsed[2].is_text, " hope it works")
+    }
+
+    /// A hashtag at the very end of the content produces no trailing text block.
+    func testParseHashtagEnd() {
+        let parsed = parse_mentions(content: "some hashtag #bitcoin", tags: []).blocks
+
+        guard parsed.count == 2 else {
+            return XCTFail("expected 2 blocks, got \(parsed.count)")
+        }
+        XCTAssertEqual(parsed[0].is_text, "some hashtag ")
+        XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
+    }
+
+}
diff --git a/damusTests/damusTests.swift b/damusTests/damusTests.swift
@@ -176,46 +176,6 @@ class damusTests: XCTestCase {
XCTAssertEqual(ev.tags[1][1], "bitcoin")
}
-
- func testParseHashtag() {
- let parsed = parse_mentions(content: "some hashtag #bitcoin derp", tags: []).blocks
-
- XCTAssertNotNil(parsed)
- XCTAssertEqual(parsed.count, 3)
- XCTAssertEqual(parsed[0].is_text, "some hashtag ")
- XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
- XCTAssertEqual(parsed[2].is_text, " derp")
- }
-
- func testHashtagWithComma() {
- let parsed = parse_mentions(content: "some hashtag #bitcoin, cool", tags: []).blocks
-
- XCTAssertNotNil(parsed)
- XCTAssertEqual(parsed.count, 3)
- XCTAssertEqual(parsed[0].is_text, "some hashtag ")
- XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
- XCTAssertEqual(parsed[2].is_text, ", cool")
- }
-
- func testHashtagWithEmoji() {
- let parsed = parse_mentions(content: "some hashtag #bitcoin☕️ cool", tags: []).blocks
-
- XCTAssertNotNil(parsed)
- XCTAssertEqual(parsed.count, 3)
- XCTAssertEqual(parsed[0].is_text, "some hashtag ")
- XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
- XCTAssertEqual(parsed[2].is_text, "☕️ cool")
- }
-
- func testParseHashtagEnd() {
- let parsed = parse_mentions(content: "some hashtag #bitcoin", tags: []).blocks
-
- XCTAssertNotNil(parsed)
- XCTAssertEqual(parsed.count, 2)
- XCTAssertEqual(parsed[0].is_text, "some hashtag ")
- XCTAssertEqual(parsed[1].is_hashtag, "bitcoin")
- }
-
func testParseMentionOnlyText() {
let parsed = parse_mentions(content: "there is no mention here", tags: [["e", "event_id"]]).blocks