NdbNote.swift (15910B)
//
//  NdbNote.swift
//  damus
//
//  Created by William Casarin on 2023-07-21.
//

import Foundation
import NaturalLanguage
import CommonCrypto
import secp256k1
import secp256k1_implementation
import CryptoKit

/// Size in bytes (2 << 18 = 524288) of the scratch buffer allocated when building or parsing a note.
let MAX_NOTE_SIZE: Int = 2 << 18

/// A C string paired with the note it was read from.
// NOTE(review): presumably `note` is held so the memory behind `str` stays alive — confirm.
struct NdbStr {
    let note: NdbNote
    let str: UnsafePointer<CChar>
}

/// A 32-byte id (copied into `Data`) paired with the note it was read from.
struct NdbId {
    let note: NdbNote
    let id: Data
}

/// A value read from an `ndb_str`: either a packed 32-byte id or a C string.
enum NdbData {
    case id(NdbId)
    case str(NdbStr)

    /// Wraps `str` as `.id` when its flag is `NDB_PACKED_ID`, otherwise as `.str`.
    init(note: NdbNote, str: ndb_str) {
        guard str.flag == NDB_PACKED_ID else {
            self = .str(NdbStr(note: note, str: str.str))
            return
        }

        // Copy the 32 id bytes out of the C structure.
        let buffer = UnsafeBufferPointer(start: str.id, count: 32)
        self = .id(NdbId(note: note, id: Data(buffer: buffer)))
    }
}

/// Swift wrapper around a pointer to an `ndb_note`.
///
/// When `owned` is true the memory behind `note` is released with `free` in `deinit`;
/// otherwise the wrapper never frees it.
class NdbNote: Encodable, Equatable, Hashable {
    // we can have owned notes, but we can also have lmdb virtual-memory mapped notes so its optional
    let owned: Bool
    /// Size in bytes of the memory `note` points at.
    let count: Int
    let key: NoteKey?
    let note: UnsafeMutablePointer<ndb_note>

    // cached stuff (TODO: remove these)
    var decrypted_content: String? = nil

    /// Parses this note's content as note JSON. Re-parsed on every access.
    private var inner_event: NdbNote? {
        return NdbNote.owned_from_json_cstr(json: content_raw, json_len: content_len)
    }

    /// Wraps an existing note pointer.
    /// - Parameters:
    ///   - note: Pointer to the note. If `owned` is true it is passed to `free` in `deinit`.
    ///   - size: Size of the note in bytes.
    ///   - owned: Whether this object is responsible for freeing `note`.
    ///   - key: Optional key associated with the note.
    init(note: UnsafeMutablePointer<ndb_note>, size: Int, owned: Bool, key: NoteKey?) {
        self.note = note
        self.owned = owned
        self.count = size
        self.key = key

        #if DEBUG_NOTE_SIZE
        // Mirror the bookkeeping in `deinit`, which only counts owned notes.
        if owned {
            NdbNote.total_ndb_size += size
            NdbNote.notes_created += 1

            print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
        }
        #endif

    }

    /// Returns `self` if already owned, otherwise a new owned note backed by a `malloc`'d copy
    /// of the `count` bytes behind `note`.
    func to_owned() -> NdbNote {
        if self.owned {
            return self
        }

        // The return type is non-optional, so an allocation failure can only trap.
        guard let buf = malloc(self.count) else {
            fatalError("NdbNote.to_owned: failed to allocate \(self.count) bytes")
        }
        // Copy from the pointer itself: an inout-to-pointer conversion of `pointee` is only
        // guaranteed to cover a single `ndb_note` value, not the full `count` bytes.
        memcpy(buf, self.note, self.count)
        let new_note = buf.assumingMemoryBound(to: ndb_note.self)

        return NdbNote(note: new_note, size: self.count, owned: true, key: self.key)
    }

    /// The note content decoded as UTF-8, or `""` if decoding fails.
    var content: String {
        String(cString: content_raw, encoding: .utf8) ?? ""
    }

    var content_raw: UnsafePointer<CChar> {
        ndb_note_content(note)
    }

    var content_len: UInt32 {
        ndb_note_content_length(note)
    }

    /// NDBTODO: make this into data
    var id: NoteId {
        .init(Data(bytes: ndb_note_id(note), count: 32))
    }

    var sig: Signature {
        .init(Data(bytes: ndb_note_sig(note), count: 64))
    }

    /// NDBTODO: make this into data
    var pubkey: Pubkey {
        .init(Data(bytes: ndb_note_pubkey(note), count: 32))
    }

    var created_at: UInt32 {
        ndb_note_created_at(note)
    }

    var kind: UInt32 {
        ndb_note_kind(note)
    }

    var tags: TagsSequence {
        .init(note: self)
    }

    deinit {
        if self.owned {
            #if DEBUG_NOTE_SIZE
            NdbNote.total_ndb_size -= Int(count)
            NdbNote.notes_created -= 1

            print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
            #endif
            free(note)
        }
    }

    /// Two notes are equal when their ids are equal.
    static func == (lhs: NdbNote, rhs: NdbNote) -> Bool {
        return lhs.id == rhs.id
    }

    func hash(into hasher: inout Hasher) {
        hasher.combine(id)
    }

    private enum CodingKeys: String, CodingKey {
        case id, sig, tags, pubkey, created_at, kind, content
    }

    // Implement the `Encodable` protocol
    /// Encodes the note as a keyed container; `id` and `sig` are written as hex strings.
    func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)

        try container.encode(hex_encode(id.id), forKey: .id)
        try container.encode(hex_encode(sig.data), forKey: .sig)
        try container.encode(pubkey, forKey: .pubkey)
        try container.encode(created_at, forKey: .created_at)
        try container.encode(kind, forKey: .kind)
        try container.encode(content, forKey: .content)
        try container.encode(tags, forKey: .tags)
    }

    #if DEBUG_NOTE_SIZE
    static var total_ndb_size: Int = 0
    static var notes_created: Int = 0
    #endif

    /// Builds a new owned note with `ndb_builder`.
    ///
    /// If `keypair` has a private key and `ndb_create_keypair` succeeds, the keypair is passed
    /// to `ndb_builder_finalize`; otherwise `nil` is passed instead.
    ///
    /// Returns `nil` if the scratch buffer cannot be allocated, a tag element or the content
    /// cannot be pushed into the builder, finalizing fails, or shrinking the buffer fails.
    /// The scratch buffer is freed on every failure path.
    init?(content: String, keypair: Keypair, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970)) {

        var builder = ndb_builder()
        let buflen = MAX_NOTE_SIZE
        let buf = malloc(buflen)
        guard buf != nil else {
            return nil
        }

        ndb_builder_init(&builder, buf, Int32(buflen))

        var pk_raw = keypair.pubkey.bytes

        ndb_builder_set_pubkey(&builder, &pk_raw)
        ndb_builder_set_kind(&builder, UInt32(kind))
        ndb_builder_set_created_at(&builder, UInt64(createdAt))

        var ok = true
        for tag in tags {
            ndb_builder_new_tag(&builder)
            for elem in tag {
                ok = elem.withCString({ eptr in
                    return ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count)) > 0
                })
                if !ok {
                    free(buf)
                    return nil
                }
            }
        }

        ok = content.withCString { cptr in
            return ndb_builder_set_content(&builder, cptr, Int32(content.utf8.count)) > 0
        }
        if !ok {
            free(buf)
            return nil
        }

        var n = UnsafeMutablePointer<ndb_note>?(nil)

        var the_kp: ndb_keypair? = nil

        if let sec = keypair.privkey {
            var kp = ndb_keypair()
            memcpy(&kp.secret.0, sec.id.bytes, 32)

            // Best effort: a bad keypair is reported and the note is finalized without one.
            if ndb_create_keypair(&kp) <= 0 {
                print("bad keypair")
            } else {
                the_kp = kp
            }
        }

        var len: Int32 = 0
        if var the_kp {
            len = ndb_builder_finalize(&builder, &n, &the_kp)
        } else {
            len = ndb_builder_finalize(&builder, &n, nil)
        }

        if len <= 0 {
            free(buf)
            return nil
        }

        // Shrink the scratch buffer down to the finalized note size.
        // NOTE(review): this uses the start of the buffer as the note rather than `n` —
        // presumably the builder places the note at the start of `buf`; confirm.
        guard let r = realloc(buf, Int(len)) else {
            // realloc leaves the original allocation untouched on failure.
            free(buf)
            return nil
        }

        self.owned = true
        self.count = Int(len)
        self.note = r.assumingMemoryBound(to: ndb_note.self)
        self.key = nil

        #if DEBUG_NOTE_SIZE
        // Keep the counters balanced with the decrement performed in `deinit`.
        NdbNote.total_ndb_size += Int(len)
        NdbNote.notes_created += 1
        #endif
    }

    /// Parses `json` into a new owned note. See `owned_from_json_cstr`.
    static func owned_from_json(json: String, bufsize: Int = MAX_NOTE_SIZE) -> NdbNote? {
        return json.withCString { cstr in
            return NdbNote.owned_from_json_cstr(
                json: cstr, json_len: UInt32(json.utf8.count), bufsize: bufsize)
        }
    }

    /// Parses `json_len` bytes of JSON at `json` into a new owned note.
    /// - Parameters:
    ///   - json: Pointer to the JSON text.
    ///   - json_len: Length of the JSON text in bytes.
    ///   - bufsize: Size of the scratch buffer handed to `ndb_note_from_json`.
    /// - Returns: The parsed note, or `nil` if allocation or parsing fails.
    static func owned_from_json_cstr(json: UnsafePointer<CChar>, json_len: UInt32, bufsize: Int = MAX_NOTE_SIZE) -> NdbNote? {
        let data = malloc(bufsize)
        guard data != nil else {
            return nil
        }

        var note: UnsafeMutablePointer<ndb_note>?

        let len = ndb_note_from_json(json, Int32(json_len), &note, data, Int32(bufsize))

        // Treat any non-positive length as failure, matching the check after
        // `ndb_builder_finalize`; a negative value must never reach `realloc`.
        if len <= 0 {
            free(data)
            return nil
        }

        // Shrink the allocation to just the valid bytes
        guard let note_data = realloc(data, Int(len)) else {
            // realloc leaves the original allocation untouched on failure.
            free(data)
            return nil
        }
        let new_note = note_data.assumingMemoryBound(to: ndb_note.self)

        return NdbNote(note: new_note, size: Int(len), owned: true, key: nil)
    }

    /// Returns the note parsed from this note's content, if the content is valid note JSON.
    func get_inner_event() -> NdbNote? {
        return self.inner_event
    }
}

// Extension to make NdbNote compatible with NostrEvent's original API
extension NdbNote {
    /// True for kinds 1, 42, 30023 and 9802.
    var is_textlike: Bool {
        return kind == 1 || kind == 42 || kind == 30023 || kind == 9802
    }

    /// For kind-1 notes, the note id of the first quote reference, if any.
    var is_quote_repost: NoteId? {
        guard kind == 1, let quoted_note_id = referenced_quote_ids.first else {
            return nil
        }
        return quoted_note_id.note_id
    }

    var known_kind: NostrKind? {
        return NostrKind.init(rawValue: kind)
    }

    /// True when the content is longer than 16000 bytes and the note is not longform.
    var too_big: Bool {
        return known_kind != .longform && self.content_len > 16000
    }

    var should_show_event: Bool {
        return !too_big
    }

    func get_blocks(keypair: Keypair) -> Blocks {
        return parse_note_content(content: .init(note: self, keypair: keypair))
    }

    // TODO: References iterator
    public var referenced_ids: References<NoteId> {
        References<NoteId>(tags: self.tags)
    }

    public var referenced_quote_ids: References<QuoteId> {
        References<QuoteId>(tags: self.tags)
    }

    public var referenced_noterefs: References<NoteRef> {
        References<NoteRef>(tags: self.tags)
    }

    public var referenced_follows: References<FollowRef> {
        References<FollowRef>(tags: self.tags)
    }

    public var referenced_pubkeys: References<Pubkey> {
        References<Pubkey>(tags: self.tags)
    }

    public var referenced_hashtags: References<Hashtag> {
        References<Hashtag>(tags: self.tags)
    }

    public var referenced_params: References<ReplaceableParam> {
        References<ReplaceableParam>(tags: self.tags)
    }

    public var referenced_mute_items: References<MuteItem> {
        References<MuteItem>(tags: self.tags)
    }

    public var referenced_comment_items: References<CommentItem> {
        References<CommentItem>(tags: self.tags)
    }

    public var references: References<RefId> {
        References<RefId>(tags: self.tags)
    }

    /// The thread reply built from this note's tags; always `nil` for highlights.
    func thread_reply() -> ThreadReply? {
        if self.known_kind != .highlight {
            return ThreadReply(tags: self.tags)
        }
        return nil
    }

    func highlighted_note_id() -> NoteId? {
        return ThreadReply(tags: self.tags)?.reply.note_id
    }

    /// Displayable content: the decrypted text for DMs (or a failure placeholder), the first
    /// comment item's content for highlights (or `""`), and the raw content otherwise.
    func get_content(_ keypair: Keypair) -> String {
        if known_kind == .dm {
            return decrypted(keypair: keypair) ?? "*failed to decrypt content*"
        }
        else if known_kind == .highlight {
            return self.referenced_comment_items.first?.content ?? ""
        }

        return content
    }

    /// Like `get_content`, but returns `nil` when a DM cannot be decrypted and does not
    /// special-case highlights.
    func maybe_get_content(_ keypair: Keypair) -> String? {
        if known_kind == .dm {
            return decrypted(keypair: keypair)
        }

        return content
    }

    func blocks(_ keypair: Keypair) -> Blocks {
        return get_blocks(keypair: keypair)
    }

    // NDBTODO: switch this to operating on bytes not strings
    /// Decrypts the content with `decrypt_dm` and stores the result in `decrypted_content`.
    /// A previously stored non-nil result is returned without decrypting again.
    func decrypted(keypair: Keypair) -> String? {
        if let decrypted_content {
            return decrypted_content
        }

        let our_pubkey = keypair.pubkey

        // NDBTODO: don't hex encode
        var pubkey = self.pubkey
        // This is our DM, we need to use the pubkey of the person we're talking to instead

        if our_pubkey == pubkey, let pk = self.referenced_pubkeys.first {
            pubkey = pk
        }

        // NDBTODO: pass data to pubkey
        let dec = decrypt_dm(keypair.privkey, pubkey: pubkey, content: self.content, encoding: .base64)
        self.decrypted_content = dec

        return dec
    }

    public func direct_replies() -> NoteId? {
        return thread_reply()?.reply.note_id
    }

    // NDBTODO: just use Id
    /// The note id of the thread root, or this note's own id when there is no root.
    public func thread_id() -> NoteId {
        guard let root = self.thread_reply()?.root else {
            return self.id
        }

        return root.note_id
    }

    public func last_refid() -> NoteId? {
        return self.referenced_ids.last
    }

    // NDBTODO: id -> data
    /*
    public func references(id: String, key: AsciiCharacter) -> Bool {
        var matcher: (Reference) -> Bool = { ref in ref.ref_id.matches_str(id) }
        if id.count == 64, let decoded = hex_decode(id) {
            matcher = { ref in ref.ref_id.matches_id(decoded) }
        }
        for ref in References(tags: self.tags) {
            if ref.key == key && matcher(ref) {
                return true
            }
        }

        return false
    }
     */

    func is_reply() -> Bool {
        return thread_reply() != nil
    }

    /// Guesses the language of the note's text blocks with `NLLanguageRecognizer`.
    ///
    /// Must not be called on the main thread (asserted in debug builds). Returns `nil` when
    /// there is no text or when the top hypothesis has a probability below 0.5.
    func note_language(_ keypair: Keypair) -> String? {
        assert(!Thread.isMainThread, "This function must not be run on the main thread.")

        // Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
        // and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
        let originalBlocks = self.blocks(keypair).blocks
        let originalOnlyText = originalBlocks.compactMap {
                if case .text(let txt) = $0 {
                    // Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
                    // as a workaround to get Apple's language recognizer to predict the language correctly.
                    // It is important to add this workaround to get the language right because it wastes users' money to send translation requests.
                    // Until Apple fixes their language model, this workaround will be kept in place.
                    // See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
                    //
                    // For example,
                    // "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
                    // has the note content "It’s a meme".
                    // Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
                    // which is a wildly incorrect hypothesis.
                    // With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
                    //
                    // Similarly,
                    // "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
                    // has the note content "You’re funner".
                    // Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
                    // With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
                    return txt.replacingOccurrences(of: "’", with: "'")
                }
                else {
                    return nil
                }
            }
            .joined(separator: " ")

        // If there is no text, there's nothing to use to detect language.
        guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            return nil
        }

        let languageRecognizer = NLLanguageRecognizer()
        languageRecognizer.processString(originalOnlyText)

        // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
        guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
            return nil
        }

        // Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
        // Moreover, speakers of one variant can generally understand other variants.
        return localeToLanguage(locale)
    }

    /// Seconds elapsed between `created_at` and now.
    var age: TimeInterval {
        let event_date = Date(timeIntervalSince1970: TimeInterval(created_at))
        return Date.now.timeIntervalSince(event_date)
    }
}

/// Encodes `data` as a hex string, two characters per byte (high nibble first).
func hex_encode(_ data: Data) -> String {
    var str = ""
    // The output length is known up front: two characters per input byte.
    str.reserveCapacity(data.count * 2)
    for c in data {
        let c1 = hexchar(c >> 4)
        let c2 = hexchar(c & 0xF)

        str.append(Character(Unicode.Scalar(c1)))
        str.append(Character(Unicode.Scalar(c2)))
    }
    return str
}