NdbNote.swift (22067B)
//
//  NdbNote.swift
//  damus
//
//  Created by William Casarin on 2023-07-21.
//

import Foundation
import NaturalLanguage
import CommonCrypto
import secp256k1
import secp256k1_implementation
import CryptoKit

/// Size in bytes (2 << 18 = 524,288) of the scratch buffers allocated when building,
/// parsing and verifying notes.
let MAX_NOTE_SIZE: Int = 2 << 18

// Default threshold of the hellthread pubkey tag count setting if it is not set.
let DEFAULT_HELLTHREAD_MAX_PUBKEYS: Int = 10

/// A C string paired with the note it was obtained for.
struct NdbStr {
    let note: NdbNote
    let str: UnsafePointer<CChar>
}

/// A 32-byte packed id paired with the note it was obtained for.
struct NdbId {
    let note: NdbNote
    let id: Data
}

/// Swift view of an `ndb_str`: either a packed 32-byte id or a C string.
enum NdbData {
    case id(NdbId)
    case str(NdbStr)

    /// Wraps `str` as `.id` when it is flagged `NDB_PACKED_ID`, otherwise as `.str`.
    init(note: NdbNote, str: ndb_str) {
        guard str.flag == NDB_PACKED_ID else {
            self = .str(NdbStr(note: note, str: str.str))
            return
        }

        // Packed ids are copied out as exactly 32 bytes.
        let buffer = UnsafeBufferPointer(start: str.id, count: 32)
        self = .id(NdbId(note: note, id: Data(buffer: buffer)))
    }
}

/// Swift wrapper around a raw `ndb_note_ptr`.
///
/// When `owned` is true the underlying memory is released with `free` in `deinit`;
/// otherwise this object never frees it.
class NdbNote: Codable, Equatable, Hashable {
    // we can have owned notes, but we can also have lmdb virtual-memory mapped notes so its optional
    private(set) var owned: Bool
    /// Size of the note in bytes, as supplied at construction time.
    let count: Int
    let key: NoteKey?
    let note: ndb_note_ptr

    // cached stuff (TODO: remove these)
    var decrypted_content: String? = nil

    /// Parses this note's content as JSON into a new owned note.
    /// Computed on every access; nothing is cached.
    private var inner_event: NdbNote? {
        get {
            return NdbNote.owned_from_json_cstr(json: content_raw, json_len: content_len)
        }
    }

    /// Wraps an existing note pointer.
    ///
    /// - Parameters:
    ///   - note: Pointer to the note data.
    ///   - size: Size of the note in bytes.
    ///   - owned: Whether this object must `free` the note memory on deinit.
    ///   - key: Optional note key associated with the note.
    init(note: ndb_note_ptr, size: Int, owned: Bool, key: NoteKey?) {
        self.note = note
        self.owned = owned
        self.count = size
        self.key = key

        #if DEBUG_NOTE_SIZE
        // Only owned notes are tracked, mirroring the accounting done in `deinit`.
        if owned {
            NdbNote.total_ndb_size += size
            NdbNote.notes_created += 1

            print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
        }
        #endif
    }

    /// Returns `self` if the note is already owned; otherwise returns a new owned note
    /// backed by a heap copy of the `count` bytes at `note.ptr`.
    func to_owned() -> NdbNote {
        if self.owned {
            return self
        }

        // Allocation failure is unrecoverable here, as it was with the previous force-unwrap.
        guard let buf = malloc(self.count) else {
            fatalError("NdbNote.to_owned: malloc failed")
        }
        memcpy(buf, UnsafeRawPointer(self.note.ptr), self.count)

        let new_note = ndb_note_ptr(ptr: OpaquePointer(buf))
        return NdbNote(note: new_note, size: self.count, owned: true, key: self.key)
    }

    /// Clears the `owned` flag so that `deinit` does not free the note memory.
    func mark_ownership_moved() {
        self.owned = false
    }

    /// The note content decoded as UTF-8, or an empty string if decoding fails.
    var content: String {
        String(cString: content_raw, encoding: .utf8) ?? ""
    }

    var content_raw: UnsafePointer<CChar> {
        ndb_note_content(note.ptr)
    }

    var content_len: UInt32 {
        ndb_note_content_length(note.ptr)
    }

    /// NDBTODO: make this into data
    var id: NoteId {
        .init(Data(bytes: ndb_note_id(note.ptr), count: 32))
    }

    var raw_note_id: UnsafeMutablePointer<UInt8> {
        ndb_note_id(note.ptr)
    }

    /// Compares the 32 raw id bytes of both notes without building `NoteId` values.
    func id_matches(other: NdbNote) -> Bool {
        memcmp(self.raw_note_id, other.raw_note_id, 32) == 0
    }

    var sig: Signature {
        .init(Data(bytes: ndb_note_sig(note.ptr), count: 64))
    }

    /// NDBTODO: make this into data
    var pubkey: Pubkey {
        .init(Data(bytes: ndb_note_pubkey(note.ptr), count: 32))
    }

    var created_at: UInt32 {
        ndb_note_created_at(note.ptr)
    }

    var kind: UInt32 {
        ndb_note_kind(note.ptr)
    }

    var tags: TagsSequence {
        .init(note: self)
    }

    deinit {
        if self.owned {
            #if DEBUG_NOTE_SIZE
            NdbNote.total_ndb_size -= Int(count)
            NdbNote.notes_created -= 1

            print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
            #endif
            free(UnsafeMutableRawPointer(note.ptr))
        }
    }

    /// Two notes are equal when their ids are equal.
    static func == (lhs: NdbNote, rhs: NdbNote) -> Bool {
        return lhs.id == rhs.id
    }

    func hash(into hasher: inout Hasher) {
        hasher.combine(id)
    }

    private enum CodingKeys: String, CodingKey {
        case id, sig, tags, pubkey, created_at, kind, content
    }

    // Implement the `Encodable` protocol
    func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)

        try container.encode(hex_encode(id.id), forKey: .id)
        try container.encode(hex_encode(sig.data), forKey: .sig)
        try container.encode(pubkey, forKey: .pubkey)
        try container.encode(created_at, forKey: .created_at)
        try container.encode(kind, forKey: .kind)
        try container.encode(content, forKey: .content)
        try container.encode(tags, forKey: .tags)
    }

    /// Decodes the note fields and rebuilds the note through the manual (pre-signed) initializer.
    ///
    /// - Throws: Any container decoding error, or `DecodingError.initializationFailed`
    ///   when the note cannot be constructed from the decoded fields.
    required init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)

        let content = try container.decode(String.self, forKey: .content)
        let pubkey = try container.decode(Pubkey.self, forKey: .pubkey)
        let kind = try container.decode(UInt32.self, forKey: .kind)
        let tags = try container.decode([[String]].self, forKey: .tags)
        let createdAt = try container.decode(UInt32.self, forKey: .created_at)
        let noteId = try container.decode(NoteId.self, forKey: .id)
        let signature = try container.decode(Signature.self, forKey: .sig)

        guard let note = NdbNote.init(content: content, author: pubkey, kind: kind, tags: tags, createdAt: createdAt, id: noteId, sig: signature) else {
            throw DecodingError.initializationFailed
        }

        self.note = note.note
        self.owned = note.owned
        note.mark_ownership_moved() // This is done to prevent a double-free error when both `self` and `note` get deinitialized.
        self.count = note.count
        self.key = note.key
    }

    enum DecodingError: Error {
        case initializationFailed
    }

    #if DEBUG_NOTE_SIZE
    static var total_ndb_size: Int = 0
    static var notes_created: Int = 0
    #endif

    /// What a note is built from: a keypair (the builder is finalized with it),
    /// or a pubkey, signature and id that already exist.
    fileprivate enum NoteConstructionMaterial {
        case keypair(Keypair)
        case manual(Pubkey, Signature, NoteId)

        var pubkey: Pubkey {
            switch self {
            case .keypair(let keypair):
                return keypair.pubkey
            case .manual(let pubkey, _, _):
                return pubkey
            }
        }

        var privkey: Privkey? {
            switch self {
            case .keypair(let kp):
                return kp.privkey
            case .manual(_, _, _):
                return nil
            }
        }
    }

    /// Builds a new owned note inside a freshly allocated buffer.
    ///
    /// Returns nil when allocation fails, a tag or the content cannot be pushed into the
    /// builder, finalization fails, or (manual material only) ID calculation or signature
    /// verification fails. The build buffer is released on every failure path.
    ///
    /// - Parameters:
    ///   - content: Note content.
    ///   - noteConstructionMaterial: Keypair, or pre-existing pubkey/signature/id.
    ///   - kind: Note kind (defaults to 1).
    ///   - tags: Tags, each a list of string elements.
    ///   - createdAt: Creation timestamp (defaults to the current Unix time).
    fileprivate init?(content: String, noteConstructionMaterial: NoteConstructionMaterial, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970)) {

        var builder = ndb_builder()
        let buflen = MAX_NOTE_SIZE
        guard let buf = malloc(buflen) else {
            return nil
        }

        ndb_builder_init(&builder, buf, buflen)

        var pk_raw = noteConstructionMaterial.pubkey.bytes

        ndb_builder_set_pubkey(&builder, &pk_raw)
        ndb_builder_set_kind(&builder, UInt32(kind))
        ndb_builder_set_created_at(&builder, UInt64(createdAt))

        var ok = true
        for tag in tags {
            ndb_builder_new_tag(&builder)
            for elem in tag {
                ok = elem.withCString({ eptr in
                    return ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count)) > 0
                })
                if !ok {
                    // Release the build buffer; nothing else references it yet.
                    free(buf)
                    return nil
                }
            }
        }

        ok = content.withCString { cptr in
            return ndb_builder_set_content(&builder, cptr, Int32(content.utf8.count)) > 0
        }
        if !ok {
            free(buf)
            return nil
        }

        var n = ndb_note_ptr()
        var len: Int32 = 0

        switch noteConstructionMaterial {
        case .keypair(let keypair):
            var the_kp: ndb_keypair? = nil

            if let sec = keypair.privkey {
                var kp = ndb_keypair()
                memcpy(&kp.secret.0, sec.id.bytes, 32)

                if ndb_create_keypair(&kp) <= 0 {
                    print("bad keypair")
                } else {
                    the_kp = kp
                }
            }

            // Without a usable keypair the builder is finalized with a nil keypair.
            if var the_kp {
                len = ndb_builder_finalize(&builder, &n.ptr, &the_kp)
            } else {
                len = ndb_builder_finalize(&builder, &n.ptr, nil)
            }

            if len <= 0 {
                free(buf)
                return nil
            }
        case .manual(_, let signature, _):
            var raw_sig = signature.data.bytes
            ndb_builder_set_sig(&builder, &raw_sig)

            do {
                // Finalize note, save length, and ensure it is higher than zero (which signals finalization has succeeded)
                len = ndb_builder_finalize(&builder, &n.ptr, nil)
                guard len > 0 else { throw InitError.generic }

                let scratch_buf_len = MAX_NOTE_SIZE
                guard let scratch_buf = malloc(scratch_buf_len) else { throw InitError.generic }
                defer { free(scratch_buf) } // Ensure we deallocate as soon as we leave this scope, regardless of the outcome

                // Calculate the ID based on the content
                guard ndb_calculate_id(n.ptr, scratch_buf, Int32(scratch_buf_len)) == 1 else { throw InitError.generic }

                // Verify the signature against the pubkey and the computed ID, to verify the validity of the whole note
                var ctx = secp256k1_context_create(UInt32(SECP256K1_CONTEXT_VERIFY))
                defer { secp256k1_context_destroy(ctx) } // The context is only needed for this one verification
                // NOTE(review): `&ctx` passes the address of the local holding the context handle — confirm this is what `ndb_note_verify` expects.
                guard ndb_note_verify(&ctx, ndb_note_pubkey(n.ptr), ndb_note_id(n.ptr), ndb_note_sig(n.ptr)) == 1 else { throw InitError.generic }
            }
            catch {
                free(buf)
                return nil
            }
        }

        self.owned = true
        self.count = Int(len)

        // Shrink the oversized build buffer down to the finalized note length.
        guard let r = realloc(buf, Int(len)) else {
            // A failed realloc leaves the original block allocated, so it must still be freed.
            free(buf)
            return nil
        }

        self.note = ndb_note_ptr(ptr: OpaquePointer(r))
        self.key = nil
    }

    /// Builds a note from `keypair`.
    convenience init?(content: String, keypair: Keypair, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970)) {
        self.init(content: content, noteConstructionMaterial: .keypair(keypair), kind: kind, tags: tags, createdAt: createdAt)
    }

    /// Builds a note from an existing author pubkey, id and signature.
    /// Fails (returns nil) if the signature does not verify.
    convenience init?(content: String, author: Pubkey, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970), id: NoteId, sig: Signature) {
        self.init(content: content, noteConstructionMaterial: .manual(author, sig, id), kind: kind, tags: tags, createdAt: createdAt)
    }

    /// Parses a JSON string into an owned note. See `owned_from_json_cstr`.
    static func owned_from_json(json: String, bufsize: Int = MAX_NOTE_SIZE) -> NdbNote? {
        return json.withCString { cstr in
            return NdbNote.owned_from_json_cstr(
                json: cstr, json_len: UInt32(json.utf8.count), bufsize: bufsize)
        }
    }

    /// Recomputes the note ID, checks that it equals the ID the note carried before the
    /// call, and verifies the signature.
    ///
    /// - Returns: `true` only if ID calculation succeeds, the IDs match and the signature verifies.
    func verify() -> Bool {
        let scratch_buf_len = MAX_NOTE_SIZE
        guard let scratch_buf = malloc(scratch_buf_len) else { return false }
        defer { free(scratch_buf) } // Ensure we deallocate as soon as we leave this scope, regardless of the outcome

        let current_id = self.id

        // Calculate the ID based on the content
        // (presumably this writes the computed ID into the note itself, which is why `self.id` is re-read below — confirm)
        guard ndb_calculate_id(self.note.ptr, scratch_buf, Int32(scratch_buf_len)) == 1 else { return false }

        let computed_id = self.id

        // Ensure computed ID matches given id to prevent ID tampering
        guard computed_id == current_id else { return false }

        // Verify the signature against the pubkey and the computed ID, to verify the validity of the whole note
        var ctx = secp256k1_context_create(UInt32(SECP256K1_CONTEXT_VERIFY))
        defer { secp256k1_context_destroy(ctx) } // The context is only needed for this one verification
        // NOTE(review): `&ctx` passes the address of the local holding the context handle — confirm this is what `ndb_note_verify` expects.
        guard ndb_note_verify(&ctx, ndb_note_pubkey(self.note.ptr), ndb_note_id(self.note.ptr), ndb_note_sig(self.note.ptr)) == 1 else { return false }

        return true
    }

    /// Parses `json_len` bytes of JSON at `json` into an owned note.
    ///
    /// - Parameters:
    ///   - json: Pointer to the JSON text.
    ///   - json_len: Length of the JSON text in bytes.
    ///   - bufsize: Size of the temporary parse buffer.
    /// - Returns: The parsed note, or nil if allocation or parsing fails.
    static func owned_from_json_cstr(json: UnsafePointer<CChar>, json_len: UInt32, bufsize: Int = MAX_NOTE_SIZE) -> NdbNote? {
        guard let data = malloc(bufsize) else {
            return nil
        }

        var note = ndb_note_ptr()

        let len = ndb_note_from_json(json, Int32(json_len), &note.ptr, data, Int32(bufsize))

        // Anything that is not a positive length is treated as a parse failure.
        guard len > 0 else {
            free(data)
            return nil
        }

        // Shrink the buffer to just the valid bytes
        guard let new_note = realloc(data, Int(len)) else {
            // A failed realloc leaves the original block allocated, so it must still be freed.
            free(data)
            return nil
        }
        let new_note_ptr = ndb_note_ptr(ptr: OpaquePointer(new_note))
        return NdbNote(note: new_note_ptr, size: Int(len), owned: true, key: nil)
    }

    /// Parses this note's content as a JSON note, returning nil if parsing fails.
    func get_inner_event() -> NdbNote? {
        return self.inner_event
    }
}

// Extension to make NdbNote compatible with NostrEvent's original API
extension NdbNote {
    var is_textlike: Bool {
        return kind == 1 || kind == 42 || kind == 30023 || kind == 9802 || kind == 39089
    }

    /// For kind-1 notes, the id of the first referenced quote; nil otherwise.
    var is_quote_repost: NoteId? {
        guard kind == 1, let quoted_note_id = referenced_quote_ids.first else {
            return nil
        }
        return quoted_note_id.note_id
    }

    var known_kind: NostrKind? {
        return NostrKind.init(rawValue: kind)
    }

    /// True for non-longform notes whose content length exceeds 16000.
    var too_big: Bool {
        return known_kind != .longform && self.content_len > 16000
    }

    var should_show_event: Bool {
        return !too_big
    }

    /// True when a text, boost, like or zap note references more than `max_pubkeys` distinct pubkeys.
    func is_hellthread(max_pubkeys: Int) -> Bool {
        switch known_kind {
        case .text, .boost, .like, .zap:
            Set(referenced_pubkeys).count > max_pubkeys
        default:
            false
        }
    }

    // TODO: References iterator
    public var referenced_ids: References<NoteId> {
        References<NoteId>(tags: self.tags)
    }

    public var referenced_quote_ids: References<QuoteId> {
        References<QuoteId>(tags: self.tags)
    }

    public var referenced_noterefs: References<NoteRef> {
        References<NoteRef>(tags: self.tags)
    }

    public var referenced_follows: References<FollowRef> {
        References<FollowRef>(tags: self.tags)
    }

    public var referenced_pubkeys: References<Pubkey> {
        References<Pubkey>(tags: self.tags)
    }

    public var referenced_hashtags: References<Hashtag> {
        References<Hashtag>(tags: self.tags)
    }

    public var referenced_params: References<ReplaceableParam> {
        References<ReplaceableParam>(tags: self.tags)
    }

    public var referenced_mute_items: References<MuteItem> {
        References<MuteItem>(tags: self.tags)
    }

    public var referenced_comment_items: References<CommentItem> {
        References<CommentItem>(tags: self.tags)
    }

    public var references: References<RefId> {
        References<RefId>(tags: self.tags)
    }

    /// Thread reply information built from the tags; always nil for highlight notes.
    func thread_reply() -> ThreadReply? {
        guard self.known_kind != .highlight else {
            return nil
        }
        return ThreadReply(tags: self.tags)
    }

    func highlighted_note_id() -> NoteId? {
        return ThreadReply(tags: self.tags)?.reply.note_id
    }

    /// Looks up this note's key by id and then its blocks metadata, both within one transaction on `ndb`.
    /// The transaction closure yields nil when the note key cannot be found.
    func block_offsets(ndb: Ndb) -> SafeNdbTxn<NdbBlockGroup.BlocksMetadata>? {
        let blocks_txn: SafeNdbTxn<NdbBlockGroup.BlocksMetadata>? = .new(on: ndb) { txn -> NdbBlockGroup.BlocksMetadata? in
            guard let key = ndb.lookup_note_key_with_txn(self.id, txn: txn) else {
                return nil
            }
            return ndb.lookup_blocks_by_key_with_txn(key, txn: txn)
        }

        return blocks_txn
    }

    func is_content_encrypted() -> Bool {
        return known_kind == .dm  // Probably other kinds should be listed here
    }

    /// Displayable content: decrypted text for encrypted notes (or a failure placeholder),
    /// the first comment item's content for highlights (or ""), otherwise the raw content.
    func get_content(_ keypair: Keypair) -> String {
        if is_content_encrypted() {
            return decrypted(keypair: keypair) ?? "*failed to decrypt content*"
        }
        else if known_kind == .highlight {
            return self.referenced_comment_items.first?.content ?? ""
        }

        return content
    }

    /// Same as `get_content`, but returns nil instead of a placeholder or empty string
    /// when decryption fails or a highlight has no comment item.
    func maybe_get_content(_ keypair: Keypair) -> String? {
        if is_content_encrypted() {
            return decrypted(keypair: keypair)
        }
        else if known_kind == .highlight {
            return self.referenced_comment_items.first?.content
        }

        return content
    }

    // NDBTODO: switch this to operating on bytes not strings
    /// Decrypts the content with `decrypt_dm` and caches the result in `decrypted_content`.
    /// A cached non-nil result is returned without decrypting again.
    func decrypted(keypair: Keypair) -> String? {
        if let decrypted_content {
            return decrypted_content
        }

        let our_pubkey = keypair.pubkey

        // NDBTODO: don't hex encode
        var pubkey = self.pubkey

        // This is our DM, we need to use the pubkey of the person we're talking to instead
        if our_pubkey == pubkey, let pk = self.referenced_pubkeys.first {
            pubkey = pk
        }

        // NDBTODO: pass data to pubkey
        let dec = decrypt_dm(keypair.privkey, pubkey: pubkey, content: self.content, encoding: .base64)
        self.decrypted_content = dec

        return dec
    }

    public func direct_replies() -> NoteId? {
        return thread_reply()?.reply.note_id
    }

    // NDBTODO: just use Id
    /// The thread root's note id, or this note's own id when there is no thread root.
    public func thread_id() -> NoteId {
        guard let root = self.thread_reply()?.root else {
            return self.id
        }

        return root.note_id
    }

    public func last_refid() -> NoteId? {
        return self.referenced_ids.last
    }

    func is_reply() -> Bool {
        return thread_reply() != nil
    }

    /// Guesses the language of the note content with `NLLanguageRecognizer`.
    ///
    /// Must not be called on the main thread (asserted in debug builds).
    ///
    /// - Returns: The result of `localeToLanguage` for the top hypothesis, or nil when the
    ///   content is blank or the top hypothesis has a probability below 0.5.
    func note_language(ndb: Ndb, _ keypair: Keypair) -> String? {
        assert(!Thread.isMainThread, "This function must not be run on the main thread.")

        // Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
        // and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
        /*
        guard let blocks_txn = self.blocks(ndb: ndb) else {
            return nil
        }
        let blocks = blocks_txn.unsafeUnownedValue

        let originalOnlyText = blocks.blocks(note: self).compactMap {
                if case .text(let txt) = $0 {
                    // Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
                    // as a workaround to get Apple's language recognizer to predict the language correctly.
                    // It is important to add this workaround to get the language right because it wastes users' money to send translation requests.
                    // Until Apple fixes their language model, this workaround will be kept in place.
                    // See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
                    //
                    // For example,
                    // "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
                    // has the note content "It’s a meme".
                    // Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
                    // which is a wildly incorrect hypothesis.
                    // With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
                    //
                    // Similarly,
                    // "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
                    // has the note content "You’re funner".
                    // Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
                    // With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
                    return txt.replacingOccurrences(of: "’", with: "'")
                }
                else {
                    return nil
                }
            }
            .joined(separator: " ")
         */

        let originalOnlyText = self.get_content(keypair)

        // If there is no text, there's nothing to use to detect language.
        guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            return nil
        }

        let languageRecognizer = NLLanguageRecognizer()
        languageRecognizer.processString(originalOnlyText)

        // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
        guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
            return nil
        }

        // Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
        // Moreover, speakers of one variant can generally understand other variants.
        return localeToLanguage(locale)
    }

    /// Seconds elapsed between `created_at` and now.
    var age: TimeInterval {
        let event_date = Date(timeIntervalSince1970: TimeInterval(created_at))
        return Date.now.timeIntervalSince(event_date)
    }
}

/// Hex-encodes `data`, emitting two characters per byte (high nibble first) via `hexchar`.
func hex_encode(_ data: Data) -> String {
    var str = ""
    str.reserveCapacity(data.count * 2)
    for c in data {
        let c1 = hexchar(c >> 4)
        let c2 = hexchar(c & 0xF)

        str.append(Character(Unicode.Scalar(c1)))
        str.append(Character(Unicode.Scalar(c2)))
    }
    return str
}

extension NdbNote {
    /// A generic init error type to help make error handling code more concise
    fileprivate enum InitError: Error {
        case generic
    }
}