NdbNote.swift (15621B)
//
//  NdbNote.swift
//  damus
//
//  Created by William Casarin on 2023-07-21.
//

import Foundation
import NaturalLanguage
import CommonCrypto
import secp256k1
import secp256k1_implementation
import CryptoKit

/// Size of the scratch buffer used when building or parsing a note.
let MAX_NOTE_SIZE: Int = 2 << 18

/// A C string borrowed from a note. Holding `note` keeps the backing memory alive.
struct NdbStr {
    let note: NdbNote
    let str: UnsafePointer<CChar>
}

/// A 32-byte id copied out of a note's packed string storage.
struct NdbId {
    let note: NdbNote
    let id: Data
}

/// A tag element, which is either a packed 32-byte id or a plain C string.
enum NdbData {
    case id(NdbId)
    case str(NdbStr)

    /// Wraps an `ndb_str`, copying 32 bytes when it is flagged `NDB_PACKED_ID`
    /// and borrowing the C string otherwise.
    init(note: NdbNote, str: ndb_str) {
        guard str.flag == NDB_PACKED_ID else {
            self = .str(NdbStr(note: note, str: str.str))
            return
        }

        let buffer = UnsafeBufferPointer(start: str.id, count: 32)
        self = .id(NdbId(note: note, id: Data(buffer: buffer)))
    }
}

/// Swift wrapper around a raw `ndb_note` pointer.
///
/// When `owned` is true the wrapper frees the note's memory in `deinit`.
class NdbNote: Encodable, Equatable, Hashable {
    // we can have owned notes, but we can also have lmdb virtual-memory mapped notes so its optional
    let owned: Bool
    let count: Int
    let key: NoteKey?
    let note: UnsafeMutablePointer<ndb_note>

    // cached stuff (TODO: remove these)
    var decrypted_content: String? = nil

    /// Parses this note's content as a JSON note. Re-parsed on every access.
    private var inner_event: NdbNote? {
        get {
            return NdbNote.owned_from_json_cstr(json: content_raw, json_len: content_len)
        }
    }

    /// Wraps an existing note pointer.
    /// - Parameters:
    ///   - note: Pointer to the note data.
    ///   - size: Size of the note in bytes (used as the copy length in `to_owned`).
    ///   - owned: Whether this wrapper should `free` the pointer in `deinit`.
    ///   - key: Optional note key associated with the note.
    init(note: UnsafeMutablePointer<ndb_note>, size: Int, owned: Bool, key: NoteKey?) {
        self.note = note
        self.owned = owned
        self.count = size
        self.key = key

        #if DEBUG_NOTE_SIZE
        // Only owned notes are counted, mirroring the bookkeeping in `deinit`.
        if owned {
            NdbNote.total_ndb_size += size
            NdbNote.notes_created += 1

            print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
        }
        #endif

    }

    /// Returns `self` if it is already owned, otherwise a new note backed by a
    /// heap copy of the `count` bytes behind `note`.
    func to_owned() -> NdbNote {
        if self.owned {
            return self
        }

        // The return type is non-optional, so an allocation failure traps here.
        let buf = malloc(count)!
        memcpy(buf, note, count)
        let new_note = buf.assumingMemoryBound(to: ndb_note.self)

        return NdbNote(note: new_note, size: count, owned: true, key: key)
    }

    /// The note content decoded as UTF-8, or an empty string if decoding fails.
    var content: String {
        String(cString: content_raw, encoding: .utf8) ?? ""
    }

    var content_raw: UnsafePointer<CChar> {
        ndb_note_content(note)
    }

    var content_len: UInt32 {
        ndb_note_content_length(note)
    }

    /// NDBTODO: make this into data
    var id: NoteId {
        .init(Data(bytes: ndb_note_id(note), count: 32))
    }

    var sig: Signature {
        .init(Data(bytes: ndb_note_sig(note), count: 64))
    }

    /// NDBTODO: make this into data
    var pubkey: Pubkey {
        .init(Data(bytes: ndb_note_pubkey(note), count: 32))
    }

    var created_at: UInt32 {
        ndb_note_created_at(note)
    }

    var kind: UInt32 {
        ndb_note_kind(note)
    }

    var tags: TagsSequence {
        .init(note: self)
    }

    deinit {
        if self.owned {
            #if DEBUG_NOTE_SIZE
            NdbNote.total_ndb_size -= Int(count)
            NdbNote.notes_created -= 1

            print("\(NdbNote.notes_created) ndb_notes, \(NdbNote.total_ndb_size) bytes")
            #endif
            free(note)
        }
    }

    /// Two notes are equal when their ids are equal.
    static func == (lhs: NdbNote, rhs: NdbNote) -> Bool {
        return lhs.id == rhs.id
    }

    func hash(into hasher: inout Hasher) {
        hasher.combine(id)
    }

    private enum CodingKeys: String, CodingKey {
        case id, sig, tags, pubkey, created_at, kind, content
    }

    // Implement the `Encodable` protocol
    func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)

        try container.encode(hex_encode(id.id), forKey: .id)
        try container.encode(hex_encode(sig.data), forKey: .sig)
        try container.encode(pubkey, forKey: .pubkey)
        try container.encode(created_at, forKey: .created_at)
        try container.encode(kind, forKey: .kind)
        try container.encode(content, forKey: .content)
        try container.encode(tags, forKey: .tags)
    }

    #if DEBUG_NOTE_SIZE
    static var total_ndb_size: Int = 0
    static var notes_created: Int = 0
    #endif

    /// Builds a new owned note from its parts.
    ///
    /// If `keypair` has a private key and `ndb_create_keypair` succeeds, that
    /// keypair is passed to `ndb_builder_finalize`; otherwise `nil` is passed.
    ///
    /// - Returns: `nil` if the scratch buffer cannot be allocated, a tag element
    ///   or the content cannot be pushed into the builder, finalizing yields a
    ///   non-positive length, or the buffer cannot be shrunk.
    init?(content: String, keypair: Keypair, kind: UInt32 = 1, tags: [[String]] = [], createdAt: UInt32 = UInt32(Date().timeIntervalSince1970)) {

        var builder = ndb_builder()
        let buflen = MAX_NOTE_SIZE
        let buf = malloc(buflen)
        guard buf != nil else {
            return nil
        }

        ndb_builder_init(&builder, buf, Int32(buflen))

        var pk_raw = keypair.pubkey.bytes

        ndb_builder_set_pubkey(&builder, &pk_raw)
        ndb_builder_set_kind(&builder, UInt32(kind))
        ndb_builder_set_created_at(&builder, UInt64(createdAt))

        for tag in tags {
            ndb_builder_new_tag(&builder)
            for elem in tag {
                let pushed = elem.withCString { eptr in
                    return ndb_builder_push_tag_str(&builder, eptr, Int32(elem.utf8.count)) > 0
                }
                guard pushed else {
                    // The scratch buffer is still ours; release it before failing.
                    free(buf)
                    return nil
                }
            }
        }

        let content_set = content.withCString { cptr in
            return ndb_builder_set_content(&builder, cptr, Int32(content.utf8.count)) > 0
        }
        guard content_set else {
            free(buf)
            return nil
        }

        var n = UnsafeMutablePointer<ndb_note>?(nil)

        var the_kp: ndb_keypair? = nil

        if let sec = keypair.privkey {
            var kp = ndb_keypair()
            memcpy(&kp.secret.0, sec.id.bytes, 32)

            if ndb_create_keypair(&kp) <= 0 {
                print("bad keypair")
            } else {
                the_kp = kp
            }
        }

        let len: Int32
        if var the_kp {
            len = ndb_builder_finalize(&builder, &n, &the_kp)
        } else {
            len = ndb_builder_finalize(&builder, &n, nil)
        }

        guard len > 0 else {
            free(buf)
            return nil
        }

        //guard let n else { return nil }
        //self.note = n

        // Shrink the scratch buffer down to the finalized note size. When
        // realloc fails the original allocation is still valid and must be freed.
        guard let r = realloc(buf, Int(len)) else {
            free(buf)
            return nil
        }

        self.owned = true
        self.count = Int(len)
        self.note = r.assumingMemoryBound(to: ndb_note.self)
        self.key = nil
    }

    /// Parses a JSON string into an owned note.
    /// - Parameters:
    ///   - json: The JSON text of the note.
    ///   - bufsize: Size of the scratch buffer used while parsing.
    /// - Returns: The parsed note, or `nil` on failure.
    static func owned_from_json(json: String, bufsize: Int = MAX_NOTE_SIZE) -> NdbNote? {
        return json.withCString { cstr in
            return NdbNote.owned_from_json_cstr(
                json: cstr, json_len: UInt32(json.utf8.count), bufsize: bufsize)
        }
    }

    /// Parses a JSON C string into an owned note.
    /// - Parameters:
    ///   - json: Pointer to the JSON text.
    ///   - json_len: Length of the JSON text passed to `ndb_note_from_json`.
    ///   - bufsize: Size of the scratch buffer used while parsing.
    /// - Returns: The parsed note, or `nil` if allocation fails, parsing yields
    ///   a non-positive length, or the buffer cannot be shrunk.
    static func owned_from_json_cstr(json: UnsafePointer<CChar>, json_len: UInt32, bufsize: Int = MAX_NOTE_SIZE) -> NdbNote? {
        let data = malloc(bufsize)
        guard data != nil else {
            return nil
        }
        //guard var json_cstr = json.cString(using: .utf8) else { return nil }

        var note: UnsafeMutablePointer<ndb_note>?

        let len = ndb_note_from_json(json, Int32(json_len), &note, data, Int32(bufsize))

        // Treat any non-positive result as failure so that a negative value is
        // never handed to realloc as a size.
        guard len > 0 else {
            free(data)
            return nil
        }

        // Shrink the buffer to just the valid bytes. When realloc fails the
        // original allocation is still valid and must be freed.
        guard let note_data = realloc(data, Int(len)) else {
            free(data)
            return nil
        }
        let new_note = note_data.assumingMemoryBound(to: ndb_note.self)

        return NdbNote(note: new_note, size: Int(len), owned: true, key: nil)
    }

    /// Returns the note parsed from this note's content, if the content parses as a note.
    func get_inner_event() -> NdbNote? {
        return self.inner_event
    }
}

// Extension to make NdbNote compatible with NostrEvent's original API
extension NdbNote {
    /// True for kinds 1, 42 and 30023.
    var is_textlike: Bool {
        return kind == 1 || kind == 42 || kind == 30023
    }

    /// For kind 1 notes, the id of the first referenced quote, if any.
    var is_quote_repost: NoteId? {
        guard kind == 1, let quoted_note_id = referenced_quote_ids.first else {
            return nil
        }
        return quoted_note_id.note_id
    }

    var known_kind: NostrKind? {
        return NostrKind.init(rawValue: kind)
    }

    /// True when the note is not longform and its content length exceeds 16000.
    var too_big: Bool {
        return known_kind != .longform && self.content_len > 16000
    }

    var should_show_event: Bool {
        return !too_big
    }

    func get_blocks(keypair: Keypair) -> Blocks {
        return parse_note_content(content: .init(note: self, keypair: keypair))
    }

    // TODO: References iterator
    public var referenced_ids: References<NoteId> {
        References<NoteId>(tags: self.tags)
    }

    public var referenced_quote_ids: References<QuoteId> {
        References<QuoteId>(tags: self.tags)
    }

    public var referenced_noterefs: References<NoteRef> {
        References<NoteRef>(tags: self.tags)
    }

    public var referenced_follows: References<FollowRef> {
        References<FollowRef>(tags: self.tags)
    }

    public var referenced_pubkeys: References<Pubkey> {
        References<Pubkey>(tags: self.tags)
    }

    public var referenced_hashtags: References<Hashtag> {
        References<Hashtag>(tags: self.tags)
    }

    public var referenced_params: References<ReplaceableParam> {
        References<ReplaceableParam>(tags: self.tags)
    }

    public var referenced_mute_items: References<MuteItem> {
        References<MuteItem>(tags: self.tags)
    }

    public var references: References<RefId> {
        References<RefId>(tags: self.tags)
    }

    func thread_reply(_ keypair: Keypair) -> ThreadReply? {
        ThreadReply(event_refs: interpret_event_refs_ndb(blocks: self.blocks(keypair).blocks, tags: self.tags))
    }

    /// Returns the displayable content. For DMs this is the decrypted content,
    /// or a placeholder message when decryption fails.
    func get_content(_ keypair: Keypair) -> String {
        if known_kind == .dm {
            return decrypted(keypair: keypair) ?? "*failed to decrypt content*"
        }

        return content
    }

    /// Like `get_content`, but returns `nil` when a DM cannot be decrypted.
    func maybe_get_content(_ keypair: Keypair) -> String? {
        if known_kind == .dm {
            return decrypted(keypair: keypair)
        }

        return content
    }

    func blocks(_ keypair: Keypair) -> Blocks {
        return get_blocks(keypair: keypair)
    }

    /// Decrypts the note content as a DM and caches the result in `decrypted_content`.
    // NDBTODO: switch this to operating on bytes not strings
    func decrypted(keypair: Keypair) -> String? {
        if let decrypted_content {
            return decrypted_content
        }

        let our_pubkey = keypair.pubkey

        // NDBTODO: don't hex encode
        var pubkey = self.pubkey
        // This is our DM, we need to use the pubkey of the person we're talking to instead

        if our_pubkey == pubkey, let pk = self.referenced_pubkeys.first {
            pubkey = pk
        }

        // NDBTODO: pass data to pubkey
        let dec = decrypt_dm(keypair.privkey, pubkey: pubkey, content: self.content, encoding: .base64)
        self.decrypted_content = dec

        return dec
    }

    public func direct_replies(_ keypair: Keypair) -> NoteId? {
        return thread_reply(keypair)?.reply?.note_id
    }

    /// The id of the thread root, or this note's own id when it has no root reference.
    // NDBTODO: just use Id
    public func thread_id(keypair: Keypair) -> NoteId {
        guard let root = self.thread_reply(keypair)?.root else {
            return self.id
        }

        return root.note_id
    }

    public func last_refid() -> NoteId? {
        return self.referenced_ids.last
    }

    // NDBTODO: id -> data
    /*
    public func references(id: String, key: AsciiCharacter) -> Bool {
        var matcher: (Reference) -> Bool = { ref in ref.ref_id.matches_str(id) }
        if id.count == 64, let decoded = hex_decode(id) {
            matcher = { ref in ref.ref_id.matches_id(decoded) }
        }
        for ref in References(tags: self.tags) {
            if ref.key == key && matcher(ref) {
                return true
            }
        }

        return false
    }
     */

    func is_reply(_ keypair: Keypair) -> Bool {
        return thread_reply(keypair)?.reply != nil
    }

    /// Guesses the language of the note's text blocks.
    ///
    /// Must not be called on the main thread (asserted in debug builds).
    /// - Returns: The result of `localeToLanguage` for the top hypothesis, or
    ///   `nil` when there is no text or the top hypothesis is below 50% confidence.
    func note_language(_ keypair: Keypair) -> String? {
        assert(!Thread.isMainThread, "This function must not be run on the main thread.")

        // Rely on Apple's NLLanguageRecognizer to tell us which language it thinks the note is in
        // and filter on only the text portions of the content as URLs and hashtags confuse the language recognizer.
        let originalBlocks = self.blocks(keypair).blocks
        let originalOnlyText = originalBlocks.compactMap {
                if case .text(let txt) = $0 {
                    // Replacing right single quotation marks (’) with "typewriter or ASCII apostrophes" (')
                    // as a workaround to get Apple's language recognizer to predict the language correctly.
                    // It is important to add this workaround to get the language right because it wastes users' money to send translation requests.
                    // Until Apple fixes their language model, this workaround will be kept in place.
                    // See https://en.wikipedia.org/wiki/Apostrophe#Unicode for an explanation of the differences between the two characters.
                    //
                    // For example,
                    // "nevent1qqs0wsknetaju06xk39cv8sttd064amkykqalvfue7ydtg3p0lyfksqzyrhxagf6h8l9cjngatumrg60uq22v66qz979pm32v985ek54ndh8gj42wtp"
                    // has the note content "It’s a meme".
                    // Without the character replacement, it is 61% confident that the text is in Turkish (tr) and 8% confident that the text is in English (en),
                    // which is a wildly incorrect hypothesis.
                    // With the character replacement, it is 65% confident that the text is in English (en) and 24% confident that the text is in Turkish (tr), which is more accurate.
                    //
                    // Similarly,
                    // "nevent1qqspjqlln6wvxrqg6kzl2p7gk0rgr5stc7zz5sstl34cxlw55gvtylgpp4mhxue69uhkummn9ekx7mqpr4mhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet5qy28wumn8ghj7un9d3shjtnwdaehgu3wvfnsygpx6655ve67vqlcme9ld7ww73pqx7msclhwzu8lqmkhvuluxnyc7yhf3xut"
                    // has the note content "You’re funner".
                    // Without the character replacement, it is 52% confident that the text is in Norwegian Bokmål (nb) and 41% confident that the text is in English (en).
                    // With the character replacement, it is 93% confident that the text is in English (en) and 4% confident that the text is in Norwegian Bokmål (nb).
                    return txt.replacingOccurrences(of: "’", with: "'")
                }
                else {
                    return nil
                }
            }
            .joined(separator: " ")

        // If there is no text, there's nothing to use to detect language.
        guard !originalOnlyText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
            return nil
        }

        let languageRecognizer = NLLanguageRecognizer()
        languageRecognizer.processString(originalOnlyText)

        // Only accept language recognition hypothesis if there's at least a 50% probability that it's accurate.
        guard let locale = languageRecognizer.languageHypotheses(withMaximum: 1).first(where: { $0.value >= 0.5 })?.key.rawValue else {
            return nil
        }

        // Remove the variant component and just take the language part as translation services typically only supports the variant-less language.
        // Moreover, speakers of one variant can generally understand other variants.
        return localeToLanguage(locale)
    }

    /// Time elapsed between `created_at` and now.
    var age: TimeInterval {
        let event_date = Date(timeIntervalSince1970: TimeInterval(created_at))
        return Date.now.timeIntervalSince(event_date)
    }
}

/// Hex-encodes `data`, emitting two characters per byte with the high nibble first.
func hex_encode(_ data: Data) -> String {
    var str = ""
    // The output length is known up front: two characters per input byte.
    str.reserveCapacity(data.count * 2)
    for c in data {
        let c1 = hexchar(c >> 4)
        let c2 = hexchar(c & 0xF)

        str.append(Character(Unicode.Scalar(c1)))
        str.append(Character(Unicode.Scalar(c2)))
    }
    return str
}