ndb: fix iterators, pack id tags, more tests - damus

commit e34351ca37b89b5ba9d0a011076aee555dbbfcf0
parent 1a33d639ede07c9afa45ab1dd69797d984bde9ef
Author: William Casarin <jb55@jb55.com>
Date:   Sun, 23 Jul 2023 11:55:36 -0700

ndb: fix iterators, pack id tags, more tests

Diffstat:
M damus.xcodeproj/project.pbxproj  | 4 ++++
M damus/Nostr/NostrEvent.swift  | 26 --------------------------
A damus/Nostr/ReferencedId.swift  | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M nostrdb/NdbNote.swift  | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M nostrdb/NdbTagElem.swift  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M nostrdb/NdbTagIterator.swift  | 19 +------------------
M nostrdb/NdbTagsIterator.swift  | 21 +++++++++++----------
M nostrdb/Test/NdbTests.swift  | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M nostrdb/nostrdb.c  | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M nostrdb/nostrdb.h  | 107 +++++++++++++++++++++++++++++++++++++++++--------------------------------------

10 files changed, 398 insertions(+), 173 deletions(-)
diff --git a/damus.xcodeproj/project.pbxproj b/damus.xcodeproj/project.pbxproj
@@ -75,6 +75,7 @@
 		4C285C8A2838B985008A31F1 /* ProfilePictureSelector.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C285C892838B985008A31F1 /* ProfilePictureSelector.swift */; };
 		4C285C8C28398BC7008A31F1 /* Keys.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C285C8B28398BC6008A31F1 /* Keys.swift */; };
 		4C285C8E28399BFE008A31F1 /* SaveKeysView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C285C8D28399BFD008A31F1 /* SaveKeysView.swift */; };
+		4C28A4122A6D03D200C1A7A5 /* ReferencedId.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C28A4112A6D03D200C1A7A5 /* ReferencedId.swift */; };
 		4C2CDDF7299D4A5E00879FD5 /* Debouncer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C2CDDF6299D4A5E00879FD5 /* Debouncer.swift */; };
 		4C30AC7229A5677A00E2BD5A /* NotificationsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C30AC7129A5677A00E2BD5A /* NotificationsView.swift */; };
 		4C30AC7429A5680900E2BD5A /* EventGroupView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4C30AC7329A5680900E2BD5A /* EventGroupView.swift */; };
@@ -527,6 +528,7 @@
 		4C285C892838B985008A31F1 /* ProfilePictureSelector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProfilePictureSelector.swift; sourceTree = "<group>"; };
 		4C285C8B28398BC6008A31F1 /* Keys.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Keys.swift; sourceTree = "<group>"; };
 		4C285C8D28399BFD008A31F1 /* SaveKeysView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SaveKeysView.swift; sourceTree = "<group>"; };
+		4C28A4112A6D03D200C1A7A5 /* ReferencedId.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReferencedId.swift; sourceTree = "<group>"; };
 		4C2CDDF6299D4A5E00879FD5 /* Debouncer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Debouncer.swift; sourceTree = "<group>"; };
 		4C30AC7129A5677A00E2BD5A /* NotificationsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NotificationsView.swift; sourceTree = "<group>"; };
 		4C30AC7329A5680900E2BD5A /* EventGroupView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EventGroupView.swift; sourceTree = "<group>"; };
@@ -1215,6 +1217,7 @@
 				4C3BEFD32819DE8F00B3DE84 /* NostrKind.swift */,
 				4C363A8F28247A1D006E126D /* NostrLink.swift */,
 				50088DA029E8271A008A1FDF /* WebSocket.swift */,
+				4C28A4112A6D03D200C1A7A5 /* ReferencedId.swift */,
 			);
 			path = Nostr;
 			sourceTree = "<group>";
@@ -1919,6 +1922,7 @@
 				4C363AA228296A7E006E126D /* SearchView.swift in Sources */,
 				4CC7AAED297F0B9E00430951 /* Highlight.swift in Sources */,
 				4CA927652A290F1A0098A105 /* TimeDot.swift in Sources */,
+				4C28A4122A6D03D200C1A7A5 /* ReferencedId.swift in Sources */,
 				4CC6193A29DC777C006A86D1 /* RelayBootstrap.swift in Sources */,
 				4C285C8A2838B985008A31F1 /* ProfilePictureSelector.swift in Sources */,
 				4CFD502F2A2DA45800A229DB /* MediaView.swift in Sources */,
diff --git a/damus/Nostr/NostrEvent.swift b/damus/Nostr/NostrEvent.swift
@@ -20,32 +20,6 @@ enum ValidationResult: Decodable {
     case bad_sig
 }
 
-struct ReferencedId: Identifiable, Hashable, Equatable {
-    let ref_id: String
-    let relay_id: String?
-    let key: String
-
-    var id: String {
-        return ref_id
-    }
-    
-    static func q(_ id: String, relay_id: String? = nil) -> ReferencedId {
-        return ReferencedId(ref_id: id, relay_id: relay_id, key: "q")
-    }
-    
-    static func e(_ id: String, relay_id: String? = nil) -> ReferencedId {
-        return ReferencedId(ref_id: id, relay_id: relay_id, key: "e")
-    }
-
-    static func p(_ pk: String, relay_id: String? = nil) -> ReferencedId {
-        return ReferencedId(ref_id: pk, relay_id: relay_id, key: "p")
-    }
-
-    static func t(_ hashtag: String, relay_id: String? = nil) -> ReferencedId {
-        return ReferencedId(ref_id: hashtag, relay_id: relay_id, key: "t")
-    }
-}
-
 class NostrEvent: Codable, Identifiable, CustomStringConvertible, Equatable, Hashable, Comparable {
     // TODO: memory mapped db events
     /*
diff --git a/damus/Nostr/ReferencedId.swift b/damus/Nostr/ReferencedId.swift
@@ -0,0 +1,69 @@
+//
+//  ReferencedId.swift
+//  damus
+//
+//  Created by William Casarin on 2023-07-22.
+//
+
+import Foundation
+
+struct Reference {
+    let key: AsciiCharacter
+    let id: NdbTagElem
+
+    func to_referenced_id() -> ReferencedId {
+        ReferencedId(ref_id: id.string(), relay_id: nil, key: key.string)
+    }
+}
+
+struct References: Sequence, IteratorProtocol {
+    let note: NdbNote
+    var tags: TagsIterator
+
+    mutating func next() -> Reference? {
+        while let tag = tags.next() {
+            guard let key = tag[0], key.count == 1,
+                  let id = tag[1], id.is_id
+            else { continue }
+
+            for c in key {
+                guard let a = AsciiCharacter(c) else { break }
+                return Reference(key: a, id: id)
+            }
+        }
+
+        return nil
+    }
+
+    init(note: NdbNote) {
+        self.note = note
+        self.tags = note.tags().makeIterator()
+    }
+}
+
+struct ReferencedId: Identifiable, Hashable, Equatable {
+    let ref_id: String
+    let relay_id: String?
+    let key: String
+
+    var id: String {
+        return ref_id
+    }
+    
+    static func q(_ id: String, relay_id: String? = nil) -> ReferencedId {
+        return ReferencedId(ref_id: id, relay_id: relay_id, key: "q")
+    }
+    
+    static func e(_ id: String, relay_id: String? = nil) -> ReferencedId {
+        return ReferencedId(ref_id: id, relay_id: relay_id, key: "e")
+    }
+
+    static func p(_ pk: String, relay_id: String? = nil) -> ReferencedId {
+        return ReferencedId(ref_id: pk, relay_id: relay_id, key: "p")
+    }
+
+    static func t(_ hashtag: String, relay_id: String? = nil) -> ReferencedId {
+        return ReferencedId(ref_id: hashtag, relay_id: relay_id, key: "t")
+    }
+}
+
diff --git a/nostrdb/NdbNote.swift b/nostrdb/NdbNote.swift
@@ -7,20 +7,44 @@
 
 import Foundation
 
-struct NdbNote {
+
+struct NdbStr {
+    let note: NdbNote
+    let str: UnsafePointer<CChar>
+}
+
+struct NdbId {
+    let note: NdbNote
+    let id: Data
+}
+
+enum NdbData {
+    case id(NdbId)
+    case str(NdbStr)
+
+    init(note: NdbNote, str: ndb_str) {
+        guard str.flag == NDB_PACKED_ID else {
+            self = .str(NdbStr(note: note, str: str.str))
+            return
+        }
+
+        let buffer = UnsafeBufferPointer(start: str.id, count: 32)
+        self = .id(NdbId(note: note, id: Data(buffer: buffer)))
+    }
+}
+
+class NdbNote {
     // we can have owned notes, but we can also have lmdb virtual-memory mapped notes so its optional
-    private var owned: Data?
+    private let owned: Bool
+    let count: Int
     let note: UnsafeMutablePointer<ndb_note>
 
-    init(note: UnsafeMutablePointer<ndb_note>, data: Data?) {
+    init(note: UnsafeMutablePointer<ndb_note>, owned_size: Int?) {
         self.note = note
-        self.owned = data
-    }
-    
-    var owned_size: Int? {
-        return owned?.count
+        self.owned = owned_size != nil
+        self.count = owned_size ?? 0
     }
-    
+
     var content: String {
         String(cString: content_raw, encoding: .utf8) ?? ""
     }
@@ -33,10 +57,12 @@ struct NdbNote {
         ndb_note_content_length(note)
     }
 
+    /// These are unsafe if working on owned data and if this outlives the NdbNote
     var id: Data {
         Data(buffer: UnsafeBufferPointer(start: ndb_note_id(note), count: 32))
     }
     
+    /// Make a copy if this outlives the note and the note has owned data!
     var pubkey: Data {
         Data(buffer: UnsafeBufferPointer(start: ndb_note_pubkey(note), count: 32))
     }
@@ -48,26 +74,35 @@ struct NdbNote {
     var kind: UInt32 {
         ndb_note_kind(note)
     }
-    
+
     func tags() -> TagsSequence {
         return .init(note: self)
     }
-    
+
+    deinit {
+        if self.owned {
+            free(note)
+        }
+    }
+
     static func owned_from_json(json: String, bufsize: Int = 2 << 18) -> NdbNote? {
-        var data = Data(capacity: bufsize)
+        let data = malloc(bufsize)
         guard var json_cstr = json.cString(using: .utf8) else { return nil }
         
         var note: UnsafeMutablePointer<ndb_note>?
         
-        let len = data.withUnsafeMutableBytes { (bytes: UnsafeMutableRawBufferPointer) in
-            return ndb_note_from_json(&json_cstr, Int32(json_cstr.count), &note, bytes.baseAddress, Int32(bufsize))
+        let len = ndb_note_from_json(&json_cstr, Int32(json_cstr.count), &note, data, Int32(bufsize))
+
+        if len == 0 {
+            free(data)
+            return nil
         }
-        
-        guard let note else { return nil }
-        
+
         // Create new Data with just the valid bytes
-        let smol_data = Data(bytes: &note.pointee, count: Int(len))
-        return NdbNote(note: note, data: smol_data)
+        guard let note_data = realloc(data, Int(len)) else { return nil }
+
+        let new_note = note_data.assumingMemoryBound(to: ndb_note.self)
+        return NdbNote(note: new_note, owned_size: Int(len))
     }
 }
 
@@ -99,20 +134,35 @@ extension NdbNote {
         return parse_note_content_ndb(note: self)
     }
 
-    /*
-
     func get_inner_event(cache: EventCache) -> NostrEvent? {
         guard self.known_kind == .boost else {
             return nil
         }
 
         if self.content == "", let ref = self.referenced_ids.first {
-            return cache.lookup(ref.ref_id)
+            // TODO: raw id cache lookups
+            let id = ref.id.string()
+            return cache.lookup(id)
         }
 
-        return self.inner_event
+        // TODO: how to handle inner events?
+        return nil
+        //return self.inner_event
     }
 
+    // TODO: References iterator
+    public var referenced_ids: LazyFilterSequence<References> {
+        References(note: self).lazy
+            .filter() { ref in ref.key == "e" }
+    }
+
+    public var referenced_pubkeys: LazyFilterSequence<References> {
+        References(note: self).lazy
+            .filter() { ref in ref.key == "p" }
+    }
+
+    /*
+
     func event_refs(_ privkey: String?) -> [EventRef] {
         if let rs = _event_refs {
             return rs
@@ -122,7 +172,6 @@ extension NdbNote {
         return refs
     }
 
-
     func decrypted(privkey: String?) -> String? {
         if let decrypted_content = decrypted_content {
             return decrypted_content
@@ -272,3 +321,9 @@ extension NdbNote {
     }
      */
 }
+
+extension LazyFilterSequence {
+    var first: Element? {
+        self.first(where: { _ in true })
+    }
+}
diff --git a/nostrdb/NdbTagElem.swift b/nostrdb/NdbTagElem.swift
@@ -7,9 +7,33 @@
 
 import Foundation
 
-struct NdbTagElem {
-    private let note: NdbNote
-    private let tag: UnsafeMutablePointer<ndb_tag>
+struct NdbStrIter: IteratorProtocol {
+    typealias Element = CChar
+
+    var ind: Int
+    let str: ndb_str
+    let tag: NdbTagElem // stored for lifetime reasons
+
+    mutating func next() -> CChar? {
+        let c = str.str[ind]
+        if (c != 0) {
+            ind += 1
+            return c
+        }
+
+        return nil
+    }
+
+    init(tag: NdbTagElem) {
+        self.str = ndb_tag_str(tag.note.note, tag.tag, tag.index)
+        self.ind = 0
+        self.tag = tag
+    }
+}
+
+struct NdbTagElem: Sequence {
+    let note: NdbNote
+    let tag: UnsafeMutablePointer<ndb_tag>
     let index: Int32
 
     init(note: NdbNote, tag: UnsafeMutablePointer<ndb_tag>, index: Int32) {
@@ -18,12 +42,44 @@ struct NdbTagElem {
         self.index = index
     }
 
+    var is_id: Bool {
+        return ndb_tag_str(note.note, tag, index).flag == NDB_PACKED_ID
+    }
+
+    var count: Int {
+        let r = ndb_tag_str(note.note, tag, index)
+        if r.flag == NDB_PACKED_ID {
+            return 32
+        } else {
+            return strlen(r.str)
+        }
+    }
+
     func matches_char(_ c: AsciiCharacter) -> Bool {
         return ndb_tag_matches_char(note.note, tag, index, c.cchar) == 1
     }
 
+    func data() -> NdbData {
+        let s = ndb_tag_str(note.note, tag, index)
+        return NdbData(note: note, str: s)
+    }
+
+    func id() -> Data? {
+        guard case .id(let id) = self.data() else { return nil }
+        return id.id
+    }
+
     func string() -> String {
-        return String(cString: ndb_tag_str(note.note, tag, index), encoding: .utf8) ?? ""
+        switch self.data() {
+        case .id(let id):
+            return hex_encode(id.id)
+        case .str(let s):
+            return String(cString: s.str, encoding: .utf8) ?? ""
+        }
+    }
+
+    func makeIterator() -> NdbStrIter {
+        return NdbStrIter(tag: self)
     }
 }
 
diff --git a/nostrdb/NdbTagIterator.swift b/nostrdb/NdbTagIterator.swift
@@ -16,9 +16,7 @@ struct TagSequence: Sequence {
     }
 
     subscript(index: Int) -> NdbTagElem? {
-        if index >= tag.pointee.count {
-            return nil
-        }
+        guard index < count else { return nil }
 
         return NdbTagElem(note: note, tag: tag, index: Int32(index))
     }
@@ -40,14 +38,6 @@ struct TagIterator: IteratorProtocol {
         return el
     }
 
-    subscript(index: Int) -> NdbTagElem? {
-        if index >= tag.pointee.count {
-            return nil
-        }
-
-        return NdbTagElem(note: note, tag: tag, index: Int32(index))
-    }
-
     var index: Int32
     let note: NdbNote
     var tag: UnsafeMutablePointer<ndb_tag>
@@ -62,10 +52,3 @@ struct TagIterator: IteratorProtocol {
         self.index = 0
     }
 }
-
-
-func ndb_maybe_pointee<T>(_ p: UnsafeMutablePointer<T>!) -> T? {
-    guard p != nil else { return nil }
-    return p.pointee
-}
-
diff --git a/nostrdb/NdbTagsIterator.swift b/nostrdb/NdbTagsIterator.swift
@@ -15,24 +15,22 @@ struct TagsIterator: IteratorProtocol {
     var note: NdbNote
 
     mutating func next() -> TagSequence? {
-        guard !done else { return nil }
-
-        let tag_seq = TagSequence(note: note, tag: self.iter.tag)
-
-        let ok = ndb_tags_iterate_next(&self.iter)
-        done = ok == 0
+        guard ndb_tags_iterate_next(&self.iter) == 1 else {
+            done = true
+            return nil
+        }
 
-        return tag_seq
+        return TagSequence(note: note, tag: self.iter.tag)
     }
 
     var count: UInt16 {
-        return iter.tag.pointee.count
+        return note.note.pointee.tags.count
     }
 
     init(note: NdbNote) {
         self.iter = ndb_iterator()
-        let res = ndb_tags_iterate_start(note.note, &self.iter)
-        self.done = res == 0
+        ndb_tags_iterate_start(note.note, &self.iter)
+        self.done = false
         self.note = note
     }
 }
@@ -44,6 +42,9 @@ struct TagsSequence: Sequence {
         note.note.pointee.tags.count
     }
 
+    // no O(1) indexing on top-level tag lists unfortunately :(
+    // but it's very fast to iterate over each tag since the number of tags
+    // is stored and the elements are fixed size.
     subscript(index: Int) -> Iterator.Element? {
         var i = 0
         for element in self {
diff --git a/nostrdb/Test/NdbTests.swift b/nostrdb/Test/NdbTests.swift
@@ -28,25 +28,74 @@ final class NdbTests: XCTestCase {
         XCTAssertEqual(hex_encode(note.id), id)
         XCTAssertEqual(hex_encode(note.pubkey), pubkey)
 
-        XCTAssertEqual(note.owned_size, 59062)
+        XCTAssertEqual(note.count, 34058)
         XCTAssertEqual(note.kind, 3)
         XCTAssertEqual(note.created_at, 1689904312)
-        XCTAssertEqual(note.tags().reduce(0, { sum, _ in sum + 1 }), 786)
 
+        let expected_count: UInt16 = 786
+        XCTAssertEqual(note.tags().count, expected_count)
+        XCTAssertEqual(note.tags().reduce(0, { sum, _ in sum + 1 }), expected_count)
+
+        var count = 0
+        var tags = 0
+        var total_count_stored = 0
+        var total_count_iter = 0
         //let tags = note.tags()
         for tag in note.tags() {
+            total_count_stored += Int(tag.count)
+
+            if tags == 0 || tags == 1 || tags == 2 {
+                XCTAssertEqual(tag.count, 3)
+            }
+
+            if tags == 6 {
+                XCTAssertEqual(tag.count, 2)
+            }
+
+            if tags == 7 {
+                XCTAssertEqual(tag[2]?.string(), "wss://nostr-pub.wellorder.net")
+            }
+
             for elem in tag {
-                print("test_ndb_iterator \(elem.string())")
+                print("tag[\(tags)][\(elem.index)]")
+                total_count_iter += 1
             }
+
+            tags += 1
         }
 
+        XCTAssertEqual(tags, 786)
+        XCTAssertEqual(total_count_stored, total_count_iter)
     }
 
-    func testPerformanceExample() throws {
+    func test_decode_perf() throws {
         // This is an example of a performance test case.
         self.measure {
-            // Put the code you want to measure the time of here.
+            _ = NdbNote.owned_from_json(json: test_contact_list_json)
         }
     }
 
+    func test_iteration_perf() throws {
+        guard let note = NdbNote.owned_from_json(json: test_contact_list_json) else {
+            XCTAssert(false)
+            return
+        }
+
+        var count = 0
+        for tag in note.tags() {
+            for elem in tag {
+                print("iter_elem \(elem.string())")
+                /*
+                for _ in elem {
+                    count += 1
+                }
+                 */
+            }
+            count += 1
+        }
+
+        XCTAssertEqual(count, 786)
+    }
+
 }
+
diff --git a/nostrdb/nostrdb.c b/nostrdb/nostrdb.c
@@ -57,24 +57,6 @@ int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf,
 	return 1;
 }
 
-/// Check for small strings to pack
-static inline int ndb_builder_try_compact_str(struct ndb_builder *builder,
-					      const char *str, int len,
-					      union packed_str *pstr)
-{
-	if (len == 0) {
-		*pstr = ndb_char_to_packed_str(0);
-		return 1;
-	} else if (len == 1) {
-		*pstr = ndb_char_to_packed_str(str[0]);
-		return 1;
-	} else if (len == 2) {
-		*pstr = ndb_chars_to_packed_str(str[0], str[1]);
-		return 1;
-	}
-
-	return 0;
-}
 
 
 static inline int ndb_json_parser_init(struct ndb_json_parser *p,
@@ -146,7 +128,7 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder)
 /// builder phase just for this purpose.
 static inline int ndb_builder_find_str(struct ndb_builder *builder,
 				       const char *str, int len,
-				       union packed_str *pstr)
+				       union ndb_packed_str *pstr)
 {
 	// find existing matching string to avoid duplicate strings
 	int indices = cursor_count(&builder->str_indices, sizeof(uint32_t));
@@ -165,7 +147,7 @@ static inline int ndb_builder_find_str(struct ndb_builder *builder,
 }
 
 static int ndb_builder_push_str(struct ndb_builder *builder, const char *str,
-				int len, union packed_str *pstr)
+				int len, union ndb_packed_str *pstr)
 {
 	uint32_t loc;
 
@@ -185,9 +167,52 @@ static int ndb_builder_push_str(struct ndb_builder *builder, const char *str,
 	return 1;
 }
 
+static int ndb_builder_push_packed_id(struct ndb_builder *builder,
+				      unsigned char *id,
+				      union ndb_packed_str *pstr)
+{
+	if (ndb_builder_find_str(builder, (const char*)id, 32, pstr)) {
+		pstr->packed.flag = NDB_PACKED_ID;
+		return 1;
+	}
+
+	if (ndb_builder_push_str(builder, (const char*)id, 32, pstr)) {
+		pstr->packed.flag = NDB_PACKED_ID;
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/// Check for small strings to pack
+static inline int ndb_builder_try_compact_str(struct ndb_builder *builder,
+					      const char *str, int len,
+					      union ndb_packed_str *pstr,
+					      int pack_ids)
+{
+	unsigned char id_buf[32];
+
+	if (len == 0) {
+		*pstr = ndb_char_to_packed_str(0);
+		return 1;
+	} else if (len == 1) {
+		*pstr = ndb_char_to_packed_str(str[0]);
+		return 1;
+	} else if (len == 2) {
+		*pstr = ndb_chars_to_packed_str(str[0], str[1]);
+		return 1;
+	} else if (pack_ids && len == 64 && hex_decode(str, 64, id_buf, 32)) {
+		return ndb_builder_push_packed_id(builder, id_buf, pstr);
+	}
+
+	return 0;
+}
+
+
 static int ndb_builder_push_unpacked_str(struct ndb_builder *builder,
 					 const char *str, int len,
-					 union packed_str *pstr)
+					 union ndb_packed_str *pstr)
 {
 	if (ndb_builder_find_str(builder, str, len, pstr))
 		return 1;
@@ -196,9 +221,9 @@ static int ndb_builder_push_unpacked_str(struct ndb_builder *builder,
 }
 
 int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len,
-			 union packed_str *pstr)
+			 union ndb_packed_str *pstr, int pack_ids)
 {
-	if (ndb_builder_try_compact_str(builder, str, len, pstr))
+	if (ndb_builder_try_compact_str(builder, str, len, pstr, pack_ids))
 		return 1;
 
 	return ndb_builder_push_unpacked_str(builder, str, len, pstr);
@@ -207,8 +232,10 @@ int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len,
 int ndb_builder_set_content(struct ndb_builder *builder, const char *content,
 			    int len)
 {
+	int pack_ids = 0;
 	builder->note->content_length = len;
-	return ndb_builder_make_str(builder, content, len, &builder->note->content);
+	return ndb_builder_make_str(builder, content, len,
+				    &builder->note->content, pack_ids);
 }
 
 
@@ -228,7 +255,7 @@ static inline int toksize(jsmntok_t *tok)
 }
 
 static int ndb_builder_finalize_tag(struct ndb_builder *builder,
-				    union packed_str offset)
+				    union ndb_packed_str offset)
 {
 	if (!cursor_push_u32(&builder->note_cur, offset.offset))
 		return 0;
@@ -239,8 +266,8 @@ static int ndb_builder_finalize_tag(struct ndb_builder *builder,
 /// Unescape and push json strings
 static int ndb_builder_make_json_str(struct ndb_builder *builder,
 				     const char *str, int len,
-				     union packed_str *pstr,
-				     int *written)
+				     union ndb_packed_str *pstr,
+				     int *written, int pack_ids)
 {
 	// let's not care about de-duping these. we should just unescape
 	// in-place directly into the strings table. 
@@ -249,7 +276,7 @@ static int ndb_builder_make_json_str(struct ndb_builder *builder,
 	unsigned char *builder_start;
 
 	// always try compact strings first
-	if (ndb_builder_try_compact_str(builder, str, len, pstr))
+	if (ndb_builder_try_compact_str(builder, str, len, pstr, pack_ids))
 		return 1;
 
 	end = str + len;
@@ -327,8 +354,9 @@ static int ndb_builder_make_json_str(struct ndb_builder *builder,
 static int ndb_builder_push_json_tag(struct ndb_builder *builder,
 				     const char *str, int len)
 {
-	union packed_str pstr;
-	if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL))
+	union ndb_packed_str pstr;
+	int pack_ids = 1;
+	if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL, pack_ids))
 		return 0;
 	return ndb_builder_finalize_tag(builder, pstr);
 }
@@ -474,13 +502,13 @@ int ndb_note_from_json(const char *json, int len, struct ndb_note **note,
 			} else if (jsoneq(json, tok, tok_len, "content")) {
 				// content
 				tok = &parser.toks[i+1];
-				union packed_str pstr;
+				union ndb_packed_str pstr;
 				tok_len = toksize(tok);
-				int written;
+				int written, pack_ids = 0;
 				if (!ndb_builder_make_json_str(&parser.builder,
 							json + tok->start,
 							tok_len, &pstr,
-							&written)) {
+							&written, pack_ids)) {
 					return 0;
 				}
 				parser.builder.note->content_length = written;
@@ -531,8 +559,9 @@ int ndb_builder_new_tag(struct ndb_builder *builder)
 inline int ndb_builder_push_tag_str(struct ndb_builder *builder,
 				    const char *str, int len)
 {
-	union packed_str pstr;
-	if (!ndb_builder_make_str(builder, str, len, &pstr))
+	union ndb_packed_str pstr;
+	int pack_ids = 1;
+	if (!ndb_builder_make_str(builder, str, len, &pstr, pack_ids))
 		return 0;
 	return ndb_builder_finalize_tag(builder, pstr);
 }
diff --git a/nostrdb/nostrdb.h b/nostrdb/nostrdb.h
@@ -4,25 +4,36 @@
 #include <inttypes.h>
 #include "cursor.h"
 
+struct ndb_str {
+	unsigned char flag;
+	union {
+		const char *str;
+		unsigned char *id;
+	};
+};
+
 // these must be byte-aligned, they are directly accessing the serialized data
 // representation
 #pragma pack(push, 1)
 
-union packed_str {
-	uint32_t offset;
+/// We can store byte data in the string table, so the flag byte records whether an entry is an inline packed string or a packed 32-byte id.
+#define NDB_PACKED_STR     0x1
+#define NDB_PACKED_ID      0x2
 
+union ndb_packed_str {
 	struct {
 		char str[3];
 		// we assume little endian everywhere. sorry not sorry.
-		unsigned char flag;
+		unsigned char flag; // NDB_PACKED_STR, etc
 	} packed;
 
+	uint32_t offset;
 	unsigned char bytes[4];
 };
 
 struct ndb_tag {
 	uint16_t count;
-	union packed_str strs[0];
+	union ndb_packed_str strs[0];
 };
 
 struct ndb_tags {
@@ -41,9 +52,8 @@ struct ndb_note {
 	uint32_t created_at;
 	uint32_t kind;
 	uint32_t content_length;
-	union packed_str content;
+	union ndb_packed_str content;
 	uint32_t strings;
-	uint32_t json;
 
 	// nothing can come after tags since it contains variadic data
 	struct ndb_tags tags;
@@ -80,40 +90,40 @@ int ndb_builder_new_tag(struct ndb_builder *builder);
 int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len);
 // BYE BUILDER
 
-static inline int ndb_str_is_packed(union packed_str str)
+static inline struct ndb_str ndb_note_str(struct ndb_note *note,
+					  union ndb_packed_str *pstr)
 {
-	return (str.offset >> 31) & 0x1;
-}
+	struct ndb_str str;
+	str.flag = pstr->packed.flag;
 
+	if (str.flag == NDB_PACKED_STR) {
+		str.str = pstr->packed.str;
+		return str;
+	}
 
-static inline const char * ndb_note_str(struct ndb_note *note,
-					union packed_str *str)
-{
-	if (ndb_str_is_packed(*str))
-		return str->packed.str;
-
-	return ((const char *)note) + note->strings + str->offset;
+	str.str = ((const char *)note) + note->strings + (pstr->offset & 0xFFFFFF);
+	return str;
 }
 
-static inline const char * ndb_tag_str(struct ndb_note *note,
-				       struct ndb_tag *tag, int ind)
+static inline struct ndb_str ndb_tag_str(struct ndb_note *note,
+					 struct ndb_tag *tag, int ind)
 {
 	return ndb_note_str(note, &tag->strs[ind]);
 }
 
-static int ndb_tag_matches_char(struct ndb_note *note,
+static inline int ndb_tag_matches_char(struct ndb_note *note,
 				       struct ndb_tag *tag, int ind, char c)
 {
-	const char *str = ndb_tag_str(note, tag, ind);
-	if (str[0] == '\0')
+	struct ndb_str str = ndb_tag_str(note, tag, ind);
+	if (str.str[0] == '\0')
 		return 0;
-	else if (str[0] == c)
+	else if (str.str[0] == c)
 		return 1;
 	return 0;
 }
 
-static inline const char * ndb_iter_tag_str(struct ndb_iterator *iter,
-					    int ind)
+static inline struct ndb_str ndb_iter_tag_str(struct ndb_iterator *iter,
+					      int ind)
 {
 	return ndb_tag_str(iter->note, iter->tag, ind);
 }
@@ -143,9 +153,9 @@ static inline uint32_t ndb_note_kind(struct ndb_note *note)
 	return note->kind;
 }
 
-static inline const char * ndb_note_content(struct ndb_note *note)
+static inline const char *ndb_note_content(struct ndb_note *note)
 {
-	return ndb_note_str(note, &note->content);
+	return ndb_note_str(note, &note->content).str;
 }
 
 static inline uint32_t ndb_note_content_length(struct ndb_note *note)
@@ -161,55 +171,50 @@ static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes)
 	return note;
 }
 
-static inline union packed_str ndb_offset_str(uint32_t offset)
+static inline union ndb_packed_str ndb_offset_str(uint32_t offset)
 {
 	// ensure accidents like -1 don't corrupt our packed_str
-	union packed_str str;
-	str.offset = offset & 0x7FFFFFFF;
+	union ndb_packed_str str;
+	// most significant byte is reserved for ndb_packtype
+	str.offset = offset & 0xFFFFFF;
 	return str;
 }
 
-static inline union packed_str ndb_char_to_packed_str(char c)
+static inline union ndb_packed_str ndb_char_to_packed_str(char c)
 {
-	union packed_str str;
-	str.packed.flag = 0xFF;
+	union ndb_packed_str str;
+	str.packed.flag = NDB_PACKED_STR;
 	str.packed.str[0] = c;
 	str.packed.str[1] = '\0';
 	return str;
 }
 
-static inline union packed_str ndb_chars_to_packed_str(char c1, char c2)
+static inline union ndb_packed_str ndb_chars_to_packed_str(char c1, char c2)
 {
-	union packed_str str;
-	str.packed.flag = 0xFF;
+	union ndb_packed_str str;
+	str.packed.flag = NDB_PACKED_STR;
 	str.packed.str[0] = c1;
 	str.packed.str[1] = c2;
 	str.packed.str[2] = '\0';
 	return str;
 }
 
-static inline const char * ndb_note_tag_index(struct ndb_note *note,
-					      struct ndb_tag *tag, int index)
-{
-	if (index >= tag->count) {
-		return 0;
-	}
-
-	return ndb_note_str(note, &tag->strs[index]);
-}
-
-static inline int ndb_tags_iterate_start(struct ndb_note *note,
-					 struct ndb_iterator *iter)
+static inline void ndb_tags_iterate_start(struct ndb_note *note,
+					  struct ndb_iterator *iter)
 {
 	iter->note = note;
-	iter->tag = note->tags.tag;
-	iter->index = 0;
-
-	return note->tags.count != 0 && iter->tag->count != 0;
+	iter->tag = NULL;
+	iter->index = -1;
 }
 
 static inline int ndb_tags_iterate_next(struct ndb_iterator *iter)
 {
+	if (iter->tag == NULL || iter->index == -1) {
+		iter->tag = iter->note->tags.tag;
+		iter->index = 0;
+		return iter->note->tags.count != 0;
+	}
+
 	struct ndb_tags *tags = &iter->note->tags;
 
 	if (++iter->index < tags->count) {

	damus nostr ios client
	git clone git://jb55.com/damus
	Log \| Files \| Refs \| README \| LICENSE

M	damus.xcodeproj/project.pbxproj	\|	4	++++
M	damus/Nostr/NostrEvent.swift	\|	26	--------------------------
A	damus/Nostr/ReferencedId.swift	\|	69	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	nostrdb/NdbNote.swift	\|	103	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M	nostrdb/NdbTagElem.swift	\|	64	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M	nostrdb/NdbTagIterator.swift	\|	19	+------------------
M	nostrdb/NdbTagsIterator.swift	\|	21	+++++++++++----------
M	nostrdb/Test/NdbTests.swift	\|	59	++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M	nostrdb/nostrdb.c	\|	99	+++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M	nostrdb/nostrdb.h	\|	107	+++++++++++++++++++++++++++++++++++++++++--------------------------------------