commit dc7826c4e59dfd50901ac2aad5101e8c994b2c90
parent 4eee715bcddcbc9cc34dea40a12a23a69302a17a
Author: William Casarin <jb55@jb55.com>
Date: Fri, 21 Jul 2023 14:55:23 -0700
c: add nostrdb c lib
Diffstat:
4 files changed, 558 insertions(+), 0 deletions(-)
diff --git a/damus-c/damus-Bridging-Header.h b/damus-c/damus-Bridging-Header.h
@@ -8,4 +8,5 @@
#include "nostr_bech32.h"
#include "wasm.h"
#include "nostrscript.h"
+#include "nostrdb.h"
diff --git a/damus-c/nostrdb.c b/damus-c/nostrdb.c
@@ -0,0 +1,344 @@
+
+#include "nostrdb.h"
+#include "jsmn.h"
+#include "hex.h"
+#include "cursor.h"
+#include <stdlib.h>
+
+struct ndb_json_parser {
+ const char *json;
+ int json_len;
+ struct ndb_builder builder;
+ jsmn_parser json_parser;
+ jsmntok_t *toks, *toks_end;
+ int num_tokens;
+};
+
+static inline int cursor_push_tag(struct cursor *cur, struct ndb_tag *tag)
+{
+ return cursor_push_u16(cur, tag->count);
+}
+
+int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf,
+ int bufsize)
+{
+ struct ndb_note *note;
+ struct cursor mem;
+ int half, size, str_indices_size;
+
+ // come on bruh
+ if (bufsize < sizeof(struct ndb_note) * 2)
+ return 0;
+
+ str_indices_size = bufsize / 32;
+ size = bufsize - str_indices_size;
+ half = size / 2;
+
+ //debug("size %d half %d str_indices %d\n", size, half, str_indices_size);
+
+ // make a safe cursor of our available memory
+ make_cursor(buf, buf + bufsize, &mem);
+
+ note = builder->note = (struct ndb_note *)buf;
+
+ // take slices of the memory into subcursors
+ if (!(cursor_slice(&mem, &builder->note_cur, half) &&
+ cursor_slice(&mem, &builder->strings, half) &&
+ cursor_slice(&mem, &builder->str_indices, str_indices_size))) {
+ return 0;
+ }
+
+ memset(note, 0, sizeof(*note));
+ builder->note_cur.p += sizeof(*note);
+
+ note->version = 1;
+
+ return 1;
+}
+
+static inline int ndb_json_parser_init(struct ndb_json_parser *p,
+ const char *json, int json_len,
+ unsigned char *buf, int bufsize)
+{
+ int half = bufsize / 2;
+
+ unsigned char *tok_start = buf + half;
+ unsigned char *tok_end = buf + bufsize;
+
+ p->toks = (jsmntok_t*)tok_start;
+ p->toks_end = (jsmntok_t*)tok_end;
+ p->num_tokens = 0;
+ p->json = json;
+ p->json_len = json_len;
+
+ // ndb_builder gets the first half of the buffer, and jsmn gets the
+ // second half. I like this way of alloating memory (without actually
+ // dynamically allocating memory). You get one big chunk upfront and
+ // then submodules can recursively subdivide it. Maybe you could do
+ // something even more clever like golden-ratio style subdivision where
+ // the more important stuff gets a larger chunk and then it spirals
+ // downward into smaller chunks. Thanks for coming to my TED talk.
+
+ if (!ndb_builder_new(&p->builder, buf, half))
+ return 0;
+
+ jsmn_init(&p->json_parser);
+
+ return 1;
+}
+
+static inline int ndb_json_parser_parse(struct ndb_json_parser *p)
+{
+ int cap = ((unsigned char *)p->toks_end - (unsigned char*)p->toks)/sizeof(*p->toks);
+ p->num_tokens =
+ jsmn_parse(&p->json_parser, p->json, p->json_len, p->toks, cap);
+
+ return p->num_tokens;
+}
+
+int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note)
+{
+ int strings_len = builder->strings.p - builder->strings.start;
+ unsigned char *end = builder->note_cur.p + strings_len;
+ int total_size = end - builder->note_cur.start;
+
+ // move the strings buffer next to the end of our ndb_note
+ memmove(builder->note_cur.p, builder->strings.start, strings_len);
+
+ // set the strings location
+ builder->note->strings = builder->note_cur.p - builder->note_cur.start;
+
+ // record the total size
+ //builder->note->size = total_size;
+
+ *note = builder->note;
+
+ return total_size;
+}
+
+struct ndb_note * ndb_builder_note(struct ndb_builder *builder)
+{
+ return builder->note;
+}
+
+int ndb_builder_make_string(struct ndb_builder *builder, const char *str,
+ int len, union packed_str *pstr)
+{
+ uint32_t loc;
+
+ if (len == 0) {
+ *pstr = ndb_char_to_packed_str(0);
+ return 1;
+ } else if (len == 1) {
+ *pstr = ndb_char_to_packed_str(str[0]);
+ return 1;
+ } else if (len == 2) {
+ *pstr = ndb_chars_to_packed_str(str[0], str[1]);
+ return 1;
+ }
+
+ // find existing matching string to avoid duplicate strings
+ int indices = cursor_count(&builder->str_indices, sizeof(uint32_t));
+ for (int i = 0; i < indices; i++) {
+ uint32_t index = ((uint32_t*)builder->str_indices.start)[i];
+ const char *some_str = (const char*)builder->strings.start + index;
+
+ if (!strcmp(some_str, str)) {
+ // found an existing matching str, use that index
+ *pstr = ndb_offset_str(index);
+ return 1;
+ }
+ }
+
+ // no string found, push a new one
+ loc = builder->strings.p - builder->strings.start;
+ if (!(cursor_push(&builder->strings, (unsigned char*)str, len) &&
+ cursor_push_byte(&builder->strings, '\0'))) {
+ return 0;
+ }
+ *pstr = ndb_offset_str(loc);
+
+ // record in builder indices. ignore return value, if we can't cache it
+ // then whatever
+ cursor_push_u32(&builder->str_indices, loc);
+
+ return 1;
+}
+
+int ndb_builder_set_content(struct ndb_builder *builder, const char *content,
+ int len)
+{
+ return ndb_builder_make_string(builder, content, len, &builder->note->content);
+}
+
+
+static inline int jsoneq(const char *json, jsmntok_t *tok, int tok_len,
+ const char *s)
+{
+ if (tok->type == JSMN_STRING && (int)strlen(s) == tok_len &&
+ memcmp(json + tok->start, s, tok_len) == 0) {
+ return 1;
+ }
+ return 0;
+}
+
+static inline int toksize(jsmntok_t *tok)
+{
+ return tok->end - tok->start;
+}
+
+// Push a json array into an ndb tag ["p", "abcd..."] -> struct ndb_tag
+static inline int ndb_builder_tag_from_json_array(struct ndb_json_parser *p,
+ jsmntok_t *array)
+{
+ jsmntok_t *str_tok;
+ const char *str;
+
+ if (array->size == 0)
+ return 0;
+
+ if (!ndb_builder_new_tag(&p->builder))
+ return 0;
+
+ for (int i = 0; i < array->size; i++) {
+ str_tok = &array[i+1];
+ str = p->json + str_tok->start;
+
+ if (!ndb_builder_push_tag_str(&p->builder, str, toksize(str_tok)))
+ return 0;
+ }
+
+ return 1;
+}
+
+// Push json tags into ndb data
+// [["t", "hashtag"], ["p", "abcde..."]] -> struct ndb_tags
+static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p,
+ jsmntok_t *array)
+{
+ jsmntok_t *tag = array;
+
+ if (array->size == 0)
+ return 1;
+
+ for (int i = 0; i < array->size; i++) {
+ if (!ndb_builder_tag_from_json_array(p, &tag[i+1]))
+ return 0;
+ tag += tag[i+1].size;
+ }
+
+ return 1;
+}
+
+
+int ndb_note_from_json(const char *json, int len, struct ndb_note **note,
+ unsigned char *buf, int bufsize)
+{
+ jsmntok_t *tok = NULL;
+ unsigned char hexbuf[64];
+
+ int i, tok_len, res;
+ const char *start;
+ struct ndb_json_parser parser;
+
+ ndb_json_parser_init(&parser, json, len, buf, bufsize);
+ res = ndb_json_parser_parse(&parser);
+ if (res < 0)
+ return res;
+
+ if (parser.num_tokens < 1 || parser.toks[0].type != JSMN_OBJECT)
+ return 0;
+
+ for (i = 1; i < parser.num_tokens; i++) {
+ tok = &parser.toks[i];
+ start = json + tok->start;
+ tok_len = toksize(tok);
+
+ //printf("toplevel %.*s %d\n", tok_len, json + tok->start, tok->type);
+ if (tok_len == 0 || i + 1 >= parser.num_tokens)
+ continue;
+
+ if (start[0] == 'p' && jsoneq(json, tok, tok_len, "pubkey")) {
+ // pubkey
+ tok = &parser.toks[i+1];
+ hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf));
+ ndb_builder_set_pubkey(&parser.builder, hexbuf);
+ } else if (tok_len == 2 && start[0] == 'i' && start[1] == 'd') {
+ // id
+ tok = &parser.toks[i+1];
+ hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf));
+ // TODO: validate id
+ ndb_builder_set_id(&parser.builder, hexbuf);
+ } else if (tok_len == 3 && start[0] == 's' && start[1] == 'i' && start[2] == 'g') {
+ // sig
+ tok = &parser.toks[i+1];
+ hex_decode(json + tok->start, toksize(tok), hexbuf, sizeof(hexbuf));
+ ndb_builder_set_signature(&parser.builder, hexbuf);
+ } else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) {
+ // kind
+ tok = &parser.toks[i+1];
+ printf("json_kind %.*s\n", toksize(tok), json + tok->start);
+ } else if (start[0] == 'c') {
+ if (jsoneq(json, tok, tok_len, "created_at")) {
+ // created_at
+ tok = &parser.toks[i+1];
+ printf("json_created_at %.*s\n", toksize(tok), json + tok->start);
+ } else if (jsoneq(json, tok, tok_len, "content")) {
+ // content
+ tok = &parser.toks[i+1];
+ if (!ndb_builder_set_content(&parser.builder, json + tok->start, toksize(tok)))
+ return 0;
+ }
+ } else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) {
+ tok = &parser.toks[i+1];
+ ndb_builder_process_json_tags(&parser, tok);
+ i += tok->size;
+ }
+ }
+
+ return ndb_builder_finalize(&parser.builder, note);
+}
+
+void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey)
+{
+ memcpy(builder->note->pubkey, pubkey, 32);
+}
+
+void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id)
+{
+ memcpy(builder->note->id, id, 32);
+}
+
+void ndb_builder_set_signature(struct ndb_builder *builder,
+ unsigned char *signature)
+{
+ memcpy(builder->note->signature, signature, 64);
+}
+
+void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind)
+{
+ builder->note->kind = kind;
+}
+
+int ndb_builder_new_tag(struct ndb_builder *builder)
+{
+ builder->note->tags.count++;
+ struct ndb_tag tag = {0};
+ builder->current_tag = (struct ndb_tag *)builder->note_cur.p;
+ return cursor_push_tag(&builder->note_cur, &tag);
+}
+
+/// Push an element to the current tag
+///
+/// Basic idea is to call ndb_builder_new_tag
+inline int ndb_builder_push_tag_str(struct ndb_builder *builder,
+ const char *str, int len)
+{
+ union packed_str pstr;
+ if (!ndb_builder_make_string(builder, str, len, &pstr))
+ return 0;
+ if (!cursor_push_u32(&builder->note_cur, pstr.offset))
+ return 0;
+ builder->current_tag->count++;
+ return 1;
+}
diff --git a/damus-c/nostrdb.h b/damus-c/nostrdb.h
@@ -0,0 +1,212 @@
+#ifndef NOSTRDB_H
+#define NOSTRDB_H
+
+#include <inttypes.h>
+#include "cursor.h"
+
+// these must be byte-aligned, they are directly accessing the serialized data
+// representation
+#pragma pack(push, 1)
+
+union packed_str {
+ uint32_t offset;
+
+ struct {
+ char str[3];
+ // we assume little endian everywhere. sorry not sorry.
+ unsigned char flag;
+ } packed;
+
+ unsigned char bytes[4];
+};
+
+struct ndb_tag {
+ uint16_t count;
+ union packed_str strs[0];
+};
+
+struct ndb_tags {
+ uint16_t count;
+ struct ndb_tag tag[0];
+};
+
+// v1
+struct ndb_note {
+ unsigned char version; // v=1
+ unsigned char padding[3]; // keep things aligned
+ unsigned char id[32];
+ unsigned char pubkey[32];
+ unsigned char signature[64];
+
+ uint32_t created_at;
+ uint32_t kind;
+ union packed_str content;
+ uint32_t strings;
+ uint32_t json;
+
+ // nothing can come after tags since it contains variadic data
+ struct ndb_tags tags;
+};
+
+#pragma pack(pop)
+
+struct ndb_builder {
+ struct cursor note_cur;
+ struct cursor strings;
+ struct cursor str_indices;
+ struct ndb_note *note;
+ struct ndb_tag *current_tag;
+};
+
+struct ndb_iterator {
+ struct ndb_note *note;
+ struct ndb_tag *tag;
+
+ // current outer index
+ int index;
+};
+
+// HI BUILDER
+int ndb_note_from_json(const char *json, int len, struct ndb_note **, unsigned char *buf, int buflen);
+int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, int bufsize);
+int ndb_builder_finalize(struct ndb_builder *builder, struct ndb_note **note);
+int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int len);
+void ndb_builder_set_signature(struct ndb_builder *builder, unsigned char *signature);
+void ndb_builder_set_pubkey(struct ndb_builder *builder, unsigned char *pubkey);
+void ndb_builder_set_id(struct ndb_builder *builder, unsigned char *id);
+void ndb_builder_set_kind(struct ndb_builder *builder, uint32_t kind);
+int ndb_builder_new_tag(struct ndb_builder *builder);
+int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len);
+// BYE BUILDER
+
+static inline int ndb_str_is_packed(union packed_str str)
+{
+ return (str.offset >> 31) & 0x1;
+}
+
+static inline const char * ndb_note_str(struct ndb_note *note,
+ union packed_str *str)
+{
+ if (ndb_str_is_packed(*str))
+ return str->packed.str;
+
+ return ((const char *)note) + note->strings + str->offset;
+}
+
+static inline const char * ndb_tag_str(struct ndb_note *note,
+ struct ndb_tag *tag, int ind)
+{
+ return ndb_note_str(note, &tag->strs[ind]);
+}
+
+static inline int ndb_tag_matches_char(struct ndb_note *note,
+ struct ndb_tag *tag, int ind, char c)
+{
+ const char *str = ndb_tag_str(note, tag, ind);
+ if (str[0] == '\0')
+ return 0;
+ else if (str[0] == c)
+ return 1;
+ return 0;
+}
+
+static inline const char * ndb_iter_tag_str(struct ndb_iterator *iter,
+ int ind)
+{
+ return ndb_tag_str(iter->note, iter->tag, ind);
+}
+
+static inline unsigned char * ndb_note_id(struct ndb_note *note)
+{
+ return note->id;
+}
+
+static inline unsigned char * ndb_note_pubkey(struct ndb_note *note)
+{
+ return note->pubkey;
+}
+
+static inline unsigned char * ndb_note_signature(struct ndb_note *note)
+{
+ return note->signature;
+}
+
+static inline uint32_t ndb_note_created_at(struct ndb_note *note)
+{
+ return note->created_at;
+}
+
+static inline const char * ndb_note_content(struct ndb_note *note)
+{
+ return ndb_note_str(note, ¬e->content);
+}
+
+static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes)
+{
+ struct ndb_note *note = (struct ndb_note *)bytes;
+ if (note->version != 1)
+ return 0;
+ return note;
+}
+
+static inline union packed_str ndb_offset_str(uint32_t offset)
+{
+ // ensure accidents like -1 don't corrupt our packed_str
+ union packed_str str;
+ str.offset = offset & 0x7FFFFFFF;
+ return str;
+}
+
+static inline union packed_str ndb_char_to_packed_str(char c)
+{
+ union packed_str str;
+ str.packed.flag = 0xFF;
+ str.packed.str[0] = c;
+ str.packed.str[1] = '\0';
+ return str;
+}
+
+static inline union packed_str ndb_chars_to_packed_str(char c1, char c2)
+{
+ union packed_str str;
+ str.packed.flag = 0xFF;
+ str.packed.str[0] = c1;
+ str.packed.str[1] = c2;
+ str.packed.str[2] = '\0';
+ return str;
+}
+
+static inline const char * ndb_note_tag_index(struct ndb_note *note,
+ struct ndb_tag *tag, int index)
+{
+ if (index >= tag->count) {
+ return 0;
+ }
+
+ return ndb_note_str(note, &tag->strs[index]);
+}
+
+static inline int ndb_tags_iterate_start(struct ndb_note *note,
+ struct ndb_iterator *iter)
+{
+ iter->note = note;
+ iter->tag = note->tags.tag;
+ iter->index = 0;
+
+ return note->tags.count != 0 && iter->tag->count != 0;
+}
+
+static inline int ndb_tags_iterate_next(struct ndb_iterator *iter)
+{
+ struct ndb_tags *tags = &iter->note->tags;
+
+ if (++iter->index < tags->count) {
+ uint32_t tag_data_size = iter->tag->count * sizeof(iter->tag->strs[0]);
+ iter->tag = (struct ndb_tag *)(iter->tag->strs[0].bytes + tag_data_size);
+ return 1;
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/damus.xcodeproj/project.pbxproj b/damus.xcodeproj/project.pbxproj
@@ -2410,6 +2410,7 @@
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
+ "GCC_PREPROCESSOR_DEFINITIONS[arch=*]" = "";
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;