damus

nostr ios client
git clone git://jb55.com/damus
Log | Files | Refs | README | LICENSE

commit 4da23390f81f1ea8b986c4e49f222e76926df95f
parent c74993366bb3bdddaace853016c15414b8b8e9e6
Author: William Casarin <jb55@jb55.com>
Date:   Sat, 22 Jul 2023 16:57:16 -0700

ndb: update lib

Diffstat:
Mnostrdb/nostrdb.c | 254+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mnostrdb/nostrdb.h | 14+++++++++++++-
2 files changed, 237 insertions(+), 31 deletions(-)

diff --git a/nostrdb/nostrdb.c b/nostrdb/nostrdb.c @@ -4,6 +4,7 @@ #include "hex.h" #include "cursor.h" #include <stdlib.h> +#include <limits.h> struct ndb_json_parser { const char *json; @@ -56,6 +57,26 @@ int ndb_builder_new(struct ndb_builder *builder, unsigned char *buf, return 1; } +/// Check for small strings to pack +static inline int ndb_builder_try_compact_str(struct ndb_builder *builder, + const char *str, int len, + union packed_str *pstr) +{ + if (len == 0) { + *pstr = ndb_char_to_packed_str(0); + return 1; + } else if (len == 1) { + *pstr = ndb_char_to_packed_str(str[0]); + return 1; + } else if (len == 2) { + *pstr = ndb_chars_to_packed_str(str[0], str[1]); + return 1; + } + + return 0; +} + + static inline int ndb_json_parser_init(struct ndb_json_parser *p, const char *json, int json_len, unsigned char *buf, int bufsize) @@ -121,41 +142,40 @@ struct ndb_note * ndb_builder_note(struct ndb_builder *builder) return builder->note; } -int ndb_builder_make_string(struct ndb_builder *builder, const char *str, - int len, union packed_str *pstr) +/// find an existing string via str_indices. these indices only exist in the +/// builder phase just for this purpose. +static inline int ndb_builder_find_str(struct ndb_builder *builder, + const char *str, int len, + union packed_str *pstr) { - uint32_t loc; - - if (len == 0) { - *pstr = ndb_char_to_packed_str(0); - return 1; - } else if (len == 1) { - *pstr = ndb_char_to_packed_str(str[0]); - return 1; - } else if (len == 2) { - *pstr = ndb_chars_to_packed_str(str[0], str[1]); - return 1; - } - // find existing matching string to avoid duplicate strings int indices = cursor_count(&builder->str_indices, sizeof(uint32_t)); for (int i = 0; i < indices; i++) { uint32_t index = ((uint32_t*)builder->str_indices.start)[i]; const char *some_str = (const char*)builder->strings.start + index; - if (!strcmp(some_str, str)) { + if (!strncmp(some_str, str, len)) { // found an existing matching str, use that index *pstr = ndb_offset_str(index); return 1; } } + return 0; +} + +static int ndb_builder_push_str(struct ndb_builder *builder, const char *str, + int len, union packed_str *pstr) +{ + uint32_t loc; + // no string found, push a new one loc = builder->strings.p - builder->strings.start; if (!(cursor_push(&builder->strings, (unsigned char*)str, len) && cursor_push_byte(&builder->strings, '\0'))) { return 0; } + *pstr = ndb_offset_str(loc); // record in builder indices. ignore return value, if we can't cache it @@ -165,10 +185,30 @@ int ndb_builder_make_string(struct ndb_builder *builder, const char *str, return 1; } +static int ndb_builder_push_unpacked_str(struct ndb_builder *builder, + const char *str, int len, + union packed_str *pstr) +{ + if (ndb_builder_find_str(builder, str, len, pstr)) + return 1; + + return ndb_builder_push_str(builder, str, len, pstr); +} + +int ndb_builder_make_str(struct ndb_builder *builder, const char *str, int len, + union packed_str *pstr) +{ + if (ndb_builder_try_compact_str(builder, str, len, pstr)) + return 1; + + return ndb_builder_push_unpacked_str(builder, str, len, pstr); +} + int ndb_builder_set_content(struct ndb_builder *builder, const char *content, int len) { - return ndb_builder_make_string(builder, content, len, &builder->note->content); + builder->note->content_length = len; + return ndb_builder_make_str(builder, content, len, &builder->note->content); } @@ -187,9 +227,115 @@ static inline int toksize(jsmntok_t *tok) return tok->end - tok->start; } +static int ndb_builder_finalize_tag(struct ndb_builder *builder, + union packed_str offset) +{ + if (!cursor_push_u32(&builder->note_cur, offset.offset)) + return 0; + builder->current_tag->count++; + return 1; +} + +/// Unescape and push json strings +static int ndb_builder_make_json_str(struct ndb_builder *builder, + const char *str, int len, + union packed_str *pstr, + int *written) +{ + // let's not care about de-duping these. we should just unescape + // in-place directly into the strings table. + + const char *p, *end, *start; + unsigned char *builder_start; + + // always try compact strings first + if (ndb_builder_try_compact_str(builder, str, len, pstr)) + return 1; + + end = str + len; + start = str; // Initialize start to the beginning of the string + + *pstr = ndb_offset_str(builder->strings.p - builder->strings.start); + builder_start = builder->strings.p; + + for (p = str; p < end; p++) { + if (*p == '\\' && p+1 < end) { + // Push the chunk of unescaped characters before this escape sequence + if (start < p && !cursor_push(&builder->strings, + (unsigned char *)start, + p - start)) { + return 0; + } + + switch (*(p+1)) { + case 't': + if (!cursor_push_byte(&builder->strings, '\t')) + return 0; + break; + case 'n': + if (!cursor_push_byte(&builder->strings, '\n')) + return 0; + break; + case 'r': + if (!cursor_push_byte(&builder->strings, '\r')) + return 0; + break; + case 'b': + if (!cursor_push_byte(&builder->strings, '\b')) + return 0; + break; + case 'f': + if (!cursor_push_byte(&builder->strings, '\f')) + return 0; + break; + case '\\': + if (!cursor_push_byte(&builder->strings, '\\')) + return 0; + break; + case '"': + if (!cursor_push_byte(&builder->strings, '"')) + return 0; + break; + case 'u': + // these aren't handled yet + return 0; + default: + if (!cursor_push_byte(&builder->strings, *p) || + !cursor_push_byte(&builder->strings, *(p+1))) + return 0; + break; + } + + p++; // Skip the character following the backslash + start = p + 1; // Update the start pointer to the next character + } + } + + // Handle the last chunk after the last escape sequence (or if there are no escape sequences at all) + if (start < p && !cursor_push(&builder->strings, (unsigned char *)start, + p - start)) { + return 0; + } + + if (written) + *written = builder->strings.p - builder_start; + + // TODO: dedupe these!? + return cursor_push_byte(&builder->strings, '\0'); +} + +static int ndb_builder_push_json_tag(struct ndb_builder *builder, + const char *str, int len) +{ + union packed_str pstr; + if (!ndb_builder_make_json_str(builder, str, len, &pstr, NULL)) + return 0; + return ndb_builder_finalize_tag(builder, pstr); +} + // Push a json array into an ndb tag ["p", "abcd..."] -> struct ndb_tag -static inline int ndb_builder_tag_from_json_array(struct ndb_json_parser *p, - jsmntok_t *array) +static int ndb_builder_tag_from_json_array(struct ndb_json_parser *p, + jsmntok_t *array) { jsmntok_t *str_tok; const char *str; @@ -204,8 +350,10 @@ static inline int ndb_builder_tag_from_json_array(struct ndb_json_parser *p, str_tok = &array[i+1]; str = p->json + str_tok->start; - if (!ndb_builder_push_tag_str(&p->builder, str, toksize(str_tok))) + if (!ndb_builder_push_json_tag(&p->builder, str, + toksize(str_tok))) { return 0; + } } return 1; @@ -222,11 +370,41 @@ static inline int ndb_builder_process_json_tags(struct ndb_json_parser *p, return 1; for (int i = 0; i < array->size; i++) { - if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) + if (!ndb_builder_tag_from_json_array(p, &tag[i+1])) return 0; - tag += tag[i+1].size; + tag += tag[i+1].size; + } + + return 1; +} + +static int parse_unsigned_int(const char *start, int len, unsigned int *num) +{ + unsigned int number = 0; + const char *p = start, *end = start + len; + int digits = 0; + + while (p < end) { + char c = *p; + + if (c < '0' || c > '9') + break; + + // Check for overflow + char digit = c - '0'; + if (number > (UINT_MAX - digit) / 10) + return 0; // Overflow detected + + number = number * 10 + digit; + + p++; + digits++; } + if (digits == 0) + return 0; + + *num = number; return 1; } @@ -278,17 +456,36 @@ int ndb_note_from_json(const char *json, int len, struct ndb_note **note, } else if (start[0] == 'k' && jsoneq(json, tok, tok_len, "kind")) { // kind tok = &parser.toks[i+1]; - printf("json_kind %.*s\n", toksize(tok), json + tok->start); + start = json + tok->start; + if (tok->type != JSMN_PRIMITIVE || tok_len <= 0) + return 0; + if (!parse_unsigned_int(start, toksize(tok), + &parser.builder.note->kind)) + return 0; } else if (start[0] == 'c') { if (jsoneq(json, tok, tok_len, "created_at")) { // created_at tok = &parser.toks[i+1]; - printf("json_created_at %.*s\n", toksize(tok), json + tok->start); + start = json + tok->start; + if (tok->type != JSMN_PRIMITIVE || tok_len <= 0) + return 0; + if (!parse_unsigned_int(start, toksize(tok), + &parser.builder.note->created_at)) + return 0; } else if (jsoneq(json, tok, tok_len, "content")) { // content tok = &parser.toks[i+1]; - if (!ndb_builder_set_content(&parser.builder, json + tok->start, toksize(tok))) + union packed_str pstr; + tok_len = toksize(tok); + int written; + if (!ndb_builder_make_json_str(&parser.builder, + json + tok->start, + tok_len, &pstr, + &written)) { return 0; + } + parser.builder.note->content_length = written; + parser.builder.note->content = pstr; } } else if (start[0] == 't' && jsoneq(json, tok, tok_len, "tags")) { tok = &parser.toks[i+1]; @@ -336,10 +533,7 @@ inline int ndb_builder_push_tag_str(struct ndb_builder *builder, const char *str, int len) { union packed_str pstr; - if (!ndb_builder_make_string(builder, str, len, &pstr)) + if (!ndb_builder_make_str(builder, str, len, &pstr)) return 0; - if (!cursor_push_u32(&builder->note_cur, pstr.offset)) - return 0; - builder->current_tag->count++; - return 1; + return ndb_builder_finalize_tag(builder, pstr); } diff --git a/nostrdb/nostrdb.h b/nostrdb/nostrdb.h @@ -40,6 +40,7 @@ struct ndb_note { uint32_t created_at; uint32_t kind; + uint32_t content_length; union packed_str content; uint32_t strings; uint32_t json; @@ -84,6 +85,7 @@ static inline int ndb_str_is_packed(union packed_str str) return (str.offset >> 31) & 0x1; } + static inline const char * ndb_note_str(struct ndb_note *note, union packed_str *str) { @@ -99,7 +101,7 @@ static inline const char * ndb_tag_str(struct ndb_note *note, return ndb_note_str(note, &tag->strs[ind]); } -static inline int ndb_tag_matches_char(struct ndb_note *note, +static int ndb_tag_matches_char(struct ndb_note *note, struct ndb_tag *tag, int ind, char c) { const char *str = ndb_tag_str(note, tag, ind); @@ -136,11 +138,21 @@ static inline uint32_t ndb_note_created_at(struct ndb_note *note) return note->created_at; } +static inline uint32_t ndb_note_kind(struct ndb_note *note) +{ + return note->kind; +} + static inline const char * ndb_note_content(struct ndb_note *note) { return ndb_note_str(note, &note->content); } +static inline uint32_t ndb_note_content_length(struct ndb_note *note) +{ + return note->content_length; +} + static inline struct ndb_note * ndb_note_from_bytes(unsigned char *bytes) { struct ndb_note *note = (struct ndb_note *)bytes;