nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

commit 43efda619f814bff0f3ab3bd89a65ca0cb792d79
parent 62ffacbdc88879cb29fb9ed4c9c5d0c1a12eafdf
Author: William Casarin <jb55@jb55.com>
Date:   Fri, 16 Aug 2024 12:17:00 -0700

ndb_filter_from_json

Changelog-Added: Add method for parsing filter json
Signed-off-by: William Casarin <jb55@jb55.com>

Diffstat:
M src/nostrdb.c | 292 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/nostrdb.h |  10 +++++++++-
M test.c        |  79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 376 insertions(+), 5 deletions(-)

diff --git a/src/nostrdb.c b/src/nostrdb.c @@ -726,7 +726,8 @@ static int ndb_filter_start_field_impl(struct ndb_filter *filter, enum ndb_filte for (i = 0; i < filter->num_elements; i++) { el = ndb_filter_get_elements(filter, i); assert(el); - if (el->field.type == field) { + // TODO: fix this tags check to try to find matching tags + if (el->field.type == field && field != NDB_FILTER_TAGS) { fprintf(stderr, "ndb_filter_start_field: field '%s' already exists\n", ndb_filter_field_name(field)); return 0; @@ -796,7 +797,9 @@ static int ndb_filter_add_element(struct ndb_filter *filter, union ndb_filter_el return 0; break; case NDB_ELEMENT_STRING: - if (!cursor_push_c_str(&filter->data_buf, el.string)) + if (!cursor_push(&filter->data_buf, (unsigned char *)el.string.string, el.string.len)) + return 0; + if (!cursor_push_byte(&filter->data_buf, 0)) return 0; break; case NDB_ELEMENT_INT: @@ -840,7 +843,8 @@ static int ndb_filter_set_elem_type(struct ndb_filter *filter, return 1; } -int ndb_filter_add_str_element(struct ndb_filter *filter, const char *str) + +int ndb_filter_add_str_element_len(struct ndb_filter *filter, const char *str, int len) { union ndb_filter_element el; struct ndb_filter_elements *current; @@ -864,10 +868,17 @@ int ndb_filter_add_str_element(struct ndb_filter *filter, const char *str) if (!ndb_filter_set_elem_type(filter, NDB_ELEMENT_STRING)) return 0; - el.string = str; + el.string.string = str; + el.string.len = len; + return ndb_filter_add_element(filter, el); } +int ndb_filter_add_str_element(struct ndb_filter *filter, const char *str) +{ + return ndb_filter_add_str_element_len(filter, str, strlen(str)); +} + int ndb_filter_add_int_element(struct ndb_filter *filter, uint64_t integer) { union ndb_filter_element el; @@ -5460,6 +5471,260 @@ int ndb_ws_event_from_json(const char *json, int len, struct ndb_tce *tce, return 0; } +static enum ndb_filter_fieldtype +ndb_filter_parse_field(const char *tok, int len, char *tagchar) +{ + *tagchar = 0; + + if 
(len == 0) + return 0; + + if (len == 7 && !strncmp(tok, "authors", 7)) { + return NDB_FILTER_AUTHORS; + } else if (len == 3 && !strncmp(tok, "ids", 3)) { + return NDB_FILTER_IDS; + } else if (len == 5 && !strncmp(tok, "kinds", 5)) { + return NDB_FILTER_KINDS; + } else if (len == 2 && tok[0] == '#') { + *tagchar = tok[1]; + return NDB_FILTER_TAGS; + } else if (len == 5 && !strncmp(tok, "since", 5)) { + return NDB_FILTER_SINCE; + } else if (len == 5 && !strncmp(tok, "until", 5)) { + return NDB_FILTER_UNTIL; + } else if (len == 5 && !strncmp(tok, "limit", 5)) { + return NDB_FILTER_LIMIT; + } + + return 0; +} + +static int ndb_filter_parse_json_ids(struct ndb_json_parser *parser, + struct ndb_filter *filter) +{ + jsmntok_t *tok; + const char *start; + unsigned char hexbuf[32]; + int tok_len, i, size; + + tok = &parser->toks[parser->i++]; + + if (tok->type != JSMN_ARRAY) { + ndb_debug("parse_json_ids: not an array\n"); + return 0; + } + + size = tok->size; + + for (i = 0; i < size; parser->i++, i++) { + tok = &parser->toks[parser->i]; + start = parser->json + tok->start; + tok_len = toksize(tok); + + if (tok->type != JSMN_STRING) { + ndb_debug("parse_json_ids: not a string '%d'\n", tok->type); + return 0; + } + + if (tok_len != 64) { + ndb_debug("parse_json_ids: not len 64: '%.*s'\n", tok_len, start); + return 0; + } + + // id + if (!hex_decode(start, tok_len, hexbuf, sizeof(hexbuf))) { + ndb_debug("parse_json_ids: hex decode failed\n"); + return 0; + } + + ndb_debug("adding id elem\n"); + if (!ndb_filter_add_id_element(filter, hexbuf)) { + ndb_debug("parse_json_ids: failed to add id element\n"); + return 0; + } + } + + parser->i--; + return 1; +} + +static int ndb_filter_parse_json_elems(struct ndb_json_parser *parser, + struct ndb_filter *filter) +{ + jsmntok_t *tok; + const char *start; + int tok_len; + unsigned char hexbuf[32]; + enum ndb_generic_element_type typ; + tok = NULL; + int i, size; + + tok = &parser->toks[parser->i++]; + + if (tok->type != JSMN_ARRAY) + 
return 0; + + size = tok->size; + + for (i = 0; i < size; i++, parser->i++) { + tok = &parser->toks[parser->i]; + start = parser->json + tok->start; + tok_len = toksize(tok); + + if (tok->type != JSMN_STRING) + return 0; + + if (i == 0) { + if (tok_len == 64 && hex_decode(start, 64, hexbuf, sizeof(hexbuf))) { + typ = NDB_ELEMENT_ID; + if (!ndb_filter_add_id_element(filter, hexbuf)) { + ndb_debug("failed to push id elem\n"); + return 0; + } + } else { + typ = NDB_ELEMENT_STRING; + if (!ndb_filter_add_str_element_len(filter, start, tok_len)) + return 0; + } + } else if (typ == NDB_ELEMENT_ID) { + if (!hex_decode(start, 64, hexbuf, sizeof(hexbuf))) + return 0; + if (!ndb_filter_add_id_element(filter, hexbuf)) + return 0; + } else if (typ == NDB_ELEMENT_STRING) { + if (!ndb_filter_add_str_element_len(filter, start, tok_len)) + return 0; + } else { + // ??? + return 0; + } + } + + parser->i--; + return 1; +} + +static int ndb_filter_parse_json_int(struct ndb_json_parser *parser, + struct ndb_filter *filter) +{ + jsmntok_t *tok; + const char *start; + int tok_len; + unsigned int value; + + tok = &parser->toks[parser->i]; + start = parser->json + tok->start; + tok_len = toksize(tok); + + if (tok->type != JSMN_PRIMITIVE) + return 0; + + if (!parse_unsigned_int(start, tok_len, &value)) + return 0; + + if (!ndb_filter_add_int_element(filter, (uint64_t)value)) + return 0; + + ndb_debug("added int elem %d\n", value); + + return 1; +} + + +static int ndb_filter_parse_json_ints(struct ndb_json_parser *parser, + struct ndb_filter *filter) +{ + jsmntok_t *tok; + int size, i; + + tok = &parser->toks[parser->i++]; + + if (tok->type != JSMN_ARRAY) + return 0; + + size = tok->size; + + for (i = 0; i < size; parser->i++, i++) { + if (!ndb_filter_parse_json_int(parser, filter)) + return 0; + } + + parser->i--; + return 1; +} + + +static int ndb_filter_parse_json(struct ndb_json_parser *parser, + struct ndb_filter *filter) +{ + jsmntok_t *tok = NULL; + const char *json = parser->json; + 
const char *start; + char tag; + int tok_len; + enum ndb_filter_fieldtype field; + + if (parser->toks[parser->i++].type != JSMN_OBJECT) + return 0; + + for (; parser->i < parser->num_tokens; parser->i++) { + tok = &parser->toks[parser->i++]; + start = json + tok->start; + tok_len = toksize(tok); + + if (!(field = ndb_filter_parse_field(start, tok_len, &tag))) { + ndb_debug("failed field '%.*s'\n", tok_len, start); + continue; + } + + if (tag) { + ndb_debug("starting tag field '%c'\n", tag); + if (!ndb_filter_start_tag_field(filter, tag)) { + ndb_debug("failed to start tag field '%c'\n", tag); + return 0; + } + } else if (!ndb_filter_start_field(filter, field)) { + ndb_debug("field already started\n"); + return 0; + } + + // we parsed a top-level field + switch(field) { + case NDB_FILTER_AUTHORS: + case NDB_FILTER_IDS: + if (!ndb_filter_parse_json_ids(parser, filter)) { + ndb_debug("failed to parse filter ids/authors\n"); + return 0; + } + break; + case NDB_FILTER_SINCE: + case NDB_FILTER_UNTIL: + case NDB_FILTER_LIMIT: + if (!ndb_filter_parse_json_int(parser, filter)) { + ndb_debug("failed to parse filter since/until/limit\n"); + return 0; + } + break; + case NDB_FILTER_KINDS: + if (!ndb_filter_parse_json_ints(parser, filter)) { + ndb_debug("failed to parse filter kinds\n"); + return 0; + } + break; + case NDB_FILTER_TAGS: + if (!ndb_filter_parse_json_elems(parser, filter)) { + ndb_debug("failed to parse filter tags\n"); + return 0; + } + break; + } + + ndb_filter_end_field(filter); + } + + return ndb_filter_end(filter); +} + int ndb_parse_json_note(struct ndb_json_parser *parser, struct ndb_note **note) { jsmntok_t *tok = NULL; @@ -5557,6 +5822,25 @@ int ndb_parse_json_note(struct ndb_json_parser *parser, struct ndb_note **note) return ndb_builder_finalize(&parser->builder, note, NULL); } +int ndb_filter_from_json(const char *json, int len, struct ndb_filter *filter, + unsigned char *buf, int bufsize) +{ + struct ndb_json_parser parser; + int res; + + if 
(filter->finalized) + return 0; + + ndb_json_parser_init(&parser, json, len, buf, bufsize); + if ((res = ndb_json_parser_parse(&parser, NULL)) < 0) + return res; + + if (parser.num_tokens < 1) + return 0; + + return ndb_filter_parse_json(&parser, filter); +} + int ndb_note_from_json(const char *json, int len, struct ndb_note **note, unsigned char *buf, int bufsize) { diff --git a/src/nostrdb.h b/src/nostrdb.h @@ -223,8 +223,13 @@ struct ndb_iterator { int index; }; -union ndb_filter_element { +struct ndb_filter_string { const char *string; + int len; +}; + +union ndb_filter_element { + struct ndb_filter_string string; const unsigned char *id; uint64_t integer; }; @@ -488,6 +493,9 @@ int ndb_filter_add_id_element(struct ndb_filter *, const unsigned char *id); int ndb_filter_add_int_element(struct ndb_filter *, uint64_t integer); int ndb_filter_add_str_element(struct ndb_filter *, const char *str); +// filters from json +int ndb_filter_from_json(const char *, int len, struct ndb_filter *filter, unsigned char *buf, int bufsize); + // getting field elements unsigned char *ndb_filter_get_id_element(const struct ndb_filter *, const struct ndb_filter_elements *, int index); const char *ndb_filter_get_string_element(const struct ndb_filter *, const struct ndb_filter_elements *, int index); diff --git a/test.c b/test.c @@ -817,6 +817,84 @@ static void test_content_len() free(buf); } +static void test_parse_filter_json() +{ + int i; + unsigned char buffer[1024]; + unsigned char *pid; + const char *str; + uint64_t val; + struct ndb_filter_elements *elems; + + const unsigned char id_bytes[] = { 0x50, 0x04, 0xa0, 0x81, 0xe3, 0x97, + 0xc6, 0xda, 0x9d, 0xc2, 0xf2, 0xd6, 0xb3, 0x13, 0x40, 0x06, + 0xa9, 0xd0, 0xe8, 0xc1, 0xb4, 0x66, 0x89, 0xd9, 0xfe, 0x15, + 0x0b, 0xb2, 0xf2, 0x1a, 0x20, 0x4d }; + + const unsigned char id2_bytes[] = { 0xb1, 0x69, 0xf5, 0x96, 0x96, 0x89, + 0x17, 0xa1, 0xab, 0xeb, 0x42, 0x34, 0xd3, 0xcf, 0x3a, 0xa9, + 0xba, 0xee, 0x21, 0x12, 0xe5, 0x89, 0x98, 0xd1, 
0x7c, 0x6d, + 0xb4, 0x16, 0xad, 0x33, 0xfe, 0x40 }; + +#define HEX_ID "5004a081e397c6da9dc2f2d6b3134006a9d0e8c1b46689d9fe150bb2f21a204d" +#define HEX_PK "b169f596968917a1abeb4234d3cf3aa9baee2112e58998d17c6db416ad33fe40" + + static const char *json = "{\"ids\": [\"" HEX_ID "\", \"" HEX_PK "\"], \"kinds\": [1,2,3], \"limit\": 10, \"#e\":[\"" HEX_PK "\"], \"#t\": [\"hashtag\"]}"; + struct ndb_filter filter, *f = &filter; + ndb_filter_init(f); + + assert(ndb_filter_from_json(json, strlen(json), f, buffer, sizeof(buffer))); + assert(filter.finalized); + + for (i = 0; i < filter.num_elements; i++) { + elems = ndb_filter_get_elements(f, i); + + switch (i) { + case 0: + assert(elems->field.type == NDB_FILTER_IDS); + assert(elems->count == 2); + + pid = ndb_filter_get_id_element(f, elems, 0); + assert(!memcmp(pid, id_bytes, 32)); + pid = ndb_filter_get_id_element(f, elems, 1); + assert(!memcmp(pid, id2_bytes, 32)); + break; + case 1: + assert(elems->field.type == NDB_FILTER_KINDS); + assert(elems->count == 3); + val = ndb_filter_get_int_element(elems, 0); + assert(val == 1); + val = ndb_filter_get_int_element(elems, 1); + assert(val == 2); + val = ndb_filter_get_int_element(elems, 2); + assert(val == 3); + break; + + case 2: + assert(elems->field.type == NDB_FILTER_LIMIT); + val = ndb_filter_get_int_element(elems, 0); + assert(val == 10); + break; + + case 3: + assert(elems->field.type == NDB_FILTER_TAGS); + assert(elems->field.tag == 'e'); + pid = ndb_filter_get_id_element(f, elems, 0); + assert(pid != NULL); + assert(!memcmp(pid, id2_bytes, 32)); + break; + + case 4: + assert(elems->field.type == NDB_FILTER_TAGS); + assert(elems->field.tag == 't'); + str = ndb_filter_get_string_element(f, elems, 0); + assert(!strcmp(str, "hashtag")); + break; + } + } + +} + static void test_parse_json() { char hex_id[32] = {0}; unsigned char buffer[1024]; @@ -1587,6 +1665,7 @@ static void test_weird_note_corruption() { } int main(int argc, const char *argv[]) { + test_parse_filter_json(); 
test_filter_json(); test_bech32_parsing(); test_single_url_parsing();