commit ded5b85c629fe14652cd837bd9e1ec58938c0725
parent d07ad1c1fafc795842b47e32a15f793a39a9deee
Author: William Casarin <jb55@jb55.com>
Date: Sat, 23 Dec 2023 13:20:52 -0800
varint: switch to 64 bit varints
Diffstat:
6 files changed, 123 insertions(+), 101 deletions(-)
diff --git a/src/cursor.h b/src/cursor.h
@@ -2,10 +2,10 @@
#ifndef JB55_CURSOR_H
#define JB55_CURSOR_H
-#include "typedefs.h"
#include "bolt11/likely.h"
#include <stdio.h>
+#include <inttypes.h>
#include <ctype.h>
#include <assert.h>
#include <string.h>
@@ -32,14 +32,14 @@ static inline void wipe_cursor(struct cursor *cursor)
memset(cursor->start, 0, cursor->end - cursor->start);
}
-static inline void make_cursor(u8 *start, u8 *end, struct cursor *cursor)
+static inline void make_cursor(unsigned char *start, unsigned char *end, struct cursor *cursor)
{
cursor->start = start;
cursor->p = start;
cursor->end = end;
}
-static inline void make_array(struct array *a, u8* start, u8 *end, unsigned int elem_size)
+static inline void make_array(struct array *a, unsigned char* start, unsigned char *end, unsigned int elem_size)
{
make_cursor(start, end, &a->cur);
a->elem_size = elem_size;
@@ -77,7 +77,7 @@ static inline void *cursor_alloc(struct cursor *mem, unsigned long size)
static inline int cursor_slice(struct cursor *mem, struct cursor *slice, size_t size)
{
- u8 *p;
+ unsigned char *p;
if (!(p = cursor_alloc(mem, size))) {
return 0;
}
@@ -103,7 +103,7 @@ static inline int cursor_skip(struct cursor *cursor, int n)
return 1;
}
-static inline int pull_byte(struct cursor *cursor, u8 *c)
+static inline int cursor_pull_byte(struct cursor *cursor, unsigned char *c)
{
if (unlikely(cursor->p >= cursor->end))
return 0;
@@ -114,7 +114,7 @@ static inline int pull_byte(struct cursor *cursor, u8 *c)
return 1;
}
-static inline int parse_byte(struct cursor *cursor, u8 *c)
+static inline int parse_byte(struct cursor *cursor, unsigned char *c)
{
if (unlikely(cursor->p >= cursor->end))
return 0;
@@ -159,7 +159,7 @@ static inline int cursor_pull_c_str(struct cursor *cursor, const char **str)
}
-static inline int cursor_push_byte(struct cursor *cursor, u8 c)
+static inline int cursor_push_byte(struct cursor *cursor, unsigned char c)
{
if (unlikely(cursor->p + 1 > cursor->end)) {
return 0;
@@ -171,7 +171,7 @@ static inline int cursor_push_byte(struct cursor *cursor, u8 c)
return 1;
}
-static inline int cursor_pull(struct cursor *cursor, u8 *data, int len)
+static inline int cursor_pull(struct cursor *cursor, unsigned char *data, int len)
{
if (unlikely(cursor->p + len > cursor->end)) {
return 0;
@@ -241,7 +241,7 @@ static inline unsigned char *cursor_top(struct cursor *cur, int len)
static inline int cursor_top_int(struct cursor *cur, int *i)
{
- u8 *p;
+ unsigned char *p;
if (unlikely(!(p = cursor_top(cur, sizeof(*i))))) {
return 0;
}
@@ -249,7 +249,7 @@ static inline int cursor_top_int(struct cursor *cur, int *i)
return 1;
}
-static inline int cursor_pop(struct cursor *cur, u8 *data, int len)
+static inline int cursor_pop(struct cursor *cur, unsigned char *data, int len)
{
if (unlikely(cur->p - len < cur->start)) {
return 0;
@@ -261,7 +261,7 @@ static inline int cursor_pop(struct cursor *cur, u8 *data, int len)
return 1;
}
-static inline int cursor_push(struct cursor *cursor, u8 *data, int len)
+static inline int cursor_push(struct cursor *cursor, unsigned char *data, int len)
{
if (unlikely(cursor->p + len >= cursor->end)) {
return 0;
@@ -277,7 +277,7 @@ static inline int cursor_push(struct cursor *cursor, u8 *data, int len)
static inline int cursor_push_int(struct cursor *cursor, int i)
{
- return cursor_push(cursor, (u8*)&i, sizeof(i));
+ return cursor_push(cursor, (unsigned char*)&i, sizeof(i));
}
static inline size_t cursor_count(struct cursor *cursor, size_t elem_size)
@@ -285,73 +285,61 @@ static inline size_t cursor_count(struct cursor *cursor, size_t elem_size)
return (cursor->p - cursor->start)/elem_size;
}
-/* TODO: push_varint */
-static inline int push_varint(struct cursor *cursor, int n)
+/* Encodes a 64-bit integer into a variable-length format and pushes it into a cursor.
+ * Returns the number of bytes used or -1 in case of an error. */
+static inline int cursor_push_varint(struct cursor *cursor, uint64_t n)
{
- int ok, len;
- unsigned char b;
- len = 0;
-
- while (1) {
- b = (n & 0xFF) | 0x80;
+ int len = 0;
+ do {
+ unsigned char b = (n & 0x7F) | (n > 0x7F ? 0x80 : 0);
n >>= 7;
- if (n == 0) {
- b &= 0x7F;
- ok = cursor_push_byte(cursor, b);
- len++;
- if (!ok) return 0;
- break;
- }
-
- ok = cursor_push_byte(cursor, b);
+ if (!cursor_push_byte(cursor, b))
+ return -1; // Error handling
len++;
- if (!ok) return 0;
- }
+ } while (n != 0);
return len;
}
-/* TODO: pull_varint */
-static inline int pull_varint(struct cursor *cursor, int *n)
+static inline int cursor_pull_varint(struct cursor *cursor, uint64_t *n)
{
int ok, i;
unsigned char b;
+
*n = 0;
- for (i = 0;; i++) {
- ok = pull_byte(cursor, &b);
+ for (i = 0; i < 10; i++) { // Loop up to 10 bytes for 64-bit
+ ok = cursor_pull_byte(cursor, &b);
if (!ok) return 0;
- *n |= ((int)b & 0x7F) << (i * 7);
+ *n |= ((int64_t)b & 0x7F) << (i * 7);
- /* is_last */
if ((b & 0x80) == 0) {
- return i+1;
+ return i + 1; // Successfully read i+1 bytes
}
-
- if (i == 4) return 0;
}
- return 0;
+ return 10; // Successfully read 10 bytes for a full 64-bit integer
}
+
static inline int cursor_pull_int(struct cursor *cursor, int *i)
{
- return cursor_pull(cursor, (u8*)i, sizeof(*i));
+ return cursor_pull(cursor, (unsigned char*)i, sizeof(*i));
}
static inline int cursor_push_u32(struct cursor *cursor, uint32_t i) {
return cursor_push(cursor, (unsigned char*)&i, sizeof(i));
}
-static inline int cursor_push_u16(struct cursor *cursor, u16 i)
+static inline int cursor_push_u16(struct cursor *cursor, unsigned short i)
{
- return cursor_push(cursor, (u8*)&i, sizeof(i));
+ return cursor_push(cursor, (unsigned char*)&i, sizeof(i));
}
static inline void *index_cursor(struct cursor *cursor, unsigned int index, int elem_size)
{
- u8 *p;
+ unsigned char *p;
p = &cursor->start[elem_size * index];
if (unlikely(p >= cursor->end))
@@ -363,7 +351,7 @@ static inline void *index_cursor(struct cursor *cursor, unsigned int index, int
static inline int push_sized_str(struct cursor *cursor, const char *str, int len)
{
- return cursor_push(cursor, (u8*)str, len);
+ return cursor_push(cursor, (unsigned char*)str, len);
}
static inline int cursor_push_lowercase(struct cursor *cur, const char *str, int len)
@@ -382,7 +370,7 @@ static inline int cursor_push_lowercase(struct cursor *cur, const char *str, int
static inline int cursor_push_str(struct cursor *cursor, const char *str)
{
- return cursor_push(cursor, (u8*)str, (int)strlen(str));
+ return cursor_push(cursor, (unsigned char*)str, (int)strlen(str));
}
static inline int cursor_push_c_str(struct cursor *cursor, const char *str)
@@ -393,30 +381,27 @@ static inline int cursor_push_c_str(struct cursor *cursor, const char *str)
/* TODO: push varint size */
static inline int push_prefixed_str(struct cursor *cursor, const char *str)
{
- int ok, len;
- len = (int)strlen(str);
- ok = push_varint(cursor, len);
- if (!ok) return 0;
+ uint64_t len;
+ len = strlen(str);
+ if (!cursor_push_varint(cursor, len))
+ return 0;
return push_sized_str(cursor, str, len);
}
static inline int pull_prefixed_str(struct cursor *cursor, struct cursor *dest_buf, const char **str)
{
- int len, ok;
-
- ok = pull_varint(cursor, &len);
- if (!ok) return 0;
+ uint64_t len;
- if (unlikely(dest_buf->p + len > dest_buf->end)) {
+ if (!cursor_pull_varint(cursor, &len))
return 0;
- }
- ok = pull_data_into_cursor(cursor, dest_buf, (unsigned char**)str, len);
- if (!ok) return 0;
+ if (unlikely(dest_buf->p + len > dest_buf->end))
+ return 0;
- ok = cursor_push_byte(dest_buf, 0);
+ if (!pull_data_into_cursor(cursor, dest_buf, (unsigned char**)str, len))
+ return 0;
- return 1;
+ return cursor_push_byte(dest_buf, 0);
}
static inline int cursor_remaining_capacity(struct cursor *cursor)
@@ -450,7 +435,7 @@ static inline void cursor_print_around(struct cursor *cur, int range)
}
#undef max
-static inline int pull_bytes(struct cursor *cur, int count, const u8 **bytes) {
+static inline int pull_bytes(struct cursor *cur, int count, const unsigned char **bytes) {
if (cur->p + count > cur->end)
return 0;
@@ -489,13 +474,13 @@ static inline int is_underscore(char c) {
return c == '_';
}
-static inline int is_utf8_byte(u8 c) {
+static inline int is_utf8_byte(unsigned char c) {
return c & 0x80;
}
static inline int parse_utf8_char(struct cursor *cursor, unsigned int *code_point, unsigned int *utf8_length)
{
- u8 first_byte;
+ unsigned char first_byte;
if (!parse_byte(cursor, &first_byte))
return 0; // Not enough data
diff --git a/src/nostrdb.c b/src/nostrdb.c
@@ -225,11 +225,11 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize,
// TODO: need update this to uint64_t
// we push this first because our query function can pull this off
// quicky to check matches
- if (!push_varint(&cur, (int32_t)note_id))
+ if (!cursor_push_varint(&cur, (int32_t)note_id))
return 0;
// string length
- if (!push_varint(&cur, word_len))
+ if (!cursor_push_varint(&cur, word_len))
return 0;
// non-null terminated, lowercase string
@@ -237,12 +237,12 @@ static int ndb_make_text_search_key(unsigned char *buf, int bufsize,
return 0;
// TODO: need update this to uint64_t
- if (!push_varint(&cur, (int)timestamp))
+ if (!cursor_push_varint(&cur, (int)timestamp))
return 0;
// the index of the word in the content so that we can do more accurate
// phrase searches
- if (!push_varint(&cur, word_index))
+ if (!cursor_push_varint(&cur, word_index))
return 0;
size = cur.p - cur.start;
@@ -314,19 +314,18 @@ static int mdb_cmp_memn(const MDB_val *a, const MDB_val *b) {
static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b)
{
struct cursor ca, cb;
- int sa, sb;
- int nid_a, nid_b;
+ uint64_t sa, sb, nid_a, nid_b;
MDB_val a2, b2;
make_cursor(a->mv_data, a->mv_data + a->mv_size, &ca);
make_cursor(b->mv_data, b->mv_data + b->mv_size, &cb);
// note_id
- if (unlikely(!pull_varint(&ca, &nid_a) || !pull_varint(&cb, &nid_b)))
+ if (unlikely(!cursor_pull_varint(&ca, &nid_a) || !cursor_pull_varint(&cb, &nid_b)))
return 0;
// string size
- if (unlikely(!pull_varint(&ca, &sa) || !pull_varint(&cb, &sb)))
+ if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb)))
return 0;
a2.mv_data = ca.p;
@@ -343,7 +342,7 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b)
cb.p += sb;
// timestamp
- if (unlikely(!pull_varint(&ca, &sa) || !pull_varint(&cb, &sb)))
+ if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb)))
return 0;
if (sa < sb) return -1;
@@ -354,7 +353,7 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b)
else if (nid_a > nid_b) return 1;
// word index
- if (unlikely(!pull_varint(&ca, &sa) || !pull_varint(&cb, &sb)))
+ if (unlikely(!cursor_pull_varint(&ca, &sa) || !cursor_pull_varint(&cb, &sb)))
return 0;
if (sa < sb) return -1;
@@ -366,11 +365,9 @@ static int ndb_text_search_key_compare(const MDB_val *a, const MDB_val *b)
static inline int ndb_unpack_text_search_key_noteid(
struct cursor *cur, uint64_t *note_id)
{
- int inote_id;
- if (!pull_varint(cur, &inote_id))
+ if (!cursor_pull_varint(cur, note_id))
return 0;
- *note_id = inote_id;
return 1;
}
@@ -381,9 +378,13 @@ static inline int ndb_unpack_text_search_key_string(struct cursor *cur,
const char **str,
int *str_len)
{
- if (!pull_varint(cur, str_len))
+ uint64_t len;
+
+ if (!cursor_pull_varint(cur, &len))
return 0;
+ *str_len = len;
+
*str = (const char *)cur->p;
if (!cursor_skip(cur, *str_len))
@@ -398,16 +399,12 @@ static inline int
ndb_unpack_remaining_text_search_key(struct cursor *cur,
struct ndb_text_search_key *key)
{
- int timestamp;
-
- if (!pull_varint(cur, ×tamp))
+ if (!cursor_pull_varint(cur, &key->timestamp))
return 0;
- if (!pull_varint(cur, &key->word_index))
+ if (!cursor_pull_varint(cur, &key->word_index))
return 0;
- key->timestamp = timestamp;
-
return 1;
}
@@ -2332,7 +2329,7 @@ static int prefix_count(const char *str1, int len1, const char *str2, int len2)
static void ndb_print_text_search_key(struct ndb_text_search_key *key)
{
- printf("K<'%.*s' %d %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str,
+ printf("K<'%.*s' %" PRIu64 " %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str,
key->word_index,
key->timestamp,
key->note_id);
diff --git a/src/nostrdb.h b/src/nostrdb.h
@@ -266,7 +266,7 @@ struct ndb_text_search_key
const char *str;
uint64_t timestamp;
uint64_t note_id;
- int word_index;
+ uint64_t word_index;
};
struct ndb_text_search_result {
diff --git a/src/print_util.h b/src/print_util.h
@@ -1,7 +1,7 @@
static void ndb_print_text_search_key(struct ndb_text_search_key *key)
{
- printf("K<'%.*s' %d %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str,
+ printf("K<'%.*s' %" PRIu64 " %" PRIu64 " note_id:%" PRIu64 ">", key->str_len, key->str,
key->word_index,
key->timestamp,
key->note_id);
diff --git a/src/typedefs.h b/src/typedefs.h
@@ -1,14 +0,0 @@
-
-#ifndef PROTOVERSE_TYPEDEFS_H
-#define PROTOVERSE_TYPEDEFS_H
-
-#include <stdint.h>
-
-typedef unsigned char u8;
-typedef unsigned int u32;
-typedef unsigned short u16;
-typedef uint64_t u64;
-typedef int64_t s64;
-
-
-#endif /* PROTOVERSE_TYPEDEFS_H */
diff --git a/test.c b/test.c
@@ -964,9 +964,63 @@ static void test_fulltext()
ndb_destroy(ndb);
free(json);
+
+}
+
+static void test_varint(int64_t value) {
+ unsigned char buffer[10];
+ struct cursor cursor;
+ uint64_t result;
+
+ // Initialize cursor
+ cursor.start = buffer;
+ cursor.p = buffer;
+ cursor.end = buffer + sizeof(buffer);
+
+ // Push the value
+ assert(cursor_push_varint(&cursor, value));
+
+ // Reset cursor for reading
+ cursor.p = buffer;
+
+ // Pull the value
+ if (!cursor_pull_varint(&cursor, &result)) {
+ printf("Test failed for value %" PRIu64 " \n", value);
+ assert(!"Failed to pull value");
+ }
+
+ // Check if the pushed and pulled values are the same
+ if (value != result) {
+ printf("Test failed for value %" PRIu64 "\n", value);
+ assert(!"test failed");
+ }
+}
+
+static int test_varints() {
+ test_varint(0);
+ test_varint(127); // Edge case for 1-byte varint
+ test_varint(128); // Edge case for 2-byte varint
+ test_varint(16383); // Edge case for 2-byte varint
+ test_varint(16384); // Edge case for 3-byte varint
+ test_varint(2097151); // Edge case for 3-byte varint
+ test_varint(2097152); // Edge case for 4-byte varint
+ test_varint(268435455); // Edge case for 4-byte varint
+ test_varint(268435456); // Edge case for 5-byte varint
+ test_varint(34359738367LL); // Edge case for 5-byte varint
+ test_varint(34359738368LL); // Edge case for 6-byte varint
+ test_varint(4398046511103LL); // Edge case for 6-byte varint
+ test_varint(4398046511104LL); // Edge case for 7-byte varint
+ test_varint(562949953421311LL); // Edge case for 7-byte varint
+ test_varint(562949953421312LL); // Edge case for 8-byte varint
+ test_varint(72057594037927935LL); // Edge case for 8-byte varint
+ test_varint(72057594037927936LL); // Edge case for 9-byte varint
+ test_varint(9223372036854775807LL); // Maximum 64-bit integer
+
+ return 0;
}
int main(int argc, const char *argv[]) {
+ test_varints();
test_filters();
//test_migrate();
test_fetched_at();