fix some incompatibility between nostrdb and damus' - damus

commit 65be56ba7cc32cc79eefab598c64babec556b950
parent 8e361a9586a721d6f6a1bad228a20f03cfd90263
Author: William Casarin <jb55@jb55.com>
Date:   Sat,  2 Dec 2023 13:41:36 -0800

fix some incompatibility between nostrdb and damus'

Diffstat:
M nostrdb/nostrdb.c  | 169 +------------------------------------------------------------------------------

1 file changed, 1 insertion(+), 168 deletions(-)
diff --git a/nostrdb/nostrdb.c b/nostrdb/nostrdb.c
@@ -2124,173 +2124,6 @@ static int ndb_write_note_kind_index(struct ndb_txn *txn, struct ndb_note *note,
 	return 1;
 }
 
-/**
-  * Checks if a given Unicode code point is a punctuation character
-  *
-  * @param codepoint The Unicode code point to check. @return true if the
-  * code point is a punctuation character, false otherwise.
-  */
-static inline int is_punctuation(unsigned int codepoint) {
-	// Check for underscore (underscore is not treated as punctuation)
-	if (codepoint == '_')
-		return 0;
-
-	// Check for ASCII punctuation
-	if (codepoint <= 128 && ispunct(codepoint))
-		return 1;
-
-	// Check for Unicode punctuation exceptions (punctuation allowed in hashtags)
-	if (codepoint == 0x301C || codepoint == 0xFF5E) // Japanese Wave Dash / Tilde
-		return 0;
-
-	// Check for Unicode punctuation
-	// NOTE: We may need to adjust the codepoint ranges in the future,
-	// to include/exclude certain types of Unicode characters in hashtags.
-	// Unicode Blocks Reference: https://www.compart.com/en/unicode/block
-	return (
-		// Latin-1 Supplement No-Break Space (NBSP): U+00A0
-		(codepoint == 0x00A0) ||
-
-		// Latin-1 Supplement Punctuation: U+00A1 to U+00BF
-		(codepoint >= 0x00A1 && codepoint <= 0x00BF) ||
-
-		// General Punctuation: U+2000 to U+206F
-		(codepoint >= 0x2000 && codepoint <= 0x206F) ||
-
-		// Currency Symbols: U+20A0 to U+20CF
-		(codepoint >= 0x20A0 && codepoint <= 0x20CF) ||
-
-		// Supplemental Punctuation: U+2E00 to U+2E7F
-		(codepoint >= 0x2E00 && codepoint <= 0x2E7F) ||
-
-		// CJK Symbols and Punctuation: U+3000 to U+303F
-		(codepoint >= 0x3000 && codepoint <= 0x303F) ||
-
-		// Ideographic Description Characters: U+2FF0 to U+2FFF
-		(codepoint >= 0x2FF0 && codepoint <= 0x2FFF)
-	);
-}
-
-static inline int is_whitespace(char c) {
-    return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
-}
-
-static inline int is_right_boundary(int c) {
-	return is_whitespace(c) || is_punctuation(c);
-}
-
-static inline int parse_byte(struct cursor *cursor, unsigned char *c)
-{
-	if (unlikely(cursor->p >= cursor->end))
-		return 0;
-
-	*c = *cursor->p;
-
-	return 1;
-}
-
-static inline int peek_char(struct cursor *cur, int ind) {
-	if ((cur->p + ind < cur->start) || (cur->p + ind >= cur->end))
-		return -1;
-
-	return *(cur->p + ind);
-}
-
-static int parse_utf8_char(struct cursor *cursor, unsigned int *code_point,
-			   unsigned int *utf8_length)
-{
-	unsigned char first_byte;
-	if (!parse_byte(cursor, &first_byte))
-		return 0; // Not enough data
-
-	// Determine the number of bytes in this UTF-8 character
-	int remaining_bytes = 0;
-	if (first_byte < 0x80) {
-		*code_point = first_byte;
-		return 1;
-	} else if ((first_byte & 0xE0) == 0xC0) {
-		remaining_bytes = 1;
-		*utf8_length = remaining_bytes + 1;
-		*code_point = first_byte & 0x1F;
-	} else if ((first_byte & 0xF0) == 0xE0) {
-		remaining_bytes = 2;
-		*utf8_length = remaining_bytes + 1;
-		*code_point = first_byte & 0x0F;
-	} else if ((first_byte & 0xF8) == 0xF0) {
-		remaining_bytes = 3;
-		*utf8_length = remaining_bytes + 1;
-		*code_point = first_byte & 0x07;
-	} else {
-		remaining_bytes = 0;
-		*utf8_length = 1; // Assume 1 byte length for unrecognized UTF-8 characters
-		// TODO: We need to gracefully handle unrecognized UTF-8 characters
-		//printf("Invalid UTF-8 byte: %x\n", *code_point);
-		*code_point = ((first_byte & 0xF0) << 6); // Prevent testing as punctuation
-		return 0; // Invalid first byte
-	}
-
-	// Peek at remaining bytes
-	for (int i = 0; i < remaining_bytes; ++i) {
-		signed char next_byte;
-		if ((next_byte = peek_char(cursor, i+1)) == -1) {
-			*utf8_length = 1;
-			return 0; // Not enough data
-		}
-
-		if ((next_byte & 0xC0) != 0x80) {
-			*utf8_length = 1;
-			return 0; // Invalid byte in sequence
-		}
-
-		*code_point = (*code_point << 6) | (next_byte & 0x3F);
-	}
-
-	return 1;
-}
-
-
-static inline int is_utf8_byte(unsigned char c) {
-    return c & 0x80;
-}
-
-static inline int consume_until_boundary(struct cursor *cur) {
-	unsigned int c;
-	unsigned int char_length = 1;
-	unsigned int *utf8_char_length = &char_length;
-
-	while (cur->p < cur->end) {
-		c = *cur->p;
-		*utf8_char_length = 1;
-
-		if (is_whitespace(c))
-			return 1;
-
-		// Need to check for UTF-8 characters, which can be multiple
-		// bytes long
-		if (is_utf8_byte(c)) {
-			if (!parse_utf8_char(cur, &c, utf8_char_length)) {
-				if (!is_right_boundary(c)){
-					// TODO: We should work towards
-					// handling all UTF-8 characters.
-					//printf("Invalid UTF-8 code point: %x\n", c);
-					return 0;
-				}
-			}
-		}
-
-		if (is_right_boundary(c))
-			return 1;
-
-		// Need to use a variable character byte length for UTF-8 (2-4 bytes)
-		if (cur->p + *utf8_char_length <= cur->end)
-			cur->p += *utf8_char_length;
-		else
-			cur->p++;
-	}
-
-	return 1;
-}
-
 static void consume_whitespace_or_punctuation(struct cursor *cur)
 {
 	while (cur->p < cur->end) {
@@ -3670,7 +3503,7 @@ static int ndb_event_commitment(struct ndb_note *ev, unsigned char *buf, int buf
 	struct cursor cur;
 	int ok;
 
-	if (!hex_encode(ev->pubkey, sizeof(ev->pubkey), pubkey))
+	if (!hex_encode(ev->pubkey, sizeof(ev->pubkey), pubkey, sizeof(pubkey)))
 		return 0;
 
 	make_cursor(buf, buf + buflen, &cur);

	damus nostr ios client
	git clone git://jb55.com/damus
	Log | Files | Refs | README | LICENSE