damus

nostr ios client
git clone git://jb55.com/damus
Log | Files | Refs | README | LICENSE

commit 65be56ba7cc32cc79eefab598c64babec556b950
parent 8e361a9586a721d6f6a1bad228a20f03cfd90263
Author: William Casarin <jb55@jb55.com>
Date:   Sat,  2 Dec 2023 13:41:36 -0800

fix some incompatibility between nostrdb and damus'

Diffstat:
M nostrdb/nostrdb.c | 169 +------------------------------------------------------------------------------
1 file changed, 1 insertion(+), 168 deletions(-)

diff --git a/nostrdb/nostrdb.c b/nostrdb/nostrdb.c @@ -2124,173 +2124,6 @@ static int ndb_write_note_kind_index(struct ndb_txn *txn, struct ndb_note *note, return 1; } -/** - * Checks if a given Unicode code point is a punctuation character - * - * @param codepoint The Unicode code point to check. @return true if the - * code point is a punctuation character, false otherwise. - */ -static inline int is_punctuation(unsigned int codepoint) { - // Check for underscore (underscore is not treated as punctuation) - if (codepoint == '_') - return 0; - - // Check for ASCII punctuation - if (codepoint <= 128 && ispunct(codepoint)) - return 1; - - // Check for Unicode punctuation exceptions (punctuation allowed in hashtags) - if (codepoint == 0x301C || codepoint == 0xFF5E) // Japanese Wave Dash / Tilde - return 0; - - // Check for Unicode punctuation - // NOTE: We may need to adjust the codepoint ranges in the future, - // to include/exclude certain types of Unicode characters in hashtags. - // Unicode Blocks Reference: https://www.compart.com/en/unicode/block - return ( - // Latin-1 Supplement No-Break Space (NBSP): U+00A0 - (codepoint == 0x00A0) || - - // Latin-1 Supplement Punctuation: U+00A1 to U+00BF - (codepoint >= 0x00A1 && codepoint <= 0x00BF) || - - // General Punctuation: U+2000 to U+206F - (codepoint >= 0x2000 && codepoint <= 0x206F) || - - // Currency Symbols: U+20A0 to U+20CF - (codepoint >= 0x20A0 && codepoint <= 0x20CF) || - - // Supplemental Punctuation: U+2E00 to U+2E7F - (codepoint >= 0x2E00 && codepoint <= 0x2E7F) || - - // CJK Symbols and Punctuation: U+3000 to U+303F - (codepoint >= 0x3000 && codepoint <= 0x303F) || - - // Ideographic Description Characters: U+2FF0 to U+2FFF - (codepoint >= 0x2FF0 && codepoint <= 0x2FFF) - ); -} - -static inline int is_whitespace(char c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; -} - -static inline int is_right_boundary(int c) { - return is_whitespace(c) || is_punctuation(c); 
-} - -static inline int parse_byte(struct cursor *cursor, unsigned char *c) -{ - if (unlikely(cursor->p >= cursor->end)) - return 0; - - *c = *cursor->p; - - return 1; -} - -static inline int peek_char(struct cursor *cur, int ind) { - if ((cur->p + ind < cur->start) || (cur->p + ind >= cur->end)) - return -1; - - return *(cur->p + ind); -} - -static int parse_utf8_char(struct cursor *cursor, unsigned int *code_point, - unsigned int *utf8_length) -{ - unsigned char first_byte; - if (!parse_byte(cursor, &first_byte)) - return 0; // Not enough data - - // Determine the number of bytes in this UTF-8 character - int remaining_bytes = 0; - if (first_byte < 0x80) { - *code_point = first_byte; - return 1; - } else if ((first_byte & 0xE0) == 0xC0) { - remaining_bytes = 1; - *utf8_length = remaining_bytes + 1; - *code_point = first_byte & 0x1F; - } else if ((first_byte & 0xF0) == 0xE0) { - remaining_bytes = 2; - *utf8_length = remaining_bytes + 1; - *code_point = first_byte & 0x0F; - } else if ((first_byte & 0xF8) == 0xF0) { - remaining_bytes = 3; - *utf8_length = remaining_bytes + 1; - *code_point = first_byte & 0x07; - } else { - remaining_bytes = 0; - *utf8_length = 1; // Assume 1 byte length for unrecognized UTF-8 characters - // TODO: We need to gracefully handle unrecognized UTF-8 characters - //printf("Invalid UTF-8 byte: %x\n", *code_point); - *code_point = ((first_byte & 0xF0) << 6); // Prevent testing as punctuation - return 0; // Invalid first byte - } - - // Peek at remaining bytes - for (int i = 0; i < remaining_bytes; ++i) { - signed char next_byte; - if ((next_byte = peek_char(cursor, i+1)) == -1) { - *utf8_length = 1; - return 0; // Not enough data - } - - if ((next_byte & 0xC0) != 0x80) { - *utf8_length = 1; - return 0; // Invalid byte in sequence - } - - *code_point = (*code_point << 6) | (next_byte & 0x3F); - } - - return 1; -} - - -static inline int is_utf8_byte(unsigned char c) { - return c & 0x80; -} - -static inline int consume_until_boundary(struct 
cursor *cur) { - unsigned int c; - unsigned int char_length = 1; - unsigned int *utf8_char_length = &char_length; - - while (cur->p < cur->end) { - c = *cur->p; - *utf8_char_length = 1; - - if (is_whitespace(c)) - return 1; - - // Need to check for UTF-8 characters, which can be multiple - // bytes long - if (is_utf8_byte(c)) { - if (!parse_utf8_char(cur, &c, utf8_char_length)) { - if (!is_right_boundary(c)){ - // TODO: We should work towards - // handling all UTF-8 characters. - //printf("Invalid UTF-8 code point: %x\n", c); - return 0; - } - } - } - - if (is_right_boundary(c)) - return 1; - - // Need to use a variable character byte length for UTF-8 (2-4 bytes) - if (cur->p + *utf8_char_length <= cur->end) - cur->p += *utf8_char_length; - else - cur->p++; - } - - return 1; -} - static void consume_whitespace_or_punctuation(struct cursor *cur) { while (cur->p < cur->end) { @@ -3670,7 +3503,7 @@ static int ndb_event_commitment(struct ndb_note *ev, unsigned char *buf, int buf struct cursor cur; int ok; - if (!hex_encode(ev->pubkey, sizeof(ev->pubkey), pubkey)) + if (!hex_encode(ev->pubkey, sizeof(ev->pubkey), pubkey, sizeof(pubkey))) return 0; make_cursor(buf, buf + buflen, &cur);