nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

parser.c (43746B)


      1 /*
      2  * FlatBuffers IDL parser.
      3  *
      4  * Originally based on the numeric parser in the Luthor lexer project.
      5  *
      6  * We are moving away from the TDOP approach because the grammar doesn't
      7  * really benefit from it. We use the same overall framework.
      8  */
      9 
     10 #include <stdio.h>
     11 #include <stdlib.h>
     12 #include <assert.h>
     13 #include <stdarg.h>
     14 
     15 #include "semantics.h"
     16 #include "codegen.h"
     17 #include "fileio.h"
     18 #include "pstrutil.h"
     19 #include "flatcc/portable/pparseint.h"
     20 
     21 void fb_default_error_out(void *err_ctx, const char *buf, size_t len)
     22 {
     23     (void)err_ctx;
     24 
     25     fwrite(buf, 1, len, stderr);
     26 }
     27 
     28 int fb_print_error(fb_parser_t *P, const char * format, ...)
     29 {
     30     int n;
     31     va_list ap;
     32     char buf[ERROR_BUFSIZ];
     33 
     34     va_start (ap, format);
     35     n = vsnprintf (buf, ERROR_BUFSIZ, format, ap);
     36     va_end (ap);
     37     if (n >= ERROR_BUFSIZ) {
     38         strcpy(buf + ERROR_BUFSIZ - 5, "...\n");
     39         n = ERROR_BUFSIZ - 1;
     40     }
     41     P->error_out(P->error_ctx, buf, (size_t)n);
     42     return n;
     43 }
     44 
     45 const char *error_find_file_of_token(fb_parser_t *P, fb_token_t *t)
     46 {
     47     /*
     48      * Search token in dependent buffers if not in current token
     49      * buffer. We can do this as a linear search because we limit the
     50      * number of output errors.
     51      */
     52     while (P) {
     53         if (P->ts <= t && P->te > t) {
     54             return P->schema.errorname;
     55         }
     56         P = P->dependencies;
     57     }
     58     return "";
     59 }
     60 
     61 void error_report(fb_parser_t *P, fb_token_t *t, const char *msg, fb_token_t *peer, const char *s, size_t len)
     62 {
     63     const char *file, *peer_file;
     64 
     65     if (t && !s) {
     66         s = t->text;
     67         len = (size_t)t->len;
     68     }
     69     if (!msg) {
     70         msg = "";
     71     }
     72     if (!s) {
     73         s = "";
     74         len = 0;
     75     }
     76     if (t && !peer) {
     77         file = error_find_file_of_token(P, t);
     78         fb_print_error(P, "%s:%ld:%ld: error: '%.*s': %s\n",
     79                 file, (long)t->linenum, (long)t->pos, len, s, msg);
     80     } else if (t && peer) {
     81         file = error_find_file_of_token(P, t);
     82         peer_file = error_find_file_of_token(P, peer);
     83         fb_print_error(P, "%s:%ld:%ld: error: '%.*s': %s: %s:%ld:%ld: '%.*s'\n",
     84                 file, (long)t->linenum, (long)t->pos, len, s, msg,
     85                 peer_file, (long)peer->linenum, (long)peer->pos, (int)peer->len, peer->text);
     86     } else if (!t && !peer) {
     87         fb_print_error(P, "error: %s\n", msg);
     88     } else if (peer) {
     89         peer_file = error_find_file_of_token(P, peer);
     90         fb_print_error(P, "error: %s: %s:%ld:%ld: '%.*s'\n",
     91                 msg,
     92                 peer_file, (long)peer->linenum, (long)peer->pos, (int)peer->len, peer->text);
     93     } else {
     94         fb_print_error(P, "internal error: unexpected state\n");
     95     }
     96     ++P->failed;
     97 }
     98 
     99 void error_ref_sym(fb_parser_t *P, fb_ref_t *ref, const char *msg, fb_symbol_t *s2)
    100 {
    101     fb_ref_t *p;
    102     char buf[FLATCC_MAX_IDENT_SHOW + 1];
    103     size_t k = FLATCC_MAX_IDENT_SHOW;
    104     size_t n = 0;
    105     size_t n0 = 0;
    106     int truncated = 0;
    107 
    108     p = ref;
    109     while (p && k > 0) {
    110         if (n0 > 0) {
    111             buf[n0] = '.';
    112             --k;
    113             ++n0;
    114         }
    115         n = (size_t)p->ident->len;
    116         if (k < n) {
    117             n = k;
    118             truncated = 1;
    119         }
    120         memcpy(buf + n0, p->ident->text, n);
    121         k -= n;
    122         n0 += n;
    123         p = p->link;
    124     }
    125     if (p) truncated = 1;
    126     buf[n0] = '\0';
    127     if (n0 > 0) {
    128         --n0;
    129     }
    130     if (truncated) {
    131         memcpy(buf + FLATCC_MAX_IDENT_SHOW + 1 - 4, "...\0", 4);
    132         n0 = FLATCC_MAX_IDENT_SHOW;
    133     }
    134     error_report(P, ref->ident, msg, s2 ? s2->ident : 0, buf, n0);
    135 }
    136 
    137 //#define LEX_DEBUG
    138 
    139 /* Flatbuffers reserve keywords. */
    140 #define LEX_KEYWORDS
    141 
    142 #define LEX_C_BLOCK_COMMENT
    143 /*
    144  * Flatbuffers also support /// on a single line for documentation but
    145  * we can handle that within the normal line comment parsing logic.
    146  */
    147 #define LEX_C99_LINE_COMMENT
    148 /*
    149  * String escapes are not defined in fb schema but it only uses strings
    150  * for attribute, namespace, file ext, and file id. For JSON objects we
    151  * use C string escapes but control characters must be detected.
    152  */
    153 #define LEX_C_STRING
    154 
    155 /* Accept numbers like -0x42 as integer literals. */
    156 #define LEX_HEX_NUMERIC
    157 
    158 #include "parser.h"
    159 
    160 #ifdef LEX_DEBUG
    161 
    162 static void print_token(fb_token_t *t)
    163 {
    164     lex_fprint_token(stderr, t->id, t->text, t->text + t->len, t->linenum, t->pos);
    165 }
    166 
    167 static void debug_token(const char *info, fb_token_t *t)
    168 {
    169     fprintf(stderr, "%s\n    ", info);
    170     print_token(t);
    171 }
    172 #else
    173 #define debug_token(info, t) ((void)0)
    174 #endif
    175 
    176 static void revert_metadata(fb_metadata_t **list)
    177 {
    178     REVERT_LIST(fb_metadata_t, link, list);
    179 }
    180 
    181 static void revert_symbols(fb_symbol_t **list)
    182 {
    183     REVERT_LIST(fb_symbol_t, link, list);
    184 }
    185 
    186 static void revert_names(fb_name_t **list)
    187 {
    188     REVERT_LIST(fb_name_t, link, list);
    189 }
    190 
    191 static inline fb_doc_t *fb_add_doc(fb_parser_t *P, fb_token_t *t)
    192 {
    193     fb_doc_t *p;
    194 
    195     p = new_elem(P, sizeof(*p));
    196     p->ident = t;
    197     p->link = P->doc;
    198     P->doc = p;
    199     return p;
    200 }
    201 
    202 #define fb_assign_doc(P, p) {\
    203     revert_symbols(&P->doc); p->doc = P->doc; P->doc = 0; }
    204 
    205 static inline fb_compound_type_t *fb_add_table(fb_parser_t *P)
    206 {
    207     fb_compound_type_t *p;
    208 
    209     p = new_elem(P, sizeof(*p));
    210     p->symbol.link = P->schema.symbols;
    211     p->symbol.kind = fb_is_table;
    212     P->schema.symbols = &p->symbol;
    213     p->scope = P->current_scope;
    214     fb_assign_doc(P, p);
    215     return p;
    216 }
    217 
    218 static inline fb_compound_type_t *fb_add_struct(fb_parser_t *P)
    219 {
    220     fb_compound_type_t *p;
    221 
    222     p = new_elem(P, sizeof(*p));
    223     p->symbol.link = P->schema.symbols;
    224     p->symbol.kind = fb_is_struct;
    225     P->schema.symbols = &p->symbol;
    226     p->scope = P->current_scope;
    227     fb_assign_doc(P, p);
    228     return p;
    229 }
    230 
    231 static inline fb_compound_type_t *fb_add_rpc_service(fb_parser_t *P)
    232 {
    233     fb_compound_type_t *p;
    234 
    235     p = new_elem(P, sizeof(*p));
    236     p->symbol.link = P->schema.symbols;
    237     p->symbol.kind = fb_is_rpc_service;
    238     P->schema.symbols = &p->symbol;
    239     p->scope = P->current_scope;
    240     fb_assign_doc(P, p);
    241     return p;
    242 }
    243 
    244 static inline fb_compound_type_t *fb_add_enum(fb_parser_t *P)
    245 {
    246     fb_compound_type_t *p;
    247 
    248     p = new_elem(P, sizeof(*p));
    249     p->symbol.link = P->schema.symbols;
    250     p->symbol.kind = fb_is_enum;
    251     P->schema.symbols = &p->symbol;
    252     p->scope = P->current_scope;
    253     fb_assign_doc(P, p);
    254     return p;
    255 }
    256 
    257 static inline fb_compound_type_t *fb_add_union(fb_parser_t *P)
    258 {
    259     fb_compound_type_t *p;
    260 
    261     p = new_elem(P, sizeof(*p));
    262     p->symbol.link = P->schema.symbols;
    263     p->symbol.kind = fb_is_union;
    264     P->schema.symbols = &p->symbol;
    265     p->scope = P->current_scope;
    266     fb_assign_doc(P, p);
    267     return p;
    268 }
    269 
    270 static inline fb_ref_t *fb_add_ref(fb_parser_t *P, fb_token_t *t)
    271 {
    272     fb_ref_t *p;
    273 
    274     p = new_elem(P, sizeof(*p));
    275     p->ident = t;
    276     return p;
    277 }
    278 
    279 static inline fb_attribute_t *fb_add_attribute(fb_parser_t *P)
    280 {
    281     fb_attribute_t *p;
    282 
    283     p = new_elem(P, sizeof(*p));
    284     p->name.link = P->schema.attributes;
    285     P->schema.attributes = &p->name;
    286     return p;
    287 }
    288 
    289 static inline fb_include_t *fb_add_include(fb_parser_t *P)
    290 {
    291     fb_include_t *p;
    292     p = new_elem(P, sizeof(*p));
    293     p->link = P->schema.includes;
    294     return P->schema.includes = p;
    295 }
    296 
    297 static inline fb_scope_t *fb_add_scope(fb_parser_t *P, fb_ref_t *name)
    298 {
    299     fb_scope_t *p;
    300 
    301     p = fb_scope_table_find(&P->schema.root_schema->scope_index, name, 0);
    302     if (p) {
    303         return p;
    304     }
    305     p = new_elem(P, sizeof(*p));
    306     p->name = name;
    307     p->prefix = P->schema.prefix;
    308 
    309     fb_scope_table_insert_item(&P->schema.root_schema->scope_index, p, ht_keep);
    310     return p;
    311 }
    312 
    313 static inline fb_metadata_t *fb_add_metadata(fb_parser_t *P, fb_metadata_t **metadata)
    314 {
    315     fb_metadata_t *p;
    316     p = new_elem(P, sizeof(*p));
    317     p->link = *metadata;
    318     return *metadata = p;
    319 }
    320 
    321 static inline fb_member_t *fb_add_member(fb_parser_t *P, fb_symbol_t **members)
    322 {
    323     fb_member_t *p;
    324     p = new_elem(P, sizeof(*p));
    325     p->symbol.link = *members;
    326     p->symbol.kind = fb_is_member;
    327     *members = (fb_symbol_t *)p;
    328     fb_assign_doc(P, p);
    329     return p;
    330 }
    331 
    332 static inline int is_end(fb_token_t *t)
    333 {
    334     return t->id == LEX_TOK_EOF;
    335 }
    336 
    337 static fb_token_t *next(fb_parser_t *P)
    338 {
    339 again:
    340     ++P->token;
    341     if (P->token == P->te) {
    342         /* We keep returning end of token to help binary operators etc., if any. */
    343         --P->token;
    344         assert(0);
    345         switch (P->token->id) {
    346         case LEX_TOK_EOS: case LEX_TOK_EOB: case LEX_TOK_EOF:
    347             P->token->id = LEX_TOK_EOF;
    348             return P->token;
    349         }
    350         error_tok(P, P->token, "unexpected end of input");
    351     }
    352     if (P->token->id == tok_kw_doc_comment) {
    353         /* Note: we can have blanks that are control characters here, such as \t. */
    354         fb_add_doc(P, P->token);
    355         goto again;
    356     }
    357     debug_token("next", P->token);
    358     return P->token;
    359 }
    360 
    361 static void recover(fb_parser_t *P, long token_id, int consume)
    362 {
    363     while (!is_end(P->token)) {
    364         if (P->token->id == token_id) {
    365             if (consume) {
    366                 next(P);
    367             }
    368             P->doc = 0;
    369             return;
    370         }
    371         next(P);
    372     }
    373 }
    374 
    375 static void recover2(fb_parser_t *P, long token_id, int consume, long token_id_2, int consume_2)
    376 {
    377     while (!is_end(P->token)) {
    378         if (P->token->id == token_id) {
    379             if (consume) {
    380                 next(P);
    381             }
    382             P->doc = 0;
    383             return;
    384         }
    385         if (P->token->id == token_id_2) {
    386             if (consume_2) {
    387                 next(P);
    388             }
    389             P->doc = 0;
    390             return;
    391         }
    392         next(P);
    393     }
    394 }
    395 
    396 static inline fb_token_t *optional(fb_parser_t *P, long id) {
    397     fb_token_t *t = 0;
    398     if (P->token->id == id) {
    399         t = P->token;
    400         next(P);
    401     }
    402     return t;
    403 }
    404 
    405 static inline fb_token_t *match(fb_parser_t *P, long id, char *msg) {
    406     fb_token_t *t = 0;
    407     if (P->token->id == id) {
    408         t = P->token;
    409         next(P);
    410     } else {
    411         error_tok(P, P->token, msg);
    412     }
    413     return t;
    414 }
    415 
    416 /*
    417  * When a keyword should also be accepted as an identifier.
    418  * This is useful for JSON where field names are visible.
    419  * Since field names are not referenced within the schema,
    420  * this is generally safe. Enums can also be reserved but
    421  * they can then not be used as default values. Table names
    422  * and other type names should not be remapped as they can then
    423  * not be used as a type name for other fields.
    424  */
    425 #if FLATCC_ALLOW_KW_FIELDS
    426 static inline void remap_field_ident(fb_parser_t *P)
    427 {
    428     if (P->token->id >= LEX_TOK_KW_BASE && P->token->id < LEX_TOK_KW_END) {
    429         P->token->id = LEX_TOK_ID;
    430     }
    431 }
    432 #else
    433 static inline void remap_field_ident(fb_parser_t *P) { (void)P; }
    434 #endif
    435 
    436 #if FLATCC_ALLOW_KW_ENUMS
    437 static inline void remap_enum_ident(fb_parser_t *P)
    438 {
    439     if (P->token->id >= LEX_TOK_KW_BASE && P->token->id < LEX_TOK_KW_END) {
    440         P->token->id = LEX_TOK_ID;
    441     }
    442 }
    443 #else
    444 static inline void remap_enum_ident(fb_parser_t *P) { (void)P; }
    445 #endif
    446 
    447 static fb_token_t *advance(fb_parser_t *P, long id, const char *msg, fb_token_t *peer)
    448 {
    449     /*
    450      * `advance` is generally used at end of statements so it is a
    451      * convenient place to get rid of rogue doc comments we can't attach
    452      * to anything meaningful.
    453      */
    454     P->doc = 0;
    455     if (P->token->id != id) {
    456         error_tok_2(P, P->token, msg, peer);
    457         return P->token;
    458     }
    459     return next(P);
    460 }
    461 
    462 static void read_integer_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign)
    463 {
    464     int status;
    465 
    466     v->type = vt_uint;
    467     /* The token does not store the sign internally. */
    468     parse_integer(t->text, (size_t)t->len, &v->u, &status);
    469     if (status != PARSE_INTEGER_UNSIGNED) {
    470         v->type = vt_invalid;
    471         error_tok(P, t, "invalid integer format");
    472     }
    473     if (sign) {
    474         v->i = -(int64_t)v->u;
    475         v->type = vt_int;
    476 #ifdef FLATCC_FAIL_ON_INT_SIGN_OVERFLOW
    477         /* Sometimes we might want this, so don't fail by default. */
    478         if (v->i > 0) {
    479             v->type = vt_invalid;
    480             error_tok(P, t, "sign overflow in integer format");
    481         }
    482 #endif
    483     }
    484 }
    485 
    486 static void read_hex_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign)
    487 {
    488     int status;
    489 
    490     v->type = vt_uint;
    491     /* The token does not store the sign internally. */
    492     parse_hex_integer(t->text, (size_t)t->len, &v->u, &status);
    493     if (status != PARSE_INTEGER_UNSIGNED) {
    494         v->type = vt_invalid;
    495         error_tok(P, t, "invalid hex integer format");
    496     }
    497     if (sign) {
    498         v->i = -(int64_t)v->u;
    499         v->type = vt_int;
    500 #ifdef FLATCC_FAIL_ON_INT_SIGN_OVERFLOW
    501         /* Sometimes we might want this, so don't fail by default. */
    502         if (v->i > 0) {
    503             v->type = vt_invalid;
    504             error_tok(P, t, "sign overflow in hex integer format");
    505         }
    506 #endif
    507     }
    508 }
    509 
    510 static void read_float_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign)
    511 {
    512     char *end;
    513 
    514     v->type = vt_float;
    515     v->f = strtod(t->text, &end);
    516     if (end != t->text + t->len) {
    517         v->type = vt_invalid;
    518         error_tok(P, t, "invalid float format");
    519     } else if (t->text[0] == '.') {
    520         v->type = vt_invalid;
    521         /* The FB spec requires this, in line with the JSON format. */
    522         error_tok(P, t, "numeric values must start with a digit");
    523     } else if (sign) {
    524         v->f = -v->f;
    525     }
    526 }
    527 
    528 /*
    529  * We disallow escape characters, newlines and other control characters,
    530  * but especially escape characters because they would require us to
    531  * reallocate the string and convert the escaped characters. We also
    532  * disallow non-utf8 characters, but we do not check for it. The tab
    533  * character could meaningfully be accepted, but we don't.
    534  *
    535  * String literals are only used to name attributes, namespaces,
    536  * file identifiers and file extensions, so we really have no need
    537  * for these extra escape features.
    538  *
    539  * JSON strings should be handled separately, if or when supported -
    540  * either by converting escapes and reallocating the string, or
    541  * simply by ignoring the escape errors and use the string unmodified.
    542  */
    543 static void parse_string_literal(fb_parser_t *P, fb_value_t *v)
    544 {
    545     fb_token_t *t;
    546 
    547     v->type = vt_string;
    548     v->s.s = 0;
    549     v->s.len = 0;
    550 
    551     for (;;) {
    552         t = P->token;
    553         switch (t->id) {
    554         case LEX_TOK_STRING_PART:
    555             if (v->s.s == 0) {
    556                 v->s.s = (char *)t->text;
    557             }
    558             break;
    559         case LEX_TOK_STRING_ESCAPE:
    560             v->type = vt_invalid;
    561             error_tok(P, t, "escape not allowed in strings");
    562             break;
    563         case LEX_TOK_STRING_CTRL:
    564             v->type = vt_invalid;
    565             error_tok_as_string(P, t, "control characters not allowed in strings", "?", 1);
    566             break;
    567         case LEX_TOK_STRING_NEWLINE:
    568             v->type = vt_invalid;
    569             error_tok(P, t, "newline not allowed in strings");
    570             break;
    571         case LEX_TOK_STRING_UNTERMINATED:
    572         case LEX_TOK_STRING_END:
    573             goto done;
    574 
    575         default:
    576             error_tok(P, t, "internal error: unexpected token in string");
    577             v->type = vt_invalid;
    578             goto done;
    579         }
    580         next(P);
    581     }
    582 done:
    583     /*
    584      * If we were to ignore all errors, we would get the full
    585      * string as is excluding delimiting quotes.
    586      */
    587     if (v->s.s) {
    588         v->s.len = (int)(P->token->text - v->s.s);
    589     }
    590     if (!match(P, LEX_TOK_STRING_END, "unterminated string")) {
    591         v->type = vt_invalid;
    592     }
    593 }
    594 
    595 /* Current token must be an identifier. */
    596 static void parse_ref(fb_parser_t *P, fb_ref_t **ref)
    597 {
    598     *ref = fb_add_ref(P, P->token);
    599     next(P);
    600     ref = &((*ref)->link);
    601     while (optional(P, '.')) {
    602         if (P->token->id != LEX_TOK_ID) {
    603             error_tok(P, P->token, "namespace prefix expected identifier");
    604             break;
    605         }
    606         *ref = fb_add_ref(P, P->token);
    607         ref = &((*ref)->link);
    608         next(P);
    609     }
    610 }
    611 
    612 /* `flags` */
    613 enum { allow_string_value = 1, allow_id_value = 2, allow_null_value = 4 };
    614 static void parse_value(fb_parser_t *P, fb_value_t *v, int flags, const char *error_msg)
    615 {
    616     fb_token_t *t;
    617     fb_token_t *sign;
    618 
    619     sign = optional(P, '-');
    620     t = P->token;
    621 
    622     switch (t->id) {
    623     case LEX_TOK_HEX:
    624         read_hex_value(P, t, v, sign != 0);
    625         break;
    626     case LEX_TOK_INT:
    627         read_integer_value(P, t, v, sign != 0);
    628         break;
    629     case LEX_TOK_FLOAT:
    630         read_float_value(P, t, v, sign != 0);
    631         break;
    632     case tok_kw_true:
    633         v->b = 1;
    634         v->type = vt_bool;
    635         break;
    636     case tok_kw_false:
    637         v->b = 0;
    638         v->type = vt_bool;
    639         break;
    640     case tok_kw_null:
    641         if (!(flags & allow_null_value)) {
    642             v->type = vt_invalid;
    643             error_tok(P, t, error_msg);
    644             return;
    645         }
    646         v->type = vt_null;
    647         break;
    648     case LEX_TOK_STRING_BEGIN:
    649         next(P);
    650         parse_string_literal(P, v);
    651         if (!(flags & allow_string_value)) {
    652             v->type = vt_invalid;
    653             error_tok(P, t, error_msg);
    654             return;
    655         }
    656         if (sign) {
    657             v->type = vt_invalid;
    658             error_tok(P, t, "string constants cannot be signed");
    659             return;
    660         }
    661         return;
    662     case LEX_TOK_ID:
    663         parse_ref(P, &v->ref);
    664         v->type = vt_name_ref;
    665         if (sign) {
    666             v->type = vt_invalid;
    667             /* Technically they could, but we do not allow it. */
    668             error_tok(P, t, "named values cannot be signed");
    669         }
    670         return;
    671     default:
    672         /* We might have consumed a sign, but never mind that. */
    673         error_tok(P, t, error_msg);
    674         return;
    675     }
    676     if (sign && v->type == vt_bool) {
    677         v->type = vt_invalid;
    678         error_tok(P, t, "boolean constants cannot be signed");
    679     }
    680     next(P);
    681 }
    682 
    683 static void parse_fixed_array_size(fb_parser_t *P, fb_token_t *ttype, fb_value_t *v)
    684 {
    685     const char *error_msg = "fixed length array length expected to be an unsigned integer";
    686     fb_value_t vsize;
    687     fb_token_t *tlen = P->token;
    688 
    689     parse_value(P, &vsize, 0, error_msg);
    690     if (vsize.type != vt_uint) {
    691         error_tok(P, tlen, error_msg);
    692         v->type = vt_invalid;
    693         return;
    694     }
    695     if (v->type == vt_invalid) return;
    696     switch (v->type) {
    697     case vt_vector_type:
    698         v->type = vt_fixed_array_type;
    699         break;
    700     case vt_vector_type_ref:
    701         v->type = vt_fixed_array_type_ref;
    702         break;
    703     case vt_vector_string_type:
    704         v->type = vt_fixed_array_string_type;
    705         break;
    706     case vt_invalid:
    707         return;
    708     default:
    709         error_tok(P, ttype, "invalid fixed length array type");
    710         v->type = vt_invalid;
    711         return;
    712     }
    713     if (vsize.u == 0) {
    714         error_tok(P, tlen, "fixed length array length cannot be 0");
    715         v->type = vt_invalid;
    716         return;
    717     }
    718     /*
    719      * This allows for safe 64-bit multiplication by elements no
    720      * larger than 2^32-1 and also fits into the value len field.
    721      * without extra size cost.
    722      */
    723     if (vsize.u > UINT32_MAX) {
    724         error_tok(P, tlen, "fixed length array length overflow");
    725         v->type = vt_invalid;
    726         return;
    727     }
    728     v->len = (uint32_t)vsize.u;
    729 }
    730 
    731 /* ':' must already be matched */
    732 static void parse_type(fb_parser_t *P, fb_value_t *v)
    733 {
    734     fb_token_t *t = 0;
    735     fb_token_t *ttype = 0;
    736     fb_token_t *t0 = P->token;
    737     int vector = 0;
    738 
    739     v->len = 1;
    740     v->type = vt_invalid;
    741     while ((t = optional(P, '['))) {
    742         ++vector;
    743     }
    744     if (vector > 1) {
    745         error_tok(P, t0, "vector type can only be one-dimensional");
    746     }
    747     ttype = P->token;
    748     switch (ttype->id) {
    749     case tok_kw_int:
    750     case tok_kw_bool:
    751     case tok_kw_byte:
    752     case tok_kw_long:
    753     case tok_kw_uint:
    754     case tok_kw_float:
    755     case tok_kw_short:
    756     case tok_kw_char:
    757     case tok_kw_ubyte:
    758     case tok_kw_ulong:
    759     case tok_kw_ushort:
    760     case tok_kw_double:
    761     case tok_kw_int8:
    762     case tok_kw_int16:
    763     case tok_kw_int32:
    764     case tok_kw_int64:
    765     case tok_kw_uint8:
    766     case tok_kw_uint16:
    767     case tok_kw_uint32:
    768     case tok_kw_uint64:
    769     case tok_kw_float32:
    770     case tok_kw_float64:
    771         v->t = P->token;
    772         v->type = vector ? vt_vector_type : vt_scalar_type;
    773         next(P);
    774         break;
    775     case tok_kw_string:
    776         v->t = P->token;
    777         v->type = vector ? vt_vector_string_type : vt_string_type;
    778         next(P);
    779         break;
    780     case LEX_TOK_ID:
    781         parse_ref(P, &v->ref);
    782         v->type = vector ? vt_vector_type_ref : vt_type_ref;
    783         break;
    784     case ']':
    785         error_tok(P, t, "vector type cannot be empty");
    786         break;
    787     default:
    788         error_tok(P, ttype, "invalid type specifier");
    789         break;
    790     }
    791     if (vector && optional(P, ':')) {
    792         parse_fixed_array_size(P, ttype, v);
    793     }
    794     while (optional(P, ']') && vector--) {
    795     }
    796     if (vector) {
    797         error_tok_2(P, t, "vector type missing ']' to match", t0);
    798     }
    799     if ((t = optional(P, ']'))) {
    800         error_tok_2(P, t, "extra ']' not matching", t0);
    801         while (optional(P, ']')) {
    802         }
    803     }
    804     if (ttype->id == tok_kw_char && v->type != vt_invalid) {
    805         if (v->type != vt_fixed_array_type) {
    806             error_tok(P, ttype, "char can only be used as a fixed length array type [char:<n>]");
    807             v->type = vt_invalid;
    808         }
    809     }
    810 }
    811 
    812 static fb_metadata_t *parse_metadata(fb_parser_t *P)
    813 {
    814     fb_token_t *t, *t0;
    815     fb_metadata_t *md = 0;
    816 
    817     if (!(t0 = optional(P, '('))) {
    818         return 0;
    819     }
    820     if ((t = optional(P, LEX_TOK_ID)))
    821     for (;;) {
    822         fb_add_metadata(P, &md);
    823         md->ident = t;
    824         if (optional(P, ':')) {
    825             parse_value(P, &md->value, allow_string_value, "scalar or string value expected");
    826         }
    827         if (P->failed >= FLATCC_MAX_ERRORS) {
    828             return md;
    829         }
    830         if (!optional(P, ',')) {
    831             break;
    832         }
    833         if (!(t = match(P, LEX_TOK_ID, "attribute name expected identifier after ','"))) {
    834             break;
    835         }
    836     }
    837     advance(P, ')', "metadata expected ')' to match", t0);
    838     revert_metadata(&md);
    839     return md;
    840 }
    841 
    842 static void parse_field(fb_parser_t *P, fb_member_t *fld)
    843 {
    844     fb_token_t *t;
    845 
    846     remap_field_ident(P);
    847     if (!(t = match(P, LEX_TOK_ID, "field expected identifier"))) {
    848         goto fail;
    849     }
    850     fld->symbol.ident = t;
    851     if (!match(P, ':', "field expected ':' before mandatory type")) {
    852         goto fail;
    853     }
    854     parse_type(P, &fld->type);
    855     if (optional(P, '=')) {
    856         /*
    857          * Because types can be named references, we do not check the
    858          * default assignment before the schema is fully parsed.
    859          * We allow the initializer to be a name in case it is an enum
    860          * name.
    861          */
    862         parse_value(P, &fld->value, allow_id_value | allow_null_value, "initializer must be of scalar type or null");
    863     }
    864     fld->metadata = parse_metadata(P);
    865     advance(P, ';', "field must be terminated with ';'", 0);
    866     return;
    867 fail:
    868     recover2(P, ';', 1, '}', 0);
    869 }
    870 
    871 static void parse_method(fb_parser_t *P, fb_member_t *fld)
    872 {
    873     fb_token_t *t;
    874     if (!(t = match(P, LEX_TOK_ID, "method expected identifier"))) {
    875         goto fail;
    876     }
    877     fld->symbol.ident = t;
    878     if (!match(P, '(', "method expected '(' after identifier")) {
    879         goto fail;
    880     }
    881     parse_type(P, &fld->req_type);
    882     if (!match(P, ')', "method expected ')' after request type")) {
    883         goto fail;
    884     }
    885     if (!match(P, ':', "method expected ':' before mandatory response type")) {
    886         goto fail;
    887     }
    888     parse_type(P, &fld->type);
    889     if ((t = optional(P, '='))) {
    890         error_tok(P, t, "method does not accept an initializer");
    891         goto fail;
    892     }
    893     fld->metadata = parse_metadata(P);
    894     advance(P, ';', "method must be terminated with ';'", 0);
    895     return;
    896 fail:
    897     recover2(P, ';', 1, '}', 0);
    898 }
    899 
    900 /*
         * Parse an enum declaration into `ct`; `enum` must already be matched.
         *
         * Reads the enum identifier, an optional `: <type>`, metadata, and a
         * brace-enclosed, comma-separated member list. A type that is not a
         * scalar type, or that is one of the float/double keywords, is
         * reported with "integral type expected" but parsing continues. Each
         * member is an identifier with an optional `= <value>`; a comma
         * directly before '}' is accepted. Members are appended with
         * `fb_add_member`, and the member list is passed to `revert_symbols`
         * after the closing brace has been handled.
         *
         * On a syntax error, or once `P->failed` reaches FLATCC_MAX_ERRORS,
         * control jumps to `fail`, which calls `recover` with '}'.
         */
    901 static void parse_enum_decl(fb_parser_t *P, fb_compound_type_t *ct)
    902 {
    903     fb_token_t *t, *t0;
    904     fb_member_t *member;
    905 
    906     if (!(ct->symbol.ident = match(P, LEX_TOK_ID, "enum declaration expected identifier"))) {
    907         goto fail;
    908     }
    909     if (optional(P, ':')) {
    910         parse_type(P, &ct->type);
    911         if (ct->type.type != vt_scalar_type) {
    912             error_tok(P, ct->type.t, "integral type expected");
    913         } else {
                    /* Scalar, but floating point keywords are still rejected. */
    914             switch (ct->type.t->id) {
    915             case tok_kw_float:
    916             case tok_kw_double:
    917             case tok_kw_float32:
    918             case tok_kw_float64:
    919                 error_tok(P, ct->type.t, "integral type expected");
    920                 break;
    921             default:
    922                 break;
    923             }
    924         }
    925     }
    926     ct->metadata = parse_metadata(P);
            /* `t0` remembers the opening brace for the closing-brace error below. */
    927     if (!((t0 = match(P, '{', "enum declaration expected '{'")))) {
    928         goto fail;
    929     }
    930     for (;;) {
    931         remap_enum_ident(P);
    932         if (!(t = match(P, LEX_TOK_ID,
    933                 "member identifier expected"))) {
    934             goto fail;
    935         }
    936         if (P->failed >= FLATCC_MAX_ERRORS) {
    937             goto fail;
    938         }
    939         member = fb_add_member(P, &ct->members);
    940         member->symbol.ident = t;
    941         if (optional(P, '=')) {
                    /*
                     * NOTE(review): `t` is not read again before it is
                     * reassigned on the next iteration, so this store looks
                     * unused - confirm before removing.
                     */
    942             t = P->token;
    943             parse_value(P, &member->value, 0, "integral constant expected");
    944             /* Leave detailed type (e.g. no floats) and range checking to a later stage. */
    945         }
    946         /*
    947          * Trailing comma is optional in flatc but not in grammar, we
    948          * follow flatc.
    949          */
    950         if (!optional(P, ',') || P->token->id == '}') {
    951             break;
    952         }
    953         P->doc = 0;
    954     }
            /* `t0` cannot be null here: a failed '{' match jumped to `fail` above. */
    955     if (t0) {
    956         advance(P, '}', "enum missing closing '}' to match", t0);
    957     }
    958     revert_symbols(&ct->members);
    959     return;
    960 fail:
    961     recover(P, '}', 1);
    962 }
    963 
    964 /*
         * Parse a union declaration into `ct`; `union` must already be matched.
         *
         * Reads the union identifier, metadata, and a brace-enclosed,
         * comma-separated member list. Each member starts with a (possibly
         * qualified) reference parsed by `parse_ref`; the member is named after
         * the last identifier in that reference chain and its type defaults to
         * a type reference to the full chain. An optional `: <type>` replaces
         * the member type (reported as an error when the name was qualified),
         * and an optional `= <value>` sets the member value. A comma directly
         * before '}' is accepted.
         *
         * After the closing brace the member list is passed to
         * `revert_symbols` and an extra member named by the `P->t_none` token
         * is added. On a syntax error, or once `P->failed` reaches
         * FLATCC_MAX_ERRORS, control jumps to `fail`, which calls `recover2`
         * with ';' and '}'.
         */
    965 static void parse_union_decl(fb_parser_t *P, fb_compound_type_t *ct)
    966 {
    967     fb_token_t *t0;
    968     fb_member_t *member;
    969     fb_ref_t *ref;
    970     fb_token_t *t;
    971 
    972     if (!(ct->symbol.ident = match(P, LEX_TOK_ID, "union declaration expected identifier"))) {
    973         goto fail;
    974     }
    975     ct->metadata = parse_metadata(P);
            /* `t0` remembers the opening brace for the closing-brace error below. */
    976     if (!((t0 = match(P, '{', "union declaration expected '{'")))) {
    977         goto fail;
    978     }
    979     for (;;) {
    980         if (P->token->id != LEX_TOK_ID) {
    981             error_tok(P, P->token, "union expects an identifier");
    982             goto fail;
    983         }
    984         if (P->failed >= FLATCC_MAX_ERRORS) {
    985             goto fail;
    986         }
                /* First token of the member, kept for error reporting. */
    987         t = P->token;
    988         member = fb_add_member(P, &ct->members);
    989         parse_ref(P, &ref);
    990         member->type.ref = ref;
    991         member->type.type = vt_type_ref;
                /* Walk to the last link of the reference chain. */
    992         while (ref->link) {
    993             ref = ref->link;
    994         }
    995         /* The union member name is the unqualified reference. */
    996         member->symbol.ident = ref->ident;
    997         if (optional(P, ':')) {
                    /* A link on the first ref means the name had more than one part. */
    998             if (member->type.ref->link) {
    999                 error_tok(P, t, "qualified union member name cannot have an explicit type");
   1000             }
   1001             parse_type(P, &member->type);
   1002             /* Leave type checking to later stage. */
   1003         }
   1004         if (optional(P, '=')) {
   1005             parse_value(P, &member->value, 0, "integral constant expected");
   1006             /* Leave detailed type (e.g. no floats) and range checking to a later stage. */
   1007         }
   1008         if (!optional(P, ',') || P->token->id == '}') {
   1009             break;
   1010         }
   1011         P->doc = 0;
   1012     }
   1013     advance(P, '}', "union missing closing '}' to match", t0);
   1014     revert_symbols(&ct->members);
   1015     /* Add implicit `NONE` member first in the list. */
   1016     member = fb_add_member(P, &ct->members);
   1017     member->symbol.ident = &P->t_none;
   1018     return;
   1019 fail:
   1020     recover2(P, ';', 1, '}', 0);
   1021 }
   1022 
   1023 /*
         * Parse the declaration of a struct, table, or rpc_service into `ct`.
         * `struct` , `table`, or 'rpc_service' must already be matched.
         *
         * `token` is the keyword that introduced the declaration. When it is
         * `tok_kw_rpc_service` every member is parsed with `parse_method`;
         * otherwise every member is parsed with `parse_field`. An empty body is
         * accepted. The member list is passed to `revert_symbols` after the
         * closing brace.
         *
         * On a syntax error in the header, or once `P->failed` reaches
         * FLATCC_MAX_ERRORS, control jumps to `fail`, which calls `recover`
         * with '}'.
         */
   1024 static void parse_compound_type(fb_parser_t *P, fb_compound_type_t *ct, long token)
   1025 {
   1026     fb_token_t *t = 0;
   1027 
   1028     if (!(t = match(P, LEX_TOK_ID, "Declaration expected an identifier"))) {
   1029         goto fail;
   1030     }
   1031     ct->symbol.ident = t;
   1032     ct->metadata = parse_metadata(P);
   1033     if (!(match(P, '{', "Declaration expected '{'"))) {
   1034         goto fail;
   1035     }
            /*
             * NOTE(review): this is the token following '{', not the brace
             * itself, yet it is later reported as the token "to match". The
             * enum and union parsers keep the token returned by match()
             * instead - confirm which is intended.
             */
   1036     t = P->token;
   1037 
   1038 /* Allow empty tables and structs. */
   1039 #if 0
   1040     if (P->token->id == '}') {
   1041         error_tok(P, t, "table / struct declaration cannot be empty");
   1042     }
   1043 #endif
            /* A new member is added before each parse, even if that parse then fails. */
   1044     while (P->token->id != '}') {
   1045         if (token == tok_kw_rpc_service) {
   1046             parse_method(P, fb_add_member(P, &ct->members));
   1047         } else {
   1048             parse_field(P, fb_add_member(P, &ct->members));
   1049         }
   1050         if (P->failed >= FLATCC_MAX_ERRORS) {
   1051             goto fail;
   1052         }
   1053     }
            /*
             * The loop above only exits normally when the current token is
             * '}', so this error branch looks unreachable - NOTE(review).
             */
   1054     if (!optional(P, '}') && t) {
   1055         error_tok_2(P, P->token, "Declaration missing closing '}' to match", t);
   1056     }
   1057     revert_symbols(&ct->members);
   1058     return;
   1059 fail:
   1060     recover(P, '}', 1);
   1061 }
   1062 
        /*
         * Parse the part of a namespace statement that follows the `namespace`
         * keyword (the caller has already consumed the keyword).
         *
         * A lone ';' switches `P->current_scope` back to `P->root_scope`.
         * Otherwise an identifier reference is parsed and `P->current_scope`
         * becomes the scope returned by `fb_add_scope` for that reference. If
         * the next token is not an identifier, an error is reported and
         * `recover` is called with ';'.
         */
   1063 static void parse_namespace(fb_parser_t *P)
   1064 {
   1065     fb_ref_t *ref = 0;
            /* First token after the keyword, used to locate a missing ';'. */
   1066     fb_token_t *t = P->token;
   1067 
   1068     if (optional(P, ';') && t) {
   1069         /* Revert to global namespace. */
   1070         P->current_scope = P->root_scope;
   1071         return;
   1072     }
   1073     if (P->token->id != LEX_TOK_ID) {
   1074         error_tok(P, P->token, "namespace expects an identifier");
   1075         recover(P, ';', 1);
   1076         return;
   1077     }
   1078     parse_ref(P, &ref);
   1079     advance(P, ';', "missing ';' expected by namespace at", t);
   1080     P->current_scope = fb_add_scope(P, ref);
   1081 }
   1082 
        /*
         * Parse the part of a root_type statement that follows the `root_type`
         * keyword (the caller has already consumed the keyword).
         *
         * The reference is parsed into `rt->name` and `rt->scope` is set to the
         * current scope. If `rt->name` was already set, "root_type already set"
         * is reported, but the new reference is still parsed into `rt->name`.
         */
   1083 static void parse_root_type(fb_parser_t *P, fb_root_type_t *rt)
   1084 {
            /* First token after the keyword, used to locate a missing ';'. */
   1085     fb_token_t *t = P->token;
   1086 
   1087     if (rt->name) {
   1088         error_tok(P, P->token, "root_type already set");
   1089     }
   1090     parse_ref(P, &rt->name);
   1091     rt->scope = P->current_scope;
   1092     advance(P, ';', "missing ';' expected by root_type at", t);
   1093 }
   1094 
        /*
         * Parse the run of `include "<filename>";` statements at the current
         * position. Parsing stops at the first token that is not `include`.
         *
         * Each filename is parsed into the `name` of a new entry obtained from
         * `fb_add_include`. When `P->opts.disable_includes` is set, every
         * include statement is reported as unsupported, at its own `include`
         * keyword. The function returns early once `P->failed` reaches
         * FLATCC_MAX_ERRORS.
         *
         * When the filename string is missing, the statement is skipped with
         * `recover` and no include entry is added for it, so the bad statement
         * does not trigger follow-on errors.
         */
   1095 static void parse_include(fb_parser_t *P)
   1096 {
   1097     fb_token_t *t;
   1098 
   1099     while ((t = optional(P, tok_kw_include))) {
   1100         if (P->opts.disable_includes) {
   1101             error_tok(P, t, "include statements not supported by current environment");
   1102         }
   1103         if (P->failed >= FLATCC_MAX_ERRORS) {
   1104             return;
   1105         }
   1106         if (!match(P, LEX_TOK_STRING_BEGIN, "include expected a string literal as filename")) {
   1107             recover(P, ';', 1);
   1108             continue;
   1109         }
   1110         parse_string_literal(P, &fb_add_include(P)->name);
   1111         match(P, ';', "include statement expected ';'");
   1112     }
   1113 }
   1114 
        /*
         * Parse the part of an attribute statement that follows the `attribute`
         * keyword (the caller has already consumed the keyword).
         *
         * The attribute name is a string literal parsed into `a->name.name`;
         * an empty name is reported as an error. A terminating ';' is matched
         * whether or not the string literal was present.
         */
   1115 static void parse_attribute(fb_parser_t *P, fb_attribute_t *a)
   1116 {
            /* Token at entry, used to locate the empty-name error. */
   1117     fb_token_t *t = P->token;
   1118 
   1119     if (match(P, LEX_TOK_STRING_BEGIN, "attribute expected string literal")) {
   1120         parse_string_literal(P, &a->name.name);
   1121         if (a->name.name.s.len == 0) {
   1122             error_tok_as_string(P, t, "attribute name cannot be empty", 0, 0);
   1123         }
   1124     }
   1125     match(P, ';', "attribute expected ';'");
   1126 }
   1127 
        /*
         * Parse the part of a file_extension statement that follows the
         * `file_extension` keyword (the caller has already consumed the
         * keyword).
         *
         * The extension is a string literal parsed into `v`. If `v` already
         * holds a string, "file extension already set" is reported together
         * with the previous value, and the new literal is still parsed into
         * `v`. When the string literal is missing, `recover` is called with
         * ';'.
         */
   1128 static void parse_file_extension(fb_parser_t *P, fb_value_t *v)
   1129 {
   1130     if (v->type == vt_string) {
   1131         error_tok_as_string(P, P->token, "file extension already set", v->s.s, (size_t)v->s.len);
   1132     }
   1133     if (!match(P, LEX_TOK_STRING_BEGIN, "file_extension expected string literal")) {
   1134         goto fail;
   1135     }
   1136     parse_string_literal(P, v);
   1137     match(P, ';', "file_extension expected ';'");
   1138     return;
   1139 fail:
   1140     recover(P, ';', 1);
   1141 }
   1142 
        /*
         * Parse the part of a file_identifier statement that follows the
         * `file_identifier` keyword (the caller has already consumed the
         * keyword).
         *
         * The identifier is a string literal parsed into `v`. If `v` is not
         * `vt_missing` on entry, "file identifier already set" is reported
         * together with the previous value. If the parsed string has a length
         * other than 4, `v->type` is set to `vt_invalid` and an error is
         * reported. When the string literal is missing, `recover` is called
         * with ';'.
         */
   1143 static void parse_file_identifier(fb_parser_t *P, fb_value_t *v)
   1144 {
   1145     fb_token_t *t;
   1146     if (v->type != vt_missing) {
   1147         error_tok_as_string(P, P->token, "file identifier already set", v->s.s, (size_t)v->s.len);
   1148     }
   1149     if (!match(P, LEX_TOK_STRING_BEGIN, "file_identifier expected string literal")) {
   1150         goto fail;
   1151     }
            /* Token right after the string opener, used to locate a length error. */
   1152     t = P->token;
   1153     parse_string_literal(P, v);
   1154     if (v->s.s && v->s.len != 4) {
   1155         v->type = vt_invalid;
   1156         error_tok(P, t, "file_identifier must be 4 characters");
   1157     }
   1158     match(P, ';', "file_identifier expected ';'");
   1159     return;
   1160 fail:
   1161     recover(P, ';', 1);
   1162 }
   1163 
        /*
         * Parse one top-level schema declaration, selected by the current
         * token.
         *
         * For each recognized keyword the keyword is consumed with `next` and
         * the matching parse function is called; struct, table, rpc_service,
         * enum, union, and attribute declarations get a freshly added object
         * from the corresponding `fb_add_*` function.
         *
         * For every other token (a misplaced `include`, a '{', control
         * characters, an unterminated comment, or anything unexpected) an error
         * is reported and the token is NOT consumed. The caller is expected to
         * notice that the current token did not move.
         */
   1164 static void parse_schema_decl(fb_parser_t *P)
   1165 {
   1166     switch(P->token->id) {
   1167     case tok_kw_namespace:
   1168         next(P);
   1169         parse_namespace(P);
   1170         break;
   1171     case tok_kw_file_extension:
   1172         next(P);
   1173         parse_file_extension(P, &P->schema.file_extension);
   1174         break;
   1175     case tok_kw_file_identifier:
   1176         next(P);
   1177         parse_file_identifier(P, &P->schema.file_identifier);
   1178         break;
   1179     case tok_kw_root_type:
   1180         next(P);
   1181         parse_root_type(P, &P->schema.root_type);
   1182         break;
   1183     case tok_kw_attribute:
   1184         next(P);
   1185         parse_attribute(P, fb_add_attribute(P));
   1186         break;
   1187     case tok_kw_struct:
   1188         next(P);
   1189         parse_compound_type(P, fb_add_struct(P), tok_kw_struct);
   1190         break;
   1191     case tok_kw_table:
   1192         next(P);
   1193         parse_compound_type(P, fb_add_table(P), tok_kw_table);
   1194         break;
   1195     case tok_kw_rpc_service:
   1196         next(P);
   1197         parse_compound_type(P, fb_add_rpc_service(P), tok_kw_rpc_service);
   1198         break;
   1199     case tok_kw_enum:
   1200         next(P);
   1201         parse_enum_decl(P, fb_add_enum(P));
   1202         break;
   1203     case tok_kw_union:
   1204         next(P);
   1205         parse_union_decl(P, fb_add_union(P));
   1206         break;
            /* The remaining cases only report an error; no token is consumed. */
   1207     case tok_kw_include:
   1208         error_tok(P, P->token, "include statements must be placed first in the schema");
   1209         break;
   1210     case '{':
   1211         error_tok(P, P->token, "JSON objects in schema file is not supported - but a schema specific JSON parser can be generated");
   1212         break;
   1213     case LEX_TOK_CTRL:
   1214         error_tok_as_string(P, P->token, "unexpected control character in schema definition", "?", 1);
   1215         break;
   1216     case LEX_TOK_COMMENT_CTRL:
   1217         error_tok_as_string(P, P->token, "unexpected control character in comment", "?", 1);
   1218         break;
   1219     case LEX_TOK_COMMENT_UNTERMINATED:
   1220         error_tok_as_string(P, P->token, "unterminated comment", "<eof>", 5);
   1221         break;
   1222     default:
   1223         error_tok(P, P->token, "unexpected token in schema definition");
   1224         break;
   1225     }
   1226 }
   1227 
        /*
         * Parse a whole schema from the current token: first the leading
         * include statements, then declarations until `is_end` reports the end
         * of input.
         *
         * Returns 0 when the end of input is reached. Returns -1 when
         * `P->failed` reaches FLATCC_MAX_ERRORS, or when a call to
         * `parse_schema_decl` leaves the current token where it was; in the
         * latter case "extra tokens in input" is reported only if no error has
         * been recorded yet.
         *
         * On success the attribute list is passed to `revert_names` and the
         * symbol list to `revert_symbols`.
         */
   1228 static int parse_schema(fb_parser_t *P)
   1229 {
   1230     fb_token_t *t, *t0;
   1231     parse_include(P);
   1232     t = P->token;
   1233     for (;;) {
   1234         if (is_end(t)) {
   1235             break;
   1236         }
   1237         if (P->failed >= FLATCC_MAX_ERRORS) {
   1238             return -1;
   1239         }
                /* Remember the position so lack of progress can be detected. */
   1240         t0 = t;
   1241         parse_schema_decl(P);
   1242         t = P->token;
   1243         if (t == t0) {
                    /* No token was consumed; stop rather than loop forever. */
   1244             if (P->failed) {
   1245                 return -1;
   1246             }
   1247             error_tok(P, t, "extra tokens in input");
   1248             return -1;
   1249         }
   1250     }
   1251     revert_names(&P->schema.attributes);
   1252     revert_symbols(&P->schema.symbols);
   1253     return 0;
   1254 }
   1255 
        /*
         * Free every buffer on the `P->elem_buffers` chain. The first
         * pointer-sized word of each buffer is read as the link to the next
         * buffer, so the link is fetched before the buffer itself is freed.
         */
   1256 static inline void clear_elem_buffers(fb_parser_t *P)
   1257 {
   1258     void **buf = P->elem_buffers;
   1259     void **following;
   1260 
   1261     while (buf != 0) {
   1262         following = *((void**)buf);
   1263         free(buf);
   1264         buf = following;
   1265     }
   1266 }
   1267 
        /*
         * Append a token with the given `id` covering the text [first, last)
         * to the token buffer and advance `P->token` past it.
         *
         * When the buffer is full (`P->token == P->te`) its capacity is
         * doubled, starting at 1024 tokens, and `P->te` and `P->token` are
         * recomputed against the possibly moved buffer. The token records the
         * current `P->linenum` and a 1-based position relative to `P->line`.
         */
   1268 static void push_token(fb_parser_t *P, long id, const char *first, const char *last)
   1269 {
   1270     size_t offset;
   1271     fb_token_t *t;
   1272 
   1273     if (P->token == P->te) {
                /* Save the write position as an index; realloc may move the buffer. */
   1274         offset = (size_t)(P->token - P->ts);
   1275         P->tcapacity = P->tcapacity ? 2 * P->tcapacity : 1024;
                /*
                 * NOTE(review): the old pointer is overwritten before the
                 * result is checked; that is only safe if checkmem() does not
                 * return on failure - confirm.
                 */
   1276         P->ts = realloc(P->ts, (size_t)P->tcapacity * sizeof(fb_token_t));
   1277         checkmem(P->ts);
   1278         P->te = P->ts + P->tcapacity;
   1279         P->token = P->ts + offset;
   1280     }
   1281     t = P->token;
   1282     t->id = id;
   1283     t->text = first;
   1284     t->len = (long)(last - first);
   1285     t->linenum = P->linenum;
   1286     t->pos = (long)(first - P->line + 1);
   1287     ++P->token;
   1288 }
   1289 
   1290 /* Push [first, last) as a doc comment token, but only in doc mode.
   1291  * If the file contains a control character, we can get multiple
   1292  * comments per line. */
   1293 static inline void push_comment(fb_parser_t *P, const char *first, const char *last)
   1294 {
   1295     if (!P->doc_mode) {
   1296         return;
   1297     }
   1298     push_token(P, tok_kw_doc_comment, first, last);
   1299 }
   1300 
        /*
         * Fill in `t` as a token with the given `id` whose text is the zero
         * terminated string `lex`. The token gets line number 0 and position
         * 0. Only the pointer `lex` is stored, not a copy of the string.
         */
   1301 static void inject_token(fb_token_t *t, const char *lex, long id)
   1302 {
   1303     t->linenum = 0;
   1304     t->pos = 0;
   1305     t->text = lex;
   1306     t->len = (long)strlen(lex);
   1307     t->id = id;
   1308 }
   1309 
   1310 /* --- Customize lexer --- */
   1311 
   1312 /* Depends on the `context` argument given to the lex function. */
   1313 #define ctx(name) (((fb_parser_t *)context)->name)
   1314 
        /* Count the line and remember where the next line starts. */
   1315 #define lex_emit_newline(first, last) (ctx(linenum)++, ctx(line) = last)
   1316 
        /* Same bookkeeping as above, and also emit a string newline token. */
   1317 #define lex_emit_string_newline(first, last)                            \
   1318     (ctx(linenum)++, ctx(line) = last,                                  \
   1319     push_token((fb_parser_t*)context, LEX_TOK_STRING_NEWLINE, first, last))
   1320 
   1321 /*
   1322  * Add empty comment on comment start - otherwise we miss empty lines.
   1323  * Save is_doc because comment_part does not remember.
   1324  */
   1325 #define lex_emit_comment_begin(first, last, is_doc)                     \
   1326     { ctx(doc_mode) = is_doc; push_comment((fb_parser_t*)context, last, last); }
   1327 #define lex_emit_comment_part(first, last) push_comment((fb_parser_t*)context, first, last)
   1328 #define lex_emit_comment_end(first, last) (ctx(doc_mode) = 0)
   1329 
   1330 /* By default emitted as lex_emit_other which would be ignored. */
   1331 #define lex_emit_comment_unterminated(pos)                                  \
   1332     push_token((fb_parser_t*)context, LEX_TOK_COMMENT_UNTERMINATED, pos, pos)
   1333 
        /*
         * Blank or non-control characters become part of the comment; any
         * other control character is emitted as a LEX_TOK_CTRL token.
         *
         * NOTE(review): this expands to a bare if/else statement, so it is only
         * safe where the lexer uses it as a complete statement - confirm
         * against the use in lex/luthor.c.
         */
   1334 #define lex_emit_comment_ctrl(pos)                                          \
   1335     if (lex_isblank(*pos) || !lex_isctrl(*pos)) {                           \
   1336         push_comment((fb_parser_t*)context, pos, pos + 1);                  \
   1337     } else {                                                                \
   1338         push_token((fb_parser_t*)context, LEX_TOK_CTRL,                     \
   1339                 pos, pos + 1);                                              \
   1340     }
   1341 
   1342 /*
   1343  * Provide hook to lexer for emitting tokens. We can override many
   1344  * things, but most default to calling lex_emit, so that is all we need
   1345  * to handle.
   1346  *
   1347  * `context` is a magic name available to macros in the lexer.
   1348  */
   1349 #define lex_emit(token, first, last)                                    \
   1350     push_token((fb_parser_t*)context, token, first, last)
   1351 
   1352 /*
   1353  * We could leave eos alone since it defaults to emit, but formally we
   1354  * should use the eof marker which is always zero, so parser can check
   1355  * for it easily, if needed.
   1356  */
   1357 #define lex_emit_eos(first, last)                                       \
   1358     push_token((fb_parser_t*)context, LEX_TOK_EOF, first, last)
   1359 
   1360 /*
   1361  * This event happens in place of eos if we exhaust the input buffer.
   1362  * In this case we treat this as end of input, but this choice prevents
   1363  * us from parsing across multiple buffers.
   1364  */
   1365 #define lex_emit_eob(pos)                                       \
   1366     push_token((fb_parser_t*)context, LEX_TOK_EOF, pos, pos)
   1367 
   1368 /*
   1369  * Luthor is our speedy generic lexer - it knows most common operators
   1370  * and therefore allows us to fail meaningfully on those that we don't
   1371  * support here, which is most.
   1372  */
   1373 #include "lex/luthor.c"
   1374 
   1375 #include "keywords.h"
   1376 
   1377 /*
         * Initialize parser `P` for the schema called `name`.
         *
         * `P` is zeroed first. `error_out` and `error_ctx` select the error
         * sink; when `error_out` is null, `fb_default_error_out` is used.
         * `opts` is copied into `P->opts`; when `opts` is null, the defaults
         * from `flatcc_init_options` are used instead. A null `name` is
         * replaced by FLATCC_DEFAULT_FILENAME.
         *
         * Root schema `rs` is null for top level parser, in which case the
         * parser's own `root_schema_instance` is used.
         *
         * Returns 0 on success, or -1 after reporting an error when the
         * configured offset size or voffset size is not 2, 4, or 8.
         */
   1378 int fb_init_parser(fb_parser_t *P, fb_options_t *opts, const char *name,
   1379         fb_error_fun error_out, void *error_ctx, fb_root_schema_t *rs)
   1380 {
   1381     size_t n, name_len;
   1382     char *s;
   1383 
   1384     memset(P, 0, sizeof(*P));
   1385 
   1386     if (error_out) {
   1387         P->error_out = error_out;
   1388         P->error_ctx = error_ctx;
   1389     } else {
   1390         P->error_out = fb_default_error_out;
   1391     }
   1392     if (opts) {
   1393         memcpy(&P->opts, opts, sizeof(*opts));
   1394     } else {
   1395         flatcc_init_options(&P->opts);
   1396     }
   1397     P->schema.root_schema = rs ? rs : &P->schema.root_schema_instance;
   1398     switch (P->opts.offset_size) {
   1399     case 2:
   1400     case 4:
   1401     case 8:
   1402         break;
   1403     default:
   1404         error(P, "invalid offset configured, must be 2, 4 (default), or 8");
   1405         return -1;
   1406     }
   1407     switch (P->opts.voffset_size) {
   1408     case 2:
   1409     case 4:
   1410     case 8:
   1411         break;
   1412     default:
   1413         error(P, "invalid voffset configured, must be 2 (default), 4, or 8");
   1414         return -1;
   1415     }
   1416     if (!name) {
   1417         /* Mostly for testing, just so we always have a name. */
   1418         name = FLATCC_DEFAULT_FILENAME;
   1419     }
            /* Still guards strlen() below should the default name itself be null. */
   1420     if (name == 0) {
   1421         name = "";
   1422     }
   1423     name_len = strlen(name);
            /*
             * Options are read through P->opts from here on, never through
             * `opts`: the caller may pass a null `opts`, while P->opts always
             * holds the effective options.
             */
   1424     checkmem((P->schema.basename = fb_create_basename(name, name_len, P->opts.default_schema_ext)));
   1425     n = strlen(P->schema.basename);
   1426     checkmem(s = fb_copy_path_n(P->schema.basename, n));
   1427     pstrntoupper(s, n);
            /* The upper-cased basename doubles as the schema name. */
   1428     P->schema.basenameup = s;
   1429     P->schema.name.name.s.s = s;
   1430     P->schema.name.name.s.len = (int)n;
   1431     checkmem((P->schema.errorname = fb_create_basename(name, name_len, "")));
   1432     P->schema.prefix.s = "";
   1433     P->schema.prefix.len = 0;
   1434     if (P->opts.ns) {
   1435         P->schema.prefix.s = (char *)P->opts.ns;
   1436         P->schema.prefix.len = (int)strlen(P->opts.ns);
   1437     }
   1438     P->root_scope = fb_add_scope(P, 0);
   1439     P->current_scope = P->root_scope;
   1440     assert(P->current_scope == fb_scope_table_find(&P->schema.root_schema->scope_index, 0, 0));
   1441     return 0;
   1442 }
   1443 
   1444 /*
   1445  * Main entry function for this specific parser type.
   1446  * We expect a zero terminated string.
   1447  *
   1448  * The parser should be cleared with `fb_clear_parser` subsequently.
   1449  * NOTE(review): this comment used to say that the parser structure is
         * uninitialized upon entry, but the function reads `P->opts` and
         * appends to the parser's token buffer, so `P` apparently must already
         * have been set up (e.g. by `fb_init_parser`) - confirm.
   1450  *
   1451  * Datastructures point into the token buffer and into the input
   1452  * buffer, so the parser and input should not be cleared prematurely.
   1453  *
   1454  * The input buffer must remain valid until the parser is cleared
   1455  * because the internal representation stores pointers into the buffer.
   1456  *
   1457  * `own_buffer` indicates that the buffer should be deallocated when
   1458  * the parser is cleaned up.
         *
         * Returns `P->failed`; the return value of `parse_schema` itself is
         * not used.
   1459  */
   1460 int fb_parse(fb_parser_t *P, const char *input, size_t len, int own_buffer)
   1461 {
   1462     static const char *id_none = "NONE";
   1463     static const char *id_ubyte = "ubyte";
   1464 
   1465     P->line = input;
   1466     P->linenum = 1;
   1467 
   1468     /* Used with union defaults. */
   1469     inject_token(&P->t_none, id_none, LEX_TOK_ID);
   1470     inject_token(&P->t_ubyte, id_ubyte, tok_kw_ubyte);
   1471 
   1472     if (own_buffer) {
   1473         P->managed_input = input;
   1474     }
   1475     lex(input, len, 0, P);
   1476 
            /*
             * Lexing left P->token one past the last token. Make that the end
             * of the token range and rewind to the first token for parsing.
             */
   1477     P->te = P->token;
   1478     P->token = P->ts;
   1479     /* Only used while processing table id's. */
   1480     checkmem((P->tmp_field_marker = malloc(sizeof(P->tmp_field_marker[0]) * (size_t)P->opts.vt_max_count)));
   1481     checkmem((P->tmp_field_index = malloc(sizeof(P->tmp_field_index[0]) * (size_t)P->opts.vt_max_count)));
            /* Step over a doc comment token at the very start of the input. */
   1482     if (P->token->id == tok_kw_doc_comment) {
   1483         next(P);
   1484     }
   1485     parse_schema(P);
   1486     return P->failed;
   1487 }
   1488 
        /*
         * Destructor callback handed to `fb_scope_table_destroy`: clears the
         * symbol index of `scope`. `item` is not used.
         */
   1489 static void __destroy_scope_item(void *item, fb_scope_t *scope)
   1490 {
   1491     fb_symbol_table_clear(&scope->symbol_index);
   1492 
   1493     /* Each scope points into table that is cleared separately. */
   1494     (void)item;
   1495 }
   1496 
        /*
         * Release everything owned by parser `P` and zero the structure.
         *
         * Clears the per-type index and value set of every struct, table,
         * rpc_service, enum, and union symbol, then the include and attribute
         * indexes and the visible schema set. Frees the temporary field
         * arrays, the token buffer, the basename strings, the error name, and
         * the path. Destroys the scope table, frees the element buffers, and
         * finally frees the input buffer if the parser owns it.
         */
   1497 void fb_clear_parser(fb_parser_t *P)
   1498 {
   1499     fb_symbol_t *sym;
   1500     fb_compound_type_t *ct;
   1501 
   1502     for (sym = P->schema.symbols; sym; sym = sym->link) {
                /* Symbols of any other kind are skipped; there is no default case. */
   1503         switch (sym->kind) {
   1504         case fb_is_struct:
   1505         case fb_is_table:
   1506         case fb_is_rpc_service:
   1507         case fb_is_enum:
   1508         case fb_is_union:
   1509             ct = (fb_compound_type_t *)sym;
   1510             fb_symbol_table_clear(&ct->index);
   1511             fb_value_set_clear(&ct->value_set);
   1512         }
   1513     }
   1514     fb_schema_table_clear(&P->schema.root_schema_instance.include_index);
   1515     fb_name_table_clear(&P->schema.root_schema_instance.attribute_index);
   1516     ptr_set_clear(&P->schema.visible_schema);
   1517     if (P->tmp_field_marker) {
   1518         free(P->tmp_field_marker);
   1519     }
   1520     if (P->tmp_field_index) {
   1521         free(P->tmp_field_index);
   1522     }
   1523     if (P->ts) {
   1524         free(P->ts);
   1525     }
   1526     if (P->schema.basename) {
   1527         free((void *)P->schema.basename);
   1528     }
   1529     if (P->schema.basenameup) {
   1530         free((void *)P->schema.basenameup);
   1531     }
   1532     if (P->schema.errorname) {
   1533         free((void *)P->schema.errorname);
   1534     }
   1535     /*
   1536      * P->referer_path in included files points to parent P->path, so
   1537      * don't free it, and don't access it after this point.
   1538      */
   1539     if (P->path) {
   1540         free((void *)P->path);
   1541     }
   1542     fb_scope_table_destroy(&P->schema.root_schema_instance.scope_index,
   1543             __destroy_scope_item, 0);
   1544     /* Destroy last since destructor has references into elem buffer. */
   1545     clear_elem_buffers(P);
   1546     if (P->managed_input) {
   1547         free((void *)P->managed_input);
   1548     }
            /* Leave no stale pointers behind. */
   1549     memset(P, 0, sizeof(*P));
   1550 }