nostrdb

an unfairly fast embedded nostr database backed by lmdb
git clone git://jb55.com/nostrdb
Log | Files | Refs | Submodules | README | LICENSE

symbols.h (10984B)


      1 /* Flatbuffers parser attributes and symbols. */
      2 
      3 #ifndef SYMBOLS_H
      4 #define SYMBOLS_H
      5 
      6 #include <stdint.h>
      7 
      8 #include "config.h"
      9 #include "lex/tokens.h"
     10 #include "hash/hash_table.h"
     11 #include "hash/ptr_set.h"
     12 
     13 typedef struct fb_token fb_token_t; /* Forward typedefs so the structs below can refer to one another. */
     14 typedef struct fb_string fb_string_t;
     15 typedef struct fb_value fb_value_t;
     16 typedef struct fb_symbol fb_symbol_t;
     17 
     18 typedef struct fb_metadata fb_metadata_t;
     19 
     20 typedef struct fb_name fb_name_t;
     21 typedef fb_symbol_t fb_namespace_t; /* Alias: a namespace is represented as a plain symbol node. */
     22 typedef fb_symbol_t fb_ref_t; /* Alias: a reference is represented as a plain symbol node. */
     23 /* Doc is not strictly a symbol, just a chained token list, but close enough. */
     24 typedef fb_symbol_t fb_doc_t;
     25 typedef fb_name_t fb_include_t; /* Alias: an include entry is an fb_name_t. */
     26 typedef struct fb_attribute fb_attribute_t;
     27 
     28 typedef struct fb_member fb_member_t;
     29 typedef struct fb_compound_type fb_compound_type_t;
     30 
     31 typedef struct fb_scope fb_scope_t;
     32 typedef struct fb_root_schema fb_root_schema_t;
     33 typedef struct fb_root_type fb_root_type_t;
     34 typedef struct fb_schema fb_schema_t;
     35 
     36 enum { /* Keyword token ids; each enumerator is one more than the previous one. */
     37     tok_kw_base = LEX_TOK_KW_BASE, /* Base value is defined outside this file (presumably lex/tokens.h). */
     38     tok_kw_bool,
     39     tok_kw_byte,
     40     tok_kw_char,
     41     tok_kw_enum,
     42     tok_kw_float32,
     43     tok_kw_float64,
     44     tok_kw_int,
     45     tok_kw_int8,
     46     tok_kw_int16,
     47     tok_kw_int32,
     48     tok_kw_int64,
     49     tok_kw_long,
     50     tok_kw_null,
     51     tok_kw_true,
     52     tok_kw_uint,
     53     tok_kw_false,
     54     tok_kw_float,
     55     tok_kw_short,
     56     tok_kw_table,
     57     tok_kw_ubyte,
     58     tok_kw_uint8,
     59     tok_kw_uint16,
     60     tok_kw_uint32,
     61     tok_kw_uint64,
     62     tok_kw_ulong,
     63     tok_kw_union,
     64     tok_kw_double,
     65     tok_kw_string,
     66     tok_kw_struct,
     67     tok_kw_ushort,
     68     tok_kw_include,
     69     tok_kw_attribute,
     70     tok_kw_namespace,
     71     tok_kw_root_type,
     72     tok_kw_rpc_service,
     73     tok_kw_file_extension,
     74     tok_kw_file_identifier,
     75     LEX_TOK_KW_END, /* One past the last real keyword above; do not insert keywords after this line. */
     76     /* Pseudo keywords: numbered after LEX_TOK_KW_END, so outside the real keyword range. */
     77     tok_kw_doc_comment
     78 };
     79 
     80 struct fb_token { /* One token; per the note above struct fb_schema, tokens point into the source buffer. */
     81     const char *text; /* Not owned by the token (const); presumably the token's first character in the source - confirm. */
     82     long len; /* Presumably the length of `text` - confirm against the lexer. */
     83     long linenum; /* Presumably the source line of the token - confirm. */
     84     long pos; /* Presumably the position of the token within the source - confirm units. */
     85     long id; /* Presumably a token id such as the tok_kw_* values - confirm. */
     86 };
     87 
     88 enum fb_scalar_type {
     89     fb_missing_type = 0,
     90     fb_ulong,
     91     fb_uint,
     92     fb_ushort,
     93     fb_ubyte,
     94     fb_bool,
     95     fb_long,
     96     fb_int,
     97     fb_short,
     98     fb_byte,
     99     fb_double,
    100     fb_float,
    101     fb_char,
    102 };
    103 
    104 typedef enum fb_scalar_type fb_scalar_type_t;
    105 
    106 static inline size_t sizeof_scalar_type(fb_scalar_type_t st)
    107 {
    108     static const size_t scalar_type_size[] = {
    109         0, 8, 4, 2, 1, 1, 8, 4, 2, 1, 8, 4, 1
    110     };
    111 
    112     return scalar_type_size[st];
    113 }
    114 
    115 enum fb_value_type { /* Value/type tags; presumably what fb_value_t.type holds - confirm against the parser. */
    116     vt_missing = 0, /* Zero, so a zero-filled value carries this tag. */
    117     vt_invalid = 1,
    118     vt_null,
    119     vt_string,
    120     vt_float,
    121     vt_int,
    122     vt_uint,
    123     vt_bool,
    124     vt_vector_type,
    125     vt_scalar_type,
    126     vt_vector_string_type,
    127     vt_string_type,
    128     vt_vector_type_ref,
    129     vt_type_ref,
    130     vt_name_ref,
    131     vt_compound_type_ref,
    132     vt_vector_compound_type_ref,
    133     vt_fixed_array_type,
    134     vt_fixed_array_type_ref,
    135     vt_fixed_array_string_type,
    136     vt_fixed_array_compound_type_ref
    137 };
    138 
    139 struct fb_string { /* Character pointer plus explicit length. */
    140     char *s; /* NOTE(review): whether `s` is NUL-terminated is not visible here - rely on `len`. */
    141     /* printf statements rely on this being int. */
    142     int len;
    143 };
    144 
    145 struct fb_value { /* Tagged value: an anonymous union plus a `type` tag and a `len`. */
    146     union { /* Anonymous union (C11 or compiler extension): all members share the same storage. */
    147         fb_string_t s;
    148         double f;
    149         int64_t i;
    150         uint64_t u;
    151         uint8_t b;
    152         fb_token_t *t;
    153         fb_compound_type_t *ct;
    154         fb_scalar_type_t st;
    155         fb_ref_t *ref;
    156     };
    157     unsigned short type; /* Presumably an enum fb_value_type tag selecting the active union member - confirm. */
    158     uint32_t len; /* NOTE(review): meaning not visible in this header; presumably a fixed-array length - confirm. */
    159 };
    160 
    161 enum fb_kind { /* Symbol kinds; the get_*_if_visible helpers in this file switch on fb_symbol.kind with these. */
    162     fb_is_table, /* First enumerator, so its value is 0. */
    163     fb_is_struct,
    164     fb_is_rpc_service,
    165     fb_is_enum,
    166     fb_is_union,
    167     fb_is_member
    168 };
    169 
    170 /*
    171  * Used for white, gray, black graph coloring while detecting circular
    172  * references. Each value is a distinct single bit.
    173  */
    174 enum fb_symbol_flags {
    175     fb_circular_open = 1,
    176     fb_circular_closed = 2,
    177     fb_duplicate = 4,
    178     fb_indexed = 8,
    179 };
    180 
    181 enum fb_member_flags { /* Presumably bits for fb_member.flags - confirm against the parser. */
    182     fb_fm_optional = 1
    183 };
    184 
    185 /*
    186  * We keep the link first in all structs so that we can use a
    187  * generic list reverse function after all symbols have been pushed
    188  * within a scope.
    189  */
    190 struct fb_symbol {
    191     fb_symbol_t *link; /* Must remain the first member (see the comment above). */
    192     fb_token_t *ident;
    193     uint16_t kind; /* Compared against enum fb_kind values by the get_*_if_visible helpers. */
    194     uint16_t flags; /* Presumably enum fb_symbol_flags bits - confirm. */
    195 };
    196 
    197 struct fb_name { /* List node carrying a name value. */
    198     fb_name_t *link; /* First member, matching the link-first layout used by the other list structs. */
    199     fb_value_t name;
    200 };
    201 
    202 #define fb_name_table __flatcc_fb_name_table /* Map each table name to a __flatcc_-prefixed identifier before DECLARE_HASH_TABLE sees it. */
    203 #define fb_value_set __flatcc_fb_value_set
    204 #define fb_symbol_table __flatcc_fb_symbol_table
    205 #define fb_scope_table __flatcc_fb_scope_table
    206 
    207 DECLARE_HASH_TABLE(fb_name_table, fb_name_t *) /* DECLARE_HASH_TABLE is defined elsewhere (presumably hash/hash_table.h); the <name>_t types used later in this file come from these lines. */
    208 DECLARE_HASH_TABLE(fb_schema_table, fb_schema_t *) /* NOTE(review): unlike the other four tables, fb_schema_table has no __flatcc_ mapping above - confirm this is intentional. */
    209 DECLARE_HASH_TABLE(fb_value_set, fb_value_t *)
    210 DECLARE_HASH_TABLE(fb_symbol_table, fb_symbol_t *)
    211 DECLARE_HASH_TABLE(fb_scope_table, fb_scope_t *)
    212 
    213 struct fb_member { /* A member symbol: per the field comments below, a struct/table field or an rpc method. */
    214     fb_symbol_t symbol; /* Kept first so a member can be handled as a generic fb_symbol_t list node. */
    215     /* Struct or table field type, or method response type. */
    216     fb_value_t type;
    217     /* Method request type only used for methods. */
    218     fb_value_t req_type;
    219     fb_value_t value; /* NOTE(review): presumably the field's default value - confirm. */
    220     fb_metadata_t *metadata;
    221     fb_doc_t *doc;
    222     uint16_t metadata_flags; /* Presumably fb_f_* bits from enum fb_known_attribute_flags - confirm. */
    223     /*
    224      * `align`, `offset` are for structs only.  64-bit allows for
    225      * dynamically configured 64-bit file offsets. Align is restricted to
    226      * at most 256 and must be a power of 2.
    227      */
    228     uint16_t align;
    229     uint16_t flags; /* Presumably enum fb_member_flags bits - confirm. */
    230     uint64_t offset;
    231     uint64_t size;
    232 
    233     /* `id` is for table fields only. */
    234     uint64_t id;
    235     /*
    236      * Resolved `nested_flatbuffer` attribute type. Always a table if
    237      * set, and only on struct and table fields.
    238      */
    239     fb_compound_type_t *nest;
    240     /* Used to generate table fields in sorted order. */
    241     fb_member_t *order;
    242 
    243     /*
    244      * Used by code generators. Only valid during export and may hold
    245      * garbage from a previous export.
    246      */
    247     size_t export_index;
    248 };
    249 
    250 struct fb_metadata { /* List node pairing an identifier token with a value. */
    251     fb_metadata_t *link; /* First member, matching the link-first layout used by the other list structs. */
    252     fb_token_t *ident;
    253     fb_value_t value;
    254 };
    255 
    256 struct fb_compound_type { /* Symbol for the kinds accepted by get_compound_if_visible: struct, table, rpc_service, union, enum. */
    257     fb_symbol_t symbol; /* Kept first: the get_*_if_visible helpers cast an fb_symbol_t * to this struct. */
    258     /* `scope` may span multiple input files, but has a unique namespace. */
    259     fb_scope_t *scope;
    260     /* Identifies the schema the symbol belongs to. */
    261     fb_schema_t *schema;
    262     fb_symbol_t *members;
    263     fb_member_t *ordered_members;
    264     fb_member_t *primary_key;
    265     fb_metadata_t *metadata;
    266     fb_doc_t *doc;
    267     fb_value_t type; /* NOTE(review): presumably the underlying type of an enum - confirm. */
    268     fb_symbol_table_t index;
    269     /* Only for enums. */
    270     fb_value_set_t value_set;
    271     /* FNV-1a 32 bit hash of fully qualified name, accidental 0 maps to hash(""). */
    272     uint32_t type_hash;
    273     uint16_t metadata_flags; /* Presumably fb_f_* bits from enum fb_known_attribute_flags - confirm. */
    274     /* `count` is for tables only. */
    275     uint64_t count;
    276     /* `align`, `size` is for structs only. */
    277     uint16_t align;
    278     uint64_t size;
    279     /* Sort structs with forward references. */
    280     fb_compound_type_t *order;
    281     /*
    282      * Used by code generators. Only valid during export and may hold
    283      * garbage from a previous export.
    284      */
    285     size_t export_index;
    286 };
    287 
    288 enum fb_known_attributes { /* Index of each known attribute; enum fb_known_attribute_flags uses these as bit positions. */
    289     fb_attr_unknown = 0,
    290     fb_attr_id = 1,
    291     fb_attr_deprecated = 2,
    292     fb_attr_original_order = 3,
    293     fb_attr_force_align = 4,
    294     fb_attr_bit_flags = 5,
    295     fb_attr_nested_flatbuffer = 6,
    296     fb_attr_key = 7,
    297     fb_attr_required = 8,
    298     fb_attr_hash = 9,
    299     fb_attr_base64 = 10,
    300     fb_attr_base64url = 11,
    301     fb_attr_primary_key = 12,
    302     fb_attr_sorted = 13,
    303     KNOWN_ATTR_COUNT /* One more than the last index above, i.e. 14. */
    304 };
    305 
    306 enum fb_known_attribute_flags { /* One bit per known attribute: flag value is 1 << attribute index. */
    307     fb_f_unknown = 1 << fb_attr_unknown, /* Index 0, so this is bit value 1, not 0. */
    308     fb_f_id = 1 << fb_attr_id,
    309     fb_f_deprecated = 1 << fb_attr_deprecated,
    310     fb_f_original_order = 1 << fb_attr_original_order,
    311     fb_f_force_align = 1 << fb_attr_force_align,
    312     fb_f_bit_flags = 1 << fb_attr_bit_flags,
    313     fb_f_nested_flatbuffer = 1 << fb_attr_nested_flatbuffer,
    314     fb_f_key = 1 << fb_attr_key,
    315     fb_f_required = 1 << fb_attr_required,
    316     fb_f_hash = 1 << fb_attr_hash,
    317     fb_f_base64 = 1 << fb_attr_base64,
    318     fb_f_base64url = 1 << fb_attr_base64url,
    319     fb_f_primary_key = 1 << fb_attr_primary_key,
    320     fb_f_sorted = 1 << fb_attr_sorted, /* Highest bit used is bit 13, which fits the uint16_t metadata_flags fields. */
    321 };
    322 
    323 struct fb_attribute { /* A named attribute plus a `known` tag. */
    324     fb_name_t name; /* Embedded first, so the attribute starts with the fb_name_t link/name pair. */
    325     unsigned int known; /* Presumably an enum fb_known_attributes value - confirm. */
    326 };
    327 
    328 struct fb_scope { /* A scope (namespace); see the comments on fb_compound_type.scope and the visibility helpers. */
    329     fb_ref_t *name;
    330     fb_symbol_table_t symbol_index; /* Hash table type declared via DECLARE_HASH_TABLE(fb_symbol_table, ...). */
    331     fb_string_t prefix;
    332 };
    333 
    334 struct fb_root_schema { /* State reachable from every fb_schema via its root_schema pointer. */
    335     fb_scope_table_t scope_index;
    336     fb_name_table_t attribute_index;
    337     fb_schema_table_t include_index;
    338     int include_count;
    339     int include_depth;
    340     size_t total_source_size; /* NOTE(review): units not visible here; presumably bytes - confirm. */
    341 };
    342 
    343 struct fb_root_type { /* The schema's root type declaration: a name reference and its resolved symbol. */
    344     /* Root decl. before symbol is visible. */
    345     fb_ref_t *name;
    346     /* Resolved symbol. */
    347     fb_symbol_t *type;
    348     fb_scope_t *scope;
    349 };
    350 
    351 /*
    352  * We store the parsed structure as token references. Tokens are stored
    353  * in a single array pointing into the source buffer.
    354  *
    355  * Strings may contain multiple tokens when holding control characters
    356  * and line breaks, but for our purposes the first string part is
    357  * sufficient.
    358  */
    359 
    360 struct fb_schema {
    361     fb_include_t *includes;
    362     fb_name_t *attributes;
    363     fb_value_t file_identifier;
    364     fb_value_t file_extension;
    365     fb_symbol_t *symbols;
    366     /* Topologically sorted structs. */
    367     fb_compound_type_t *ordered_structs;
    368     fb_root_type_t root_type;
    369     fb_root_schema_t *root_schema;
    370     /* Only used if schema is root. */
    371     fb_root_schema_t root_schema_instance;
    372 
    373     /* An optional scope prefix for generated code. */
    374     fb_string_t prefix;
    375 
    376     /* The basenameup in a format that can be indexed. */
    377     fb_name_t name;
    378 
    379     /* These are allocated strings that must be freed. */
    380 
    381     /* Name of schema being processed without path or default extension. */
    382     char *basename;
    383     /* Uppercase basename for codegen and for case insensitive file inclusion check. */
    384     char *basenameup;
    385     /* Basename with extension. */
    386     char *errorname;
    387 
    388     /*
    389      * The dependency schemas visible to this schema (includes self).
    390      * Compound symbols have a link to schema which can be checked
    391      * against this set to see if the symbol is visible in this
    392      * context.
    393      */
    394     ptr_set_t visible_schema;
    395 };
    396 
    397 /*
    398  * Helpers to ensure a symbol is actually visible because a scope
    399  * (namespace) may be extended when a parent inlcudes another file
    400  * first.
    401  */
    402 static inline fb_compound_type_t *get_enum_if_visible(fb_schema_t *schema, fb_symbol_t *sym)
    403 {
    404     fb_compound_type_t *ct = 0;
    405 
    406     switch (sym->kind) {
    407     case fb_is_union:
    408         /* Fall through. */
    409     case fb_is_enum:
    410         ct = (fb_compound_type_t *)sym;
    411         if (!ptr_set_exists(&schema->visible_schema, ct->schema)) {
    412             ct = 0;
    413         }
    414         break;
    415     default:
    416         break;
    417     }
    418     return ct;
    419 }
    420 
    421 static inline fb_compound_type_t *get_compound_if_visible(fb_schema_t *schema, fb_symbol_t *sym)
    422 {
    423     fb_compound_type_t *ct = 0;
    424 
    425     switch (sym->kind) {
    426     case fb_is_struct:
    427     case fb_is_table:
    428     case fb_is_rpc_service:
    429     case fb_is_union:
    430     case fb_is_enum:
    431         ct = (fb_compound_type_t *)sym;
    432         if (!ptr_set_exists(&schema->visible_schema, ct->schema)) {
    433             ct = 0;
    434         }
    435         break;
    436     default:
    437         break;
    438     }
    439     return ct;
    440 }
    441 
    442 /* Constants are specific to 32-bit FNV-1a hash. It is important to use unsigned integers. */
    443 static inline uint32_t fb_hash_fnv1a_32_init(void)
    444 {
    445     return 2166136261UL;
    446 }
    447 
    448 static inline uint32_t fb_hash_fnv1a_32_append(uint32_t hash, const char *data, size_t len)
    449 {
    450     while (len--) {
    451         hash ^= *(uint8_t *)data++;
    452         hash = hash * 16777619UL;
    453     }
    454     return hash;
    455 }
    456 
    457 #endif /* SYMBOLS_H */