commit 61aa047b8e685bde0215de3803750384e0915a78
parent 9c9e035db3cb4019ab5b09005dee845691015133
Author: William Casarin <jb55@jb55.com>
Date: Mon, 5 Jul 2021 18:11:14 -0700
start to parse larger wasm binaries
Signed-off-by: William Casarin <jb55@jb55.com>
Diffstat:
M | src/cursor.h | | | 10 | ++-------- |
M | src/wasm.c | | | 358 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------- |
M | src/wasm.h | | | 29 | +++++++++++++++++++++++++++++ |
3 files changed, 316 insertions(+), 81 deletions(-)
diff --git a/src/cursor.h b/src/cursor.h
@@ -267,10 +267,7 @@ static inline void cursor_print_around(struct cursor *cur, int range)
c = max(cur->p - range, cur->start);
for (; c < cur->end && c < (cur->p + range); c++) {
- if (*c < 32)
- printf("%02x", *c);
- else
- printf("%c", *c);
+ printf("%02x", *c);
}
printf("\n");
@@ -280,10 +277,7 @@ static inline void cursor_print_around(struct cursor *cur, int range)
printf("^");
continue;
}
- if (*c < 32)
- printf(" ");
- else
- printf(" ");
+ printf(" ");
}
printf("\n");
}
diff --git a/src/wasm.c b/src/wasm.c
@@ -89,12 +89,12 @@ static const char *valtype_name(enum valtype valtype)
static void print_val(struct val *val)
{
switch (val->type) {
- case i32: debug("%d", val->i32); break;
- case i64: debug("%ld", val->i64); break;
- case f32: debug("%f", val->f32); break;
- case f64: debug("%f", val->f64); break;
+ case i32: printf("%d", val->i32); break;
+ case i64: printf("%ld", val->i64); break;
+ case f32: printf("%f", val->f32); break;
+ case f64: printf("%f", val->f64); break;
}
- debug(":%s\n", valtype_name(val->type));
+ printf(":%s\n", valtype_name(val->type));
}
static inline int cursor_popdata(struct cursor *cur, unsigned char *dest, int len)
@@ -110,6 +110,12 @@ static inline int cursor_popdata(struct cursor *cur, unsigned char *dest, int le
return 1;
}
+/*
+ * Report whether `section` has been recorded in module->parsed.
+ *
+ * module->parsed is an unsigned bitmask with one bit per section tag, so
+ * the mask is built from an unsigned 1 to keep both the shift and the AND
+ * in unsigned arithmetic.
+ *
+ * Returns nonzero when the bit for `section` is set, 0 otherwise.
+ */
+static inline int was_section_parsed(struct module *module,
+				     enum section_tag section)
+{
+	return module->parsed & (1u << section);
+}
+
static inline int cursor_popbyte(struct cursor *cur, unsigned char *byte)
{
if (cur->p - 1 < cur->start)
@@ -134,7 +140,7 @@ static void print_stack(struct cursor *stack)
for (i = 0; stack->p > stack->start; i++) {
cursor_popval(stack, &val);
- debug("[%d] ", i);
+ printf("[%d] ", i);
print_val(&val);
}
@@ -256,40 +262,39 @@ static void print_functype(struct functype *ft)
int i;
for (i = 0; i < ft->params.num_valtypes; i++) {
- debug("%s ", valtype_name(ft->params.valtypes[i]));
+ printf("%s ", valtype_name(ft->params.valtypes[i]));
}
- debug("-> ");
+ printf("-> ");
for (i = 0; i < ft->result.num_valtypes; i++) {
- debug("%s ", valtype_name(ft->result.valtypes[i]));
+ printf("%s ", valtype_name(ft->result.valtypes[i]));
}
- debug("\n");
+ printf("\n");
}
static void print_type_section(struct typesec *typesec)
{
int i;
- debug("%d functypes:\n", typesec->num_functypes);
+ printf("%d functypes:\n", typesec->num_functypes);
for (i = 0; i < typesec->num_functypes; i++) {
- debug(" ");
+ printf(" ");
print_functype(&typesec->functypes[i]);
}
}
-/*
static void print_func_section(struct funcsec *funcsec)
{
- int i;
printf("%d functions\n", funcsec->num_indices);
+ /*
printf(" ");
for (i = 0; i < funcsec->num_indices; i++) {
printf("%d ", funcsec->type_indices[i]);
}
printf("\n");
+ */
}
-*/
__attribute__((unused))
-static const char *exportdesc_name(enum exportdesc desc)
+static const char *exportdesc_name(enum exportdesc desc)
{
switch (desc) {
case export_func: return "function";
@@ -304,15 +309,15 @@ static const char *exportdesc_name(enum exportdesc desc)
static void print_import(struct import *import)
{
(void)import;
- debug("%s %s\n", import->module_name, import->name);
+ printf("%s %s\n", import->module_name, import->name);
}
static void print_import_section(struct importsec *importsec)
{
int i;
- debug("%d imports:\n", importsec->num_imports);
+ printf("%d imports:\n", importsec->num_imports);
for (i = 0; i < importsec->num_imports; i++) {
- debug(" ");
+ printf(" ");
print_import(&importsec->imports[i]);
}
}
@@ -321,10 +326,10 @@ static void print_limits(struct limits *limits)
{
switch (limits->type) {
case limit_min:
- debug("%d", limits->min);
+ printf("%d", limits->min);
break;
case limit_min_max:
- debug("%d-%d", limits->min, limits->max);
+ printf("%d-%d", limits->min, limits->max);
break;
}
}
@@ -343,12 +348,12 @@ static void print_memory_section(struct memsec *memory)
int i;
struct limits *mem;
- debug("%d memory:\n", memory->num_mems);
+ printf("%d memory:\n", memory->num_mems);
for (i = 0; i < memory->num_mems; i++) {
mem = &memory->mems[i];
- debug(" ");
+ printf(" ");
print_limits(mem);
- debug("\n");
+ printf("\n");
}
}
@@ -357,23 +362,44 @@ static void print_table_section(struct tablesec *section)
int i;
struct table *table;
- debug("%d tables:\n", section->num_tables);
+ printf("%d tables:\n", section->num_tables);
for (i = 0; i < section->num_tables; i++) {
table = §ion->tables[i];
- debug(" ");
- debug("%s: ", reftype_name(table->reftype));
+ printf(" ");
+ printf("%s: ", reftype_name(table->reftype));
print_limits(&table->limits);
- debug("\n");
+ printf("\n");
}
}
+/*
+ * Look up the export name of the function with index `func_index`.
+ *
+ * Only exports whose descriptor is export_func are considered: exports of
+ * other kinds carry indices from their own index spaces, so comparing the
+ * bare index would attribute their names to unrelated functions.
+ *
+ * Returns the name of the first matching export, or "unknown" when no
+ * function export has that index.
+ */
+static const char *get_function_name(struct module *module, unsigned int func_index)
+{
+	struct wexport *export;
+	int i;
+
+	for (i = 0; i < module->export_section.num_exports; i++) {
+		export = &module->export_section.exports[i];
+		if (export->desc != export_func)
+			continue;
+		if (export->index == func_index)
+			return export->name;
+	}
+
+	return "unknown";
+}
+
+/*
+ * Print the start function's index followed by the name that
+ * get_function_name reports for it.
+ */
+static void print_start_section(struct module *module)
+{
+	int start_index = module->start_section.start_fn;
+	const char *start_name = get_function_name(module, start_index);
+
+	printf("start function: %d <%s>\n", start_index, start_name);
+}
+
static void print_export_section(struct exportsec *exportsec)
{
int i;
- debug("%d exports:\n", exportsec->num_exports);
+ printf("%d exports:\n", exportsec->num_exports);
for (i = 0; i < exportsec->num_exports; i++) {
- debug(" ");
- debug("%s %s %d\n", exportdesc_name(exportsec->exports[i].desc),
+ printf(" ");
+ printf("%s %s %d\n", exportdesc_name(exportsec->exports[i].desc),
exportsec->exports[i].name,
exportsec->exports[i].index);
}
@@ -395,26 +421,85 @@ static void print_func(struct func *func)
}
debug("%d bytes of code\n", func->code_len);
}
+*/
-static void print_code_section(struct codesec *codesec)
+static void print_global_section(struct globalsec *section)
{
- int i;
+ printf("%d globals\n", section->num_globals);
+}
+
+static void print_code_section(struct codesec *codesec)
+{
+ printf("%d code segments\n", codesec->num_funcs);
+ /*
for (i = 0; i < codesec->num_funcs; i++) {
print_func(&codesec->funcs[i]);
}
+ */
+}
+
+/* Print a one-line summary of the data section: its segment count. */
+static void print_data_section(struct datasec *section)
+{
+	int segment_count = section->num_datas;
+
+	printf("%d data segments\n", segment_count);
+}
+
+/*
+ * Print the section identified by `section`, dispatching to the matching
+ * per-section printer. Custom and element sections only print a TODO
+ * marker. num_sections is a count rather than a real section, so reaching
+ * it trips the assert.
+ */
+static void print_section(struct module *module, enum section_tag section)
+{
+	switch (section) {
+	case section_custom:
+		printf("TODO: print custom section\n");
+		return;
+	case section_type:
+		print_type_section(&module->type_section);
+		return;
+	case section_import:
+		print_import_section(&module->import_section);
+		return;
+	case section_function:
+		print_func_section(&module->func_section);
+		return;
+	case section_table:
+		print_table_section(&module->table_section);
+		return;
+	case section_memory:
+		print_memory_section(&module->memory_section);
+		return;
+	case section_global:
+		print_global_section(&module->global_section);
+		return;
+	case section_export:
+		print_export_section(&module->export_section);
+		return;
+	case section_start:
+		print_start_section(module);
+		return;
+	case section_element:
+		printf("TODO: print element section\n");
+		return;
+	case section_code:
+		print_code_section(&module->code_section);
+		return;
+	case section_data:
+		print_data_section(&module->data_section);
+		return;
+	case num_sections:
+		assert(0);
+		return;
+	}
+}
-*/
static void print_module(struct module *module)
{
- print_type_section(&module->type_section);
- //print_func_section(&module->func_section);
- print_import_section(&module->import_section);
- print_export_section(&module->export_section);
- print_table_section(&module->table_section);
- print_memory_section(&module->memory_section);
- //print_code_section(&module->code_section);
+ int i;
+ enum section_tag section;
+
+ for (i = 0; i < num_sections; i++) {
+ section = (enum section_tag)i;
+ if (was_section_parsed(module, section)) {
+ print_section(module, section);
+ }
+ }
}
@@ -912,7 +997,7 @@ static int parse_ref_instr(struct wasm_parser *p)
case ref_is_null:
break;
-
+
case ref_func:
if (!leb128_read(&p->cur, &idx)) {
note_error(p, "invalid ref.func idx");
@@ -924,23 +1009,23 @@ static int parse_ref_instr(struct wasm_parser *p)
return 1;
}
-static inline int parse_const_expr(struct wasm_parser *p)
+static inline int parse_const_expr_instr(struct wasm_parser *p)
{
return parse_ref_instr(p) || parse_const_instr(p);
}
-static int parse_const_exprs(struct wasm_parser *p, struct expr *code)
+static int parse_const_expr(struct wasm_parser *p, struct expr *expr)
{
- code->code = p->cur.p;
+ expr->code = p->cur.p;
while (p->cur.p < p->cur.end) {
if (*p->cur.p == i_end) {
p->cur.p++;
- code->code_len = p->cur.p - code->code;
+ expr->code_len = p->cur.p - expr->code;
return 1;
}
- if (!parse_const_expr(p)) {
+ if (!parse_const_expr_instr(p)) {
note_error(p, "no constant expr found");
return 0;
}
@@ -983,7 +1068,7 @@ static int parse_global(struct wasm_parser *p,
return 0;
}
- if (!parse_const_exprs(p, &global->init)) {
+ if (!parse_const_expr(p, &global->init)) {
note_error(p, "init code");
return 0;
}
@@ -1039,6 +1124,125 @@ static int parse_memory_section(struct wasm_parser *p,
return 1;
}
+/*
+ * Parse the start section: a single function index read with leb128_read
+ * and stored in start_section->start_fn.
+ *
+ * Returns 1 on success; on a failed read, notes an error and returns 0.
+ */
+static int parse_start_section(struct wasm_parser *p,
+			       struct startsec *start_section)
+{
+	unsigned int *fn_index = (unsigned int *)&start_section->start_fn;
+
+	if (leb128_read(&p->cur, fn_index))
+		return 1;
+
+	note_error(p, "start_fn index");
+	return 0;
+}
+
+/*
+ * Parse a length-prefixed byte vector without copying it.
+ *
+ * Reads the length with leb128_read into *data_len, points *data at the
+ * bytes in place at the cursor and advances the cursor past them.
+ *
+ * The length comes straight from the input, so it is validated against the
+ * number of bytes remaining in the cursor instead of by forming
+ * `p->cur.p + len`: a value too large for an int reads back negative here
+ * and would otherwise move the pointer backwards and slip past the bounds
+ * check.
+ *
+ * Returns 1 on success; notes an error and returns 0 on failure.
+ */
+static inline int parse_byte_vector(struct wasm_parser *p, unsigned char **data,
+				    int *data_len)
+{
+	if (!leb128_read(&p->cur, (unsigned int*)data_len)) {
+		note_error(p, "len");
+		return 0;
+	}
+
+	if (*data_len < 0 || *data_len > p->cur.end - p->cur.p) {
+		note_error(p, "byte vector overflow");
+		return 0;
+	}
+
+	*data = p->cur.p;
+	p->cur.p += *data_len;
+
+	return 1;
+}
+
+/*
+ * Parse a single data segment into `data`.
+ *
+ * The leading tag byte selects the layout:
+ *   0: active segment, memory index fixed to 0: offset expression, bytes
+ *   1: passive segment: bytes only
+ *   2: active segment: explicit memory index, offset expression, bytes
+ * Any other tag is rejected.
+ *
+ * Returns 1 on success; on failure notes an error and returns 0.
+ */
+static int parse_wdata(struct wasm_parser *p, struct wdata *data)
+{
+	u8 tag;
+
+	if (!pull_byte(&p->cur, &tag)) {
+		note_error(p, "tag");
+		return 0;
+	}
+
+	switch (tag) {
+	case 0:
+		data->mode = datamode_active;
+		data->active.mem_index = 0;
+
+		if (!parse_const_expr(p, &data->active.offset_expr)) {
+			note_error(p, "const expr");
+			return 0;
+		}
+
+		if (!parse_byte_vector(p, &data->bytes, &data->bytes_len)) {
+			note_error(p, "bytes vector");
+			return 0;
+		}
+
+		return 1;
+
+	case 1:
+		data->mode = datamode_passive;
+
+		if (parse_byte_vector(p, &data->bytes, &data->bytes_len))
+			return 1;
+
+		note_error(p, "passive bytes vector");
+		return 0;
+
+	case 2:
+		data->mode = datamode_active;
+
+		if (!leb128_read(&p->cur, (unsigned int*)&data->active.mem_index)) {
+			note_error(p, "read active data mem_index");
+			return 0;
+		}
+
+		if (!parse_const_expr(p, &data->active.offset_expr)) {
+			note_error(p, "read active data (w/ mem_index) offset_expr");
+			return 0;
+		}
+
+		if (!parse_byte_vector(p, &data->bytes, &data->bytes_len)) {
+			note_error(p, "active (w/ mem_index) bytes vector");
+			return 0;
+		}
+
+		return 1;
+
+	default:
+		/* every tag above 2 ends up here */
+		cursor_print_around(&p->cur, 10);
+		note_error(p, "invalid datasegment tag: 0x%x", tag);
+		return 0;
+	}
+}
+
+/*
+ * Parse the data section: a vector of data segments.
+ *
+ * parse_vector yields the element count and the `struct wdata` array;
+ * each element is then filled in by parse_wdata. `section` is written
+ * only after every segment has parsed successfully.
+ *
+ * Returns 1 on success; on failure notes an error and returns 0.
+ */
+static int parse_data_section(struct wasm_parser *p, struct datasec *section)
+{
+	struct wdata *data;
+	unsigned int elems, i;
+
+	if (!parse_vector(p, sizeof(*data), &elems, (void**)&data)) {
+		note_error(p, "datas vector");
+		return 0;
+	}
+
+	for (i = 0; i < elems; i++) {
+		if (!parse_wdata(p, &data[i])) {
+			/* both counters are unsigned, so format them with %u */
+			note_error(p, "data segment #%u/%u", i+1, elems);
+			return 0;
+		}
+	}
+
+	section->num_datas = elems;
+	section->datas = data;
+
+	return 1;
+}
+
static int parse_table_section(struct wasm_parser *p,
struct tablesec *table_section)
{
@@ -1266,8 +1470,11 @@ static int parse_section_by_tag(struct wasm_parser *p, enum section_tag tag,
}
return 1;
case section_start:
- note_error(p, "section_start parse not implemented");
- return 0;
+ if (!parse_start_section(p, &p->module.start_section)) {
+ note_error(p, "start section");
+ return 0;
+ }
+ return 1;
case section_element:
note_error(p, "section_element parse not implemented");
return 0;
@@ -1278,8 +1485,11 @@ static int parse_section_by_tag(struct wasm_parser *p, enum section_tag tag,
}
return 1;
case section_data:
- note_error(p, "section_data parse not implemented");
- return 0;
+ if (!parse_data_section(p, &p->module.data_section)) {
+ note_error(p, "data section");
+ return 0;
+ }
+ return 1;
default:
note_error(p, "invalid section tag");
return 0;
@@ -1342,11 +1552,15 @@ static int parse_section(struct wasm_parser *p)
return 0;
}
+ p->module.parsed |= 1 << tag;
+
return 1;
}
int parse_wasm(struct wasm_parser *p)
{
+ p->module.parsed = 0;
+
if (!consume_bytes(&p->cur, WASM_MAGIC, sizeof(WASM_MAGIC))) {
note_error(p, "magic");
goto fail;
@@ -1473,7 +1687,6 @@ static int set_local(struct wasm_interp *interp, int ind, struct val *val)
return 1;
}
- debug("pushing local %d\n", ind);
cursor_pushval(&interp->locals, val);
assert(count_locals(interp) > 0);
return 1;
@@ -1549,13 +1762,20 @@ static inline int interp_i32_const(struct wasm_interp *interp)
return cursor_pushval(&interp->stack, &val);
}
+/*
+ * Number of imports in the module, or 0 when no import section was
+ * parsed.
+ */
+static inline int get_import_offset(struct module *module)
+{
+	if (was_section_parsed(module, section_import))
+		return module->import_section.num_imports;
+
+	return 0;
+}
+
static inline struct func *get_function(struct module *module, int ind)
{
+ // TODO: imports
if (ind >= module->code_section.num_funcs) {
return NULL;
}
- return &module->code_section.funcs[ind];
+ return &module->code_section.funcs[ind - get_import_offset(module)];
}
static inline struct functype *get_function_type(struct module *module, int ind)
@@ -1564,7 +1784,7 @@ static inline struct functype *get_function_type(struct module *module, int ind)
return NULL;
}
- ind = module->func_section.type_indices[ind];
+ ind = module->func_section.type_indices[ind - get_import_offset(module)];
if (ind >= module->type_section.num_functypes) {
return NULL;
}
@@ -1572,23 +1792,6 @@ static inline struct functype *get_function_type(struct module *module, int ind)
return &module->type_section.functypes[ind];
}
-/*
-static const char *get_function_name(struct module *module, unsigned int func_index)
-{
- struct wexport *export;
- int i;
-
- for (i = 0; i < module->export_section.num_exports; i++) {
- export = &module->export_section.exports[i];
- if (export->index == func_index) {
- return export->name;
- }
- }
-
- return "unknown";
-}
-*/
-
static int prepare_call(struct wasm_interp *interp, int func_index)
{
int i;
@@ -1742,6 +1945,15 @@ static int find_function(struct module *module, const char *name)
return -1;
}
+/*
+ * Pick the function to run first.
+ *
+ * Uses the index from the start section when one was parsed; otherwise
+ * falls back to whatever find_function returns for the name "start".
+ */
+static int find_start_function(struct module *module)
+{
+	if (!was_section_parsed(module, section_start))
+		return find_function(module, "start");
+
+	return module->start_section.start_fn;
+}
+
static int cursor_slice(struct cursor *mem, struct cursor *slice, size_t size)
{
u8 *p;
@@ -1794,7 +2006,7 @@ int interp_wasm_module(struct wasm_interp *interp, struct module *module)
assert(ok);
- func = find_function(module, "start");
+ func = find_start_function(module);
if (func == -1) {
interp_error(interp, "no start function found");
return 0;
diff --git a/src/wasm.h b/src/wasm.h
@@ -324,7 +324,34 @@ struct instr {
};
};
+/*
+ * Kind of data segment, chosen by parse_wdata from the segment's tag byte
+ * (tags 0 and 2: active, tag 1: passive).
+ */
+enum datamode {
+ datamode_active,
+ datamode_passive,
+};
+
+/* Fields that parse_wdata fills in for active data segments (tags 0 and 2). */
+struct wdata_active {
+ /* memory index: fixed to 0 for tag 0, read from the input for tag 2 */
+ int mem_index;
+ /* constant expression parsed by parse_const_expr */
+ struct expr offset_expr;
+};
+
+/* One parsed data segment. */
+struct wdata {
+ /* only written by parse_wdata when mode is datamode_active */
+ struct wdata_active active;
+ /* points at the segment's bytes in place in the parser's cursor; not a copy */
+ unsigned char *bytes;
+ /* number of bytes at `bytes` */
+ int bytes_len;
+ enum datamode mode;
+};
+
+/* Parsed data section: `num_datas` segments stored at `datas`. */
+struct datasec {
+ struct wdata *datas;
+ int num_datas;
+};
+
+/* Parsed start section. */
+struct startsec {
+ /* function index read by parse_start_section */
+ int start_fn;
+};
+
struct module {
+ unsigned int parsed;
struct typesec type_section;
struct funcsec func_section;
struct importsec import_section;
@@ -333,6 +360,8 @@ struct module {
struct tablesec table_section;
struct memsec memory_section;
struct globalsec global_section;
+ struct startsec start_section;
+ struct datasec data_section;
};
struct wasm_interp {