btcs

bitcoin script parser/evaluator/compiler/decompiler
git clone git://jb55.com/btcs
Log | Files | Refs | README | LICENSE

commit 270f7597b189064a7b298a14210e35884da8a266
parent c50cf5a2b2be76ec5c4a0f4098eef0dc87904684
Author: William Casarin <jb55@jb55.com>
Date:   Sun,  5 Nov 2017 14:28:39 -0800

WIP: huge refactor

Diffstat:
M alloc.c | 3 +--
M alloc.h | 2 +-
M main.c | 11 +++++++----
M misc.h | 22 ++++++++++++++++++++++
M op.c | 5 ++++-
M op.h | 2 +-
M script.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M script.h | 16 +++++++++++-----
M script_num.c | 40 +++++++++++++++++++++++++++++++++++-----
M script_num.h | 6 +++---
M test.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
M val.c | 60 +++++++++++++++++++++++++++++++++++++++++-------------------
M val.h | 13 +++++++++++++
13 files changed, 292 insertions(+), 65 deletions(-)

diff --git a/alloc.c b/alloc.c @@ -41,13 +41,12 @@ num_pool_new(int *ind) { } char * -byte_pool_new(const char *bytes, const u16 len) { +byte_pool_new(const u16 len) { assert(g_arenas.bytes_top - g_arenas.bytes + len <= g_arenas.nbytes); char *start = g_arenas.bytes_top; u16 *c = (u16*)g_arenas.bytes_top; *c++ = len; char *p = (char*)c; - memcpy(p, bytes, len); p += len; g_arenas.bytes_top = p; assert(*p == 0); diff --git a/alloc.h b/alloc.h @@ -12,7 +12,7 @@ struct num * num_pool_new(int *ind); struct num * num_pool_get(const int ind); -char * byte_pool_new(const char *bytes, const u16 len); +char * byte_pool_new(const u16 len); char * byte_pool_get(const int ind, u16 *len); char * byte_pool_pop(); diff --git a/main.c b/main.c @@ -8,25 +8,28 @@ extern int yyparse(); extern FILE* yyin; -struct stack reader_stack; +char * g_reader_buf; +char * g_reader_buf_top; +u32 g_reader_buf_cap; void yyerror(const char* s); int main() { yyin = stdin; + size_t size; struct stack tmp_stack; alloc_arenas(0, MAX_STACK_SIZE, MAX_STACK_SIZE * MAX_STACK_SIZE); - stack_init(&reader_stack); stack_init(&tmp_stack); do { yyparse(); } while(!feof(yyin)); - script_eval(&reader_stack, &tmp_stack); + size = g_reader_buf_top - g_reader_buf; + script_eval(g_reader_buf, size, &tmp_stack); printf("script: "); - script_print_ops(&reader_stack); + script_print_ops(g_reader_buf, g_reader_buf_top -); printf("stack: "); script_print_vals(&tmp_stack); diff --git a/misc.h b/misc.h @@ -4,6 +4,8 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +/* #include <endian.h> */ +#include <string.h> #include <stdint.h> #define DEBUG 0 @@ -21,7 +23,27 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef int64_t s64; +typedef uint64_t u64; #define STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(!!(COND))*2-1] +// TODO: host endianness +#define le16toh(x) (x) +#define le32toh(x) (x) + +u16 static inline +readle16(const u8* ptr) { + u16 x; + memcpy((char*)&x, 
ptr, 2); + return le16toh(x); +} + +u32 static inline +readle32(const u8* ptr) { + u32 x; + memcpy((char*)&x, ptr, 4); + return le32toh(x); +} + + #endif /* BCS_MISC_H */ diff --git a/op.c b/op.c @@ -359,7 +359,10 @@ val_print(struct val val) { case VT_SCRIPTNUM: assert(val.ind != -1); n = num_pool_get(val.ind); - printf("%lu", n->val); + printf("sn:%lu", n->val); + break; + case VT_SMALLINT: + printf("si:%d", val.ind); break; default: assert(!"val_print data"); diff --git a/op.h b/op.h @@ -312,7 +312,7 @@ const char * val_name(struct val); static inline void stack_push_val(struct stack *stack, struct val val) { -#if 0 +#if 1 printf("pushing val "); val_print(val); printf("\n"); diff --git a/script.c b/script.c @@ -8,15 +8,20 @@ #define SCRIPTERR(serr) script_add_error(c, opcode, serr) +int g_silence_script_err = 0; +int g_silence_script_warn = 0; + int script_add_error(int c, enum opcode cur_op, const char *serror) { // TODO: set_error - fprintf(stderr, "error: %s @ op %d (%s)\n", serror, c, op_name(cur_op)); + if (!g_silence_script_err) + fprintf(stderr, "error: %s @ op %d (%s)\n", serror, c, op_name(cur_op)); return 0; } void script_add_warning(const char *warning) { // TODO: set_error - fprintf(stderr, "warning: %s\n", warning); + if (!g_silence_script_warn) + fprintf(stderr, "warning: %s\n", warning); } @@ -38,9 +43,51 @@ cast_to_bool(struct val val) { } int -script_eval(struct stack *script, struct stack *stack) { +script_getop(u8 **p, u8 *end, enum opcode *opcode, u8 *buf, int bufsize, u32 *outlen) { + u32 nsize = 0; + + *opcode = *(*p++); + + if (*opcode > OP_PUSHDATA4) + return 1; + + if (*opcode < OP_PUSHDATA1) + nsize = *opcode; + else if (*opcode == OP_PUSHDATA1) { + if (end - *p < 1) + return 0; + nsize = *(*p++); + } + else if (*opcode == OP_PUSHDATA2) { + if (end - *p < 2) + return 0; + nsize = readle16(*p); + *p += 2; + } + else if (*opcode == OP_PUSHDATA4) { + if (end - *p < 4) + return 0; + nsize = readle32(*p); + *p += 4; + } + + if (buf) { + 
*outlen = nsize; + memcpy(buf, *p, nsize); + *p += nsize; + } + + return 1; +} + + +int +script_eval(u8 *script, size_t script_size, struct stack *stack) { int op_count = 0; - void **p = script->bottom; + u32 tmplen; + u8 *p = script; + u8 *top = script + script_size; + static char tmpbuf[32]; static const struct val val_true = {.type = VT_SMALLINT, .ind = 1}; static const struct val val_false = {.type = VT_SMALLINT, .ind = 0}; static const struct num bn_one = {.val = 1, .ind = -1}; @@ -55,14 +102,13 @@ script_eval(struct stack *script, struct stack *stack) { // TODO: require minimal? int require_minimal = !(flags & ~(CO_WARNINGS_ARE_ERRORS | CO_WARN_MINIMAL)); - - char tmpbuf[32]; stack_init(altstack); stack_init(ifstack); - while (p < script->top) { + while (p < top) { c++; - enum opcode opcode = *(enum opcode*)p; + enum opcode opcode; + script_getop(&p, top, &opcode, (u8*)tmpbuf, ARRAY_SIZE(tmpbuf), &tmplen); int if_exec = !stack_size(ifstack); p++; // TODO: pushdata ops @@ -550,11 +596,19 @@ script_eval(struct stack *script, struct stack *stack) { return 1; } -void script_print_ops(struct stack *stack) { - void **p = stack->bottom; - while (p < stack->top) { - enum opcode opcode = (enum opcode)*p++; +void script_print(u8 *script, size_t script_size) { + u32 len; + static u8 tmpbuf[4096]; + u8 *t = tmpbuf; + u8 *p = script; + u8 *top = script + script_size; + while (p < top) { + enum opcode opcode; + script_getop(&p, top, &opcode, tmpbuf, ARRAY_SIZE(tmpbuf), &len); + u8 *ttop = tmpbuf + len; printf("%s ", op_name(opcode)); + if (t < ttop) + printf("%02x", *t++); } printf("\n"); } @@ -589,3 +643,35 @@ void script_free(struct script *script) { int script_new(struct script *script) { return 0; } + +void +script_push_int(struct stack *script, s64 intval) { + /* u16 len; */ + /* u16 i; */ + /* static u8 buf[8]; */ + struct val val = val_from_int(intval); + stack_push_val(script, val); + /* val_serialize(val, &len, buf, ARRAY_SIZE(buf)); */ + /* for (i = 0; i < len; 
++i) */ + /* stack_push_small(u8, script, &buf[i]); */ +} + + +void +script_serialize(struct stack *stack, u8 *buf, int buflen, int* len) { + struct val val; + u8 *huh = (u8*)&val; + u8 *p = buf; + u16 valsize; + *len = 0; + + while (stack_size(stack) != 0) { + val = stack_pop_val(stack); + printf("%02x %02x %02x %02x | ", huh[0], huh[1], huh[2], huh[3]); + printf("%d %d\n", val.type, val.ind); + val_serialize(val, &valsize, p, buflen-(p-buf)); + p += valsize; + *len += valsize; + assert(p-buf <= buflen); + } +} diff --git a/script.h b/script.h @@ -9,11 +9,17 @@ struct script { struct stack pushdata; // a stack of pushdata stacks }; -int script_new(struct script *); -void script_free(struct script *); -int script_eval(struct stack *, struct stack*); -void script_print_ops(struct stack *); -void script_print_vals(struct stack *); +int script_new(struct script *); +void script_free(struct script *); +int script_eval(u8 *script, size_t script_size, struct stack *stack); +void script_print_ops(char * buf, size_t size); +void script_print_vals(struct stack *); +void script_push_int(struct stack *, s64); +void +script_serialize(struct stack *stack, u8 *buf, int buflen, int* len); + +extern int g_silence_script_err; +extern int g_silence_script_warn; #endif /* BTCS_SCRIPT_H */ diff --git a/script_num.c b/script_num.c @@ -15,7 +15,7 @@ void -sn_from_int(int n, struct num *sn) { +sn_from_int(s64 n, struct num *sn) { sn->ind = -1; sn->val = n; } @@ -29,10 +29,40 @@ sn_from_int(int n, struct num *sn) { /* } */ -const char * -sn_serialize(struct num *sn, u16 *len) { - assert(!"implement sn_serialize"); - return ""; +void +sn_serialize(struct num *sn, u8 *buf, int bufsize, u16 *len) { + u8 *p = buf; + + if(sn->val == 0) { + *len = 0; + return; + } + + const int neg = sn->val < 0; + u64 absvalue = neg ? 
-(sn->val) : sn->val; + + while(absvalue) { + *p++ = absvalue & 0xff; + assert((p - buf) <= bufsize); + absvalue >>= 8; + } + + // - If the most significant byte is >= 0x80 and the value is positive, push a + // new zero-byte to make the significant byte < 0x80 again. + + // - If the most significant byte is >= 0x80 and the value is negative, push a + // new 0x80 byte that will be popped off when converting to an integral. + + // - If the most significant byte is < 0x80 and the value is negative, add + // 0x80 to it, since it will be subtracted and interpreted as a negative when + // converting to an integral. + + if (*p & 0x80) + *p++ = neg ? 0x80 : 0; + else if (neg) + *p++ |= 0x80; + + *len = p - buf; } diff --git a/script_num.h b/script_num.h @@ -16,7 +16,7 @@ struct num { }; void -sn_from_int(int n, struct num *); +sn_from_int(s64 n, struct num *); int sn_overflowed(struct num *num); @@ -30,8 +30,8 @@ sn_to_val(struct num *sn); int sn_to_int(struct num *sn, int require_minimal); -const char * -sn_serialize(struct num *sn, u16 *len); +void +sn_serialize(struct num *sn, u8 *buf, int bufsize, u16 *len); static void inline diff --git a/test.c b/test.c @@ -14,6 +14,27 @@ typedef void (program)(struct stack *script, struct stack *stack,\ struct stack *expected \ ) + +static void +cmp_data(const u8 *a, const u8 *b, int alen, int blen, const char *msg) { + int i = 0; + cmp_ok(alen, "==", blen); + + if (memcmp(a, b, blen) != 0) { + printf("#\n# Failed data cmp test\n# > "); + for (i = 0; i < alen; ++i) + printf("%02x ", a[i] & 0xff); + + printf("\n# > "); + for (i = 0; i < blen; ++i) + printf("%02x ", b[i] & 0xff); + + printf("\n#\n"); + fail("%s: data should match", msg); + } + else + pass("%s: data should match", msg); +} /* static void */ /* test_nip(struct stack *stack, struct stack *expected) { */ /* stack_push_op(OP_1); */ @@ -43,33 +64,52 @@ ok_stacks_equal(struct stack *s1, struct stack *s2, const char *context) { } TEST(test_simple) { - stack_push_op(script, 
OP_1); + u8 in_script[] = { OP_1 }; - script_eval(script, stack); + script_eval(in_script, 1, stack); stack_push_val(expected, smallintval(1)); ok_stacks_equal(stack, expected, "test_simple"); } TEST(test_nip) { - stack_push_op(script, OP_1); - stack_push_op(script, OP_2); - stack_push_op(script, OP_NIP); + u8 in_script[] = { OP_1, OP_2, OP_NIP }; - script_eval(script, stack); + script_eval(in_script, ARRAY_SIZE(in_script), stack); stack_push_val(expected, smallintval(2)); ok_stacks_equal(stack, expected, "test_nip"); } TEST(test_2dup_not_enough_input) { - stack_push_op(script, OP_1); - stack_push_op(script, OP_2DUP); + u8 in_script[] = { OP_1, OP_2DUP }; - int res = script_eval(script, stack); + int res = script_eval(in_script, ARRAY_SIZE(in_script), stack); ok(res == 0, "2dup fail on small stack"); } +TEST(big_int_serializes_ok) { + int len; + static u8 buf[12]; + static u8 expected_in[] = { 0x04, 0xff, 0xff, 0xff, 0x7f, + 0x04, 0xff, 0xff, 0xff, 0x7f, OP_ADD }; + static u8 expected_out[] = { 0x05, 0xfe, 0xff, 0xff, 0xff, 0 }; + + script_push_int(script, 2147483647); + script_push_int(script, 2147483647); + stack_push_op(script, OP_ADD); + + script_serialize(script, buf, ARRAY_SIZE(buf), &len); + + cmp_data(buf, expected_in, len, ARRAY_SIZE(expected_in), + "big int input serializes ok"); + + script_eval(buf, ARRAY_SIZE(expected_in), stack); + script_serialize(stack, buf, ARRAY_SIZE(buf), &len); + + cmp_data(buf, expected_out, len, ARRAY_SIZE(expected_out), + "big int output serializes ok"); +} // TODO test scriptnum overflows // TODO test scriptnum negative zero boolean logic @@ -89,8 +129,7 @@ TEST(test_2dup_not_enough_input) { static inline void -run_test(struct stack *script, struct stack *stack, struct stack *expected, - program *prog) +run_test(struct stack *script, struct stack *stack, struct stack *expected, program *prog) { stack_clear(script); stack_clear(stack); @@ -106,6 +145,9 @@ main(int argc, char *argv[]) { struct stack *stack = &_stack; struct 
stack *expected = &_expected; + g_silence_script_err = 0; + g_silence_script_warn = 1; + alloc_arenas(); plan(5); @@ -117,10 +159,11 @@ main(int argc, char *argv[]) { RUNTEST(test_simple); RUNTEST(test_nip); RUNTEST(test_2dup_not_enough_input); + RUNTEST(big_int_serializes_ok); + stack_free(script); stack_free(expected); stack_free(stack); - stack_free(script); free_arenas(0); diff --git a/val.c b/val.c @@ -14,23 +14,43 @@ static const char intbytes[] = { OP_16 }; -const char * -val_serialize(struct val val, u16 *len, int require_minimal) { +struct val +val_from_int(s64 intval) { + struct val val; struct num *sn; - int n, ind; + int ind; + sn = num_pool_new(&ind); + val.type = VT_SCRIPTNUM; + val.ind = ind; + sn->ind = ind; + sn->val = intval; + return val; +} + +void +val_serialize(struct val val, u16 *len, u8 *buf, int bufsize) { + struct num *sn; + int n; + u16 valsize; switch (val.type) { case VT_SCRIPTNUM: sn = num_pool_get(val.ind); assert(sn); - return sn_serialize(sn, len); + sn_serialize(sn, buf+1, bufsize - 1, &valsize); + assert(valsize <= 0xFF); + *buf = (u8)valsize; + *len = valsize + 1; + return; case VT_DATA: - return byte_pool_get(val.ind, len); + assert("!implement val_serialize VT_DATA"); + byte_pool_get(val.ind, len); case VT_SMALLINT: *len = 1; n = val.ind; if (n >= -1 && n <= 16) { val.type = VT_SMALLINT; - return &intbytes[n+1]; + assert(bufsize >= 1); + *buf = intbytes[n+1]; } else { assert(!"non-small VT_SMALLINT"); @@ -38,28 +58,30 @@ val_serialize(struct val val, u16 *len, int require_minimal) { } assert(!"val_serialize missing implementation"); - return 0; } int val_eq(struct val a, struct val b, int require_minimal) { u16 alen, blen; int eq = 0; - const char *abytes, *bbytes; - abytes = val_serialize(a, &alen, require_minimal); - bbytes = val_serialize(b, &blen, require_minimal); - // TODO: what if they're semantically equivalent? or does that matter? 
- if (alen != blen) { - if (a.type == VT_DATA) byte_pool_pop(); - if (b.type == VT_DATA) byte_pool_pop(); - return 0; - } + static const int tmpsize = 4096; + static u8 tmpa[tmpsize]; + static u8 tmpb[tmpsize]; + + const u8 *abytes, *bbytes; + // TODO: do I need to serialize to compare? + /* abytes = val_serialize(a, &alen, require_minimal); */ + /* bbytes = val_serialize(b, &blen, require_minimal); */ + val_serialize(a, &alen, tmpa, tmpsize); + val_serialize(b, &blen, tmpb, tmpsize); + // TODO: what if they're semantically equivalent? or does that matter + // (eg. minimal vs not miniminal)? - eq = memcmp(abytes, bbytes, alen) == 0; + if (alen != blen) + return 0; - if (a.type == VT_DATA) byte_pool_pop(); - if (b.type == VT_DATA) byte_pool_pop(); + eq = memcmp(tmpa, tmpb, alen) == 0; return eq; } diff --git a/val.h b/val.h @@ -36,6 +36,14 @@ stack_top_val(struct stack *stack, int ind) { return val; } +static inline struct val +stack_pop_val(struct stack *stack) { + struct val val; + val = stack_top_val(stack, -1); + stack_pop(stack); + return val; +} + static inline void stack_set_val(struct stack *stack, int ind, struct val val) { struct val *pval = (struct val *)(stack->top + ind); @@ -45,4 +53,9 @@ stack_set_val(struct stack *stack, int ind, struct val val) { int val_eq(struct val a, struct val b, int require_minimal); +void +val_serialize(struct val val, u16 *len, u8 *buf, int bufsize); + +struct val val_from_int(s64); + #endif /* BTCS_VAL_H */