btcs

bitcoin script parser/evaluator/compiler/decompiler
git clone git://jb55.com/btcs
Log | Files | Refs | README | LICENSE

mph-opcodes (3922B)


      1 #!/usr/bin/env python3
      2 # Easy Perfect Minimal Hashing
      3 # By Steve Hanov. Released to the public domain.
      4 #
      5 # Based on:
      6 # Edward A. Fox, Lenwood S. Heath, Qi Fan Chen and Amjad M. Daoud,
      7 # "Practical minimal perfect hash functions for large databases", CACM, 35(1):105-121
      8 # also a good reference:
      9 # Compress, Hash, and Displace algorithm by Djamal Belazzougui,
     10 # Fabiano C. Botelho, and Martin Dietzfelbinger
     11 import sys
     12 
     13 DICTIONARY = sys.argv[1]
     14 
     15 # Calculates a distinct hash function for a given string. Each value of the
     16 # integer d results in a different hash value.
     17 def hash( d, str ):
     18     if d == 0: d = 0x01000193
     19 
     20     # Use the FNV algorithm from http://isthe.com/chongo/tech/comp/fnv/
     21     for c in str:
     22         d = ( (d * 0x01000193) ^ ord(c) ) & 0xffffffff;
     23 
     24     return d
     25 
     26 # Computes a minimal perfect hash table using the given python dictionary. It
     27 # returns a tuple (G, V). G and V are both arrays. G contains the intermediate
     28 # table of values needed to compute the index of the value in V. V contains the
     29 # values of the dictionary.
     30 def CreateMinimalPerfectHash( dict ):
     31     size = len(dict)
     32 
     33     # Step 1: Place all of the keys into buckets
     34     buckets = [ [] for i in range(size) ]
     35     G = [0] * size
     36     values = [None] * size
     37 
     38     for key in dict.keys():
     39         buckets[hash(0, key) % size].append( key )
     40 
     41     # Step 2: Sort the buckets and process the ones with the most items first.
     42     buckets.sort( key=len, reverse=True )
     43     for b in range( size ):
     44         bucket = buckets[b]
     45         if len(bucket) <= 1: break
     46 
     47         d = 1
     48         item = 0
     49         slots = []
     50 
     51         # Repeatedly try different values of d until we find a hash function
     52         # that places all items in the bucket into free slots
     53         while item < len(bucket):
     54             slot = hash( d, bucket[item] ) % size
     55             if values[slot] != None or slot in slots:
     56                 d += 1
     57                 item = 0
     58                 slots = []
     59             else:
     60                 slots.append( slot )
     61                 item += 1
     62 
     63         G[hash(0, bucket[0]) % size] = d
     64         for i in range(len(bucket)):
     65             values[slots[i]] = dict[bucket[i]]
     66 
     67     # Only buckets with 1 item remain. Process them more quickly by directly
     68     # placing them into a free slot. Use a negative value of d to indicate
     69     # this.
     70     freelist = []
     71     for i in range(size):
     72         if values[i] == None: freelist.append( i )
     73 
     74     for b in range( b, size ):
     75         bucket = buckets[b]
     76         if len(bucket) == 0: break
     77         slot = freelist.pop()
     78         # We subtract one to ensure it's negative even if the zeroeth slot was
     79         # used.
     80         G[hash(0, bucket[0]) % size] = -slot-1
     81         values[slot] = dict[bucket[0]]
     82 
     83     return (G, values)
     84 
     85 # Look up a value in the hash table, defined by G and V.
     86 def PerfectHashLookup( G, V, key ):
     87     d = G[hash(0,key) % len(G)]
     88     if d < 0: return V[-d-1]
     89     return V[hash(d, key) % len(V)]
     90 
     91 dict = {}
     92 line = 1
     93 keys = map(lambda x: x.strip(), open(DICTIONARY, "rt").readlines())
     94 for key in keys:
     95     dict[key] = line
     96     line += 1
     97 
     98 (G, V) = CreateMinimalPerfectHash( dict )
     99 
    100 Gstrs = ",".join(map(str, G))
    101 Vstrs = ",".join(map(str, V))
    102 
    103 header = open('oplookup.h', 'w')
    104 
    105 
    106 
    107 header.write("/* THIS IS GENERATED FROM mph-opcodes DO NOT EDIT */\n\n")
    108 header.write("#ifndef BCS_OPLOOKUP_H\n")
    109 header.write("#define BCS_OPLOOKUP_H\n\n")
    110 
    111 header.write("#include \"misc.h\"\n")
    112 header.write("extern const int opcodes_g[{}];\n".format(len(G)))
    113 header.write("extern const int opcodes_v[{}];\n".format(len(V)))
    114 header.write("\n#endif /* BCS_OPLOOKUP_H */\n")
    115 header.close()
    116 
    117 cfile = open('oplookup.c', 'w')
    118 
    119 cfile.write("/* THIS IS GENERATED FROM mph-opcodes DO NOT EDIT */\n\n")
    120 cfile.write("#include \"oplookup.h\"\n".format(Gstrs))
    121 cfile.write("const int opcodes_g[{}] = {{ {} }};\n".format(len(G), Gstrs))
    122 cfile.write("const int opcodes_v[{}] = {{ {} }};\n".format(len(V), Vstrs))
    123 cfile.close()