2018-05-16 10:59:09 +00:00
|
|
|
# Step 04 - generate Java literals.
|
|
|
|
#
|
2021-08-31 12:07:17 +00:00
|
|
|
# Java byte-code has severe restrictions. There is no such thing as
|
2018-05-16 10:59:09 +00:00
|
|
|
# "array literal" - those are implemented as series of data[x] = y;
|
|
|
|
# as a consequence an N-byte array will use 7N bytes in the class, plus N bytes
|
|
|
|
# in the instantiated variable. Also, no literal can be longer than 64KiB.
|
|
|
|
#
|
|
|
|
# To keep dictionary data compact both in source code and in compiled format
|
|
|
|
# we use the following tricks:
|
|
|
|
# * use String as a data container
|
|
|
|
# * store only lowest 7 bits; i.e. all characters fit ASCII table; this allows
|
|
|
|
# efficient conversion to byte array; also ASCII characters use only 1 byte
|
|
|
|
# of memory (UTF-8 encoding)
|
|
|
|
# * RLE-compress the sequence of 8th bits
|
|
|
|
#
|
|
|
|
# This script generates literals used in Java code.
|
|
|
|
|
2021-01-27 14:08:05 +00:00
|
|
|
# Python 2/3 compatibility: make ``unichr`` available under either
# interpreter so the rest of the script can use a single name.
try:
    unichr = unichr  # Python 2: the builtin exists; re-bind it as-is.
except NameError:
    unichr = chr  # Python 3: no ``unichr`` builtin; ``chr`` is used instead.
|
|
|
|
|
2018-05-16 10:59:09 +00:00
|
|
|
bin_path = "dictionary.bin"

with open(bin_path, "rb") as raw:
    data = raw.read()

# Split every input byte into two streams:
#   * ``low`` - one character per byte holding its lowest 7 bits;
#   * ``hi``  - run lengths of the 8th bit, alternating between runs of
#               bytes with the bit clear ("skip") and runs with it set
#               ("flip"), starting with a "skip" run.
low = []
hi = []
is_skip = True
# Every run length is stored with this offset added.
# NOTE(review): presumably chosen so that short runs map to printable
# characters; whatever decodes SKIP_FLIP must subtract the same value.
skip_flip_offset = 36
cntr = skip_flip_offset
# Iterate over a bytearray: it yields integers on both Python 2 and
# Python 3. Iterating ``bytes`` directly yields ints on Python 3, where the
# previous ``ord(b)`` call raised TypeError.
for value in bytearray(data):
    low.append(chr(value & 0x7F))
    if (value < 0x80) == is_skip:
        # The byte continues the current run.
        cntr += 1
    else:
        # The 8th bit changed: emit the finished run and start the next one.
        # The current byte is already the first member of the new run, hence
        # the counter restarts at offset + 1.
        is_skip = not is_skip
        hi.append(unichr(cntr))
        cntr = skip_flip_offset + 1
# Emit the last (still open) run.
hi.append(unichr(cntr))

# The low-bit stream is emitted as two literals of (nearly) equal length;
# the header comment notes that no literal can be longer than 64KiB.
half = len(low) // 2
low0 = low[:half]
low1 = low[half:]
|
|
|
|
|
2018-05-16 10:59:09 +00:00
|
|
|
|
|
|
|
def escape(chars):
    """Escape characters for use inside a Java string literal.

    Args:
      chars: iterable of single-character strings.

    Returns:
      A list of strings, one entry per input character, in input order.
      Printable ASCII is passed through unchanged; CR, LF, TAB, the double
      quote and the backslash get their short escapes; everything else is
      written as a four-hex-digit Unicode escape. A code point above 0xFFFF
      is written as a UTF-16 surrogate pair (two escapes in one entry), so it
      occupies two chars in the resulting Java string.

    Raises:
      TypeError: if an element is not a single character (from ``ord``).
    """
    # These characters must use short escapes rather than Unicode escapes:
    # Java translates Unicode escapes before it tokenizes the source, so an
    # escaped line terminator, quote or backslash would break the literal.
    short_escapes = {
        "\r": "\\r",
        "\n": "\\n",
        "\t": "\\t",
        "\"": "\\\"",
        "\\": "\\\\",
    }
    result = []
    for c in chars:
        if c in short_escapes:
            result.append(short_escapes[c])
            continue
        code = ord(c)
        if code > 0xFFFF:
            # A Java Unicode escape takes exactly four hex digits; formatting
            # a larger value with %04X would yield five or more digits, which
            # Java reads as a different character followed by literal digits.
            # Encode the code point as a high/low surrogate pair instead.
            code -= 0x10000
            result.append("\\u%04X\\u%04X" % (
                0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF)))
        elif code < 32 or code >= 127:
            result.append("\\u%04X" % code)
        else:
            result.append(c)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
# Escape each character stream and join it into the body of one literal.
data0_literal = "".join(escape(low0))
data1_literal = "".join(escape(low1))
skip_flip_literal = "".join(escape(hi))

# Pieces of the generated Java snippet: one constant declaration per stream.
source_code = [
    " private static final String DATA0 = \"", data0_literal, "\";\n",
    " private static final String DATA1 = \"", data1_literal, "\";\n",
    " private static final String SKIP_FLIP = \"", skip_flip_literal, "\";\n",
]

src_path = "DictionaryData.inc.java"

# Write all pieces back to back; nothing is inserted between them.
with open(src_path, "w") as source:
    source.writelines(source_code)
|