* re-enable Js build/test
  * improve decoder performance
  * rewrite dictionary data in Java/Js to a shorter uncompressed form
  * improve dictionary generation tool
This commit is contained in:
Eugene Kliuchnikov 2021-06-23 09:40:57 +02:00 committed by GitHub
parent bbe5d72ba3
commit f8c6717745
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 446 additions and 466 deletions

View File

@ -108,45 +108,55 @@ static BROTLI_INLINE void BrotliFillBitWindow(
BrotliBitReader* const br, uint32_t n_bits) { BrotliBitReader* const br, uint32_t n_bits) {
#if (BROTLI_64_BITS) #if (BROTLI_64_BITS)
if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
if (br->bit_pos_ >= 56) { uint32_t bit_pos = br->bit_pos_;
br->val_ >>= 56; if (bit_pos >= 56) {
br->bit_pos_ ^= 56; /* here same as -= 56 because of the if condition */ br->val_ =
br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8; (br->val_ >> 56) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8);
br->bit_pos_ =
bit_pos ^ 56; /* here same as -= 56 because of the if condition */
br->avail_in -= 7; br->avail_in -= 7;
br->next_in += 7; br->next_in += 7;
} }
} else if ( } else if (
!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) { !BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 16)) {
if (br->bit_pos_ >= 48) { uint32_t bit_pos = br->bit_pos_;
br->val_ >>= 48; if (bit_pos >= 48) {
br->bit_pos_ ^= 48; /* here same as -= 48 because of the if condition */ br->val_ =
br->val_ |= BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16; (br->val_ >> 48) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16);
br->bit_pos_ =
bit_pos ^ 48; /* here same as -= 48 because of the if condition */
br->avail_in -= 6; br->avail_in -= 6;
br->next_in += 6; br->next_in += 6;
} }
} else { } else {
if (br->bit_pos_ >= 32) { uint32_t bit_pos = br->bit_pos_;
br->val_ >>= 32; if (bit_pos >= 32) {
br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */ br->val_ = (br->val_ >> 32) |
br->val_ |= ((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32; (((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32);
br->bit_pos_ =
bit_pos ^ 32; /* here same as -= 32 because of the if condition */
br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
} }
} }
#else #else
if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) { if (!BROTLI_ALIGNED_READ && BROTLI_IS_CONSTANT(n_bits) && (n_bits <= 8)) {
if (br->bit_pos_ >= 24) { uint32_t bit_pos = br->bit_pos_;
br->val_ >>= 24; if (bit_pos >= 24) {
br->bit_pos_ ^= 24; /* here same as -= 24 because of the if condition */ br->val_ =
br->val_ |= BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8; (br->val_ >> 24) | (BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8);
br->bit_pos_ =
bit_pos ^ 24; /* here same as -= 24 because of the if condition */
br->avail_in -= 3; br->avail_in -= 3;
br->next_in += 3; br->next_in += 3;
} }
} else { } else {
if (br->bit_pos_ >= 16) { uint32_t bit_pos = br->bit_pos_;
br->val_ >>= 16; if (bit_pos >= 16) {
br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */ br->val_ = (br->val_ >> 16) |
br->val_ |= ((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16; (((uint32_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16);
br->bit_pos_ =
bit_pos ^ 16; /* here same as -= 16 because of the if condition */
br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ; br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
} }

View File

@ -1,4 +1,4 @@
/* Copyright 2020 Google Inc. All Rights Reserved. /* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license. Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT See file LICENSE for detail or copy at https://opensource.org/licenses/MIT

View File

@ -1127,13 +1127,13 @@ final class Decode {
s.distanceCode = CMD_LOOKUP[cmdCode + 3]; s.distanceCode = CMD_LOOKUP[cmdCode + 3];
BitReader.fillBitWindow(s); BitReader.fillBitWindow(s);
{ {
int extraBits = insertAndCopyExtraBits & 0xFF; int insertLengthExtraBits = insertAndCopyExtraBits & 0xFF;
s.insertLength = insertLengthOffset + BitReader.readBits(s, extraBits); s.insertLength = insertLengthOffset + BitReader.readBits(s, insertLengthExtraBits);
} }
BitReader.fillBitWindow(s); BitReader.fillBitWindow(s);
{ {
int extraBits = insertAndCopyExtraBits >> 8; int copyLengthExtraBits = insertAndCopyExtraBits >> 8;
s.copyLength = copyLengthOffset + BitReader.readBits(s, extraBits); s.copyLength = copyLengthOffset + BitReader.readBits(s, copyLengthExtraBits);
} }
s.j = 0; s.j = 0;

File diff suppressed because one or more lines are too long

View File

@ -2915,7 +2915,7 @@ public class SynthTest {
(byte) 0x12, (byte) 0x00, (byte) 0x00, (byte) 0x77, (byte) 0xda, (byte) 0xcc, (byte) 0xe1, (byte) 0x12, (byte) 0x00, (byte) 0x00, (byte) 0x77, (byte) 0xda, (byte) 0xcc, (byte) 0xe1,
(byte) 0x7b, (byte) 0xfa, (byte) 0x0f (byte) 0x7b, (byte) 0xfa, (byte) 0x0f
}; };
/* This lines is added manually. */ /* This line is added manually. */
char[] expected = new char[16777216]; Arrays.fill(expected, '*'); char[] expected = new char[16777216]; Arrays.fill(expected, '*');
checkSynth( checkSynth(
/* /*

View File

@ -23,11 +23,7 @@ closure_js_library(
closure_js_library( closure_js_library(
name = "decode", name = "decode",
srcs = ["decode.js"], srcs = ["decode.js"],
suppress = [ suppress = ["JSC_USELESS_BLOCK"],
"JSC_DUP_VAR_DECLARATION",
"JSC_INVALID_OCTAL_LITERAL",
"JSC_USELESS_BLOCK",
],
deps = [":polyfill"], deps = [":polyfill"],
) )
@ -36,6 +32,8 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_js_test")
closure_js_test( closure_js_test(
name = "all_tests", name = "all_tests",
srcs = ["decode_test.js"], srcs = ["decode_test.js"],
entry_points = ["decode_test.js"],
suppress = ["moduleLoad"],
deps = [ deps = [
":decode", ":decode",
":polyfill", ":polyfill",

File diff suppressed because one or more lines are too long

3
js/decode.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,11 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
import {BrotliDecode} from "./decode.js";
const testSuite = goog.require('goog.testing.testSuite');
goog.require('goog.testing.asserts'); goog.require('goog.testing.asserts');
goog.require('goog.testing.jsunit');
/** /**
* @param {!Int8Array} bytes * @param {!Int8Array} bytes
@ -19,79 +25,17 @@ function stringToBytes(str) {
return out; return out;
} }
function testMetadata() { testSuite({
testMetadata() {
assertEquals("", bytesToString(BrotliDecode(Int8Array.from([1, 11, 0, 42, 3])))); assertEquals("", bytesToString(BrotliDecode(Int8Array.from([1, 11, 0, 42, 3]))));
} },
function testEmpty() { testCompoundDictionary() {
assertEquals("", bytesToString(BrotliDecode(Int8Array.from([6]))));
assertEquals("", bytesToString(BrotliDecode(Int8Array.from([0x81, 1]))));
}
function testBaseDictWord() {
var input = Int8Array.from([
0x1b, 0x03, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00,
0x07, 0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0x41, 0x02
]);
/** @type {!Int8Array} */
var output = BrotliDecode(input);
assertEquals("time", bytesToString(output));
}
function testBlockCountMessage() {
var input = Int8Array.from([
0x1b, 0x0b, 0x00, 0x11, 0x01, 0x8c, 0xc1, 0xc5, 0x0d, 0x08, 0x00, 0x22,
0x65, 0xe1, 0xfc, 0xfd, 0x22, 0x2c, 0xc4, 0x00, 0x00, 0x38, 0xd8, 0x32,
0x89, 0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0x04, 0x10, 0x42, 0x00, 0x00, 0x00
]);
/** @type {!Int8Array} */
var output = BrotliDecode(input);
assertEquals("aabbaaaaabab", bytesToString(output));
}
function testCompressedUncompressedShortCompressedSmallWindow() {
var input = Int8Array.from([
0x21, 0xf4, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x1c, 0xa7, 0x6d, 0x00, 0x00,
0x38, 0xd8, 0x32, 0x89, 0x01, 0x12, 0x00, 0x00, 0x77, 0xda, 0x34, 0x7b,
0xdb, 0x50, 0x80, 0x02, 0x80, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x31,
0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x4e, 0xdb, 0x00, 0x00, 0x70, 0xb0,
0x65, 0x12, 0x03, 0x24, 0x00, 0x00, 0xee, 0xb4, 0x11, 0x24, 0x00
]);
/** @type {!Int8Array} */
var output = BrotliDecode(input);
assertEquals(
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +
"aaaaaaaaaaaaaabbbbbbbbbb", bytesToString(output));
}
function testIntactDistanceRingBuffer0() {
var input = Int8Array.from([
0x1b, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x80, 0xe3, 0xb4, 0x0d, 0x00, 0x00,
0x07, 0x5b, 0x26, 0x31, 0x40, 0x02, 0x00, 0xe0, 0x4e, 0x1b, 0xa1, 0x80,
0x20, 0x00
]);
/** @type {!Int8Array} */
var output = BrotliDecode(input);
assertEquals("himselfself", bytesToString(output));
}
function testCompoundDictionary() {
var txt = "kot lomom kolol slona\n"; var txt = "kot lomom kolol slona\n";
var dictionary = stringToBytes(txt); var dictionary = stringToBytes(txt);
var compressed = [0xa1, 0xa8, 0x00, 0xc0, 0x2f, 0x01, 0x10, 0xc4, 0x44, 0x09, 0x00]; var compressed = [0xa1, 0xa8, 0x00, 0xc0, 0x2f, 0x01, 0x10, 0xc4, 0x44, 0x09, 0x00];
assertEquals(txt.length, compressed.length * 2); assertEquals(txt.length, compressed.length * 2);
assertEquals(txt, bytesToString(BrotliDecode(Int8Array.from(compressed), {customDictionary: dictionary}))); var options = {"customDictionary": dictionary};
assertEquals(txt, bytesToString(BrotliDecode(Int8Array.from(compressed), options)));
} }
});

View File

@ -63,3 +63,12 @@ if (!Int8Array.prototype.fill) {
if (!Int32Array.prototype.fill) { if (!Int32Array.prototype.fill) {
Int32Array.prototype.fill = Array.prototype.fill; Int32Array.prototype.fill = Array.prototype.fill;
} }
// Polyfill: install Int8Array.prototype.slice only when the runtime does not
// already provide one, so a native implementation is never overridden.
if (!Int8Array.prototype.slice) {
Object.defineProperty(Int8Array.prototype, 'slice', {
// Copies the elements selected by (begin, end) with the generic
// Array.prototype.slice, then wraps that copy in a new Int8Array.
value: function (begin, end)
{
return new Int8Array(Array.prototype.slice.call(this, begin, end));
}
});
}

View File

@ -3,6 +3,7 @@
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <fstream> #include <fstream>
#include <glob.h>
#include <vector> #include <vector>
#include "./deorummolae.h" #include "./deorummolae.h"
@ -71,14 +72,13 @@ static void writeFile(const char* file, const std::string& content) {
outfile.close(); outfile.close();
} }
static void writeSamples(char const* argv[], const std::vector<int>& pathArgs, static void writeSamples(const std::vector<std::string>& paths,
const std::vector<size_t>& sizes, const uint8_t* data) { const std::vector<size_t>& sizes, const uint8_t* data) {
size_t offset = 0; size_t offset = 0;
for (size_t i = 0; i < pathArgs.size(); ++i) { for (size_t i = 0; i < paths.size(); ++i) {
int j = pathArgs[i]; const char* path = paths[i].c_str();
const char* file = argv[j];
size_t sampleSize = sizes[i]; size_t sampleSize = sizes[i];
std::ofstream outfile(file, std::ofstream::binary); std::ofstream outfile(path, std::ofstream::binary);
outfile.write(reinterpret_cast<const char*>(data + offset), outfile.write(reinterpret_cast<const char*>(data + offset),
static_cast<std::streamsize>(sampleSize)); static_cast<std::streamsize>(sampleSize));
outfile.close(); outfile.close();
@ -135,7 +135,7 @@ int main(int argc, char const* argv[]) {
std::vector<uint8_t> data; std::vector<uint8_t> data;
std::vector<size_t> sizes; std::vector<size_t> sizes;
std::vector<int> pathArgs; std::vector<std::string> paths;
size_t total = 0; size_t total = 0;
for (int i = 1; i < argc; ++i) { for (int i = 1; i < argc; ++i) {
if (argv[i] == nullptr) { if (argv[i] == nullptr) {
@ -217,6 +217,9 @@ int main(int argc, char const* argv[]) {
} }
} else if (arg1 == 's') { } else if (arg1 == 's') {
sliceLen = readInt(arg2); sliceLen = readInt(arg2);
// TODO: investigate why sliceLen == 4..5 greatly slows down
// durchschlag engine, but only from command line;
// durschlag_runner seems to work fine with those.
if (sliceLen < 4 || sliceLen > 256) { if (sliceLen < 4 || sliceLen > 256) {
printHelp(fileName(argv[0])); printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]); fprintf(stderr, "Invalid option '%s'\n", argv[i]);
@ -265,9 +268,14 @@ int main(int argc, char const* argv[]) {
} }
} }
std::string content = readFile(argv[i]); glob_t resolved_paths;
memset(&resolved_paths, 0, sizeof(resolved_paths));
bool ok = true;
if (glob(argv[i], GLOB_TILDE, NULL, &resolved_paths) == 0) {
for(size_t j = 0; j < resolved_paths.gl_pathc; ++j) {
std::string content = readFile(resolved_paths.gl_pathv[j]);
if (chunkLen == 0) { if (chunkLen == 0) {
pathArgs.push_back(i); paths.emplace_back(resolved_paths.gl_pathv[j]);
data.insert(data.end(), content.begin(), content.end()); data.insert(data.end(), content.begin(), content.end());
total += content.size(); total += content.size();
sizes.push_back(content.size()); sizes.push_back(content.size());
@ -286,6 +294,15 @@ int main(int argc, char const* argv[]) {
sizes.push_back(chunk.size()); sizes.push_back(chunk.size());
} }
} }
} else {
ok = false;
}
globfree(&resolved_paths);
if (!ok) exit(1);
}
fprintf(stderr, "Number of chunks: %zu; total size: %zu\n", sizes.size(),
total);
bool wantDictionary = (dictionaryArg == -1); bool wantDictionary = (dictionaryArg == -1);
if (method == METHOD_DISTILL || method == METHOD_PURIFY) { if (method == METHOD_DISTILL || method == METHOD_PURIFY) {
@ -313,10 +330,10 @@ int main(int argc, char const* argv[]) {
targetSize, sliceLen, blockSize, sizes, data.data())); targetSize, sliceLen, blockSize, sizes, data.data()));
} else if (method == METHOD_DISTILL) { } else if (method == METHOD_DISTILL) {
durchschlag_distill(sliceLen, minimumPopulation, &sizes, data.data()); durchschlag_distill(sliceLen, minimumPopulation, &sizes, data.data());
writeSamples(argv, pathArgs, sizes, data.data()); writeSamples(paths, sizes, data.data());
} else if (method == METHOD_PURIFY) { } else if (method == METHOD_PURIFY) {
durchschlag_purify(sliceLen, minimumPopulation, sizes, data.data()); durchschlag_purify(sliceLen, minimumPopulation, sizes, data.data());
writeSamples(argv, pathArgs, sizes, data.data()); writeSamples(paths, sizes, data.data());
} else { } else {
printHelp(fileName(argv[0])); printHelp(fileName(argv[0]));
fprintf(stderr, "Unknown generator\n"); fprintf(stderr, "Unknown generator\n");

View File

@ -62,8 +62,7 @@ case "$1" in
bazel build -c opt ...:all && bazel build -c opt ...:all &&
cd go && bazel test -c opt ...:all && cd .. && cd go && bazel test -c opt ...:all && cd .. &&
cd java && bazel test -c opt ...:all && cd .. && cd java && bazel test -c opt ...:all && cd .. &&
echo "bazelbuild/rules_closure seems to be dead; js build is skipped" && cd js && bazel test -c opt ...:all && cd .. &&
echo "cd js && bazel test -c opt ...:all && cd .." &&
cd research && bazel build -c opt ...:all && cd .. cd research && bazel build -c opt ...:all && cd ..
;; ;;
esac esac