mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Update (#807)
- fix formatting
- fix type conversion
- fix no-op arithmetic with null-pointer
- improve performance of hash_longest_match64
- go: detect read after close
- java decoder: support compound dictionary
- remove executable flag on non-scripts
This commit is contained in:
parent
f83aa5169e
commit
7f740f1308
0
c/common/context.h
Executable file → Normal file
0
c/common/context.h
Executable file → Normal file
0
c/common/dictionary.bin.br
Executable file → Normal file
0
c/common/dictionary.bin.br
Executable file → Normal file
0
c/common/platform.h
Executable file → Normal file
0
c/common/platform.h
Executable file → Normal file
0
c/common/transform.c
Executable file → Normal file
0
c/common/transform.c
Executable file → Normal file
0
c/common/transform.h
Executable file → Normal file
0
c/common/transform.h
Executable file → Normal file
@ -275,7 +275,8 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
|
||||
s->loop_counter = i;
|
||||
return BROTLI_DECODER_NEEDS_MORE_INPUT;
|
||||
}
|
||||
if (i + 1 == (int)s->size_nibbles && s->size_nibbles > 4 && bits == 0) {
|
||||
if (i + 1 == (int)s->size_nibbles && s->size_nibbles > 4 &&
|
||||
bits == 0) {
|
||||
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE);
|
||||
}
|
||||
s->meta_block_remaining_len |= (int)(bits << (i * 4));
|
||||
@ -324,7 +325,8 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE DecodeMetaBlockLength(
|
||||
s->loop_counter = i;
|
||||
return BROTLI_DECODER_NEEDS_MORE_INPUT;
|
||||
}
|
||||
if (i + 1 == (int)s->size_nibbles && s->size_nibbles > 1 && bits == 0) {
|
||||
if (i + 1 == (int)s->size_nibbles && s->size_nibbles > 1 &&
|
||||
bits == 0) {
|
||||
return BROTLI_FAILURE(
|
||||
BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE);
|
||||
}
|
||||
|
@ -83,7 +83,7 @@ typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode;
|
||||
|
||||
/* Packs one Huffman table entry into a single 32-bit word: |value| occupies
   the upper 16 bits and |bits| the lowest 8 bits. */
static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
                                                      const uint16_t value) {
  /* Widen to the unsigned 32-bit HuffmanCode type BEFORE shifting. Without
     the cast |value| is promoted to (signed) int, and shifting a value
     >= 0x8000 left by 16 overflows it, which is undefined behaviour. */
  return ((HuffmanCode)value << 16) | (HuffmanCode)bits;
}
|
||||
|
||||
#define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H)
|
||||
|
@ -524,7 +524,7 @@ static void StoreCommands(MemoryManager* m,
|
||||
static BROTLI_BOOL ShouldCompress(
|
||||
const uint8_t* input, size_t input_size, size_t num_literals) {
|
||||
double corpus_size = (double)input_size;
|
||||
if (num_literals < MIN_RATIO * corpus_size) {
|
||||
if ((double)num_literals < MIN_RATIO * corpus_size) {
|
||||
return BROTLI_TRUE;
|
||||
} else {
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
|
@ -516,7 +516,7 @@ static BROTLI_BOOL ShouldCompress(
|
||||
/* TODO: find more precise minimal block overhead. */
|
||||
if (bytes <= 2) return BROTLI_FALSE;
|
||||
if (num_commands < (bytes >> 8) + 2) {
|
||||
if (num_literals > 0.99 * (double)bytes) {
|
||||
if ((double)num_literals > 0.99 * (double)bytes) {
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
static const uint32_t kSampleRate = 13;
|
||||
static const double kMinEntropy = 7.92;
|
||||
@ -1686,8 +1686,10 @@ static BROTLI_BOOL BrotliEncoderCompressStreamFast(
|
||||
&storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
}
|
||||
if (block_size != 0) {
|
||||
*next_in += block_size;
|
||||
*available_in -= block_size;
|
||||
}
|
||||
if (inplace) {
|
||||
size_t out_bytes = storage_ix >> 3;
|
||||
BROTLI_DCHECK(out_bytes <= *available_out);
|
||||
|
0
c/enc/encoder_dict.c
Executable file → Normal file
0
c/enc/encoder_dict.c
Executable file → Normal file
0
c/enc/encoder_dict.h
Executable file → Normal file
0
c/enc/encoder_dict.h
Executable file → Normal file
@ -32,7 +32,7 @@ static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
|
||||
computation.
|
||||
|
||||
", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
|
||||
static const float kLog2Table[] = {
|
||||
static const double kLog2Table[] = {
|
||||
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
|
||||
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
|
||||
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
|
||||
@ -123,8 +123,9 @@ static const float kLog2Table[] = {
|
||||
|
||||
/* Visual Studio 2012 and Android API levels < 18 do not have the log2()
|
||||
* function defined, so we use log() and a multiplication instead. */
|
||||
#ifndef BROTLI_HAVE_LOG2
|
||||
#if ((defined(_MSC_VER) && _MSC_VER <= 1700) || (defined(__ANDROID_API__) && __ANDROID_API__ < 18))
|
||||
#if !defined(BROTLI_HAVE_LOG2)
|
||||
#if ((defined(_MSC_VER) && _MSC_VER <= 1700) || \
|
||||
(defined(__ANDROID_API__) && __ANDROID_API__ < 18))
|
||||
#define BROTLI_HAVE_LOG2 0
|
||||
#else
|
||||
#define BROTLI_HAVE_LOG2 1
|
||||
|
0
c/enc/hash_composite_inc.h
Executable file → Normal file
0
c/enc/hash_composite_inc.h
Executable file → Normal file
@ -115,8 +115,8 @@ static BROTLI_INLINE void FN(Store)(
|
||||
self->hash_shift_);
|
||||
const size_t minor_ix = num[key] & self->block_mask_;
|
||||
const size_t offset = minor_ix + (key << self->block_bits_);
|
||||
buckets[offset] = (uint32_t)ix;
|
||||
++num[key];
|
||||
buckets[offset] = (uint32_t)ix;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
|
||||
|
0
c/enc/hash_rolling_inc.h
Executable file → Normal file
0
c/enc/hash_rolling_inc.h
Executable file → Normal file
0
c/enc/params.h
Executable file → Normal file
0
c/enc/params.h
Executable file → Normal file
@ -77,7 +77,7 @@ BROTLI_BOOL BrotliIsMostlyUTF8(
|
||||
i += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
|
||||
return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length);
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
0
compiler_config_setting.bzl
Executable file → Normal file
0
compiler_config_setting.bzl
Executable file → Normal file
47
docs/constants.h.3
Normal file
47
docs/constants.h.3
Normal file
@ -0,0 +1,47 @@
|
||||
.TH "constants.h" 3 "Wed May 13 2020" "Brotli" \" -*- nroff -*-
|
||||
.ad l
|
||||
.nh
|
||||
.SH NAME
|
||||
constants.h \- Common constants used in decoder and encoder API\&.
|
||||
|
||||
.SH SYNOPSIS
|
||||
.br
|
||||
.PP
|
||||
.SS "Macros"
|
||||
|
||||
.in +1c
|
||||
.ti -1c
|
||||
.RI "#define \fBBROTLI_LARGE_MAX_DISTANCE_BITS\fP 62U"
|
||||
.br
|
||||
.RI "\fIThe theoretical maximum number of distance bits specified for large window brotli, for 64-bit encoders and decoders\&. \fP"
|
||||
.ti -1c
|
||||
.RI "#define \fBBROTLI_LARGE_MAX_WBITS\fP 30"
|
||||
.br
|
||||
.RI "\fIThe maximum supported large brotli window bits by the encoder and decoder\&. \fP"
|
||||
.ti -1c
|
||||
.RI "#define \fBBROTLI_MAX_NPOSTFIX\fP 3"
|
||||
.br
|
||||
.RI "\fIMaximal number of 'postfix' bits\&. \fP"
|
||||
.in -1c
|
||||
.SH "Detailed Description"
|
||||
.PP
|
||||
Common constants used in decoder and encoder API\&.
|
||||
|
||||
|
||||
.SH "Macro Definition Documentation"
|
||||
.PP
|
||||
.SS "#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U"
|
||||
|
||||
.PP
|
||||
The theoretical maximum number of distance bits specified for large window brotli, for 64-bit encoders and decoders\&. Even when in practice 32-bit encoders and decoders only support up to 30 max distance bits, the value is set to 62 because it affects the large window brotli file format\&. Specifically, it affects the encoding of simple huffman tree for distances, see Specification RFC 7932 chapter 3\&.4\&.
|
||||
.SS "#define BROTLI_LARGE_MAX_WBITS 30"
|
||||
|
||||
.PP
|
||||
The maximum supported large brotli window bits by the encoder and decoder\&. Large window brotli allows up to 62 bits, however the current encoder and decoder, designed for 32-bit integers, only support up to 30 bits maximum\&.
|
||||
.SS "#define BROTLI_MAX_NPOSTFIX 3"
|
||||
|
||||
.PP
|
||||
Maximal number of 'postfix' bits\&. Number of 'postfix' bits is stored as 2 bits in meta-block header\&.
|
||||
.SH "Author"
|
||||
.PP
|
||||
Generated automatically by Doxygen for Brotli from the source code\&.
|
@ -274,6 +274,10 @@ func TestReader(t *testing.T) {
|
||||
"<%d bytes>",
|
||||
got, len(content))
|
||||
}
|
||||
buf := make([]byte, 4)
|
||||
if _, err := r.Read(buf); err != errReaderClosed {
|
||||
t.Errorf("Read-after-Close returned %v, expected %v", err, errReaderClosed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecode(t *testing.T) {
|
||||
|
@ -82,6 +82,9 @@ func (r *Reader) Close() error {
|
||||
}
|
||||
|
||||
func (r *Reader) Read(p []byte) (n int, err error) {
|
||||
if r.state == nil {
|
||||
return 0, errReaderClosed
|
||||
}
|
||||
if int(C.BrotliDecoderHasMoreOutput(r.state)) == 0 && len(r.in) == 0 {
|
||||
m, readErr := r.src.Read(r.buf)
|
||||
if m == 0 {
|
||||
|
11
java/org/brotli/common/BUILD
Normal file
11
java/org/brotli/common/BUILD
Normal file
@ -0,0 +1,11 @@
|
||||
# Description:
|
||||
# Java port of Brotli.
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
licenses(["notice"]) # MIT
|
||||
|
||||
java_library(
|
||||
name = "shared_dictionary",
|
||||
srcs = ["SharedDictionaryType.java"],
|
||||
)
|
15
java/org/brotli/common/SharedDictionaryType.java
Normal file
15
java/org/brotli/common/SharedDictionaryType.java
Normal file
@ -0,0 +1,15 @@
|
||||
/* Copyright 2018 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
package org.brotli.common;
|
||||
|
||||
/** POJO enum that mirrors C BrotliSharedDictionaryType. */
|
||||
public class SharedDictionaryType {
|
||||
// Disallow instantiation.
|
||||
private SharedDictionaryType() {}
|
||||
|
||||
public static final int RAW = 0;
|
||||
public static final int SERIALIZED = 1;
|
||||
}
|
@ -84,6 +84,10 @@ public class BrotliInputStream extends InputStream {
|
||||
}
|
||||
}
|
||||
|
||||
public void attachDictionaryChunk(byte[] data) {
|
||||
Decode.attachDictionaryChunk(state, data);
|
||||
}
|
||||
|
||||
public void enableEagerOutput() {
|
||||
Decode.enableEagerOutput(state);
|
||||
}
|
||||
|
63
java/org/brotli/dec/CompoundDictionaryTest.java
Normal file
63
java/org/brotli/dec/CompoundDictionaryTest.java
Normal file
@ -0,0 +1,63 @@
|
||||
/* Copyright 2020 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
package org.brotli.dec;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
/**
|
||||
* Tests for compound dictionary support in {@link BrotliInputStream}.
|
||||
*/
|
||||
@RunWith(JUnit4.class)
|
||||
public class CompoundDictionaryTest {
|
||||
|
||||
/** See {@link SynthTest} */
|
||||
private static final byte[] ONE_COPY = {
|
||||
(byte) 0xa1, (byte) 0xa8, (byte) 0x00, (byte) 0xc0, (byte) 0x2f, (byte) 0x01, (byte) 0x10,
|
||||
(byte) 0xc4, (byte) 0x44, (byte) 0x09, (byte) 0x00
|
||||
};
|
||||
|
||||
private static final String TEXT = "Kot lomom kolol slona!";
|
||||
|
||||
@Test
|
||||
public void testNoDictionary() throws IOException {
|
||||
BrotliInputStream decoder = new BrotliInputStream(new ByteArrayInputStream(ONE_COPY));
|
||||
byte[] buffer = new byte[32];
|
||||
int length = decoder.read(buffer, 0, buffer.length);
|
||||
assertEquals(TEXT.length(), length);
|
||||
assertEquals("alternate\" type=\"appli", new String(buffer, 0, length, "US-ASCII"));
|
||||
decoder.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnePieceDictionary() throws IOException {
|
||||
BrotliInputStream decoder = new BrotliInputStream(new ByteArrayInputStream(ONE_COPY));
|
||||
decoder.attachDictionaryChunk(TEXT.getBytes("US-ASCII"));
|
||||
byte[] buffer = new byte[32];
|
||||
int length = decoder.read(buffer, 0, buffer.length);
|
||||
assertEquals(TEXT.length(), length);
|
||||
assertEquals(TEXT, new String(buffer, 0, length, "US-ASCII"));
|
||||
decoder.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTwoPieceDictionary() throws IOException {
|
||||
BrotliInputStream decoder = new BrotliInputStream(new ByteArrayInputStream(ONE_COPY));
|
||||
decoder.attachDictionaryChunk(TEXT.substring(0, 13).getBytes("US-ASCII"));
|
||||
decoder.attachDictionaryChunk(TEXT.substring(13).getBytes("US-ASCII"));
|
||||
byte[] buffer = new byte[32];
|
||||
int length = decoder.read(buffer, 0, buffer.length);
|
||||
assertEquals(TEXT.length(), length);
|
||||
assertEquals(TEXT, new String(buffer, 0, length, "US-ASCII"));
|
||||
decoder.close();
|
||||
}
|
||||
}
|
@ -8,6 +8,7 @@ package org.brotli.dec;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
* API for Brotli decompression.
|
||||
@ -31,11 +32,12 @@ final class Decode {
|
||||
private static final int COPY_UNCOMPRESSED = 6;
|
||||
private static final int INSERT_LOOP = 7;
|
||||
private static final int COPY_LOOP = 8;
|
||||
private static final int TRANSFORM = 9;
|
||||
private static final int USE_DICTIONARY = 9;
|
||||
private static final int FINISHED = 10;
|
||||
private static final int CLOSED = 11;
|
||||
private static final int INIT_WRITE = 12;
|
||||
private static final int WRITE = 13;
|
||||
private static final int COPY_FROM_COMPOUND_DICTIONARY = 14;
|
||||
|
||||
private static final int DEFAULT_CODE_LENGTH = 8;
|
||||
private static final int CODE_LENGTH_REPEAT_CODE = 16;
|
||||
@ -45,6 +47,7 @@ final class Decode {
|
||||
private static final int LITERAL_CONTEXT_BITS = 6;
|
||||
private static final int DISTANCE_CONTEXT_BITS = 2;
|
||||
|
||||
private static final int CD_BLOCK_MAP_BITS = 8;
|
||||
private static final int HUFFMAN_TABLE_BITS = 8;
|
||||
private static final int HUFFMAN_TABLE_MASK = 0xFF;
|
||||
|
||||
@ -85,20 +88,8 @@ final class Decode {
|
||||
0x020000, 0x020004, 0x020003, 0x030002, 0x020000, 0x020004, 0x020003, 0x040005
|
||||
};
|
||||
|
||||
static final int[] DICTIONARY_OFFSETS_BY_LENGTH = {
|
||||
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
|
||||
104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280, 122016
|
||||
};
|
||||
|
||||
static final int[] DICTIONARY_SIZE_BITS_BY_LENGTH = {
|
||||
0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5
|
||||
};
|
||||
|
||||
static final int MIN_WORD_LENGTH = 4;
|
||||
|
||||
static final int MAX_WORD_LENGTH = 24;
|
||||
|
||||
static final int MAX_TRANSFORMED_WORD_LENGTH = 5 + MAX_WORD_LENGTH + 8;
|
||||
// TODO: generalize.
|
||||
static final int MAX_TRANSFORMED_WORD_LENGTH = 5 + 24 + 8;
|
||||
|
||||
private static final int MAX_DISTANCE_BITS = 24;
|
||||
private static final int MAX_LARGE_WINDOW_DISTANCE_BITS = 62;
|
||||
@ -274,6 +265,25 @@ final class Decode {
|
||||
s.isLargeWindow = 1;
|
||||
}
|
||||
|
||||
// TODO: do we need byte views?
|
||||
static void attachDictionaryChunk(State s, byte[] data) {
|
||||
if (s.runningState != INITIALIZED) {
|
||||
throw new IllegalStateException("State MUST be freshly initialized");
|
||||
}
|
||||
if (s.cdNumChunks == 0) {
|
||||
s.cdChunks = new byte[16][];
|
||||
s.cdChunkOffsets = new int[16];
|
||||
s.cdBlockBits = -1;
|
||||
}
|
||||
if (s.cdNumChunks == 15) {
|
||||
throw new IllegalStateException("Too many dictionary chunks");
|
||||
}
|
||||
s.cdChunks[s.cdNumChunks] = data;
|
||||
s.cdNumChunks++;
|
||||
s.cdTotalSize += data.length;
|
||||
s.cdChunkOffsets[s.cdNumChunks] = s.cdTotalSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Associate input with decoder state.
|
||||
*
|
||||
@ -821,7 +831,6 @@ final class Decode {
|
||||
BitReader.fillBitWindow(s);
|
||||
s.distancePostfixBits = BitReader.readFewBits(s, 2);
|
||||
s.numDirectDistanceCodes = BitReader.readFewBits(s, 4) << s.distancePostfixBits;
|
||||
s.distancePostfixMask = (1 << s.distancePostfixBits) - 1;
|
||||
// TODO: Reuse?
|
||||
s.contextModes = new byte[s.numLiteralBlockTypes];
|
||||
for (int i = 0; i < s.numLiteralBlockTypes;) {
|
||||
@ -945,6 +954,118 @@ final class Decode {
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void doUseDictionary(State s, int fence) {
|
||||
if (s.distance > MAX_ALLOWED_DISTANCE) {
|
||||
throw new BrotliRuntimeException("Invalid backward reference");
|
||||
}
|
||||
int address = s.distance - s.maxDistance - 1 - s.cdTotalSize;
|
||||
if (address < 0) {
|
||||
initializeCompoundDictionaryCopy(s, -address - 1, s.copyLength);
|
||||
s.runningState = COPY_FROM_COMPOUND_DICTIONARY;
|
||||
} else {
|
||||
// Force lazy dictionary initialization.
|
||||
ByteBuffer dictionaryData = Dictionary.getData();
|
||||
int wordLength = s.copyLength;
|
||||
if (wordLength > Dictionary.MAX_DICTIONARY_WORD_LENGTH) {
|
||||
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
|
||||
}
|
||||
int shift = Dictionary.sizeBits[wordLength];
|
||||
if (shift == 0) {
|
||||
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
|
||||
}
|
||||
int offset = Dictionary.offsets[wordLength];
|
||||
int mask = (1 << shift) - 1;
|
||||
int wordIdx = address & mask;
|
||||
int transformIdx = address >>> shift;
|
||||
offset += wordIdx * wordLength;
|
||||
Transform.Transforms transforms = Transform.RFC_TRANSFORMS;
|
||||
if (transformIdx >= transforms.numTransforms) {
|
||||
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
|
||||
}
|
||||
int len = Transform.transformDictionaryWord(s.ringBuffer, s.pos, dictionaryData,
|
||||
offset, wordLength, transforms, transformIdx);
|
||||
s.pos += len;
|
||||
s.metaBlockLength -= len;
|
||||
if (s.pos >= fence) {
|
||||
s.nextRunningState = MAIN_LOOP;
|
||||
s.runningState = INIT_WRITE;
|
||||
return;
|
||||
}
|
||||
s.runningState = MAIN_LOOP;
|
||||
}
|
||||
}
|
||||
|
||||
private static void initializeCompoundDictionary(State s) {
|
||||
s.cdBlockMap = new byte[1 << CD_BLOCK_MAP_BITS];
|
||||
int blockBits = CD_BLOCK_MAP_BITS;
|
||||
// If this function is executed, then s.cdTotalSize > 0.
|
||||
while (((s.cdTotalSize - 1) >>> blockBits) != 0) {
|
||||
blockBits++;
|
||||
}
|
||||
blockBits -= CD_BLOCK_MAP_BITS;
|
||||
s.cdBlockBits = blockBits;
|
||||
int cursor = 0;
|
||||
int index = 0;
|
||||
while (cursor < s.cdTotalSize) {
|
||||
while (s.cdChunkOffsets[index + 1] < cursor) {
|
||||
index++;
|
||||
}
|
||||
s.cdBlockMap[cursor >>> blockBits] = (byte) index;
|
||||
cursor += 1 << blockBits;
|
||||
}
|
||||
}
|
||||
|
||||
private static void initializeCompoundDictionaryCopy(State s, int address, int length) {
|
||||
if (s.cdBlockBits == -1) {
|
||||
initializeCompoundDictionary(s);
|
||||
}
|
||||
int index = s.cdBlockMap[address >>> s.cdBlockBits];
|
||||
while (address >= s.cdChunkOffsets[index + 1]) {
|
||||
index++;
|
||||
}
|
||||
if (s.cdTotalSize > address + length) {
|
||||
throw new BrotliRuntimeException("Invalid backward reference");
|
||||
}
|
||||
/* Update the recent distances cache */
|
||||
s.distRbIdx = (s.distRbIdx + 1) & 0x3;
|
||||
s.rings[s.distRbIdx] = s.distance;
|
||||
s.metaBlockLength -= length;
|
||||
s.cdBrIndex = index;
|
||||
s.cdBrOffset = address - s.cdChunkOffsets[index];
|
||||
s.cdBrLength = length;
|
||||
s.cdBrCopied = 0;
|
||||
}
|
||||
|
||||
private static int copyFromCompoundDictionary(State s, int fence) {
|
||||
int pos = s.pos;
|
||||
int origPos = pos;
|
||||
while (s.cdBrLength != s.cdBrCopied) {
|
||||
int space = fence - pos;
|
||||
int chunkLength = s.cdChunkOffsets[s.cdBrIndex + 1] - s.cdChunkOffsets[s.cdBrIndex];
|
||||
int remChunkLength = chunkLength - s.cdBrOffset;
|
||||
int length = s.cdBrLength - s.cdBrCopied;
|
||||
if (length > remChunkLength) {
|
||||
length = remChunkLength;
|
||||
}
|
||||
if (length > space) {
|
||||
length = space;
|
||||
}
|
||||
Utils.copyBytes(
|
||||
s.ringBuffer, pos, s.cdChunks[s.cdBrIndex], s.cdBrOffset, s.cdBrOffset + length);
|
||||
pos += length;
|
||||
s.cdBrOffset += length;
|
||||
s.cdBrCopied += length;
|
||||
if (length == remChunkLength) {
|
||||
s.cdBrIndex++;
|
||||
s.cdBrOffset = 0;
|
||||
}
|
||||
if (pos >= fence) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return pos - origPos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Actual decompress implementation.
|
||||
*/
|
||||
@ -1110,7 +1231,7 @@ final class Decode {
|
||||
}
|
||||
|
||||
if (s.distance > s.maxDistance) {
|
||||
s.runningState = TRANSFORM;
|
||||
s.runningState = USE_DICTIONARY;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1164,35 +1285,16 @@ final class Decode {
|
||||
}
|
||||
continue;
|
||||
|
||||
case TRANSFORM:
|
||||
// This check is done here to unburden the hot loop.
|
||||
if (s.distance > MAX_ALLOWED_DISTANCE) {
|
||||
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
|
||||
}
|
||||
if (s.copyLength >= MIN_WORD_LENGTH
|
||||
&& s.copyLength <= MAX_WORD_LENGTH) {
|
||||
int offset = DICTIONARY_OFFSETS_BY_LENGTH[s.copyLength];
|
||||
int wordId = s.distance - s.maxDistance - 1;
|
||||
int shift = DICTIONARY_SIZE_BITS_BY_LENGTH[s.copyLength];
|
||||
int mask = (1 << shift) - 1;
|
||||
int wordIdx = wordId & mask;
|
||||
int transformIdx = wordId >>> shift;
|
||||
offset += wordIdx * s.copyLength;
|
||||
if (transformIdx < Transform.NUM_RFC_TRANSFORMS) {
|
||||
int len = Transform.transformDictionaryWord(ringBuffer, s.pos, Dictionary.getData(),
|
||||
offset, s.copyLength, Transform.RFC_TRANSFORMS, transformIdx);
|
||||
s.pos += len;
|
||||
s.metaBlockLength -= len;
|
||||
if (s.pos >= fence) {
|
||||
s.nextRunningState = MAIN_LOOP;
|
||||
s.runningState = INIT_WRITE;
|
||||
case USE_DICTIONARY:
|
||||
doUseDictionary(s, fence);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
|
||||
}
|
||||
} else {
|
||||
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
|
||||
|
||||
case COPY_FROM_COMPOUND_DICTIONARY:
|
||||
s.pos += copyFromCompoundDictionary(s, fence);
|
||||
if (s.pos >= fence) {
|
||||
s.nextRunningState = COPY_FROM_COMPOUND_DICTIONARY;
|
||||
s.runningState = INIT_WRITE;
|
||||
return;
|
||||
}
|
||||
s.runningState = MAIN_LOOP;
|
||||
continue;
|
||||
@ -1208,7 +1310,6 @@ final class Decode {
|
||||
s.runningState = BLOCK_START;
|
||||
continue;
|
||||
|
||||
|
||||
case COPY_UNCOMPRESSED:
|
||||
copyUncompressedData(s);
|
||||
continue;
|
||||
|
@ -18,7 +18,12 @@ import java.nio.ByteBuffer;
|
||||
* once in each classworld). To avoid this, it is enough to call {@link #getData()} proactively.
|
||||
*/
|
||||
public final class Dictionary {
|
||||
private static volatile ByteBuffer data;
|
||||
static final int MIN_DICTIONARY_WORD_LENGTH = 4;
|
||||
static final int MAX_DICTIONARY_WORD_LENGTH = 31;
|
||||
|
||||
private static ByteBuffer data;
|
||||
static final int[] offsets = new int[32];
|
||||
static final int[] sizeBits = new int[32];
|
||||
|
||||
private static class DataLoader {
|
||||
static final boolean OK;
|
||||
@ -34,10 +39,44 @@ public final class Dictionary {
|
||||
}
|
||||
}
|
||||
|
||||
public static void setData(ByteBuffer data) {
|
||||
public static void setData(ByteBuffer data, int[] sizeBits) {
|
||||
if (!data.isDirect() || !data.isReadOnly()) {
|
||||
throw new BrotliRuntimeException("data must be a direct read-only byte buffer");
|
||||
}
|
||||
// TODO: is that so?
|
||||
if (sizeBits.length > MAX_DICTIONARY_WORD_LENGTH) {
|
||||
throw new BrotliRuntimeException(
|
||||
"sizeBits length must be at most " + MAX_DICTIONARY_WORD_LENGTH);
|
||||
}
|
||||
for (int i = 0; i < MIN_DICTIONARY_WORD_LENGTH; ++i) {
|
||||
if (sizeBits[i] != 0) {
|
||||
throw new BrotliRuntimeException("first " + MIN_DICTIONARY_WORD_LENGTH + " must be 0");
|
||||
}
|
||||
}
|
||||
int[] dictionaryOffsets = Dictionary.offsets;
|
||||
int[] dictionarySizeBits = Dictionary.sizeBits;
|
||||
System.arraycopy(sizeBits, 0, dictionarySizeBits, 0, sizeBits.length);
|
||||
int pos = 0;
|
||||
int limit = data.capacity();
|
||||
for (int i = 0; i < sizeBits.length; ++i) {
|
||||
dictionaryOffsets[i] = pos;
|
||||
int bits = dictionarySizeBits[i];
|
||||
if (bits != 0) {
|
||||
if (bits >= 31) {
|
||||
throw new BrotliRuntimeException("sizeBits values must be less than 31");
|
||||
}
|
||||
pos += i << bits;
|
||||
if (pos <= 0 || pos > limit) {
|
||||
throw new BrotliRuntimeException("sizeBits is inconsistent: overflow");
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = sizeBits.length; i < 32; ++i) {
|
||||
dictionaryOffsets[i] = pos;
|
||||
}
|
||||
if (pos != limit) {
|
||||
throw new BrotliRuntimeException("sizeBits is inconsistent: underflow");
|
||||
}
|
||||
Dictionary.data = data;
|
||||
}
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
0
java/org/brotli/dec/EagerStreamTest.java
Executable file → Normal file
0
java/org/brotli/dec/EagerStreamTest.java
Executable file → Normal file
@ -65,7 +65,10 @@ public class SetDictionaryTest {
|
||||
FileChannel dictionaryChannel =
|
||||
new FileInputStream(System.getProperty("RFC_DICTIONARY")).getChannel();
|
||||
ByteBuffer dictionary = dictionaryChannel.map(FileChannel.MapMode.READ_ONLY, 0, 122784).load();
|
||||
Dictionary.setData(dictionary);
|
||||
int[] sizeBits = {
|
||||
0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5
|
||||
};
|
||||
Dictionary.setData(dictionary, sizeBits);
|
||||
|
||||
// Retry decoding of dictionary item.
|
||||
decoder = new BrotliInputStream(new ByteArrayInputStream(BASE_DICT_WORD));
|
||||
|
@ -60,7 +60,6 @@ final class State {
|
||||
int contextLookupOffset2;
|
||||
int distanceCode;
|
||||
int numDirectDistanceCodes;
|
||||
int distancePostfixMask;
|
||||
int distancePostfixBits;
|
||||
int distance;
|
||||
int copyLength;
|
||||
@ -76,6 +75,18 @@ final class State {
|
||||
int isEager;
|
||||
int isLargeWindow;
|
||||
|
||||
// Compound dictionary
|
||||
int cdNumChunks;
|
||||
int cdTotalSize;
|
||||
int cdBrIndex;
|
||||
int cdBrOffset;
|
||||
int cdBrLength;
|
||||
int cdBrCopied;
|
||||
byte[][] cdChunks;
|
||||
int[] cdChunkOffsets;
|
||||
int cdBlockBits;
|
||||
byte[] cdBlockMap;
|
||||
|
||||
InputStream input; // BitReader
|
||||
|
||||
State() {
|
||||
|
@ -58,6 +58,10 @@ final class Utils {
|
||||
}
|
||||
}
|
||||
|
||||
static void copyBytes(byte[] dst, int target, byte[] src, int start, int end) {
|
||||
System.arraycopy(src, start, dst, target, end - start);
|
||||
}
|
||||
|
||||
static void copyBytesWithin(byte[] bytes, int target, int start, int end) {
|
||||
System.arraycopy(bytes, start, bytes, target, end - start);
|
||||
}
|
||||
|
14
java/org/brotli/enc/BUILD
Normal file
14
java/org/brotli/enc/BUILD
Normal file
@ -0,0 +1,14 @@
|
||||
# Description:
|
||||
# Java prepared dictionary support for the Brotli encoder.
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
licenses(["notice"]) # MIT
|
||||
|
||||
java_library(
|
||||
name = "prepared_dictionary",
|
||||
srcs = [
|
||||
"PreparedDictionary.java",
|
||||
"PreparedDictionaryGenerator.java",
|
||||
],
|
||||
)
|
16
java/org/brotli/enc/PreparedDictionary.java
Normal file
16
java/org/brotli/enc/PreparedDictionary.java
Normal file
@ -0,0 +1,16 @@
|
||||
/* Copyright 2018 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
package org.brotli.enc;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
* Prepared dictionary data provider.
|
||||
*/
|
||||
public interface PreparedDictionary {
|
||||
ByteBuffer getData();
|
||||
}
|
185
java/org/brotli/enc/PreparedDictionaryGenerator.java
Normal file
185
java/org/brotli/enc/PreparedDictionaryGenerator.java
Normal file
@ -0,0 +1,185 @@
|
||||
/* Copyright 2017 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
package org.brotli.enc;
|
||||
|
||||
import java.nio.Buffer;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.IntBuffer;
|
||||
import java.nio.ShortBuffer;
|
||||
|
||||
/**
|
||||
* Java prepared (raw) dictionary producer.
|
||||
*/
|
||||
public class PreparedDictionaryGenerator {
|
||||
|
||||
private static final int MAGIC = 0xDEBCEDE0;
|
||||
private static final long HASH_MULTIPLIER = 0x1fe35a7bd3579bd3L;
|
||||
|
||||
private static class PreparedDictionaryImpl implements PreparedDictionary {
|
||||
private final ByteBuffer data;
|
||||
|
||||
private PreparedDictionaryImpl(ByteBuffer data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteBuffer getData() {
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
// Disallow instantiation.
|
||||
private PreparedDictionaryGenerator() { }
|
||||
|
||||
public static PreparedDictionary generate(ByteBuffer src) {
|
||||
return generate(src, 17, 3, 40, 5);
|
||||
}
|
||||
|
||||
public static PreparedDictionary generate(ByteBuffer src,
|
||||
int bucketBits, int slotBits, int hashBits, int blockBits) {
|
||||
((Buffer) src).clear(); // Just in case...
|
||||
if (blockBits > 12) {
|
||||
throw new IllegalArgumentException("blockBits is too big");
|
||||
}
|
||||
if (bucketBits >= 24) {
|
||||
throw new IllegalArgumentException("bucketBits is too big");
|
||||
}
|
||||
if (bucketBits - slotBits >= 16) {
|
||||
throw new IllegalArgumentException("slotBits is too small");
|
||||
}
|
||||
int bucketLimit = 1 << blockBits;
|
||||
int numBuckets = 1 << bucketBits;
|
||||
int numSlots = 1 << slotBits;
|
||||
int slotMask = numSlots - 1;
|
||||
int hashShift = 64 - bucketBits;
|
||||
long hashMask = (~0L) >>> (64 - hashBits);
|
||||
int sourceSize = src.capacity();
|
||||
if (sourceSize < 8) {
|
||||
throw new IllegalArgumentException("src is too short");
|
||||
}
|
||||
|
||||
/* Step 1: create "bloated" hasher. */
|
||||
short[] num = new short[numBuckets];
|
||||
int[] bucketHeads = new int[numBuckets];
|
||||
int[] nextBucket = new int[sourceSize];
|
||||
|
||||
long accumulator = 0;
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
accumulator |= (src.get(i) & 0xFFL) << (8 * i);
|
||||
}
|
||||
accumulator <<= 8;
|
||||
/* TODO: apply custom "store" order. */
|
||||
for (int i = 0; i + 7 < sourceSize; ++i) {
|
||||
accumulator = (accumulator >>> 8) | ((src.get(i + 7) & 0xFFL) << 56);
|
||||
long h = (accumulator & hashMask) * HASH_MULTIPLIER;
|
||||
int key = (int) (h >>> hashShift);
|
||||
int count = num[key];
|
||||
nextBucket[i] = (count == 0) ? -1 : bucketHeads[key];
|
||||
bucketHeads[key] = i;
|
||||
count++;
|
||||
if (count > bucketLimit) {
|
||||
count = bucketLimit;
|
||||
}
|
||||
num[key] = (short) count;
|
||||
}
|
||||
|
||||
/* Step 2: find slot limits. */
|
||||
int[] slotLimit = new int[numSlots];
|
||||
int[] slotSize = new int[numSlots];
|
||||
int totalItems = 0;
|
||||
for (int i = 0; i < numSlots; ++i) {
|
||||
boolean overflow = false;
|
||||
slotLimit[i] = bucketLimit;
|
||||
while (true) {
|
||||
overflow = false;
|
||||
int limit = slotLimit[i];
|
||||
int count = 0;
|
||||
for (int j = i; j < numBuckets; j += numSlots) {
|
||||
int size = num[j];
|
||||
/* Last chain may span behind 64K limit; overflow happens only if
|
||||
we are about to use 0xFFFF+ as item offset. */
|
||||
if (count >= 0xFFFF) {
|
||||
overflow = true;
|
||||
break;
|
||||
}
|
||||
if (size > limit) {
|
||||
size = limit;
|
||||
}
|
||||
count += size;
|
||||
}
|
||||
if (!overflow) {
|
||||
slotSize[i] = count;
|
||||
totalItems += count;
|
||||
break;
|
||||
}
|
||||
slotLimit[i]--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 3: transfer data to "slim" hasher. */
|
||||
int part0 = 6 * 4;
|
||||
int part1 = numSlots * 4;
|
||||
int part2 = numBuckets * 2;
|
||||
int part3 = totalItems * 4;
|
||||
int allocSize = part0 + part1 + part2 + part3 + sourceSize;
|
||||
ByteBuffer flat = ByteBuffer.allocateDirect(allocSize);
|
||||
ByteBuffer pointer = flat.slice();
|
||||
pointer.order(ByteOrder.nativeOrder());
|
||||
|
||||
IntBuffer struct = pointer.asIntBuffer();
|
||||
pointer.position(pointer.position() + part0);
|
||||
IntBuffer slotOffsets = pointer.asIntBuffer();
|
||||
pointer.position(pointer.position() + part1);
|
||||
ShortBuffer heads = pointer.asShortBuffer();
|
||||
pointer.position(pointer.position() + part2);
|
||||
IntBuffer items = pointer.asIntBuffer();
|
||||
pointer.position(pointer.position() + part3);
|
||||
ByteBuffer sourceCopy = pointer.slice();
|
||||
|
||||
/* magic */ struct.put(0, MAGIC);
|
||||
/* source_offset */ struct.put(1, totalItems);
|
||||
/* source_size */ struct.put(2, sourceSize);
|
||||
/* hash_bits */ struct.put(3, hashBits);
|
||||
/* bucket_bits */ struct.put(4, bucketBits);
|
||||
/* slot_bits */ struct.put(5, slotBits);
|
||||
|
||||
totalItems = 0;
|
||||
for (int i = 0; i < numSlots; ++i) {
|
||||
slotOffsets.put(i, totalItems);
|
||||
totalItems += slotSize[i];
|
||||
slotSize[i] = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numBuckets; ++i) {
|
||||
int slot = i & slotMask;
|
||||
int count = num[i];
|
||||
if (count > slotLimit[slot]) {
|
||||
count = slotLimit[slot];
|
||||
}
|
||||
if (count == 0) {
|
||||
heads.put(i, (short) 0xFFFF);
|
||||
continue;
|
||||
}
|
||||
int cursor = slotSize[slot];
|
||||
heads.put(i, (short) cursor);
|
||||
cursor += slotOffsets.get(slot);
|
||||
slotSize[slot] += count;
|
||||
int pos = bucketHeads[i];
|
||||
for (int j = 0; j < count; j++) {
|
||||
items.put(cursor++, pos);
|
||||
pos = nextBucket[pos];
|
||||
}
|
||||
cursor--;
|
||||
items.put(cursor, items.get(cursor) | 0x80000000);
|
||||
}
|
||||
|
||||
sourceCopy.put(src);
|
||||
|
||||
return new PreparedDictionaryImpl(flat);
|
||||
}
|
||||
}
|
0
java/org/brotli/wrapper/dec/EagerStreamTest.java
Executable file → Normal file
0
java/org/brotli/wrapper/dec/EagerStreamTest.java
Executable file → Normal file
0
java/org/brotli/wrapper/enc/EmptyInputTest.java
Executable file → Normal file
0
java/org/brotli/wrapper/enc/EmptyInputTest.java
Executable file → Normal file
0
js/decode.min.js
vendored
Executable file → Normal file
0
js/decode.min.js
vendored
Executable file → Normal file
0
python/bro.py
Executable file → Normal file
0
python/bro.py
Executable file → Normal file
0
research/BUILD
Executable file → Normal file
0
research/BUILD
Executable file → Normal file
0
research/dictionary_generator.cc
Executable file → Normal file
0
research/dictionary_generator.cc
Executable file → Normal file
0
research/durchschlag.cc
Executable file → Normal file
0
research/durchschlag.cc
Executable file → Normal file
0
research/durchschlag.h
Executable file → Normal file
0
research/durchschlag.h
Executable file → Normal file
0
research/sieve.cc
Executable file → Normal file
0
research/sieve.cc
Executable file → Normal file
0
research/sieve.h
Executable file → Normal file
0
research/sieve.h
Executable file → Normal file
Loading…
Reference in New Issue
Block a user