Merge pull request #30 from szabadka/master

Implement a 32-bit bitstream decoder + visual studio support
This commit is contained in:
szabadka 2015-02-25 18:40:17 +01:00
commit e60b7b846a
7 changed files with 70 additions and 22 deletions

View File

@ -26,6 +26,12 @@
extern "C" {
#endif
#if (defined(__x86_64__) || defined(_M_X64))
/* This should be set to 1 only on little-endian machines. */
#define BROTLI_USE_64_BITS 1
#else
#define BROTLI_USE_64_BITS 0
#endif
#define BROTLI_MAX_NUM_BIT_READ 25
#define BROTLI_READ_SIZE 4096
#define BROTLI_IBUF_SIZE (2 * BROTLI_READ_SIZE + 32)
@ -45,7 +51,11 @@ typedef struct {
uint8_t buf_[BROTLI_IBUF_SIZE];
uint8_t* buf_ptr_; /* next input will write here */
BrotliInput input_; /* input callback */
#if (BROTLI_USE_64_BITS)
uint64_t val_; /* pre-fetched bits */
#else
uint32_t val_; /* pre-fetched bits */
#endif
uint32_t pos_; /* byte position in stream */
uint32_t bit_pos_; /* current bit-reading position in val_ */
uint32_t bit_end_pos_; /* bit-reading end position from LSB of val_ */
@ -72,11 +82,14 @@ static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br,
br->bit_pos_ = val;
}
/* Reload up to 64 bits byte-by-byte */
static BROTLI_INLINE void ShiftBytes(BrotliBitReader* const br) {
/*
* Reload up to 32 bits byte-by-byte.
* This function works on both little and big endian.
*/
static BROTLI_INLINE void ShiftBytes32(BrotliBitReader* const br) {
while (br->bit_pos_ >= 8) {
br->val_ >>= 8;
br->val_ |= ((uint64_t)br->buf_[br->pos_ & BROTLI_IBUF_MASK]) << 56;
br->val_ |= ((uint32_t)br->buf_[br->pos_ & BROTLI_IBUF_MASK]) << 24;
++br->pos_;
br->bit_pos_ -= 8;
br->bit_end_pos_ -= 8;
@ -109,7 +122,7 @@ static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
if (bytes_read < BROTLI_READ_SIZE) {
br->eos_ = 1;
/* Store 32 bytes of zero after the stream end. */
#if (defined(__x86_64__) || defined(_M_X64))
#if (BROTLI_USE_64_BITS)
*(uint64_t*)(dst + bytes_read) = 0;
*(uint64_t*)(dst + bytes_read + 8) = 0;
*(uint64_t*)(dst + bytes_read + 16) = 0;
@ -120,7 +133,7 @@ static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
}
if (dst == br->buf_) {
/* Copy the head of the ringbuffer to the slack region. */
#if (defined(__x86_64__) || defined(_M_X64))
#if (BROTLI_USE_64_BITS)
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 32, br->buf_);
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 24, br->buf_ + 8);
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 16, br->buf_ + 16);
@ -137,30 +150,45 @@ static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
}
}
/* Advances the Read buffer by 5 bytes to make room for reading next 24 bits. */
/* Guarantees that there are at least 24 bits in the buffer. */
static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) {
#if (BROTLI_USE_64_BITS)
if (br->bit_pos_ >= 40) {
#if (defined(__x86_64__) || defined(_M_X64))
/*
* Advances the Read buffer by 5 bytes to make room for reading next
* 24 bits.
* The expression below needs a little-endian arch to work correctly.
* This gives a large speedup for decoding speed.
*/
br->val_ >>= 40;
/* The expression below needs a little-endian arch to work correctly. */
/* This gives a large speedup for decoding speed. */
br->val_ |= *(const uint64_t*)(
br->buf_ + (br->pos_ & BROTLI_IBUF_MASK)) << 24;
br->pos_ += 5;
br->bit_pos_ -= 40;
br->bit_end_pos_ -= 40;
#else
ShiftBytes(br);
#endif
}
#else
ShiftBytes32(br);
#endif
}
/* Reads the specified number of bits from Read Buffer. */
static BROTLI_INLINE uint32_t BrotliReadBits(
BrotliBitReader* const br, int n_bits) {
uint32_t val;
#if (BROTLI_USE_64_BITS)
BrotliFillBitWindow(br);
val = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
#else
/*
* The if statement gives 2-4% speed boost on Canterbury data set with
* asm.js/firefox/x86-64.
*/
if ((32 - br->bit_pos_) < ((uint32_t) n_bits)) {
BrotliFillBitWindow(br);
}
val = (br->val_ >> br->bit_pos_) & kBitMask[n_bits];
#endif
#ifdef BROTLI_DECODE_DEBUG
printf("[BrotliReadBits] %010d %2d val: %6x\n",
(br->pos_ << 3) + br->bit_pos_ - 64, n_bits, val);

View File

@ -553,6 +553,7 @@ int CopyUncompressedBlockToOutput(BrotliOutput output, int len, int pos,
int rb_pos = pos & ringbuffer_mask;
int br_pos = br->pos_ & BROTLI_IBUF_MASK;
int nbytes;
uint32_t remaining_bits;
/* For short lengths copy byte-by-byte */
if (len < 8 || br->bit_pos_ + (uint32_t)(len << 3) < br->bit_end_pos_) {
@ -575,8 +576,16 @@ int CopyUncompressedBlockToOutput(BrotliOutput output, int len, int pos,
return 0;
}
/* Copy remaining 0-8 bytes from br->val_ to ringbuffer. */
while (br->bit_pos_ < 64) {
/*
* Copy remaining 0-4 in 32-bit case or 0-8 bytes in the 64-bit case
* from br->val_ to ringbuffer.
*/
#if (BROTLI_USE_64_BITS)
remaining_bits = 64;
#else
remaining_bits = 32;
#endif
while (br->bit_pos_ < remaining_bits) {
ringbuffer[rb_pos] = (uint8_t)(br->val_ >> br->bit_pos_);
br->bit_pos_ += 8;
++rb_pos;

View File

@ -71,6 +71,7 @@ BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
}
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count) {
(void) data; /* Shut up LLVM */
#ifndef _WIN32
return (int)read(STDIN_FILENO, buf, count);
#else
@ -86,6 +87,7 @@ BrotliInput BrotliStdinInput() {
}
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count) {
(void) data; /* Shut up LLVM */
#ifndef _WIN32
return (int)write(STDOUT_FILENO, buf, count);
#else

View File

@ -82,6 +82,14 @@ void CopyCommandsToByteArray(const std::vector<Command>& cmds,
}
}
inline static unsigned int MyRand(unsigned int* seed) {
*seed *= 16807U;
if (*seed == 0) {
*seed = 1;
}
return *seed;
}
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
int literals_per_histogram,
@ -97,7 +105,7 @@ void InitialEntropyCodes(const DataType* data, size_t length,
for (int i = 0; i < total_histograms; ++i) {
int pos = length * i / total_histograms;
if (i != 0) {
pos += rand_r(&seed) % block_length;
pos += MyRand(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
@ -119,7 +127,7 @@ void RandomSample(unsigned int* seed,
pos = 0;
stride = length;
} else {
pos = rand_r(seed) % (length - stride + 1);
pos = MyRand(seed) % (length - stride + 1);
}
sample->Add(data + pos, stride);
}

View File

@ -17,6 +17,7 @@
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include <stdint.h>
@ -163,7 +164,7 @@ static inline double FastLog2(int v) {
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
return kLog2Table[v];
}
return log2(v);
return log2(static_cast<double>(v));
}
} // namespace brotli

View File

@ -50,6 +50,11 @@
#endif
#endif // __BYTE_ORDER
// Enable little-endian optimization for x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
#if defined(COMPILER_GCC3)
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))

View File

@ -18,11 +18,6 @@
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#if defined(OS_MACOSX)
#include <machine/endian.h>
#else
#include <endian.h>
#endif
#include <stdint.h>
#include <stdio.h>