mirror of
https://github.com/google/brotli.git
synced 2024-11-23 12:10:07 +00:00
add comment (fingerprint) CLI feature
PiperOrigin-RevId: 597489910
This commit is contained in:
parent
2ad58d8603
commit
033940f97c
194
c/tools/brotli.c
194
c/tools/brotli.c
@ -103,9 +103,17 @@ typedef enum {
|
||||
COMMAND_VERSION
|
||||
} Command;
|
||||
|
||||
typedef enum {
|
||||
COMMENT_INIT,
|
||||
COMMENT_READ,
|
||||
COMMENT_OK,
|
||||
COMMENT_BAD,
|
||||
} CommentState;
|
||||
|
||||
#define DEFAULT_LGWIN 24
|
||||
#define DEFAULT_SUFFIX ".br"
|
||||
#define MAX_OPTIONS 20
|
||||
#define MAX_COMMENT_LEN 80
|
||||
|
||||
typedef struct {
|
||||
/* Parameters */
|
||||
@ -123,6 +131,10 @@ typedef struct {
|
||||
const char* output_path;
|
||||
const char* dictionary_path;
|
||||
const char* suffix;
|
||||
uint8_t comment[MAX_COMMENT_LEN];
|
||||
size_t comment_len;
|
||||
size_t comment_pos;
|
||||
CommentState comment_state;
|
||||
int not_input_indices[MAX_OPTIONS];
|
||||
size_t longest_path_len;
|
||||
size_t input_count;
|
||||
@ -161,6 +173,55 @@ typedef struct {
|
||||
clock_t end_time;
|
||||
} Context;
|
||||
|
||||
/* Parse base 64 encoded string to buffer. Not performance-centric.
|
||||
|out_len| as input is buffer size; |out_len| as output is decoded length.
|
||||
Returns BROTLI_FALSE if either input is not (relaxed) base 64 conformant,
|
||||
or output does not fit buffer. */
|
||||
static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) {
|
||||
size_t in_len = strlen(str);
|
||||
size_t max_out_len = *out_len;
|
||||
size_t i;
|
||||
size_t bit_count = 0;
|
||||
uint32_t bits = 0;
|
||||
size_t padding_count = 0;
|
||||
size_t octet_count = 0;
|
||||
for (i = 0; i < in_len; ++i) {
|
||||
char c = str[i];
|
||||
uint32_t sextet = 0;
|
||||
if (c == 9 || c == 10 || c == 13 || c == ' ') {
|
||||
continue;
|
||||
}
|
||||
if (c == '=') {
|
||||
padding_count++;
|
||||
continue;
|
||||
}
|
||||
if (padding_count) return BROTLI_FALSE;
|
||||
if (c == '+' || c == '-') {
|
||||
sextet = 62;
|
||||
} else if (c == '/' || c == '_') {
|
||||
sextet = 63;
|
||||
} else if (c >= 'A' && c <= 'Z') {
|
||||
sextet = c - 'A';
|
||||
} else if (c >= 'a' && c <= 'z') {
|
||||
sextet = c - 'a' + 26;
|
||||
} else if (c >= '0' && c <= '9') {
|
||||
sextet = c - '0' + 52;
|
||||
} else {
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
bits = (bits << 6) | sextet;
|
||||
bit_count += 6;
|
||||
if (bit_count >= 8) {
|
||||
if (octet_count == max_out_len) return BROTLI_FALSE;
|
||||
bit_count -= 8;
|
||||
out[octet_count++] = (bits >> bit_count) & 0xFF;
|
||||
}
|
||||
}
|
||||
if (padding_count > 2) return BROTLI_FALSE;
|
||||
*out_len = octet_count;
|
||||
return BROTLI_TRUE;
|
||||
}
|
||||
|
||||
/* Parse up to 5 decimal digits. */
|
||||
static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
|
||||
int value = 0;
|
||||
@ -217,6 +278,7 @@ static Command ParseParams(Context* params) {
|
||||
BROTLI_BOOL lgwin_set = BROTLI_FALSE;
|
||||
BROTLI_BOOL suffix_set = BROTLI_FALSE;
|
||||
BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
|
||||
BROTLI_BOOL comment_set = BROTLI_FALSE;
|
||||
Command command = ParseAlias(argv[0]);
|
||||
|
||||
for (i = 1; i < argc; ++i) {
|
||||
@ -344,8 +406,9 @@ static Command ParseParams(Context* params) {
|
||||
params->quality = 11;
|
||||
continue;
|
||||
}
|
||||
/* o/q/w/D/S with parameter is expected */
|
||||
if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
|
||||
/* o/q/w/C/D/S with parameter is expected */
|
||||
if (c != 'o' && c != 'q' && c != 'w' && c != 'C' && c != 'D' &&
|
||||
c != 'S') {
|
||||
fprintf(stderr, "invalid argument -%c\n", c);
|
||||
return COMMAND_INVALID;
|
||||
}
|
||||
@ -393,6 +456,17 @@ static Command ParseParams(Context* params) {
|
||||
params->lgwin, BROTLI_MIN_WINDOW_BITS);
|
||||
return COMMAND_INVALID;
|
||||
}
|
||||
} else if (c == 'C') {
|
||||
if (comment_set) {
|
||||
fprintf(stderr, "comment already set\n");
|
||||
return COMMAND_INVALID;
|
||||
}
|
||||
params->comment_len = MAX_COMMENT_LEN;
|
||||
if (!ParseBase64(argv[i], params->comment, ¶ms->comment_len)) {
|
||||
fprintf(stderr, "invalid base64-encoded comment\n");
|
||||
return COMMAND_INVALID;
|
||||
}
|
||||
comment_set = BROTLI_TRUE;
|
||||
} else if (c == 'D') {
|
||||
if (params->dictionary_path) {
|
||||
fprintf(stderr, "dictionary path already set\n");
|
||||
@ -486,7 +560,7 @@ static Command ParseParams(Context* params) {
|
||||
return COMMAND_VERSION;
|
||||
} else {
|
||||
/* key=value */
|
||||
const char* value = strrchr(arg, '=');
|
||||
const char* value = strchr(arg, '=');
|
||||
size_t key_len;
|
||||
if (!value || value[1] == 0) {
|
||||
fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
|
||||
@ -494,7 +568,18 @@ static Command ParseParams(Context* params) {
|
||||
}
|
||||
key_len = (size_t)(value - arg);
|
||||
value++;
|
||||
if (strncmp("dictionary", arg, key_len) == 0) {
|
||||
if (strncmp("comment", arg, key_len) == 0) {
|
||||
if (comment_set) {
|
||||
fprintf(stderr, "comment already set\n");
|
||||
return COMMAND_INVALID;
|
||||
}
|
||||
params->comment_len = MAX_COMMENT_LEN;
|
||||
if (!ParseBase64(value, params->comment, ¶ms->comment_len)) {
|
||||
fprintf(stderr, "invalid base64-encoded comment\n");
|
||||
return COMMAND_INVALID;
|
||||
}
|
||||
comment_set = BROTLI_TRUE;
|
||||
} else if (strncmp("dictionary", arg, key_len) == 0) {
|
||||
if (params->dictionary_path) {
|
||||
fprintf(stderr, "dictionary path already set\n");
|
||||
return COMMAND_INVALID;
|
||||
@ -633,6 +718,12 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
|
||||
" decodable with regular brotli decoders\n",
|
||||
BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
|
||||
fprintf(media,
|
||||
" -C B64, --comment=B64 set comment; argument is base64-decoded first;\n"
|
||||
" (maximal decoded length: %d)\n"
|
||||
" when decoding: check stream comment;\n"
|
||||
" when encoding: embed comment (fingerprint)\n",
|
||||
MAX_COMMENT_LEN);
|
||||
fprintf(media,
|
||||
" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n");
|
||||
fprintf(media,
|
||||
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
|
||||
@ -1034,10 +1125,59 @@ static const char* PrettyDecoderErrorString(BrotliDecoderErrorCode code) {
|
||||
return error_str;
|
||||
}
|
||||
|
||||
static void OnMetadataStart(void* opaque, size_t size) {
|
||||
Context* context = (Context*) opaque;
|
||||
if (context->comment_state == COMMENT_INIT) {
|
||||
if (context->comment_len != size) {
|
||||
context->comment_state = COMMENT_BAD;
|
||||
return;
|
||||
}
|
||||
context->comment_pos = 0;
|
||||
context->comment_state = COMMENT_READ;
|
||||
}
|
||||
}
|
||||
|
||||
static void OnMetadataChunk(void* opaque, const uint8_t* data, size_t size) {
|
||||
Context* context = (Context*) opaque;
|
||||
if (context->comment_state == COMMENT_READ) {
|
||||
size_t i;
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (context->comment_pos >= context->comment_len) {
|
||||
context->comment_state = COMMENT_BAD;
|
||||
return;
|
||||
}
|
||||
if (context->comment[context->comment_pos++] != data[i]) {
|
||||
context->comment_state = COMMENT_BAD;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (context->comment_pos == context->comment_len) {
|
||||
context->comment_state = COMMENT_OK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
|
||||
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
|
||||
if (context->comment_len) {
|
||||
context->comment_state = COMMENT_INIT;
|
||||
BrotliDecoderSetMetadataCallbacks(s, &OnMetadataStart, &OnMetadataChunk,
|
||||
(void*)context);
|
||||
} else {
|
||||
context->comment_state = COMMENT_OK;
|
||||
}
|
||||
|
||||
InitializeBuffers(context);
|
||||
for (;;) {
|
||||
/* Early check */
|
||||
if (context->comment_state == COMMENT_BAD) {
|
||||
fprintf(stderr, "corrupt input [%s]\n",
|
||||
PrintablePath(context->current_input_path));
|
||||
if (context->verbosity > 0) {
|
||||
fprintf(stderr, "reason: comment mismatch\n");
|
||||
}
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
|
||||
if (!HasMoreInput(context)) {
|
||||
fprintf(stderr, "corrupt input [%s]\n",
|
||||
@ -1068,6 +1208,14 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
|
||||
PrintFileProcessingProgress(context);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
/* Final check */
|
||||
if (context->comment_state != COMMENT_OK) {
|
||||
fprintf(stderr, "corrupt input [%s]\n",
|
||||
PrintablePath(context->current_input_path));
|
||||
if (context->verbosity > 0) {
|
||||
fprintf(stderr, "reason: comment mismatch\n");
|
||||
}
|
||||
}
|
||||
return BROTLI_TRUE;
|
||||
} else { /* result == BROTLI_DECODER_RESULT_ERROR */
|
||||
fprintf(stderr, "corrupt input [%s]\n",
|
||||
@ -1121,6 +1269,7 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
|
||||
|
||||
static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
|
||||
BROTLI_BOOL is_eof = BROTLI_FALSE;
|
||||
BROTLI_BOOL prologue = !!context->comment_len;
|
||||
InitializeBuffers(context);
|
||||
for (;;) {
|
||||
if (context->available_in == 0 && !is_eof) {
|
||||
@ -1128,14 +1277,34 @@ static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
|
||||
is_eof = !HasMoreInput(context);
|
||||
}
|
||||
|
||||
if (!BrotliEncoderCompressStream(s,
|
||||
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
|
||||
&context->available_in, &context->next_in,
|
||||
&context->available_out, &context->next_out, NULL)) {
|
||||
/* Should detect OOM? */
|
||||
fprintf(stderr, "failed to compress data [%s]\n",
|
||||
PrintablePath(context->current_input_path));
|
||||
return BROTLI_FALSE;
|
||||
if (prologue) {
|
||||
prologue = BROTLI_FALSE;
|
||||
const uint8_t* next_meta = context->comment;
|
||||
size_t available_meta = context->comment_len;
|
||||
if (!BrotliEncoderCompressStream(s,
|
||||
BROTLI_OPERATION_EMIT_METADATA,
|
||||
&available_meta, &next_meta,
|
||||
&context->available_out, &context->next_out, NULL)) {
|
||||
/* Should detect OOM? */
|
||||
fprintf(stderr, "failed to emit metadata [%s]\n",
|
||||
PrintablePath(context->current_input_path));
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
if (available_meta != 0) {
|
||||
fprintf(stderr, "failed to emit metadata [%s]\n",
|
||||
PrintablePath(context->current_input_path));
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
} else {
|
||||
if (!BrotliEncoderCompressStream(s,
|
||||
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
|
||||
&context->available_in, &context->next_in,
|
||||
&context->available_out, &context->next_out, NULL)) {
|
||||
/* Should detect OOM? */
|
||||
fprintf(stderr, "failed to compress data [%s]\n",
|
||||
PrintablePath(context->current_input_path));
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (context->available_out == 0) {
|
||||
@ -1230,6 +1399,7 @@ int main(int argc, char** argv) {
|
||||
context.quality = 11;
|
||||
context.lgwin = -1;
|
||||
context.verbosity = 0;
|
||||
context.comment_len = 0;
|
||||
context.force_overwrite = BROTLI_FALSE;
|
||||
context.junk_source = BROTLI_FALSE;
|
||||
context.reject_uncompressible = BROTLI_FALSE;
|
||||
|
84
tests/cli_test.sh
Executable file
84
tests/cli_test.sh
Executable file
@ -0,0 +1,84 @@
|
||||
#!/bin/bash
|
||||
|
||||
source gbash.sh || exit
|
||||
source module gbash_unit.sh
|
||||
|
||||
readonly MKTEMP='/bin/mktemp'
|
||||
readonly RM='/bin/rm'
|
||||
|
||||
function test::brotli_cli::setup() {
|
||||
TEMP_DIR=$(${MKTEMP} -d)
|
||||
LOG INFO ${TEMP_DIR}
|
||||
BROTLI_PKG="${RUNFILES}/google3/third_party/brotli"
|
||||
BROTLI="${BROTLI_PKG}/tools/brotli"
|
||||
cd ${TEMP_DIR}
|
||||
echo "Kot lomom kolol slona" > text.orig
|
||||
}
|
||||
|
||||
function test::brotli_cli::teardown() {
|
||||
unset BROTLI
|
||||
unset BROTLI_PKG
|
||||
${RM} -rf ${TEMP_DIR}
|
||||
unset TEMP_DIR
|
||||
}
|
||||
|
||||
function test::brotli_cli::roundtrip() {
|
||||
${BROTLI} -Zfk text.orig -o text.br
|
||||
${BROTLI} -d text.br -o text.unbr
|
||||
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
|
||||
}
|
||||
|
||||
# 'SGVsbG8=' == $(echo -n "Hello" | base64)
|
||||
|
||||
function test::brotli_cli::comment_ok() {
|
||||
${BROTLI} -Zfk -C SGVsbG8= text.orig -o text.br
|
||||
${BROTLI} -d --comment=SGVsbG8= text.br -o text.unbr
|
||||
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_no_padding() {
|
||||
${BROTLI} -Zfk -C SGVsbG8 text.orig -o text.br
|
||||
${BROTLI} -d --comment=SGVsbG8= text.br -o text.unbr
|
||||
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_extra_padding() {
|
||||
${BROTLI} -Zfk -C SGVsbG8 text.orig -o text.br
|
||||
${BROTLI} -d --comment=SGVsbG8== text.br -o text.unbr
|
||||
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_ignored() {
|
||||
${BROTLI} -Zfk -C SGVsbG8= text.orig -o text.br
|
||||
EXPECT_SUCCEED "${BROTLI} -d text.br -o text.unbr"
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_mismatch_content() {
|
||||
${BROTLI} -Zfk --comment=SGVsbG8= text.orig -o text.br
|
||||
EXPECT_FAIL "${BROTLI} -dC SGVsbG7= text.br -o text.unbr"
|
||||
EXPECT_FAIL "${BROTLI} -tC SGVsbG7= text.br"
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_mismatch_length() {
|
||||
${BROTLI} -Zfk --comment=SGVsbG8= text.orig -o text.br
|
||||
EXPECT_FAIL "${BROTLI} -tC SGVsbA== text.br"
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_too_much_padding() {
|
||||
EXPECT_FAIL "${BROTLI} -Zfk -C SGVsbG8=== text.orig -o text.br"
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_padding_in_the_middle() {
|
||||
EXPECT_FAIL "${BROTLI} -Zfk -C SGVsbG=8 text.orig -o text.br"
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_ignore_tab_cr_lf_sp() {
|
||||
COMMENT=$'S\tG\rV\ns bG8='
|
||||
EXPECT_SUCCEED "${BROTLI} -Zfk -C \"${COMMENT}\" text.orig -o text.br"
|
||||
}
|
||||
|
||||
function test::brotli_cli::comment_invalid_chars() {
|
||||
EXPECT_FAIL "${BROTLI} -Zfk -C S.GVsbG8= text.orig -o text.br"
|
||||
}
|
||||
|
||||
gbash::unit::main "$@"
|
Loading…
Reference in New Issue
Block a user