add comment (fingerprint) CLI feature

PiperOrigin-RevId: 597489910
This commit is contained in:
Evgenii Kliuchnikov 2024-01-11 02:03:59 -08:00 committed by Copybara-Service
parent 2ad58d8603
commit 033940f97c
2 changed files with 266 additions and 12 deletions

View File

@ -103,9 +103,17 @@ typedef enum {
COMMAND_VERSION
} Command;
typedef enum {
COMMENT_INIT,
COMMENT_READ,
COMMENT_OK,
COMMENT_BAD,
} CommentState;
#define DEFAULT_LGWIN 24
#define DEFAULT_SUFFIX ".br"
#define MAX_OPTIONS 20
#define MAX_COMMENT_LEN 80
typedef struct {
/* Parameters */
@ -123,6 +131,10 @@ typedef struct {
const char* output_path;
const char* dictionary_path;
const char* suffix;
uint8_t comment[MAX_COMMENT_LEN];
size_t comment_len;
size_t comment_pos;
CommentState comment_state;
int not_input_indices[MAX_OPTIONS];
size_t longest_path_len;
size_t input_count;
@ -161,6 +173,55 @@ typedef struct {
clock_t end_time;
} Context;
/* Parse base 64 encoded string to buffer. Not performance-centric.
|out_len| as input is buffer size; |out_len| as output is decoded length.
Returns BROTLI_FALSE if either input is not (relaxed) base 64 conformant,
or output does not fit buffer. */
static BROTLI_BOOL ParseBase64(const char* str, uint8_t* out, size_t* out_len) {
size_t in_len = strlen(str);
size_t max_out_len = *out_len;
size_t i;
size_t bit_count = 0;
uint32_t bits = 0;
size_t padding_count = 0;
size_t octet_count = 0;
for (i = 0; i < in_len; ++i) {
char c = str[i];
uint32_t sextet = 0;
if (c == 9 || c == 10 || c == 13 || c == ' ') {
continue;
}
if (c == '=') {
padding_count++;
continue;
}
if (padding_count) return BROTLI_FALSE;
if (c == '+' || c == '-') {
sextet = 62;
} else if (c == '/' || c == '_') {
sextet = 63;
} else if (c >= 'A' && c <= 'Z') {
sextet = c - 'A';
} else if (c >= 'a' && c <= 'z') {
sextet = c - 'a' + 26;
} else if (c >= '0' && c <= '9') {
sextet = c - '0' + 52;
} else {
return BROTLI_FALSE;
}
bits = (bits << 6) | sextet;
bit_count += 6;
if (bit_count >= 8) {
if (octet_count == max_out_len) return BROTLI_FALSE;
bit_count -= 8;
out[octet_count++] = (bits >> bit_count) & 0xFF;
}
}
if (padding_count > 2) return BROTLI_FALSE;
*out_len = octet_count;
return BROTLI_TRUE;
}
/* Parse up to 5 decimal digits. */
static BROTLI_BOOL ParseInt(const char* s, int low, int high, int* result) {
int value = 0;
@ -217,6 +278,7 @@ static Command ParseParams(Context* params) {
BROTLI_BOOL lgwin_set = BROTLI_FALSE;
BROTLI_BOOL suffix_set = BROTLI_FALSE;
BROTLI_BOOL after_dash_dash = BROTLI_FALSE;
BROTLI_BOOL comment_set = BROTLI_FALSE;
Command command = ParseAlias(argv[0]);
for (i = 1; i < argc; ++i) {
@ -344,8 +406,9 @@ static Command ParseParams(Context* params) {
params->quality = 11;
continue;
}
/* o/q/w/D/S with parameter is expected */
if (c != 'o' && c != 'q' && c != 'w' && c != 'D' && c != 'S') {
/* o/q/w/C/D/S with parameter is expected */
if (c != 'o' && c != 'q' && c != 'w' && c != 'C' && c != 'D' &&
c != 'S') {
fprintf(stderr, "invalid argument -%c\n", c);
return COMMAND_INVALID;
}
@ -393,6 +456,17 @@ static Command ParseParams(Context* params) {
params->lgwin, BROTLI_MIN_WINDOW_BITS);
return COMMAND_INVALID;
}
} else if (c == 'C') {
if (comment_set) {
fprintf(stderr, "comment already set\n");
return COMMAND_INVALID;
}
params->comment_len = MAX_COMMENT_LEN;
if (!ParseBase64(argv[i], params->comment, &params->comment_len)) {
fprintf(stderr, "invalid base64-encoded comment\n");
return COMMAND_INVALID;
}
comment_set = BROTLI_TRUE;
} else if (c == 'D') {
if (params->dictionary_path) {
fprintf(stderr, "dictionary path already set\n");
@ -486,7 +560,7 @@ static Command ParseParams(Context* params) {
return COMMAND_VERSION;
} else {
/* key=value */
const char* value = strrchr(arg, '=');
const char* value = strchr(arg, '=');
size_t key_len;
if (!value || value[1] == 0) {
fprintf(stderr, "must pass the parameter as --%s=value\n", arg);
@ -494,7 +568,18 @@ static Command ParseParams(Context* params) {
}
key_len = (size_t)(value - arg);
value++;
if (strncmp("dictionary", arg, key_len) == 0) {
if (strncmp("comment", arg, key_len) == 0) {
if (comment_set) {
fprintf(stderr, "comment already set\n");
return COMMAND_INVALID;
}
params->comment_len = MAX_COMMENT_LEN;
if (!ParseBase64(value, params->comment, &params->comment_len)) {
fprintf(stderr, "invalid base64-encoded comment\n");
return COMMAND_INVALID;
}
comment_set = BROTLI_TRUE;
} else if (strncmp("dictionary", arg, key_len) == 0) {
if (params->dictionary_path) {
fprintf(stderr, "dictionary path already set\n");
return COMMAND_INVALID;
@ -633,6 +718,12 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
" decodable with regular brotli decoders\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
fprintf(media,
" -C B64, --comment=B64 set comment; argument is base64-decoded first;\n"
" (maximal decoded length: %d)\n"
" when decoding: check stream comment;\n"
" when encoding: embed comment (fingerprint)\n",
MAX_COMMENT_LEN);
fprintf(media,
" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n");
fprintf(media,
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
@ -1034,10 +1125,59 @@ static const char* PrettyDecoderErrorString(BrotliDecoderErrorCode code) {
return error_str;
}
static void OnMetadataStart(void* opaque, size_t size) {
Context* context = (Context*) opaque;
if (context->comment_state == COMMENT_INIT) {
if (context->comment_len != size) {
context->comment_state = COMMENT_BAD;
return;
}
context->comment_pos = 0;
context->comment_state = COMMENT_READ;
}
}
static void OnMetadataChunk(void* opaque, const uint8_t* data, size_t size) {
Context* context = (Context*) opaque;
if (context->comment_state == COMMENT_READ) {
size_t i;
for (i = 0; i < size; ++i) {
if (context->comment_pos >= context->comment_len) {
context->comment_state = COMMENT_BAD;
return;
}
if (context->comment[context->comment_pos++] != data[i]) {
context->comment_state = COMMENT_BAD;
return;
}
}
if (context->comment_pos == context->comment_len) {
context->comment_state = COMMENT_OK;
}
}
}
static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
if (context->comment_len) {
context->comment_state = COMMENT_INIT;
BrotliDecoderSetMetadataCallbacks(s, &OnMetadataStart, &OnMetadataChunk,
(void*)context);
} else {
context->comment_state = COMMENT_OK;
}
InitializeBuffers(context);
for (;;) {
/* Early check */
if (context->comment_state == COMMENT_BAD) {
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
if (context->verbosity > 0) {
fprintf(stderr, "reason: comment mismatch\n");
}
return BROTLI_FALSE;
}
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
if (!HasMoreInput(context)) {
fprintf(stderr, "corrupt input [%s]\n",
@ -1068,6 +1208,14 @@ static BROTLI_BOOL DecompressFile(Context* context, BrotliDecoderState* s) {
PrintFileProcessingProgress(context);
fprintf(stderr, "\n");
}
/* Final check */
if (context->comment_state != COMMENT_OK) {
fprintf(stderr, "corrupt input [%s]\n",
PrintablePath(context->current_input_path));
if (context->verbosity > 0) {
fprintf(stderr, "reason: comment mismatch\n");
}
}
return BROTLI_TRUE;
} else { /* result == BROTLI_DECODER_RESULT_ERROR */
fprintf(stderr, "corrupt input [%s]\n",
@ -1121,6 +1269,7 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
BROTLI_BOOL is_eof = BROTLI_FALSE;
BROTLI_BOOL prologue = !!context->comment_len;
InitializeBuffers(context);
for (;;) {
if (context->available_in == 0 && !is_eof) {
@ -1128,14 +1277,34 @@ static BROTLI_BOOL CompressFile(Context* context, BrotliEncoderState* s) {
is_eof = !HasMoreInput(context);
}
if (!BrotliEncoderCompressStream(s,
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
&context->available_in, &context->next_in,
&context->available_out, &context->next_out, NULL)) {
/* Should detect OOM? */
fprintf(stderr, "failed to compress data [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
if (prologue) {
prologue = BROTLI_FALSE;
const uint8_t* next_meta = context->comment;
size_t available_meta = context->comment_len;
if (!BrotliEncoderCompressStream(s,
BROTLI_OPERATION_EMIT_METADATA,
&available_meta, &next_meta,
&context->available_out, &context->next_out, NULL)) {
/* Should detect OOM? */
fprintf(stderr, "failed to emit metadata [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
if (available_meta != 0) {
fprintf(stderr, "failed to emit metadata [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
} else {
if (!BrotliEncoderCompressStream(s,
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
&context->available_in, &context->next_in,
&context->available_out, &context->next_out, NULL)) {
/* Should detect OOM? */
fprintf(stderr, "failed to compress data [%s]\n",
PrintablePath(context->current_input_path));
return BROTLI_FALSE;
}
}
if (context->available_out == 0) {
@ -1230,6 +1399,7 @@ int main(int argc, char** argv) {
context.quality = 11;
context.lgwin = -1;
context.verbosity = 0;
context.comment_len = 0;
context.force_overwrite = BROTLI_FALSE;
context.junk_source = BROTLI_FALSE;
context.reject_uncompressible = BROTLI_FALSE;

84
tests/cli_test.sh Executable file
View File

@ -0,0 +1,84 @@
#!/bin/bash
source gbash.sh || exit
source module gbash_unit.sh
readonly MKTEMP='/bin/mktemp'
readonly RM='/bin/rm'
function test::brotli_cli::setup() {
TEMP_DIR=$(${MKTEMP} -d)
LOG INFO ${TEMP_DIR}
BROTLI_PKG="${RUNFILES}/google3/third_party/brotli"
BROTLI="${BROTLI_PKG}/tools/brotli"
cd ${TEMP_DIR}
echo "Kot lomom kolol slona" > text.orig
}
function test::brotli_cli::teardown() {
unset BROTLI
unset BROTLI_PKG
${RM} -rf ${TEMP_DIR}
unset TEMP_DIR
}
function test::brotli_cli::roundtrip() {
${BROTLI} -Zfk text.orig -o text.br
${BROTLI} -d text.br -o text.unbr
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
}
# 'SGVsbG8=' == $(echo -n "Hello" | base64)
function test::brotli_cli::comment_ok() {
${BROTLI} -Zfk -C SGVsbG8= text.orig -o text.br
${BROTLI} -d --comment=SGVsbG8= text.br -o text.unbr
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
}
function test::brotli_cli::comment_no_padding() {
${BROTLI} -Zfk -C SGVsbG8 text.orig -o text.br
${BROTLI} -d --comment=SGVsbG8= text.br -o text.unbr
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
}
function test::brotli_cli::comment_extra_padding() {
${BROTLI} -Zfk -C SGVsbG8 text.orig -o text.br
${BROTLI} -d --comment=SGVsbG8== text.br -o text.unbr
EXPECT_FILE_CONTENT_EQ text.orig text.unbr
}
function test::brotli_cli::comment_ignored() {
${BROTLI} -Zfk -C SGVsbG8= text.orig -o text.br
EXPECT_SUCCEED "${BROTLI} -d text.br -o text.unbr"
}
function test::brotli_cli::comment_mismatch_content() {
${BROTLI} -Zfk --comment=SGVsbG8= text.orig -o text.br
EXPECT_FAIL "${BROTLI} -dC SGVsbG7= text.br -o text.unbr"
EXPECT_FAIL "${BROTLI} -tC SGVsbG7= text.br"
}
function test::brotli_cli::comment_mismatch_length() {
${BROTLI} -Zfk --comment=SGVsbG8= text.orig -o text.br
EXPECT_FAIL "${BROTLI} -tC SGVsbA== text.br"
}
function test::brotli_cli::comment_too_much_padding() {
EXPECT_FAIL "${BROTLI} -Zfk -C SGVsbG8=== text.orig -o text.br"
}
function test::brotli_cli::comment_padding_in_the_middle() {
EXPECT_FAIL "${BROTLI} -Zfk -C SGVsbG=8 text.orig -o text.br"
}
function test::brotli_cli::comment_ignore_tab_cr_lf_sp() {
COMMENT=$'S\tG\rV\ns bG8='
EXPECT_SUCCEED "${BROTLI} -Zfk -C \"${COMMENT}\" text.orig -o text.br"
}
function test::brotli_cli::comment_invalid_chars() {
EXPECT_FAIL "${BROTLI} -Zfk -C S.GVsbG8= text.orig -o text.br"
}
gbash::unit::main "$@"