From bd137524f2f50e30ba054f42f1f6536cd3cee920 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Thu, 2 Oct 2008 22:51:46 +0300 Subject: [PATCH] Initial changes to change the suffix of the new format to .xz. This also fixes a bug related to --suffix option. Some issues with suffixes with --format=raw were not fixed. --- src/lzma/args.c | 67 ++++++++++++++++++++++++++------------ src/lzma/args.h | 13 ++++---- src/lzma/help.c | 4 +-- src/lzma/process.c | 24 ++++++++------ src/lzma/suffix.c | 74 ++++++++++++++++++++++++++++++++++-------- tests/test_compress.sh | 3 +- 6 files changed, 133 insertions(+), 52 deletions(-) diff --git a/src/lzma/args.c b/src/lzma/args.c index ddaa0f9..47ae766 100644 --- a/src/lzma/args.c +++ b/src/lzma/args.c @@ -26,7 +26,7 @@ enum tool_mode opt_mode = MODE_COMPRESS; -enum header_type opt_header = HEADER_AUTO; +enum format_type opt_format = FORMAT_AUTO; char *opt_suffix = NULL; @@ -50,6 +50,12 @@ static size_t preset_number = 7 - 1; static bool preset_default = true; static size_t filter_count = 0; +/// When compressing, which file format to use if --format=auto or no --format +/// at all has been specified. We need a variable because this depends on +/// with which name we are called. All names with "lz" in them makes us to +/// use the legacy .lzma format. +static enum format_type format_compress_auto = FORMAT_XZ; + enum { OPT_SUBBLOCK = INT_MIN, @@ -312,18 +318,25 @@ parse_real(int argc, char **argv) // --format case 'F': { - static const char *types[] = { - "auto", - "native", - "alone", - // "gzip", - "raw", - NULL + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. + static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, + // { "gzip", FORMAT_GZIP }, + // { "gz", FORMAT_GZIP }, + { "raw", FORMAT_RAW }, }; - opt_header = 0; - while (strcmp(types[opt_header], optarg) != 0) { - if (types[++opt_header] == NULL) { + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) { errmsg(V_ERROR, _("%s: Unknown file " "format type"), optarg); @@ -331,25 +344,25 @@ parse_real(int argc, char **argv) } } + opt_format = types[i].format; break; } // --check case 'C': { static const struct { - const char *str; - unsigned int value; + char str[8]; + lzma_check check; } types[] = { { "none", LZMA_CHECK_NONE }, { "crc32", LZMA_CHECK_CRC32 }, { "crc64", LZMA_CHECK_CRC64 }, { "sha256", LZMA_CHECK_SHA256 }, - { NULL, 0 } }; size_t i = 0; while (strcmp(types[i].str, optarg) != 0) { - if (types[++i].str == NULL) { + if (++i == ARRAY_SIZE(types)) { errmsg(V_ERROR, _("%s: Unknown " "integrity check " "type"), optarg); @@ -357,7 +370,7 @@ parse_real(int argc, char **argv) } } - opt_check = types[i].value; + opt_check = types[i].check; break; } @@ -460,7 +473,7 @@ set_compression_settings(void) my_exit(ERROR); } - opt_filters[0].id = opt_header == HEADER_ALONE + opt_filters[0].id = opt_format == FORMAT_LZMA ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; opt_filters[0].options = &opt_lzma; filter_count = 1; @@ -471,9 +484,9 @@ set_compression_settings(void) // If we are using the LZMA_Alone format, allow exactly one filter // which has to be LZMA. - if (opt_header == HEADER_ALONE && (filter_count != 1 + if (opt_format == FORMAT_LZMA && (filter_count != 1 || opt_filters[0].id != LZMA_FILTER_LZMA1)) { - errmsg(V_ERROR, _("With --format=alone only the LZMA1 filter " + errmsg(V_ERROR, _("With --format=lzma only the LZMA1 filter " "is supported")); my_exit(ERROR); } @@ -503,6 +516,12 @@ set_compression_settings(void) memory_usage = lzma_memusage_encoder(opt_filters); } + + // TODO: With --format=raw, we should print a warning since + // the presets may change and thus the next version may not + // be able to uncompress the raw stream with the same preset + // number. + } else { if (memory_usage > opt_memory) { errmsg(V_ERROR, _("Memory usage limit is too small " @@ -532,6 +551,11 @@ parse_args(int argc, char **argv) { const char *name = str_filename(argv[0]); if (name != NULL) { + // Default file format + if (strstr(name, "lz") != NULL) + format_compress_auto = FORMAT_LZMA; + + // Operation mode if (strstr(name, "cat") != NULL) { opt_mode = MODE_DECOMPRESS; opt_stdout = true; @@ -556,7 +580,10 @@ parse_args(int argc, char **argv) opt_stdout = true; } - if (opt_mode == MODE_COMPRESS || opt_header == HEADER_RAW) + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = format_compress_auto; + + if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) set_compression_settings(); // If no filenames are given, use stdin. diff --git a/src/lzma/args.h b/src/lzma/args.h index abc810c..587b280 100644 --- a/src/lzma/args.h +++ b/src/lzma/args.h @@ -30,12 +30,13 @@ enum tool_mode { MODE_LIST, }; -enum header_type { - HEADER_AUTO, - HEADER_NATIVE, - HEADER_ALONE, +// NOTE: The order of these is significant in suffix.c. +enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, // HEADER_GZIP, - HEADER_RAW, + FORMAT_RAW, }; @@ -51,7 +52,7 @@ extern bool opt_keep_original; extern bool opt_preserve_name; // extern bool opt_recursive; extern enum tool_mode opt_mode; -extern enum header_type opt_header; +extern enum format_type opt_format; extern lzma_check opt_check; extern lzma_filter opt_filters[8]; diff --git a/src/lzma/help.c b/src/lzma/help.c index 0b530ff..3b9e398 100644 --- a/src/lzma/help.c +++ b/src/lzma/help.c @@ -46,7 +46,7 @@ show_help(void) " -z, --compress force compression\n" " -d, --decompress force decompression\n" " -t, --test test compressed file integrity\n" -" -l, --list list block sizes, total sizes, and possible metadata\n" +" -l, --list list information about files\n" )); puts(_( @@ -57,7 +57,7 @@ show_help(void) " -c, --stdout write to standard output and don't delete input files\n" " -S, --suffix=.SUF use suffix `.SUF' on compressed files instead of `.lzma'\n" " -F, --format=FMT file format to encode or decode; possible values are\n" -" `auto' (default), `native', `alone', and `raw'\n" +" `auto' (default), `xz', `lzma', and `raw'\n" " --files=[FILE] read filenames to process from FILE; if FILE is\n" " omitted, filenames are read from the standard input;\n" " filenames must be terminated with the newline character\n" diff --git a/src/lzma/process.c b/src/lzma/process.c index 46c27df..fc4ef96 100644 --- a/src/lzma/process.c +++ b/src/lzma/process.c @@ -155,19 +155,23 @@ single_init(thread_data *t) lzma_ret ret = LZMA_PROG_ERROR; if (opt_mode == MODE_COMPRESS) { - switch (opt_header) { - case HEADER_AUTO: - case HEADER_NATIVE: + switch (opt_format) { + case FORMAT_AUTO: + // args.c ensures this. + assert(0); + break; + + case FORMAT_XZ: ret = lzma_stream_encoder(&t->strm, opt_filters, opt_check); break; - case HEADER_ALONE: + case FORMAT_LZMA: ret = lzma_alone_encoder(&t->strm, opt_filters[0].options); break; - case HEADER_RAW: + case FORMAT_RAW: ret = lzma_raw_encoder(&t->strm, opt_filters); break; } @@ -175,20 +179,20 @@ single_init(thread_data *t) const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED; - switch (opt_header) { - case HEADER_AUTO: + switch (opt_format) { + case FORMAT_AUTO: ret = lzma_auto_decoder(&t->strm, opt_memory, flags); break; - case HEADER_NATIVE: + case FORMAT_XZ: ret = lzma_stream_decoder(&t->strm, opt_memory, flags); break; - case HEADER_ALONE: + case FORMAT_LZMA: ret = lzma_alone_decoder(&t->strm, opt_memory); break; - case HEADER_RAW: + case FORMAT_RAW: // Memory usage has already been checked in args.c. ret = lzma_raw_decoder(&t->strm, opt_filters); break; diff --git a/src/lzma/suffix.c b/src/lzma/suffix.c index 57afce8..41b4c35 100644 --- a/src/lzma/suffix.c +++ b/src/lzma/suffix.c @@ -20,14 +20,9 @@ #include "private.h" -static const struct { +struct suffix_pair { const char *compressed; const char *uncompressed; -} suffixes[] = { - { ".lzma", "" }, - { ".tlz", ".tar" }, - { ".ylz", ".yar" }, - { NULL, NULL } }; @@ -63,13 +58,21 @@ test_suffix(const char *suffix, const char *src_name, size_t src_len) /// \return Name of the uncompressed file, or NULL if file has unknown /// suffix. static char * -uncompressed_name(const char *src_name) +uncompressed_name(const char *src_name, const size_t src_len) { + static const struct suffix_pair suffixes[] = { + { ".xz", "" }, + { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. + { ".lzma", "" }, + { ".tlz", ".tar" }, + // { ".gz", "" }, + // { ".tgz", ".tar" }, + }; + const char *new_suffix = ""; - const size_t src_len = strlen(src_name); size_t new_len = 0; - for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { new_len = test_suffix(suffixes[i].compressed, src_name, src_len); if (new_len != 0) { @@ -103,10 +106,40 @@ uncompressed_name(const char *src_name) /// \brief Appends suffix to src_name +/// +/// In contrast to uncompressed_name(), we check only suffixes that are valid +/// for the specified file format. static char * -compressed_name(const char *src_name) +compressed_name(const char *src_name, const size_t src_len) { - const size_t src_len = strlen(src_name); + // The order of these must match the order in args.h. + static const struct suffix_pair all_suffixes[][3] = { + { + { ".xz", "" }, + { ".txz", ".tar" }, + { NULL, NULL } + }, { + { ".lzma", "" }, + { ".tlz", ".tar" }, + { NULL, NULL } +/* + }, { + { ".gz", "" }, + { ".tgz", ".tar" }, + { NULL, NULL } +*/ + }, { + // --format=raw requires specifying the suffix + // manually or using stdout. + { NULL, NULL } + } + }; + + // args.c ensures this. + assert(opt_format != FORMAT_AUTO); + + const size_t format = opt_format - 1; + const struct suffix_pair *const suffixes = all_suffixes[format]; for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { if (test_suffix(suffixes[i].compressed, src_name, src_len) @@ -118,6 +151,15 @@ compressed_name(const char *src_name) } } + // TODO: Hmm, maybe it would be better to validate this in args.c, + // since the suffix handling when decoding is weird now. + if (opt_format == FORMAT_RAW && opt_suffix == NULL) { + errmsg(V_ERROR, _("%s: With --format=raw, --suffix=.SUF is " + "required unless writing to stdout"), + src_name); + return NULL; + } + const char *suffix = opt_suffix != NULL ? opt_suffix : suffixes[0].compressed; const size_t suffix_len = strlen(suffix); @@ -139,7 +181,13 @@ compressed_name(const char *src_name) extern char * get_dest_name(const char *src_name) { + assert(src_name != NULL); + + // Length of the name is needed in all cases to locate the end of + // the string to compare the suffix, so calculate the length here. + const size_t src_len = strlen(src_name); + return opt_mode == MODE_COMPRESS - ? compressed_name(src_name) - : uncompressed_name(src_name); + ? compressed_name(src_name, src_len) + : uncompressed_name(src_name, src_len); } diff --git a/tests/test_compress.sh b/tests/test_compress.sh index d0511e4..667d8fd 100755 --- a/tests/test_compress.sh +++ b/tests/test_compress.sh @@ -69,7 +69,8 @@ test_lzma() { echo . | tr -d '\n\r' } -LZMA="../src/lzma/lzma --memory=15Mi --threads=1" +# TODO: Remove --format=xz once the command name has been changed. +LZMA="../src/lzma/lzma --memory=15Mi --threads=1 --format=xz" LZMADEC="../src/lzmadec/lzmadec --memory=4Mi" unset LZMA_OPT