From f80437c58665294083c27ebba519b303352fa437 Mon Sep 17 00:00:00 2001
From: Sen Huang
Date: Wed, 2 Oct 2019 11:08:20 -0400
Subject: [PATCH] Add support for --output-dir-flat

New flag to specify output directory destination for multiple files.
---
 build/LICENSE      |  0
 programs/fileio.c  |  2 +-
 programs/util.c    | 97 ++++++++++++++++++++++++++++++++---------------
 programs/util.h    | 11 +++---
 programs/zstdcli.c | 35 ++++++++++++-----
 tests/playTests.sh | 19 +++++++++-
 6 files changed, 116 insertions(+), 48 deletions(-)
 create mode 100644 build/LICENSE

diff --git a/build/LICENSE b/build/LICENSE
new file mode 100644
index 00000000..e69de29b
diff --git a/programs/fileio.c b/programs/fileio.c
index cc8809f9..c0300f6a 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1453,7 +1453,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileN
         }
     }
     FIO_freeCResources(ress);
-    if (outDirName)
+    if (dstFileNamesTable)
         UTIL_freeDestinationFilenameTable(dstFileNamesTable, nbFiles);
 
     return error;
diff --git a/programs/util.c b/programs/util.c
index 920601fb..e5c93786 100644
--- a/programs/util.c
+++ b/programs/util.c
@@ -90,53 +90,88 @@ U32 UTIL_isDirectory(const char* infilename)
     return 0;
 }
 
-int UTIL_createDir(const char* outDirName)
-{
-    int r;
-    if (UTIL_isDirectory(outDirName))
-        return 0; /* no need to create if directory already exists */
+/* qsort() comparator : orders two elements of an array of C strings with strcmp() */
+int UTIL_compareStr(const void *p1, const void *p2) {
+    return strcmp(* (char * const *) p1, * (char * const *) p2);
+}
 
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
-    r = _mkdir(outDirName);
-    if (r || !UTIL_isDirectory(outDirName)) return 1;
-#else
-    r = mkdir(outDirName, S_IRWXU | S_IRWXG | S_IRWXO); /* dir has all permissions */
-    if (r || !UTIL_isDirectory(outDirName)) return 1;
-#endif
+/* UTIL_checkFilenameCollisions() :
+ * displays a warning for each destination name present more than once in dstFilenameTable.
+ * NULL entries are ignored.
+ * @return : 0 if the check was performed, 1 if it could not be (allocation failure) */
+int UTIL_checkFilenameCollisions(char** dstFilenameTable, unsigned nbFiles) {
+    char** dstFilenameTableSorted;
+    unsigned nbValid = 0;
+    unsigned u;
+
+    if (nbFiles < 2) return 0; /* fewer than two names cannot collide */
+
+    dstFilenameTableSorted = (char**) malloc(sizeof(char*) * nbFiles);
+    if (!dstFilenameTableSorted) {
+        UTIL_DISPLAYLEVEL(1, "Unable to malloc new str array, not checking for name collisions\n");
+        return 1;
+    }
+
+    /* entries whose allocation failed are NULL and cannot be compared : skip them */
+    for (u = 0; u < nbFiles; ++u) {
+        if (dstFilenameTable[u] != NULL) {
+            dstFilenameTableSorted[nbValid++] = dstFilenameTable[u];
+        }
+    }
+    qsort(dstFilenameTableSorted, nbValid, sizeof(char*), UTIL_compareStr);
+    for (u = 1; u < nbValid; ++u) {
+        if (strcmp(dstFilenameTableSorted[u-1], dstFilenameTableSorted[u]) == 0) {
+            UTIL_DISPLAYLEVEL(1, "WARNING: Two files have same filename as source : %s\n", dstFilenameTableSorted[u]);
+        }
+    }
+
+    free(dstFilenameTableSorted);
     return 0;
 }
 
-void UTIL_createDestinationDirTable(const char** filenameTable, unsigned nbFiles,
-                                   const char* outDirName, char** dstFilenameTable)
+/* UTIL_createDestinationDirTable() :
+ * sets each dstFilenameTable[u] to a newly allocated string made of outDirName,
+ * a path separator when needed, and the last path component of filenameTable[u].
+ * An entry is left NULL if its allocation fails.
+ * When compressing is non-zero, 4 extra bytes are reserved for a .zst suffix. */
+void UTIL_createDestinationDirTable(char** dstFilenameTable, const char** filenameTable,
+        const unsigned nbFiles, const char* outDirName, const int compressing)
 {
     unsigned u;
-    char c;
-    c = '/';
+    const char* c;
+    const size_t outDirLen = strlen(outDirName);
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+    c = "\\";
+    #else
+    c = "/";
+    #endif
 
-    /* duplicate source file table */
     for (u = 0; u < nbFiles; ++u) {
         const char* filename;
         size_t finalPathLen;
         finalPathLen = strlen(outDirName);
-        filename = strrchr(filenameTable[u], c); /* filename is the last bit of string after '/' */
+        filename = strrchr(filenameTable[u], c[0]);
+        filename = (filename == NULL) ? filenameTable[u] : filename+1; /* keep only the last path component */
+
         finalPathLen += strlen(filename);
-        dstFilenameTable[u] = (char*) malloc((finalPathLen+5) * sizeof(char)); /* extra 1 bit for \0, extra 4 for .zst if compressing*/
+        dstFilenameTable[u] = compressing ?
+            (char*) malloc((finalPathLen+6) * sizeof(char)) /* 4 more bytes for .zst suffix */
+          : (char*) malloc((finalPathLen+2) * sizeof(char));
+        if (!dstFilenameTable[u]) {
+            UTIL_DISPLAYLEVEL(1, "Unable to allocate space for file destination str\n");
+            continue;
+        }
+
         strcpy(dstFilenameTable[u], outDirName);
+        /* add a separator, unless outDirName is empty or already ends with one */
+        if (outDirLen > 0 && outDirName[outDirLen-1] != c[0]) {
+            strcat(dstFilenameTable[u], c);
+        }
         strcat(dstFilenameTable[u], filename);
     }
-}
 
-void UTIL_processMultipleFilenameDestinationDir(char** dstFilenameTable,
-                                              const char** filenameTable, unsigned filenameIdx,
-                                              const char* outFileName, const char* outDirName) {
-    int dirResult;
-    dirResult = UTIL_createDir(outDirName);
-    if (dirResult)
-        UTIL_DISPLAYLEVEL(1, "Directory creation unsuccessful\n");
-
-    UTIL_createDestinationDirTable(filenameTable, filenameIdx, outDirName, dstFilenameTable);
-    if (outFileName) {
-        outFileName = dstFilenameTable[0]; /* in case -O is called with single file */
+    if (UTIL_checkFilenameCollisions(dstFilenameTable, nbFiles)) {
+        UTIL_DISPLAYLEVEL(1, "Checking for filename collisions failed\n");
     }
 }
 
diff --git a/programs/util.h b/programs/util.h
index 9615504c..e90b251d 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -127,15 +127,14 @@ int UTIL_fileExist(const char* filename);
 int UTIL_isRegularFile(const char* infilename);
 int UTIL_setFileStat(const char* filename, stat_t* statbuf);
 U32 UTIL_isDirectory(const char* infilename);
-int UTIL_createDir(const char* outDirName);
 int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
 int UTIL_isSameFile(const char* file1, const char* file2);
-void UTIL_createDestinationDirTable(const char** filenameTable, unsigned filenameIdx,
-        const char* outDirName, char** dstFilenameTable);
+int UTIL_compareStr(const void *p1, const void *p2);
+int UTIL_checkFilenameCollisions(char** dstFilenameTable, unsigned nbFiles);
+/* Populates dstFilenameTable using outDirName concatenated with entries from filenameTable */
+void UTIL_createDestinationDirTable(char** dstFilenameTable, const char** filenameTable, const unsigned nbFiles,
+        const char* outDirName, const int compressing);
 void UTIL_freeDestinationFilenameTable(char** dstDirTable, unsigned nbFiles);
-void UTIL_processMultipleFilenameDestinationDir(char** dstFilenameTable,
-                                              const char** filenameTable, unsigned filenameIdx,
-                                              const char* outFileName, const char* outDirName);
 
 U32 UTIL_isLink(const char* infilename);
 #define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 5fc6b8aa..6b761ffa 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -118,7 +118,6 @@ static int usage(const char* programName)
 #endif
     DISPLAY( " -D file: use `file` as Dictionary \n");
     DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
-    DISPLAY( " -O directory: result(s) stored into `directory`, creates one if non-existent \n");
     DISPLAY( " -f : overwrite output without prompting and (de)compress links \n");
     DISPLAY( "--rm : remove source file(s) after successful de/compression \n");
     DISPLAY( " -k : preserve source file(s) (default) \n");
@@ -137,6 +136,7 @@ static int usage_advanced(const char* programName)
     DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
     DISPLAY( " -c : force write to standard output, even if it is the console\n");
     DISPLAY( " -l : print information about zstd compressed files \n");
+    DISPLAY( " --output-dir-flat directory: results stored into `directory` top level \n");
 #ifndef ZSTD_NOCOMPRESS
     DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
     DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
@@ -690,6 +690,7 @@ int main(int argCount, const char* argv[])
                     if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
                     if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
                     if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
+                    if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
                     if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
                     if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
                     if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
@@ -856,9 +857,6 @@ int main(int argCount, const char* argv[])
 
                     /* destination file name */
                 case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
-
-                    /* destination directory name */
-                case 'O': nextArgumentIsOutDirName=1; lastCommand=1; argument++; break;
 
                     /* limit decompression memory */
                 case 'M':
@@ -1178,9 +1176,18 @@ int main(int argCount, const char* argv[])
         if (adaptMax < cLevel) cLevel = adaptMax;
 
         if (outDirName) {
-            printf("ok\n");
-            dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
-            UTIL_processMultipleFilenameDestinationDir(dstFilenameTable, filenameTable, filenameIdx, outFileName, outDirName);
+            if (UTIL_isDirectory(outDirName)) {
+                DISPLAY("Output of files will be in directory: %s\n", outDirName);
+                dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
+                if (dstFilenameTable == NULL) { /* the table is written to right below */
+                    DISPLAY("zstd: not enough memory for destination file names\n");
+                    CLEAN_RETURN(1);
+                }
+                UTIL_createDestinationDirTable(dstFilenameTable, filenameTable, filenameIdx, outDirName, 1);
+            } else {
+                DISPLAY("%s is not a directory!\n", outDirName);
+                CLEAN_RETURN(1);
+            }
         } else {
             dstFilenameTable = NULL;
         }
@@ -1205,8 +1212,18 @@ int main(int argCount, const char* argv[])
         FIO_setMemLimit(prefs, memLimit);
 
         if (outDirName) {
-            dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
-            UTIL_processMultipleFilenameDestinationDir(dstFilenameTable, filenameTable, filenameIdx, outFileName, outDirName);
+            if (UTIL_isDirectory(outDirName)) {
+                DISPLAY("Output of files will be in directory: %s\n", outDirName);
+                dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
+                if (dstFilenameTable == NULL) { /* the table is written to right below */
+                    DISPLAY("zstd: not enough memory for destination file names\n");
+                    CLEAN_RETURN(1);
+                }
+                UTIL_createDestinationDirTable(dstFilenameTable, filenameTable, filenameIdx, outDirName, 1);
+            } else {
+                DISPLAY("%s is not a directory!\n", outDirName);
+                CLEAN_RETURN(1);
+            }
         } else {
             dstFilenameTable = NULL;
         }
diff --git a/tests/playTests.sh b/tests/playTests.sh
index 19fc514f..8851da2f 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -264,6 +264,24 @@ if [ "$?" -eq 139 ]; then
 fi
 rm tmp*
 
+println "test : compress multiple files into an output directory, --output-dir-flat"
+println henlo > tmp1
+mkdir tmpInputTestDir
+mkdir tmpInputTestDir/we
+mkdir tmpInputTestDir/we/must
+mkdir tmpInputTestDir/we/must/go
+mkdir tmpInputTestDir/we/must/go/deeper
+println cool > tmpInputTestDir/we/must/go/deeper/tmp2
+mkdir tmpOutDir
+$ZSTD tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir
+test -f tmpOutDir/tmp1.zst
+test -f tmpOutDir/tmp2.zst
+println "test : decompress multiple files into an output directory, --output-dir-flat"
+mkdir tmpOutDirDecomp
+$ZSTD tmpOutDir/ -r -d --output-dir-flat tmpOutDirDecomp
+test -f tmpOutDirDecomp/tmp2
+test -f tmpOutDirDecomp/tmp1
+rm -rf tmp*
 
 println "\n===> Advanced compression parameters "
 println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
@@ -407,7 +425,6 @@ ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
 println "compress multiple files including a missing one (notHere) : "
 $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 
-
 println "\n===> stream-size mode"
 
 ./datagen -g11000 > tmp