Add support for --output-dir-flat

New flag to specify output directory destination for multiple files.
This commit is contained in:
Sen Huang 2019-10-02 11:08:20 -04:00
parent 62616c4d90
commit f80437c586
6 changed files with 103 additions and 50 deletions

0
build/LICENSE Normal file
View File

View File

@ -1453,7 +1453,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileN
} }
FIO_freeCResources(ress);
if (outDirName)
if (dstFileNamesTable)
UTIL_freeDestinationFilenameTable(dstFileNamesTable, nbFiles);
return error;

View File

@ -90,53 +90,81 @@ U32 UTIL_isDirectory(const char* infilename)
return 0;
}
int UTIL_createDir(const char* outDirName)
{
int r;
if (UTIL_isDirectory(outDirName))
return 0; /* no need to create if directory already exists */
/*! UTIL_compareStr() :
 *  Comparator suitable for qsort() over an array of C strings.
 *  Each argument is the address of an array element, i.e. a pointer to a char*.
 * @return : the strcmp() ordering of the two referenced strings.
 */
int UTIL_compareStr(const void *p1, const void *p2) {
    const char* const lhs = *(const char* const*) p1;
    const char* const rhs = *(const char* const*) p2;
    return strcmp(lhs, rhs);
}
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
r = _mkdir(outDirName);
if (r || !UTIL_isDirectory(outDirName)) return 1;
#else
r = mkdir(outDirName, S_IRWXU | S_IRWXG | S_IRWXO); /* dir has all permissions */
if (r || !UTIL_isDirectory(outDirName)) return 1;
#endif
/*! UTIL_checkFilenameCollisions() :
 *  Displays a warning (display level 1) for every pair of identical names
 *  found in dstFilenameTable.
 *  The check runs on a sorted copy of the pointer array : the caller's table
 *  keeps its order, and the strings themselves are neither copied nor modified.
 *  NULL entries (left behind when a destination name could not be allocated)
 *  are ignored rather than handed to strcmp().
 * @param dstFilenameTable : array of nbFiles destination names
 * @param nbFiles : number of entries in dstFilenameTable
 * @return : 0 when the check ran (whether or not duplicates were reported),
 *           1 when the temporary array could not be allocated (no check performed).
 */
int UTIL_checkFilenameCollisions(char** dstFilenameTable, unsigned nbFiles) {
    char** sortedNames;
    unsigned nbNames = 0;
    unsigned u;

    /* Fewer than two names cannot collide.
     * Returning early also avoids malloc(0) and reading element 0 of an empty table. */
    if (nbFiles < 2) return 0;

    sortedNames = (char**) malloc(sizeof(*sortedNames) * nbFiles);
    if (!sortedNames) {
        UTIL_DISPLAYLEVEL(1, "Unable to malloc new str array, not checking for name collisions\n");
        return 1;
    }

    /* shallow copy : only the pointers are duplicated, NULL entries are dropped */
    for (u = 0; u < nbFiles; ++u) {
        if (dstFilenameTable[u] != NULL) {
            sortedNames[nbNames++] = dstFilenameTable[u];
        }
    }

    /* once sorted, identical names are adjacent, so one linear pass finds them all */
    qsort(sortedNames, nbNames, sizeof(*sortedNames), UTIL_compareStr);
    for (u = 1; u < nbNames; ++u) {
        if (strcmp(sortedNames[u-1], sortedNames[u]) == 0) {
            UTIL_DISPLAYLEVEL(1, "WARNING: Two files have same filename as source : %s\n", sortedNames[u-1]);
        }
    }

    free(sortedNames);
    return 0;
}
void UTIL_createDestinationDirTable(const char** filenameTable, unsigned nbFiles,
const char* outDirName, char** dstFilenameTable)
void UTIL_createDestinationDirTable(char** dstFilenameTable, const char** filenameTable,
const unsigned nbFiles, const char* outDirName, const int compressing)
{
unsigned u;
char c;
c = '/';
const char* c;
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
c = "\\";
#else
c = "/";
#endif
/* duplicate source file table */
for (u = 0; u < nbFiles; ++u) {
const char* filename;
char* filename, *filenameBegin;
size_t finalPathLen;
finalPathLen = strlen(outDirName);
filename = strrchr(filenameTable[u], c); /* filename is the last bit of string after '/' */
filenameBegin = strrchr(filenameTable[u], c[0]);
if (filenameBegin == NULL) {
filename = strdup(filenameTable[u]);
} else {
filename = strdup(filenameBegin+1);
}
finalPathLen += strlen(filename);
dstFilenameTable[u] = (char*) malloc((finalPathLen+5) * sizeof(char)); /* extra 1 bit for \0, extra 4 for .zst if compressing*/
dstFilenameTable[u] = compressing ?
(char*) malloc((finalPathLen+6) * sizeof(char)) /* 4 more bytes for .zst suffix */
: (char*) malloc((finalPathLen+2) * sizeof(char));
if (!dstFilenameTable[u]) {
UTIL_DISPLAYLEVEL(1, "Unable to allocate space for file destination str\n");
continue;
}
strcpy(dstFilenameTable[u], outDirName);
strcat(dstFilenameTable[u], filename);
}
}
if (outDirName[strlen(outDirName)-1] == c[0]) {
strcat(dstFilenameTable[u], filename);
} else {
strcat(dstFilenameTable[u], c);
strcat(dstFilenameTable[u], filename);
}
void UTIL_processMultipleFilenameDestinationDir(char** dstFilenameTable,
const char** filenameTable, unsigned filenameIdx,
const char* outFileName, const char* outDirName) {
int dirResult;
dirResult = UTIL_createDir(outDirName);
if (dirResult)
UTIL_DISPLAYLEVEL(1, "Directory creation unsuccessful\n");
free(filename);
}
UTIL_createDestinationDirTable(filenameTable, filenameIdx, outDirName, dstFilenameTable);
if (outFileName) {
outFileName = dstFilenameTable[0]; /* in case -O is called with single file */
if (UTIL_checkFilenameCollisions(dstFilenameTable, nbFiles)) {
UTIL_DISPLAYLEVEL(1, "Checking for filename collisions failed");
}
}

View File

@ -127,15 +127,14 @@ int UTIL_fileExist(const char* filename);
int UTIL_isRegularFile(const char* infilename);
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
U32 UTIL_isDirectory(const char* infilename);
int UTIL_createDir(const char* outDirName);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
int UTIL_isSameFile(const char* file1, const char* file2);
void UTIL_createDestinationDirTable(const char** filenameTable, unsigned filenameIdx,
const char* outDirName, char** dstFilenameTable);
/* qsort()-compatible comparator : p1 and p2 each point to a char* element,
 * and the two referenced strings are ordered with strcmp() */
int UTIL_compareStr(const void *p1, const void *p2);
/* Sorts a copy of the nbFiles pointers held in dstFilenameTable and displays a warning
 * for each pair of identical names; the table passed in is not reordered.
 * @return : 1 if the temporary array cannot be allocated (no check performed), 0 otherwise */
int UTIL_checkFilenameCollisions(char** dstFilenameTable, unsigned nbFiles);
/* Populates dstFilenameTable using outDirName concatenated with entries from filenameTable */
void UTIL_createDestinationDirTable(char** dstFilenameTable, const char** filenameTable, const unsigned nbFiles,
const char* outDirName, const int compressing);
void UTIL_freeDestinationFilenameTable(char** dstDirTable, unsigned nbFiles);
void UTIL_processMultipleFilenameDestinationDir(char** dstFilenameTable,
const char** filenameTable, unsigned filenameIdx,
const char* outFileName, const char* outDirName);
U32 UTIL_isLink(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))

View File

@ -118,7 +118,6 @@ static int usage(const char* programName)
#endif
DISPLAY( " -D file: use `file` as Dictionary \n");
DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
DISPLAY( " -O directory: result(s) stored into `directory`, creates one if non-existent \n");
DISPLAY( " -f : overwrite output without prompting and (de)compress links \n");
DISPLAY( "--rm : remove source file(s) after successful de/compression \n");
DISPLAY( " -k : preserve source file(s) (default) \n");
@ -137,6 +136,7 @@ static int usage_advanced(const char* programName)
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
DISPLAY( " -l : print information about zstd compressed files \n");
DISPLAY( " --output-dir-flat directory: results stored into `directory` top level \n");
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
@ -690,6 +690,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
@ -856,9 +857,6 @@ int main(int argCount, const char* argv[])
/* destination file name */
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
/* destination directory name */
case 'O': nextArgumentIsOutDirName=1; lastCommand=1; argument++; break;
/* limit decompression memory */
case 'M':
@ -1178,9 +1176,14 @@ int main(int argCount, const char* argv[])
if (adaptMax < cLevel) cLevel = adaptMax;
if (outDirName) {
printf("ok\n");
dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
UTIL_processMultipleFilenameDestinationDir(dstFilenameTable, filenameTable, filenameIdx, outFileName, outDirName);
if (UTIL_isDirectory(outDirName)) {
DISPLAY("Output of files will be in directory: %s\n", outDirName);
dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
UTIL_createDestinationDirTable(dstFilenameTable, filenameTable, filenameIdx, outDirName, 1);
} else {
DISPLAY("%s is not a directory!\n", outDirName);
CLEAN_RETURN(1);
}
} else {
dstFilenameTable = NULL;
}
@ -1205,8 +1208,14 @@ int main(int argCount, const char* argv[])
FIO_setMemLimit(prefs, memLimit);
if (outDirName) {
dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
UTIL_processMultipleFilenameDestinationDir(dstFilenameTable, filenameTable, filenameIdx, outFileName, outDirName);
if (UTIL_isDirectory(outDirName)) {
DISPLAY("Output of files will be in directory: %s\n", outDirName);
dstFilenameTable = (char**)malloc(filenameIdx * sizeof(char*));
UTIL_createDestinationDirTable(dstFilenameTable, filenameTable, filenameIdx, outDirName, 1);
} else {
DISPLAY("%s is not a directory!\n", outDirName);
CLEAN_RETURN(1);
}
} else {
dstFilenameTable = NULL;
}

View File

@ -264,6 +264,24 @@ if [ "$?" -eq 139 ]; then
fi
rm tmp*
println "test : compress multiple files into an output directory, --output-dir-flat"
println henlo > tmp1
mkdir tmpInputTestDir
mkdir tmpInputTestDir/we
mkdir tmpInputTestDir/we/must
mkdir tmpInputTestDir/we/must/go
mkdir tmpInputTestDir/we/must/go/deeper
println cool > tmpInputTestDir/we/must/go/deeper/tmp2
mkdir tmpOutDir
$ZSTD tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir
test -f tmpOutDir/tmp1.zst
test -f tmpOutDir/tmp2.zst
println "test : decompress multiple files into an output directory, --output-dir-flat"
mkdir tmpOutDirDecomp
$ZSTD tmpOutDir/ -r -d --output-dir-flat tmpOutDirDecomp
test -f tmpOutDirDecomp/tmp2
test -f tmpOutDirDecomp/tmp1
rm -rf tmp*
println "\n===> Advanced compression parameters "
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
@ -407,7 +425,6 @@ ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
println "\n===> stream-size mode"
./datagen -g11000 > tmp