Merge pull request #1812 from senhuang42/output-dir-flat

Support for --output-dir-flat: output into a directory
This commit is contained in:
Yann Collet 2019-10-14 09:25:05 -07:00 committed by GitHub
commit b3e92ad346
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 202 additions and 44 deletions

0
build/LICENSE Normal file
View File

View File

@ -628,6 +628,102 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
return (size_t)fileSize;
}
/* FIO_checkFilenameCollisions() :
 * Checks for and warns if there are any files that would have the same output path.
 * Only the last path component of each entry is compared : everything up to and
 * including the last directory separator is ignored.
 * @filenameTable : nbFiles paths; neither the table nor the strings are modified.
 * @return : 0 once the check has run (trivially so when there are fewer than 2 files),
 *           1 if the scratch table could not be allocated (no check is performed).
 */
int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
    const char **filenameTableSorted, *c, *prevElem, *filename;
    unsigned u;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    c = "\\";
#else
    c = "/";
#endif

    /* Fewer than two names cannot collide.
     * Leaving early also avoids malloc(0) (whose result is implementation-defined)
     * and reading element [0] of an empty table. */
    if (nbFiles < 2) return 0;

    filenameTableSorted = (const char**) malloc(sizeof(*filenameTableSorted) * nbFiles);
    if (!filenameTableSorted) {
        DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
        return 1;
    }

    /* Reference (not copy) the last path component of every entry */
    for (u = 0; u < nbFiles; ++u) {
        filename = strrchr(filenameTable[u], c[0]);
        if (filename == NULL) {
            filenameTableSorted[u] = filenameTable[u];
        } else {
            filenameTableSorted[u] = filename+1;
        }
    }

    /* Once sorted, identical names are adjacent : compare each entry with its predecessor */
    qsort((void*)filenameTableSorted, nbFiles, sizeof(*filenameTableSorted), UTIL_compareStr);
    prevElem = filenameTableSorted[0];
    for (u = 1; u < nbFiles; ++u) {
        if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
            DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
        }
        prevElem = filenameTableSorted[u];
    }

    /* Only the pointer table is owned here; the strings belong to the caller */
    free((void*)filenameTableSorted);
    return 0;
}
/* FIO_createFilename_fromOutDir() :
 * Takes a source file name and specified output directory, and
 * allocates memory for and returns a pointer to final path.
 * Only the last path component of srcFilename is kept (the part after the last
 * directory separator, or the whole string when there is none).
 * A separator is inserted between outDirName and that component unless outDirName
 * already ends with one. An empty outDirName yields the bare file name.
 * The buffer is allocated with room for suffixLen extra characters (plus slack)
 * beyond the returned string.
 * The caller owns the returned buffer and must free() it.
 * This function never returns an error (it may abort() in case of pb)
 */
static char*
FIO_createFilename_fromOutDir(const char* srcFilename, const char* outDirName, const size_t suffixLen)
{
    const char* c;
    const char* filenameBegin;
    const char* filename;
    char* result;
    size_t dirLen, nameLen;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    c = "\\";
#else
    c = "/";
#endif

    /* Point directly at the last path component : no temporary copy needed */
    filenameBegin = strrchr(srcFilename, c[0]);
    filename = (filenameBegin == NULL) ? srcFilename : filenameBegin+1;

    dirLen = strlen(outDirName);
    nameLen = strlen(filename);

    /* Same capacity as before : both parts, room for a suffix, and slack that
     * covers the optional separator and the terminating null */
    result = (char*) malloc((dirLen + nameLen + suffixLen + 30) * sizeof(char));
    if (!result) {
        EXM_THROW(30, "zstd: %s", strerror(errno));
    }

    memcpy(result, outDirName, dirLen);
    /* dirLen > 0 guards the [dirLen-1] access against an empty directory name */
    if (dirLen > 0 && outDirName[dirLen-1] != c[0]) {
        result[dirLen++] = c[0];
    }
    memcpy(result + dirLen, filename, nameLen + 1 /* Include terminating null */);

    return result;
}
#ifndef ZSTD_NOCOMPRESS
/* **********************************************************************
@ -1276,9 +1372,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
int result;
stat_t statbuf;
int transfer_permissions = 0;
assert(ress.srcFile != NULL);
if (ress.dstFile == NULL) {
closeDstFile = 1;
DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
@ -1369,11 +1463,9 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
return result;
}
int FIO_compressFilename(FIO_prefs_t* const prefs,
const char* dstFileName, const char* srcFileName,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
const char* srcFileName, const char* dictFileName,
int compressionLevel, ZSTD_compressionParameters comprParams)
{
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@ -1383,21 +1475,25 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
return result;
}
/* FIO_determineCompressedName() :
* create a destination filename for compressed srcFileName.
* @return a pointer to it.
* This function never returns an error (it may abort() in case of pb)
*/
static const char*
FIO_determineCompressedName(const char* srcFileName, const char* suffix)
FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
size_t const sfnSize = strlen(srcFileName);
char* outDirFilename = NULL;
size_t sfnSize = strlen(srcFileName);
size_t const suffixSize = strlen(suffix);
if (outDirName) {
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize);
sfnSize = strlen(outDirFilename);
assert(outDirFilename != NULL);
}
if (dfnbCapacity <= sfnSize+suffixSize+1) {
/* resize buffer for dstName */
free(dstFileNameBuffer);
@ -1405,23 +1501,30 @@ FIO_determineCompressedName(const char* srcFileName, const char* suffix)
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (!dstFileNameBuffer) {
EXM_THROW(30, "zstd: %s", strerror(errno));
} }
}
}
assert(dstFileNameBuffer != NULL);
memcpy(dstFileNameBuffer, srcFileName, sfnSize);
memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
if (outDirFilename) {
memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
free(outDirFilename);
} else {
memcpy(dstFileNameBuffer, srcFileName, sfnSize);
}
memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
return dstFileNameBuffer;
}
/* FIO_compressMultipleFilenames() :
* compress nbFiles files
* into one destination (outFileName)
* or into one file each (outFileName == NULL, but suffix != NULL).
* into either one destination (outFileName),
* or into one file each (outFileName == NULL, but suffix != NULL),
* or into a destination folder (specified with -O)
*/
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
const char** inFileNamesTable, unsigned nbFiles,
const char* outFileName, const char* suffix,
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileNamesTable,
const char* outDirName, unsigned nbFiles,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams)
{
@ -1430,7 +1533,6 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
/* init */
assert(outFileName != NULL || suffix != NULL);
if (outFileName != NULL) { /* output into a single destination (stdout typically) */
ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
if (ress.dstFile == NULL) { /* could not open outFileName */
@ -1448,9 +1550,12 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
unsigned u;
for (u=0; u<nbFiles; u++) {
const char* const srcFileName = inFileNamesTable[u];
const char* const dstFileName = FIO_determineCompressedName(srcFileName, suffix); /* cannot fail */
const char* const dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
error |= FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
} }
}
if (outDirName)
FIO_checkFilenameCollisions(inFileNamesTable ,nbFiles);
}
FIO_freeCResources(ress);
return error;
@ -2166,13 +2271,14 @@ int FIO_decompressFilename(FIO_prefs_t* const prefs,
* @return a pointer to it.
* @return == NULL if there is an error */
static const char*
FIO_determineDstName(const char* srcFileName)
FIO_determineDstName(const char* srcFileName, const char* outDirName)
{
static size_t dfnbCapacity = 0;
static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
size_t const sfnSize = strlen(srcFileName);
char* outDirFilename = NULL;
size_t sfnSize = strlen(srcFileName);
size_t suffixSize;
const char* const suffixPtr = strrchr(srcFileName, '.');
if (suffixPtr == NULL) {
DISPLAYLEVEL(1, "zstd: %s: unknown suffix -- ignored \n",
@ -2210,19 +2316,29 @@ FIO_determineDstName(const char* srcFileName)
srcFileName, suffixlist);
return NULL;
}
if (outDirName) {
outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
sfnSize = strlen(outDirFilename);
assert(outDirFilename != NULL);
}
/* allocate enough space to write dstFilename into it */
if (dfnbCapacity+suffixSize <= sfnSize+1) {
/* allocate enough space to write dstFilename into it */
free(dstFileNameBuffer);
dfnbCapacity = sfnSize + 20;
dstFileNameBuffer = (char*)malloc(dfnbCapacity);
if (dstFileNameBuffer==NULL)
EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
EXM_THROW(74, "%s : not enough memory for dstFileName", strerror(errno));
}
/* return dst name == src name truncated from suffix */
assert(dstFileNameBuffer != NULL);
memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
if (outDirFilename) {
memcpy(dstFileNameBuffer, outDirFilename, sfnSize - suffixSize);
free(outDirFilename);
} else {
memcpy(dstFileNameBuffer, srcFileName, sfnSize - suffixSize);
}
dstFileNameBuffer[sfnSize-suffixSize] = '\0';
return dstFileNameBuffer;
@ -2232,8 +2348,8 @@ FIO_determineDstName(const char* srcFileName)
int
FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
const char* srcNamesTable[], unsigned nbFiles,
const char* outFileName,
const char** srcNamesTable, unsigned nbFiles,
const char* outDirName, const char* outFileName,
const char* dictFileName)
{
int error = 0;
@ -2252,19 +2368,19 @@ FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
unsigned u;
for (u=0; u<nbFiles; u++) { /* create dstFileName */
const char* const srcFileName = srcNamesTable[u];
const char* const dstFileName = FIO_determineDstName(srcFileName);
const char* const dstFileName = FIO_determineDstName(srcFileName, outDirName);
if (dstFileName == NULL) { error=1; continue; }
error |= FIO_decompressSrcFile(prefs, ress, dstFileName, srcFileName);
}
if (outDirName)
FIO_checkFilenameCollisions(srcNamesTable ,nbFiles);
}
FIO_freeDResources(ress);
return error;
}
/* **************************************************************************
* .zst file info (--list command)
***************************************************************************/

View File

@ -87,8 +87,9 @@ void FIO_setNotificationLevel(int level);
/** FIO_compressFilename() :
@return : 0 == ok; 1 == pb with src file. */
int FIO_compressFilename (FIO_prefs_t* const prefs,
const char* outfilename, const char* infilename, const char* dictFileName,
int compressionLevel, ZSTD_compressionParameters comprParams);
const char* outfilename, const char* infilename,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams);
/** FIO_decompressFilename() :
@return : 0 == ok; 1 == pb with src file. */
@ -103,19 +104,24 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis
***************************************/
/** FIO_compressMultipleFilenames() :
@return : nb of missing files */
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
const char** srcNamesTable, unsigned nbFiles,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileNamesTable,
const char* outDirName, unsigned nbFiles,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams);
/** FIO_decompressMultipleFilenames() :
@return : nb of missing or skipped files */
int FIO_decompressMultipleFilenames(FIO_prefs_t* const prefs,
const char** srcNamesTable, unsigned nbFiles,
const char* outDirName,
const char* outFileName,
const char* dictFileName);
/* FIO_checkFilenameCollisions() :
* Checks for and warns if there are any files that would have the same output path
*/
int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles);
/*-*************************************
* Advanced stuff (should actually be hosted elsewhere)

View File

@ -20,6 +20,9 @@ extern "C" {
#include <errno.h>
#include <assert.h>
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
#include <direct.h> /* needed for _mkdir in windows */
#endif
int UTIL_fileExist(const char* filename)
{
@ -98,6 +101,10 @@ U32 UTIL_isDirectory(const char* infilename)
return 0;
}
/* UTIL_compareStr() :
 * Comparator with the signature expected by qsort(), for arrays of C strings.
 * p1 and p2 each point to an array element (itself a pointer to char);
 * the two strings they designate are ordered with strcmp().
 * @return : negative, zero or positive, exactly as strcmp() reports. */
int UTIL_compareStr(const void *p1, const void *p2) {
    const char* const lhs = *(const char* const*) p1;
    const char* const rhs = *(const char* const*) p2;
    return strcmp(lhs, rhs);
}
int UTIL_isSameFile(const char* file1, const char* file2)
{
#if defined(_MSC_VER)

View File

@ -134,6 +134,7 @@ int UTIL_setFileStat(const char* filename, stat_t* statbuf);
U32 UTIL_isDirectory(const char* infilename);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
int UTIL_isSameFile(const char* file1, const char* file2);
int UTIL_compareStr(const void *p1, const void *p2);
U32 UTIL_isLink(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))

View File

@ -136,6 +136,7 @@ static int usage_advanced(const char* programName)
DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n");
DISPLAY( " -c : force write to standard output, even if it is the console\n");
DISPLAY( " -l : print information about zstd compressed files \n");
DISPLAY( " --output-dir-flat directory: results stored into `directory`. Filename collisions mean first file will be compressed. With -f, the last file will be compressed.\n");
#ifndef ZSTD_NOCOMPRESS
DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
@ -562,6 +563,7 @@ int main(int argCount, const char* argv[])
adaptMax = MAXCLEVEL,
rsyncable = 0,
nextArgumentIsOutFileName = 0,
nextArgumentIsOutDirName = 0,
nextArgumentIsMaxDict = 0,
nextArgumentIsDictID = 0,
nextArgumentsAreFiles = 0,
@ -586,6 +588,7 @@ int main(int argCount, const char* argv[])
unsigned filenameIdx = 0;
const char* programName = argv[0];
const char* outFileName = NULL;
const char* outDirName = NULL;
const char* dictFileName = NULL;
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize;
@ -686,6 +689,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; }
if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
@ -852,7 +856,7 @@ int main(int argCount, const char* argv[])
/* destination file name */
case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break;
/* limit decompression memory */
case 'M':
argument++;
@ -965,6 +969,13 @@ int main(int argCount, const char* argv[])
continue;
}
if (nextArgumentIsOutDirName) {
nextArgumentIsOutDirName = 0;
lastCommand = 0;
outDirName = argument;
continue;
}
/* add filename to list */
filenameTable[filenameIdx++] = argument;
}
@ -1166,7 +1177,7 @@ int main(int argCount, const char* argv[])
if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(prefs, outFileName, filenameTable[0], dictFileName, cLevel, compressionParams);
else
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, outDirName, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
#else
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
DISPLAY("Compression not supported \n");
@ -1184,7 +1195,7 @@ int main(int argCount, const char* argv[])
if (filenameIdx==1 && outFileName)
operationResult = FIO_decompressFilename(prefs, outFileName, filenameTable[0], dictFileName);
else
operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, dictFileName);
operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, dictFileName);
#else
DISPLAY("Decompression not supported \n");
#endif

View File

@ -269,6 +269,24 @@ if [ "$?" -eq 139 ]; then
fi
rm tmp*
# --output-dir-flat, compression side :
# two sources, one in the current directory and one nested five levels deep,
# are compressed in a single invocation with tmpOutDir as the output directory.
println "test : compress multiple files into an output directory, --output-dir-flat"
println henlo > tmp1
# Build the nested input tree one level at a time
mkdir tmpInputTestDir
mkdir tmpInputTestDir/we
mkdir tmpInputTestDir/we/must
mkdir tmpInputTestDir/we/must/go
mkdir tmpInputTestDir/we/must/go/deeper
println cool > tmpInputTestDir/we/must/go/deeper/tmp2
# The output directory is created before zstd runs
mkdir tmpOutDir
$ZSTD tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir
# Both results are expected directly inside tmpOutDir, without the source sub-directories.
# NOTE(review): a failing `test -f` only stops the script if it runs under `set -e` -- confirm.
test -f tmpOutDir/tmp1.zst
test -f tmpOutDir/tmp2.zst
# --output-dir-flat, decompression side :
# the directory produced above is given as input with -r, and the decompressed
# files are expected directly inside tmpOutDirDecomp.
println "test : decompress multiple files into an output directory, --output-dir-flat"
mkdir tmpOutDirDecomp
$ZSTD tmpOutDir/ -r -d --output-dir-flat tmpOutDirDecomp
test -f tmpOutDirDecomp/tmp2
test -f tmpOutDirDecomp/tmp1
# Remove every tmp* file and directory created by this section
rm -rf tmp*
println "\n===> Advanced compression parameters "
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
@ -412,7 +430,6 @@ ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3))
println "compress multiple files including a missing one (notHere) : "
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
println "\n===> stream-size mode"
./datagen -g11000 > tmp