Merge pull request #1844 from AhmedAbdellah19/adding_read_files_from_file_feature

Adding --file=FILE feature
This commit is contained in:
Yann Collet 2019-10-25 10:11:47 -07:00 committed by GitHub
commit 14e9010bb5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 369 additions and 2 deletions

View File

@ -187,6 +187,228 @@ U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbF
return error ? UTIL_FILESIZE_UNKNOWN : total;
}
/*! UTIL_readLineFromFile() :
 *  Reads the next line of `file` into `buf` using fgets(),
 *  storing at most `len` bytes, terminating '\0' included.
 *  The final '\n', when present, is left in `buf`.
 *  `file` is advanced to the position where reading stopped.
 * @return : length of the line without its final '\n',
 *           or -1 if nothing could be read (invalid argument, end of file, read error).
 */
int UTIL_readLineFromFile(char* buf, size_t len, FILE* file) {
    size_t lineLen;
    if (buf == NULL || file == NULL || len == 0) return -1;
    if (feof(file)) {
        UTIL_DISPLAYLEVEL(1, "[ERROR] end of file reached and need to read\n");
        return -1;
    }
    if (fgets(buf, (int) len, file) == NULL) {
        /* When fgets() fails, the content of buf is either untouched or indeterminate :
         * it must not be printed. Reaching the end of file here is the normal way
         * to finish a file ending with '\n', so only a genuine read error is reported. */
        if (ferror(file)) {
            UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readLineFromFile] fgets failed to read a line\n");
        }
        return -1;
    }
    lineLen = strlen(buf);
    /* only discount the '\n' when it is really there :
     * the last line of a file may not be newline-terminated */
    if (lineLen > 0 && buf[lineLen-1] == '\n') --lineLen;
    return (int) lineLen;
}
/* readFromFile() :
 * Reads `inputFileName` line by line and stores every non-empty line into `buf`
 * as a '\0'-terminated string, strings being laid out back to back.
 * `inputFileSize` is the capacity of `buf`, in bytes.
 * @return : number of names stored into `buf`,
 *           or -1 on error (NULL buf, file can't be opened, read error,
 *           or content of the file doesn't fit into `buf`).
 */
static int readFromFile(char* buf, size_t inputFileSize, const char* inputFileName) {
    FILE* inputFile;
    size_t pos = 0;
    int nbFiles = 0;
    int failed = 0;

    /* validate buf before opening the file, so that no FILE* is leaked */
    if(!buf) {
        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't create buffer.\n");
        return -1;
    }
    inputFile = fopen(inputFileName, "r");
    if(!inputFile) {
        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't open file to read input file names.\n");
        return -1;
    }

    while (!feof(inputFile)) {
        size_t const remaining = inputFileSize - pos;   /* pos <= inputFileSize is maintained below */
        size_t lineLen;
        if (remaining < 2) {
            /* no room for one more name : only acceptable when the input is exhausted */
            if (fgetc(inputFile) != EOF || ferror(inputFile)) failed = 1;
            break;
        }
        buf[pos] = '\0';   /* keeps the destination a valid string even if nothing gets read */
        if (UTIL_readLineFromFile(buf+pos, remaining, inputFile) < 0) {
            /* end of file is the normal exit; a read error would otherwise loop forever */
            if (ferror(inputFile)) failed = 1;
            break;
        }
        lineLen = strlen(buf+pos);   /* at most remaining-1, by fgets() contract */
        if (lineLen > 0 && buf[pos+lineLen-1] == '\n') {
            --lineLen;
            buf[pos+lineLen] = '\0';   /* replace '\n' with '\0' */
        }
        if (lineLen == 0) continue;    /* skip empty lines */
        pos += lineLen + 1;            /* +1 : keep the terminating '\0' */
        ++nbFiles;
    }

    fclose(inputFile);
    return failed ? -1 : nbFiles;
}
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
FileNamesTable*
UTIL_createFileNamesTable_fromFileName(const char* inputFileName) {
U64 inputFileSize = 0;
unsigned nbFiles = 0;
int ret_nbFiles = -1;
char* buf = NULL;
size_t i = 0, pos = 0;
FileNamesTable* filesTable = NULL;
if(!UTIL_fileExist(inputFileName) || !UTIL_isRegularFile(inputFileName))
return NULL;
inputFileSize = UTIL_getFileSize(inputFileName) + 1; /* (+1) to add '\0' at the end of last filename */
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
return NULL;
buf = (char*) malloc((size_t) inputFileSize * sizeof(char));
if(!buf) {
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't create buffer.\n");
return NULL;
}
ret_nbFiles = readFromFile(buf, (size_t) inputFileSize, inputFileName);
if(ret_nbFiles <= 0) {
free(buf);
return NULL;
}
nbFiles = ret_nbFiles;
filesTable = UTIL_createFileNamesTable(NULL, NULL, 0);
if(!filesTable) {
free(buf);
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't create table for files.\n");
return NULL;
}
filesTable->tableSize = nbFiles;
filesTable->fileNames = (const char**) malloc((nbFiles+1) * sizeof(char*));
for(i = 0, pos = 0; i < nbFiles; ++i) {
filesTable->fileNames[i] = buf+pos;
pos += strlen(buf+pos)+1;
}
if(pos > inputFileSize){
UTIL_freeFileNamesTable(filesTable);
if(buf) free(buf);
return NULL;
}
filesTable->buf = buf;
return filesTable;
}
/*! UTIL_createFileNamesTable() :
 *  Allocates a FileNamesTable and records `filenames`, `buf` and `tableSize` in it.
 *  Arguments are stored as provided, nothing is copied.
 * @return : the new table, or NULL when allocation fails.
 */
FileNamesTable*
UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize){
    FileNamesTable* const result = (FileNamesTable*) malloc(sizeof(*result));
    if (result != NULL) {
        result->tableSize = tableSize;
        result->buf = buf;
        result->fileNames = filenames;
    }
    return result;
}
/*! UTIL_freeFileNamesTable() :
 *  Releases the array of names, the buffer, then the table itself.
 *  A NULL `table` is accepted and ignored.
 */
void UTIL_freeFileNamesTable(FileNamesTable* table) {
    if (table == NULL) return;
    /* free(NULL) is a no-op : members don't need to be tested individually */
    free((void*)table->fileNames);
    free(table->buf);
    free(table);
}
/* getTotalTableSize() :
 * @return : number of bytes needed to store all names of `table` back to back,
 *           each one followed by its '\0'.
 * Scanning stops at `table->tableSize` entries, or at the first NULL entry.
 */
static size_t getTotalTableSize(FileNamesTable* table) {
    size_t bytes = 0;
    size_t n;
    for (n = 0; n < table->tableSize; ++n) {
        const char* const name = table->fileNames[n];
        if (name == NULL) break;
        bytes += strlen(name) + 1;   /* +1 : terminating '\0' */
    }
    return bytes;
}
FileNamesTable*
UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2) {
unsigned newTableIdx = 0, idx1 = 0, idx2 = 0;
size_t i = 0, pos = 0;
size_t newTotalTableSize = 0;
FileNamesTable* newTable = NULL;
char* buf = NULL;
newTable = UTIL_createFileNamesTable(NULL, NULL, 0);
if(!newTable) {
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create new table for concatenation output.\n");
return NULL;
}
newTotalTableSize = getTotalTableSize(table1) + getTotalTableSize(table2);
buf = (char*) malloc(newTotalTableSize * sizeof(char));
if(!buf) {
UTIL_freeFileNamesTable(newTable);
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create buf for concatenation output.\n");
return NULL;
}
for(i = 0; i < newTotalTableSize ; ++i) buf[i] = '\0';
newTable->tableSize = table1->tableSize + table2->tableSize;
newTable->fileNames = (const char **) malloc(newTable->tableSize * sizeof(char*));
if(!newTable->fileNames) {
UTIL_freeFileNamesTable(newTable);
if(buf) free(buf);
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create new table for concatenation output.\n");
return NULL;
}
for (i = 0; i < newTable->tableSize; ++i)
newTable->fileNames[i] = NULL;
for( ; idx1 < table1->tableSize && table1->fileNames[idx1] && pos < newTotalTableSize; ++idx1, ++newTableIdx) {
size_t curLen = strlen(table1->fileNames[idx1]);
memcpy(buf+pos, table1->fileNames[idx1], curLen);
newTable->fileNames[newTableIdx] = buf+pos;
pos += curLen+1;
}
for( ; idx2 < table2->tableSize && table2->fileNames[idx2] && pos < newTotalTableSize ; ++idx2, ++newTableIdx) {
size_t curLen = strlen(table2->fileNames[idx2]);
memcpy(buf+pos, table2->fileNames[idx2], curLen);
newTable->fileNames[newTableIdx] = buf+pos;
pos += curLen+1;
}
if(pos > newTotalTableSize) {
UTIL_freeFileNamesTable(newTable);
if(buf) free(buf);
return NULL;
}
newTable->buf = buf;
UTIL_freeFileNamesTable(table1);
UTIL_freeFileNamesTable(table2);
return newTable;
}
#ifdef _WIN32
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{

View File

@ -90,7 +90,7 @@ extern "C" {
* Constants
***************************************/
#define LIST_SIZE_INCREASE (8*1024)
/* Upper bound (50 MB) on the size of a file listing file names.
 * The whole expansion is parenthesized so that the macro behaves as a single value
 * whatever the surrounding expression (division, modulo, cast, ...). */
#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50)
/*-****************************************
* Compiler specifics
@ -142,6 +142,50 @@ U32 UTIL_isLink(const char* infilename);
U64 UTIL_getFileSize(const char* infilename);
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
/*! UTIL_readLineFromFile(char* buf, size_t len, FILE* file) :
 *  Reads the next line of `file` into `buf`, storing at most `len` bytes ('\0' included).
 *  Will also modify `*file`, advancing it to the position where it stopped reading.
 * @return : int. size of the line that was read, its final '\n' not being counted,
 *           or -1 when no line could be read (end of file, or read failure).
 *  NOTE(review) : confirm the value returned for a last line lacking a final '\n'.
 */
int UTIL_readLineFromFile(char* buf, size_t len, FILE* file);
/* FileNamesTable :
 * A list of file names, together with the buffer storing their characters.
 * Note : tableSize denotes the total capacity of the table.
 */
typedef struct
{
const char** fileNames;   /* array of pointers to the file names */
char* buf;                /* buffer released along with the table; the table-building functions point fileNames into it */
size_t tableSize;         /* total capacity of fileNames, in number of entries */
} FileNamesTable;
/*! UTIL_createFileNamesTable_fromFileName(const char* inputFileName) :
 *  Reads a table of file names from `inputFileName`, one file name per line.
 * @return : FileNamesTable* the table of file names,
 *           or NULL on failure, notably when `inputFileName` doesn't exist or is not a regular file.
 *  Note : size of `inputFileName` must be smaller than 50MB (MAX_FILE_OF_FILE_NAMES_SIZE).
 *  The returned table can be released with UTIL_freeFileNamesTable().
 */
FileNamesTable* UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
/*! UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize) :
 *  This function takes an array of file names, the buffer backing it, and tableSize,
 *  and creates a FileNamesTable object referencing them (nothing is copied).
 * @return : FileNamesTable*, or NULL if the object can't be allocated
 */
FileNamesTable*
UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize);
/*! UTIL_freeFileNamesTable(FileNamesTable* table) :
 *  This function takes a buffer-based table and frees it :
 *  `table->fileNames`, `table->buf`, then `table` itself.
 *  `table` is allowed to be NULL, in which case nothing happens.
 * @return : void.
 */
void UTIL_freeFileNamesTable(FileNamesTable* table);
/*! UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2):
 *  takes table1 and table2, and returns a new table
 *  listing the file names of table1 followed by the file names of table2.
 * @return : FileNamesTable* concatenation of the two tables, or NULL on failure
 *  note : on success, table1 and table2 are freed;
 *         on failure, they are not freed and remain the caller's responsibility.
 */
FileNamesTable* UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2);
/*
* A modified version of realloc().

View File

@ -567,6 +567,7 @@ int main(int argCount, const char* argv[])
nextArgumentIsMaxDict = 0,
nextArgumentIsDictID = 0,
nextArgumentsAreFiles = 0,
isTableBufferBased = 0,
nextEntryIsDictionary = 0,
operationResult = 0,
separateFiles = 0,
@ -584,7 +585,12 @@ int main(int argCount, const char* argv[])
int cLevelLast = -1000000000;
unsigned recursive = 0;
unsigned memLimit = 0;
const char** filenameTable = (const char**)malloc((size_t)argCount * sizeof(const char*)); /* argCount >= 1 */
size_t filenameTableSize = argCount;
const char** filenameTable = (const char**)malloc(filenameTableSize * sizeof(const char*)); /* argCount >= 1 */
FileNamesTable* extendedTable = NULL;
FileNamesTable* concatenatedTables = NULL;
FileNamesTable* curTable = NULL;
char* tableBuf = NULL;
unsigned filenameIdx = 0;
const char* programName = argv[0];
const char* outFileName = NULL;
@ -796,6 +802,45 @@ int main(int argCount, const char* argv[])
continue;
}
#endif
if (longCommandWArg(&argument, "--file=")) {
if(!UTIL_fileExist(argument) || !UTIL_isRegularFile(argument)){
DISPLAYLEVEL(1, "[ERROR] wrong fileName: %s\n", argument);
CLEAN_RETURN(badusage(programName));
}
extendedTable = UTIL_createFileNamesTable_fromFileName(argument);
if(!extendedTable) {
CLEAN_RETURN(badusage(programName));
}
filenameTable[filenameIdx] = NULL; // marking end of table
curTable = UTIL_createFileNamesTable(filenameTable, tableBuf, filenameTableSize);
if(!curTable) {
UTIL_freeFileNamesTable(extendedTable);
CLEAN_RETURN(badusage(programName));
}
concatenatedTables = UTIL_concatenateTwoTables(curTable, extendedTable);
if(!concatenatedTables) {
UTIL_freeFileNamesTable(curTable);
UTIL_freeFileNamesTable(extendedTable);
CLEAN_RETURN(badusage(programName));
}
filenameTable = concatenatedTables->fileNames;
filenameTableSize = concatenatedTables->tableSize;
tableBuf = concatenatedTables->buf;
filenameIdx += (unsigned) extendedTable->tableSize;
isTableBufferBased = 1;
continue;
}
/* fall-through, will trigger bad_usage() later on */
}
@ -1205,6 +1250,15 @@ int main(int argCount, const char* argv[])
_end:
FIO_freePreferences(prefs);
if(filenameTable) {
if(isTableBufferBased && tableBuf){
free(tableBuf);
}
}
UTIL_freeFileNamesTable(curTable);
UTIL_freeFileNamesTable(extendedTable);
UTIL_freeFileNamesTable(concatenatedTables);
if (main_pause) waitEnter();
#ifdef UTIL_HAS_CREATEFILELIST
if (extendedFileList)

View File

@ -293,6 +293,53 @@ test -f tmpOutDirDecomp/tmp2
test -f tmpOutDirDecomp/tmp1
rm -rf tmp*
# --file=FILE : the list of input files is read from FILE, one name per line
println "test : compress multiple files reading them from a file, --file=FILE"
mkdir tmpInputTestDir
println "Hello world!, file1" > tmpInputTestDir/file1
println "Hello world!, file2" > tmpInputTestDir/file2
# tmp lists file1 and file2
println tmpInputTestDir/file1 > tmp
println tmpInputTestDir/file2 >> tmp
$ZSTD -f --file=tmp
# every listed file must have a .zst counterpart
test -f tmpInputTestDir/file2.zst
test -f tmpInputTestDir/file1.zst
rm tmpInputTestDir/*.zst
# same scenario, the option being provided twice : both lists must be processed
println "test : compress multiple files reading them from multiple files, --file=FILE"
println "Hello world!, file3" > tmpInputTestDir/file3
println "Hello world!, file4" > tmpInputTestDir/file4
# tmp1 lists file3 and file4
println tmpInputTestDir/file3 > tmp1
println tmpInputTestDir/file4 >> tmp1
$ZSTD -f --file=tmp --file=tmp1
test -f tmpInputTestDir/file1.zst
test -f tmpInputTestDir/file2.zst
test -f tmpInputTestDir/file3.zst
test -f tmpInputTestDir/file4.zst
# decompression : relies on the .zst files left in place by the previous test
println "test : decompress multiple files reading them from a file, --file=FILE"
# remove the originals, so that their presence afterwards proves decompression happened
rm tmpInputTestDir/file1
rm tmpInputTestDir/file2
println tmpInputTestDir/file1.zst > tmpZst
println tmpInputTestDir/file2.zst >> tmpZst
$ZSTD -d -f --file=tmpZst
test -f tmpInputTestDir/file2
test -f tmpInputTestDir/file1
println "test : decompress multiple files reading them from multiple files, --file=FILE"
rm tmpInputTestDir/file1
rm tmpInputTestDir/file2
rm tmpInputTestDir/file3
rm tmpInputTestDir/file4
# tmpZst (file1.zst, file2.zst) comes from the previous test; tmpZst1 adds file3.zst and file4.zst
println tmpInputTestDir/file3.zst > tmpZst1
println tmpInputTestDir/file4.zst >> tmpZst1
$ZSTD -d -f --file=tmpZst --file=tmpZst1
test -f tmpInputTestDir/file1
test -f tmpInputTestDir/file2
test -f tmpInputTestDir/file3
test -f tmpInputTestDir/file4
# remove every tmp* entry created above, tmpInputTestDir included
rm -rf tmp*
println "\n===> Advanced compression parameters "
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
println "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!"