Merge pull request #1844 from AhmedAbdellah19/adding_read_files_from_file_feature
Adding --file=FILE feature
This commit is contained in:
commit
14e9010bb5
222
programs/util.c
222
programs/util.c
@ -187,6 +187,228 @@ U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbF
|
||||
return error ? UTIL_FILESIZE_UNKNOWN : total;
|
||||
}
|
||||
|
||||
|
||||
/*! UTIL_readLineFromFile() :
 *  Reads the next line of `file` into `buf` with fgets(), storing at most `len-1` characters
 *  followed by a '\0'. The trailing '\n', when the line has one, is left in `buf` :
 *  the last line of a file may not have any, so callers must check before stripping it.
 *  Advances `file` to the position where reading stopped.
 * @return : length of the line, not counting its trailing '\n' (0 for an empty line),
 *           or -1 if nothing could be read (invalid argument, end of file, read error).
 */
int UTIL_readLineFromFile(char* buf, size_t len, FILE* file) {
    size_t const maxRequest = (size_t)1 << 30;   /* fgets() takes an int : keep the request within its range */
    size_t lineLen;

    if (buf == NULL || file == NULL || len == 0) return -1;
    if (len > maxRequest) len = maxRequest;

    if (feof(file)) {
        UTIL_DISPLAYLEVEL(1, "[ERROR] end of file reached and need to read\n");
        return -1;
    }

    if (fgets(buf, (int) len, file) == NULL) {
        /* Reaching the end of file here is the normal way to finish reading : only a genuine
         * read error is reported. `buf` is never printed : its content is indeterminate after a failure. */
        if (ferror(file)) {
            UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readLineFromFile] fgets failed to read from file\n");
        }
        return -1;
    }

    lineLen = strlen(buf);
    if (lineLen > 0 && buf[lineLen-1] == '\n') --lineLen;   /* ignore '\n' character, only when present */

    return (int) lineLen;
}
|
||||
|
||||
/* readFromFile() :
 * Reads `inputFileName` line by line and stores every non-empty line into `buf`
 * as a '\0'-terminated string, each one placed right after the previous one.
 * `inputFileSize` is the capacity of `buf`, in bytes; the function never writes past it.
 * Empty lines are skipped. A last line without trailing '\n' is kept in full.
 * @return : number of names stored into `buf` (possibly 0),
 *           or -1 on error : NULL `buf`, file can't be opened, read error,
 *           or file content doesn't fit into `buf`.
 */
static int readFromFile(char* buf, size_t inputFileSize, const char* inputFileName) {
    size_t const maxRequest = (size_t)1 << 30;   /* fgets() takes an int : keep the request within its range */
    FILE* inputFile;
    size_t pos = 0;
    int nbFiles = 0;
    int failed = 0;

    /* check buf before opening the file, so that no stream is leaked on this path */
    if(!buf) {
        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't create buffer.\n");
        return -1;
    }

    inputFile = fopen(inputFileName, "r");
    if(!inputFile) {
        UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't open file to read input file names.\n");
        return -1;
    }

    for (;;) {
        size_t room = (pos < inputFileSize) ? inputFileSize - pos : 0;
        size_t lineLen;

        if (room < 2) {
            /* buf is full : whatever is left in the file cannot be stored */
            if (fgetc(inputFile) != EOF || ferror(inputFile)) failed = 1;
            break;
        }
        if (room > maxRequest) room = maxRequest;

        if (fgets(buf+pos, (int)room, inputFile) == NULL) {
            /* either end of file (normal termination) or read error;
             * stopping in both cases guarantees the loop always terminates */
            if (ferror(inputFile)) failed = 1;
            break;
        }

        lineLen = strlen(buf+pos);
        if (lineLen > 0 && buf[pos+lineLen-1] == '\n') {
            --lineLen;
            buf[pos+lineLen] = '\0';   /* replace '\n' with '\0' */
        } else if (!feof(inputFile)) {
            /* no '\n' and not at end of file : the line was cut because it doesn't fit */
            failed = 1;
            break;
        }

        if (lineLen == 0) continue;    /* skip empty lines */

        pos += lineLen + 1;            /* +1 : keep the '\0' terminating this name */
        ++nbFiles;
    }

    fclose(inputFile);

    return failed ? -1 : nbFiles;
}
|
||||
|
||||
/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable_fromFileName(const char* inputFileName) {
|
||||
U64 inputFileSize = 0;
|
||||
unsigned nbFiles = 0;
|
||||
int ret_nbFiles = -1;
|
||||
char* buf = NULL;
|
||||
size_t i = 0, pos = 0;
|
||||
|
||||
FileNamesTable* filesTable = NULL;
|
||||
|
||||
if(!UTIL_fileExist(inputFileName) || !UTIL_isRegularFile(inputFileName))
|
||||
return NULL;
|
||||
|
||||
inputFileSize = UTIL_getFileSize(inputFileName) + 1; /* (+1) to add '\0' at the end of last filename */
|
||||
|
||||
if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE)
|
||||
return NULL;
|
||||
|
||||
buf = (char*) malloc((size_t) inputFileSize * sizeof(char));
|
||||
if(!buf) {
|
||||
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't create buffer.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret_nbFiles = readFromFile(buf, (size_t) inputFileSize, inputFileName);
|
||||
|
||||
if(ret_nbFiles <= 0) {
|
||||
free(buf);
|
||||
return NULL;
|
||||
}
|
||||
nbFiles = ret_nbFiles;
|
||||
|
||||
filesTable = UTIL_createFileNamesTable(NULL, NULL, 0);
|
||||
if(!filesTable) {
|
||||
free(buf);
|
||||
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_readFileNamesTableFromFile] Can't create table for files.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
filesTable->tableSize = nbFiles;
|
||||
filesTable->fileNames = (const char**) malloc((nbFiles+1) * sizeof(char*));
|
||||
|
||||
|
||||
|
||||
for(i = 0, pos = 0; i < nbFiles; ++i) {
|
||||
filesTable->fileNames[i] = buf+pos;
|
||||
pos += strlen(buf+pos)+1;
|
||||
}
|
||||
|
||||
|
||||
if(pos > inputFileSize){
|
||||
UTIL_freeFileNamesTable(filesTable);
|
||||
if(buf) free(buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
filesTable->buf = buf;
|
||||
|
||||
return filesTable;
|
||||
}
|
||||
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize){
|
||||
FileNamesTable* table = (FileNamesTable*) malloc(sizeof(FileNamesTable));
|
||||
if(!table) {
|
||||
return NULL;
|
||||
}
|
||||
table->fileNames = filenames;
|
||||
table->buf = buf;
|
||||
table->tableSize = tableSize;
|
||||
return table;
|
||||
}
|
||||
|
||||
/*! UTIL_freeFileNamesTable() :
 *  Releases `table` together with the names array and the buffer it references.
 *  Does nothing when `table` is NULL.
 */
void UTIL_freeFileNamesTable(FileNamesTable* table) {
    if (table == NULL) return;

    /* free(NULL) is a no-op, so members that were never set need no special case */
    free((void*)table->fileNames);
    free(table->buf);
    free(table);
}
|
||||
|
||||
/* getTotalTableSize() :
 * @return : number of bytes needed to store all the names of `table`, each one with its '\0'.
 * Counting stops at the first NULL entry, or after table->tableSize entries, whichever comes first.
 */
static size_t getTotalTableSize(FileNamesTable* table) {
    size_t nbBytes = 0;
    size_t nameIdx;

    for (nameIdx = 0; nameIdx < table->tableSize; ++nameIdx) {
        const char* const name = table->fileNames[nameIdx];
        if (name == NULL) break;
        nbBytes += strlen(name) + 1;   /* +1 to add '\0' at the end of each fileName */
    }

    return nbBytes;
}
|
||||
|
||||
FileNamesTable*
|
||||
UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2) {
|
||||
unsigned newTableIdx = 0, idx1 = 0, idx2 = 0;
|
||||
size_t i = 0, pos = 0;
|
||||
size_t newTotalTableSize = 0;
|
||||
|
||||
FileNamesTable* newTable = NULL;
|
||||
|
||||
char* buf = NULL;
|
||||
|
||||
|
||||
newTable = UTIL_createFileNamesTable(NULL, NULL, 0);
|
||||
|
||||
if(!newTable) {
|
||||
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create new table for concatenation output.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
newTotalTableSize = getTotalTableSize(table1) + getTotalTableSize(table2);
|
||||
|
||||
buf = (char*) malloc(newTotalTableSize * sizeof(char));
|
||||
if(!buf) {
|
||||
UTIL_freeFileNamesTable(newTable);
|
||||
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create buf for concatenation output.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for(i = 0; i < newTotalTableSize ; ++i) buf[i] = '\0';
|
||||
|
||||
newTable->tableSize = table1->tableSize + table2->tableSize;
|
||||
newTable->fileNames = (const char **) malloc(newTable->tableSize * sizeof(char*));
|
||||
|
||||
if(!newTable->fileNames) {
|
||||
UTIL_freeFileNamesTable(newTable);
|
||||
if(buf) free(buf);
|
||||
UTIL_DISPLAYLEVEL(1, "[ERROR][UTIL_concatenateTwoTables] Can't create new table for concatenation output.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < newTable->tableSize; ++i)
|
||||
newTable->fileNames[i] = NULL;
|
||||
|
||||
for( ; idx1 < table1->tableSize && table1->fileNames[idx1] && pos < newTotalTableSize; ++idx1, ++newTableIdx) {
|
||||
size_t curLen = strlen(table1->fileNames[idx1]);
|
||||
memcpy(buf+pos, table1->fileNames[idx1], curLen);
|
||||
newTable->fileNames[newTableIdx] = buf+pos;
|
||||
pos += curLen+1;
|
||||
}
|
||||
|
||||
|
||||
for( ; idx2 < table2->tableSize && table2->fileNames[idx2] && pos < newTotalTableSize ; ++idx2, ++newTableIdx) {
|
||||
size_t curLen = strlen(table2->fileNames[idx2]);
|
||||
memcpy(buf+pos, table2->fileNames[idx2], curLen);
|
||||
newTable->fileNames[newTableIdx] = buf+pos;
|
||||
pos += curLen+1;
|
||||
}
|
||||
|
||||
if(pos > newTotalTableSize) {
|
||||
UTIL_freeFileNamesTable(newTable);
|
||||
if(buf) free(buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
newTable->buf = buf;
|
||||
|
||||
UTIL_freeFileNamesTable(table1);
|
||||
UTIL_freeFileNamesTable(table2);
|
||||
|
||||
return newTable;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
|
||||
{
|
||||
|
@ -90,7 +90,7 @@ extern "C" {
|
||||
* Constants
|
||||
***************************************/
|
||||
#define LIST_SIZE_INCREASE (8*1024)
|
||||
|
||||
/* Size limit, in bytes, of a file listing file names : 50 MB.
 * Fully parenthesized, so that it expands as a single value inside any expression. */
#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50)
|
||||
|
||||
/*-****************************************
|
||||
* Compiler specifics
|
||||
@ -142,6 +142,50 @@ U32 UTIL_isLink(const char* infilename);
|
||||
U64 UTIL_getFileSize(const char* infilename);
|
||||
|
||||
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
|
||||
/*! UTIL_readLineFromFile(char* buf, size_t len, File* file):
|
||||
* @return : int. size of the next line in the file, or -1 if the end of the file was reached or the line could not be read
|
||||
* function reads next line in the file
|
||||
* Will also modify `*file`, advancing it to position where it stopped reading.
|
||||
*/
|
||||
int UTIL_readLineFromFile(char* buf, size_t len, FILE* file);
|
||||
|
||||
/* Note: tableSize denotes the total capacity of the table */
|
||||
typedef struct {
    const char** fileNames;   /* array of pointers to the file names */
    char* buf;                /* buffer freed along with the table; may be NULL */
    size_t tableSize;         /* total capacity of fileNames[], in number of entries */
} FileNamesTable;
|
||||
|
||||
/*! UTIL_createFileNamesTable_fromFileName(const char* inputFileName) :
|
||||
* @return : FileNamesTable* holding the file names read from the file, or NULL on failure (e.g. the file doesn't exist or is not a regular file).
|
||||
* reads fileNamesTable from input fileName.
|
||||
* Note: inputFileSize should be less than or equal 50MB
|
||||
*/
|
||||
FileNamesTable* UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
|
||||
|
||||
|
||||
/*! UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize) :
|
||||
* This function takes a buffer-based array of filenames, its buf and its tableSize, and creates a FileNamesTable object from them.
|
||||
* @return : FileNamesTable*
|
||||
*/
|
||||
|
||||
FileNamesTable*
|
||||
UTIL_createFileNamesTable(const char** filenames, char* buf, size_t tableSize);
|
||||
|
||||
|
||||
/*! UTIL_freeFileNamesTable(FileNamesTable* table) :
|
||||
* This function takes a buffer-based table and frees it.
|
||||
* @return : void.
|
||||
*/
|
||||
void UTIL_freeFileNamesTable(FileNamesTable* table);
|
||||
|
||||
/*! UTIL_concatenateTwoTables(FileNamesTable* table1,FileNamesTable* table2):
|
||||
* takes table1 and table2, frees them, and returns their concatenation.
|
||||
* @return : FileNamesTable* concatenation of two tables
|
||||
* note table1 and table2 will be freed
|
||||
*/
|
||||
FileNamesTable* UTIL_concatenateTwoTables(FileNamesTable* table1, FileNamesTable* table2);
|
||||
|
||||
/*
|
||||
* A modified version of realloc().
|
||||
|
@ -567,6 +567,7 @@ int main(int argCount, const char* argv[])
|
||||
nextArgumentIsMaxDict = 0,
|
||||
nextArgumentIsDictID = 0,
|
||||
nextArgumentsAreFiles = 0,
|
||||
isTableBufferBased = 0,
|
||||
nextEntryIsDictionary = 0,
|
||||
operationResult = 0,
|
||||
separateFiles = 0,
|
||||
@ -584,7 +585,12 @@ int main(int argCount, const char* argv[])
|
||||
int cLevelLast = -1000000000;
|
||||
unsigned recursive = 0;
|
||||
unsigned memLimit = 0;
|
||||
const char** filenameTable = (const char**)malloc((size_t)argCount * sizeof(const char*)); /* argCount >= 1 */
|
||||
size_t filenameTableSize = argCount;
|
||||
const char** filenameTable = (const char**)malloc(filenameTableSize * sizeof(const char*)); /* argCount >= 1 */
|
||||
FileNamesTable* extendedTable = NULL;
|
||||
FileNamesTable* concatenatedTables = NULL;
|
||||
FileNamesTable* curTable = NULL;
|
||||
char* tableBuf = NULL;
|
||||
unsigned filenameIdx = 0;
|
||||
const char* programName = argv[0];
|
||||
const char* outFileName = NULL;
|
||||
@ -796,6 +802,45 @@ int main(int argCount, const char* argv[])
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (longCommandWArg(&argument, "--file=")) {
|
||||
|
||||
if(!UTIL_fileExist(argument) || !UTIL_isRegularFile(argument)){
|
||||
DISPLAYLEVEL(1, "[ERROR] wrong fileName: %s\n", argument);
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
|
||||
extendedTable = UTIL_createFileNamesTable_fromFileName(argument);
|
||||
if(!extendedTable) {
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
|
||||
|
||||
filenameTable[filenameIdx] = NULL; // marking end of table
|
||||
|
||||
curTable = UTIL_createFileNamesTable(filenameTable, tableBuf, filenameTableSize);
|
||||
|
||||
if(!curTable) {
|
||||
UTIL_freeFileNamesTable(extendedTable);
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
|
||||
concatenatedTables = UTIL_concatenateTwoTables(curTable, extendedTable);
|
||||
if(!concatenatedTables) {
|
||||
UTIL_freeFileNamesTable(curTable);
|
||||
UTIL_freeFileNamesTable(extendedTable);
|
||||
CLEAN_RETURN(badusage(programName));
|
||||
}
|
||||
|
||||
filenameTable = concatenatedTables->fileNames;
|
||||
filenameTableSize = concatenatedTables->tableSize;
|
||||
tableBuf = concatenatedTables->buf;
|
||||
|
||||
filenameIdx += (unsigned) extendedTable->tableSize;
|
||||
isTableBufferBased = 1;
|
||||
|
||||
continue;
|
||||
}
|
||||
/* fall-through, will trigger bad_usage() later on */
|
||||
}
|
||||
|
||||
@ -1205,6 +1250,15 @@ int main(int argCount, const char* argv[])
|
||||
_end:
|
||||
FIO_freePreferences(prefs);
|
||||
|
||||
if(filenameTable) {
|
||||
if(isTableBufferBased && tableBuf){
|
||||
free(tableBuf);
|
||||
}
|
||||
}
|
||||
UTIL_freeFileNamesTable(curTable);
|
||||
UTIL_freeFileNamesTable(extendedTable);
|
||||
UTIL_freeFileNamesTable(concatenatedTables);
|
||||
|
||||
if (main_pause) waitEnter();
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
if (extendedFileList)
|
||||
|
@ -293,6 +293,53 @@ test -f tmpOutDirDecomp/tmp2
|
||||
test -f tmpOutDirDecomp/tmp1
|
||||
rm -rf tmp*
|
||||
|
||||
println "test : compress multiple files reading them from a file, --file=FILE"
|
||||
mkdir tmpInputTestDir
|
||||
println "Hello world!, file1" > tmpInputTestDir/file1
|
||||
println "Hello world!, file2" > tmpInputTestDir/file2
|
||||
println tmpInputTestDir/file1 > tmp
|
||||
println tmpInputTestDir/file2 >> tmp
|
||||
$ZSTD -f --file=tmp
|
||||
test -f tmpInputTestDir/file2.zst
|
||||
test -f tmpInputTestDir/file1.zst
|
||||
rm tmpInputTestDir/*.zst
|
||||
|
||||
println "test : compress multiple files reading them from multiple files, --file=FILE"
|
||||
println "Hello world!, file3" > tmpInputTestDir/file3
|
||||
println "Hello world!, file4" > tmpInputTestDir/file4
|
||||
println tmpInputTestDir/file3 > tmp1
|
||||
println tmpInputTestDir/file4 >> tmp1
|
||||
$ZSTD -f --file=tmp --file=tmp1
|
||||
test -f tmpInputTestDir/file1.zst
|
||||
test -f tmpInputTestDir/file2.zst
|
||||
test -f tmpInputTestDir/file3.zst
|
||||
test -f tmpInputTestDir/file4.zst
|
||||
|
||||
println "test : decompress multiple files reading them from a file, --file=FILE"
|
||||
rm tmpInputTestDir/file1
|
||||
rm tmpInputTestDir/file2
|
||||
println tmpInputTestDir/file1.zst > tmpZst
|
||||
println tmpInputTestDir/file2.zst >> tmpZst
|
||||
$ZSTD -d -f --file=tmpZst
|
||||
test -f tmpInputTestDir/file2
|
||||
test -f tmpInputTestDir/file1
|
||||
|
||||
println "test : decompress multiple files reading them from multiple files, --file=FILE"
|
||||
rm tmpInputTestDir/file1
|
||||
rm tmpInputTestDir/file2
|
||||
rm tmpInputTestDir/file3
|
||||
rm tmpInputTestDir/file4
|
||||
println tmpInputTestDir/file3.zst > tmpZst1
|
||||
println tmpInputTestDir/file4.zst >> tmpZst1
|
||||
$ZSTD -d -f --file=tmpZst --file=tmpZst1
|
||||
test -f tmpInputTestDir/file1
|
||||
test -f tmpInputTestDir/file2
|
||||
test -f tmpInputTestDir/file3
|
||||
test -f tmpInputTestDir/file4
|
||||
|
||||
rm -rf tmp*
|
||||
|
||||
|
||||
println "\n===> Advanced compression parameters "
|
||||
println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!"
|
||||
println "Hello world!" | $ZSTD --zstd=windowLo=21 - -o tmp.zst && die "wrong parameters not detected!"
|
||||
|
Loading…
Reference in New Issue
Block a user