Merge pull request #543 from inikep/gz_compress

Gz compress
This commit is contained in:
Yann Collet 2017-02-14 10:30:22 -08:00 committed by GitHub
commit aca067f19d
4 changed files with 103 additions and 11 deletions

View File

@ -70,7 +70,7 @@ VOID = /dev/null
HAVE_ZLIB := $(shell printf '\#include <zlib.h>\nint main(){}' | $(CC) -o have_zlib -x c - -lz 2> $(VOID) && echo 1 || echo 0) HAVE_ZLIB := $(shell printf '\#include <zlib.h>\nint main(){}' | $(CC) -o have_zlib -x c - -lz 2> $(VOID) && echo 1 || echo 0)
ifeq ($(HAVE_ZLIB), 1) ifeq ($(HAVE_ZLIB), 1)
TEMP := $(shell rm have_zlib$(EXT)) TEMP := $(shell rm have_zlib$(EXT))
ZLIBCPP = -DZSTD_GZDECOMPRESS ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS
ZLIBLD = -lz ZLIBLD = -lz
endif endif

View File

@ -38,7 +38,7 @@
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
# include "zstdmt_compress.h" # include "zstdmt_compress.h"
#endif #endif
#ifdef ZSTD_GZDECOMPRESS #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
# include <zlib.h> # include <zlib.h>
# if !defined(z_const) # if !defined(z_const)
# define z_const # define z_const
@ -121,6 +121,8 @@ static clock_t g_time = 0;
/*-************************************* /*-*************************************
* Local Parameters - Not thread safe * Local Parameters - Not thread safe
***************************************/ ***************************************/
static FIO_compressionType_t g_compressionType = FIO_zstdCompression;
void FIO_setCompressionType(FIO_compressionType_t compressionType) { g_compressionType = compressionType; }
static U32 g_overwrite = 0; static U32 g_overwrite = 0;
void FIO_overwriteMode(void) { g_overwrite=1; } void FIO_overwriteMode(void) { g_overwrite=1; }
static U32 g_sparseFileSupport = 1; /* 0 : no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ static U32 g_sparseFileSupport = 1; /* 0 : no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
@ -361,13 +363,79 @@ static void FIO_freeCResources(cRess_t ress)
} }
#ifdef ZSTD_GZCOMPRESS
static unsigned long long FIO_compressGzFrame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, int compressionLevel, U64* readsize)
{
unsigned long long inFileSize = 0, outFileSize = 0;
z_stream strm;
int ret;
if (compressionLevel > Z_BEST_COMPRESSION) compressionLevel = Z_BEST_COMPRESSION;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, 15 /* maxWindowLogSize */ + 16 /* gzip only */, 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
if (ret != Z_OK) EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
strm.next_in = 0;
strm.avail_in = Z_NULL;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
while (1) {
if (strm.avail_in == 0) {
size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
if (inSize == 0) break;
inFileSize += inSize;
strm.next_in = (z_const unsigned char*)ress->srcBuffer;
strm.avail_in = (uInt)inSize;
}
ret = deflate(&strm, Z_NO_FLUSH);
if (ret != Z_OK) EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) EXM_THROW(73, "Write error : cannot write to output file");
outFileSize += decompBytes;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
}
}
if (!srcFileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(inFileSize>>20), (double)outFileSize/inFileSize*100)
else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(inFileSize>>20), (U32)(srcFileSize>>20), (double)outFileSize/inFileSize*100);
}
while (1) {
ret = deflate(&strm, Z_FINISH);
{ size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
if (decompBytes) {
if (fwrite(ress->dstBuffer, 1, decompBytes, ress->dstFile) != decompBytes) EXM_THROW(75, "Write error : cannot write to output file");
outFileSize += decompBytes;
strm.next_out = (Bytef*)ress->dstBuffer;
strm.avail_out = (uInt)ress->dstBufferSize;
}
}
if (ret == Z_STREAM_END) break;
if (ret != Z_BUF_ERROR) EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
}
ret = deflateEnd(&strm);
if (ret != Z_OK) EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
*readsize = inFileSize;
return outFileSize;
}
#endif
/*! FIO_compressFilename_internal() : /*! FIO_compressFilename_internal() :
* same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
* @return : 0 : compression completed correctly, * @return : 0 : compression completed correctly,
* 1 : missing or pb opening srcFileName * 1 : missing or pb opening srcFileName
*/ */
static int FIO_compressFilename_internal(cRess_t ress, static int FIO_compressFilename_internal(cRess_t ress,
const char* dstFileName, const char* srcFileName) const char* dstFileName, const char* srcFileName, int compressionLevel)
{ {
FILE* const srcFile = ress.srcFile; FILE* const srcFile = ress.srcFile;
FILE* const dstFile = ress.dstFile; FILE* const dstFile = ress.dstFile;
@ -375,6 +443,16 @@ static int FIO_compressFilename_internal(cRess_t ress,
U64 compressedfilesize = 0; U64 compressedfilesize = 0;
U64 const fileSize = UTIL_getFileSize(srcFileName); U64 const fileSize = UTIL_getFileSize(srcFileName);
if (g_compressionType) {
#ifdef ZSTD_GZCOMPRESS
compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
#else
(void)compressionLevel;
EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName);
#endif
goto finish;
}
/* init */ /* init */
#ifdef ZSTD_MULTITHREAD #ifdef ZSTD_MULTITHREAD
{ size_t const resetError = ZSTDMT_resetCStream(ress.cctx, fileSize); { size_t const resetError = ZSTDMT_resetCStream(ress.cctx, fileSize);
@ -432,6 +510,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
} }
} }
finish:
/* Status */ /* Status */
DISPLAYLEVEL(2, "\r%79s\r", ""); DISPLAYLEVEL(2, "\r%79s\r", "");
DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", srcFileName, DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6llu => %6llu bytes, %s) \n", srcFileName,
@ -449,7 +528,7 @@ static int FIO_compressFilename_internal(cRess_t ress,
* 1 : missing or pb opening srcFileName * 1 : missing or pb opening srcFileName
*/ */
static int FIO_compressFilename_srcFile(cRess_t ress, static int FIO_compressFilename_srcFile(cRess_t ress,
const char* dstFileName, const char* srcFileName) const char* dstFileName, const char* srcFileName, int compressionLevel)
{ {
int result; int result;
@ -462,7 +541,7 @@ static int FIO_compressFilename_srcFile(cRess_t ress,
ress.srcFile = FIO_openSrcFile(srcFileName); ress.srcFile = FIO_openSrcFile(srcFileName);
if (!ress.srcFile) return 1; /* srcFile could not be opened */ if (!ress.srcFile) return 1; /* srcFile could not be opened */
result = FIO_compressFilename_internal(ress, dstFileName, srcFileName); result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, compressionLevel);
fclose(ress.srcFile); fclose(ress.srcFile);
if (g_removeSrcFile && !result) { if (remove(srcFileName)) EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); } /* remove source file : --rm */ if (g_removeSrcFile && !result) { if (remove(srcFileName)) EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); } /* remove source file : --rm */
@ -475,7 +554,7 @@ static int FIO_compressFilename_srcFile(cRess_t ress,
* 1 : pb * 1 : pb
*/ */
static int FIO_compressFilename_dstFile(cRess_t ress, static int FIO_compressFilename_dstFile(cRess_t ress,
const char* dstFileName, const char* srcFileName) const char* dstFileName, const char* srcFileName, int compressionLevel)
{ {
int result; int result;
stat_t statbuf; stat_t statbuf;
@ -485,7 +564,7 @@ static int FIO_compressFilename_dstFile(cRess_t ress,
if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ if (ress.dstFile==NULL) return 1; /* could not open dstFileName */
if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf)) stat_result = 1; if (strcmp (srcFileName, stdinmark) && UTIL_getFileStat(srcFileName, &statbuf)) stat_result = 1;
result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName); result = FIO_compressFilename_srcFile(ress, dstFileName, srcFileName, compressionLevel);
if (fclose(ress.dstFile)) { DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; } /* error closing dstFile */ if (fclose(ress.dstFile)) { DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; } /* error closing dstFile */
if (result!=0) { if (remove(dstFileName)) EXM_THROW(1, "zstd: %s: %s", dstFileName, strerror(errno)); } /* remove operation artefact */ if (result!=0) { if (remove(dstFileName)) EXM_THROW(1, "zstd: %s: %s", dstFileName, strerror(errno)); } /* remove operation artefact */
@ -502,7 +581,7 @@ int FIO_compressFilename(const char* dstFileName, const char* srcFileName,
int const regFile = UTIL_isRegFile(srcFileName); int const regFile = UTIL_isRegFile(srcFileName);
cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, regFile, comprParams); cRess_t const ress = FIO_createCResources(dictFileName, compressionLevel, srcSize, regFile, comprParams);
int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName); int const result = FIO_compressFilename_dstFile(ress, dstFileName, srcFileName, compressionLevel);
double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC; double const seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds);
@ -535,7 +614,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
ress.dstFile = stdout; ress.dstFile = stdout;
SET_BINARY_MODE(stdout); SET_BINARY_MODE(stdout);
for (u=0; u<nbFiles; u++) for (u=0; u<nbFiles; u++)
missed_files += FIO_compressFilename_srcFile(ress, stdoutmark, inFileNamesTable[u]); missed_files += FIO_compressFilename_srcFile(ress, stdoutmark, inFileNamesTable[u], compressionLevel);
if (fclose(ress.dstFile)) EXM_THROW(29, "Write error : cannot properly close stdout"); if (fclose(ress.dstFile)) EXM_THROW(29, "Write error : cannot properly close stdout");
} else { } else {
unsigned u; unsigned u;
@ -544,7 +623,7 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); } if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); }
strcpy(dstFileName, inFileNamesTable[u]); strcpy(dstFileName, inFileNamesTable[u]);
strcat(dstFileName, suffix); strcat(dstFileName, suffix);
missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u]); missed_files += FIO_compressFilename_dstFile(ress, dstFileName, inFileNamesTable[u], compressionLevel);
} } } }
/* Close & Free */ /* Close & Free */

View File

@ -31,9 +31,16 @@ extern "C" {
#endif #endif
/*-*************************************
* Types
***************************************/
typedef enum { FIO_zstdCompression, FIO_gzipCompression } FIO_compressionType_t;
/*-************************************* /*-*************************************
* Parameters * Parameters
***************************************/ ***************************************/
void FIO_setCompressionType(FIO_compressionType_t compressionType);
void FIO_overwriteMode(void); void FIO_overwriteMode(void);
void FIO_setNotificationLevel(unsigned level); void FIO_setNotificationLevel(unsigned level);
void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setSparseWrite(unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */

View File

@ -49,6 +49,7 @@
#define AUTHOR "Yann Collet" #define AUTHOR "Yann Collet"
#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR
#define GZ_EXTENSION ".gz"
#define ZSTD_EXTENSION ".zst" #define ZSTD_EXTENSION ".zst"
#define ZSTD_CAT "zstdcat" #define ZSTD_CAT "zstdcat"
#define ZSTD_UNZSTD "unzstd" #define ZSTD_UNZSTD "unzstd"
@ -123,6 +124,9 @@ static int usage_advanced(const char* programName)
DISPLAY( " -T# : use # threads for compression (default:1) \n"); DISPLAY( " -T# : use # threads for compression (default:1) \n");
DISPLAY( " -B# : select size of independent sections (default:0==automatic) \n"); DISPLAY( " -B# : select size of independent sections (default:0==automatic) \n");
#endif #endif
#ifdef ZSTD_GZCOMPRESS
DISPLAY( "--format=gzip : compress files to the .gz format \n");
#endif
#endif #endif
#ifndef ZSTD_NODECOMPRESS #ifndef ZSTD_NODECOMPRESS
DISPLAY( "--test : test compressed file integrity \n"); DISPLAY( "--test : test compressed file integrity \n");
@ -283,6 +287,7 @@ int main(int argCount, const char* argv[])
const char* programName = argv[0]; const char* programName = argv[0];
const char* outFileName = NULL; const char* outFileName = NULL;
const char* dictFileName = NULL; const char* dictFileName = NULL;
const char* suffix = ZSTD_EXTENSION;
unsigned maxDictSize = g_defaultMaxDictSize; unsigned maxDictSize = g_defaultMaxDictSize;
unsigned dictID = 0; unsigned dictID = 0;
int dictCLevel = g_defaultDictCLevel; int dictCLevel = g_defaultDictCLevel;
@ -359,6 +364,7 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; } if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(0); continue; }
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; } if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(FIO_gzipCompression); continue; }
/* long commands with arguments */ /* long commands with arguments */
#ifndef ZSTD_NODICT #ifndef ZSTD_NODICT
@ -643,7 +649,7 @@ int main(int argCount, const char* argv[])
if ((filenameIdx==1) && outFileName) if ((filenameIdx==1) && outFileName)
operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams); operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel, &compressionParams);
else else
operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName, cLevel, &compressionParams); operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : suffix, dictFileName, cLevel, &compressionParams);
#else #else
DISPLAY("Compression not supported\n"); DISPLAY("Compression not supported\n");
#endif #endif