diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index f8061f53..aeef4a33 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -187,29 +187,30 @@ static unsigned LDM_count(const BYTE *pIn, const BYTE *pMatch, return (unsigned)(pIn - pStart); } -void LDM_read_header(void const *source, size_t *compressed_size, +void LDM_read_header(const void *src, size_t *compressed_size, size_t *decompressed_size) { - const U32 *ip = (const U32 *)source; + const U32 *ip = (const U32 *)src; *compressed_size = *ip++; *decompressed_size = *ip; } -size_t LDM_compress(void const *source, void *dest, size_t source_size, - size_t max_dest_size) { - const BYTE * const istart = (const BYTE*)source; +// TODO: maxDstSize is unused +size_t LDM_compress(const void *src, size_t srcSize, + void *dst, size_t maxDstSize) { + const BYTE * const istart = (const BYTE*)src; const BYTE *ip = istart; - const BYTE * const iend = istart + source_size; + const BYTE * const iend = istart + srcSize; const BYTE *ilimit = iend - HASH_SIZE; const BYTE * const matchlimit = iend - HASH_SIZE; const BYTE * const mflimit = iend - MINMATCH; - BYTE *op = (BYTE*) dest; + BYTE *op = (BYTE*) dst; compress_stats compressStats = { 0 }; U32 hashTable[LDM_HASHTABLESIZE_U32]; memset(hashTable, 0, sizeof(hashTable)); - const BYTE *anchor = (const BYTE *)source; + const BYTE *anchor = (const BYTE *)src; // struct LDM_cctx cctx; size_t output_size = 0; @@ -361,14 +362,14 @@ _last_literals: op += lastRun; } print_compress_stats(&compressStats); - return (op - (BYTE *)dest); + return (op - (BYTE *)dst); } -size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, - size_t max_decompressed_size) { - const BYTE *ip = (const BYTE *)source; +size_t LDM_decompress(const void *src, size_t compressed_size, + void *dst, size_t max_decompressed_size) { + const BYTE *ip = (const BYTE *)src; const BYTE * const iend = ip + compressed_size; - 
BYTE *op = (BYTE *)dest; + BYTE *op = (BYTE *)dst; BYTE * const oend = op + max_decompressed_size; BYTE *cpy; @@ -437,8 +438,8 @@ size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, *op++ = *match++; } } -// memcpy(dest, source, compressed_size); - return op - (BYTE *)dest; +// memcpy(dst, src, compressed_size); + return op - (BYTE *)dst; } diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index f4ca25a3..0ac7b2ec 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -7,13 +7,13 @@ #define LDM_DECOMPRESS_SIZE 4 #define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE)) -size_t LDM_compress(void const *source, void *dest, size_t source_size, - size_t max_dest_size); +size_t LDM_compress(const void *src, size_t srcSize, + void *dst, size_t maxDstSize); -size_t LDM_decompress(void const *source, void *dest, size_t compressed_size, - size_t max_decompressed_size); +size_t LDM_decompress(const void *src, size_t srcSize, + void *dst, size_t maxDstSize); -void LDM_read_header(void const *source, size_t *compressed_size, - size_t *decompressed_size); +void LDM_read_header(const void *src, size_t *compressSize, + size_t *decompressSize); #endif /* LDM_H */ diff --git a/contrib/long_distance_matching/main-ldm.c b/contrib/long_distance_matching/main-ldm.c index 10869cce..0017335b 100644 --- a/contrib/long_distance_matching/main-ldm.c +++ b/contrib/long_distance_matching/main-ldm.c @@ -18,6 +18,263 @@ //#define ZSTD +/* Compress file given by fname and output to oname. + * Returns 0 if successful, error code otherwise. + */ +static int compress(const char *fname, const char *oname) { + int fdin, fdout; + struct stat statbuf; + char *src, *dst; + + /* Open the input file. */ + if ((fdin = open(fname, O_RDONLY)) < 0) { + perror("Error in file opening"); + return 1; + } + + /* Open the output file. 
*/ + if ((fdout = open(oname, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) { + perror("Can't create output file"); + return 1; + } + + /* Find the size of the input file. */ + if (fstat (fdin, &statbuf) < 0) { + perror("Fstat error"); + return 1; + } + + size_t maxCompressSize = statbuf.st_size + LDM_HEADER_SIZE; + + /* Go to the location corresponding to the last byte. */ + /* TODO: fallocate? */ + if (lseek(fdout, maxCompressSize - 1, SEEK_SET) == -1) { + perror("lseek error"); + return 1; + } + + /* Write a dummy byte at the last location. */ + if (write(fdout, "", 1) != 1) { + perror("write error"); + return 1; + } + + /* mmap the input file. */ + if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) + == (caddr_t) - 1) { + perror("mmap error for input"); + return 1; + } + + /* mmap the output file */ + if ((dst = mmap(0, maxCompressSize, PROT_READ | PROT_WRITE, + MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { + perror("mmap error for output"); + return 1; + } + +#ifdef ZSTD + size_t compressSize = ZSTD_compress(dst, statbuf.st_size, + src, statbuf.st_size, 1); +#else + size_t compressSize = LDM_HEADER_SIZE + + LDM_compress(src, statbuf.st_size, + dst + LDM_HEADER_SIZE, statbuf.st_size); + + // Write compress and decompress size to header + // TODO: should depend on LDM_DECOMPRESS_SIZE write32 + memcpy(dst, &compressSize, 4); + memcpy(dst + 4, &(statbuf.st_size), 4); + +#ifdef DEBUG + printf("Compressed size: %zu\n", compressSize); + printf("Decompressed size: %zu\n", statbuf.st_size); +#endif +#endif + + // Truncate file to compressSize. + ftruncate(fdout, compressSize); + + printf("%25s : %6u -> %7u - %s (%.1f%%)\n", fname, + (unsigned)statbuf.st_size, (unsigned)compressSize, oname, + (double)compressSize / (statbuf.st_size) * 100); + + // Close files. + close(fdin); + close(fdout); + return 0; +} + +/* Decompress file compressed using LDM_compress. + * The input file should have the LDM_HEADER followed by payload. 
+ * Returns 0 if successful, and an error code otherwise. + */ +static int decompress(const char *fname, const char *oname) { + int fdin, fdout; + struct stat statbuf; + char *src, *dst; + + /* Open the input file. */ + if ((fdin = open(fname, O_RDONLY)) < 0) { + perror("Error in file opening"); + return 1; + } + + /* Open the output file. */ + if ((fdout = open(oname, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) { + perror("Can't create output file"); + return 1; + } + + /* Find the size of the input file. */ + if (fstat (fdin, &statbuf) < 0) { + perror("Fstat error"); + return 1; + } + + /* mmap the input file. */ + if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) + == (caddr_t) - 1) { + perror("mmap error for input"); + return 1; + } + + /* Read the header. */ + size_t compressSize, decompressSize; + LDM_read_header(src, &compressSize, &decompressSize); + +#ifdef DEBUG + printf("Size, compressSize, decompressSize: %zu %zu %zu\n", + statbuf.st_size, compressSize, decompressSize); +#endif + + /* Go to the location corresponding to the last byte. */ + if (lseek(fdout, decompressSize - 1, SEEK_SET) == -1) { + perror("lseek error"); + return 1; + } + + /* write a dummy byte at the last location */ + if (write(fdout, "", 1) != 1) { + perror("write error"); + return 1; + } + + /* mmap the output file */ + if ((dst = mmap(0, decompressSize, PROT_READ | PROT_WRITE, + MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { + perror("mmap error for output"); + return 1; + } + +#ifdef ZSTD + size_t outSize = ZSTD_decompress(dst, decomrpessed_size, + src + LDM_HEADER_SIZE, + statbuf.st_size - LDM_HEADER_SIZE); +#else + size_t outSize = LDM_decompress( + src + LDM_HEADER_SIZE, statbuf.st_size - LDM_HEADER_SIZE, + dst, decompressSize); + + printf("Ret size out: %zu\n", outSize); + #endif + ftruncate(fdout, outSize); + + close(fdin); + close(fdout); + return 0; +} + +/* Compare two files. + * Returns 0 iff they are the same. 
+ */ +static int compare(FILE *fp0, FILE *fp1) { + int result = 0; + while (result == 0) { + char b0[1024]; + char b1[1024]; + const size_t r0 = fread(b0, 1, sizeof(b0), fp0); + const size_t r1 = fread(b1, 1, sizeof(b1), fp1); + + result = (int)r0 - (int)r1; + + if (0 == r0 || 0 == r1) break; + + if (0 == result) result = memcmp(b0, b1, r0); + } + return result; +} + +/* Verify the input file is the same as the decompressed file. */ +static void verify(const char *inpFilename, const char *decFilename) { + FILE *inpFp = fopen(inpFilename, "rb"); + FILE *decFp = fopen(decFilename, "rb"); + + printf("verify : %s <-> %s\n", inpFilename, decFilename); + const int cmp = compare(inpFp, decFp); + if(0 == cmp) { + printf("verify : OK\n"); + } else { + printf("verify : NG\n"); + } + + fclose(decFp); + fclose(inpFp); +} + +int main(int argc, const char *argv[]) { + const char * const exeName = argv[0]; + char inpFilename[256] = { 0 }; + char ldmFilename[256] = { 0 }; + char decFilename[256] = { 0 }; + + if (argc < 2) { + printf("Wrong arguments\n"); + printf("Usage:\n"); + printf("%s FILE\n", exeName); + return 1; + } + + snprintf(inpFilename, 256, "%s", argv[1]); + snprintf(ldmFilename, 256, "%s.ldm", argv[1]); + snprintf(decFilename, 256, "%s.ldm.dec", argv[1]); + + printf("inp = [%s]\n", inpFilename); + printf("ldm = [%s]\n", ldmFilename); + printf("dec = [%s]\n", decFilename); + + struct timeval tv1, tv2; + + /* Compress */ + + gettimeofday(&tv1, NULL); + if (compress(inpFilename, ldmFilename)) { + printf("Compress error"); + return 1; + } + gettimeofday(&tv2, NULL); + printf("Total time = %f seconds\n", + (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + + (double) (tv2.tv_sec - tv1.tv_sec)); + + /* Decompress */ + + gettimeofday(&tv1, NULL); + if (decompress(ldmFilename, decFilename)) { + printf("Decompress error"); + return 1; + } + gettimeofday(&tv2, NULL); + printf("Total time = %f seconds\n", + (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + + (double) (tv2.tv_sec - 
tv1.tv_sec)); + + /* verify */ + verify(inpFilename, decFilename); + return 0; +} + + #if 0 static size_t compress_file(FILE *in, FILE *out, size_t *size_in, size_t *size_out) { @@ -137,249 +394,7 @@ static size_t decompress_file(FILE *in, FILE *out) { return ret; } -#endif -static size_t compress(const char *fname, const char *oname) { - int fdin, fdout; - struct stat statbuf; - char *src, *dst; - - /* open the input file */ - if ((fdin = open(fname, O_RDONLY)) < 0) { - perror("Error in file opening"); - return 1; - } - - /* open the output file */ - if ((fdout = open(oname, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) { - perror("Can't create output file"); - return 1; - } - - /* find size of input file */ - if (fstat (fdin, &statbuf) < 0) { - perror("Fstat error"); - return 1; - } - size_t size_in = statbuf.st_size; - - /* go to the location corresponding to the last byte */ - if (lseek(fdout, size_in + LDM_HEADER_SIZE - 1, SEEK_SET) == -1) { - perror("lseek error"); - return 1; - } - - /* write a dummy byte at the last location */ - if (write(fdout, "", 1) != 1) { - perror("write error"); - return 1; - } - - /* mmap the input file */ - if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) - == (caddr_t) - 1) { - perror("mmap error for input"); - return 1; - } - size_t out_size = statbuf.st_size + LDM_HEADER_SIZE; - - /* mmap the output file */ - if ((dst = mmap(0, out_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { - perror("mmap error for output"); - return 1; - } - - #ifdef ZSTD - size_t size_out = ZSTD_compress(dst, statbuf.st_size, - src, statbuf.st_size, 1); - #else - size_t size_out = LDM_compress(src, dst + LDM_HEADER_SIZE, statbuf.st_size, - statbuf.st_size); - size_out += LDM_HEADER_SIZE; - - // TODO: should depend on LDM_DECOMPRESS_SIZE write32 - memcpy(dst, &size_out, 4); - memcpy(dst + 4, &(statbuf.st_size), 4); - printf("Compressed size: %zu\n", size_out); - printf("Decompressed size: %zu\n", 
statbuf.st_size); - #endif - ftruncate(fdout, size_out); - - printf("%25s : %6u -> %7u - %s (%.1f%%)\n", fname, - (unsigned)statbuf.st_size, (unsigned)size_out, oname, - (double)size_out / (statbuf.st_size) * 100); - - close(fdin); - close(fdout); - return 0; -} - -static size_t decompress(const char *fname, const char *oname) { - int fdin, fdout; - struct stat statbuf; - char *src, *dst; - - /* open the input file */ - if ((fdin = open(fname, O_RDONLY)) < 0) { - perror("Error in file opening"); - return 1; - } - - /* open the output file */ - if ((fdout = open(oname, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) { - perror("Can't create output file"); - return 1; - } - - /* find size of input file */ - if (fstat (fdin, &statbuf) < 0) { - perror("Fstat error"); - return 1; - } - - /* mmap the input file */ - if ((src = mmap(0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) - == (caddr_t) - 1) { - perror("mmap error for input"); - return 1; - } - - /* read header */ - size_t compressed_size, decompressed_size; - LDM_read_header(src, &compressed_size, &decompressed_size); - - printf("Size, compressed_size, decompressed_size: %zu %zu %zu\n", - statbuf.st_size, compressed_size, decompressed_size); - - /* go to the location corresponding to the last byte */ - if (lseek(fdout, decompressed_size - 1, SEEK_SET) == -1) { - perror("lseek error"); - return 1; - } - - /* write a dummy byte at the last location */ - if (write(fdout, "", 1) != 1) { - perror("write error"); - return 1; - } - - /* mmap the output file */ - if ((dst = mmap(0, decompressed_size, PROT_READ | PROT_WRITE, - MAP_SHARED, fdout, 0)) == (caddr_t) - 1) { - perror("mmap error for output"); - return 1; - } - - /* Copy input file to output file */ -// memcpy(dst, src, statbuf.st_size); - - #ifdef ZSTD - size_t size_out = ZSTD_decompress(dst, decomrpessed_size, - src + LDM_HEADER_SIZE, - statbuf.st_size - LDM_HEADER_SIZE); - #else - size_t size_out = LDM_decompress(src + LDM_HEADER_SIZE, dst, - 
statbuf.st_size - LDM_HEADER_SIZE, - decompressed_size); - printf("Ret size out: %zu\n", size_out); - #endif - ftruncate(fdout, size_out); - - close(fdin); - close(fdout); - return 0; -} - -static int compare(FILE *fp0, FILE *fp1) { - int result = 0; - while (result == 0) { - char b0[1024]; - char b1[1024]; - const size_t r0 = fread(b0, 1, sizeof(b0), fp0); - const size_t r1 = fread(b1, 1, sizeof(b1), fp1); - - result = (int)r0 - (int)r1; - - if (0 == r0 || 0 == r1) { - break; - } - if (0 == result) { - result = memcmp(b0, b1, r0); - } - } - return result; -} - -static void verify(const char *inpFilename, const char *decFilename) { - FILE *inpFp = fopen(inpFilename, "rb"); - FILE *decFp = fopen(decFilename, "rb"); - - printf("verify : %s <-> %s\n", inpFilename, decFilename); - const int cmp = compare(inpFp, decFp); - if(0 == cmp) { - printf("verify : OK\n"); - } else { - printf("verify : NG\n"); - } - - fclose(decFp); - fclose(inpFp); -} - -int main(int argc, const char *argv[]) { - const char * const exeName = argv[0]; - char inpFilename[256] = { 0 }; - char ldmFilename[256] = { 0 }; - char decFilename[256] = { 0 }; - - if (argc < 2) { - printf("Wrong arguments\n"); - printf("Usage:\n"); - printf("%s FILE\n", exeName); - return 1; - } - - snprintf(inpFilename, 256, "%s", argv[1]); - snprintf(ldmFilename, 256, "%s.ldm", argv[1]); - snprintf(decFilename, 256, "%s.ldm.dec", argv[1]); - - printf("inp = [%s]\n", inpFilename); - printf("ldm = [%s]\n", ldmFilename); - printf("dec = [%s]\n", decFilename); - - struct timeval tv1, tv2; - /* compress */ - { - gettimeofday(&tv1, NULL); - if (compress(inpFilename, ldmFilename)) { - printf("Compress error"); - return 1; - } - gettimeofday(&tv2, NULL); - printf("Total time = %f seconds\n", - (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + - (double) (tv2.tv_sec - tv1.tv_sec)); - } - - /* decompress */ - - gettimeofday(&tv1, NULL); - if (decompress(ldmFilename, decFilename)) { - printf("Decompress error"); - return 1; - } - 
gettimeofday(&tv2, NULL); - printf("Total time = %f seconds\n", - (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + - (double) (tv2.tv_sec - tv1.tv_sec)); - - /* verify */ - verify(inpFilename, decFilename); - return 0; -} - -#if 0 int main2(int argc, char *argv[]) { char inpFilename[256] = { 0 }; char ldmFilename[256] = { 0 };