diff options
-rw-r--r-- | lib/lz4frame.c | 4 | ||||
-rw-r--r-- | lib/lz4frame.h | 9 | ||||
-rw-r--r-- | programs/lz4io.c | 377 | ||||
-rw-r--r-- | programs/lz4io.h | 2 | ||||
-rw-r--r-- | tests/Makefile | 7 | ||||
-rw-r--r-- | tests/test-lz4-list.py | 282 |
6 files changed, 585 insertions, 96 deletions
diff --git a/lib/lz4frame.c b/lib/lz4frame.c index f131d9a..95b8b8e 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -213,8 +213,8 @@ static void LZ4F_writeLE64 (void* dst, U64 value64) static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */ static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */ -static const size_t BHSize = 4; /* block header : size, and compress flag */ -static const size_t BFSize = 4; /* block footer : checksum (optional) */ +static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */ +static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */ /*-************************************ diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 742c252..391e484 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -253,6 +253,15 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx); #define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected paramaters */ #define LZ4F_HEADER_SIZE_MAX 19 +/* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */ +#define LZ4F_BLOCK_HEADER_SIZE 4 + +/* Size in bytes of a block checksum footer in little-endian format. */ +#define LZ4F_BLOCK_CHECKSUM_SIZE 4 + +/* Size in bytes of the content checksum. */ +#define LZ4F_CONTENT_CHECKSUM_SIZE 4 + /*! LZ4F_compressBegin() : * will write the frame header into dstBuffer. * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. diff --git a/programs/lz4io.c b/programs/lz4io.c index 960c451..c27a0ad 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -1278,82 +1278,118 @@ int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, /* ********************** LZ4 --list command *********************** */ /* ********************************************************************* */ +typedef enum +{ + lz4Frame = 0, + legacyFrame, + skippableFrame +} LZ4IO_frameType_t; + +typedef struct { + LZ4F_frameInfo_t lz4FrameInfo; + LZ4IO_frameType_t frameType; +} LZ4IO_frameInfo_t; + +#define LZ4IO_INIT_FRAMEINFO { LZ4F_INIT_FRAMEINFO, lz4Frame } + typedef struct { - LZ4F_frameInfo_t frameInfo; const char* fileName; unsigned long long fileSize; + unsigned long long frameCount; + LZ4IO_frameInfo_t frameSummary; + unsigned short eqFrameTypes; + unsigned short eqBlockTypes; + unsigned short allContentSize; } LZ4IO_cFileInfo_t; -#define LZ4IO_INIT_CFILEINFO { LZ4F_INIT_FRAMEINFO, NULL, 0ULL } - +#define LZ4IO_INIT_CFILEINFO { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 } typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult; -/* This function is limited, - * it only works fine for a file consisting of a single valid frame using LZ4 Frame specification. - * It will not look at content beyond first frame header. - * It's also unable to parse legacy frames, nor skippable ones. - * - * Things to improve : - * - check the entire file for additional content after first frame - * + combine results from multiple frames, give total - * - Optional : - * + report nb of blocks, hence max. possible decompressed size (when not reported in header) - */ -static LZ4IO_infoResult -LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename) -{ - LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */ - - if (!UTIL_isRegFile(input_filename)) return LZ4IO_not_a_file; - cfinfo->fileSize = UTIL_getFileSize(input_filename); +static const char * LZ4IO_frameTypeNames[]={"LZ4Frame", "LegacyFrame", "SkippableFrame" }; - /* Get filename without path prefix */ - { const char* b = strrchr(input_filename, '/'); - if (!b) { - b = strrchr(input_filename, '\\'); - } - if (b && b != input_filename) { - b++; - } else { - b = input_filename; +/* Read block headers and skip block data + Return total blocks size for this frame including headers, + block checksums and content checksums. + returns 0 in case it can't succesfully skip block data. + Assumes SEEK_CUR after frame header. + */ +static unsigned long long LZ4IO_skipBlocksData(FILE* finput, + const LZ4F_blockChecksum_t blockChecksumFlag, + const LZ4F_contentChecksum_t contentChecksumFlag){ + unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE]; + unsigned long long totalBlocksSize = 0; + for(;;){ + if (!fread(blockInfo, 1, LZ4F_BLOCK_HEADER_SIZE, finput)){ + if (feof(finput)) return totalBlocksSize; + return 0; + } + totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE; + { + const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU; + const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE); + if (nextCBlockSize == 0){ + /* Reached EndMark */ + if(contentChecksumFlag){ + /* Skip content checksum */ + if(fseek(finput, LZ4F_CONTENT_CHECKSUM_SIZE, SEEK_CUR) != 0){ + return 0; + } + totalBlocksSize += LZ4F_CONTENT_CHECKSUM_SIZE; } - cfinfo->fileName = b; + break; + } + totalBlocksSize += nextBlock; + /* skip to the next block */ + if (fseek(finput, nextBlock, SEEK_CUR) != 0){ + return 0; + } } + } + return totalBlocksSize; +} - /* Read file and extract header */ - { size_t const hSize = LZ4F_HEADER_SIZE_MAX; - size_t readSize=0; - - void* const buffer = malloc(hSize); - if (!buffer) EXM_THROW(21, "Allocation error : not enough memory"); - - { FILE* const finput = LZ4IO_openSrcFile(input_filename); - if (finput) { - readSize = fread(buffer, 1, hSize, finput); - fclose(finput); - } } - - if (readSize > 0) { - LZ4F_dctx* dctx; - if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) { - if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &cfinfo->frameInfo, buffer, &readSize))) { - result = LZ4IO_LZ4F_OK; - } } - LZ4F_freeDecompressionContext(dctx); +/* For legacy frames only. + Read block headers and skip block data. + Return total blocks size for this frame including headers. + or 0 in case it can't succesfully skip block data. + This works as long as legacy block header size = magic number size. + Assumes SEEK_CUR after frame header. + */ +static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput){ + unsigned char blockInfo[LZIO_LEGACY_BLOCK_HEADER_SIZE]; + unsigned long long totalBlocksSize = 0; + if(LZIO_LEGACY_BLOCK_HEADER_SIZE != MAGICNUMBER_SIZE){ + DISPLAYLEVEL(4, "Legacy block header size not equal to magic number size. Cannot skip blocks"); + return 0; + } + for(;;){ + if (!fread(blockInfo, 1, LZIO_LEGACY_BLOCK_HEADER_SIZE, finput)){ + if (feof(finput)) return totalBlocksSize; + return 0; + } + { const unsigned int nextCBlockSize = LZ4IO_readLE32(&blockInfo); + if( nextCBlockSize == LEGACY_MAGICNUMBER || + nextCBlockSize == LZ4IO_MAGICNUMBER || + LZ4IO_isSkippableMagicNumber(nextCBlockSize)){ + /* Rewind back. we want cursor at the begining of next frame.*/ + if (fseek(finput, -LZIO_LEGACY_BLOCK_HEADER_SIZE, SEEK_CUR) != 0){ + return 0; } - - /* clean */ - free(buffer); + break; + } + totalBlocksSize += LZIO_LEGACY_BLOCK_HEADER_SIZE + nextCBlockSize; + /* skip to the next block */ + if (fseek(finput, nextCBlockSize, SEEK_CUR) != 0){ + return 0; + } } - - return result; + } + return totalBlocksSize; } - /* buffer : must be a valid memory area of at least 4 bytes */ -const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer) -{ +const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer){ buffer[0] = 'B'; assert(sizeID >= 4); assert(sizeID <=7); buffer[1] = (char)(sizeID + '0'); @@ -1362,47 +1398,204 @@ const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer) return buffer; } +/* buffer : must be valid memory area of at least 10 bytes */ +static const char* LZ4IO_toHuman(long double size, char *buf){ + const char units[] = {"\0KMGTPEZY"}; + size_t i = 0; + for(;size>=1024;i++) size /= 1024; + sprintf(buf, "%.2Lf%c", size, units[i]); + return buf; +} -int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx) +/* Get filename without path prefix */ +static const char* LZ4IO_baseName(const char* input_filename) { + const char* b = strrchr(input_filename, '/'); + if (!b) b = strrchr(input_filename, '\\'); + return b ? b + 1 : b; +} + +/* Report frame/s information in verbose mode. + * Will populate file info with fileName and contentSize where applicable. + * - TODO : + * + report nb of blocks, hence max. possible decompressed size (when not reported in header) + */ +static LZ4IO_infoResult +LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename) { - int result = 0; - size_t idx; + LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */ + unsigned char buffer[LZ4F_HEADER_SIZE_MAX]; + FILE* const finput = LZ4IO_openSrcFile(input_filename); + cfinfo->fileSize = UTIL_getFileSize(input_filename); - DISPLAY("%5s %20s %20s %10s %7s %s\n", - "Block", "Compressed", "Uncompressed", "Ratio", "Check", "Filename"); - for (idx=0; idx<ifnIdx; idx++) { + while(!feof(finput)){ + { LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO; + unsigned magicNumber; + /* Get MagicNumber */ + size_t nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput); + if (nbReadBytes==0) { break; } /* EOF */ + result = LZ4IO_format_not_known; /* default result (error) */ + if (nbReadBytes != MAGICNUMBER_SIZE) + EXM_THROW(40, "Unrecognized header : Magic Number unreadable"); + magicNumber = LZ4IO_readLE32(buffer); /* Little Endian format */ + if (LZ4IO_isSkippableMagicNumber(magicNumber)) + magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */ + + switch(magicNumber) + { + case LZ4IO_MAGICNUMBER: + if(cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0; + /* Get frame info */ + { const size_t readBytes = fread(buffer+MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN-MAGICNUMBER_SIZE, finput); + if (!readBytes || ferror(finput)) EXM_THROW(71, "Error reading %s", input_filename); } + { size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN); + if(!LZ4F_isError(hSize)){ + if(hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)){ + /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/ + const size_t readBytes = fread(buffer+LZ4F_HEADER_SIZE_MIN, 1, hSize-LZ4F_HEADER_SIZE_MIN, finput); + if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename); + } + /* Create decompression context */ + { LZ4F_dctx* dctx; + if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) { + if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize))) { + if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID || + cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode) + && cfinfo->frameCount !=0) + cfinfo->eqBlockTypes = 0; + { const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput, + frameInfo.lz4FrameInfo.blockChecksumFlag, + frameInfo.lz4FrameInfo.contentChecksumFlag); + if(totalBlocksSize){ + char bTypeBuffer[5]; + LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer); + DISPLAYLEVEL(3, " %6llu %14s %5s %8s", + cfinfo->frameCount + 1, + LZ4IO_frameTypeNames[frameInfo.frameType], + bTypeBuffer, + frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-"); + if(frameInfo.lz4FrameInfo.contentSize){ + { double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100; + DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n", + totalBlocksSize + hSize, + frameInfo.lz4FrameInfo.contentSize, + ratio); } + /* Now we've consumed frameInfo we can use it to store the total contentSize */ + frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize; + } + else{ + DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-"); + cfinfo->allContentSize = 0; + } + result = LZ4IO_LZ4F_OK; + } } + } + } } + } } + break; + case LEGACY_MAGICNUMBER: + frameInfo.frameType = legacyFrame; + if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0; + cfinfo->eqBlockTypes = 0; + cfinfo->allContentSize = 0; + { const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput); + if (totalBlocksSize){ + DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20llu %20s %9s\n", + cfinfo->frameCount + 1, + LZ4IO_frameTypeNames[frameInfo.frameType], + "-", "-", + totalBlocksSize + 4, + "-", "-"); + result = LZ4IO_LZ4F_OK; + } } + break; + case LZ4IO_SKIPPABLE0: + frameInfo.frameType = skippableFrame; + if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0; + cfinfo->eqBlockTypes = 0; + cfinfo->allContentSize = 0; + { nbReadBytes = fread(buffer, 1, 4, finput); + if (nbReadBytes != 4) + EXM_THROW(42, "Stream error : skippable size unreadable"); + } + { unsigned const size = LZ4IO_readLE32(buffer); + int const errorNb = fseek_u32(finput, size, SEEK_CUR); + if (errorNb != 0) + EXM_THROW(43, "Stream error : cannot skip skippable area"); + DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20u %20s %9s\n", + cfinfo->frameCount + 1, + "SkippableFrame", + "-", "-", size + 8, "-", "-"); + + result = LZ4IO_LZ4F_OK; + } + break; + default: + { long int const position = ftell(finput); /* only works for files < 2 GB */ + DISPLAYLEVEL(3, "Stream followed by undecodable data "); + if (position != -1L) + DISPLAYLEVEL(3, "at position %i ", (int)position); + DISPLAYLEVEL(3, "\n"); + } + break; + } + if(result != LZ4IO_LZ4F_OK){ + break; + } + cfinfo->frameSummary = frameInfo; } + cfinfo->frameCount++; + } + fclose(finput); + return result; +} + + +int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx) +{ + int result = 0; + size_t idx = 0; + if(g_displayLevel < 3){ + DISPLAY("%10s %14s %5s %11s %13s %9s %s\n", + "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename"); + } + for (; idx<ifnIdx; idx++) { /* Get file info */ LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO; - LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]); - if (op_result != LZ4IO_LZ4F_OK) { - if (op_result == LZ4IO_not_a_file) { - DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]); - } else { - assert(op_result == LZ4IO_format_not_known); - DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]); - } - result = 1; - continue; + cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]); + if (!UTIL_isRegFile(inFileNames[idx])) { + DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]); + return 0; } - if (cfinfo.frameInfo.contentSize) { - char buffer[5]; - double const ratio = (double)cfinfo.fileSize / cfinfo.frameInfo.contentSize; - DISPLAY("%5s %20llu %20llu %8.4f %7s %s \n", - LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer), - cfinfo.fileSize, - cfinfo.frameInfo.contentSize, ratio, - cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-", - cfinfo.fileName); - } else { - char buffer[5]; - DISPLAY("%5s %20llu %20s %10s %7s %s \n", - LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer), - cfinfo.fileSize, - "-", "-", - cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-", - cfinfo.fileName); + DISPLAYLEVEL(3, "%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx+1, (unsigned long long)ifnIdx); + DISPLAYLEVEL(3, " %6s %14s %5s %8s %20s %20s %9s\n", + "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio") + { LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]); + if (op_result != LZ4IO_LZ4F_OK) { + assert(op_result == LZ4IO_format_not_known); + DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]); + return 0; + } } + DISPLAYLEVEL(3,"\n"); + if(g_displayLevel < 3){ + /* Display Summary */ + { char buffers[3][10]; + DISPLAY("%10llu %14s %5s %11s %13s ", + cfinfo.frameCount, + cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" , + cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID, + cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-", + LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]), + cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-"); + if (cfinfo.allContentSize) { + double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100; + DISPLAY("%9.2f%% %s \n", ratio, cfinfo.fileName); + } else { + DISPLAY("%9s %s\n", + "-", + cfinfo.fileName); + } } } } + return result; } diff --git a/programs/lz4io.h b/programs/lz4io.h index 213dc95..b189e35 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -57,6 +57,8 @@ typedef struct LZ4IO_prefs_s LZ4IO_prefs_t; LZ4IO_prefs_t* LZ4IO_defaultPreferences(void); void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs); +/* Size in bytes of a legacy block header in little-endian format */ +#define LZIO_LEGACY_BLOCK_HEADER_SIZE 4 /* ************************************************** */ /* ****************** Functions ********************* */ diff --git a/tests/Makefile b/tests/Makefile index 8f0dfd3..3d3cfa2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -55,7 +55,7 @@ NB_LOOPS ?= -i1 default: all -all: fullbench fuzzer frametest roundTripTest datagen checkFrame +all: fullbench fuzzer frametest roundTripTest datagen checkFrame listTest all32: CFLAGS+=-m32 all32: all @@ -122,10 +122,13 @@ clean: versionsTest: $(PYTHON) test-lz4-versions.py +.PHONY: listTest +listTest: lz4 + QEMU_SYS=$(QEMU_SYS) $(PYTHON) test-lz4-list.py + checkTag: checkTag.c $(LZ4DIR)/lz4.h $(CC) $(FLAGS) $< -o $@$(EXT) - #----------------------------------------------------------------------------- # validated only for Linux, OSX, BSD, Hurd and Solaris targets #----------------------------------------------------------------------------- diff --git a/tests/test-lz4-list.py b/tests/test-lz4-list.py new file mode 100644 index 0000000..4738e99 --- /dev/null +++ b/tests/test-lz4-list.py @@ -0,0 +1,282 @@ +#! /usr/bin/env python3 +import subprocess +import time +import glob +import os +import tempfile +import unittest + +SIZES = [3, 11] # Always 2 sizes +MIB = 1048576 +LZ4 = os.getcwd() + "/../lz4" +if not os.path.exists(LZ4): + LZ4 = os.getcwd() + "/../programs/lz4" +TEMP = tempfile.gettempdir() + + +class NVerboseFileInfo(object): + def __init__(self, line_in): + self.line = line_in + splitlines = line_in.split() + if len(splitlines) != 7: + errout("Unexpected line: {}".format(line_in)) + self.frames, self.type, self.block, self.compressed, self.uncompressed, self.ratio, self.filename = splitlines + self.exp_unc_size = 0 + # Get real file sizes + if "concat-all" in self.filename or "2f--content-size" in self.filename: + for i in SIZES: + self.exp_unc_size += os.path.getsize("{}/test_list_{}M".format(TEMP, i)) + else: + uncompressed_filename = self.filename.split("-")[0] + self.exp_unc_size += os.path.getsize("{}/{}".format(TEMP, uncompressed_filename)) + self.exp_comp_size = os.path.getsize("{}/{}".format(TEMP, self.filename)) + + +class TestNonVerbose(unittest.TestCase): + @classmethod + def setUpClass(self): + self.nvinfo_list = [] + for i, line in enumerate(execute("{} --list -m {}/test_list_*.lz4".format(LZ4, TEMP), print_output=True)): + if i > 0: + self.nvinfo_list.append(NVerboseFileInfo(line)) + + def test_frames(self): + all_concat_frames = 0 + all_concat_index = None + for i, nvinfo in enumerate(self.nvinfo_list): + if "concat-all" in nvinfo.filename: + all_concat_index = i + elif "2f--content-size" in nvinfo.filename: + self.assertEqual("2", nvinfo.frames, nvinfo.line) + all_concat_frames += 2 + else: + self.assertEqual("1", nvinfo.frames, nvinfo.line) + all_concat_frames += 1 + self.assertNotEqual(None, all_concat_index, "Couldn't find concat-all file index.") + self.assertEqual(self.nvinfo_list[all_concat_index].frames, str(all_concat_frames), self.nvinfo_list[all_concat_index].line) + + def test_frame_types(self): + for nvinfo in self.nvinfo_list: + if "-lz4f-" in nvinfo.filename: + self.assertEqual(nvinfo.type, "LZ4Frame", nvinfo.line) + elif "-legc-" in nvinfo.filename: + self.assertEqual(nvinfo.type, "LegacyFrame", nvinfo.line) + elif "-skip-" in nvinfo.filename: + self.assertEqual(nvinfo.type, "SkippableFrame", nvinfo.line) + + def test_block(self): + for nvinfo in self.nvinfo_list: + # if "-leg" in nvinfo.filename or "-skip" in nvinfo.filename: + # self.assertEqual(nvinfo.block, "-", nvinfo.line) + if "--BD" in nvinfo.filename: + self.assertRegex(nvinfo.block, "^B[0-9]+D$", nvinfo.line) + elif "--BI" in nvinfo.filename: + self.assertRegex(nvinfo.block, "^B[0-9]+I$", nvinfo.line) + + def test_compressed_size(self): + for nvinfo in self.nvinfo_list: + self.assertEqual(nvinfo.compressed, to_human(nvinfo.exp_comp_size), nvinfo.line) + + def test_ratio(self): + for nvinfo in self.nvinfo_list: + if "--content-size" in nvinfo.filename: + self.assertEqual(nvinfo.ratio, "{:.2f}%".format(float(nvinfo.exp_comp_size) / float(nvinfo.exp_unc_size) * 100), nvinfo.line) + + def test_uncompressed_size(self): + for nvinfo in self.nvinfo_list: + if "--content-size" in nvinfo.filename: + self.assertEqual(nvinfo.uncompressed, to_human(nvinfo.exp_unc_size), nvinfo.line) + + +class VerboseFileInfo(object): + def __init__(self, lines): + # Parse lines + self.frame_list = [] + self.file_frame_map = [] + for i, line in enumerate(lines): + if i == 0: + self.filename = line + continue + elif i == 1: + # Skip header + continue + frame_info = dict(zip(["frame", "type", "block", "checksum", "compressed", "uncompressed", "ratio"], line.split())) + frame_info["line"] = line + self.frame_list.append(frame_info) + + +class TestVerbose(unittest.TestCase): + @classmethod + def setUpClass(self): + # Even do we're listing 2 files to test multiline working as expected. + # we're only really interested in testing the output of the concat-all file. + self.vinfo_list = [] + start = end = 0 + output = execute("{} --list -m -v {}/test_list_concat-all.lz4 {}/test_list_*M-lz4f-2f--content-size.lz4".format(LZ4, TEMP, TEMP), print_output=True) + for i, line in enumerate(output): + if line.startswith("test_list"): + if start != 0 and end != 0: + self.vinfo_list.append(VerboseFileInfo(output[start:end])) + start = i + if not line: + end = i + self.vinfo_list.append(VerboseFileInfo(output[start:end])) + # Populate file_frame_map as a reference of the expected info + concat_file_list = glob.glob("/tmp/test_list_[!concat]*.lz4") + # One of the files has 2 frames so duplicate it in this list to map each frame 1 to a single file + for i, filename in enumerate(concat_file_list): + if "2f--content-size" in filename: + concat_file_list.insert(i, filename) + break + self.cvinfo = self.vinfo_list[0] + self.cvinfo.file_frame_map = concat_file_list + self.cvinfo.compressed_size = os.path.getsize("{}/test_list_concat-all.lz4".format(TEMP)) + + def test_filename(self): + for i, vinfo in enumerate(self.vinfo_list): + self.assertRegex(vinfo.filename, "^test_list_.*({}/{})".format(i + 1, len(self.vinfo_list))) + + def test_frame_number(self): + for vinfo in self.vinfo_list: + for i, frame_info in enumerate(vinfo.frame_list): + self.assertEqual(frame_info["frame"], str(i + 1), frame_info["line"]) + + def test_frame_type(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "-lz4f-" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["type"], "LZ4Frame", self.cvinfo.frame_list[i]["line"]) + elif "-legc-" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["type"], "LegacyFrame", self.cvinfo.frame_list[i]["line"]) + elif "-skip-" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["type"], "SkippableFrame", self.cvinfo.frame_list[i]["line"]) + + def test_block(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "--BD" in self.cvinfo.file_frame_map[i]: + self.assertRegex(self.cvinfo.frame_list[i]["block"], "^B[0-9]+D$", self.cvinfo.frame_list[i]["line"]) + elif "--BI" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["block"], "^B[0-9]+I$", self.cvinfo.frame_list[i]["line"]) + + def test_checksum(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "-lz4f-" in self.cvinfo.file_frame_map[i] and "--no-frame-crc" not in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["checksum"], "XXH32", self.cvinfo.frame_list[i]["line"]) + + def test_compressed(self): + total = 0 + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "-2f-" not in self.cvinfo.file_frame_map[i]: + expected_size = os.path.getsize(self.cvinfo.file_frame_map[i]) + self.assertEqual(self.cvinfo.frame_list[i]["compressed"], str(expected_size), self.cvinfo.frame_list[i]["line"]) + total += int(self.cvinfo.frame_list[i]["compressed"]) + self.assertEqual(total, self.cvinfo.compressed_size, "Expected total sum ({}) to match {} filesize".format(total, self.cvinfo.filename)) + + def test_uncompressed(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + ffm = self.cvinfo.file_frame_map[i] + if "-2f-" not in ffm and "--content-size" in ffm: + expected_size_unc = int(ffm[ffm.rindex("_") + 1:ffm.index("M")]) * 1048576 + self.assertEqual(self.cvinfo.frame_list[i]["uncompressed"], str(expected_size_unc), self.cvinfo.frame_list[i]["line"]) + + def test_ratio(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "--content-size" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]['ratio'], + "{:.2f}%".format(float(self.cvinfo.frame_list[i]['compressed']) / float(self.cvinfo.frame_list[i]['uncompressed']) * 100), + self.cvinfo.frame_list[i]["line"]) + + +def to_human(size): + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']: + if size < 1024.0: + break + size /= 1024.0 + return "{:.2f}{}".format(size, unit) + + +def log(text): + print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text) + + +def errout(text, err=1): + log(text) + exit(err) + + +def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True): + if os.environ.get('QEMU_SYS'): + command = "{} {}".format(os.environ['QEMU_SYS'], command) + if print_command: + log("> " + command) + popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell) + stdout_lines, stderr_lines = popen.communicate() + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + if stdout_lines: + print(stdout_lines) + if stderr_lines: + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if stderr_lines and not print_output and print_error: + print(stderr_lines) + errout("Failed to run: {}\n".format(command, stdout_lines + stderr_lines)) + return (stdout_lines + stderr_lines).splitlines() + + +def cleanup(silent=False): + for f in glob.glob("{}/test_list*".format(TEMP)): + if not silent: + log("Deleting {}".format(f)) + os.unlink(f) + + +def datagen(file_name, size): + non_sparse_size = size // 2 + sparse_size = size - non_sparse_size + with open(file_name, "wb") as f: + f.seek(sparse_size) + f.write(os.urandom(non_sparse_size)) + + +def generate_files(): + # file format ~ test_list<frametype>-<no_frames>f<create-args>.lz4 ~ + # Generate LZ4Frames + for i in SIZES: + filename = "{}/test_list_{}M".format(TEMP, i) + log("Generating {}".format(filename)) + datagen(filename, i * MIB) + for j in ["--content-size", "-BI", "-BD", "-BX", "--no-frame-crc"]: + lz4file = "{}-lz4f-1f{}.lz4".format(filename, j) + execute("{} {} {} {}".format(LZ4, j, filename, lz4file)) + # Generate skippable frames + lz4file = "{}-skip-1f.lz4".format(filename) + skipsize = i * 1024 + skipbytes = bytes([80, 42, 77, 24]) + skipsize.to_bytes(4, byteorder='little', signed=False) + with open(lz4file, 'wb') as f: + f.write(skipbytes) + f.write(os.urandom(skipsize)) + # Generate legacy frames + lz4file = "{}-legc-1f.lz4".format(filename) + execute("{} -l {} {}".format(LZ4, filename, lz4file)) + + # Concatenate --content-size files + file_list = glob.glob("{}/test_list_*-lz4f-1f--content-size.lz4".format(TEMP)) + with open("{}/test_list_{}M-lz4f-2f--content-size.lz4".format(TEMP, sum(SIZES)), 'ab') as outfile: + for fname in file_list: + with open(fname, 'rb') as infile: + outfile.write(infile.read()) + + # Concatenate all files + file_list = glob.glob("{}/test_list_*.lz4".format(TEMP)) + with open("{}/test_list_concat-all.lz4".format(TEMP), 'ab') as outfile: + for fname in file_list: + with open(fname, 'rb') as infile: + outfile.write(infile.read()) + + +if __name__ == '__main__': + cleanup() + generate_files() + unittest.main(verbosity=2, exit=False) + cleanup(silent=True) |