diff options
author | gstedman <gabriel.stedman@zynstra.com> | 2019-05-10 15:54:05 (GMT) |
---|---|---|
committer | gabrielstedman <gabriel.stedman@zynstra.com> | 2019-05-15 20:13:19 (GMT) |
commit | 98a86c8ef6ea3202f8cb52772144d0f744bd5c73 (patch) | |
tree | ddeb450b694f4aac94701708e88cfdbf34ae741e /programs | |
parent | 02914300185515097cdbebcd95c379508b5d3053 (diff) | |
download | lz4-98a86c8ef6ea3202f8cb52772144d0f744bd5c73.zip lz4-98a86c8ef6ea3202f8cb52772144d0f744bd5c73.tar.gz lz4-98a86c8ef6ea3202f8cb52772144d0f744bd5c73.tar.bz2 |
Add multiframe report to --list command
Diffstat (limited to 'programs')
-rw-r--r-- | programs/lz4io.c | 377 | ||||
-rw-r--r-- | programs/lz4io.h | 2 |
2 files changed, 287 insertions, 92 deletions
diff --git a/programs/lz4io.c b/programs/lz4io.c index 960c451..c27a0ad 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -1278,82 +1278,118 @@ int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, /* ********************** LZ4 --list command *********************** */ /* ********************************************************************* */ +typedef enum +{ + lz4Frame = 0, + legacyFrame, + skippableFrame +} LZ4IO_frameType_t; + +typedef struct { + LZ4F_frameInfo_t lz4FrameInfo; + LZ4IO_frameType_t frameType; +} LZ4IO_frameInfo_t; + +#define LZ4IO_INIT_FRAMEINFO { LZ4F_INIT_FRAMEINFO, lz4Frame } + typedef struct { - LZ4F_frameInfo_t frameInfo; const char* fileName; unsigned long long fileSize; + unsigned long long frameCount; + LZ4IO_frameInfo_t frameSummary; + unsigned short eqFrameTypes; + unsigned short eqBlockTypes; + unsigned short allContentSize; } LZ4IO_cFileInfo_t; -#define LZ4IO_INIT_CFILEINFO { LZ4F_INIT_FRAMEINFO, NULL, 0ULL } - +#define LZ4IO_INIT_CFILEINFO { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 } typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult; -/* This function is limited, - * it only works fine for a file consisting of a single valid frame using LZ4 Frame specification. - * It will not look at content beyond first frame header. - * It's also unable to parse legacy frames, nor skippable ones. - * - * Things to improve : - * - check the entire file for additional content after first frame - * + combine results from multiple frames, give total - * - Optional : - * + report nb of blocks, hence max. possible decompressed size (when not reported in header) - */ -static LZ4IO_infoResult -LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename) -{ - LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */ - - if (!UTIL_isRegFile(input_filename)) return LZ4IO_not_a_file; - cfinfo->fileSize = UTIL_getFileSize(input_filename); +static const char * LZ4IO_frameTypeNames[]={"LZ4Frame", "LegacyFrame", "SkippableFrame" }; - /* Get filename without path prefix */ - { const char* b = strrchr(input_filename, '/'); - if (!b) { - b = strrchr(input_filename, '\\'); - } - if (b && b != input_filename) { - b++; - } else { - b = input_filename; +/* Read block headers and skip block data + Return total blocks size for this frame including headers, + block checksums and content checksums. + returns 0 in case it can't succesfully skip block data. + Assumes SEEK_CUR after frame header. + */ +static unsigned long long LZ4IO_skipBlocksData(FILE* finput, + const LZ4F_blockChecksum_t blockChecksumFlag, + const LZ4F_contentChecksum_t contentChecksumFlag){ + unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE]; + unsigned long long totalBlocksSize = 0; + for(;;){ + if (!fread(blockInfo, 1, LZ4F_BLOCK_HEADER_SIZE, finput)){ + if (feof(finput)) return totalBlocksSize; + return 0; + } + totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE; + { + const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU; + const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE); + if (nextCBlockSize == 0){ + /* Reached EndMark */ + if(contentChecksumFlag){ + /* Skip content checksum */ + if(fseek(finput, LZ4F_CONTENT_CHECKSUM_SIZE, SEEK_CUR) != 0){ + return 0; + } + totalBlocksSize += LZ4F_CONTENT_CHECKSUM_SIZE; } - cfinfo->fileName = b; + break; + } + totalBlocksSize += nextBlock; + /* skip to the next block */ + if (fseek(finput, nextBlock, SEEK_CUR) != 0){ + return 0; + } } + } + return totalBlocksSize; +} - /* Read file and extract header */ - { size_t const hSize = LZ4F_HEADER_SIZE_MAX; - size_t readSize=0; - - void* const buffer = malloc(hSize); - if (!buffer) EXM_THROW(21, "Allocation error : not enough memory"); - - { FILE* const finput = LZ4IO_openSrcFile(input_filename); - if (finput) { - readSize = fread(buffer, 1, hSize, finput); - fclose(finput); - } } - - if (readSize > 0) { - LZ4F_dctx* dctx; - if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) { - if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &cfinfo->frameInfo, buffer, &readSize))) { - result = LZ4IO_LZ4F_OK; - } } - LZ4F_freeDecompressionContext(dctx); +/* For legacy frames only. + Read block headers and skip block data. + Return total blocks size for this frame including headers. + or 0 in case it can't succesfully skip block data. + This works as long as legacy block header size = magic number size. + Assumes SEEK_CUR after frame header. + */ +static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput){ + unsigned char blockInfo[LZIO_LEGACY_BLOCK_HEADER_SIZE]; + unsigned long long totalBlocksSize = 0; + if(LZIO_LEGACY_BLOCK_HEADER_SIZE != MAGICNUMBER_SIZE){ + DISPLAYLEVEL(4, "Legacy block header size not equal to magic number size. Cannot skip blocks"); + return 0; + } + for(;;){ + if (!fread(blockInfo, 1, LZIO_LEGACY_BLOCK_HEADER_SIZE, finput)){ + if (feof(finput)) return totalBlocksSize; + return 0; + } + { const unsigned int nextCBlockSize = LZ4IO_readLE32(&blockInfo); + if( nextCBlockSize == LEGACY_MAGICNUMBER || + nextCBlockSize == LZ4IO_MAGICNUMBER || + LZ4IO_isSkippableMagicNumber(nextCBlockSize)){ + /* Rewind back. we want cursor at the begining of next frame.*/ + if (fseek(finput, -LZIO_LEGACY_BLOCK_HEADER_SIZE, SEEK_CUR) != 0){ + return 0; } - - /* clean */ - free(buffer); + break; + } + totalBlocksSize += LZIO_LEGACY_BLOCK_HEADER_SIZE + nextCBlockSize; + /* skip to the next block */ + if (fseek(finput, nextCBlockSize, SEEK_CUR) != 0){ + return 0; + } } - - return result; + } + return totalBlocksSize; } - /* buffer : must be a valid memory area of at least 4 bytes */ -const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer) -{ +const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer){ buffer[0] = 'B'; assert(sizeID >= 4); assert(sizeID <=7); buffer[1] = (char)(sizeID + '0'); @@ -1362,47 +1398,204 @@ const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer) return buffer; } +/* buffer : must be valid memory area of at least 10 bytes */ +static const char* LZ4IO_toHuman(long double size, char *buf){ + const char units[] = {"\0KMGTPEZY"}; + size_t i = 0; + for(;size>=1024;i++) size /= 1024; + sprintf(buf, "%.2Lf%c", size, units[i]); + return buf; +} -int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx) +/* Get filename without path prefix */ +static const char* LZ4IO_baseName(const char* input_filename) { + const char* b = strrchr(input_filename, '/'); + if (!b) b = strrchr(input_filename, '\\'); + return b ? b + 1 : b; +} + +/* Report frame/s information in verbose mode. + * Will populate file info with fileName and contentSize where applicable. + * - TODO : + * + report nb of blocks, hence max. possible decompressed size (when not reported in header) + */ +static LZ4IO_infoResult +LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename) { - int result = 0; - size_t idx; + LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */ + unsigned char buffer[LZ4F_HEADER_SIZE_MAX]; + FILE* const finput = LZ4IO_openSrcFile(input_filename); + cfinfo->fileSize = UTIL_getFileSize(input_filename); - DISPLAY("%5s %20s %20s %10s %7s %s\n", - "Block", "Compressed", "Uncompressed", "Ratio", "Check", "Filename"); - for (idx=0; idx<ifnIdx; idx++) { + while(!feof(finput)){ + { LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO; + unsigned magicNumber; + /* Get MagicNumber */ + size_t nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput); + if (nbReadBytes==0) { break; } /* EOF */ + result = LZ4IO_format_not_known; /* default result (error) */ + if (nbReadBytes != MAGICNUMBER_SIZE) + EXM_THROW(40, "Unrecognized header : Magic Number unreadable"); + magicNumber = LZ4IO_readLE32(buffer); /* Little Endian format */ + if (LZ4IO_isSkippableMagicNumber(magicNumber)) + magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */ + + switch(magicNumber) + { + case LZ4IO_MAGICNUMBER: + if(cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0; + /* Get frame info */ + { const size_t readBytes = fread(buffer+MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN-MAGICNUMBER_SIZE, finput); + if (!readBytes || ferror(finput)) EXM_THROW(71, "Error reading %s", input_filename); } + { size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN); + if(!LZ4F_isError(hSize)){ + if(hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)){ + /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/ + const size_t readBytes = fread(buffer+LZ4F_HEADER_SIZE_MIN, 1, hSize-LZ4F_HEADER_SIZE_MIN, finput); + if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename); + } + /* Create decompression context */ + { LZ4F_dctx* dctx; + if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) { + if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize))) { + if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID || + cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode) + && cfinfo->frameCount !=0) + cfinfo->eqBlockTypes = 0; + { const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput, + frameInfo.lz4FrameInfo.blockChecksumFlag, + frameInfo.lz4FrameInfo.contentChecksumFlag); + if(totalBlocksSize){ + char bTypeBuffer[5]; + LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer); + DISPLAYLEVEL(3, " %6llu %14s %5s %8s", + cfinfo->frameCount + 1, + LZ4IO_frameTypeNames[frameInfo.frameType], + bTypeBuffer, + frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-"); + if(frameInfo.lz4FrameInfo.contentSize){ + { double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100; + DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n", + totalBlocksSize + hSize, + frameInfo.lz4FrameInfo.contentSize, + ratio); } + /* Now we've consumed frameInfo we can use it to store the total contentSize */ + frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize; + } + else{ + DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-"); + cfinfo->allContentSize = 0; + } + result = LZ4IO_LZ4F_OK; + } } + } + } } + } } + break; + case LEGACY_MAGICNUMBER: + frameInfo.frameType = legacyFrame; + if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0; + cfinfo->eqBlockTypes = 0; + cfinfo->allContentSize = 0; + { const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput); + if (totalBlocksSize){ + DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20llu %20s %9s\n", + cfinfo->frameCount + 1, + LZ4IO_frameTypeNames[frameInfo.frameType], + "-", "-", + totalBlocksSize + 4, + "-", "-"); + result = LZ4IO_LZ4F_OK; + } } + break; + case LZ4IO_SKIPPABLE0: + frameInfo.frameType = skippableFrame; + if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0; + cfinfo->eqBlockTypes = 0; + cfinfo->allContentSize = 0; + { nbReadBytes = fread(buffer, 1, 4, finput); + if (nbReadBytes != 4) + EXM_THROW(42, "Stream error : skippable size unreadable"); + } + { unsigned const size = LZ4IO_readLE32(buffer); + int const errorNb = fseek_u32(finput, size, SEEK_CUR); + if (errorNb != 0) + EXM_THROW(43, "Stream error : cannot skip skippable area"); + DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20u %20s %9s\n", + cfinfo->frameCount + 1, + "SkippableFrame", + "-", "-", size + 8, "-", "-"); + + result = LZ4IO_LZ4F_OK; + } + break; + default: + { long int const position = ftell(finput); /* only works for files < 2 GB */ + DISPLAYLEVEL(3, "Stream followed by undecodable data "); + if (position != -1L) + DISPLAYLEVEL(3, "at position %i ", (int)position); + DISPLAYLEVEL(3, "\n"); + } + break; + } + if(result != LZ4IO_LZ4F_OK){ + break; + } + cfinfo->frameSummary = frameInfo; } + cfinfo->frameCount++; + } + fclose(finput); + return result; +} + + +int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx) +{ + int result = 0; + size_t idx = 0; + if(g_displayLevel < 3){ + DISPLAY("%10s %14s %5s %11s %13s %9s %s\n", + "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename"); + } + for (; idx<ifnIdx; idx++) { /* Get file info */ LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO; - LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]); - if (op_result != LZ4IO_LZ4F_OK) { - if (op_result == LZ4IO_not_a_file) { - DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]); - } else { - assert(op_result == LZ4IO_format_not_known); - DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]); - } - result = 1; - continue; + cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]); + if (!UTIL_isRegFile(inFileNames[idx])) { + DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]); + return 0; } - if (cfinfo.frameInfo.contentSize) { - char buffer[5]; - double const ratio = (double)cfinfo.fileSize / cfinfo.frameInfo.contentSize; - DISPLAY("%5s %20llu %20llu %8.4f %7s %s \n", - LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer), - cfinfo.fileSize, - cfinfo.frameInfo.contentSize, ratio, - cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-", - cfinfo.fileName); - } else { - char buffer[5]; - DISPLAY("%5s %20llu %20s %10s %7s %s \n", - LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer), - cfinfo.fileSize, - "-", "-", - cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-", - cfinfo.fileName); + DISPLAYLEVEL(3, "%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx+1, (unsigned long long)ifnIdx); + DISPLAYLEVEL(3, " %6s %14s %5s %8s %20s %20s %9s\n", + "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio") + { LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]); + if (op_result != LZ4IO_LZ4F_OK) { + assert(op_result == LZ4IO_format_not_known); + DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]); + return 0; + } } + DISPLAYLEVEL(3,"\n"); + if(g_displayLevel < 3){ + /* Display Summary */ + { char buffers[3][10]; + DISPLAY("%10llu %14s %5s %11s %13s ", + cfinfo.frameCount, + cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" , + cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID, + cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-", + LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]), + cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-"); + if (cfinfo.allContentSize) { + double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100; + DISPLAY("%9.2f%% %s \n", ratio, cfinfo.fileName); + } else { + DISPLAY("%9s %s\n", + "-", + cfinfo.fileName); + } } } } + return result; } diff --git a/programs/lz4io.h b/programs/lz4io.h index 213dc95..b189e35 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -57,6 +57,8 @@ typedef struct LZ4IO_prefs_s LZ4IO_prefs_t; LZ4IO_prefs_t* LZ4IO_defaultPreferences(void); void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs); +/* Size in bytes of a legacy block header in little-endian format */ +#define LZIO_LEGACY_BLOCK_HEADER_SIZE 4 /* ************************************************** */ /* ****************** Functions ********************* */ |