summaryrefslogtreecommitdiffstats
path: root/programs/lz4io.c
diff options
context:
space:
mode:
authorgstedman <gabriel.stedman@zynstra.com>2019-05-10 15:54:05 (GMT)
committergabrielstedman <gabriel.stedman@zynstra.com>2019-05-15 20:13:19 (GMT)
commit98a86c8ef6ea3202f8cb52772144d0f744bd5c73 (patch)
treeddeb450b694f4aac94701708e88cfdbf34ae741e /programs/lz4io.c
parent02914300185515097cdbebcd95c379508b5d3053 (diff)
downloadlz4-98a86c8ef6ea3202f8cb52772144d0f744bd5c73.zip
lz4-98a86c8ef6ea3202f8cb52772144d0f744bd5c73.tar.gz
lz4-98a86c8ef6ea3202f8cb52772144d0f744bd5c73.tar.bz2
Add multiframe report to --list command
Diffstat (limited to 'programs/lz4io.c')
-rw-r--r--programs/lz4io.c377
1 files changed, 285 insertions, 92 deletions
diff --git a/programs/lz4io.c b/programs/lz4io.c
index 960c451..c27a0ad 100644
--- a/programs/lz4io.c
+++ b/programs/lz4io.c
@@ -1278,82 +1278,118 @@ int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs,
/* ********************** LZ4 --list command *********************** */
/* ********************************************************************* */
+typedef enum
+{
+ lz4Frame = 0,
+ legacyFrame,
+ skippableFrame
+} LZ4IO_frameType_t;
+
+typedef struct {
+ LZ4F_frameInfo_t lz4FrameInfo;
+ LZ4IO_frameType_t frameType;
+} LZ4IO_frameInfo_t;
+
+#define LZ4IO_INIT_FRAMEINFO { LZ4F_INIT_FRAMEINFO, lz4Frame }
+
typedef struct {
- LZ4F_frameInfo_t frameInfo;
const char* fileName;
unsigned long long fileSize;
+ unsigned long long frameCount;
+ LZ4IO_frameInfo_t frameSummary;
+ unsigned short eqFrameTypes;
+ unsigned short eqBlockTypes;
+ unsigned short allContentSize;
} LZ4IO_cFileInfo_t;
-#define LZ4IO_INIT_CFILEINFO { LZ4F_INIT_FRAMEINFO, NULL, 0ULL }
-
+#define LZ4IO_INIT_CFILEINFO { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 }
typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult;
-/* This function is limited,
- * it only works fine for a file consisting of a single valid frame using LZ4 Frame specification.
- * It will not look at content beyond first frame header.
- * It's also unable to parse legacy frames, nor skippable ones.
- *
- * Things to improve :
- * - check the entire file for additional content after first frame
- * + combine results from multiple frames, give total
- * - Optional :
- * + report nb of blocks, hence max. possible decompressed size (when not reported in header)
- */
-static LZ4IO_infoResult
-LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename)
-{
- LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */
-
- if (!UTIL_isRegFile(input_filename)) return LZ4IO_not_a_file;
- cfinfo->fileSize = UTIL_getFileSize(input_filename);
+static const char * LZ4IO_frameTypeNames[]={"LZ4Frame", "LegacyFrame", "SkippableFrame" };
- /* Get filename without path prefix */
- { const char* b = strrchr(input_filename, '/');
- if (!b) {
- b = strrchr(input_filename, '\\');
- }
- if (b && b != input_filename) {
- b++;
- } else {
- b = input_filename;
+/* Read block headers and skip block data
+ Return total blocks size for this frame including headers,
+ block checksums and content checksums.
+ returns 0 in case it can't succesfully skip block data.
+ Assumes SEEK_CUR after frame header.
+ */
+static unsigned long long LZ4IO_skipBlocksData(FILE* finput,
+ const LZ4F_blockChecksum_t blockChecksumFlag,
+ const LZ4F_contentChecksum_t contentChecksumFlag){
+ unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE];
+ unsigned long long totalBlocksSize = 0;
+ for(;;){
+ if (!fread(blockInfo, 1, LZ4F_BLOCK_HEADER_SIZE, finput)){
+ if (feof(finput)) return totalBlocksSize;
+ return 0;
+ }
+ totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE;
+ {
+ const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU;
+ const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE);
+ if (nextCBlockSize == 0){
+ /* Reached EndMark */
+ if(contentChecksumFlag){
+ /* Skip content checksum */
+ if(fseek(finput, LZ4F_CONTENT_CHECKSUM_SIZE, SEEK_CUR) != 0){
+ return 0;
+ }
+ totalBlocksSize += LZ4F_CONTENT_CHECKSUM_SIZE;
}
- cfinfo->fileName = b;
+ break;
+ }
+ totalBlocksSize += nextBlock;
+ /* skip to the next block */
+ if (fseek(finput, nextBlock, SEEK_CUR) != 0){
+ return 0;
+ }
}
+ }
+ return totalBlocksSize;
+}
- /* Read file and extract header */
- { size_t const hSize = LZ4F_HEADER_SIZE_MAX;
- size_t readSize=0;
-
- void* const buffer = malloc(hSize);
- if (!buffer) EXM_THROW(21, "Allocation error : not enough memory");
-
- { FILE* const finput = LZ4IO_openSrcFile(input_filename);
- if (finput) {
- readSize = fread(buffer, 1, hSize, finput);
- fclose(finput);
- } }
-
- if (readSize > 0) {
- LZ4F_dctx* dctx;
- if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) {
- if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &cfinfo->frameInfo, buffer, &readSize))) {
- result = LZ4IO_LZ4F_OK;
- } }
- LZ4F_freeDecompressionContext(dctx);
+/* For legacy frames only.
+ Read block headers and skip block data.
+ Return total blocks size for this frame including headers.
+ or 0 in case it can't succesfully skip block data.
+ This works as long as legacy block header size = magic number size.
+ Assumes SEEK_CUR after frame header.
+ */
+static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput){
+ unsigned char blockInfo[LZIO_LEGACY_BLOCK_HEADER_SIZE];
+ unsigned long long totalBlocksSize = 0;
+ if(LZIO_LEGACY_BLOCK_HEADER_SIZE != MAGICNUMBER_SIZE){
+ DISPLAYLEVEL(4, "Legacy block header size not equal to magic number size. Cannot skip blocks");
+ return 0;
+ }
+ for(;;){
+ if (!fread(blockInfo, 1, LZIO_LEGACY_BLOCK_HEADER_SIZE, finput)){
+ if (feof(finput)) return totalBlocksSize;
+ return 0;
+ }
+ { const unsigned int nextCBlockSize = LZ4IO_readLE32(&blockInfo);
+ if( nextCBlockSize == LEGACY_MAGICNUMBER ||
+ nextCBlockSize == LZ4IO_MAGICNUMBER ||
+ LZ4IO_isSkippableMagicNumber(nextCBlockSize)){
+ /* Rewind back. we want cursor at the begining of next frame.*/
+ if (fseek(finput, -LZIO_LEGACY_BLOCK_HEADER_SIZE, SEEK_CUR) != 0){
+ return 0;
}
-
- /* clean */
- free(buffer);
+ break;
+ }
+ totalBlocksSize += LZIO_LEGACY_BLOCK_HEADER_SIZE + nextCBlockSize;
+ /* skip to the next block */
+ if (fseek(finput, nextCBlockSize, SEEK_CUR) != 0){
+ return 0;
+ }
}
-
- return result;
+ }
+ return totalBlocksSize;
}
-
/* buffer : must be a valid memory area of at least 4 bytes */
-const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer)
-{
+const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer){
buffer[0] = 'B';
assert(sizeID >= 4); assert(sizeID <=7);
buffer[1] = (char)(sizeID + '0');
@@ -1362,47 +1398,204 @@ const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer)
return buffer;
}
+/* buffer : must be valid memory area of at least 10 bytes */
+static const char* LZ4IO_toHuman(long double size, char *buf){
+ const char units[] = {"\0KMGTPEZY"};
+ size_t i = 0;
+ for(;size>=1024;i++) size /= 1024;
+ sprintf(buf, "%.2Lf%c", size, units[i]);
+ return buf;
+}
-int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx)
+/* Get filename without path prefix */
+static const char* LZ4IO_baseName(const char* input_filename) {
+ const char* b = strrchr(input_filename, '/');
+ if (!b) b = strrchr(input_filename, '\\');
+ return b ? b + 1 : b;
+}
+
+/* Report frame/s information in verbose mode.
+ * Will populate file info with fileName and contentSize where applicable.
+ * - TODO :
+ * + report nb of blocks, hence max. possible decompressed size (when not reported in header)
+ */
+static LZ4IO_infoResult
+LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename)
{
- int result = 0;
- size_t idx;
+ LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */
+ unsigned char buffer[LZ4F_HEADER_SIZE_MAX];
+ FILE* const finput = LZ4IO_openSrcFile(input_filename);
+ cfinfo->fileSize = UTIL_getFileSize(input_filename);
- DISPLAY("%5s %20s %20s %10s %7s %s\n",
- "Block", "Compressed", "Uncompressed", "Ratio", "Check", "Filename");
- for (idx=0; idx<ifnIdx; idx++) {
+ while(!feof(finput)){
+ { LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO;
+ unsigned magicNumber;
+ /* Get MagicNumber */
+ size_t nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput);
+ if (nbReadBytes==0) { break; } /* EOF */
+ result = LZ4IO_format_not_known; /* default result (error) */
+ if (nbReadBytes != MAGICNUMBER_SIZE)
+ EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
+ magicNumber = LZ4IO_readLE32(buffer); /* Little Endian format */
+ if (LZ4IO_isSkippableMagicNumber(magicNumber))
+ magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */
+
+ switch(magicNumber)
+ {
+ case LZ4IO_MAGICNUMBER:
+ if(cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0;
+ /* Get frame info */
+ { const size_t readBytes = fread(buffer+MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN-MAGICNUMBER_SIZE, finput);
+ if (!readBytes || ferror(finput)) EXM_THROW(71, "Error reading %s", input_filename); }
+ { size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN);
+ if(!LZ4F_isError(hSize)){
+ if(hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)){
+ /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/
+ const size_t readBytes = fread(buffer+LZ4F_HEADER_SIZE_MIN, 1, hSize-LZ4F_HEADER_SIZE_MIN, finput);
+ if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename);
+ }
+ /* Create decompression context */
+ { LZ4F_dctx* dctx;
+ if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) {
+ if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize))) {
+ if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID ||
+ cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode)
+ && cfinfo->frameCount !=0)
+ cfinfo->eqBlockTypes = 0;
+ { const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput,
+ frameInfo.lz4FrameInfo.blockChecksumFlag,
+ frameInfo.lz4FrameInfo.contentChecksumFlag);
+ if(totalBlocksSize){
+ char bTypeBuffer[5];
+ LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer);
+ DISPLAYLEVEL(3, " %6llu %14s %5s %8s",
+ cfinfo->frameCount + 1,
+ LZ4IO_frameTypeNames[frameInfo.frameType],
+ bTypeBuffer,
+ frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-");
+ if(frameInfo.lz4FrameInfo.contentSize){
+ { double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100;
+ DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n",
+ totalBlocksSize + hSize,
+ frameInfo.lz4FrameInfo.contentSize,
+ ratio); }
+ /* Now we've consumed frameInfo we can use it to store the total contentSize */
+ frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize;
+ }
+ else{
+ DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-");
+ cfinfo->allContentSize = 0;
+ }
+ result = LZ4IO_LZ4F_OK;
+ } }
+ }
+ } }
+ } }
+ break;
+ case LEGACY_MAGICNUMBER:
+ frameInfo.frameType = legacyFrame;
+ if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0;
+ cfinfo->eqBlockTypes = 0;
+ cfinfo->allContentSize = 0;
+ { const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput);
+ if (totalBlocksSize){
+ DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20llu %20s %9s\n",
+ cfinfo->frameCount + 1,
+ LZ4IO_frameTypeNames[frameInfo.frameType],
+ "-", "-",
+ totalBlocksSize + 4,
+ "-", "-");
+ result = LZ4IO_LZ4F_OK;
+ } }
+ break;
+ case LZ4IO_SKIPPABLE0:
+ frameInfo.frameType = skippableFrame;
+ if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0;
+ cfinfo->eqBlockTypes = 0;
+ cfinfo->allContentSize = 0;
+ { nbReadBytes = fread(buffer, 1, 4, finput);
+ if (nbReadBytes != 4)
+ EXM_THROW(42, "Stream error : skippable size unreadable");
+ }
+ { unsigned const size = LZ4IO_readLE32(buffer);
+ int const errorNb = fseek_u32(finput, size, SEEK_CUR);
+ if (errorNb != 0)
+ EXM_THROW(43, "Stream error : cannot skip skippable area");
+ DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20u %20s %9s\n",
+ cfinfo->frameCount + 1,
+ "SkippableFrame",
+ "-", "-", size + 8, "-", "-");
+
+ result = LZ4IO_LZ4F_OK;
+ }
+ break;
+ default:
+ { long int const position = ftell(finput); /* only works for files < 2 GB */
+ DISPLAYLEVEL(3, "Stream followed by undecodable data ");
+ if (position != -1L)
+ DISPLAYLEVEL(3, "at position %i ", (int)position);
+ DISPLAYLEVEL(3, "\n");
+ }
+ break;
+ }
+ if(result != LZ4IO_LZ4F_OK){
+ break;
+ }
+ cfinfo->frameSummary = frameInfo; }
+ cfinfo->frameCount++;
+ }
+ fclose(finput);
+ return result;
+}
+
+
+int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx)
+{
+ int result = 0;
+ size_t idx = 0;
+ if(g_displayLevel < 3){
+ DISPLAY("%10s %14s %5s %11s %13s %9s %s\n",
+ "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename");
+ }
+ for (; idx<ifnIdx; idx++) {
/* Get file info */
LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO;
- LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]);
- if (op_result != LZ4IO_LZ4F_OK) {
- if (op_result == LZ4IO_not_a_file) {
- DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]);
- } else {
- assert(op_result == LZ4IO_format_not_known);
- DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
- }
- result = 1;
- continue;
+ cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]);
+ if (!UTIL_isRegFile(inFileNames[idx])) {
+ DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]);
+ return 0;
}
- if (cfinfo.frameInfo.contentSize) {
- char buffer[5];
- double const ratio = (double)cfinfo.fileSize / cfinfo.frameInfo.contentSize;
- DISPLAY("%5s %20llu %20llu %8.4f %7s %s \n",
- LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer),
- cfinfo.fileSize,
- cfinfo.frameInfo.contentSize, ratio,
- cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-",
- cfinfo.fileName);
- } else {
- char buffer[5];
- DISPLAY("%5s %20llu %20s %10s %7s %s \n",
- LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer),
- cfinfo.fileSize,
- "-", "-",
- cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-",
- cfinfo.fileName);
+ DISPLAYLEVEL(3, "%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx+1, (unsigned long long)ifnIdx);
+ DISPLAYLEVEL(3, " %6s %14s %5s %8s %20s %20s %9s\n",
+ "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio")
+ { LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]);
+ if (op_result != LZ4IO_LZ4F_OK) {
+ assert(op_result == LZ4IO_format_not_known);
+ DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
+ return 0;
+ } }
+ DISPLAYLEVEL(3,"\n");
+ if(g_displayLevel < 3){
+ /* Display Summary */
+ { char buffers[3][10];
+ DISPLAY("%10llu %14s %5s %11s %13s ",
+ cfinfo.frameCount,
+ cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" ,
+ cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID,
+ cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-",
+ LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]),
+ cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-");
+ if (cfinfo.allContentSize) {
+ double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100;
+ DISPLAY("%9.2f%% %s \n", ratio, cfinfo.fileName);
+ } else {
+ DISPLAY("%9s %s\n",
+ "-",
+ cfinfo.fileName);
+ } }
}
}
+
return result;
}