summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/lz4frame.c4
-rw-r--r--lib/lz4frame.h9
-rw-r--r--programs/lz4io.c377
-rw-r--r--programs/lz4io.h2
-rw-r--r--tests/Makefile7
-rw-r--r--tests/test-lz4-list.py282
6 files changed, 585 insertions, 96 deletions
diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index f131d9a..95b8b8e 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -213,8 +213,8 @@ static void LZ4F_writeLE64 (void* dst, U64 value64)
static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */
static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */
-static const size_t BHSize = 4; /* block header : size, and compress flag */
-static const size_t BFSize = 4; /* block footer : checksum (optional) */
+static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */
+static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */
/*-************************************
diff --git a/lib/lz4frame.h b/lib/lz4frame.h
index 742c252..391e484 100644
--- a/lib/lz4frame.h
+++ b/lib/lz4frame.h
@@ -253,6 +253,15 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
#define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected paramaters */
#define LZ4F_HEADER_SIZE_MAX 19
+/* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */
+#define LZ4F_BLOCK_HEADER_SIZE 4
+
+/* Size in bytes of a block checksum footer in little-endian format. */
+#define LZ4F_BLOCK_CHECKSUM_SIZE 4
+
+/* Size in bytes of the content checksum. */
+#define LZ4F_CONTENT_CHECKSUM_SIZE 4
+
/*! LZ4F_compressBegin() :
* will write the frame header into dstBuffer.
* dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
diff --git a/programs/lz4io.c b/programs/lz4io.c
index 960c451..c27a0ad 100644
--- a/programs/lz4io.c
+++ b/programs/lz4io.c
@@ -1278,82 +1278,118 @@ int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs,
/* ********************** LZ4 --list command *********************** */
/* ********************************************************************* */
+/* Kind of frame met while walking a file for the --list command. */
+typedef enum
+{
+ lz4Frame = 0,
+ legacyFrame,
+ skippableFrame
+} LZ4IO_frameType_t;
+
+/* Description of one frame : its kind, plus the LZ4F header fields
+ * (lz4FrameInfo is only filled from a header for lz4Frame; it keeps its initial value otherwise). */
+typedef struct {
+ LZ4F_frameInfo_t lz4FrameInfo;
+ LZ4IO_frameType_t frameType;
+} LZ4IO_frameInfo_t;
+
+#define LZ4IO_INIT_FRAMEINFO { LZ4F_INIT_FRAMEINFO, lz4Frame }
+
+/* Per-file summary accumulated while listing a file. */
typedef struct {
- LZ4F_frameInfo_t frameInfo;
const char* fileName;
unsigned long long fileSize;
+ unsigned long long frameCount; /* nb of frames successfully parsed so far */
+ LZ4IO_frameInfo_t frameSummary; /* info of the last parsed frame; its contentSize holds the running total */
+ unsigned short eqFrameTypes; /* reset to 0 when a frame-type mismatch between frames is detected */
+ unsigned short eqBlockTypes; /* reset to 0 when block size ID / block mode differ, or on a legacy / skippable frame */
+ unsigned short allContentSize; /* reset to 0 as soon as one frame does not report its content size */
} LZ4IO_cFileInfo_t;
-#define LZ4IO_INIT_CFILEINFO { LZ4F_INIT_FRAMEINFO, NULL, 0ULL }
-
+/* initializer order follows the member order; the three flags start at 1 */
+#define LZ4IO_INIT_CFILEINFO { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 }
typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult;
-/* This function is limited,
- * it only works fine for a file consisting of a single valid frame using LZ4 Frame specification.
- * It will not look at content beyond first frame header.
- * It's also unable to parse legacy frames, nor skippable ones.
- *
- * Things to improve :
- * - check the entire file for additional content after first frame
- * + combine results from multiple frames, give total
- * - Optional :
- * + report nb of blocks, hence max. possible decompressed size (when not reported in header)
- */
-static LZ4IO_infoResult
-LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename)
-{
- LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */
-
- if (!UTIL_isRegFile(input_filename)) return LZ4IO_not_a_file;
- cfinfo->fileSize = UTIL_getFileSize(input_filename);
+static const char * LZ4IO_frameTypeNames[]={"LZ4Frame", "LegacyFrame", "SkippableFrame" };
- /* Get filename without path prefix */
- { const char* b = strrchr(input_filename, '/');
- if (!b) {
- b = strrchr(input_filename, '\\');
- }
- if (b && b != input_filename) {
- b++;
- } else {
- b = input_filename;
+/* LZ4IO_skipBlocksData() :
+ * Read block headers and skip block data, up to the EndMark (or end of file).
+ * Assumes the file position is right after the frame header.
+ * @return : total size of the blocks of this frame, including block headers,
+ *           block checksums and content checksum,
+ *           or 0 when block data cannot be successfully skipped
+ *           (read / seek error, or a block header cut short).
+ */
+static unsigned long long LZ4IO_skipBlocksData(FILE* finput,
+        const LZ4F_blockChecksum_t blockChecksumFlag,
+        const LZ4F_contentChecksum_t contentChecksumFlag){
+    unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE];
+    unsigned long long totalBlocksSize = 0;
+    for(;;){
+        size_t const readSize = fread(blockInfo, 1, LZ4F_BLOCK_HEADER_SIZE, finput);
+        if (readSize != LZ4F_BLOCK_HEADER_SIZE){
+            /* end of file exactly on a block boundary : report what has been skipped so far */
+            if (readSize == 0 && feof(finput)) return totalBlocksSize;
+            /* read error, or stray trailing bytes : blockInfo does not hold a complete header */
+            return 0;
+        }
+        totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE;
+        {
+            /* highest bit is the "uncompressed" flag, the remaining 31 bits are the block data size */
+            const unsigned long nextCBlockSize = LZ4IO_readLE32(blockInfo) & 0x7FFFFFFFU;
+            const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE);
+            if (nextCBlockSize == 0){
+                /* Reached EndMark */
+                if(contentChecksumFlag){
+                    /* Skip content checksum */
+                    if(fseek(finput, LZ4F_CONTENT_CHECKSUM_SIZE, SEEK_CUR) != 0){
+                        return 0;
+                    }
+                    totalBlocksSize += LZ4F_CONTENT_CHECKSUM_SIZE;
}
- cfinfo->fileName = b;
+                break;
+            }
+            totalBlocksSize += nextBlock;
+            /* skip to the next block */
+            if (fseek(finput, nextBlock, SEEK_CUR) != 0){
+                return 0;
+            }
}
+    }
+    return totalBlocksSize;
+}
- /* Read file and extract header */
- { size_t const hSize = LZ4F_HEADER_SIZE_MAX;
- size_t readSize=0;
-
- void* const buffer = malloc(hSize);
- if (!buffer) EXM_THROW(21, "Allocation error : not enough memory");
-
- { FILE* const finput = LZ4IO_openSrcFile(input_filename);
- if (finput) {
- readSize = fread(buffer, 1, hSize, finput);
- fclose(finput);
- } }
-
- if (readSize > 0) {
- LZ4F_dctx* dctx;
- if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) {
- if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &cfinfo->frameInfo, buffer, &readSize))) {
- result = LZ4IO_LZ4F_OK;
- } }
- LZ4F_freeDecompressionContext(dctx);
+/* LZ4IO_skipLegacyBlocksData() :
+ * For legacy frames only.
+ * Read block headers and skip block data, until end of file or until the
+ * next 4 bytes are the magic number of another frame (the file position is
+ * then moved back to the beginning of that frame).
+ * This works as long as legacy block header size = magic number size.
+ * Assumes the file position is right after the frame header.
+ * @return : total size of the blocks of this frame, including block headers,
+ *           or 0 when block data cannot be successfully skipped.
+ */
+static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput){
+    unsigned char blockInfo[LZIO_LEGACY_BLOCK_HEADER_SIZE];
+    unsigned long long totalBlocksSize = 0;
+    if(LZIO_LEGACY_BLOCK_HEADER_SIZE != MAGICNUMBER_SIZE){
+        DISPLAYLEVEL(4, "Legacy block header size not equal to magic number size. Cannot skip blocks");
+        return 0;
+    }
+    for(;;){
+        size_t const readSize = fread(blockInfo, 1, LZIO_LEGACY_BLOCK_HEADER_SIZE, finput);
+        if (readSize != LZIO_LEGACY_BLOCK_HEADER_SIZE){
+            /* end of file exactly on a block boundary : report what has been skipped so far */
+            if (readSize == 0 && feof(finput)) return totalBlocksSize;
+            /* read error, or stray trailing bytes : blockInfo does not hold a complete header */
+            return 0;
+        }
+        {   const unsigned int nextCBlockSize = LZ4IO_readLE32(blockInfo);
+            if( nextCBlockSize == LEGACY_MAGICNUMBER ||
+                nextCBlockSize == LZ4IO_MAGICNUMBER ||
+                LZ4IO_isSkippableMagicNumber(nextCBlockSize)){
+                /* Rewind back. We want the cursor at the beginning of the next frame. */
+                if (fseek(finput, -LZIO_LEGACY_BLOCK_HEADER_SIZE, SEEK_CUR) != 0){
+                    return 0;
}
-
- /* clean */
- free(buffer);
+                break;
+            }
+            /* such a size cannot be passed to fseek() as a positive offset where long is 32-bit */
+            if (nextCBlockSize > 0x7FFFFFFFU) return 0;
+            /* added separately so that the sum is computed on 64 bits */
+            totalBlocksSize += LZIO_LEGACY_BLOCK_HEADER_SIZE;
+            totalBlocksSize += nextCBlockSize;
+            /* skip to the next block */
+            if (fseek(finput, (long)nextCBlockSize, SEEK_CUR) != 0){
+                return 0;
+            }
}
-
- return result;
+    }
+    return totalBlocksSize;
}
-
/* buffer : must be a valid memory area of at least 4 bytes */
-const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer)
-{
+const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer){
buffer[0] = 'B';
assert(sizeID >= 4); assert(sizeID <=7);
buffer[1] = (char)(sizeID + '0');
@@ -1362,47 +1398,204 @@ const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer)
return buffer;
}
+/* Format a size as a short human-readable string : 2 decimals, followed by a
+ * one-letter binary unit (none below 1024).
+ * buf : must be a valid memory area of at least 10 bytes; it is also the returned pointer. */
+static const char* LZ4IO_toHuman(long double size, char *buf)
+{
+    const char unitSuffix[] = {"\0KMGTPEZY"};   /* index 0 : no suffix */
+    size_t unitIdx = 0;
+    while (size >= 1024) {
+        size /= 1024;
+        unitIdx++;
+    }
+    sprintf(buf, "%.2Lf%c", size, unitSuffix[unitIdx]);
+    return buf;
+}
-int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx)
+/* Get filename without path prefix.
+ * Looks for the last '/' and, only when there is none, for the last '\\'.
+ * @return : pointer inside input_filename (nothing is allocated), never NULL;
+ *           a name without any separator is returned unchanged. */
+static const char* LZ4IO_baseName(const char* input_filename) {
+    const char* b = strrchr(input_filename, '/');
+    if (!b) b = strrchr(input_filename, '\\');
+    /* no separator at all : the whole name is the base name */
+    return b ? b + 1 : input_filename;
+}
+
+/* LZ4IO_getCompressedFileInfo() :
+ * Walk every frame of input_filename (LZ4 frames, legacy frames, skippable frames),
+ * print one line per frame at display level 3, and accumulate into cfinfo :
+ * fileSize, frameCount, frameSummary (info of the last frame, contentSize summed
+ * over frames) and the eqFrameTypes / eqBlockTypes / allContentSize flags.
+ * cfinfo->fileName is not modified here.
+ * @return : LZ4IO_LZ4F_OK when every frame up to end of file was recognized,
+ *           LZ4IO_format_not_known otherwise (also when the file cannot be opened or is empty).
+ * Unreadable magic number / header bytes are reported through EXM_THROW.
+ * - TODO :
+ *   + report nb of blocks, hence max. possible decompressed size (when not reported in header)
+ */
+static LZ4IO_infoResult
+LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename)
{
- int result = 0;
- size_t idx;
+    LZ4IO_infoResult result = LZ4IO_format_not_known;  /* default result (error) */
+    unsigned char buffer[LZ4F_HEADER_SIZE_MAX];
+    FILE* const finput = LZ4IO_openSrcFile(input_filename);
+    if (finput == NULL) return result;   /* nothing to read : feof() / fread() must not receive NULL */
+    cfinfo->fileSize = UTIL_getFileSize(input_filename);
- DISPLAY("%5s %20s %20s %10s %7s %s\n",
- "Block", "Compressed", "Uncompressed", "Ratio", "Check", "Filename");
- for (idx=0; idx<ifnIdx; idx++) {
+    while(!feof(finput)){
+        {   LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO;
+            unsigned magicNumber;
+            /* Get MagicNumber */
+            size_t nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput);
+            if (nbReadBytes==0) { break; } /* EOF */
+            result = LZ4IO_format_not_known;  /* default result (error) */
+            if (nbReadBytes != MAGICNUMBER_SIZE)
+                EXM_THROW(40, "Unrecognized header : Magic Number unreadable");
+            magicNumber = LZ4IO_readLE32(buffer);   /* Little Endian format */
+            if (LZ4IO_isSkippableMagicNumber(magicNumber))
+                magicNumber = LZ4IO_SKIPPABLE0;   /* fold skippable magic numbers */
+
+            switch(magicNumber)
+            {
+            case LZ4IO_MAGICNUMBER:
+                if(cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0;
+                /* Get frame info */
+                {   const size_t readBytes = fread(buffer+MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN-MAGICNUMBER_SIZE, finput);
+                    if (!readBytes || ferror(finput)) EXM_THROW(71, "Error reading %s", input_filename); }
+                {   size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN);
+                    if(!LZ4F_isError(hSize)){
+                        /* LZ4F_HEADER_SIZE_MIN bytes (magic number included) are already in buffer :
+                         * any header longer than that still has bytes to fetch */
+                        if(hSize > LZ4F_HEADER_SIZE_MIN){
+                            /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/
+                            const size_t readBytes = fread(buffer+LZ4F_HEADER_SIZE_MIN, 1, hSize-LZ4F_HEADER_SIZE_MIN, finput);
+                            if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename);
+                        }
+                        /* Create decompression context */
+                        {   LZ4F_dctx* dctx;
+                            if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) {
+                                if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize))) {
+                                    if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID ||
+                                            cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode)
+                                            && cfinfo->frameCount !=0)
+                                        cfinfo->eqBlockTypes = 0;
+                                    {   const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput,
+                                                frameInfo.lz4FrameInfo.blockChecksumFlag,
+                                                frameInfo.lz4FrameInfo.contentChecksumFlag);
+                                        if(totalBlocksSize){
+                                            char bTypeBuffer[5];
+                                            LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer);
+                                            DISPLAYLEVEL(3, " %6llu %14s %5s %8s",
+                                                         cfinfo->frameCount + 1,
+                                                         LZ4IO_frameTypeNames[frameInfo.frameType],
+                                                         bTypeBuffer,
+                                                         frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-");
+                                            if(frameInfo.lz4FrameInfo.contentSize){
+                                                {   double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100;
+                                                    DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n",
+                                                                 totalBlocksSize + hSize,
+                                                                 frameInfo.lz4FrameInfo.contentSize,
+                                                                 ratio); }
+                                                /* Now we've consumed frameInfo we can use it to store the total contentSize */
+                                                frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize;
+                                            }
+                                            else{
+                                                DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-");
+                                                cfinfo->allContentSize = 0;
+                                            }
+                                            result = LZ4IO_LZ4F_OK;
+                                    }   }
+                                }
+                                /* one context is created per frame : release it before moving on */
+                                LZ4F_freeDecompressionContext(dctx);
+                        }   }
+                }   }
+                break;
+            case LEGACY_MAGICNUMBER:
+                frameInfo.frameType = legacyFrame;
+                if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0;
+                cfinfo->eqBlockTypes = 0;
+                cfinfo->allContentSize = 0;
+                {   const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput);
+                    if (totalBlocksSize){
+                        DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20llu %20s %9s\n",
+                                     cfinfo->frameCount + 1,
+                                     LZ4IO_frameTypeNames[frameInfo.frameType],
+                                     "-", "-",
+                                     totalBlocksSize + 4,
+                                     "-", "-");
+                        result = LZ4IO_LZ4F_OK;
+                }   }
+                break;
+            case LZ4IO_SKIPPABLE0:
+                frameInfo.frameType = skippableFrame;
+                if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount !=0) cfinfo->eqFrameTypes = 0;
+                cfinfo->eqBlockTypes = 0;
+                cfinfo->allContentSize = 0;
+                {   nbReadBytes = fread(buffer, 1, 4, finput);
+                    if (nbReadBytes != 4)
+                        EXM_THROW(42, "Stream error : skippable size unreadable");
+                }
+                {   unsigned const size = LZ4IO_readLE32(buffer);
+                    int const errorNb = fseek_u32(finput, size, SEEK_CUR);
+                    if (errorNb != 0)
+                        EXM_THROW(43, "Stream error : cannot skip skippable area");
+                    DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20u %20s %9s\n",
+                                 cfinfo->frameCount + 1,
+                                 "SkippableFrame",
+                                 "-", "-", size + 8, "-", "-");
+
+                    result = LZ4IO_LZ4F_OK;
+                }
+                break;
+            default:
+                {   long int const position = ftell(finput);  /* only works for files < 2 GB */
+                    DISPLAYLEVEL(3, "Stream followed by undecodable data ");
+                    if (position != -1L)
+                        DISPLAYLEVEL(3, "at position %i ", (int)position);
+                    DISPLAYLEVEL(3, "\n");
+                }
+                break;
+            }
+            /* stop at the first frame that could not be parsed; result carries the error */
+            if(result != LZ4IO_LZ4F_OK){
+                break;
+            }
+            cfinfo->frameSummary = frameInfo; }
+        cfinfo->frameCount++;
+    }
+    fclose(finput);
+    return result;
+}
+
+
+/* LZ4IO_displayCompressedFilesInfo() :
+ * --list command : display information about each of the ifnIdx files of inFileNames.
+ * Below display level 3, prints one summary line per file under a single table header;
+ * at display level 3 and above, prints one line per frame instead.
+ * A file which is not a regular file, or whose format is not recognized, is reported
+ * and skipped; the remaining files are still processed.
+ * @return : 0 if all files were listed, 1 if at least one of them could not be.
+ */
+int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx)
+{
+    int result = 0;
+    size_t idx = 0;
+    if(g_displayLevel < 3){
+        DISPLAY("%10s %14s %5s %11s %13s %9s %s\n",
+                "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename");
+    }
+    for (; idx<ifnIdx; idx++) {
/* Get file info */
LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO;
- LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]);
- if (op_result != LZ4IO_LZ4F_OK) {
- if (op_result == LZ4IO_not_a_file) {
- DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]);
- } else {
- assert(op_result == LZ4IO_format_not_known);
- DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
- }
- result = 1;
- continue;
+        cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]);
+        if (!UTIL_isRegFile(inFileNames[idx])) {
+            DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]);
+            /* remember the failure, keep listing the other files */
+            result = 1;
+            continue;
}
- if (cfinfo.frameInfo.contentSize) {
- char buffer[5];
- double const ratio = (double)cfinfo.fileSize / cfinfo.frameInfo.contentSize;
- DISPLAY("%5s %20llu %20llu %8.4f %7s %s \n",
- LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer),
- cfinfo.fileSize,
- cfinfo.frameInfo.contentSize, ratio,
- cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-",
- cfinfo.fileName);
- } else {
- char buffer[5];
- DISPLAY("%5s %20llu %20s %10s %7s %s \n",
- LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer),
- cfinfo.fileSize,
- "-", "-",
- cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-",
- cfinfo.fileName);
+        DISPLAYLEVEL(3, "%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx+1, (unsigned long long)ifnIdx);
+        DISPLAYLEVEL(3, " %6s %14s %5s %8s %20s %20s %9s\n",
+                     "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio");
+        {   LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]);
+            if (op_result != LZ4IO_LZ4F_OK) {
+                assert(op_result == LZ4IO_format_not_known);
+                DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
+                /* remember the failure, keep listing the other files */
+                result = 1;
+                continue;
+        }   }
+        DISPLAYLEVEL(3,"\n");
+        if(g_displayLevel < 3){
+            /* Display Summary */
+            {   char buffers[3][10];   /* [0] block type, [1] compressed size, [2] uncompressed size */
+                DISPLAY("%10llu %14s %5s %11s %13s ",
+                        cfinfo.frameCount,
+                        cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" ,
+                        cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID,
+                                                                cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-",
+                        LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]),
+                        cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-");
+                if (cfinfo.allContentSize) {
+                    double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100;
+                    DISPLAY("%9.2f%% %s \n", ratio, cfinfo.fileName);
+                } else {
+                    DISPLAY("%9s %s\n",
+                            "-",
+                            cfinfo.fileName);
+            }   }
}
}
+
return result;
}
diff --git a/programs/lz4io.h b/programs/lz4io.h
index 213dc95..b189e35 100644
--- a/programs/lz4io.h
+++ b/programs/lz4io.h
@@ -57,6 +57,8 @@ typedef struct LZ4IO_prefs_s LZ4IO_prefs_t;
LZ4IO_prefs_t* LZ4IO_defaultPreferences(void);
void LZ4IO_freePreferences(LZ4IO_prefs_t* const prefs);
+/* Size in bytes of a legacy block header in little-endian format */
+#define LZIO_LEGACY_BLOCK_HEADER_SIZE 4
/* ************************************************** */
/* ****************** Functions ********************* */
diff --git a/tests/Makefile b/tests/Makefile
index 8f0dfd3..3d3cfa2 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -55,7 +55,7 @@ NB_LOOPS ?= -i1
default: all
-all: fullbench fuzzer frametest roundTripTest datagen checkFrame
+all: fullbench fuzzer frametest roundTripTest datagen checkFrame listTest
all32: CFLAGS+=-m32
all32: all
@@ -122,10 +122,13 @@ clean:
versionsTest:
$(PYTHON) test-lz4-versions.py
+.PHONY: listTest
+listTest: lz4
+ QEMU_SYS=$(QEMU_SYS) $(PYTHON) test-lz4-list.py
+
checkTag: checkTag.c $(LZ4DIR)/lz4.h
$(CC) $(FLAGS) $< -o $@$(EXT)
-
#-----------------------------------------------------------------------------
# validated only for Linux, OSX, BSD, Hurd and Solaris targets
#-----------------------------------------------------------------------------
diff --git a/tests/test-lz4-list.py b/tests/test-lz4-list.py
new file mode 100644
index 0000000..4738e99
--- /dev/null
+++ b/tests/test-lz4-list.py
@@ -0,0 +1,282 @@
+#! /usr/bin/env python3
+import subprocess
+import time
+import glob
+import os
+import tempfile
+import unittest
+
+SIZES = [3, 11] # Always 2 sizes
+MIB = 1048576
+LZ4 = os.getcwd() + "/../lz4"
+if not os.path.exists(LZ4):
+ LZ4 = os.getcwd() + "/../programs/lz4"
+TEMP = tempfile.gettempdir()
+
+
+class NVerboseFileInfo(object):
+    """One row of the non-verbose ``lz4 --list`` table, plus the sizes expected from the files on disk."""
+
+    def __init__(self, line_in):
+        self.line = line_in
+        fields = line_in.split()
+        if len(fields) != 7:
+            errout("Unexpected line: {}".format(line_in))
+        (self.frames, self.type, self.block, self.compressed,
+         self.uncompressed, self.ratio, self.filename) = fields
+        # Expected uncompressed size : measured on the source file(s) the archive was built from.
+        if "concat-all" in self.filename or "2f--content-size" in self.filename:
+            source_names = ["test_list_{}M".format(size) for size in SIZES]
+        else:
+            source_names = [self.filename.split("-")[0]]
+        self.exp_unc_size = 0
+        for name in source_names:
+            self.exp_unc_size += os.path.getsize("{}/{}".format(TEMP, name))
+        # Expected compressed size : the archive itself.
+        self.exp_comp_size = os.path.getsize("{}/{}".format(TEMP, self.filename))
+
+
+class TestNonVerbose(unittest.TestCase):
+    """Checks the one-line-per-file summary printed by ``lz4 --list -m``."""
+
+    @classmethod
+    def setUpClass(cls):
+        listing = execute("{} --list -m {}/test_list_*.lz4".format(LZ4, TEMP), print_output=True)
+        # The first line is the table header; every following line describes one file.
+        cls.nvinfo_list = [NVerboseFileInfo(line) for line in listing[1:]]
+
+    def test_frames(self):
+        concat_info = None
+        expected_total = 0
+        for nvinfo in self.nvinfo_list:
+            if "concat-all" in nvinfo.filename:
+                concat_info = nvinfo
+                continue
+            expected = 2 if "2f--content-size" in nvinfo.filename else 1
+            self.assertEqual(str(expected), nvinfo.frames, nvinfo.line)
+            expected_total += expected
+        self.assertNotEqual(None, concat_info, "Couldn't find concat-all file index.")
+        # concat-all holds every other file, hence the sum of their frames
+        self.assertEqual(concat_info.frames, str(expected_total), concat_info.line)
+
+    def test_frame_types(self):
+        expected_by_tag = (("-lz4f-", "LZ4Frame"),
+                           ("-legc-", "LegacyFrame"),
+                           ("-skip-", "SkippableFrame"))
+        for nvinfo in self.nvinfo_list:
+            for tag, frame_type in expected_by_tag:
+                if tag in nvinfo.filename:
+                    self.assertEqual(nvinfo.type, frame_type, nvinfo.line)
+                    break
+
+    def test_block(self):
+        # Only files whose name carries a block-mode argument are checked;
+        # legacy and skippable rows are not asserted on.
+        for nvinfo in self.nvinfo_list:
+            if "--BD" in nvinfo.filename:
+                pattern = "^B[0-9]+D$"
+            elif "--BI" in nvinfo.filename:
+                pattern = "^B[0-9]+I$"
+            else:
+                continue
+            self.assertRegex(nvinfo.block, pattern, nvinfo.line)
+
+    def test_compressed_size(self):
+        for nvinfo in self.nvinfo_list:
+            expected = to_human(nvinfo.exp_comp_size)
+            self.assertEqual(nvinfo.compressed, expected, nvinfo.line)
+
+    def test_ratio(self):
+        for nvinfo in self.nvinfo_list:
+            if "--content-size" not in nvinfo.filename:
+                continue
+            percent = float(nvinfo.exp_comp_size) / float(nvinfo.exp_unc_size) * 100
+            self.assertEqual(nvinfo.ratio, "{:.2f}%".format(percent), nvinfo.line)
+
+    def test_uncompressed_size(self):
+        for nvinfo in self.nvinfo_list:
+            if "--content-size" not in nvinfo.filename:
+                continue
+            self.assertEqual(nvinfo.uncompressed, to_human(nvinfo.exp_unc_size), nvinfo.line)
+
+
+class VerboseFileInfo(object):
+    """Per-file section of the verbose ``lz4 --list`` output : a title line, a header line, then one line per frame."""
+
+    def __init__(self, lines):
+        columns = ["frame", "type", "block", "checksum", "compressed", "uncompressed", "ratio"]
+        self.frame_list = []
+        self.file_frame_map = []
+        # Line 0 is the "<filename>(n/total)" title.
+        for title in lines[:1]:
+            self.filename = title
+        # Line 1 is the column header, frames start at line 2.
+        for line in lines[2:]:
+            frame_info = dict(zip(columns, line.split()))
+            frame_info["line"] = line
+            self.frame_list.append(frame_info)
+
+
+class TestVerbose(unittest.TestCase):
+    """Checks the per-frame table printed by ``lz4 --list -m -v``.
+
+    Two files are listed, but only the concat-all listing (``cvinfo``) is compared
+    frame by frame against the individual files it was assembled from.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        # Even though we're listing 2 files to test that multi-file output works as expected,
+        # we're only really interested in testing the output of the concat-all file.
+        cls.vinfo_list = []
+        # None (rather than 0) marks "not seen yet", so a title on the very first output line still counts.
+        start = end = None
+        output = execute("{} --list -m -v {}/test_list_concat-all.lz4 {}/test_list_*M-lz4f-2f--content-size.lz4".format(LZ4, TEMP, TEMP), print_output=True)
+        for i, line in enumerate(output):
+            if line.startswith("test_list"):
+                if start is not None and end is not None:
+                    cls.vinfo_list.append(VerboseFileInfo(output[start:end]))
+                start = i
+            if not line:
+                end = i
+        cls.vinfo_list.append(VerboseFileInfo(output[start:end]))
+        # Populate file_frame_map as a reference of the expected info.
+        # "[!concat]" is a glob character class : it rejects names whose first character after
+        # "test_list_" is one of c, o, n, a, t, which is enough to leave concat-all out.
+        # NOTE(review): this assumes glob returns the files in the same order as when
+        # generate_files() concatenated them - confirm.
+        concat_file_list = glob.glob("{}/test_list_[!concat]*.lz4".format(TEMP))
+        # One of the files has 2 frames, so duplicate it in this list to map each frame to a single file
+        for i, filename in enumerate(concat_file_list):
+            if "2f--content-size" in filename:
+                concat_file_list.insert(i, filename)
+                break
+        cls.cvinfo = cls.vinfo_list[0]
+        cls.cvinfo.file_frame_map = concat_file_list
+        cls.cvinfo.compressed_size = os.path.getsize("{}/test_list_concat-all.lz4".format(TEMP))
+
+    def _frames_with_source(self):
+        """Pair every frame row of the concat-all listing with the file it is expected to come from."""
+        return [(frame_info, self.cvinfo.file_frame_map[i])
+                for i, frame_info in enumerate(self.cvinfo.frame_list)]
+
+    def test_filename(self):
+        total = len(self.vinfo_list)
+        for i, vinfo in enumerate(self.vinfo_list):
+            # the parentheses are printed literally, hence escaped in the pattern
+            self.assertRegex(vinfo.filename, r"^test_list_.*\({}/{}\)".format(i + 1, total))
+
+    def test_frame_number(self):
+        for vinfo in self.vinfo_list:
+            for i, frame_info in enumerate(vinfo.frame_list):
+                self.assertEqual(frame_info["frame"], str(i + 1), frame_info["line"])
+
+    def test_frame_type(self):
+        for frame_info, source in self._frames_with_source():
+            if "-lz4f-" in source:
+                self.assertEqual(frame_info["type"], "LZ4Frame", frame_info["line"])
+            elif "-legc-" in source:
+                self.assertEqual(frame_info["type"], "LegacyFrame", frame_info["line"])
+            elif "-skip-" in source:
+                self.assertEqual(frame_info["type"], "SkippableFrame", frame_info["line"])
+
+    def test_block(self):
+        # NOTE(review): generate_files() names its files "...-1f-BD.lz4" / "...-1f-BI.lz4",
+        # which do not contain "--BD" / "--BI"; these checks may never trigger - confirm.
+        for frame_info, source in self._frames_with_source():
+            if "--BD" in source:
+                self.assertRegex(frame_info["block"], "^B[0-9]+D$", frame_info["line"])
+            elif "--BI" in source:
+                # a pattern must be matched, not compared for equality
+                self.assertRegex(frame_info["block"], "^B[0-9]+I$", frame_info["line"])
+
+    def test_checksum(self):
+        for frame_info, source in self._frames_with_source():
+            if "-lz4f-" in source and "--no-frame-crc" not in source:
+                self.assertEqual(frame_info["checksum"], "XXH32", frame_info["line"])
+
+    def test_compressed(self):
+        total = 0
+        for frame_info, source in self._frames_with_source():
+            # the 2-frame file maps to two rows, so its size cannot be compared row by row
+            if "-2f-" not in source:
+                expected_size = os.path.getsize(source)
+                self.assertEqual(frame_info["compressed"], str(expected_size), frame_info["line"])
+            total += int(frame_info["compressed"])
+        self.assertEqual(total, self.cvinfo.compressed_size, "Expected total sum ({}) to match {} filesize".format(total, self.cvinfo.filename))
+
+    def test_uncompressed(self):
+        for frame_info, source in self._frames_with_source():
+            # work on the base name : the directory part may itself contain "_" or "M"
+            name = os.path.basename(source)
+            if "-2f-" not in name and "--content-size" in name:
+                expected_size_unc = int(name[name.rindex("_") + 1:name.index("M")]) * MIB
+                self.assertEqual(frame_info["uncompressed"], str(expected_size_unc), frame_info["line"])
+
+    def test_ratio(self):
+        for frame_info, source in self._frames_with_source():
+            if "--content-size" in source:
+                percent = float(frame_info['compressed']) / float(frame_info['uncompressed']) * 100
+                self.assertEqual(frame_info['ratio'], "{:.2f}%".format(percent), frame_info["line"])
+
+
+def to_human(size):
+    """Format ``size`` with 2 decimals and a one-letter binary unit (no unit below 1024)."""
+    suffixes = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']
+    unit = suffixes[-1]
+    for candidate in suffixes:
+        if size < 1024.0:
+            unit = candidate
+            break
+        size /= 1024.0
+    return "{:.2f}{}".format(size, unit)
+
+
+def log(text):
+    """Print ``text`` prefixed with the current local date and time."""
+    timestamp = time.strftime("%Y/%m/%d %H:%M:%S")
+    print(timestamp + ' - ' + text)
+
+
+def errout(text, err=1):
+    """Log ``text``, then terminate the script with exit status ``err``."""
+    log(text)
+    exit(err)
+
+
+def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
+    """Run ``command`` and return its stdout followed by its stderr, split into lines.
+
+    When the QEMU_SYS environment variable is set, its value is prepended to the command.
+    print_command : log the command before running it.
+    print_output  : print the captured stdout and stderr.
+    print_error   : on failure, print stderr (unless print_output already did).
+    param_shell   : passed as ``shell`` to subprocess.Popen.
+    A non-zero return code ends the script through errout().
+    """
+    if os.environ.get('QEMU_SYS'):
+        command = "{} {}".format(os.environ['QEMU_SYS'], command)
+    if print_command:
+        log("> " + command)
+    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell)
+    stdout_lines, stderr_lines = popen.communicate()
+    stderr_lines = stderr_lines.decode("utf-8")
+    stdout_lines = stdout_lines.decode("utf-8")
+    if print_output:
+        if stdout_lines:
+            print(stdout_lines)
+        if stderr_lines:
+            print(stderr_lines)
+    if popen.returncode is not None and popen.returncode != 0:
+        if stderr_lines and not print_output and print_error:
+            print(stderr_lines)
+        # both placeholders are needed : the captured output is part of the failure report
+        errout("Failed to run: {}\n{}".format(command, stdout_lines + stderr_lines))
+    return (stdout_lines + stderr_lines).splitlines()
+
+
+def cleanup(silent=False):
+    """Delete every ``test_list*`` entry of the temporary directory, logging each one unless ``silent``."""
+    pattern = "{}/test_list*".format(TEMP)
+    for path in glob.glob(pattern):
+        if not silent:
+            log("Deleting {}".format(path))
+        os.unlink(path)
+
+
+def datagen(file_name, size):
+    """Create ``file_name`` with ``size`` bytes : a leading hole, then random data for the second half."""
+    random_len = size // 2
+    hole_len = size - random_len
+    with open(file_name, "wb") as out:
+        # seeking past the start leaves the first part unwritten (reads back as zeros)
+        out.seek(hole_len)
+        out.write(os.urandom(random_len))
+
+
+def generate_files():
+    """Create, in the temporary directory, every file listed by the tests.
+
+    For each size of SIZES : a source file, LZ4 frames built with several arguments,
+    a hand-written skippable frame and a legacy frame; then two concatenations.
+    """
+    # file format ~ test_list<frametype>-<no_frames>f<create-args>.lz4 ~
+
+    def concatenate(destination, sources):
+        # opened in append mode : relies on cleanup() having removed any earlier file
+        with open(destination, 'ab') as outfile:
+            for fname in sources:
+                with open(fname, 'rb') as infile:
+                    outfile.write(infile.read())
+
+    for size_mib in SIZES:
+        source = "{}/test_list_{}M".format(TEMP, size_mib)
+        log("Generating {}".format(source))
+        datagen(source, size_mib * MIB)
+        # LZ4 frames, one file per compression argument
+        for arg in ["--content-size", "-BI", "-BD", "-BX", "--no-frame-crc"]:
+            target = "{}-lz4f-1f{}.lz4".format(source, arg)
+            execute("{} {} {} {}".format(LZ4, arg, source, target))
+        # Skippable frame : 4-byte magic number, little-endian payload size, random payload
+        skip_size = size_mib * 1024
+        skip_header = bytes([80, 42, 77, 24]) + skip_size.to_bytes(4, byteorder='little', signed=False)
+        with open("{}-skip-1f.lz4".format(source), 'wb') as f:
+            f.write(skip_header)
+            f.write(os.urandom(skip_size))
+        # Legacy frame
+        execute("{} -l {} {}".format(LZ4, source, "{}-legc-1f.lz4".format(source)))
+
+    # Concatenate --content-size files
+    content_size_files = glob.glob("{}/test_list_*-lz4f-1f--content-size.lz4".format(TEMP))
+    concatenate("{}/test_list_{}M-lz4f-2f--content-size.lz4".format(TEMP, sum(SIZES)), content_size_files)
+
+    # Concatenate all files (the list is taken before concat-all itself exists)
+    every_file = glob.glob("{}/test_list_*.lz4".format(TEMP))
+    concatenate("{}/test_list_concat-all.lz4".format(TEMP), every_file)
+
+
+if __name__ == '__main__':
+    cleanup()
+    generate_files()
+    # exit=False lets the generated files be removed once the tests have run;
+    # the outcome is propagated afterwards so that a failing test fails the caller (e.g. make).
+    test_program = unittest.main(verbosity=2, exit=False)
+    cleanup(silent=True)
+    if not test_program.result.wasSuccessful():
+        raise SystemExit(1)