diff options
Diffstat (limited to 'Modules/zlib/examples')
-rw-r--r-- | Modules/zlib/examples/README.examples | 42 | ||||
-rw-r--r-- | Modules/zlib/examples/fitblk.c | 233 | ||||
-rw-r--r-- | Modules/zlib/examples/gun.c | 693 | ||||
-rw-r--r-- | Modules/zlib/examples/gzappend.c | 500 | ||||
-rw-r--r-- | Modules/zlib/examples/gzjoin.c | 448 | ||||
-rw-r--r-- | Modules/zlib/examples/gzlog.c | 413 | ||||
-rw-r--r-- | Modules/zlib/examples/gzlog.h | 58 | ||||
-rw-r--r-- | Modules/zlib/examples/zlib_how.html | 523 | ||||
-rw-r--r-- | Modules/zlib/examples/zpipe.c | 191 | ||||
-rw-r--r-- | Modules/zlib/examples/zran.c | 404 |
10 files changed, 3505 insertions, 0 deletions
diff --git a/Modules/zlib/examples/README.examples b/Modules/zlib/examples/README.examples new file mode 100644 index 0000000..5632d7a --- /dev/null +++ b/Modules/zlib/examples/README.examples @@ -0,0 +1,42 @@ +This directory contains examples of the use of zlib. + +fitblk.c + compress just enough input to nearly fill a requested output size + - zlib isn't designed to do this, but fitblk does it anyway + +gun.c + uncompress a gzip file + - illustrates the use of inflateBack() for high speed file-to-file + decompression using call-back functions + - is approximately twice as fast as gzip -d + - also provides Unix uncompress functionality, again twice as fast + +gzappend.c + append to a gzip file + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of deflatePrime() to start at any bit + +gzjoin.c + join gzip files without recalculating the crc or recompressing + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of crc32_combine() + +gzlog.c +gzlog.h + efficiently maintain a message log file in gzip format + - illustrates use of raw deflate and Z_SYNC_FLUSH + - illustrates use of gzip header extra field + +zlib_how.html + painfully comprehensive description of zpipe.c (see below) + - describes in excruciating detail the use of deflate() and inflate() + +zpipe.c + reads and writes zlib streams from stdin to stdout + - illustrates the proper use of deflate() and inflate() + - deeply commented in zlib_how.html (see above) + +zran.c + index a zlib or gzip stream and randomly access it + - illustrates the use of Z_BLOCK, inflatePrime(), and + inflateSetDictionary() to provide random access diff --git a/Modules/zlib/examples/fitblk.c b/Modules/zlib/examples/fitblk.c new file mode 100644 index 0000000..c61de5c --- /dev/null +++ b/Modules/zlib/examples/fitblk.c @@ -0,0 +1,233 @@ +/* fitblk.c: example of fitting compressed output to a specified size + Not copyrighted -- provided to the public domain + Version 1.1 25 November 2004 Mark Adler */ + +/* Version history: + 1.0 24 Nov 2004 First version + 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() + Use fixed-size, stack-allocated raw buffers + Simplify code moving compression to subroutines + Use assert() for internal errors + Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + + fitblk performs three compression passes on a portion of the input + data in order to determine how much of that input will compress to + nearly the requested output block size. The first pass generates + enough deflate blocks to produce output to fill the requested + output size plus a specfied excess amount (see the EXCESS define + below). The last deflate block may go quite a bit past that, but + is discarded. The second pass decompresses and recompresses just + the compressed data that fit in the requested plus excess sized + buffer. The deflate process is terminated after that amount of + input, which is less than the amount consumed on the first pass. + The last deflate block of the result will be of a comparable size + to the final product, so that the header for that deflate block and + the compression ratio for that block will be about the same as in + the final product. The third compression pass decompresses the + result of the second step, but only the compressed data up to the + requested size minus an amount to allow the compressed stream to + complete (see the MARGIN define below). That will result in a + final compressed stream whose length is less than or equal to the + requested size. Assuming sufficient input and a requested size + greater than a few hundred bytes, the shortfall will typically be + less than ten bytes. + + If the input is short enough that the first compression completes + before filling the requested output size, then that compressed + stream is return with no recompression. + + EXCESS is chosen to be just greater than the shortfall seen in a + two pass approach similar to the above. That shortfall is due to + the last deflate block compressing more efficiently with a smaller + header on the second pass. EXCESS is set to be large enough so + that there is enough uncompressed data for the second pass to fill + out the requested size, and small enough so that the final deflate + block of the second pass will be close in size to the final deflate + block of the third and final pass. MARGIN is chosen to be just + large enough to assure that the final compression has enough room + to complete in all cases. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "zlib.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ + fprintf(stderr, "fitblk abort: %s\n", why); + exit(1); +} + +#define RAWLEN 4096 /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of + input reached; return last deflate() return value, or Z_ERRNO if + there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + def->avail_in = fread(raw, 1, RAWLEN, in); + if (ferror(in)) + return Z_ERRNO; + def->next_in = raw; + if (feof(in)) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (def->avail_out != 0 && flush == Z_NO_FLUSH); + return ret; +} + +/* recompress from inf's input to def's output; the input for inf and + the output for def are set in those structures before calling; + return last deflate() return value, or Z_MEM_ERROR if inflate() + was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + /* decompress */ + inf->avail_out = RAWLEN; + inf->next_out = raw; + ret = inflate(inf, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && + ret != Z_NEED_DICT); + if (ret == Z_MEM_ERROR) + return ret; + + /* compress what was decompresed until done or no room */ + def->avail_in = RAWLEN - inf->avail_out; + def->next_in = raw; + if (inf->avail_out != 0) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (ret != Z_STREAM_END && def->avail_out != 0); + return ret; +} + +#define EXCESS 256 /* empirically determined stream overage */ +#define MARGIN 8 /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ + int ret; /* return code */ + unsigned size; /* requested fixed output block size */ + unsigned have; /* bytes written by deflate() call */ + unsigned char *blk; /* intermediate and final stream */ + unsigned char *tmp; /* close to desired size stream */ + z_stream def, inf; /* zlib deflate and inflate states */ + + /* get requested output size */ + if (argc != 2) + quit("need one argument: size of output block"); + ret = strtol(argv[1], argv + 1, 10); + if (argv[1][0] != 0) + quit("argument must be a number"); + if (ret < 8) /* 8 is minimum zlib stream size */ + quit("need positive size of 8 or greater"); + size = (unsigned)ret; + + /* allocate memory for buffers and compression engine */ + blk = malloc(size + EXCESS); + def.zalloc = Z_NULL; + def.zfree = Z_NULL; + def.opaque = Z_NULL; + ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK || blk == NULL) + quit("out of memory"); + + /* compress from stdin until output full, or no more input */ + def.avail_out = size + EXCESS; + def.next_out = blk; + ret = partcompress(stdin, &def); + if (ret == Z_ERRNO) + quit("error reading input"); + + /* if it all fit, then size was undersubscribed -- done! */ + if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { + /* write block to stdout */ + have = size + EXCESS - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (all input)\n", + size - have, size); + return 0; + } + + /* it didn't all fit -- set up for recompression */ + inf.zalloc = Z_NULL; + inf.zfree = Z_NULL; + inf.opaque = Z_NULL; + inf.avail_in = 0; + inf.next_in = Z_NULL; + ret = inflateInit(&inf); + tmp = malloc(size + EXCESS); + if (ret != Z_OK || tmp == NULL) + quit("out of memory"); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do first recompression close to the right amount */ + inf.avail_in = size + EXCESS; + inf.next_in = blk; + def.avail_out = size + EXCESS; + def.next_out = tmp; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + + /* set up for next reocmpression */ + ret = inflateReset(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do second and final recompression (third compression) */ + inf.avail_in = size - MARGIN; /* assure stream will complete */ + inf.next_in = tmp; + def.avail_out = size; + def.next_out = blk; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ + + /* done -- write block to stdout */ + have = size - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + free(tmp); + ret = inflateEnd(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (%lu input)\n", + size - have, size, def.total_in); + return 0; +} diff --git a/Modules/zlib/examples/gun.c b/Modules/zlib/examples/gun.c new file mode 100644 index 0000000..bfec590 --- /dev/null +++ b/Modules/zlib/examples/gun.c @@ -0,0 +1,693 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.3 12 June 2005 Mark Adler */ + +/* Version history: + 1.0 16 Feb 2003 First version for testing of inflateBack() + 1.1 21 Feb 2005 Decompress concatenated gzip streams + Remove use of "this" variable (C++ keyword) + Fix return value for in() + Improve allocation failure checking + Add typecasting for void * structures + Add -h option for command version and usage + Add a bunch of comments + 1.2 20 Mar 2005 Add Unix compress (LZW) decompression + Copy file attributes from input file to output file + 1.3 12 Jun 2005 Add casts for error messages [Oberhumer] + */ + +/* + gun [ -t ] [ name ... ] + + decompresses the data in the named gzip files. If no arguments are given, + gun will decompress from stdin to stdout. The names must end in .gz, -gz, + .z, -z, _z, or .Z. The uncompressed data will be written to a file name + with the suffix stripped. On success, the original file is deleted. On + failure, the output file is deleted. For most failures, the command will + continue to process the remaining names on the command line. A memory + allocation failure will abort the command. If -t is specified, then the + listed files or stdin will be tested as gzip files for integrity (without + checking for a proper suffix), no output will be written, and no files + will be deleted. + + Like gzip, gun allows concatenated gzip streams and will decompress them, + writing all of the uncompressed data to the output. Unlike gzip, gun allows + an empty file on input, and will produce no error writing an empty output + file. + + gun will also decompress files made by Unix compress, which uses LZW + compression. These files are automatically detected by virtue of their + magic header bytes. Since the end of Unix compress stream is marked by the + end-of-file, they cannot be concantenated. If a Unix compress stream is + encountered in an input file, it is the last stream in that file. + + Like gunzip and uncompress, the file attributes of the orignal compressed + file are maintained in the final uncompressed file, to the extent that the + user permissions allow it. + + On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version + 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the + LZW decompression provided by gun is about twice as fast as the standard + Unix uncompress command. + */ + +/* external functions and related types and constants */ +#include <stdio.h> /* fprintf() */ +#include <stdlib.h> /* malloc(), free() */ +#include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */ +#include <errno.h> /* errno */ +#include <fcntl.h> /* open() */ +#include <unistd.h> /* read(), write(), close(), chown(), unlink() */ +#include <sys/types.h> +#include <sys/stat.h> /* stat(), chmod() */ +#include <utime.h> /* utime() */ +#include "zlib.h" /* inflateBackInit(), inflateBack(), */ + /* inflateBackEnd(), crc32() */ + +/* function declaration */ +#define local static + +/* buffer constants */ +#define SIZE 32768U /* input and output buffer sizes */ +#define PIECE 16384 /* limits i/o chunks for 16-bit int case */ + +/* structure for infback() to pass to input function in() -- it maintains the + input file and a buffer of size SIZE */ +struct ind { + int infile; + unsigned char *inbuf; +}; + +/* Load input buffer, assumed to be empty, and return bytes loaded and a + pointer to them. read() is called until the buffer is full, or until it + returns end-of-file or error. Return 0 on error. */ +local unsigned in(void *in_desc, unsigned char **buf) +{ + int ret; + unsigned len; + unsigned char *next; + struct ind *me = (struct ind *)in_desc; + + next = me->inbuf; + *buf = next; + len = 0; + do { + ret = PIECE; + if ((unsigned)ret > SIZE - len) + ret = (int)(SIZE - len); + ret = (int)read(me->infile, next, ret); + if (ret == -1) { + len = 0; + break; + } + next += ret; + len += ret; + } while (ret != 0 && len < SIZE); + return len; +} + +/* structure for infback() to pass to output function out() -- it maintains the + output file, a running CRC-32 check on the output and the total number of + bytes output, both for checking against the gzip trailer. (The length in + the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and + the output is greater than 4 GB.) */ +struct outd { + int outfile; + int check; /* true if checking crc and total */ + unsigned long crc; + unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written. write() + is called until all of the output is written or an error is encountered. + On success out() returns 0. For a write failure, out() returns 1. If the + output file descriptor is -1, then nothing is written. + */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ + int ret; + struct outd *me = (struct outd *)out_desc; + + if (me->check) { + me->crc = crc32(me->crc, buf, len); + me->total += len; + } + if (me->outfile != -1) + do { + ret = PIECE; + if ((unsigned)ret > len) + ret = (int)len; + ret = (int)write(me->outfile, buf, ret); + if (ret == -1) + return 1; + buf += ret; + len -= ret; + } while (len != 0); + return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ + last = have ? (have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- + the first 256 entries of prefix[] and suffix[] are never used, could + have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE]; /* input buffer */ +unsigned char outbuf[SIZE]; /* output buffer */ +unsigned short prefix[65536]; /* index to LZW prefix string */ +unsigned char suffix[65536]; /* one-character LZW suffix */ +unsigned char match[65280 + 2]; /* buffer for reversed match or gzip + 32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial + aspect of the compressed data format derived from an implementation that + made use of a special VAX machine instruction!) */ +#define FLUSHCODE() \ + do { \ + left = 0; \ + rem = 0; \ + if (chunk > have) { \ + chunk -= have; \ + have = 0; \ + if (NEXT() == -1) \ + break; \ + chunk--; \ + if (chunk > have) { \ + chunk = have = 0; \ + break; \ + } \ + } \ + have -= chunk; \ + next += chunk; \ + chunk = 0; \ + } while (0) + +/* Decompress a compress (LZW) file from indp to outfile. The compress magic + header (two bytes) has already been read and verified. There are have bytes + of buffered input at next. strm is used for passing error information back + to gunpipe(). + + lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of + file, read error, or write error (a write error indicated by strm->next_in + not equal to Z_NULL), or Z_DATA_ERROR for invalid input. + */ +local int lunpipe(unsigned have, unsigned char *next, struct ind *indp, + int outfile, z_stream *strm) +{ + int last; /* last byte read by NEXT(), or -1 if EOF */ + int chunk; /* bytes left in current chunk */ + int left; /* bits left in rem */ + unsigned rem; /* unused bits from input */ + int bits; /* current bits per code */ + unsigned code; /* code, table traversal index */ + unsigned mask; /* mask for current bits codes */ + int max; /* maximum bits per code for this stream */ + int flags; /* compress flags, then block compress flag */ + unsigned end; /* last valid entry in prefix/suffix tables */ + unsigned temp; /* current code */ + unsigned prev; /* previous code */ + unsigned final; /* last character written for previous code */ + unsigned stack; /* next position for reversed string */ + unsigned outcnt; /* bytes in output buffer */ + struct outd outd; /* output structure */ + + /* set up output */ + outd.outfile = outfile; + outd.check = 0; + + /* process remainder of compress header -- a flags byte */ + flags = NEXT(); + if (last == -1) + return Z_BUF_ERROR; + if (flags & 0x60) { + strm->msg = (char *)"unknown lzw flags set"; + return Z_DATA_ERROR; + } + max = flags & 0x1f; + if (max < 9 || max > 16) { + strm->msg = (char *)"lzw bits out of range"; + return Z_DATA_ERROR; + } + if (max == 9) /* 9 doesn't really mean 9 */ + max = 10; + flags &= 0x80; /* true if block compress */ + + /* clear table */ + bits = 9; + mask = 0x1ff; + end = flags ? 256 : 255; + + /* set up: get first 9-bit code, which is the first decompressed byte, but + don't create a table entry until the next code */ + if (NEXT() == -1) /* no compressed data is ok */ + return Z_OK; + final = prev = (unsigned)last; /* low 8 bits of code */ + if (NEXT() == -1) /* missing a bit */ + return Z_BUF_ERROR; + if (last & 1) { /* code must be < 256 */ + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + rem = (unsigned)last >> 1; /* remaining 7 bits */ + left = 7; + chunk = bits - 2; /* 7 bytes left in this chunk */ + outbuf[0] = (unsigned char)final; /* write first decompressed byte */ + outcnt = 1; + + /* decode codes */ + stack = 0; + for (;;) { + /* if the table will be full after this, increment the code size */ + if (end >= mask && bits < max) { + FLUSHCODE(); + bits++; + mask <<= 1; + mask++; + } + + /* get a code of length bits */ + if (chunk == 0) /* decrement chunk modulo bits */ + chunk = bits; + code = rem; /* low bits of code */ + if (NEXT() == -1) { /* EOF is end of compressed data */ + /* write remaining buffered output */ + if (outcnt && out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + return Z_OK; + } + code += (unsigned)last << left; /* middle (or high) bits of code */ + left += 8; + chunk--; + if (bits > left) { /* need more bits */ + if (NEXT() == -1) /* can't end in middle of code */ + return Z_BUF_ERROR; + code += (unsigned)last << left; /* high bits of code */ + left += 8; + chunk--; + } + code &= mask; /* mask to current code length */ + left -= bits; /* number of unused bits */ + rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ + + /* process clear code (256) */ + if (code == 256 && flags) { + FLUSHCODE(); + bits = 9; /* initialize bits and mask */ + mask = 0x1ff; + end = 255; /* empty table */ + continue; /* get next code */ + } + + /* special code to reuse last match */ + temp = code; /* save the current code */ + if (code > end) { + /* Be picky on the allowed code here, and make sure that the code + we drop through (prev) will be a valid index so that random + input does not cause an exception. The code != end + 1 check is + empirically derived, and not checked in the original uncompress + code. If this ever causes a problem, that check could be safely + removed. Leaving this check in greatly improves gun's ability + to detect random or corrupted input after a compress header. + In any case, the prev > end check must be retained. */ + if (code != end + 1 || prev > end) { + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + match[stack++] = (unsigned char)final; + code = prev; + } + + /* walk through linked list to generate output in reverse order */ + while (code >= 256) { + match[stack++] = suffix[code]; + code = prefix[code]; + } + match[stack++] = (unsigned char)code; + final = code; + + /* link new table entry */ + if (end < mask) { + end++; + prefix[end] = (unsigned short)prev; + suffix[end] = (unsigned char)final; + } + + /* set previous code for next iteration */ + prev = temp; + + /* write output in forward order */ + while (stack > SIZE - outcnt) { + while (outcnt < SIZE) + outbuf[outcnt++] = match[--stack]; + if (out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + outcnt = 0; + } + do { + outbuf[outcnt++] = match[--stack]; + } while (stack); + + /* loop for next code with final and prev as the last match, rem and + left provide the first 0..7 bits of the next code, end is the last + valid table entry */ + } +} + +/* Decompress a gzip file from infile to outfile. strm is assumed to have been + successfully initialized with inflateBackInit(). The input file may consist + of a series of gzip streams, in which case all of them will be decompressed + to the output file. If outfile is -1, then the gzip stream(s) integrity is + checked and nothing is written. + + The return value is a zlib error code: Z_MEM_ERROR if out of memory, + Z_DATA_ERROR if the header or the compressed data is invalid, or if the + trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends + prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip + stream) follows a valid gzip stream. + */ +local int gunpipe(z_stream *strm, int infile, int outfile) +{ + int ret, first, last; + unsigned have, flags, len; + unsigned char *next; + struct ind ind, *indp; + struct outd outd; + + /* setup input buffer */ + ind.infile = infile; + ind.inbuf = inbuf; + indp = &ind; + + /* decompress concatenated gzip streams */ + have = 0; /* no input data read in yet */ + first = 1; /* looking for first gzip header */ + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + for (;;) { + /* look for the two magic header bytes for a gzip stream */ + if (NEXT() == -1) { + ret = Z_OK; + break; /* empty gzip stream is ok */ + } + if (last != 31 || (NEXT() != 139 && last != 157)) { + strm->msg = (char *)"incorrect header check"; + ret = first ? Z_DATA_ERROR : Z_ERRNO; + break; /* not a gzip or compress header */ + } + first = 0; /* next non-header is junk */ + + /* process a compress (LZW) file -- can't be concatenated after this */ + if (last == 157) { + ret = lunpipe(have, next, indp, outfile, strm); + break; + } + + /* process remainder of gzip header */ + ret = Z_BUF_ERROR; + if (NEXT() != 8) { /* only deflate method allowed */ + if (last == -1) break; + strm->msg = (char *)"unknown compression method"; + ret = Z_DATA_ERROR; + break; + } + flags = NEXT(); /* header flags */ + NEXT(); /* discard mod time, xflgs, os */ + NEXT(); + NEXT(); + NEXT(); + NEXT(); + NEXT(); + if (last == -1) break; + if (flags & 0xe0) { + strm->msg = (char *)"unknown header flags set"; + ret = Z_DATA_ERROR; + break; + } + if (flags & 4) { /* extra field */ + len = NEXT(); + len += (unsigned)(NEXT()) << 8; + if (last == -1) break; + while (len > have) { + len -= have; + have = 0; + if (NEXT() == -1) break; + len--; + } + if (last == -1) break; + have -= len; + next += len; + } + if (flags & 8) /* file name */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 16) /* comment */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 2) { /* header crc */ + NEXT(); + NEXT(); + } + if (last == -1) break; + + /* set up output */ + outd.outfile = outfile; + outd.check = 1; + outd.crc = crc32(0L, Z_NULL, 0); + outd.total = 0; + + /* decompress data to output */ + strm->next_in = next; + strm->avail_in = have; + ret = inflateBack(strm, in, indp, out, &outd); + if (ret != Z_STREAM_END) break; + next = strm->next_in; + have = strm->avail_in; + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + + /* check trailer */ + ret = Z_BUF_ERROR; + if (NEXT() != (outd.crc & 0xff) || + NEXT() != ((outd.crc >> 8) & 0xff) || + NEXT() != ((outd.crc >> 16) & 0xff) || + NEXT() != ((outd.crc >> 24) & 0xff)) { + /* crc error */ + if (last != -1) { + strm->msg = (char *)"incorrect data check"; + ret = Z_DATA_ERROR; + } + break; + } + if (NEXT() != (outd.total & 0xff) || + NEXT() != ((outd.total >> 8) & 0xff) || + NEXT() != ((outd.total >> 16) & 0xff) || + NEXT() != ((outd.total >> 24) & 0xff)) { + /* length error */ + if (last != -1) { + strm->msg = (char *)"incorrect length check"; + ret = Z_DATA_ERROR; + } + break; + } + + /* go back and look for another gzip stream */ + } + + /* clean up and return */ + return ret; +} + +/* Copy file attributes, from -> to, as best we can. This is best effort, so + no errors are reported. The mode bits, including suid, sgid, and the sticky + bit are copied (if allowed), the owner's user id and group id are copied + (again if allowed), and the access and modify times are copied. */ +local void copymeta(char *from, char *to) +{ + struct stat was; + struct utimbuf when; + + /* get all of from's Unix meta data, return if not a regular file */ + if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) + return; + + /* set to's mode bits, ignore errors */ + (void)chmod(to, was.st_mode & 07777); + + /* copy owner's user and group, ignore errors */ + (void)chown(to, was.st_uid, was.st_gid); + + /* copy access and modify times, ignore errors */ + when.actime = was.st_atime; + when.modtime = was.st_mtime; + (void)utime(to, &when); +} + +/* Decompress the file inname to the file outnname, of if test is true, just + decompress without writing and check the gzip trailer for integrity. If + inname is NULL or an empty string, read from stdin. If outname is NULL or + an empty string, write to stdout. strm is a pre-initialized inflateBack + structure. When appropriate, copy the file attributes from inname to + outname. + + gunzip() returns 1 if there is an out-of-memory error or an unexpected + return code from gunpipe(). Otherwise it returns 0. + */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ + int ret; + int infile, outfile; + + /* open files */ + if (inname == NULL || *inname == 0) { + inname = "-"; + infile = 0; /* stdin */ + } + else { + infile = open(inname, O_RDONLY, 0); + if (infile == -1) { + fprintf(stderr, "gun cannot open %s\n", inname); + return 0; + } + } + if (test) + outfile = -1; + else if (outname == NULL || *outname == 0) { + outname = "-"; + outfile = 1; /* stdout */ + } + else { + outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); + if (outfile == -1) { + close(infile); + fprintf(stderr, "gun cannot create %s\n", outname); + return 0; + } + } + errno = 0; + + /* decompress */ + ret = gunpipe(strm, infile, outfile); + if (outfile > 2) close(outfile); + if (infile > 2) close(infile); + + /* interpret result */ + switch (ret) { + case Z_OK: + case Z_ERRNO: + if (infile > 2 && outfile > 2) { + copymeta(inname, outname); /* copy attributes */ + unlink(inname); + } + if (ret == Z_ERRNO) + fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", + inname); + break; + case Z_DATA_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); + break; + case Z_MEM_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + case Z_BUF_ERROR: + if (outfile > 2) unlink(outname); + if (strm->next_in != Z_NULL) { + fprintf(stderr, "gun write error on %s: %s\n", + outname, strerror(errno)); + } + else if (errno) { + fprintf(stderr, "gun read error on %s: %s\n", + inname, strerror(errno)); + } + else { + fprintf(stderr, "gun unexpected end of file on %s\n", + inname); + } + break; + default: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun internal error--aborting\n"); + return 1; + } + return 0; +} + +/* Process the gun command line arguments. See the command syntax near the + beginning of this source file. */ +int main(int argc, char **argv) +{ + int ret, len, test; + char *outname; + unsigned char *window; + z_stream strm; + + /* initialize inflateBack state for repeated use */ + window = match; /* reuse LZW match buffer */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = inflateBackInit(&strm, 15, window); + if (ret != Z_OK) { + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + } + + /* decompress each file to the same name with the suffix removed */ + argc--; + argv++; + test = 0; + if (argc && strcmp(*argv, "-h") == 0) { + fprintf(stderr, "gun 1.3 (12 Jun 2005)\n"); + fprintf(stderr, "Copyright (c) 2005 Mark Adler\n"); + fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); + return 0; + } + if (argc && strcmp(*argv, "-t") == 0) { + test = 1; + argc--; + argv++; + } + if (argc) + do { + if (test) + outname = NULL; + else { + len = (int)strlen(*argv); + if (strcmp(*argv + len - 3, ".gz") == 0 || + strcmp(*argv + len - 3, "-gz") == 0) + len -= 3; + else if (strcmp(*argv + len - 2, ".z") == 0 || + strcmp(*argv + len - 2, "-z") == 0 || + strcmp(*argv + len - 2, "_z") == 0 || + strcmp(*argv + len - 2, ".Z") == 0) + len -= 2; + else { + fprintf(stderr, "gun error: no gz type on %s--skipping\n", + *argv); + continue; + } + outname = malloc(len + 1); + if (outname == NULL) { + fprintf(stderr, "gun out of memory error--aborting\n"); + ret = 1; + break; + } + memcpy(outname, *argv, len); + outname[len] = 0; + } + ret = gunzip(&strm, *argv, outname, test); + if (outname != NULL) free(outname); + if (ret) break; + } while (argv++, --argc); + else + ret = gunzip(&strm, NULL, NULL, test); + + /* clean up */ + inflateBackEnd(&strm); + return ret; +} diff --git a/Modules/zlib/examples/gzappend.c b/Modules/zlib/examples/gzappend.c new file mode 100644 index 0000000..e9e878e --- /dev/null +++ b/Modules/zlib/examples/gzappend.c @@ -0,0 +1,500 @@ +/* gzappend -- command to append to a gzip file + + Copyright (C) 2003 Mark Adler, all rights reserved + version 1.1, 4 Nov 2003 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 19 Oct 2003 - First version + * 1.1 4 Nov 2003 - Expand and clarify some comments and notes + * - Add version and copyright to help + * - Send help to stdout instead of stderr + * - Add some preemptive typecasts + * - Add L to constants in lseek() calls + * - Remove some debugging information in error messages + * - Use new data_type definition for zlib 1.2.1 + * - Simplfy and unify file operations + * - Finish off gzip file in gztack() + * - Use deflatePrime() instead of adding empty blocks + * - Keep gzip file clean on appended file read errors + * - Use in-place rotate instead of auxiliary buffer + * (Why you ask? Because it was fun to write!) + */ + +/* + gzappend takes a gzip file and appends to it, compressing files from the + command line or data from stdin. The gzip file is written to directly, to + avoid copying that file, in case it's large. Note that this results in the + unfriendly behavior that if gzappend fails, the gzip file is corrupted. + + This program was written to illustrate the use of the new Z_BLOCK option of + zlib 1.2.x's inflate() function. This option returns from inflate() at each + block boundary to facilitate locating and modifying the last block bit at + the start of the final deflate block. Also whether using Z_BLOCK or not, + another required feature of zlib 1.2.x is that inflate() now provides the + number of unusued bits in the last input byte used. gzappend will not work + with versions of zlib earlier than 1.2.1. + + gzappend first decompresses the gzip file internally, discarding all but + the last 32K of uncompressed data, and noting the location of the last block + bit and the number of unused bits in the last byte of the compressed data. + The gzip trailer containing the CRC-32 and length of the uncompressed data + is verified. This trailer will be later overwritten. + + Then the last block bit is cleared by seeking back in the file and rewriting + the byte that contains it. Seeking forward, the last byte of the compressed + data is saved along with the number of unused bits to initialize deflate. + + A deflate process is initialized, using the last 32K of the uncompressed + data from the gzip file to initialize the dictionary. If the total + uncompressed data was less than 32K, then all of it is used to initialize + the dictionary. The deflate output bit buffer is also initialized with the + last bits from the original deflate stream. From here on, the data to + append is simply compressed using deflate, and written to the gzip file. + When that is complete, the new CRC-32 and uncompressed length are written + as the trailer of the gzip file. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include "zlib.h" + +#define local static +#define LGCHUNK 14 +#define CHUNK (1U << LGCHUNK) +#define DSIZE 32768U + +/* print an error message and terminate with extreme prejudice */ +local void bye(char *msg1, char *msg2) +{ + fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2); + exit(1); +} + +/* return the greatest common divisor of a and b using Euclid's algorithm, + modified to be fast when one argument much greater than the other, and + coded to avoid unnecessary swapping */ +local unsigned gcd(unsigned a, unsigned b) +{ + unsigned c; + + while (a && b) + if (a > b) { + c = b; + while (a - c >= c) + c <<= 1; + a -= c; + } + else { + c = a; + while (b - c >= c) + c <<= 1; + b -= c; + } + return a + b; +} + +/* rotate list[0..len-1] left by rot positions, in place */ +local void rotate(unsigned char *list, unsigned len, unsigned rot) +{ + unsigned char tmp; + unsigned cycles; + unsigned char *start, *last, *to, *from; + + /* normalize rot and handle degenerate cases */ + if (len < 2) return; + if (rot >= len) rot %= len; + if (rot == 0) return; + + /* pointer to last entry in list */ + last = list + (len - 1); + + /* do simple left shift by one */ + if (rot == 1) { + tmp = *list; + memcpy(list, list + 1, len - 1); + *last = tmp; + return; + } + + /* do simple right shift by one */ + if (rot == len - 1) { + tmp = *last; + memmove(list + 1, list, len - 1); + *list = tmp; + return; + } + + /* otherwise do rotate as a set of cycles in place */ + cycles = gcd(len, rot); /* number of cycles */ + do { + start = from = list + cycles; /* start index is arbitrary */ + tmp = *from; /* save entry to be overwritten */ + for (;;) { + to = from; /* next step in cycle */ + from += rot; /* go right rot positions */ + if (from > last) from -= len; /* (pointer better not wrap) */ + if (from == start) break; /* all but one shifted */ + *to = *from; /* shift left */ + } + *to = tmp; /* complete the circle */ + } while (--cycles); +} + +/* structure for gzip file read operations */ +typedef struct { + int fd; /* file descriptor */ + int size; /* 1 << size is bytes in buf */ + unsigned left; /* bytes available at next */ + unsigned char *buf; /* buffer */ + unsigned char *next; /* next byte in buffer */ + char *name; /* file name for error messages */ +} file; + +/* reload buffer */ +local int readin(file *in) +{ + int len; + + len = read(in->fd, in->buf, 1 << in->size); + if (len == -1) bye("error reading ", in->name); + in->left = (unsigned)len; + in->next = in->buf; + return len; +} + +/* read from file in, exit if end-of-file */ +local int readmore(file *in) +{ + if (readin(in) == 0) bye("unexpected end of ", in->name); + return 0; +} + +#define read1(in) (in->left == 0 ? readmore(in) : 0, \ + in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ + unsigned bypass; + + if (n > in->left) { + n -= in->left; + bypass = n & ~((1U << in->size) - 1); + if (bypass) { + if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) + bye("seeking ", in->name); + n -= bypass; + } + readmore(in); + if (n > in->left) + bye("unexpected end of ", in->name); + } + in->left -= n; + in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ + unsigned long val; + + val = read1(in); + val += (unsigned)read1(in) << 8; + val += (unsigned long)read1(in) << 16; + val += (unsigned long)read1(in) << 24; + return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ + int flags; + unsigned n; + + if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); + if (read1(in) != 8) bye("unknown compression method in", in->name); + flags = read1(in); + if (flags & 0xe0) bye("unknown header flags set in", in->name); + skip(in, 6); + if (flags & 4) { + n = read1(in); + n += (unsigned)(read1(in)) << 8; + skip(in, n); + } + if (flags & 8) while (read1(in) != 0) ; + if (flags & 16) while (read1(in) != 0) ; + if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to + continue compression of the data in the gzip file, and return a file + descriptor pointing to where to write the compressed data -- the deflate + stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ + int ret, lastbit, left, full; + unsigned have; + unsigned long crc, tot; + unsigned char *window; + off_t lastoff, end; + file gz; + + /* open gzip file */ + gz.name = name; + gz.fd = open(name, O_RDWR, 0); + if (gz.fd == -1) bye("cannot open ", name); + gz.buf = malloc(CHUNK); + if (gz.buf == NULL) bye("out of memory", ""); + gz.size = LGCHUNK; + gz.left = 0; + + /* skip gzip header */ + gzheader(&gz); + + /* prepare to decompress */ + window = malloc(DSIZE); + if (window == NULL) bye("out of memory", ""); + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = inflateInit2(strm, -15); + if (ret != Z_OK) bye("out of memory", " or library mismatch"); + + /* decompress the deflate stream, saving append information */ + lastbit = 0; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + left = 0; + strm->avail_in = gz.left; + strm->next_in = gz.next; + crc = crc32(0L, Z_NULL, 0); + have = full = 0; + do { + /* if needed, get more input */ + if (strm->avail_in == 0) { + readmore(&gz); + strm->avail_in = gz.left; + strm->next_in = gz.next; + } + + /* set up output to next available section of sliding window */ + strm->avail_out = DSIZE - have; + strm->next_out = window + have; + + /* inflate and check for errors */ + ret = inflate(strm, Z_BLOCK); + if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); + if (ret == Z_MEM_ERROR) bye("out of memory", ""); + if (ret == Z_DATA_ERROR) + bye("invalid compressed data--format violated in", name); + + /* update crc and sliding window pointer */ + crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); + if (strm->avail_out) + have = DSIZE - strm->avail_out; + else { + have = 0; + full = 1; + } + + /* process end of block */ + if (strm->data_type & 128) { + if (strm->data_type & 64) + left = strm->data_type & 0x1f; + else { + lastbit = strm->data_type & 0x1f; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; + } + } + } while (ret != Z_STREAM_END); + inflateEnd(strm); + gz.left = strm->avail_in; + gz.next = strm->next_in; + + /* save the location of the end of the compressed data */ + end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + + /* check gzip trailer and save total for deflate */ + if (crc != read4(&gz)) + bye("invalid compressed data--crc mismatch in ", name); + tot = strm->total_out; + if ((tot & 0xffffffffUL) != read4(&gz)) + bye("invalid compressed data--length mismatch in", name); + + /* if not at end of file, warn */ + if (gz.left || readin(&gz)) + fprintf(stderr, + "gzappend warning: junk at end of gzip file overwritten\n"); + + /* clear last block bit */ + lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); + lseek(gz.fd, -1L, SEEK_CUR); + if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + + /* if window wrapped, build dictionary from window by rotating */ + if (full) { + rotate(window, DSIZE, have); + have = DSIZE; + } + + /* set up deflate stream with window, crc, total_in, and leftover bits */ + ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) bye("out of memory", ""); + deflateSetDictionary(strm, window, have); + strm->adler = crc; + strm->total_in = tot; + if (left) { + lseek(gz.fd, --end, SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + deflatePrime(strm, 8 - left, *gz.buf); + } + lseek(gz.fd, end, SEEK_SET); + + /* clean up and return */ + free(window); + free(gz.buf); + return gz.fd; +} + +/* append file "name" to gzip file gd using deflate stream strm -- if last + is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ + int fd, len, ret; + unsigned left; + unsigned char *in, *out; + + /* open file to compress and append */ + fd = 0; + if (name != NULL) { + fd = open(name, O_RDONLY, 0); + if (fd == -1) + fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", + name); + } + + /* allocate buffers */ + in = fd == -1 ? NULL : malloc(CHUNK); + out = malloc(CHUNK); + if (out == NULL) bye("out of memory", ""); + + /* compress input file and append to gzip file */ + do { + /* get more input */ + len = fd == -1 ? 0 : read(fd, in, CHUNK); + if (len == -1) { + fprintf(stderr, + "gzappend warning: error reading %s, skipping rest ...\n", + name); + len = 0; + } + strm->avail_in = (unsigned)len; + strm->next_in = in; + if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + + /* compress and write all available output */ + do { + strm->avail_out = CHUNK; + strm->next_out = out; + ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH); + left = CHUNK - strm->avail_out; + while (left) { + len = write(gd, out + CHUNK - strm->avail_out - left, left); + if (len == -1) bye("writing gzip file", ""); + left -= (unsigned)len; + } + } while (strm->avail_out == 0 && ret != Z_STREAM_END); + } while (len != 0); + + /* write trailer after last entry */ + if (last) { + deflateEnd(strm); + out[0] = (unsigned char)(strm->adler); + out[1] = (unsigned char)(strm->adler >> 8); + out[2] = (unsigned char)(strm->adler >> 16); + out[3] = (unsigned char)(strm->adler >> 24); + out[4] = (unsigned char)(strm->total_in); + out[5] = (unsigned char)(strm->total_in >> 8); + out[6] = (unsigned char)(strm->total_in >> 16); + out[7] = (unsigned char)(strm->total_in >> 24); + len = 8; + do { + ret = write(gd, out + 8 - len, len); + if (ret == -1) bye("writing gzip file", ""); + len -= ret; + } while (len); + close(gd); + } + + /* clean up and return */ + free(out); + if (in != NULL) free(in); + if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and + append the specified files, or append the data from stdin if no other file + names are provided on the command line -- the gzip file must be writable + and seekable */ +int main(int argc, char **argv) +{ + int gd, level; + z_stream strm; + + /* ignore command name */ + argv++; + + /* provide usage if no arguments */ + if (*argv == NULL) { + printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n"); + printf( + "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n"); + return 0; + } + + /* set compression level */ + level = Z_DEFAULT_COMPRESSION; + if (argv[0][0] == '-') { + if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) + bye("invalid compression level", ""); + level = argv[0][1] - '0'; + if (*++argv == NULL) bye("no gzip file name after options", ""); + } + + /* prepare to append to gzip file */ + gd = gzscan(*argv++, &strm, level); + + /* append files on command line, or from stdin if none */ + if (*argv == NULL) + gztack(NULL, gd, &strm, 1); + else + do { + gztack(*argv, gd, &strm, argv[1] == NULL); + } while (*++argv != NULL); + return 0; +} diff --git a/Modules/zlib/examples/gzjoin.c b/Modules/zlib/examples/gzjoin.c new file mode 100644 index 0000000..129347c --- /dev/null +++ b/Modules/zlib/examples/gzjoin.c @@ -0,0 +1,448 @@ +/* gzjoin -- command to join gzip files into one gzip file + + Copyright (C) 2004 Mark Adler, all rights reserved + version 1.0, 11 Dec 2004 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 11 Dec 2004 - First version + * 1.1 12 Jun 2005 - Changed ssize_t to long for portability + */ + +/* + gzjoin takes one or more gzip files on the command line and writes out a + single gzip file that will uncompress to the concatenation of the + uncompressed data from the individual gzip files. gzjoin does this without + having to recompress any of the data and without having to calculate a new + crc32 for the concatenated uncompressed data. gzjoin does however have to + decompress all of the input data in order to find the bits in the compressed + data that need to be modified to concatenate the streams. + + gzjoin does not do an integrity check on the input gzip files other than + checking the gzip header and decompressing the compressed data. They are + otherwise assumed to be complete and correct. + + Each joint between gzip files removes at least 18 bytes of previous trailer + and subsequent header, and inserts an average of about three bytes to the + compressed data in order to connect the streams. The output gzip file + has a minimal ten-byte gzip header with no file name or modification time. + + This program was written to illustrate the use of the Z_BLOCK option of + inflate() and the crc32_combine() function. gzjoin will not compile with + versions of zlib earlier than 1.2.3. + */ + +#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */ +#include <stdlib.h> /* exit(), malloc(), free() */ +#include <fcntl.h> /* open() */ +#include <unistd.h> /* close(), read(), lseek() */ +#include "zlib.h" + /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ + fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); + exit(1); + return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768 /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { + char *name; /* name of file for error messages */ + int fd; /* file descriptor */ + unsigned left; /* bytes remaining at next */ + unsigned char *next; /* next byte to read */ + unsigned char *buf; /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ + if (in != NULL) { + if (in->fd != -1) + close(in->fd); + if (in->buf != NULL) + free(in->buf); + free(in); + } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on + failure */ +local bin *bopen(char *name) +{ + bin *in; + + in = malloc(sizeof(bin)); + if (in == NULL) + return NULL; + in->buf = malloc(CHUNK); + in->fd = open(name, O_RDONLY, 0); + if (in->buf == NULL || in->fd == -1) { + bclose(in); + return NULL; + } + in->left = 0; + in->next = in->buf; + in->name = name; + return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with + 1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ + long len; + + if (in == NULL) + return -1; + if (in->left != 0) + return 0; + in->next = in->buf; + do { + len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); + if (len < 0) + return -1; + in->left += (unsigned)len; + } while (len != 0 && in->left < CHUNK); + return len == 0 ? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ + in->left ? (in->left--, *(in->next)++) : \ + bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ + unsigned long val; + + val = bget(in); + val += (unsigned long)(bget(in)) << 8; + val += (unsigned long)(bget(in)) << 16; + val += (unsigned long)(bget(in)) << 24; + return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ + /* check pointer */ + if (in == NULL) + return; + + /* easy case -- skip bytes in buffer */ + if (skip <= in->left) { + in->left -= skip; + in->next += skip; + return; + } + + /* skip what's in buffer, discard buffer contents */ + skip -= in->left; + in->left = 0; + + /* seek past multiples of CHUNK bytes */ + if (skip > CHUNK) { + unsigned left; + + left = skip & (CHUNK - 1); + if (left == 0) { + /* exact number of chunks: seek all the way minus one byte to check + for end-of-file with a read */ + lseek(in->fd, skip - 1, SEEK_CUR); + if (read(in->fd, in->buf, 1) != 1) + bail("unexpected end of file on ", in->name); + return; + } + + /* skip the integral chunks, update skip with remainder */ + lseek(in->fd, skip - left, SEEK_CUR); + skip = left; + } + + /* read more input and skip remainder */ + bload(in); + if (skip > in->left) + bail("unexpected end of file on ", in->name); + in->left -= skip; + in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ + int flags; + + /* verify gzip magic header and compression method */ + if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) + bail(in->name, " is not a valid gzip file"); + + /* get and verify flags */ + flags = bget(in); + if ((flags & 0xe0) != 0) + bail("unknown reserved bits set in ", in->name); + + /* skip modification time, extra flags, and os */ + bskip(in, 6); + + /* skip extra field if present */ + if (flags & 4) { + unsigned len; + + len = bget(in); + len += (unsigned)(bget(in)) << 8; + bskip(in, len); + } + + /* skip file name if present */ + if (flags & 8) + while (bget(in) != 0) + ; + + /* skip comment if present */ + if (flags & 16) + while (bget(in) != 0) + ; + + /* skip header crc if present */ + if (flags & 2) + bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ + putc(val & 0xff, out); + putc((val >> 8) & 0xff, out); + putc((val >> 16) & 0xff, out); + putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ + if (in->left == 0) + bload(in); + if (in->left == 0) + bail("unexpected end of file on ", in->name); + strm->avail_in = in->left; + strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + *crc = crc32(0L, Z_NULL, 0); + *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last + block if clr is true, and adding empty blocks as needed to get to a byte + boundary. If clr is false, then the last block becomes the last block of + the output, and the gzip trailer is written. crc and tot maintains the + crc and length (modulo 2^32) of the output for the trailer. The resulting + gzip file is written to out. gzinit() must be called before the first call + of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, + FILE *out) +{ + int ret; /* return value from zlib functions */ + int pos; /* where the "last block" bit is in byte */ + int last; /* true if processing the last block */ + bin *in; /* buffered input file */ + unsigned char *start; /* start of compressed data in buffer */ + unsigned char *junk; /* buffer for uncompressed data -- discarded */ + z_off_t len; /* length of uncompressed data (support > 4 GB) */ + z_stream strm; /* zlib inflate stream */ + + /* open gzip file and skip header */ + in = bopen(name); + if (in == NULL) + bail("could not open ", name); + gzhead(in); + + /* allocate buffer for uncompressed data and initialize raw inflate + stream */ + junk = malloc(CHUNK); + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (junk == NULL || ret != Z_OK) + bail("out of memory", ""); + + /* inflate and copy compressed data, clear last-block bit if requested */ + len = 0; + zpull(&strm, in); + start = strm.next_in; + last = start[0] & 1; + if (last && clr) + start[0] &= ~1; + strm.avail_out = 0; + for (;;) { + /* if input used and output done, write used input and get more */ + if (strm.avail_in == 0 && strm.avail_out != 0) { + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + + /* decompress -- return early when end-of-block reached */ + strm.avail_out = CHUNK; + strm.next_out = junk; + ret = inflate(&strm, Z_BLOCK); + switch (ret) { + case Z_MEM_ERROR: + bail("out of memory", ""); + case Z_DATA_ERROR: + bail("invalid compressed data in ", in->name); + } + + /* update length of uncompressed data */ + len += CHUNK - strm.avail_out; + + /* check for block boundary (only get this when block copied out) */ + if (strm.data_type & 128) { + /* if that was the last block, then done */ + if (last) + break; + + /* number of unused bits in last byte */ + pos = strm.data_type & 7; + + /* find the next last-block bit */ + if (pos != 0) { + /* next last-block bit is in last used byte */ + pos = 0x100 >> pos; + last = strm.next_in[-1] & pos; + if (last && clr) + strm.next_in[-1] &= ~pos; + } + else { + /* next last-block bit is in next unused byte */ + if (strm.avail_in == 0) { + /* don't have that byte yet -- get it */ + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + last = strm.next_in[0] & 1; + if (last && clr) + strm.next_in[0] &= ~1; + } + } + } + + /* update buffer with unused input */ + in->left = strm.avail_in; + in->next = strm.next_in; + + /* copy used input, write empty blocks to get to byte boundary */ + pos = strm.data_type & 7; + fwrite(start, 1, in->next - start - 1, out); + last = in->next[-1]; + if (pos == 0 || !clr) + /* already at byte boundary, or last file: write last byte */ + putc(last, out); + else { + /* append empty blocks to last byte */ + last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ + if (pos & 1) { + /* odd -- append an empty stored block */ + putc(last, out); + if (pos == 1) + putc(0, out); /* two more bits in block header */ + fwrite("\0\0\xff\xff", 1, 4, out); + } + else { + /* even -- append 1, 2, or 3 empty fixed blocks */ + switch (pos) { + case 6: + putc(last | 8, out); + last = 0; + case 4: + putc(last | 0x20, out); + last = 0; + case 2: + putc(last | 0x80, out); + putc(0, out); + } + } + } + + /* update crc and tot */ + *crc = crc32_combine(*crc, bget4(in), len); + *tot += (unsigned long)len; + + /* clean up */ + inflateEnd(&strm); + free(junk); + bclose(in); + + /* write trailer if this is the last gzip file */ + if (!clr) { + put4(*crc, out); + put4(*tot, out); + } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ + unsigned long crc, tot; /* running crc and total uncompressed length */ + + /* skip command name */ + argc--; + argv++; + + /* show usage if no arguments */ + if (argc == 0) { + fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", + stderr); + return 0; + } + + /* join gzip files on command line and write to stdout */ + gzinit(&crc, &tot, stdout); + while (argc--) + gzcopy(*argv++, argc, &crc, &tot, stdout); + + /* done */ + return 0; +} diff --git a/Modules/zlib/examples/gzlog.c b/Modules/zlib/examples/gzlog.c new file mode 100644 index 0000000..f71f817 --- /dev/null +++ b/Modules/zlib/examples/gzlog.c @@ -0,0 +1,413 @@ +/* + * gzlog.c + * Copyright (C) 2004 Mark Adler + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 1.0, 26 Nov 2004 + * + */ + +#include <string.h> /* memcmp() */ +#include <stdlib.h> /* malloc(), free(), NULL */ +#include <sys/types.h> /* size_t, off_t */ +#include <unistd.h> /* read(), close(), sleep(), ftruncate(), */ + /* lseek() */ +#include <fcntl.h> /* open() */ +#include <sys/file.h> /* flock() */ +#include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */ + +#include "gzlog.h" /* interface */ +#define local static + +/* log object structure */ +typedef struct { + int id; /* object identifier */ + int fd; /* log file descriptor */ + off_t extra; /* offset of extra "ap" subfield */ + off_t mark_off; /* offset of marked data */ + off_t last_off; /* offset of last block */ + unsigned long crc; /* uncompressed crc */ + unsigned long len; /* uncompressed length (modulo 2^32) */ + unsigned stored; /* length of current stored block */ +} gz_log; + +#define GZLOGID 19334 /* gz_log object identifier */ + +#define LOCK_RETRY 1 /* retry lock once a second */ +#define LOCK_PATIENCE 1200 /* try about twenty minutes before forcing */ + +/* acquire a lock on a file */ +local int lock(int fd) +{ + int patience; + + /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */ + patience = LOCK_PATIENCE; + do { + if (flock(fd, LOCK_EX + LOCK_NB) == 0) + return 0; + (void)sleep(LOCK_RETRY); + patience -= LOCK_RETRY; + } while (patience > 0); + + /* we've run out of patience -- give up */ + return -1; +} + +/* release lock */ +local void unlock(int fd) +{ + (void)flock(fd, LOCK_UN); +} + +/* release a log object */ +local void log_clean(gz_log *log) +{ + unlock(log->fd); + (void)close(log->fd); + free(log); +} + +/* read an unsigned long from a byte buffer little-endian */ +local unsigned long make_ulg(unsigned char *buf) +{ + int n; + unsigned long val; + + val = (unsigned long)(*buf++); + for (n = 8; n < 32; n += 8) + val += (unsigned long)(*buf++) << n; + return val; +} + +/* read an off_t from a byte buffer little-endian */ +local off_t make_off(unsigned char *buf) +{ + int n; + off_t val; + + val = (off_t)(*buf++); + for (n = 8; n < 64; n += 8) + val += (off_t)(*buf++) << n; + return val; +} + +/* write an unsigned long little-endian to byte buffer */ +local void dice_ulg(unsigned long val, unsigned char *buf) +{ + int n; + + for (n = 0; n < 4; n++) { + *buf++ = val & 0xff; + val >>= 8; + } +} + +/* write an off_t little-endian to byte buffer */ +local void dice_off(off_t val, unsigned char *buf) +{ + int n; + + for (n = 0; n < 8; n++) { + *buf++ = val & 0xff; + val >>= 8; + } +} + +/* initial, empty gzip file for appending */ +local char empty_gz[] = { + 0x1f, 0x8b, /* magic gzip id */ + 8, /* compression method is deflate */ + 4, /* there is an extra field */ + 0, 0, 0, 0, /* no modification time provided */ + 0, 0xff, /* no extra flags, no OS */ + 20, 0, 'a', 'p', 16, 0, /* extra field with "ap" subfield */ + 32, 0, 0, 0, 0, 0, 0, 0, /* offset of uncompressed data */ + 32, 0, 0, 0, 0, 0, 0, 0, /* offset of last block */ + 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ + 0, 0, 0, 0, /* crc */ + 0, 0, 0, 0 /* uncompressed length */ +}; + +/* initialize a log object with locking */ +void *gzlog_open(char *path) +{ + unsigned xlen; + unsigned char temp[20]; + unsigned sub_len; + int good; + gz_log *log; + + /* allocate log structure */ + log = malloc(sizeof(gz_log)); + if (log == NULL) + return NULL; + log->id = GZLOGID; + + /* open file, creating it if necessary, and locking it */ + log->fd = open(path, O_RDWR | O_CREAT, 0600); + if (log->fd < 0) { + free(log); + return NULL; + } + if (lock(log->fd)) { + close(log->fd); + free(log); + return NULL; + } + + /* if file is empty, write new gzip stream */ + if (lseek(log->fd, 0, SEEK_END) == 0) { + if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { + log_clean(log); + return NULL; + } + } + + /* check gzip header */ + (void)lseek(log->fd, 0, SEEK_SET); + if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || + temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { + log_clean(log); + return NULL; + } + + /* process extra field to find "ap" sub-field */ + xlen = temp[10] + (temp[11] << 8); + good = 0; + while (xlen) { + if (xlen < 4 || read(log->fd, temp, 4) != 4) + break; + sub_len = temp[2]; + sub_len += temp[3] << 8; + xlen -= 4; + if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { + good = 1; + break; + } + if (xlen < sub_len) + break; + (void)lseek(log->fd, sub_len, SEEK_CUR); + xlen -= sub_len; + } + if (!good) { + log_clean(log); + return NULL; + } + + /* read in "ap" sub-field */ + log->extra = lseek(log->fd, 0, SEEK_CUR); + if (read(log->fd, temp, 16) != 16) { + log_clean(log); + return NULL; + } + log->mark_off = make_off(temp); + log->last_off = make_off(temp + 8); + + /* get crc, length of gzip file */ + (void)lseek(log->fd, log->last_off, SEEK_SET); + if (read(log->fd, temp, 13) != 13 || + memcmp(temp, "\001\000\000\377\377", 5) != 0) { + log_clean(log); + return NULL; + } + log->crc = make_ulg(temp + 5); + log->len = make_ulg(temp + 9); + + /* set up to write over empty last block */ + (void)lseek(log->fd, log->last_off + 5, SEEK_SET); + log->stored = 0; + return (void *)log; +} + +/* maximum amount to put in a stored block before starting a new one */ +#define MAX_BLOCK 16384 + +/* write a block to a log object */ +int gzlog_write(void *obj, char *data, size_t len) +{ + size_t some; + unsigned char temp[5]; + gz_log *log; + + /* check object */ + log = (gz_log *)obj; + if (log == NULL || log->id != GZLOGID) + return 1; + + /* write stored blocks until all of the input is written */ + do { + some = MAX_BLOCK - log->stored; + if (some > len) + some = len; + if (write(log->fd, data, some) != some) + return 1; + log->crc = crc32(log->crc, data, some); + log->len += some; + len -= some; + data += some; + log->stored += some; + + /* if the stored block is full, end it and start another */ + if (log->stored == MAX_BLOCK) { + (void)lseek(log->fd, log->last_off, SEEK_SET); + temp[0] = 0; + dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), + temp + 1); + if (write(log->fd, temp, 5) != 5) + return 1; + log->last_off = lseek(log->fd, log->stored, SEEK_CUR); + (void)lseek(log->fd, 5, SEEK_CUR); + log->stored = 0; + } + } while (len); + return 0; +} + +/* recompress the remaining stored deflate data in place */ +local int recomp(gz_log *log) +{ + z_stream strm; + size_t len, max; + unsigned char *in; + unsigned char *out; + unsigned char temp[16]; + + /* allocate space and read it all in (it's around 1 MB) */ + len = log->last_off - log->mark_off; + max = len + (len >> 12) + (len >> 14) + 11; + out = malloc(max); + if (out == NULL) + return 1; + in = malloc(len); + if (in == NULL) { + free(out); + return 1; + } + (void)lseek(log->fd, log->mark_off, SEEK_SET); + if (read(log->fd, in, len) != len) { + free(in); + free(out); + return 1; + } + + /* recompress in memory, decoding stored data as we go */ + /* note: this assumes that unsigned is four bytes or more */ + /* consider not making that assumption */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, + Z_DEFAULT_STRATEGY) != Z_OK) { + free(in); + free(out); + return 1; + } + strm.next_in = in; + strm.avail_out = max; + strm.next_out = out; + while (len >= 5) { + if (strm.next_in[0] != 0) + break; + strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); + strm.next_in += 5; + len -= 5; + if (strm.avail_in != 0) { + if (len < strm.avail_in) + break; + len -= strm.avail_in; + (void)deflate(&strm, Z_NO_FLUSH); + if (strm.avail_in != 0 || strm.avail_out == 0) + break; + } + } + (void)deflate(&strm, Z_SYNC_FLUSH); + (void)deflateEnd(&strm); + free(in); + if (len != 0 || strm.avail_out == 0) { + free(out); + return 1; + } + + /* overwrite stored data with compressed data */ + (void)lseek(log->fd, log->mark_off, SEEK_SET); + len = max - strm.avail_out; + if (write(log->fd, out, len) != len) { + free(out); + return 1; + } + free(out); + + /* write last empty block, crc, and length */ + log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); + temp[0] = 1; + dice_ulg(0xffffL << 16, temp + 1); + dice_ulg(log->crc, temp + 5); + dice_ulg(log->len, temp + 9); + if (write(log->fd, temp, 13) != 13) + return 1; + + /* truncate file to discard remaining stored data and old trailer */ + ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); + + /* update extra field to point to new last empty block */ + (void)lseek(log->fd, log->extra, SEEK_SET); + dice_off(log->mark_off, temp); + dice_off(log->last_off, temp + 8); + if (write(log->fd, temp, 16) != 16) + return 1; + return 0; +} + +/* maximum accumulation of stored blocks before compressing */ +#define MAX_STORED 1048576 + +/* close log object */ +int gzlog_close(void *obj) +{ + unsigned char temp[8]; + gz_log *log; + + /* check object */ + log = (gz_log *)obj; + if (log == NULL || log->id != GZLOGID) + return 1; + + /* go to start of most recent block being written */ + (void)lseek(log->fd, log->last_off, SEEK_SET); + + /* if some stuff was put there, update block */ + if (log->stored) { + temp[0] = 0; + dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), + temp + 1); + if (write(log->fd, temp, 5) != 5) + return 1; + log->last_off = lseek(log->fd, log->stored, SEEK_CUR); + } + + /* write last block (empty) */ + if (write(log->fd, "\001\000\000\377\377", 5) != 5) + return 1; + + /* write updated crc and uncompressed length */ + dice_ulg(log->crc, temp); + dice_ulg(log->len, temp + 4); + if (write(log->fd, temp, 8) != 8) + return 1; + + /* put offset of that last block in gzip extra block */ + (void)lseek(log->fd, log->extra + 8, SEEK_SET); + dice_off(log->last_off, temp); + if (write(log->fd, temp, 8) != 8) + return 1; + + /* if more than 1 MB stored, then time to compress it */ + if (log->last_off - log->mark_off > MAX_STORED) { + if (recomp(log)) + return 1; + } + + /* unlock and close file */ + log_clean(log); + return 0; +} diff --git a/Modules/zlib/examples/gzlog.h b/Modules/zlib/examples/gzlog.h new file mode 100644 index 0000000..a800bd5 --- /dev/null +++ b/Modules/zlib/examples/gzlog.h @@ -0,0 +1,58 @@ +/* gzlog.h + Copyright (C) 2004 Mark Adler, all rights reserved + version 1.0, 26 Nov 2004 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + The gzlog object allows writing short messages to a gzipped log file, + opening the log file locked for small bursts, and then closing it. The log + object works by appending stored data to the gzip file until 1 MB has been + accumulated. At that time, the stored data is compressed, and replaces the + uncompressed data in the file. The log file is truncated to its new size at + that time. After closing, the log file is always valid gzip file that can + decompressed to recover what was written. + + A gzip header "extra" field contains two file offsets for appending. The + first points to just after the last compressed data. The second points to + the last stored block in the deflate stream, which is empty. All of the + data between those pointers is uncompressed. + */ + +/* Open a gzlog object, creating the log file if it does not exist. Return + NULL on error. Note that gzlog_open() could take a long time to return if + there is difficulty in locking the file. */ +void *gzlog_open(char *path); + +/* Write to a gzlog object. Return non-zero on error. This function will + simply write data to the file uncompressed. Compression of the data + will not occur until gzlog_close() is called. It is expected that + gzlog_write() is used for a short message, and then gzlog_close() is + called. If a large amount of data is to be written, then the application + should write no more than 1 MB at a time with gzlog_write() before + calling gzlog_close() and then gzlog_open() again. */ +int gzlog_write(void *log, char *data, size_t len); + +/* Close a gzlog object. Return non-zero on error. The log file is locked + until this function is called. This function will compress stored data + at the end of the gzip file if at least 1 MB has been accumulated. Note + that the file will not be a valid gzip file until this function completes. + */ +int gzlog_close(void *log); diff --git a/Modules/zlib/examples/zlib_how.html b/Modules/zlib/examples/zlib_how.html new file mode 100644 index 0000000..40998db --- /dev/null +++ b/Modules/zlib/examples/zlib_how.html @@ -0,0 +1,523 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" + "http://www.w3.org/TR/REC-html40/loose.dtd"> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> +<title>zlib Usage Example</title> +<!-- Copyright (c) 2004 Mark Adler. --> +</head> +<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000"> +<h2 align="center"> zlib Usage Example </h2> +We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used. +Users wonder when they should provide more input, when they should use more output, +what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and +so on. So for those who have read <tt>zlib.h</tt> (a few times), and +would like further edification, below is an annotated example in C of simple routines to compress and decompress +from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The +annotations are interspersed between lines of the code. So please read between the lines. +We hope this helps explain some of the intricacies of <em>zlib</em>. +<p> +Without further adieu, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>: +<pre><b> +/* zpipe.c: example of proper use of zlib's inflate() and deflate() + Not copyrighted -- provided to the public domain + Version 1.2 9 November 2004 Mark Adler */ + +/* Version history: + 1.0 30 Oct 2004 First version + 1.1 8 Nov 2004 Add void casting for unused return values + Use switch statement for inflate() return values + 1.2 9 Nov 2004 Add assertions to document zlib guarantees + */ +</b></pre><!-- --> +We now include the header files for the required definitions. From +<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>, +<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and +<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use +<tt>strcmp()</tt> for command line argument processing. +From <tt>assert.h</tt> we use the <tt>assert()</tt> macro. +From <tt>zlib.h</tt> +we use the basic compression functions <tt>deflateInit()</tt>, +<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression +functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and +<tt>inflateEnd()</tt>. +<pre><b> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" +</b></pre><!-- --> +<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data +from the <em>zlib</em> routines. Larger buffer sizes would be more efficient, +especially for <tt>inflate()</tt>. If the memory is available, buffers sizes +on the order of 128K or 256K bytes should be used. +<pre><b> +#define CHUNK 16384 +</b></pre><!-- --> +The <tt>def()</tt> routine compresses data from an input file to an output file. The output data +will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em> +formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as +a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast +check value to verify the integrity of the uncompressed data after decoding. +<pre><b> +/* Compress from file source to file dest until EOF on source. + def() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_STREAM_ERROR if an invalid compression + level is supplied, Z_VERSION_ERROR if the version of zlib.h and the + version of the library linked do not match, or Z_ERRNO if there is + an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ +</b></pre> +Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em> +return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>, +which is either no flushing, or flush to completion after the end of the input file is reached. +<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure +is used to pass information to and from the <em>zlib</em> routines, and to maintain the +<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for +<tt>deflate()</tt>. +<pre><b> + int ret, flush; + unsigned have; + z_stream strm; + char in[CHUNK]; + char out[CHUNK]; +</b></pre><!-- --> +The first thing we do is to initialize the <em>zlib</em> state for compression using +<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>. +The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt> +structure must be initialized before calling <tt>deflateInit()</tt>. Here they are +set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use +the default memory allocation routines. An application may also choose to provide +custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the +order of 256K bytes for the internal state. +(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.) +<p> +<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and +the compression level, which is an integer in the range of -1 to 9. Lower compression +levels result in faster execution, but less compression. Higher levels result in +greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION, +equal to -1, +provides a good compromise between compression and speed and is equivalent to level 6. +Level 0 actually does no compression at all, and in fact expands the data slightly to produce +the <em>zlib</em> format (it is not a byte-for-byte copy of the input). +More advanced applications of <em>zlib</em> +may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how +much memory will be used, at some price in compression. Or it may need to request a +<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw +encoding with no header or trailer at all. +<p> +We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant +<tt>Z_OK</tt> to make sure that it was able to +allocate memory for the internal state, and that the provided arguments were valid. +<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt> +file came from matches the version of <em>zlib</em> actually linked with the program. This +is especially important for environments in which <em>zlib</em> is a shared library. +<p> +Note that an application can initialize multiple, independent <em>zlib</em> streams, which can +operate in parallel. The state information maintained in the structure allows the <em>zlib</em> +routines to be reentrant. +<pre><b> + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + if (ret != Z_OK) + return ret; +</b></pre><!-- --> +With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop +reads all of the input file and exits at the bottom of the loop once end-of-file is reached. +This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the +input data has been processed and that all of the output data has been generated and consumed +before we fall out of the loop at the bottom. +<pre><b> + /* compress until end of file */ + do { +</b></pre> +We start off by reading data from the input file. The number of bytes read is put directly +into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also +check to see if end-of-file on the input has been reached. If we are at the end of file, then <tt>flush</tt> is set to the +<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to +indicate that this is the last chunk of input data to compress. We need to use <tt>feof()</tt> +to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read. The +reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss +the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish +up the compressed stream. If we are not yet at the end of the input, then the <em>zlib</em> +constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate</tt> to indicate that we are still +in the middle of the uncompressed data. +<p> +If there is an error in reading from the input file, the process is aborted with +<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning +the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called +at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or +<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is +no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail. +<pre><b> + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; + strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then +keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more +new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e., +<tt>avail_in</tt> will be zero. +<pre><b> + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { +</b></pre> +Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number +of available output bytes and <tt>next_out</tt> to a pointer to that space. +<pre><b> + strm.avail_out = CHUNK; + strm.next_out = out; +</b></pre> +Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the +<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as +<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then +updated past the input data consumed and the output data written. It is the amount of +output space available that may limit how much input is consumed. +Hence the inner loop to make sure that +all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt> +and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those +between <tt>deflate()</tt> calls until it's all used up. +<p> +The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing +the input and output information and the internal compression engine state, and a parameter +indicating whether and how to flush data to the output. Normally <tt>deflate</tt> will consume +several K bytes of input data before producing any output (except for the header), in order +to accumulate statistics on the data for optimum compression. It will then put out a burst of +compressed data, and proceed to consume more input before the next burst. Eventually, +<tt>deflate()</tt> +must be told to terminate the stream, complete the compression with provided input data, and +write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long +as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided, +<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how +much output space is provided, <tt>deflate()</tt> may have to be called several times until it +has provided the complete compressed stream, even after it has consumed all of the input. The flush +parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls. +<p> +There are other values of the flush parameter that are used in more advanced applications. You can +force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided +so far, even if it wouldn't have otherwise, for example to control data latency on a link with +compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to +that point so that what follows can be decompressed independently, for example for random access +applications. Both requests will degrade compression by an amount depending on how often such +requests are made. +<p> +<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why +not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through +<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are +<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt> +is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of +<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt> +until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not +initialized properly, but we did initialize it properly. There is no harm in checking for +<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some +other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state. +<tt>Z_BUF_ERROR</tt> will be explained further below, but +suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume +more input or produce more output. <tt>deflate()</tt> can be called again with more output space +or more available input, which it will be in this code. +<pre><b> + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ +</b></pre> +Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the +difference between how much space was provided before the call, and how much output space +is still available after the call. Then that data, if any, is written to the output file. +We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there +is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak. +<pre><b> + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } +</b></pre> +The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the +provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with +the provided input, and that all of that input has been consumed. We can then fall out of this +loop and reuse the input buffer. +<p> +The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill +the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that +<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer! +<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can. +As far as we know, <tt>deflate()</tt> +has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output +at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call +wasn't able to do anything, either consume input or produce output, and so it returns +<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at +all. Now we finally have the desired indication that <tt>deflate()</tt> is really done, +and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>. +<p> +With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will +complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return +<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing +until the state is reinitialized. +<p> +Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt> +instead of the single inner loop we have here. The first loop would call +without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call +<tt>deflate()</tt> with no more +data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this +example, that can be avoided by simply keeping track of the current flush state. +<pre><b> + } while (strm.avail_out == 0); + assert(strm.avail_in == 0); /* all input will be used */ +</b></pre><!-- --> +Now we check to see if we have already processed all of the input file. That information was +saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so, +then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt> +from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was +consumed and all of the output was generated. +<pre><b> + /* done when last data in file processed */ + } while (flush != Z_FINISH); + assert(ret == Z_STREAM_END); /* stream will be complete */ +</b></pre><!-- --> +The process is complete, but we still need to deallocate the state to avoid a memory leak +(or rather more like a memory hemorrhage if you didn't do this). Then +finally we can return with a happy return value. +<pre><b> + /* clean up and return */ + (void)deflateEnd(&strm); + return Z_OK; +} +</b></pre><!-- --> +Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt> +decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the +uncompressed data to the output file. Much of the discussion above for <tt>def()</tt> +applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between +the two. +<pre><b> +/* Decompress from file source to file dest until stream ends or EOF. + inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_DATA_ERROR if the deflate data is + invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and + the version of the library linked do not match, or Z_ERRNO if there + is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ +</b></pre> +The local variables have the same functionality as they do for <tt>def()</tt>. The +only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt> +can tell from the <em>zlib</em> stream itself when the stream is complete. +<pre><b> + int ret; + unsigned have; + z_stream strm; + char in[CHUNK]; + char out[CHUNK]; +</b></pre><!-- --> +The initialization of the state is the same, except that there is no compression level, +of course, and two more elements of the structure are initialized. <tt>avail_in</tt> +and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This +is because the application has the option to provide the start of the zlib stream in +order for <tt>inflateInit()</tt> to have access to information about the compression +method to aid in memory allocation. In the current implementation of <em>zlib</em> +(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of +<tt>inflate()</tt> anyway. However those fields must be initialized since later versions +of <em>zlib</em> that provide more compression methods may take advantage of this interface. +In any case, no decompression is performed by <tt>inflateInit()</tt>, so the +<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling. +<p> +Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to +indicate that no input data is being provided. +<pre><b> + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; +</b></pre><!-- --> +The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates +that it has reached the end of the compressed data and has produced all of the uncompressed +output. This is in contrast to <tt>def()</tt> which processes all of the input file. +If end-of-file is reached before the compressed data self-terminates, then the compressed +data is incomplete and an error is returned. +<pre><b> + /* decompress until deflate stream ends or end of file */ + do { +</b></pre> +We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the +end of the input file, then we leave the outer loop and report an error, since the +compressed data is incomplete. Note that we may read more data than is eventually consumed +by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream. +For applications where <em>zlib</em> streams are embedded in other data, this routine would +need to be modified to return the unused data, or at least indicate how much of the input +data was not used, so the application would know where to pick up after the <em>zlib</em> stream. +<pre><b> + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; +</b></pre><!-- --> +The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to +keep calling <tt>inflate()</tt> until has generated all of the output it can with the +provided input. +<pre><b> + /* run inflate() on input until output buffer not full */ + do { +</b></pre> +Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>. +<pre><b> + strm.avail_out = CHUNK; + strm.next_out = out; +</b></pre> +Now we run the decompression engine itself. There is no need to adjust the flush parameter, since +the <em>zlib</em> format is self-terminating. The main difference here is that there are +return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt> +indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format, +which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was +corrupted somewhere along the way since it was compressed. The other error to be processed is +<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt> +needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>. +<p> +Advanced applications may use +<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the +first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt> +requests that that dictionary be provided before it can start to decompress. Without the dictionary, +correct decompression is not possible. For this routine, we have no idea what the dictionary is, +so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>. +<p> +<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here, +but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be +checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be +checked for later. +<pre><b> + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } +</b></pre> +The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>. +<pre><b> + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } +</b></pre> +The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated +by not filling the output buffer, just as for <tt>deflate()</tt>. In this case, we cannot +assert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file +does. +<pre><b> + } while (strm.avail_out == 0); +</b></pre><!-- --> +The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the +end of the input <em>zlib</em> stream, has completed the decompression and integrity +check, and has provided all of the output. This is indicated by the <tt>inflate()</tt> +return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt> +equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end +of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the +loop continues to read more input. +<pre><b> + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); +</b></pre><!-- --> +At this point, decompression successfully completed, or we broke out of the loop due to no +more data being available from the input file. If the last <tt>inflate()</tt> return value +is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error +is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt> +is called first to avoid a memory leak. +<pre><b> + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} +</b></pre><!-- --> +That ends the routines that directly use <em>zlib</em>. The following routines make this +a command-line program by running data through the above routines from <tt>stdin</tt> to +<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>. +<p> +<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt> +and <tt>inf()</tt>, as detailed in their comments above, and print out an error message. +Note that these are only a subset of the possible return values from <tt>deflate()</tt> +and <tt>inflate()</tt>. +<pre><b> +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} +</b></pre><!-- --> +Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The +<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if +no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other +arguments are provided, no compression or decompression is performed. Instead a usage +message is displayed. Examples are <tt>zpipe < foo.txt > foo.txt.z</tt> to compress, and +<tt>zpipe -d < foo.txt.z > foo.txt</tt> to decompress. +<pre><b> +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} +</b></pre> +<hr> +<i>Copyright (c) 2004 by Mark Adler<br>Last modified 13 November 2004</i> +</body> +</html> diff --git a/Modules/zlib/examples/zpipe.c b/Modules/zlib/examples/zpipe.c new file mode 100644 index 0000000..26abb56 --- /dev/null +++ b/Modules/zlib/examples/zpipe.c @@ -0,0 +1,191 @@ +/* zpipe.c: example of proper use of zlib's inflate() and deflate() + Not copyrighted -- provided to the public domain + Version 1.2 9 November 2004 Mark Adler */ + +/* Version history: + 1.0 30 Oct 2004 First version + 1.1 8 Nov 2004 Add void casting for unused return values + Use switch statement for inflate() return values + 1.2 9 Nov 2004 Add assertions to document zlib guarantees + 1.3 6 Apr 2005 Remove incorrect assertion in inf() + */ + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "zlib.h" + +#define CHUNK 16384 + +/* Compress from file source to file dest until EOF on source. + def() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_STREAM_ERROR if an invalid compression + level is supplied, Z_VERSION_ERROR if the version of zlib.h and the + version of the library linked do not match, or Z_ERRNO if there is + an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ + int ret, flush; + unsigned have; + z_stream strm; + char in[CHUNK]; + char out[CHUNK]; + + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + if (ret != Z_OK) + return ret; + + /* compress until end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; + strm.next_in = in; + + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + assert(strm.avail_in == 0); /* all input will be used */ + + /* done when last data in file processed */ + } while (flush != Z_FINISH); + assert(ret == Z_STREAM_END); /* stream will be complete */ + + /* clean up and return */ + (void)deflateEnd(&strm); + return Z_OK; +} + +/* Decompress from file source to file dest until stream ends or EOF. + inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_DATA_ERROR if the deflate data is + invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and + the version of the library linked do not match, or Z_ERRNO if there + is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ + int ret; + unsigned have; + z_stream strm; + char in[CHUNK]; + char out[CHUNK]; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; + + /* decompress until deflate stream ends or end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; + + /* run inflate() on input until output buffer not full */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); + + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} diff --git a/Modules/zlib/examples/zran.c b/Modules/zlib/examples/zran.c new file mode 100644 index 0000000..8c7717e --- /dev/null +++ b/Modules/zlib/examples/zran.c @@ -0,0 +1,404 @@ +/* zran.c -- example of zlib/gzip stream indexing and random access + * Copyright (C) 2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.0 29 May 2005 Mark Adler */ + +/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() + for random access of a compressed file. A file containing a zlib or gzip + stream is provided on the command line. The compressed stream is decoded in + its entirety, and an index built with access points about every SPAN bytes + in the uncompressed output. The compressed file is left open, and can then + be read randomly, having to decompress on the average SPAN/2 uncompressed + bytes before getting to the desired block of data. + + An access point can be created at the start of any deflate block, by saving + the starting file offset and bit of that block, and the 32K bytes of + uncompressed data that precede that block. Also the uncompressed offset of + that block is saved to provide a referece for locating a desired starting + point in the uncompressed stream. build_index() works by decompressing the + input zlib or gzip stream a block at a time, and at the end of each block + deciding if enough uncompressed data has gone by to justify the creation of + a new access point. If so, that point is saved in a data structure that + grows as needed to accommodate the points. + + To use the index, an offset in the uncompressed data is provided, for which + the latest accees point at or preceding that offset is located in the index. + The input file is positioned to the specified location in the index, and if + necessary the first few bits of the compressed data is read from the file. + inflate is initialized with those bits and the 32K of uncompressed data, and + the decompression then proceeds until the desired offset in the file is + reached. Then the decompression continues to read the desired uncompressed + data from the file. + + Another approach would be to generate the index on demand. In that case, + requests for random access reads from the compressed data would try to use + the index, but if a read far enough past the end of the index is required, + then further index entries would be generated and added. + + There is some fair bit of overhead to starting inflation for the random + access, mainly copying the 32K byte dictionary. So if small pieces of the + file are being accessed, it would make sense to implement a cache to hold + some lookahead and avoid many calls to extract() for small lengths. + + Another way to build an index would be to use inflateCopy(). That would + not be constrained to have access points at block boundaries, but requires + more memory per access point, and also cannot be saved to file due to the + use of pointers in the state. The approach here allows for storage of the + index in a file. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zlib.h" + +#define local static + +#define SPAN 1048576L /* desired distance between access points */ +#define WINSIZE 32768U /* sliding window size */ +#define CHUNK 16384 /* file input buffer size */ + +/* access point entry */ +struct point { + off_t out; /* corresponding offset in uncompressed data */ + off_t in; /* offset in input file of first full byte */ + int bits; /* number of bits (1-7) from byte at in - 1, or 0 */ + unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */ +}; + +/* access point list */ +struct access { + int have; /* number of list entries filled in */ + int size; /* number of list entries allocated */ + struct point *list; /* allocated list */ +}; + +/* Deallocate an index built by build_index() */ +local void free_index(struct access *index) +{ + if (index != NULL) { + free(index->list); + free(index); + } +} + +/* Add an entry to the access point list. If out of memory, deallocate the + existing list and return NULL. */ +local struct access *addpoint(struct access *index, int bits, + off_t in, off_t out, unsigned left, unsigned char *window) +{ + struct point *next; + + /* if list is empty, create it (start with eight points) */ + if (index == NULL) { + index = malloc(sizeof(struct access)); + if (index == NULL) return NULL; + index->list = malloc(sizeof(struct point) << 3); + if (index->list == NULL) { + free(index); + return NULL; + } + index->size = 8; + index->have = 0; + } + + /* if list is full, make it bigger */ + else if (index->have == index->size) { + index->size <<= 1; + next = realloc(index->list, sizeof(struct point) * index->size); + if (next == NULL) { + free_index(index); + return NULL; + } + index->list = next; + } + + /* fill in entry and increment how many we have */ + next = index->list + index->have; + next->bits = bits; + next->in = in; + next->out = out; + if (left) + memcpy(next->window, window + WINSIZE - left, left); + if (left < WINSIZE) + memcpy(next->window + left, window, WINSIZE - left); + index->have++; + + /* return list, possibly reallocated */ + return index; +} + +/* Make one entire pass through the compressed stream and build an index, with + access points about every span bytes of uncompressed output -- span is + chosen to balance the speed of random access against the memory requirements + of the list, about 32K bytes per access point. Note that data after the end + of the first zlib or gzip stream in the file is ignored. build_index() + returns the number of access points on success (>= 1), Z_MEM_ERROR for out + of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a + file read error. On success, *built points to the resulting index. */ +local int build_index(FILE *in, off_t span, struct access **built) +{ + int ret; + off_t totin, totout; /* our own total counters to avoid 4GB limit */ + off_t last; /* totout value of last access point */ + struct access *index; /* access points being generated */ + z_stream strm; + unsigned char input[CHUNK]; + unsigned char window[WINSIZE]; + + /* initialize inflate */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ + if (ret != Z_OK) + return ret; + + /* inflate the input, maintain a sliding window, and build an index -- this + also validates the integrity of the compressed data using the check + information at the end of the gzip or zlib stream */ + totin = totout = last = 0; + index = NULL; /* will be allocated by first addpoint() */ + strm.avail_out = 0; + do { + /* get some compressed data from input file */ + strm.avail_in = fread(input, 1, CHUNK, in); + if (ferror(in)) { + ret = Z_ERRNO; + goto build_index_error; + } + if (strm.avail_in == 0) { + ret = Z_DATA_ERROR; + goto build_index_error; + } + strm.next_in = input; + + /* process all of that, or until end of stream */ + do { + /* reset sliding window if necessary */ + if (strm.avail_out == 0) { + strm.avail_out = WINSIZE; + strm.next_out = window; + } + + /* inflate until out of input, output, or at end of block -- + update the total input and output counters */ + totin += strm.avail_in; + totout += strm.avail_out; + ret = inflate(&strm, Z_BLOCK); /* return at end of block */ + totin -= strm.avail_in; + totout -= strm.avail_out; + if (ret == Z_NEED_DICT) + ret = Z_DATA_ERROR; + if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) + goto build_index_error; + if (ret == Z_STREAM_END) + break; + + /* if at end of block, consider adding an index entry (note that if + data_type indicates an end-of-block, then all of the + uncompressed data from that block has been delivered, and none + of the compressed data after that block has been consumed, + except for up to seven bits) -- the totout == 0 provides an + entry point after the zlib or gzip header, and assures that the + index always has at least one access point; we avoid creating an + access point after the last block by checking bit 6 of data_type + */ + if ((strm.data_type & 128) && !(strm.data_type & 64) && + (totout == 0 || totout - last > span)) { + index = addpoint(index, strm.data_type & 7, totin, + totout, strm.avail_out, window); + if (index == NULL) { + ret = Z_MEM_ERROR; + goto build_index_error; + } + last = totout; + } + } while (strm.avail_in != 0); + } while (ret != Z_STREAM_END); + + /* clean up and return index (release unused entries in list) */ + (void)inflateEnd(&strm); + index = realloc(index, sizeof(struct point) * index->have); + index->size = index->have; + *built = index; + return index->size; + + /* return error */ + build_index_error: + (void)inflateEnd(&strm); + if (index != NULL) + free_index(index); + return ret; +} + +/* Use the index to read len bytes from offset into buf, return bytes read or + negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past + the end of the uncompressed data, then extract() will return a value less + than len, indicating how much as actually read into buf. This function + should not return a data error unless the file was modified since the index + was generated. extract() may also return Z_ERRNO if there is an error on + reading or seeking the input file. */ +local int extract(FILE *in, struct access *index, off_t offset, + unsigned char *buf, int len) +{ + int ret, skip; + z_stream strm; + struct point *here; + unsigned char input[CHUNK]; + unsigned char discard[WINSIZE]; + + /* proceed only if something reasonable to do */ + if (len < 0) + return 0; + + /* find where in stream to start */ + here = index->list; + ret = index->have; + while (--ret && here[1].out <= offset) + here++; + + /* initialize file and inflate state to start there */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); /* raw inflate */ + if (ret != Z_OK) + return ret; + ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); + if (ret == -1) + goto extract_ret; + if (here->bits) { + ret = getc(in); + if (ret == -1) { + ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR; + goto extract_ret; + } + (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); + } + (void)inflateSetDictionary(&strm, here->window, WINSIZE); + + /* skip uncompressed bytes until offset reached, then satisfy request */ + offset -= here->out; + strm.avail_in = 0; + skip = 1; /* while skipping to offset */ + do { + /* define where to put uncompressed data, and how much */ + if (offset == 0 && skip) { /* at offset now */ + strm.avail_out = len; + strm.next_out = buf; + skip = 0; /* only do this once */ + } + if (offset > WINSIZE) { /* skip WINSIZE bytes */ + strm.avail_out = WINSIZE; + strm.next_out = discard; + offset -= WINSIZE; + } + else if (offset != 0) { /* last skip */ + strm.avail_out = (unsigned)offset; + strm.next_out = discard; + offset = 0; + } + + /* uncompress until avail_out filled, or end of stream */ + do { + if (strm.avail_in == 0) { + strm.avail_in = fread(input, 1, CHUNK, in); + if (ferror(in)) { + ret = Z_ERRNO; + goto extract_ret; + } + if (strm.avail_in == 0) { + ret = Z_DATA_ERROR; + goto extract_ret; + } + strm.next_in = input; + } + ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */ + if (ret == Z_NEED_DICT) + ret = Z_DATA_ERROR; + if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) + goto extract_ret; + if (ret == Z_STREAM_END) + break; + } while (strm.avail_out != 0); + + /* if reach end of stream, then don't keep trying to get more */ + if (ret == Z_STREAM_END) + break; + + /* do until offset reached and requested data read, or stream ends */ + } while (skip); + + /* compute number of uncompressed bytes read after offset */ + ret = skip ? 0 : len - strm.avail_out; + + /* clean up and return bytes read or error */ + extract_ret: + (void)inflateEnd(&strm); + return ret; +} + +/* Demonstrate the use of build_index() and extract() by processing the file + provided on the command line, and the extracting 16K from about 2/3rds of + the way through the uncompressed output, and writing that to stdout. */ +int main(int argc, char **argv) +{ + int len; + off_t offset; + FILE *in; + struct access *index; + unsigned char buf[CHUNK]; + + /* open input file */ + if (argc != 2) { + fprintf(stderr, "usage: zran file.gz\n"); + return 1; + } + in = fopen(argv[1], "rb"); + if (in == NULL) { + fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); + return 1; + } + + /* build index */ + len = build_index(in, SPAN, &index); + if (len < 0) { + fclose(in); + switch (len) { + case Z_MEM_ERROR: + fprintf(stderr, "zran: out of memory\n"); + break; + case Z_DATA_ERROR: + fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); + break; + case Z_ERRNO: + fprintf(stderr, "zran: read error on %s\n", argv[1]); + break; + default: + fprintf(stderr, "zran: error %d while building index\n", len); + } + return 1; + } + fprintf(stderr, "zran: built index with %d access points\n", len); + + /* use index by reading some bytes from an arbitrary offset */ + offset = (index->list[index->have - 1].out << 1) / 3; + len = extract(in, index, offset, buf, CHUNK); + if (len < 0) + fprintf(stderr, "zran: extraction failed: %s error\n", + len == Z_MEM_ERROR ? "out of memory" : "input corrupted"); + else { + fwrite(buf, 1, len, stdout); + fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset); + } + + /* clean up and exit */ + free_index(index); + fclose(in); + return 0; +} |