diff options
Diffstat (limited to 'compat/zlib/examples/gzjoin.c')
| -rw-r--r-- | compat/zlib/examples/gzjoin.c | 449 | 
1 files changed, 449 insertions, 0 deletions
| diff --git a/compat/zlib/examples/gzjoin.c b/compat/zlib/examples/gzjoin.c new file mode 100644 index 0000000..89e8098 --- /dev/null +++ b/compat/zlib/examples/gzjoin.c @@ -0,0 +1,449 @@ +/* gzjoin -- command to join gzip files into one gzip file + +  Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved +  version 1.2, 14 Aug 2012 + +  This software is provided 'as-is', without any express or implied +  warranty.  In no event will the author be held liable for any damages +  arising from the use of this software. + +  Permission is granted to anyone to use this software for any purpose, +  including commercial applications, and to alter it and redistribute it +  freely, subject to the following restrictions: + +  1. The origin of this software must not be misrepresented; you must not +     claim that you wrote the original software. If you use this software +     in a product, an acknowledgment in the product documentation would be +     appreciated but is not required. +  2. Altered source versions must be plainly marked as such, and must not be +     misrepresented as being the original software. +  3. This notice may not be removed or altered from any source distribution. + +  Mark Adler    madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0  11 Dec 2004     - First version + * 1.1  12 Jun 2005     - Changed ssize_t to long for portability + * 1.2  14 Aug 2012     - Clean up for z_const usage + */ + +/* +   gzjoin takes one or more gzip files on the command line and writes out a +   single gzip file that will uncompress to the concatenation of the +   uncompressed data from the individual gzip files.  gzjoin does this without +   having to recompress any of the data and without having to calculate a new +   crc32 for the concatenated uncompressed data.  gzjoin does however have to +   decompress all of the input data in order to find the bits in the compressed +   data that need to be modified to concatenate the streams. + +   gzjoin does not do an integrity check on the input gzip files other than +   checking the gzip header and decompressing the compressed data.  They are +   otherwise assumed to be complete and correct. + +   Each joint between gzip files removes at least 18 bytes of previous trailer +   and subsequent header, and inserts an average of about three bytes to the +   compressed data in order to connect the streams.  The output gzip file +   has a minimal ten-byte gzip header with no file name or modification time. + +   This program was written to illustrate the use of the Z_BLOCK option of +   inflate() and the crc32_combine() function.  gzjoin will not compile with +   versions of zlib earlier than 1.2.3. + */ + +#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */ +#include <stdlib.h>     /* exit(), malloc(), free() */ +#include <fcntl.h>      /* open() */ +#include <unistd.h>     /* close(), read(), lseek() */ +#include "zlib.h" +    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ +    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); +    exit(1); +    return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768         /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { +    char *name;             /* name of file for error messages */ +    int fd;                 /* file descriptor */ +    unsigned left;          /* bytes remaining at next */ +    unsigned char *next;    /* next byte to read */ +    unsigned char *buf;     /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ +    if (in != NULL) { +        if (in->fd != -1) +            close(in->fd); +        if (in->buf != NULL) +            free(in->buf); +        free(in); +    } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on +   failure */ +local bin *bopen(char *name) +{ +    bin *in; + +    in = malloc(sizeof(bin)); +    if (in == NULL) +        return NULL; +    in->buf = malloc(CHUNK); +    in->fd = open(name, O_RDONLY, 0); +    if (in->buf == NULL || in->fd == -1) { +        bclose(in); +        return NULL; +    } +    in->left = 0; +    in->next = in->buf; +    in->name = name; +    return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with +   1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ +    long len; + +    if (in == NULL) +        return -1; +    if (in->left != 0) +        return 0; +    in->next = in->buf; +    do { +        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); +        if (len < 0) +            return -1; +        in->left += (unsigned)len; +    } while (len != 0 && in->left < CHUNK); +    return len == 0 ? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ +                  in->left ? (in->left--, *(in->next)++) : \ +                    bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ +    unsigned long val; + +    val = bget(in); +    val += (unsigned long)(bget(in)) << 8; +    val += (unsigned long)(bget(in)) << 16; +    val += (unsigned long)(bget(in)) << 24; +    return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ +    /* check pointer */ +    if (in == NULL) +        return; + +    /* easy case -- skip bytes in buffer */ +    if (skip <= in->left) { +        in->left -= skip; +        in->next += skip; +        return; +    } + +    /* skip what's in buffer, discard buffer contents */ +    skip -= in->left; +    in->left = 0; + +    /* seek past multiples of CHUNK bytes */ +    if (skip > CHUNK) { +        unsigned left; + +        left = skip & (CHUNK - 1); +        if (left == 0) { +            /* exact number of chunks: seek all the way minus one byte to check +               for end-of-file with a read */ +            lseek(in->fd, skip - 1, SEEK_CUR); +            if (read(in->fd, in->buf, 1) != 1) +                bail("unexpected end of file on ", in->name); +            return; +        } + +        /* skip the integral chunks, update skip with remainder */ +        lseek(in->fd, skip - left, SEEK_CUR); +        skip = left; +    } + +    /* read more input and skip remainder */ +    bload(in); +    if (skip > in->left) +        bail("unexpected end of file on ", in->name); +    in->left -= skip; +    in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ +    int flags; + +    /* verify gzip magic header and compression method */ +    if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) +        bail(in->name, " is not a valid gzip file"); + +    /* get and verify flags */ +    flags = bget(in); +    if ((flags & 0xe0) != 0) +        bail("unknown reserved bits set in ", in->name); + +    /* skip modification time, extra flags, and os */ +    bskip(in, 6); + +    /* skip extra field if present */ +    if (flags & 4) { +        unsigned len; + +        len = bget(in); +        len += (unsigned)(bget(in)) << 8; +        bskip(in, len); +    } + +    /* skip file name if present */ +    if (flags & 8) +        while (bget(in) != 0) +            ; + +    /* skip comment if present */ +    if (flags & 16) +        while (bget(in) != 0) +            ; + +    /* skip header crc if present */ +    if (flags & 2) +        bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ +    putc(val & 0xff, out); +    putc((val >> 8) & 0xff, out); +    putc((val >> 16) & 0xff, out); +    putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ +    if (in->left == 0) +        bload(in); +    if (in->left == 0) +        bail("unexpected end of file on ", in->name); +    strm->avail_in = in->left; +    strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ +    fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); +    *crc = crc32(0L, Z_NULL, 0); +    *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last +   block if clr is true, and adding empty blocks as needed to get to a byte +   boundary.  If clr is false, then the last block becomes the last block of +   the output, and the gzip trailer is written.  crc and tot maintains the +   crc and length (modulo 2^32) of the output for the trailer.  The resulting +   gzip file is written to out.  gzinit() must be called before the first call +   of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, +                  FILE *out) +{ +    int ret;                /* return value from zlib functions */ +    int pos;                /* where the "last block" bit is in byte */ +    int last;               /* true if processing the last block */ +    bin *in;                /* buffered input file */ +    unsigned char *start;   /* start of compressed data in buffer */ +    unsigned char *junk;    /* buffer for uncompressed data -- discarded */ +    z_off_t len;            /* length of uncompressed data (support > 4 GB) */ +    z_stream strm;          /* zlib inflate stream */ + +    /* open gzip file and skip header */ +    in = bopen(name); +    if (in == NULL) +        bail("could not open ", name); +    gzhead(in); + +    /* allocate buffer for uncompressed data and initialize raw inflate +       stream */ +    junk = malloc(CHUNK); +    strm.zalloc = Z_NULL; +    strm.zfree = Z_NULL; +    strm.opaque = Z_NULL; +    strm.avail_in = 0; +    strm.next_in = Z_NULL; +    ret = inflateInit2(&strm, -15); +    if (junk == NULL || ret != Z_OK) +        bail("out of memory", ""); + +    /* inflate and copy compressed data, clear last-block bit if requested */ +    len = 0; +    zpull(&strm, in); +    start = in->next; +    last = start[0] & 1; +    if (last && clr) +        start[0] &= ~1; +    strm.avail_out = 0; +    for (;;) { +        /* if input used and output done, write used input and get more */ +        if (strm.avail_in == 0 && strm.avail_out != 0) { +            fwrite(start, 1, strm.next_in - start, out); +            start = in->buf; +            in->left = 0; +            zpull(&strm, in); +        } + +        /* decompress -- return early when end-of-block reached */ +        strm.avail_out = CHUNK; +        strm.next_out = junk; +        ret = inflate(&strm, Z_BLOCK); +        switch (ret) { +        case Z_MEM_ERROR: +            bail("out of memory", ""); +        case Z_DATA_ERROR: +            bail("invalid compressed data in ", in->name); +        } + +        /* update length of uncompressed data */ +        len += CHUNK - strm.avail_out; + +        /* check for block boundary (only get this when block copied out) */ +        if (strm.data_type & 128) { +            /* if that was the last block, then done */ +            if (last) +                break; + +            /* number of unused bits in last byte */ +            pos = strm.data_type & 7; + +            /* find the next last-block bit */ +            if (pos != 0) { +                /* next last-block bit is in last used byte */ +                pos = 0x100 >> pos; +                last = strm.next_in[-1] & pos; +                if (last && clr) +                    in->buf[strm.next_in - in->buf - 1] &= ~pos; +            } +            else { +                /* next last-block bit is in next unused byte */ +                if (strm.avail_in == 0) { +                    /* don't have that byte yet -- get it */ +                    fwrite(start, 1, strm.next_in - start, out); +                    start = in->buf; +                    in->left = 0; +                    zpull(&strm, in); +                } +                last = strm.next_in[0] & 1; +                if (last && clr) +                    in->buf[strm.next_in - in->buf] &= ~1; +            } +        } +    } + +    /* update buffer with unused input */ +    in->left = strm.avail_in; +    in->next = in->buf + (strm.next_in - in->buf); + +    /* copy used input, write empty blocks to get to byte boundary */ +    pos = strm.data_type & 7; +    fwrite(start, 1, in->next - start - 1, out); +    last = in->next[-1]; +    if (pos == 0 || !clr) +        /* already at byte boundary, or last file: write last byte */ +        putc(last, out); +    else { +        /* append empty blocks to last byte */ +        last &= ((0x100 >> pos) - 1);       /* assure unused bits are zero */ +        if (pos & 1) { +            /* odd -- append an empty stored block */ +            putc(last, out); +            if (pos == 1) +                putc(0, out);               /* two more bits in block header */ +            fwrite("\0\0\xff\xff", 1, 4, out); +        } +        else { +            /* even -- append 1, 2, or 3 empty fixed blocks */ +            switch (pos) { +            case 6: +                putc(last | 8, out); +                last = 0; +            case 4: +                putc(last | 0x20, out); +                last = 0; +            case 2: +                putc(last | 0x80, out); +                putc(0, out); +            } +        } +    } + +    /* update crc and tot */ +    *crc = crc32_combine(*crc, bget4(in), len); +    *tot += (unsigned long)len; + +    /* clean up */ +    inflateEnd(&strm); +    free(junk); +    bclose(in); + +    /* write trailer if this is the last gzip file */ +    if (!clr) { +        put4(*crc, out); +        put4(*tot, out); +    } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ +    unsigned long crc, tot;     /* running crc and total uncompressed length */ + +    /* skip command name */ +    argc--; +    argv++; + +    /* show usage if no arguments */ +    if (argc == 0) { +        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", +              stderr); +        return 0; +    } + +    /* join gzip files on command line and write to stdout */ +    gzinit(&crc, &tot, stdout); +    while (argc--) +        gzcopy(*argv++, argc, &crc, &tot, stdout); + +    /* done */ +    return 0; +} | 
