/*- * Copyright (c) 2003-2007 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * This file contains the "essential" portions of the read API, that * is, stuff that will probably always be used by any client that * actually needs to read an archive. Optional pieces have been, as * far as possible, separated out into separate files to avoid * needlessly bloating statically-linked clients. */ #include "archive_platform.h" __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.39 2008/12/06 06:45:15 kientzle Exp $"); #ifdef HAVE_ERRNO_H #include <errno.h> #endif #include <stdio.h> #ifdef HAVE_STDLIB_H #include <stdlib.h> #endif #ifdef HAVE_STRING_H #include <string.h> #endif #ifdef HAVE_UNISTD_H #include <unistd.h> #endif #include "archive.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_read_private.h" #define minimum(a, b) (a < b ? a : b) static int build_stream(struct archive_read *); static int choose_format(struct archive_read *); static struct archive_vtable *archive_read_vtable(void); static int _archive_read_close(struct archive *); static int _archive_read_finish(struct archive *); static struct archive_vtable * archive_read_vtable(void) { static struct archive_vtable av; static int inited = 0; if (!inited) { av.archive_finish = _archive_read_finish; av.archive_close = _archive_read_close; } return (&av); } /* * Allocate, initialize and return a struct archive object. */ struct archive * archive_read_new(void) { struct archive_read *a; a = (struct archive_read *)malloc(sizeof(*a)); if (a == NULL) return (NULL); memset(a, 0, sizeof(*a)); a->archive.magic = ARCHIVE_READ_MAGIC; a->archive.state = ARCHIVE_STATE_NEW; a->entry = archive_entry_new(); a->archive.vtable = archive_read_vtable(); return (&a->archive); } /* * Record the do-not-extract-to file. This belongs in archive_read_extract.c. */ void archive_read_extract_set_skip_file(struct archive *_a, dev_t d, ino_t i) { struct archive_read *a = (struct archive_read *)_a; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_extract_set_skip_file"); a->skip_file_dev = d; a->skip_file_ino = i; } /* * Set read options for the format. */ int archive_read_set_format_options(struct archive *_a, const char *s) { struct archive_read *a; struct archive_format_descriptor *format; char key[64], val[64]; char *valp; size_t i; int len, r; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_set_format_options"); if (s == NULL || *s == '\0') return (ARCHIVE_OK); a = (struct archive_read *)_a; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_set_format_options"); len = 0; for (i = 0; i < sizeof(a->formats)/sizeof(a->formats[0]); i++) { format = &a->formats[i]; if (format == NULL || format->options == NULL || format->name == NULL) /* This format does not support option. */ continue; while ((len = __archive_parse_options(s, format->name, sizeof(key), key, sizeof(val), val)) > 0) { valp = val[0] == '\0' ? NULL : val; a->format = format; r = format->options(a, key, valp); a->format = NULL; if (r == ARCHIVE_FATAL) return (r); s += len; } } if (len < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Illegal format options."); return (ARCHIVE_WARN); } return (ARCHIVE_OK); } /* * Set read options for the filter. */ int archive_read_set_filter_options(struct archive *_a, const char *s) { struct archive_read *a; struct archive_read_filter *filter; struct archive_read_filter_bidder *bidder; char key[64], val[64]; int len, r; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_set_filter_options"); if (s == NULL || *s == '\0') return (ARCHIVE_OK); a = (struct archive_read *)_a; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_set_filter_options"); filter = a->filter; len = 0; for (filter = a->filter; filter != NULL; filter = filter->upstream) { bidder = filter->bidder; if (bidder == NULL) continue; if (bidder->options == NULL) /* This bidder does not support option */ continue; while ((len = __archive_parse_options(s, filter->name, sizeof(key), key, sizeof(val), val)) > 0) { if (val[0] == '\0') r = bidder->options(bidder, key, NULL); else r = bidder->options(bidder, key, val); if (r == ARCHIVE_FATAL) return (r); s += len; } } if (len < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Illegal format options."); return (ARCHIVE_WARN); } return (ARCHIVE_OK); } /* * Set read options for the format and the filter. */ int archive_read_set_options(struct archive *_a, const char *s) { int r; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_set_options"); archive_clear_error(_a); r = archive_read_set_format_options(_a, s); if (r != ARCHIVE_OK) return (r); r = archive_read_set_filter_options(_a, s); if (r != ARCHIVE_OK) return (r); return (ARCHIVE_OK); } /* * Open the archive */ int archive_read_open(struct archive *a, void *client_data, archive_open_callback *client_opener, archive_read_callback *client_reader, archive_close_callback *client_closer) { /* Old archive_read_open() is just a thin shell around * archive_read_open2. */ return archive_read_open2(a, client_data, client_opener, client_reader, NULL, client_closer); } static ssize_t client_read_proxy(struct archive_read_filter *self, const void **buff) { ssize_t r; r = (self->archive->client.reader)(&self->archive->archive, self->data, buff); self->archive->archive.raw_position += r; return (r); } static int64_t client_skip_proxy(struct archive_read_filter *self, int64_t request) { int64_t ask, get, total; /* Limit our maximum seek request to 1GB on platforms * with 32-bit off_t (such as Windows). */ int64_t skip_limit = ((int64_t)1) << (sizeof(off_t) * 8 - 2); if (self->archive->client.skipper == NULL) return (0); total = 0; for (;;) { ask = request; if (ask > skip_limit) ask = skip_limit; get = (self->archive->client.skipper)(&self->archive->archive, self->data, ask); if (get == 0) return (total); request -= get; self->archive->archive.raw_position += get; total += get; } } static int client_close_proxy(struct archive_read_filter *self) { int r = ARCHIVE_OK; if (self->archive->client.closer != NULL) r = (self->archive->client.closer)((struct archive *)self->archive, self->data); self->data = NULL; return (r); } int archive_read_open2(struct archive *_a, void *client_data, archive_open_callback *client_opener, archive_read_callback *client_reader, archive_skip_callback *client_skipper, archive_close_callback *client_closer) { struct archive_read *a = (struct archive_read *)_a; struct archive_read_filter *filter; int e; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open"); archive_clear_error(&a->archive); if (client_reader == NULL) __archive_errx(1, "No reader function provided to archive_read_open"); /* Open data source. */ if (client_opener != NULL) { e =(client_opener)(&a->archive, client_data); if (e != 0) { /* If the open failed, call the closer to clean up. */ if (client_closer) (client_closer)(&a->archive, client_data); return (e); } } /* Save the client functions and mock up the initial source. */ a->client.reader = client_reader; a->client.skipper = client_skipper; a->client.closer = client_closer; filter = calloc(1, sizeof(*filter)); if (filter == NULL) return (ARCHIVE_FATAL); filter->bidder = NULL; filter->upstream = NULL; filter->archive = a; filter->data = client_data; filter->read = client_read_proxy; filter->skip = client_skip_proxy; filter->close = client_close_proxy; filter->name = "none"; filter->code = ARCHIVE_COMPRESSION_NONE; a->filter = filter; /* Build out the input pipeline. */ e = build_stream(a); if (e == ARCHIVE_OK) a->archive.state = ARCHIVE_STATE_HEADER; return (e); } /* * Allow each registered stream transform to bid on whether * it wants to handle this stream. Repeat until we've finished * building the pipeline. */ static int build_stream(struct archive_read *a) { int number_bidders, i, bid, best_bid; struct archive_read_filter_bidder *bidder, *best_bidder; struct archive_read_filter *filter; ssize_t avail; int r; for (;;) { number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]); best_bid = 0; best_bidder = NULL; bidder = a->bidders; for (i = 0; i < number_bidders; i++, bidder++) { if (bidder->bid != NULL) { bid = (bidder->bid)(bidder, a->filter); if (bid > best_bid) { best_bid = bid; best_bidder = bidder; } } } /* If no bidder, we're done. */ if (best_bidder == NULL) { a->archive.compression_name = a->filter->name; a->archive.compression_code = a->filter->code; return (ARCHIVE_OK); } filter = (struct archive_read_filter *)calloc(1, sizeof(*filter)); if (filter == NULL) return (ARCHIVE_FATAL); filter->bidder = best_bidder; filter->archive = a; filter->upstream = a->filter; r = (best_bidder->init)(filter); if (r != ARCHIVE_OK) { free(filter); return (r); } /* Verify the filter by asking it for some data. */ __archive_read_filter_ahead(filter, 1, &avail); if (avail < 0) { /* If the read failed, bail out now. */ free(filter); return (avail); } a->filter = filter; } } /* * Read header of next entry. */ int archive_read_next_header2(struct archive *_a, struct archive_entry *entry) { struct archive_read *a = (struct archive_read *)_a; int slot, ret; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_next_header"); ++_a->file_count; archive_entry_clear(entry); archive_clear_error(&a->archive); /* * If no format has yet been chosen, choose one. */ if (a->format == NULL) { slot = choose_format(a); if (slot < 0) { a->archive.state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } a->format = &(a->formats[slot]); } /* * If client didn't consume entire data, skip any remainder * (This is especially important for GNU incremental directories.) */ if (a->archive.state == ARCHIVE_STATE_DATA) { ret = archive_read_data_skip(&a->archive); if (ret == ARCHIVE_EOF) { archive_set_error(&a->archive, EIO, "Premature end-of-file."); a->archive.state = ARCHIVE_STATE_FATAL; return (ARCHIVE_FATAL); } if (ret != ARCHIVE_OK) return (ret); } /* Record start-of-header. */ a->header_position = a->archive.file_position; ret = (a->format->read_header)(a, entry); /* * EOF and FATAL are persistent at this layer. By * modifying the state, we guarantee that future calls to * read a header or read data will fail. */ switch (ret) { case ARCHIVE_EOF: a->archive.state = ARCHIVE_STATE_EOF; break; case ARCHIVE_OK: a->archive.state = ARCHIVE_STATE_DATA; break; case ARCHIVE_WARN: a->archive.state = ARCHIVE_STATE_DATA; break; case ARCHIVE_RETRY: break; case ARCHIVE_FATAL: a->archive.state = ARCHIVE_STATE_FATAL; break; } a->read_data_output_offset = 0; a->read_data_remaining = 0; return (ret); } int archive_read_next_header(struct archive *_a, struct archive_entry **entryp) { int ret; struct archive_read *a = (struct archive_read *)_a; *entryp = NULL; ret = archive_read_next_header2(_a, a->entry); *entryp = a->entry; return ret; } /* * Allow each registered format to bid on whether it wants to handle * the next entry. Return index of winning bidder. */ static int choose_format(struct archive_read *a) { int slots; int i; int bid, best_bid; int best_bid_slot; slots = sizeof(a->formats) / sizeof(a->formats[0]); best_bid = -1; best_bid_slot = -1; /* Set up a->format and a->pformat_data for convenience of bidders. */ a->format = &(a->formats[0]); for (i = 0; i < slots; i++, a->format++) { if (a->format->bid) { bid = (a->format->bid)(a); if (bid == ARCHIVE_FATAL) return (ARCHIVE_FATAL); if ((bid > best_bid) || (best_bid_slot < 0)) { best_bid = bid; best_bid_slot = i; } } } /* * There were no bidders; this is a serious programmer error * and demands a quick and definitive abort. */ if (best_bid_slot < 0) __archive_errx(1, "No formats were registered; you must " "invoke at least one archive_read_support_format_XXX " "function in order to successfully read an archive."); /* * There were bidders, but no non-zero bids; this means we * can't support this stream. */ if (best_bid < 1) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Unrecognized archive format"); return (ARCHIVE_FATAL); } return (best_bid_slot); } /* * Return the file offset (within the uncompressed data stream) where * the last header started. */ int64_t archive_read_header_position(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_header_position"); return (a->header_position); } /* * Read data from an archive entry, using a read(2)-style interface. * This is a convenience routine that just calls * archive_read_data_block and copies the results into the client * buffer, filling any gaps with zero bytes. Clients using this * API can be completely ignorant of sparse-file issues; sparse files * will simply be padded with nulls. * * DO NOT intermingle calls to this function and archive_read_data_block * to read a single entry body. */ ssize_t archive_read_data(struct archive *_a, void *buff, size_t s) { struct archive_read *a = (struct archive_read *)_a; char *dest; const void *read_buf; size_t bytes_read; size_t len; int r; bytes_read = 0; dest = (char *)buff; while (s > 0) { if (a->read_data_remaining == 0) { read_buf = a->read_data_block; r = archive_read_data_block(&a->archive, &read_buf, &a->read_data_remaining, &a->read_data_offset); a->read_data_block = read_buf; if (r == ARCHIVE_EOF) return (bytes_read); /* * Error codes are all negative, so the status * return here cannot be confused with a valid * byte count. (ARCHIVE_OK is zero.) */ if (r < ARCHIVE_OK) return (r); } if (a->read_data_offset < a->read_data_output_offset) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Encountered out-of-order sparse blocks"); return (ARCHIVE_RETRY); } /* Compute the amount of zero padding needed. */ if (a->read_data_output_offset + (off_t)s < a->read_data_offset) { len = s; } else if (a->read_data_output_offset < a->read_data_offset) { len = a->read_data_offset - a->read_data_output_offset; } else len = 0; /* Add zeroes. */ memset(dest, 0, len); s -= len; a->read_data_output_offset += len; dest += len; bytes_read += len; /* Copy data if there is any space left. */ if (s > 0) { len = a->read_data_remaining; if (len > s) len = s; memcpy(dest, a->read_data_block, len); s -= len; a->read_data_block += len; a->read_data_remaining -= len; a->read_data_output_offset += len; a->read_data_offset += len; dest += len; bytes_read += len; } } return (bytes_read); } #if ARCHIVE_API_VERSION < 3 /* * Obsolete function provided for compatibility only. Note that the API * of this function doesn't allow the caller to detect if the remaining * data from the archive entry is shorter than the buffer provided, or * even if an error occurred while reading data. */ int archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len) { archive_read_data(a, d, len); return (ARCHIVE_OK); } #endif /* * Skip over all remaining data in this entry. */ int archive_read_data_skip(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; int r; const void *buff; size_t size; off_t offset; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_skip"); if (a->format->read_data_skip != NULL) r = (a->format->read_data_skip)(a); else { while ((r = archive_read_data_block(&a->archive, &buff, &size, &offset)) == ARCHIVE_OK) ; } if (r == ARCHIVE_EOF) r = ARCHIVE_OK; a->archive.state = ARCHIVE_STATE_HEADER; return (r); } /* * Read the next block of entry data from the archive. * This is a zero-copy interface; the client receives a pointer, * size, and file offset of the next available block of data. * * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if * the end of entry is encountered. */ int archive_read_data_block(struct archive *_a, const void **buff, size_t *size, off_t *offset) { struct archive_read *a = (struct archive_read *)_a; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_block"); if (a->format->read_data == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, "Internal error: " "No format_read_data_block function registered"); return (ARCHIVE_FATAL); } return (a->format->read_data)(a, buff, size, offset); } /* * Close the file and release most resources. * * Be careful: client might just call read_new and then read_finish. * Don't assume we actually read anything or performed any non-trivial * initialization. */ static int _archive_read_close(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; int r = ARCHIVE_OK, r1 = ARCHIVE_OK; size_t i, n; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_close"); archive_clear_error(&a->archive); a->archive.state = ARCHIVE_STATE_CLOSED; /* Call cleanup functions registered by optional components. */ if (a->cleanup_archive_extract != NULL) r = (a->cleanup_archive_extract)(a); /* TODO: Clean up the formatters. */ /* Clean up the filter pipeline. */ while (a->filter != NULL) { struct archive_read_filter *t = a->filter->upstream; if (a->filter->close != NULL) { r1 = (a->filter->close)(a->filter); if (r1 < r) r = r1; } free(a->filter->buffer); free(a->filter); a->filter = t; } /* Release the bidder objects. */ n = sizeof(a->bidders)/sizeof(a->bidders[0]); for (i = 0; i < n; i++) { if (a->bidders[i].free != NULL) { r1 = (a->bidders[i].free)(&a->bidders[i]); if (r1 < r) r = r1; } } return (r); } /* * Release memory and other resources. */ static int _archive_read_finish(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; int i; int slots; int r = ARCHIVE_OK; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_finish"); if (a->archive.state != ARCHIVE_STATE_CLOSED) r = archive_read_close(&a->archive); /* Cleanup format-specific data. */ slots = sizeof(a->formats) / sizeof(a->formats[0]); for (i = 0; i < slots; i++) { a->format = &(a->formats[i]); if (a->formats[i].cleanup) (a->formats[i].cleanup)(a); } archive_string_free(&a->archive.error_string); if (a->entry) archive_entry_free(a->entry); a->archive.magic = 0; free(a); #if ARCHIVE_API_VERSION > 1 return (r); #endif } /* * Used internally by read format handlers to register their bid and * initialization functions. */ int __archive_read_register_format(struct archive_read *a, void *format_data, const char *name, int (*bid)(struct archive_read *), int (*options)(struct archive_read *, const char *, const char *), int (*read_header)(struct archive_read *, struct archive_entry *), int (*read_data)(struct archive_read *, const void **, size_t *, off_t *), int (*read_data_skip)(struct archive_read *), int (*cleanup)(struct archive_read *)) { int i, number_slots; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "__archive_read_register_format"); number_slots = sizeof(a->formats) / sizeof(a->formats[0]); for (i = 0; i < number_slots; i++) { if (a->formats[i].bid == bid) return (ARCHIVE_WARN); /* We've already installed */ if (a->formats[i].bid == NULL) { a->formats[i].bid = bid; a->formats[i].options = options; a->formats[i].read_header = read_header; a->formats[i].read_data = read_data; a->formats[i].read_data_skip = read_data_skip; a->formats[i].cleanup = cleanup; a->formats[i].data = format_data; a->formats[i].name = name; return (ARCHIVE_OK); } } __archive_errx(1, "Not enough slots for format registration"); return (ARCHIVE_FATAL); /* Never actually called. */ } /* * Used internally by decompression routines to register their bid and * initialization functions. */ struct archive_read_filter_bidder * __archive_read_get_bidder(struct archive_read *a) { int i, number_slots; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "__archive_read_get_bidder"); number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]); for (i = 0; i < number_slots; i++) { if (a->bidders[i].bid == NULL) { memset(a->bidders + i, 0, sizeof(a->bidders[0])); return (a->bidders + i); } } __archive_errx(1, "Not enough slots for compression registration"); return (NULL); /* Never actually executed. */ } /* * The next three functions comprise the peek/consume internal I/O * system used by archive format readers. This system allows fairly * flexible read-ahead and allows the I/O code to operate in a * zero-copy manner most of the time. * * In the ideal case, filters generate blocks of data * and __archive_read_ahead() just returns pointers directly into * those blocks. Then __archive_read_consume() just bumps those * pointers. Only if your request would span blocks does the I/O * layer use a copy buffer to provide you with a contiguous block of * data. The __archive_read_skip() is an optimization; it scans ahead * very quickly (it usually translates into a seek() operation if * you're reading uncompressed disk files). * * A couple of useful idioms: * * "I just want some data." Ask for 1 byte and pay attention to * the "number of bytes available" from __archive_read_ahead(). * You can consume more than you asked for; you just can't consume * more than is available. If you consume everything that's * immediately available, the next read_ahead() call will pull * the next block. * * "I want to output a large block of data." As above, ask for 1 byte, * emit all that's available (up to whatever limit you have), then * repeat until you're done. * * "I want to peek ahead by a large amount." Ask for 4k or so, then * double and repeat until you get an error or have enough. Note * that the I/O layer will likely end up expanding its copy buffer * to fit your request, so use this technique cautiously. This * technique is used, for example, by some of the format tasting * code that has uncertain look-ahead needs. * * TODO: Someday, provide a more generic __archive_read_seek() for * those cases where it's useful. This is tricky because there are lots * of cases where seek() is not available (reading gzip data from a * network socket, for instance), so there needs to be a good way to * communicate whether seek() is available and users of that interface * need to use non-seeking strategies whenever seek() is not available. */ /* * Looks ahead in the input stream: * * If 'avail' pointer is provided, that returns number of bytes available * in the current buffer, which may be much larger than requested. * * If end-of-file, *avail gets set to zero. * * If error, *avail gets error code. * * If request can be met, returns pointer to data, returns NULL * if request is not met. * * Note: If you just want "some data", ask for 1 byte and pay attention * to *avail, which will have the actual amount available. If you * know exactly how many bytes you need, just ask for that and treat * a NULL return as an error. * * Important: This does NOT move the file pointer. See * __archive_read_consume() below. */ /* * This is tricky. We need to provide our clients with pointers to * contiguous blocks of memory but we want to avoid copying whenever * possible. * * Mostly, this code returns pointers directly into the block of data * provided by the client_read routine. It can do this unless the * request would split across blocks. In that case, we have to copy * into an internal buffer to combine reads. */ const void * __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) { return (__archive_read_filter_ahead(a->filter, min, avail)); } const void * __archive_read_filter_ahead(struct archive_read_filter *filter, size_t min, ssize_t *avail) { ssize_t bytes_read; size_t tocopy; if (filter->fatal) { if (avail) *avail = ARCHIVE_FATAL; return (NULL); } /* * Keep pulling more data until we can satisfy the request. */ for (;;) { /* * If we can satisfy from the copy buffer (and the * copy buffer isn't empty), we're done. In particular, * note that min == 0 is a perfectly well-defined * request. */ if (filter->avail >= min && filter->avail > 0) { if (avail != NULL) *avail = filter->avail; return (filter->next); } /* * We can satisfy directly from client buffer if everything * currently in the copy buffer is still in the client buffer. */ if (filter->client_total >= filter->client_avail + filter->avail && filter->client_avail + filter->avail >= min) { /* "Roll back" to client buffer. */ filter->client_avail += filter->avail; filter->client_next -= filter->avail; /* Copy buffer is now empty. */ filter->avail = 0; filter->next = filter->buffer; /* Return data from client buffer. */ if (avail != NULL) *avail = filter->client_avail; return (filter->client_next); } /* Move data forward in copy buffer if necessary. */ if (filter->next > filter->buffer && filter->next + min > filter->buffer + filter->buffer_size) { if (filter->avail > 0) memmove(filter->buffer, filter->next, filter->avail); filter->next = filter->buffer; } /* If we've used up the client data, get more. */ if (filter->client_avail <= 0) { if (filter->end_of_file) { if (avail != NULL) *avail = 0; return (NULL); } bytes_read = (filter->read)(filter, &filter->client_buff); if (bytes_read < 0) { /* Read error. */ filter->client_total = filter->client_avail = 0; filter->client_next = filter->client_buff = NULL; filter->fatal = 1; if (avail != NULL) *avail = ARCHIVE_FATAL; return (NULL); } if (bytes_read == 0) { /* Premature end-of-file. */ filter->client_total = filter->client_avail = 0; filter->client_next = filter->client_buff = NULL; filter->end_of_file = 1; /* Return whatever we do have. */ if (avail != NULL) *avail = filter->avail; return (NULL); } filter->position += bytes_read; filter->client_total = bytes_read; filter->client_avail = filter->client_total; filter->client_next = filter->client_buff; } else { /* * We can't satisfy the request from the copy * buffer or the existing client data, so we * need to copy more client data over to the * copy buffer. */ /* Ensure the buffer is big enough. */ if (min > filter->buffer_size) { size_t s, t; char *p; /* Double the buffer; watch for overflow. */ s = t = filter->buffer_size; if (s == 0) s = min; while (s < min) { t *= 2; if (t <= s) { /* Integer overflow! */ archive_set_error( &filter->archive->archive, ENOMEM, "Unable to allocate copy buffer"); filter->fatal = 1; if (avail != NULL) *avail = ARCHIVE_FATAL; return (NULL); } s = t; } /* Now s >= min, so allocate a new buffer. */ p = (char *)malloc(s); if (p == NULL) { archive_set_error( &filter->archive->archive, ENOMEM, "Unable to allocate copy buffer"); filter->fatal = 1; if (avail != NULL) *avail = ARCHIVE_FATAL; return (NULL); } /* Move data into newly-enlarged buffer. */ if (filter->avail > 0) memmove(p, filter->next, filter->avail); free(filter->buffer); filter->next = filter->buffer = p; filter->buffer_size = s; } /* We can add client data to copy buffer. */ /* First estimate: copy to fill rest of buffer. */ tocopy = (filter->buffer + filter->buffer_size) - (filter->next + filter->avail); /* Don't waste time buffering more than we need to. */ if (tocopy + filter->avail > min) tocopy = min - filter->avail; /* Don't copy more than is available. */ if (tocopy > filter->client_avail) tocopy = filter->client_avail; memcpy(filter->next + filter->avail, filter->client_next, tocopy); /* Remove this data from client buffer. */ filter->client_next += tocopy; filter->client_avail -= tocopy; /* add it to copy buffer. */ filter->avail += tocopy; } } } /* * Move the file pointer forward. This should be called after * __archive_read_ahead() returns data to you. Don't try to move * ahead by more than the amount of data available according to * __archive_read_ahead(). */ /* * Mark the appropriate data as used. Note that the request here will * often be much smaller than the size of the previous read_ahead * request. */ ssize_t __archive_read_consume(struct archive_read *a, size_t request) { ssize_t r; r = __archive_read_filter_consume(a->filter, request); a->archive.file_position += r; return (r); } ssize_t __archive_read_filter_consume(struct archive_read_filter * filter, size_t request) { if (filter->avail > 0) { /* Read came from copy buffer. */ filter->next += request; filter->avail -= request; } else { /* Read came from client buffer. */ filter->client_next += request; filter->client_avail -= request; } return (request); } /* * Move the file pointer ahead by an arbitrary amount. If you're * reading uncompressed data from a disk file, this will actually * translate into a seek() operation. Even in cases where seek() * isn't feasible, this at least pushes the read-and-discard loop * down closer to the data source. */ int64_t __archive_read_skip(struct archive_read *a, int64_t request) { int64_t skipped = __archive_read_skip_lenient(a, request); if (skipped == request) return (skipped); /* We hit EOF before we satisfied the skip request. */ if (skipped < 0) /* Map error code to 0 for error message below. */ skipped = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Truncated input file (needed %jd bytes, only %jd available)", (intmax_t)request, (intmax_t)skipped); return (ARCHIVE_FATAL); } int64_t __archive_read_skip_lenient(struct archive_read *a, int64_t request) { int64_t skipped = __archive_read_filter_skip(a->filter, request); if (skipped > 0) a->archive.file_position += skipped; return (skipped); } int64_t __archive_read_filter_skip(struct archive_read_filter *filter, int64_t request) { int64_t bytes_skipped, total_bytes_skipped = 0; size_t min; if (filter->fatal) return (-1); /* * If there is data in the buffers already, use that first. */ if (filter->avail > 0) { min = minimum(request, (off_t)filter->avail); bytes_skipped = __archive_read_filter_consume(filter, min); request -= bytes_skipped; total_bytes_skipped += bytes_skipped; } if (filter->client_avail > 0) { min = minimum(request, (int64_t)filter->client_avail); bytes_skipped = __archive_read_filter_consume(filter, min); request -= bytes_skipped; total_bytes_skipped += bytes_skipped; } if (request == 0) return (total_bytes_skipped); /* * If a client_skipper was provided, try that first. */ #if ARCHIVE_API_VERSION < 2 if ((filter->skip != NULL) && (request < SSIZE_MAX)) { #else if (filter->skip != NULL) { #endif bytes_skipped = (filter->skip)(filter, request); if (bytes_skipped < 0) { /* error */ filter->client_total = filter->client_avail = 0; filter->client_next = filter->client_buff = NULL; filter->fatal = 1; return (bytes_skipped); } total_bytes_skipped += bytes_skipped; request -= bytes_skipped; filter->client_next = filter->client_buff; filter->client_avail = filter->client_total = 0; } /* * Note that client_skipper will usually not satisfy the * full request (due to low-level blocking concerns), * so even if client_skipper is provided, we may still * have to use ordinary reads to finish out the request. */ while (request > 0) { const void* dummy_buffer; ssize_t bytes_read; dummy_buffer = __archive_read_filter_ahead(filter, 1, &bytes_read); if (bytes_read < 0) return (bytes_read); if (bytes_read == 0) { return (total_bytes_skipped); } min = (size_t)(minimum(bytes_read, request)); bytes_read = __archive_read_filter_consume(filter, min); total_bytes_skipped += bytes_read; request -= bytes_read; } return (total_bytes_skipped); }