summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsebres <sebres@users.sourceforge.net>2020-06-24 09:02:01 (GMT)
committersebres <sebres@users.sourceforge.net>2020-06-24 09:02:01 (GMT)
commit3f90f816f86882d82b3bda1af1d503759acf5039 (patch)
treeb2f286fce09e28a51d4d11e77bc44592e5b84051
parent97499f5ed8cc4ea3177e6fb8544c5cd7aba6e029 (diff)
parent094567e57a4212ad576fa972a418938423e8db0d (diff)
downloadtcl-3f90f816f86882d82b3bda1af1d503759acf5039.zip
tcl-3f90f816f86882d82b3bda1af1d503759acf5039.tar.gz
tcl-3f90f816f86882d82b3bda1af1d503759acf5039.tar.bz2
merge zlib-chan-f70ce1fead, [f70ce1fead] - rewritten and optimized inflate:
default read ahead limit set to 4K (instead of 1); better SF/BO prevention; code simplification, no interim copy/move buffers, no decompression d-string, the inflate occurring in single step (ResultDecompress combines ResultGenerate and ResultCopy); better handling in non-blocking mode (also recognizes or distinguish no data/EAGAIN cases).
-rw-r--r--doc/zlib.n22
-rw-r--r--generic/tclZlib.c282
-rw-r--r--tests/zlib.test84
3 files changed, 229 insertions, 159 deletions
diff --git a/doc/zlib.n b/doc/zlib.n
index fd29e0d..3714fc1 100644
--- a/doc/zlib.n
+++ b/doc/zlib.n
@@ -193,10 +193,18 @@ How hard to compress the data. Must be an integer from 0 (uncompressed) to 9
.TP
\fB\-limit\fI readaheadLimit\fR
.
-The maximum number of bytes ahead to read when decompressing. This defaults to
-1, which ensures that data is always decompressed correctly, but may be
-increased to improve performance. This is more useful when the channel is
-non-blocking.
+The maximum number of bytes ahead to read when decompressing.
+.RS
+.PP
+This option has become \fBirrelevant\fR. It was originally introduced
+to prevent Tcl from reading beyond the end of a compressed stream in
+multi-stream channels to ensure that the data after was left alone for
+further reading, at the cost of speed.
+.PP
+Tcl now automatically returns any bytes it has read beyond the end of
+a compressed stream back to the channel, making them appear as unread
+to further readers.
+.RE
.PP
Both compressing and decompressing channel transformations add extra
configuration options that may be accessed through \fBchan configure\fR. The
@@ -238,10 +246,8 @@ off the data stream.
\fB\-limit\fI readaheadLimit\fR
.
This read-write option is used by decompressing channels to control the
-maximum number of bytes ahead to read from the underlying data source. This
-defaults to 1, which ensures that data is always decompressed correctly, but
-may be increased to improve performance. This is more useful when the channel
-is non-blocking.
+maximum number of bytes ahead to read from the underlying data source. See
+above for more information.
.RE
.SS "STREAMING SUBCOMMAND"
.TP
diff --git a/generic/tclZlib.c b/generic/tclZlib.c
index 86fda86..ac11089 100644
--- a/generic/tclZlib.c
+++ b/generic/tclZlib.c
@@ -124,7 +124,6 @@ typedef struct {
GzipHeader outHeader; /* Header to write to an output stream, when
* compressing a gzip stream. */
Tcl_TimerToken timer; /* Timer used for keeping events fresh. */
- Tcl_DString decompressed; /* Buffer for decompression results. */
Tcl_Obj *compDictObj; /* Byte-array object containing compression
* dictionary (not dictObj!) to use if
* necessary. */
@@ -137,11 +136,15 @@ typedef struct {
* the input compressor.
* OUT_HEADER - Whether the outputHeader field has been registered
* with the output decompressor.
+ * STREAM_DECOMPRESS - Signal decompress pending data.
+ * STREAM_DONE - Flag to signal stream end up to transform input.
*/
-#define ASYNC 0x1
-#define IN_HEADER 0x2
-#define OUT_HEADER 0x4
+#define ASYNC 0x01
+#define IN_HEADER 0x02
+#define OUT_HEADER 0x04
+#define STREAM_DECOMPRESS 0x08
+#define STREAM_DONE 0x10
/*
* Size of buffers allocated by default, and the range it can be set to. The
@@ -184,10 +187,8 @@ static int GenerateHeader(Tcl_Interp *interp, Tcl_Obj *dictObj,
GzipHeader *headerPtr, int *extraSizePtr);
static int ZlibPushSubcmd(Tcl_Interp *interp, int objc,
Tcl_Obj *const objv[]);
-static inline int ResultCopy(ZlibChannelData *cd, char *buf,
- int toRead);
-static int ResultGenerate(ZlibChannelData *cd, int n, int flush,
- int *errorCodePtr);
+static int ResultDecompress(ZlibChannelData *cd, char *buf,
+ int toRead, int flush, int *errorCodePtr);
static Tcl_Channel ZlibStackChannelTransform(Tcl_Interp *interp,
int mode, int format, int level, int limit,
Tcl_Channel channel, Tcl_Obj *gzipHeaderDictPtr,
@@ -2364,7 +2365,7 @@ ZlibPushSubcmd(
const char *const *pushOptions = pushDecompressOptions;
enum pushOptions {poDictionary, poHeader, poLevel, poLimit};
Tcl_Obj *headerObj = NULL, *compDictObj = NULL;
- int limit = 1, dummy;
+ int limit = DEFAULT_BUFFER_SIZE, dummy;
if (objc < 4) {
Tcl_WrongNumArgs(interp, 2, objv, "mode channel ?options...?");
@@ -2946,6 +2947,15 @@ ZlibTransformClose(
} while (e != Z_STREAM_END);
(void) deflateEnd(&cd->outStream);
} else {
+ /*
+ * If we have unused bytes from the read input (overshot by
+ * Z_STREAM_END or on possible error), unget them back to the parent
+ * channel, so that they appear as not being read yet.
+ */
+ if (cd->inStream.avail_in) {
+ Tcl_Ungets (cd->parent, (char *)cd->inStream.next_in, cd->inStream.avail_in, 0);
+ }
+
(void) inflateEnd(&cd->inStream);
}
@@ -2957,7 +2967,6 @@ ZlibTransformClose(
Tcl_DecrRefCount(cd->compDictObj);
cd->compDictObj = NULL;
}
- Tcl_DStringFree(&cd->decompressed);
if (cd->inBuffer) {
ckfree(cd->inBuffer);
@@ -2991,7 +3000,7 @@ ZlibTransformInput(
ZlibChannelData *cd = (ZlibChannelData *)instanceData;
Tcl_DriverInputProc *inProc =
Tcl_ChannelInputProc(Tcl_GetChannelType(cd->parent));
- int readBytes, gotBytes, copied;
+ int readBytes, gotBytes;
if (cd->mode == TCL_ZLIB_STREAM_DEFLATE) {
return inProc(Tcl_GetChannelInstanceData(cd->parent), buf, toRead,
@@ -2999,37 +3008,42 @@ ZlibTransformInput(
}
gotBytes = 0;
- while (toRead > 0) {
- /*
- * Loop until the request is satisfied (or no data available from
- * below, possibly EOF).
+ readBytes = cd->inStream.avail_in; /* how many bytes in buffer now */
+ while (!(cd->flags & STREAM_DONE) && toRead > 0) {
+ int n, decBytes;
+
+ /* if starting from scratch or continuation after full decompression */
+ if (!cd->inStream.avail_in) {
+ /* buffer to start, we can read to whole available buffer */
+ cd->inStream.next_in = (Bytef *) cd->inBuffer;
+ }
+ /*
+ * If done - no read needed anymore, check we have to copy rest of
+ * decompressed data, otherwise return with size (or 0 for Eof)
*/
-
- copied = ResultCopy(cd, buf, toRead);
- toRead -= copied;
- buf += copied;
- gotBytes += copied;
-
- if (toRead == 0) {
- return gotBytes;
+ if (cd->flags & STREAM_DECOMPRESS) {
+ goto copyDecompressed;
}
-
/*
* The buffer is exhausted, but the caller wants even more. We now
* have to go to the underlying channel, get more bytes and then
* transform them for delivery. We may not get what we want (full EOF
* or temporarily out of data).
- *
- * Length (cd->decompressed) == 0, toRead > 0 here.
- *
- * The zlib transform allows us to read at most one character from the
- * underlying channel to properly identify Z_STREAM_END without
- * reading over the border.
*/
- readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer,
- cd->readAheadLimit <= cd->inAllocated ?
- cd->readAheadLimit : cd->inAllocated);
+ /* Check free buffer size and adjust size of next chunk to read. */
+ n = cd->inAllocated - ((char *)cd->inStream.next_in - cd->inBuffer);
+ if (n <= 0) {
+ /* Normally unreachable: not enough input buffer to uncompress.
+ * Todo: firstly try to realloc inBuffer upto MAX_BUFFER_SIZE.
+ */
+ *errorCodePtr = ENOBUFS;
+ return -1;
+ }
+ if (n > cd->readAheadLimit) {
+ n = cd->readAheadLimit;
+ }
+ readBytes = Tcl_ReadRaw(cd->parent, (char *)cd->inStream.next_in, n);
/*
* Three cases here:
@@ -3045,45 +3059,59 @@ ZlibTransformInput(
/* See ReflectInput() in tclIORTrans.c */
if (Tcl_InputBlocked(cd->parent) && (gotBytes > 0)) {
- return gotBytes;
+ break;
}
*errorCodePtr = Tcl_GetErrno();
return -1;
}
- if (readBytes == 0) {
- /*
- * Eof in parent.
- *
- * Now this is a bit different. The partial data waiting is
- * converted and returned.
- */
- if (ResultGenerate(cd, 0, Z_SYNC_FLUSH, errorCodePtr) != TCL_OK) {
- return -1;
- }
+ /* more bytes (or Eof if readBytes == 0) */
+ cd->inStream.avail_in += readBytes;
- if (Tcl_DStringLength(&cd->decompressed) == 0) {
- /*
- * The drain delivered nothing. Time to deliver what we've
- * got.
- */
+copyDecompressed:
- return gotBytes;
- }
- } else /* readBytes > 0 */ {
+ /*
+ * Transform the read chunk, if not empty. Anything we get
+ * back is a transformation result to be put into our buffers, and
+ * the next iteration will put it into the result.
+ * For the case readBytes is 0 which signaling Eof in parent, the
+ * partial data waiting is converted and returned.
+ */
+
+ decBytes = ResultDecompress(cd, buf, toRead,
+ (readBytes != 0) ? Z_NO_FLUSH : Z_SYNC_FLUSH,
+ errorCodePtr);
+ if (decBytes == -1) {
+ return -1;
+ }
+ gotBytes += decBytes;
+ buf += decBytes;
+ toRead -= decBytes;
+
+ if (((decBytes == 0) || (cd->flags & STREAM_DECOMPRESS))) {
/*
- * Transform the read chunk, which was not empty. Anything we get
- * back is a transformation result to be put into our buffers, and
- * the next iteration will put it into the result.
+ * The drain delivered nothing (or buffer too small to decompress).
+ * Time to deliver what we've got.
*/
-
- if (ResultGenerate(cd, readBytes, Z_NO_FLUSH,
- errorCodePtr) != TCL_OK) {
- return -1;
+ if (!gotBytes && !(cd->flags & STREAM_DONE)) {
+ /* if no-data, but not ready - avoid signaling Eof,
+ * continue in blocking mode, otherwise EAGAIN */
+ if (Tcl_InputBlocked(cd->parent)) {
+ continue;
+ }
+ *errorCodePtr = EAGAIN;
+ return -1;
}
+ break;
}
+
+ /*
+ * Loop until the request is satisfied (or no data available from
+ * above, possibly EOF).
+ */
}
+
return gotBytes;
}
@@ -3466,7 +3494,7 @@ ZlibTransformWatch(
watchProc = Tcl_ChannelWatchProc(Tcl_GetChannelType(cd->parent));
watchProc(Tcl_GetChannelInstanceData(cd->parent), mask);
- if (!(mask & TCL_READABLE) || Tcl_DStringLength(&cd->decompressed) == 0) {
+ if (!(mask & TCL_READABLE) || !(cd->flags & STREAM_DECOMPRESS)) {
ZlibTransformEventTimerKill(cd);
} else if (cd->timer == NULL) {
cd->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME,
@@ -3652,7 +3680,10 @@ ZlibStackChannelTransform(
goto error;
}
cd->inAllocated = DEFAULT_BUFFER_SIZE;
- cd->inBuffer = (char *)ckalloc(cd->inAllocated);
+ if (cd->inAllocated < cd->readAheadLimit) {
+ cd->inAllocated = cd->readAheadLimit;
+ }
+ cd->inBuffer = ckalloc(cd->inAllocated);
if (cd->flags & IN_HEADER) {
if (inflateGetHeader(&cd->inStream, &cd->inHeader.header) != Z_OK) {
goto error;
@@ -3682,8 +3713,6 @@ ZlibStackChannelTransform(
}
}
- Tcl_DStringInit(&cd->decompressed);
-
chan = Tcl_StackChannel(interp, &zlibChannelType, cd,
Tcl_GetChannelMode(channel), channel);
if (chan == NULL) {
@@ -3713,96 +3742,37 @@ ZlibStackChannelTransform(
/*
*----------------------------------------------------------------------
*
- * ResultCopy --
- *
- * Copies the requested number of bytes from the buffer into the
- * specified array and removes them from the buffer afterward. Copies
- * less if there is not enough data in the buffer.
- *
- * Side effects:
- * See above.
- *
- * Result:
- * The number of actually copied bytes, possibly less than 'toRead'.
- *
- *----------------------------------------------------------------------
- */
-
-static inline int
-ResultCopy(
- ZlibChannelData *cd, /* The location of the buffer to read from. */
- char *buf, /* The buffer to copy into */
- int toRead) /* Number of requested bytes */
-{
- int have = Tcl_DStringLength(&cd->decompressed);
-
- if (have == 0) {
- /*
- * Nothing to copy in the case of an empty buffer.
- */
-
- return 0;
- } else if (have > toRead) {
- /*
- * The internal buffer contains more than requested. Copy the
- * requested subset to the caller, shift the remaining bytes down, and
- * truncate.
- */
-
- char *src = Tcl_DStringValue(&cd->decompressed);
-
- memcpy(buf, src, toRead);
- memmove(src, src + toRead, have - toRead);
-
- Tcl_DStringSetLength(&cd->decompressed, have - toRead);
- return toRead;
- } else /* have <= toRead */ {
- /*
- * There is just or not enough in the buffer to fully satisfy the
- * caller, so take everything as best effort.
- */
-
- memcpy(buf, Tcl_DStringValue(&cd->decompressed), have);
- TclDStringClear(&cd->decompressed);
- return have;
- }
-}
-
-/*
- *----------------------------------------------------------------------
- *
- * ResultGenerate --
+ * ResultDecompress --
*
* Extract uncompressed bytes from the compression engine and store them
- * in our working buffer.
+ * in our buffer (buf) up to toRead bytes.
*
* Result:
- * TCL_OK/TCL_ERROR (with *errorCodePtr updated with reason).
+ * Number of bytes decompressed or -1 if error (with *errorCodePtr updated with reason).
*
* Side effects:
- * See above.
+ * After execution it updates cd->inStream (next_in, avail_in) to reflect
+ * the data that has been decompressed.
*
*----------------------------------------------------------------------
*/
static int
-ResultGenerate(
+ResultDecompress(
ZlibChannelData *cd,
- int n,
+ char *buf,
+ int toRead,
int flush,
int *errorCodePtr)
{
-#define MAXBUF 1024
- unsigned char buf[MAXBUF];
- int e, written;
+ int e, written, resBytes = 0;
Tcl_Obj *errObj;
- cd->inStream.next_in = (Bytef *) cd->inBuffer;
- cd->inStream.avail_in = n;
- while (1) {
- cd->inStream.next_out = (Bytef *) buf;
- cd->inStream.avail_out = MAXBUF;
+ cd->flags &= ~STREAM_DECOMPRESS;
+ cd->inStream.next_out = (Bytef *) buf;
+ cd->inStream.avail_out = toRead;
+ while (cd->inStream.avail_out > 0) {
e = inflate(&cd->inStream, flush);
if (e == Z_NEED_DICT && cd->compDictObj) {
@@ -3811,31 +3781,35 @@ ResultGenerate(
/*
* A repetition of Z_NEED_DICT is just an error.
*/
-
- cd->inStream.next_out = (Bytef *) buf;
- cd->inStream.avail_out = MAXBUF;
e = inflate(&cd->inStream, flush);
}
}
/*
* avail_out is now the left over space in the output. Therefore
- * "MAXBUF - avail_out" is the amount of bytes generated.
+ * "toRead - avail_out" is the amount of bytes generated.
*/
- written = MAXBUF - cd->inStream.avail_out;
- if (written) {
- Tcl_DStringAppend(&cd->decompressed, (char *) buf, written);
- }
+ written = toRead - cd->inStream.avail_out;
/*
* The cases where we're definitely done.
*/
- if (((flush == Z_SYNC_FLUSH) && (e == Z_BUF_ERROR))
- || (e == Z_STREAM_END)
- || (e == Z_OK && written == 0)) {
- return TCL_OK;
+ if (e == Z_STREAM_END) {
+ cd->flags |= STREAM_DONE;
+ resBytes += written;
+ break;
+ }
+ if (e == Z_OK) {
+ if (written == 0) {
+ break;
+ }
+ resBytes += written;
+ }
+
+ if ((flush == Z_SYNC_FLUSH) && (e == Z_BUF_ERROR)) {
+ break;
}
/*
@@ -3856,10 +3830,20 @@ ResultGenerate(
*/
if (cd->inStream.avail_in <= 0 && flush != Z_SYNC_FLUSH) {
- return TCL_OK;
+ break;
}
}
+ if (!(cd->flags & STREAM_DONE)) {
+ /* if we have pending input data, but no available output buffer */
+ if (cd->inStream.avail_in && !cd->inStream.avail_out) {
+ /* next time try to decompress it got readable (new output buffer) */
+ cd->flags |= STREAM_DECOMPRESS;
+ }
+ }
+
+ return resBytes;
+
handleError:
errObj = Tcl_NewListObj(0, NULL);
Tcl_ListObjAppendElement(NULL, errObj, Tcl_NewStringObj("-errorcode",-1));
@@ -3869,7 +3853,7 @@ ResultGenerate(
Tcl_NewStringObj(cd->inStream.msg, -1));
Tcl_SetChannelError(cd->parent, errObj);
*errorCodePtr = EINVAL;
- return TCL_ERROR;
+ return -1;
}
/*
diff --git a/tests/zlib.test b/tests/zlib.test
index c2f7825..d3a6dff 100644
--- a/tests/zlib.test
+++ b/tests/zlib.test
@@ -920,7 +920,7 @@ test zlib-10.2 "bug #2818131 (mismatch gets)" -constraints {
rename zlibRead {}
} -result {error {invalid block type}}
-test zlib-11.1 "Bug #3390073: mis-appled gzip filtering" -setup {
+test zlib-11.1 "Bug #3390073: mis-applied gzip filtering" -setup {
set file [makeFile {} test.input]
} -constraints zlib -body {
set f [open $file wb]
@@ -934,7 +934,7 @@ test zlib-11.1 "Bug #3390073: mis-appled gzip filtering" -setup {
} -cleanup {
removeFile $file
} -result {1000 0}
-test zlib-11.2 "Bug #3390073: mis-appled gzip filtering" -setup {
+test zlib-11.2 "Bug #3390073: mis-applied gzip filtering" -setup {
set file [makeFile {} test.input]
} -constraints zlib -body {
set f [open $file wb]
@@ -1005,6 +1005,86 @@ test zlib-12.2 {Patrick Dunnigan's issue} -constraints zlib -setup {
removeFile $filesrc
removeFile $filedst
} -result 56
+
+set zlibbinf ""
+proc _zlibbinf {} {
+ # inlined zlib.bin file creator:
+ variable zlibbinf
+ if {$zlibbinf eq ""} {
+ set zlibbinf [makeFile {} test-zlib-13.bin]
+ set f [open $zlibbinf wb]
+ puts -nonewline $f [zlib decompress [binary decode base64 {
+ eJx7e+6s1+EAgYaLjK3ratptGmOck0vT/y/ZujHAd0qJelDBXfUPJ3tfrtLbpX+wOOFHmtn03/tizm
+ /+tXROXU3d203b79p5X6/0cvUyFzTsqOj4sa9r8SrZI5zT7265e2Xzq595Fb9LbpgffVy7cZaJ/d15
+ 4U9L7LLM2vdqut8+aSU/r6q9Ltv6+T9mBhTgIK97bH33m/O1C1eBwf9FDKNgaIDaj9wA+5hToA==
+ }]]
+ close $f
+ }
+ return $zlibbinf
+}
+test zlib-13.1 {Ticket [8af92dfb66] - zlib stream mis-expansion} -constraints zlib -setup {
+ set pathin [_zlibbinf]
+ set chanin [open $pathin rb]
+ set pathout [makeFile {} test-zlib-13.deflated]
+ set chanout [open $pathout wb]
+ zlib push inflate $chanin
+ fcopy $chanin $chanout
+ close $chanin
+ close $chanout
+} -body {
+ file size $pathout
+} -cleanup {
+ removeFile $pathout
+ unset chanin pathin chanout pathout
+} -result 458752
+
+test zlib-13.2 {Ticket [f70ce1fead] - zlib multi-stream expansion} -constraints zlib -setup {
+ # Start from the basic asset
+ set pathin [_zlibbinf]
+ set chanin [open $pathin rb]
+ # Create a multi-stream by copying the asset twice into it.
+ set pathout [makeFile {} test-zlib-13.multi]
+ set chanout [open $pathout wb]
+ fcopy $chanin $chanout
+ seek $chanin 0 start
+ fcopy $chanin $chanout
+ close $chanin
+ close $chanout
+ # The multi-stream file shall be our input
+ set pathin $pathout
+ set chanin [open $pathin rb]
+ # And our destinations
+ set pathout1 [makeFile {} test-zlib-13.multi-1]
+ set pathout2 [makeFile {} test-zlib-13.multi-2]
+} -body {
+ # Decode first stream
+ set chanout [open $pathout1 wb]
+ zlib push inflate $chanin
+ fcopy $chanin $chanout
+ chan pop $chanin
+ close $chanout
+ # Decode second stream
+ set chanout [open $pathout2 wb]
+ zlib push inflate $chanin
+ fcopy $chanin $chanout
+ chan pop $chanin
+ close $chanout
+ #
+ list [file size $pathout1] [file size $pathout2]
+} -cleanup {
+ close $chanin
+ removeFile $pathout
+ removeFile $pathout1
+ removeFile $pathout2
+ unset chanin pathin chanout pathout pathout1 pathout2
+} -result {458752 458752}
+
+if {$zlibbinf ne ""} {
+ removeFile $zlibbinf
+}
+unset zlibbinf
+rename _zlibbinf {}
+
::tcltest::cleanupTests
return