diff options
author | sebres <sebres@users.sourceforge.net> | 2020-06-24 09:02:01 (GMT) |
---|---|---|
committer | sebres <sebres@users.sourceforge.net> | 2020-06-24 09:02:01 (GMT) |
commit | 3f90f816f86882d82b3bda1af1d503759acf5039 (patch) | |
tree | b2f286fce09e28a51d4d11e77bc44592e5b84051 | |
parent | 97499f5ed8cc4ea3177e6fb8544c5cd7aba6e029 (diff) | |
parent | 094567e57a4212ad576fa972a418938423e8db0d (diff) | |
download | tcl-3f90f816f86882d82b3bda1af1d503759acf5039.zip tcl-3f90f816f86882d82b3bda1af1d503759acf5039.tar.gz tcl-3f90f816f86882d82b3bda1af1d503759acf5039.tar.bz2 |
merge zlib-chan-f70ce1fead, [f70ce1fead] - rewritten and optimized inflate:
default read ahead limit set to 4K (instead of 1); better SF/BO prevention;
code simplification, no interim copy/move buffers, no decompression d-string, the inflate occurring in single step (ResultDecompress combines ResultGenerate and ResultCopy);
better handling in non-blocking mode (also recognizes or distinguish no data/EAGAIN cases).
-rw-r--r-- | doc/zlib.n | 22 | ||||
-rw-r--r-- | generic/tclZlib.c | 282 | ||||
-rw-r--r-- | tests/zlib.test | 84 |
3 files changed, 229 insertions, 159 deletions
@@ -193,10 +193,18 @@ How hard to compress the data. Must be an integer from 0 (uncompressed) to 9 .TP \fB\-limit\fI readaheadLimit\fR . -The maximum number of bytes ahead to read when decompressing. This defaults to -1, which ensures that data is always decompressed correctly, but may be -increased to improve performance. This is more useful when the channel is -non-blocking. +The maximum number of bytes ahead to read when decompressing. +.RS +.PP +This option has become \fBirrelevant\fR. It was originally introduced +to prevent Tcl from reading beyond the end of a compressed stream in +multi-stream channels to ensure that the data after was left alone for +further reading, at the cost of speed. +.PP +Tcl now automatically returns any bytes it has read beyond the end of +a compressed stream back to the channel, making them appear as unread +to further readers. +.RE .PP Both compressing and decompressing channel transformations add extra configuration options that may be accessed through \fBchan configure\fR. The @@ -238,10 +246,8 @@ off the data stream. \fB\-limit\fI readaheadLimit\fR . This read-write option is used by decompressing channels to control the -maximum number of bytes ahead to read from the underlying data source. This -defaults to 1, which ensures that data is always decompressed correctly, but -may be increased to improve performance. This is more useful when the channel -is non-blocking. +maximum number of bytes ahead to read from the underlying data source. See +above for more information. .RE .SS "STREAMING SUBCOMMAND" .TP diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 86fda86..ac11089 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -124,7 +124,6 @@ typedef struct { GzipHeader outHeader; /* Header to write to an output stream, when * compressing a gzip stream. */ Tcl_TimerToken timer; /* Timer used for keeping events fresh. */ - Tcl_DString decompressed; /* Buffer for decompression results. */ Tcl_Obj *compDictObj; /* Byte-array object containing compression * dictionary (not dictObj!) to use if * necessary. */ @@ -137,11 +136,15 @@ typedef struct { * the input compressor. * OUT_HEADER - Whether the outputHeader field has been registered * with the output decompressor. + * STREAM_DECOMPRESS - Signal decompress pending data. + * STREAM_DONE - Flag to signal stream end up to transform input. */ -#define ASYNC 0x1 -#define IN_HEADER 0x2 -#define OUT_HEADER 0x4 +#define ASYNC 0x01 +#define IN_HEADER 0x02 +#define OUT_HEADER 0x04 +#define STREAM_DECOMPRESS 0x08 +#define STREAM_DONE 0x10 /* * Size of buffers allocated by default, and the range it can be set to. The @@ -184,10 +187,8 @@ static int GenerateHeader(Tcl_Interp *interp, Tcl_Obj *dictObj, GzipHeader *headerPtr, int *extraSizePtr); static int ZlibPushSubcmd(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static inline int ResultCopy(ZlibChannelData *cd, char *buf, - int toRead); -static int ResultGenerate(ZlibChannelData *cd, int n, int flush, - int *errorCodePtr); +static int ResultDecompress(ZlibChannelData *cd, char *buf, + int toRead, int flush, int *errorCodePtr); static Tcl_Channel ZlibStackChannelTransform(Tcl_Interp *interp, int mode, int format, int level, int limit, Tcl_Channel channel, Tcl_Obj *gzipHeaderDictPtr, @@ -2364,7 +2365,7 @@ ZlibPushSubcmd( const char *const *pushOptions = pushDecompressOptions; enum pushOptions {poDictionary, poHeader, poLevel, poLimit}; Tcl_Obj *headerObj = NULL, *compDictObj = NULL; - int limit = 1, dummy; + int limit = DEFAULT_BUFFER_SIZE, dummy; if (objc < 4) { Tcl_WrongNumArgs(interp, 2, objv, "mode channel ?options...?"); @@ -2946,6 +2947,15 @@ ZlibTransformClose( } while (e != Z_STREAM_END); (void) deflateEnd(&cd->outStream); } else { + /* + * If we have unused bytes from the read input (overshot by + * Z_STREAM_END or on possible error), unget them back to the parent + * channel, so that they appear as not being read yet. + */ + if (cd->inStream.avail_in) { + Tcl_Ungets (cd->parent, (char *)cd->inStream.next_in, cd->inStream.avail_in, 0); + } + (void) inflateEnd(&cd->inStream); } @@ -2957,7 +2967,6 @@ ZlibTransformClose( Tcl_DecrRefCount(cd->compDictObj); cd->compDictObj = NULL; } - Tcl_DStringFree(&cd->decompressed); if (cd->inBuffer) { ckfree(cd->inBuffer); @@ -2991,7 +3000,7 @@ ZlibTransformInput( ZlibChannelData *cd = (ZlibChannelData *)instanceData; Tcl_DriverInputProc *inProc = Tcl_ChannelInputProc(Tcl_GetChannelType(cd->parent)); - int readBytes, gotBytes, copied; + int readBytes, gotBytes; if (cd->mode == TCL_ZLIB_STREAM_DEFLATE) { return inProc(Tcl_GetChannelInstanceData(cd->parent), buf, toRead, @@ -2999,37 +3008,42 @@ ZlibTransformInput( } gotBytes = 0; - while (toRead > 0) { - /* - * Loop until the request is satisfied (or no data available from - * below, possibly EOF). + readBytes = cd->inStream.avail_in; /* how many bytes in buffer now */ + while (!(cd->flags & STREAM_DONE) && toRead > 0) { + int n, decBytes; + + /* if starting from scratch or continuation after full decompression */ + if (!cd->inStream.avail_in) { + /* buffer to start, we can read to whole available buffer */ + cd->inStream.next_in = (Bytef *) cd->inBuffer; + } + /* + * If done - no read needed anymore, check we have to copy rest of + * decompressed data, otherwise return with size (or 0 for Eof) */ - - copied = ResultCopy(cd, buf, toRead); - toRead -= copied; - buf += copied; - gotBytes += copied; - - if (toRead == 0) { - return gotBytes; + if (cd->flags & STREAM_DECOMPRESS) { + goto copyDecompressed; } - /* * The buffer is exhausted, but the caller wants even more. We now * have to go to the underlying channel, get more bytes and then * transform them for delivery. We may not get what we want (full EOF * or temporarily out of data). - * - * Length (cd->decompressed) == 0, toRead > 0 here. - * - * The zlib transform allows us to read at most one character from the - * underlying channel to properly identify Z_STREAM_END without - * reading over the border. */ - readBytes = Tcl_ReadRaw(cd->parent, cd->inBuffer, - cd->readAheadLimit <= cd->inAllocated ? - cd->readAheadLimit : cd->inAllocated); + /* Check free buffer size and adjust size of next chunk to read. */ + n = cd->inAllocated - ((char *)cd->inStream.next_in - cd->inBuffer); + if (n <= 0) { + /* Normally unreachable: not enough input buffer to uncompress. + * Todo: firstly try to realloc inBuffer upto MAX_BUFFER_SIZE. + */ + *errorCodePtr = ENOBUFS; + return -1; + } + if (n > cd->readAheadLimit) { + n = cd->readAheadLimit; + } + readBytes = Tcl_ReadRaw(cd->parent, (char *)cd->inStream.next_in, n); /* * Three cases here: @@ -3045,45 +3059,59 @@ ZlibTransformInput( /* See ReflectInput() in tclIORTrans.c */ if (Tcl_InputBlocked(cd->parent) && (gotBytes > 0)) { - return gotBytes; + break; } *errorCodePtr = Tcl_GetErrno(); return -1; } - if (readBytes == 0) { - /* - * Eof in parent. - * - * Now this is a bit different. The partial data waiting is - * converted and returned. - */ - if (ResultGenerate(cd, 0, Z_SYNC_FLUSH, errorCodePtr) != TCL_OK) { - return -1; - } + /* more bytes (or Eof if readBytes == 0) */ + cd->inStream.avail_in += readBytes; - if (Tcl_DStringLength(&cd->decompressed) == 0) { - /* - * The drain delivered nothing. Time to deliver what we've - * got. - */ +copyDecompressed: - return gotBytes; - } - } else /* readBytes > 0 */ { + /* + * Transform the read chunk, if not empty. Anything we get + * back is a transformation result to be put into our buffers, and + * the next iteration will put it into the result. + * For the case readBytes is 0 which signaling Eof in parent, the + * partial data waiting is converted and returned. + */ + + decBytes = ResultDecompress(cd, buf, toRead, + (readBytes != 0) ? Z_NO_FLUSH : Z_SYNC_FLUSH, + errorCodePtr); + if (decBytes == -1) { + return -1; + } + gotBytes += decBytes; + buf += decBytes; + toRead -= decBytes; + + if (((decBytes == 0) || (cd->flags & STREAM_DECOMPRESS))) { /* - * Transform the read chunk, which was not empty. Anything we get - * back is a transformation result to be put into our buffers, and - * the next iteration will put it into the result. + * The drain delivered nothing (or buffer too small to decompress). + * Time to deliver what we've got. */ - - if (ResultGenerate(cd, readBytes, Z_NO_FLUSH, - errorCodePtr) != TCL_OK) { - return -1; + if (!gotBytes && !(cd->flags & STREAM_DONE)) { + /* if no-data, but not ready - avoid signaling Eof, + * continue in blocking mode, otherwise EAGAIN */ + if (Tcl_InputBlocked(cd->parent)) { + continue; + } + *errorCodePtr = EAGAIN; + return -1; } + break; } + + /* + * Loop until the request is satisfied (or no data available from + * above, possibly EOF). + */ } + return gotBytes; } @@ -3466,7 +3494,7 @@ ZlibTransformWatch( watchProc = Tcl_ChannelWatchProc(Tcl_GetChannelType(cd->parent)); watchProc(Tcl_GetChannelInstanceData(cd->parent), mask); - if (!(mask & TCL_READABLE) || Tcl_DStringLength(&cd->decompressed) == 0) { + if (!(mask & TCL_READABLE) || !(cd->flags & STREAM_DECOMPRESS)) { ZlibTransformEventTimerKill(cd); } else if (cd->timer == NULL) { cd->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, @@ -3652,7 +3680,10 @@ ZlibStackChannelTransform( goto error; } cd->inAllocated = DEFAULT_BUFFER_SIZE; - cd->inBuffer = (char *)ckalloc(cd->inAllocated); + if (cd->inAllocated < cd->readAheadLimit) { + cd->inAllocated = cd->readAheadLimit; + } + cd->inBuffer = ckalloc(cd->inAllocated); if (cd->flags & IN_HEADER) { if (inflateGetHeader(&cd->inStream, &cd->inHeader.header) != Z_OK) { goto error; @@ -3682,8 +3713,6 @@ ZlibStackChannelTransform( } } - Tcl_DStringInit(&cd->decompressed); - chan = Tcl_StackChannel(interp, &zlibChannelType, cd, Tcl_GetChannelMode(channel), channel); if (chan == NULL) { @@ -3713,96 +3742,37 @@ ZlibStackChannelTransform( /* *---------------------------------------------------------------------- * - * ResultCopy -- - * - * Copies the requested number of bytes from the buffer into the - * specified array and removes them from the buffer afterward. Copies - * less if there is not enough data in the buffer. - * - * Side effects: - * See above. - * - * Result: - * The number of actually copied bytes, possibly less than 'toRead'. - * - *---------------------------------------------------------------------- - */ - -static inline int -ResultCopy( - ZlibChannelData *cd, /* The location of the buffer to read from. */ - char *buf, /* The buffer to copy into */ - int toRead) /* Number of requested bytes */ -{ - int have = Tcl_DStringLength(&cd->decompressed); - - if (have == 0) { - /* - * Nothing to copy in the case of an empty buffer. - */ - - return 0; - } else if (have > toRead) { - /* - * The internal buffer contains more than requested. Copy the - * requested subset to the caller, shift the remaining bytes down, and - * truncate. - */ - - char *src = Tcl_DStringValue(&cd->decompressed); - - memcpy(buf, src, toRead); - memmove(src, src + toRead, have - toRead); - - Tcl_DStringSetLength(&cd->decompressed, have - toRead); - return toRead; - } else /* have <= toRead */ { - /* - * There is just or not enough in the buffer to fully satisfy the - * caller, so take everything as best effort. - */ - - memcpy(buf, Tcl_DStringValue(&cd->decompressed), have); - TclDStringClear(&cd->decompressed); - return have; - } -} - -/* - *---------------------------------------------------------------------- - * - * ResultGenerate -- + * ResultDecompress -- * * Extract uncompressed bytes from the compression engine and store them - * in our working buffer. + * in our buffer (buf) up to toRead bytes. * * Result: - * TCL_OK/TCL_ERROR (with *errorCodePtr updated with reason). + * Number of bytes decompressed or -1 if error (with *errorCodePtr updated with reason). * * Side effects: - * See above. + * After execution it updates cd->inStream (next_in, avail_in) to reflect + * the data that has been decompressed. * *---------------------------------------------------------------------- */ static int -ResultGenerate( +ResultDecompress( ZlibChannelData *cd, - int n, + char *buf, + int toRead, int flush, int *errorCodePtr) { -#define MAXBUF 1024 - unsigned char buf[MAXBUF]; - int e, written; + int e, written, resBytes = 0; Tcl_Obj *errObj; - cd->inStream.next_in = (Bytef *) cd->inBuffer; - cd->inStream.avail_in = n; - while (1) { - cd->inStream.next_out = (Bytef *) buf; - cd->inStream.avail_out = MAXBUF; + cd->flags &= ~STREAM_DECOMPRESS; + cd->inStream.next_out = (Bytef *) buf; + cd->inStream.avail_out = toRead; + while (cd->inStream.avail_out > 0) { e = inflate(&cd->inStream, flush); if (e == Z_NEED_DICT && cd->compDictObj) { @@ -3811,31 +3781,35 @@ ResultGenerate( /* * A repetition of Z_NEED_DICT is just an error. */ - - cd->inStream.next_out = (Bytef *) buf; - cd->inStream.avail_out = MAXBUF; e = inflate(&cd->inStream, flush); } } /* * avail_out is now the left over space in the output. Therefore - * "MAXBUF - avail_out" is the amount of bytes generated. + * "toRead - avail_out" is the amount of bytes generated. */ - written = MAXBUF - cd->inStream.avail_out; - if (written) { - Tcl_DStringAppend(&cd->decompressed, (char *) buf, written); - } + written = toRead - cd->inStream.avail_out; /* * The cases where we're definitely done. */ - if (((flush == Z_SYNC_FLUSH) && (e == Z_BUF_ERROR)) - || (e == Z_STREAM_END) - || (e == Z_OK && written == 0)) { - return TCL_OK; + if (e == Z_STREAM_END) { + cd->flags |= STREAM_DONE; + resBytes += written; + break; + } + if (e == Z_OK) { + if (written == 0) { + break; + } + resBytes += written; + } + + if ((flush == Z_SYNC_FLUSH) && (e == Z_BUF_ERROR)) { + break; } /* @@ -3856,10 +3830,20 @@ ResultGenerate( */ if (cd->inStream.avail_in <= 0 && flush != Z_SYNC_FLUSH) { - return TCL_OK; + break; } } + if (!(cd->flags & STREAM_DONE)) { + /* if we have pending input data, but no available output buffer */ + if (cd->inStream.avail_in && !cd->inStream.avail_out) { + /* next time try to decompress it got readable (new output buffer) */ + cd->flags |= STREAM_DECOMPRESS; + } + } + + return resBytes; + handleError: errObj = Tcl_NewListObj(0, NULL); Tcl_ListObjAppendElement(NULL, errObj, Tcl_NewStringObj("-errorcode",-1)); @@ -3869,7 +3853,7 @@ ResultGenerate( Tcl_NewStringObj(cd->inStream.msg, -1)); Tcl_SetChannelError(cd->parent, errObj); *errorCodePtr = EINVAL; - return TCL_ERROR; + return -1; } /* diff --git a/tests/zlib.test b/tests/zlib.test index c2f7825..d3a6dff 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -920,7 +920,7 @@ test zlib-10.2 "bug #2818131 (mismatch gets)" -constraints { rename zlibRead {} } -result {error {invalid block type}} -test zlib-11.1 "Bug #3390073: mis-appled gzip filtering" -setup { +test zlib-11.1 "Bug #3390073: mis-applied gzip filtering" -setup { set file [makeFile {} test.input] } -constraints zlib -body { set f [open $file wb] @@ -934,7 +934,7 @@ test zlib-11.1 "Bug #3390073: mis-appled gzip filtering" -setup { } -cleanup { removeFile $file } -result {1000 0} -test zlib-11.2 "Bug #3390073: mis-appled gzip filtering" -setup { +test zlib-11.2 "Bug #3390073: mis-applied gzip filtering" -setup { set file [makeFile {} test.input] } -constraints zlib -body { set f [open $file wb] @@ -1005,6 +1005,86 @@ test zlib-12.2 {Patrick Dunnigan's issue} -constraints zlib -setup { removeFile $filesrc removeFile $filedst } -result 56 + +set zlibbinf "" +proc _zlibbinf {} { + # inlined zlib.bin file creator: + variable zlibbinf + if {$zlibbinf eq ""} { + set zlibbinf [makeFile {} test-zlib-13.bin] + set f [open $zlibbinf wb] + puts -nonewline $f [zlib decompress [binary decode base64 { + eJx7e+6s1+EAgYaLjK3ratptGmOck0vT/y/ZujHAd0qJelDBXfUPJ3tfrtLbpX+wOOFHmtn03/tizm + /+tXROXU3d203b79p5X6/0cvUyFzTsqOj4sa9r8SrZI5zT7265e2Xzq595Fb9LbpgffVy7cZaJ/d15 + 4U9L7LLM2vdqut8+aSU/r6q9Ltv6+T9mBhTgIK97bH33m/O1C1eBwf9FDKNgaIDaj9wA+5hToA== + }]] + close $f + } + return $zlibbinf +} +test zlib-13.1 {Ticket [8af92dfb66] - zlib stream mis-expansion} -constraints zlib -setup { + set pathin [_zlibbinf] + set chanin [open $pathin rb] + set pathout [makeFile {} test-zlib-13.deflated] + set chanout [open $pathout wb] + zlib push inflate $chanin + fcopy $chanin $chanout + close $chanin + close $chanout +} -body { + file size $pathout +} -cleanup { + removeFile $pathout + unset chanin pathin chanout pathout +} -result 458752 + +test zlib-13.2 {Ticket [f70ce1fead] - zlib multi-stream expansion} -constraints zlib -setup { + # Start from the basic asset + set pathin [_zlibbinf] + set chanin [open $pathin rb] + # Create a multi-stream by copying the asset twice into it. + set pathout [makeFile {} test-zlib-13.multi] + set chanout [open $pathout wb] + fcopy $chanin $chanout + seek $chanin 0 start + fcopy $chanin $chanout + close $chanin + close $chanout + # The multi-stream file shall be our input + set pathin $pathout + set chanin [open $pathin rb] + # And our destinations + set pathout1 [makeFile {} test-zlib-13.multi-1] + set pathout2 [makeFile {} test-zlib-13.multi-2] +} -body { + # Decode first stream + set chanout [open $pathout1 wb] + zlib push inflate $chanin + fcopy $chanin $chanout + chan pop $chanin + close $chanout + # Decode second stream + set chanout [open $pathout2 wb] + zlib push inflate $chanin + fcopy $chanin $chanout + chan pop $chanin + close $chanout + # + list [file size $pathout1] [file size $pathout2] +} -cleanup { + close $chanin + removeFile $pathout + removeFile $pathout1 + removeFile $pathout2 + unset chanin pathin chanout pathout pathout1 pathout2 +} -result {458752 458752} + +if {$zlibbinf ne ""} { + removeFile $zlibbinf +} +unset zlibbinf +rename _zlibbinf {} + ::tcltest::cleanupTests return |