diff options
-rw-r--r-- | ChangeLog | 23 | ||||
-rw-r--r-- | doc/fcopy.n | 15 | ||||
-rw-r--r-- | generic/tclIO.c | 194 | ||||
-rw-r--r-- | tests/io.test | 80 |
4 files changed, 286 insertions, 26 deletions
@@ -1,3 +1,22 @@ +2001-05-19 Andreas Kupries <andreas_kupries@users.sourceforge.net> + + * Note that "tclbench" (see project "tcllib") was extended with + performance benchmarks for [fcopy] too. + + * doc/fcopy.n: Updated to reflect the extended behaviour of 'fcopy'. + + * tests/io.test: Added tests 'io-52.9', 'io-52.10' and 'io-52.11' + to test the handling of encodings by 'fcopy' / 'TclCopychannel' + [Bug #209210]. + + * generic/tclIO.c: Split of both 'Tcl_ReadChars' and + 'Tcl_WriteChars' into a public error checking and an internal + working part. The public functions now use the new internal + ones. The new functions are 'DoReadChars' and 'DoWriteChars'. + Extended 'CopyData' to use the new functions 'DoXChars' when + required by the encodings on the input and output channels + [Bug #209210]. + 2001-05-16 Jeff Hobbs <jeffh@ActiveState.com> * library/history.tcl (tcl::HistAdd): prevent empty calls from @@ -210,7 +229,7 @@ Darley's changes to command tracing were added. A const has been added to the type signature of one of the parameters to Tcl_CommandTraceProc. - + 2001-04-10 Kevin B. Kenny <kennykb@acm.org> * unix/tclUnixTime.c: Altered code to use memcpy instead of structure assigments in an effort to achieve better K&R @@ -220,7 +239,7 @@ * unix/tclUnixTime.c: Fixed silly typo in calls to 'gmtime' and 'localtime' that broke the Linux build. - + 2001-04-09 Kevin B. Kenny <kennykb@acm.org> * unix/tclLoadShl.c: Added DYNAMIC_PATH to the load flags so that diff --git a/doc/fcopy.n b/doc/fcopy.n index 5261f70..30b8dfe 100644 --- a/doc/fcopy.n +++ b/doc/fcopy.n @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: fcopy.n,v 1.2 1998/09/14 18:39:52 stanton Exp $ +'\" RCS: @(#) $Id: fcopy.n,v 1.3 2001/05/19 16:59:04 andreas_kupries Exp $ '\" .so man.macros .TH fcopy n 8.0 Tcl "Tcl Built-In Commands" @@ -71,6 +71,19 @@ can be different than the number of bytes written to \fIoutchan\fR. Only the number of bytes written to \fIoutchan\fR is reported, either as the return value of a synchronous \fBfcopy\fP or as the argument to the callback for an asynchronous \fBfcopy\fP. +.PP +\fBFcopy\fR obeys the encodings configured for the channels. This +means that the incoming characters are converted internally first +UTF-8 and then into the encoding of the channel \fBfcopy\fR writes +to. See the manual entry for \fBfconfigure\fR for details on the +\fB\-encoding\fR option. No conversion is done if both channels are +set to encoding "binary". If only the output channel is set to +encoding "binary" the system will write the internal UTF-8 +representation of the incoming characters. If only the input channel +is set to encoding "binary" the system will assume that the incoming +bytes are valid UTF-8 characters and convert them according to the +output encoding. The behaviour of the system for bytes which are not +valid UTF-8 characters is undefined in this case. .SH EXAMPLE .PP diff --git a/generic/tclIO.c b/generic/tclIO.c index 2c1bc34..f66e96b 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclIO.c,v 1.29 2001/03/30 23:06:39 andreas_kupries Exp $ + * RCS: @(#) $Id: tclIO.c,v 1.30 2001/05/19 16:59:04 andreas_kupries Exp $ */ #include "tclInt.h" @@ -112,6 +112,10 @@ static int DoRead _ANSI_ARGS_((Channel *chanPtr, char *srcPtr, int slen)); static int DoWrite _ANSI_ARGS_((Channel *chanPtr, char *src, int srcLen)); +static int DoReadChars _ANSI_ARGS_ ((Channel* chan, + Tcl_Obj* objPtr, int toRead, int appendFlag)); +static int DoWriteChars _ANSI_ARGS_ ((Channel* chan, + CONST char* src, int len)); static int FilterInputBytes _ANSI_ARGS_((Channel *chanPtr, GetsState *statePtr)); static int FlushChannel _ANSI_ARGS_((Tcl_Interp *interp, @@ -2508,7 +2512,10 @@ Tcl_ClearChannelHandlers (channel) * Puts a sequence of bytes into an output buffer, may queue the * buffer for output if it gets full, and also remembers whether the * current buffer is ready e.g. if it contains a newline and we are in - * line buffering mode. + * line buffering mode. Compensates stacking, i.e. will redirect the + * data from the specified channel to the topmost channel in a stack. + * + * No encoding conversions are applied to the bytes being read. * * Results: * The number of bytes written or -1 in case of error. If -1, @@ -2555,7 +2562,10 @@ Tcl_Write(chan, src, srcLen) * Puts a sequence of bytes into an output buffer, may queue the * buffer for output if it gets full, and also remembers whether the * current buffer is ready e.g. if it contains a newline and we are in - * line buffering mode. + * line buffering mode. Writes directly to the driver of the channel, + * does not compensate for stacking. + * + * No encoding conversions are applied to the bytes being read. * * Results: * The number of bytes written or -1 in case of error. If -1, @@ -2611,7 +2621,8 @@ Tcl_WriteRaw(chan, src, srcLen) * using the channel's current encoding, may queue the buffer for * output if it gets full, and also remembers whether the current * buffer is ready e.g. if it contains a newline and we are in - * line buffering mode. + * line buffering mode. Compensates stacking, i.e. will redirect the + * data from the specified channel to the topmost channel in a stack. * * Results: * The number of bytes written or -1 in case of error. If -1, @@ -2631,18 +2642,55 @@ Tcl_WriteChars(chan, src, len) int len; /* Length of string in bytes, or < 0 for * strlen(). */ { - /* - * Always use the topmost channel of the stack - */ - Channel *chanPtr; ChannelState *statePtr; /* state info for channel */ statePtr = ((Channel *) chan)->state; - chanPtr = statePtr->topChanPtr; if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) { return -1; } + + return DoWriteChars ((Channel*) chan, src, len); +} + +/* + *--------------------------------------------------------------------------- + * + * DoWriteChars -- + * + * Takes a sequence of UTF-8 characters and converts them for output + * using the channel's current encoding, may queue the buffer for + * output if it gets full, and also remembers whether the current + * buffer is ready e.g. if it contains a newline and we are in + * line buffering mode. Compensates stacking, i.e. will redirect the + * data from the specified channel to the topmost channel in a stack. + * + * Results: + * The number of bytes written or -1 in case of error. If -1, + * Tcl_GetErrno will return the error code. + * + * Side effects: + * May buffer up output and may cause output to be produced on the + * channel. + * + *---------------------------------------------------------------------- + */ + +int +DoWriteChars(chanPtr, src, len) + Channel* chanPtr; /* The channel to buffer output for. */ + CONST char *src; /* UTF-8 characters to queue in output buffer. */ + int len; /* Length of string in bytes, or < 0 for + * strlen(). */ +{ + /* + * Always use the topmost channel of the stack + */ + ChannelState *statePtr; /* state info for channel */ + + statePtr = chanPtr->state; + chanPtr = statePtr->topChanPtr; + if (len < 0) { len = strlen(src); } @@ -4037,12 +4085,8 @@ Tcl_ReadChars(chan, objPtr, toRead, appendFlag) * of the object. */ { - Channel *chanPtr = (Channel *) chan; - ChannelState *statePtr = chanPtr->state; /* state info for channel */ - ChannelBuffer *bufPtr; - int offset, factor, copied, copiedNow, result; - Tcl_Encoding encoding; -#define UTF_EXPANSION_FACTOR 1024 + Channel* chanPtr = (Channel *) chan; + ChannelState* statePtr = chanPtr->state; /* state info for channel */ /* * This operation should occur at the top of a channel stack. @@ -4051,12 +4095,64 @@ Tcl_ReadChars(chan, objPtr, toRead, appendFlag) chanPtr = statePtr->topChanPtr; if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) { - copied = -1; - goto done; + /* + * Update the notifier state so we don't block while there is still + * data in the buffers. + */ + UpdateInterest(chanPtr); + return -1; } + return DoReadChars (chanPtr, objPtr, toRead, appendFlag); +} +/* + *--------------------------------------------------------------------------- + * + * DoReadChars -- + * + * Reads from the channel until the requested number of characters + * have been seen, EOF is seen, or the channel would block. EOL + * and EOF translation is done. If reading binary data, the raw + * bytes are wrapped in a Tcl byte array object. Otherwise, the raw + * bytes are converted to UTF-8 using the channel's current encoding + * and stored in a Tcl string object. + * + * Results: + * The number of characters read, or -1 on error. Use Tcl_GetErrno() + * to retrieve the error code for the error that occurred. + * + * Side effects: + * May cause input to be buffered. + * + *--------------------------------------------------------------------------- + */ + +int +DoReadChars(chanPtr, objPtr, toRead, appendFlag) + Channel* chanPtr; /* The channel to read. */ + Tcl_Obj *objPtr; /* Input data is stored in this object. */ + int toRead; /* Maximum number of characters to store, + * or -1 to read all available data (up to EOF + * or when channel blocks). */ + int appendFlag; /* If non-zero, data read from the channel + * will be appended to the object. Otherwise, + * the data will replace the existing contents + * of the object. */ + +{ + ChannelState *statePtr = chanPtr->state; /* state info for channel */ + ChannelBuffer *bufPtr; + int offset, factor, copied, copiedNow, result; + Tcl_Encoding encoding; +#define UTF_EXPANSION_FACTOR 1024 + + /* + * This operation should occur at the top of a channel stack. + */ + + chanPtr = statePtr->topChanPtr; encoding = statePtr->encoding; - factor = UTF_EXPANSION_FACTOR; + factor = UTF_EXPANSION_FACTOR; if (appendFlag == 0) { if (encoding == NULL) { @@ -7037,6 +7133,11 @@ CopyData(csPtr, mask) int result = TCL_OK; int size; int total; + int sizeb; + Tcl_Obj* bufObj = NULL; + char* buffer; + + int inBinary, outBinary, sameEncoding; /* Encoding control */ inChan = (Tcl_Channel) csPtr->readPtr; outChan = (Tcl_Channel) csPtr->writePtr; @@ -7053,8 +7154,16 @@ CopyData(csPtr, mask) * thus gets the bottom of the stack. */ - while (csPtr->toRead != 0) { + inBinary = (inStatePtr->encoding == NULL); + outBinary = (outStatePtr->encoding == NULL); + sameEncoding = (inStatePtr->encoding == outStatePtr->encoding); + + if (!(inBinary || sameEncoding)) { + bufObj = Tcl_NewObj (); + Tcl_IncrRefCount (bufObj); + } + while (csPtr->toRead != 0) { /* * Check for unreported background errors. */ @@ -7079,7 +7188,12 @@ CopyData(csPtr, mask) } else { size = csPtr->toRead; } - size = DoRead(inStatePtr->topChanPtr, csPtr->buffer, size); + + if (inBinary || sameEncoding) { + size = DoRead(inStatePtr->topChanPtr, csPtr->buffer, size); + } else { + size = DoReadChars(inStatePtr->topChanPtr, bufObj, size, 0 /* No append */); + } if (size < 0) { readError: @@ -7105,6 +7219,10 @@ CopyData(csPtr, mask) Tcl_CreateChannelHandler(inChan, TCL_READABLE, CopyEventProc, (ClientData) csPtr); } + if (bufObj != (Tcl_Obj*) NULL) { + Tcl_DecrRefCount (bufObj); + bufObj = (Tcl_Obj*) NULL; + } return TCL_OK; } @@ -7112,8 +7230,25 @@ CopyData(csPtr, mask) * Now write the buffer out. */ - size = DoWrite(outStatePtr->topChanPtr, csPtr->buffer, size); - if (size < 0) { + if (inBinary || sameEncoding) { + buffer = csPtr->buffer; + sizeb = size; + } else { + buffer = Tcl_GetStringFromObj (bufObj, &sizeb); + } + + if (outBinary || sameEncoding) { + sizeb = DoWrite(outStatePtr->topChanPtr, buffer, sizeb); + } else { + sizeb = DoWriteChars(outStatePtr->topChanPtr, buffer, sizeb); + } + + if (inBinary || sameEncoding) { + /* Both read and write counted bytes */ + size = sizeb; + } /* else : Read counted characters, write counted bytes, i.e. size != sizeb */ + + if (sizeb < 0) { writeError: errObj = Tcl_NewObj(); Tcl_AppendStringsToObj(errObj, "error writing \"", @@ -7148,6 +7283,10 @@ CopyData(csPtr, mask) Tcl_CreateChannelHandler(outChan, TCL_WRITABLE, CopyEventProc, (ClientData) csPtr); } + if (bufObj != (Tcl_Obj*) NULL) { + Tcl_DecrRefCount (bufObj); + bufObj = (Tcl_Obj*) NULL; + } return TCL_OK; } @@ -7166,8 +7305,17 @@ CopyData(csPtr, mask) Tcl_CreateChannelHandler(outChan, TCL_WRITABLE, CopyEventProc, (ClientData) csPtr); } + if (bufObj != (Tcl_Obj*) NULL) { + Tcl_DecrRefCount (bufObj); + bufObj = (Tcl_Obj*) NULL; + } return TCL_OK; } + } /* while */ + + if (bufObj != (Tcl_Obj*) NULL) { + Tcl_DecrRefCount (bufObj); + bufObj = (Tcl_Obj*) NULL; } /* @@ -7218,6 +7366,8 @@ CopyData(csPtr, mask) * * Reads a given number of bytes from a channel. * + * No encoding conversions are applied to the bytes being read. + * * Results: * The number of characters read, or -1 on error. Use Tcl_GetErrno() * to retrieve the error code for the error that occurred. diff --git a/tests/io.test b/tests/io.test index e3ac4a1..c193793 100644 --- a/tests/io.test +++ b/tests/io.test @@ -12,7 +12,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: io.test,v 1.16 2001/04/04 17:35:25 andreas_kupries Exp $ +# RCS: @(#) $Id: io.test,v 1.17 2001/05/19 16:59:04 andreas_kupries Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -6369,6 +6369,84 @@ test io-52.8 {TclCopyChannel} {stdio} { list $s0 [file size test1] } {40 40} +# Empty files, to register them with the test facility +makeFile {} kyrillic.txt +makeFile {} utf8-fcopy.txt +makeFile {} utf8-rp.txt + +# Create kyrillic file +set out [open kyrillic.txt w] +fconfigure $out -encoding koi8-r +puts $out "\u0410\u0410" +close $out + +test io-52.9 {TclCopyChannel & encodings} { + # Copy kyrillic to UTF-8, using fcopy. + + set in [open kyrillic.txt r] + set out [open utf8-fcopy.txt w] + + fconfigure $in -encoding koi8-r + fconfigure $out -encoding utf-8 + + fcopy $in $out + close $in + close $out + + # Do the same again, but differently (read/puts). + + set in [open kyrillic.txt r] + set out [open utf8-rp.txt w] + + fconfigure $in -encoding koi8-r + fconfigure $out -encoding utf-8 + + puts -nonewline $out [read $in] + + close $in + close $out + + list \ + [file size kyrillic.txt] \ + [file size utf8-fcopy.txt] \ + [file size utf8-rp.txt] +} {3 5 5} + +test io-52.10 {TclCopyChannel & encodings} { + # encoding to binary (=> implies that the + # internal utf-8 is written) + + set in [open kyrillic.txt r] + set out [open utf8-fcopy.txt w] + + fconfigure $in -encoding koi8-r + fconfigure $out -encoding binary + + fcopy $in $out + close $in + close $out + + file size utf8-fcopy.txt +} 5 + +test io-52.11 {TclCopyChannel & encodings} { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open utf8-fcopy.txt r] + set out [open kyrillic.txt w] + + fconfigure $in -encoding binary + fconfigure $out -encoding koi8-r + + fcopy $in $out + close $in + close $out + + file size kyrillic.txt +} 3 + + test io-53.1 {CopyData} { removeFile test1 set f1 [open $thisScript] |