summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog23
-rw-r--r--doc/fcopy.n15
-rw-r--r--generic/tclIO.c194
-rw-r--r--tests/io.test80
4 files changed, 286 insertions, 26 deletions
diff --git a/ChangeLog b/ChangeLog
index e690d1b..6e395c7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2001-05-19 Andreas Kupries <andreas_kupries@users.sourceforge.net>
+
+ * Note that "tclbench" (see project "tcllib") was extended with
+ performance benchmarks for [fcopy] too.
+
+ * doc/fcopy.n: Updated to reflect the extended behaviour of 'fcopy'.
+
+ * tests/io.test: Added tests 'io-52.9', 'io-52.10' and 'io-52.11'
+ to test the handling of encodings by 'fcopy' / 'TclCopychannel'
+ [Bug #209210].
+
+ * generic/tclIO.c: Split of both 'Tcl_ReadChars' and
+ 'Tcl_WriteChars' into a public error checking and an internal
+ working part. The public functions now use the new internal
+ ones. The new functions are 'DoReadChars' and 'DoWriteChars'.
+ Extended 'CopyData' to use the new functions 'DoXChars' when
+ required by the encodings on the input and output channels
+ [Bug #209210].
+
2001-05-16 Jeff Hobbs <jeffh@ActiveState.com>
* library/history.tcl (tcl::HistAdd): prevent empty calls from
@@ -210,7 +229,7 @@
Darley's changes to command tracing were added. A const has been
added to the type signature of one of the parameters to
Tcl_CommandTraceProc.
-
+
2001-04-10 Kevin B. Kenny <kennykb@acm.org>
* unix/tclUnixTime.c: Altered code to use memcpy instead of
structure assigments in an effort to achieve better K&R
@@ -220,7 +239,7 @@
* unix/tclUnixTime.c: Fixed silly typo in calls to 'gmtime' and
'localtime' that broke the Linux build.
-
+
2001-04-09 Kevin B. Kenny <kennykb@acm.org>
* unix/tclLoadShl.c: Added DYNAMIC_PATH to the load flags so that
diff --git a/doc/fcopy.n b/doc/fcopy.n
index 5261f70..30b8dfe 100644
--- a/doc/fcopy.n
+++ b/doc/fcopy.n
@@ -5,7 +5,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: fcopy.n,v 1.2 1998/09/14 18:39:52 stanton Exp $
+'\" RCS: @(#) $Id: fcopy.n,v 1.3 2001/05/19 16:59:04 andreas_kupries Exp $
'\"
.so man.macros
.TH fcopy n 8.0 Tcl "Tcl Built-In Commands"
@@ -71,6 +71,19 @@ can be different than the number of bytes written to \fIoutchan\fR.
Only the number of bytes written to \fIoutchan\fR is reported,
either as the return value of a synchronous \fBfcopy\fP or
as the argument to the callback for an asynchronous \fBfcopy\fP.
+.PP
+\fBFcopy\fR obeys the encodings configured for the channels. This
+means that the incoming characters are converted internally first
+UTF-8 and then into the encoding of the channel \fBfcopy\fR writes
+to. See the manual entry for \fBfconfigure\fR for details on the
+\fB\-encoding\fR option. No conversion is done if both channels are
+set to encoding "binary". If only the output channel is set to
+encoding "binary" the system will write the internal UTF-8
+representation of the incoming characters. If only the input channel
+is set to encoding "binary" the system will assume that the incoming
+bytes are valid UTF-8 characters and convert them according to the
+output encoding. The behaviour of the system for bytes which are not
+valid UTF-8 characters is undefined in this case.
.SH EXAMPLE
.PP
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 2c1bc34..f66e96b 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclIO.c,v 1.29 2001/03/30 23:06:39 andreas_kupries Exp $
+ * RCS: @(#) $Id: tclIO.c,v 1.30 2001/05/19 16:59:04 andreas_kupries Exp $
*/
#include "tclInt.h"
@@ -112,6 +112,10 @@ static int DoRead _ANSI_ARGS_((Channel *chanPtr, char *srcPtr,
int slen));
static int DoWrite _ANSI_ARGS_((Channel *chanPtr, char *src,
int srcLen));
+static int DoReadChars _ANSI_ARGS_ ((Channel* chan,
+ Tcl_Obj* objPtr, int toRead, int appendFlag));
+static int DoWriteChars _ANSI_ARGS_ ((Channel* chan,
+ CONST char* src, int len));
static int FilterInputBytes _ANSI_ARGS_((Channel *chanPtr,
GetsState *statePtr));
static int FlushChannel _ANSI_ARGS_((Tcl_Interp *interp,
@@ -2508,7 +2512,10 @@ Tcl_ClearChannelHandlers (channel)
* Puts a sequence of bytes into an output buffer, may queue the
* buffer for output if it gets full, and also remembers whether the
* current buffer is ready e.g. if it contains a newline and we are in
- * line buffering mode.
+ * line buffering mode. Compensates stacking, i.e. will redirect the
+ * data from the specified channel to the topmost channel in a stack.
+ *
+ * No encoding conversions are applied to the bytes being read.
*
* Results:
* The number of bytes written or -1 in case of error. If -1,
@@ -2555,7 +2562,10 @@ Tcl_Write(chan, src, srcLen)
* Puts a sequence of bytes into an output buffer, may queue the
* buffer for output if it gets full, and also remembers whether the
* current buffer is ready e.g. if it contains a newline and we are in
- * line buffering mode.
+ * line buffering mode. Writes directly to the driver of the channel,
+ * does not compensate for stacking.
+ *
+ * No encoding conversions are applied to the bytes being read.
*
* Results:
* The number of bytes written or -1 in case of error. If -1,
@@ -2611,7 +2621,8 @@ Tcl_WriteRaw(chan, src, srcLen)
* using the channel's current encoding, may queue the buffer for
* output if it gets full, and also remembers whether the current
* buffer is ready e.g. if it contains a newline and we are in
- * line buffering mode.
+ * line buffering mode. Compensates stacking, i.e. will redirect the
+ * data from the specified channel to the topmost channel in a stack.
*
* Results:
* The number of bytes written or -1 in case of error. If -1,
@@ -2631,18 +2642,55 @@ Tcl_WriteChars(chan, src, len)
int len; /* Length of string in bytes, or < 0 for
* strlen(). */
{
- /*
- * Always use the topmost channel of the stack
- */
- Channel *chanPtr;
ChannelState *statePtr; /* state info for channel */
statePtr = ((Channel *) chan)->state;
- chanPtr = statePtr->topChanPtr;
if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) {
return -1;
}
+
+ return DoWriteChars ((Channel*) chan, src, len);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * DoWriteChars --
+ *
+ * Takes a sequence of UTF-8 characters and converts them for output
+ * using the channel's current encoding, may queue the buffer for
+ * output if it gets full, and also remembers whether the current
+ * buffer is ready e.g. if it contains a newline and we are in
+ * line buffering mode. Compensates stacking, i.e. will redirect the
+ * data from the specified channel to the topmost channel in a stack.
+ *
+ * Results:
+ * The number of bytes written or -1 in case of error. If -1,
+ * Tcl_GetErrno will return the error code.
+ *
+ * Side effects:
+ * May buffer up output and may cause output to be produced on the
+ * channel.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+DoWriteChars(chanPtr, src, len)
+ Channel* chanPtr; /* The channel to buffer output for. */
+ CONST char *src; /* UTF-8 characters to queue in output buffer. */
+ int len; /* Length of string in bytes, or < 0 for
+ * strlen(). */
+{
+ /*
+ * Always use the topmost channel of the stack
+ */
+ ChannelState *statePtr; /* state info for channel */
+
+ statePtr = chanPtr->state;
+ chanPtr = statePtr->topChanPtr;
+
if (len < 0) {
len = strlen(src);
}
@@ -4037,12 +4085,8 @@ Tcl_ReadChars(chan, objPtr, toRead, appendFlag)
* of the object. */
{
- Channel *chanPtr = (Channel *) chan;
- ChannelState *statePtr = chanPtr->state; /* state info for channel */
- ChannelBuffer *bufPtr;
- int offset, factor, copied, copiedNow, result;
- Tcl_Encoding encoding;
-#define UTF_EXPANSION_FACTOR 1024
+ Channel* chanPtr = (Channel *) chan;
+ ChannelState* statePtr = chanPtr->state; /* state info for channel */
/*
* This operation should occur at the top of a channel stack.
@@ -4051,12 +4095,64 @@ Tcl_ReadChars(chan, objPtr, toRead, appendFlag)
chanPtr = statePtr->topChanPtr;
if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) {
- copied = -1;
- goto done;
+ /*
+ * Update the notifier state so we don't block while there is still
+ * data in the buffers.
+ */
+ UpdateInterest(chanPtr);
+ return -1;
}
+ return DoReadChars (chanPtr, objPtr, toRead, appendFlag);
+}
+/*
+ *---------------------------------------------------------------------------
+ *
+ * DoReadChars --
+ *
+ * Reads from the channel until the requested number of characters
+ * have been seen, EOF is seen, or the channel would block. EOL
+ * and EOF translation is done. If reading binary data, the raw
+ * bytes are wrapped in a Tcl byte array object. Otherwise, the raw
+ * bytes are converted to UTF-8 using the channel's current encoding
+ * and stored in a Tcl string object.
+ *
+ * Results:
+ * The number of characters read, or -1 on error. Use Tcl_GetErrno()
+ * to retrieve the error code for the error that occurred.
+ *
+ * Side effects:
+ * May cause input to be buffered.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+int
+DoReadChars(chanPtr, objPtr, toRead, appendFlag)
+ Channel* chanPtr; /* The channel to read. */
+ Tcl_Obj *objPtr; /* Input data is stored in this object. */
+ int toRead; /* Maximum number of characters to store,
+ * or -1 to read all available data (up to EOF
+ * or when channel blocks). */
+ int appendFlag; /* If non-zero, data read from the channel
+ * will be appended to the object. Otherwise,
+ * the data will replace the existing contents
+ * of the object. */
+
+{
+ ChannelState *statePtr = chanPtr->state; /* state info for channel */
+ ChannelBuffer *bufPtr;
+ int offset, factor, copied, copiedNow, result;
+ Tcl_Encoding encoding;
+#define UTF_EXPANSION_FACTOR 1024
+
+ /*
+ * This operation should occur at the top of a channel stack.
+ */
+
+ chanPtr = statePtr->topChanPtr;
encoding = statePtr->encoding;
- factor = UTF_EXPANSION_FACTOR;
+ factor = UTF_EXPANSION_FACTOR;
if (appendFlag == 0) {
if (encoding == NULL) {
@@ -7037,6 +7133,11 @@ CopyData(csPtr, mask)
int result = TCL_OK;
int size;
int total;
+ int sizeb;
+ Tcl_Obj* bufObj = NULL;
+ char* buffer;
+
+ int inBinary, outBinary, sameEncoding; /* Encoding control */
inChan = (Tcl_Channel) csPtr->readPtr;
outChan = (Tcl_Channel) csPtr->writePtr;
@@ -7053,8 +7154,16 @@ CopyData(csPtr, mask)
* thus gets the bottom of the stack.
*/
- while (csPtr->toRead != 0) {
+ inBinary = (inStatePtr->encoding == NULL);
+ outBinary = (outStatePtr->encoding == NULL);
+ sameEncoding = (inStatePtr->encoding == outStatePtr->encoding);
+
+ if (!(inBinary || sameEncoding)) {
+ bufObj = Tcl_NewObj ();
+ Tcl_IncrRefCount (bufObj);
+ }
+ while (csPtr->toRead != 0) {
/*
* Check for unreported background errors.
*/
@@ -7079,7 +7188,12 @@ CopyData(csPtr, mask)
} else {
size = csPtr->toRead;
}
- size = DoRead(inStatePtr->topChanPtr, csPtr->buffer, size);
+
+ if (inBinary || sameEncoding) {
+ size = DoRead(inStatePtr->topChanPtr, csPtr->buffer, size);
+ } else {
+ size = DoReadChars(inStatePtr->topChanPtr, bufObj, size, 0 /* No append */);
+ }
if (size < 0) {
readError:
@@ -7105,6 +7219,10 @@ CopyData(csPtr, mask)
Tcl_CreateChannelHandler(inChan, TCL_READABLE,
CopyEventProc, (ClientData) csPtr);
}
+ if (bufObj != (Tcl_Obj*) NULL) {
+ Tcl_DecrRefCount (bufObj);
+ bufObj = (Tcl_Obj*) NULL;
+ }
return TCL_OK;
}
@@ -7112,8 +7230,25 @@ CopyData(csPtr, mask)
* Now write the buffer out.
*/
- size = DoWrite(outStatePtr->topChanPtr, csPtr->buffer, size);
- if (size < 0) {
+ if (inBinary || sameEncoding) {
+ buffer = csPtr->buffer;
+ sizeb = size;
+ } else {
+ buffer = Tcl_GetStringFromObj (bufObj, &sizeb);
+ }
+
+ if (outBinary || sameEncoding) {
+ sizeb = DoWrite(outStatePtr->topChanPtr, buffer, sizeb);
+ } else {
+ sizeb = DoWriteChars(outStatePtr->topChanPtr, buffer, sizeb);
+ }
+
+ if (inBinary || sameEncoding) {
+ /* Both read and write counted bytes */
+ size = sizeb;
+ } /* else : Read counted characters, write counted bytes, i.e. size != sizeb */
+
+ if (sizeb < 0) {
writeError:
errObj = Tcl_NewObj();
Tcl_AppendStringsToObj(errObj, "error writing \"",
@@ -7148,6 +7283,10 @@ CopyData(csPtr, mask)
Tcl_CreateChannelHandler(outChan, TCL_WRITABLE,
CopyEventProc, (ClientData) csPtr);
}
+ if (bufObj != (Tcl_Obj*) NULL) {
+ Tcl_DecrRefCount (bufObj);
+ bufObj = (Tcl_Obj*) NULL;
+ }
return TCL_OK;
}
@@ -7166,8 +7305,17 @@ CopyData(csPtr, mask)
Tcl_CreateChannelHandler(outChan, TCL_WRITABLE,
CopyEventProc, (ClientData) csPtr);
}
+ if (bufObj != (Tcl_Obj*) NULL) {
+ Tcl_DecrRefCount (bufObj);
+ bufObj = (Tcl_Obj*) NULL;
+ }
return TCL_OK;
}
+ } /* while */
+
+ if (bufObj != (Tcl_Obj*) NULL) {
+ Tcl_DecrRefCount (bufObj);
+ bufObj = (Tcl_Obj*) NULL;
}
/*
@@ -7218,6 +7366,8 @@ CopyData(csPtr, mask)
*
* Reads a given number of bytes from a channel.
*
+ * No encoding conversions are applied to the bytes being read.
+ *
* Results:
* The number of characters read, or -1 on error. Use Tcl_GetErrno()
* to retrieve the error code for the error that occurred.
diff --git a/tests/io.test b/tests/io.test
index e3ac4a1..c193793 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -12,7 +12,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: io.test,v 1.16 2001/04/04 17:35:25 andreas_kupries Exp $
+# RCS: @(#) $Id: io.test,v 1.17 2001/05/19 16:59:04 andreas_kupries Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -6369,6 +6369,84 @@ test io-52.8 {TclCopyChannel} {stdio} {
list $s0 [file size test1]
} {40 40}
+# Empty files, to register them with the test facility
+makeFile {} kyrillic.txt
+makeFile {} utf8-fcopy.txt
+makeFile {} utf8-rp.txt
+
+# Create kyrillic file
+set out [open kyrillic.txt w]
+fconfigure $out -encoding koi8-r
+puts $out "\u0410\u0410"
+close $out
+
+test io-52.9 {TclCopyChannel & encodings} {
+ # Copy kyrillic to UTF-8, using fcopy.
+
+ set in [open kyrillic.txt r]
+ set out [open utf8-fcopy.txt w]
+
+ fconfigure $in -encoding koi8-r
+ fconfigure $out -encoding utf-8
+
+ fcopy $in $out
+ close $in
+ close $out
+
+ # Do the same again, but differently (read/puts).
+
+ set in [open kyrillic.txt r]
+ set out [open utf8-rp.txt w]
+
+ fconfigure $in -encoding koi8-r
+ fconfigure $out -encoding utf-8
+
+ puts -nonewline $out [read $in]
+
+ close $in
+ close $out
+
+ list \
+ [file size kyrillic.txt] \
+ [file size utf8-fcopy.txt] \
+ [file size utf8-rp.txt]
+} {3 5 5}
+
+test io-52.10 {TclCopyChannel & encodings} {
+ # encoding to binary (=> implies that the
+ # internal utf-8 is written)
+
+ set in [open kyrillic.txt r]
+ set out [open utf8-fcopy.txt w]
+
+ fconfigure $in -encoding koi8-r
+ fconfigure $out -encoding binary
+
+ fcopy $in $out
+ close $in
+ close $out
+
+ file size utf8-fcopy.txt
+} 5
+
+test io-52.11 {TclCopyChannel & encodings} {
+ # binary to encoding => the input has to be
+ # in utf-8 to make sense to the encoder
+
+ set in [open utf8-fcopy.txt r]
+ set out [open kyrillic.txt w]
+
+ fconfigure $in -encoding binary
+ fconfigure $out -encoding koi8-r
+
+ fcopy $in $out
+ close $in
+ close $out
+
+ file size kyrillic.txt
+} 3
+
+
test io-53.1 {CopyData} {
removeFile test1
set f1 [open $thisScript]