diff options
-rw-r--r-- | .github/workflows/mac-build.yml | 6 | ||||
-rw-r--r-- | .github/workflows/onefiledist.yml | 2 | ||||
-rw-r--r-- | doc/Encoding.3 | 70 | ||||
-rw-r--r-- | doc/chan.n | 10 | ||||
-rw-r--r-- | doc/encoding.n | 226 | ||||
-rw-r--r-- | doc/fconfigure.n | 35 | ||||
-rw-r--r-- | generic/tclEncoding.c | 6 | ||||
-rw-r--r-- | library/tcltest/tcltest.tcl | 5 | ||||
-rw-r--r-- | macosx/README | 4 | ||||
-rw-r--r-- | macosx/Tcl.xcodeproj/project.pbxproj | 10 | ||||
-rw-r--r-- | tests/ioTrans.test | 3 | ||||
-rwxr-xr-x | unix/configure | 24 | ||||
-rw-r--r-- | unix/tcl.m4 | 16 |
13 files changed, 257 insertions, 160 deletions
diff --git a/.github/workflows/mac-build.yml b/.github/workflows/mac-build.yml index 1ec784a..a9345a1 100644 --- a/.github/workflows/mac-build.yml +++ b/.github/workflows/mac-build.yml @@ -19,7 +19,7 @@ jobs: - name: Build run: make all env: - CFLAGS: -arch x86_64 -arch arm64e + CFLAGS: -arch x86_64 -arch arm64 - name: Run Tests run: make test styles=develop env: @@ -51,13 +51,13 @@ jobs: # Note that macOS is always a 64 bit platform run: ./configure --enable-dtrace --enable-framework ${CFGOPT} "--prefix=$HOME/install" || (cat config.log && exit 1) env: - CFLAGS: -arch x86_64 -arch arm64e + CFLAGS: -arch x86_64 -arch arm64 CFGOPT: ${{ matrix.cfgopt }} - name: Build run: | make all tcltest env: - CFLAGS: -arch x86_64 -arch arm64e + CFLAGS: -arch x86_64 -arch arm64 - name: Run Tests run: | make test diff --git a/.github/workflows/onefiledist.yml b/.github/workflows/onefiledist.yml index 45ce720..1f75762 100644 --- a/.github/workflows/onefiledist.yml +++ b/.github/workflows/onefiledist.yml @@ -62,7 +62,7 @@ jobs: sudo chmod a+x /usr/local/bin/macher echo "VER_PATH=$(cd tools; pwd)/addVerToFile.tcl" >> $GITHUB_ENV echo "CREATE_DMG=$(cd create-dmg;pwd)/create-dmg" >> $GITHUB_ENV - echo "CFLAGS=-arch x86_64 -arch arm64e" >> $GITHUB_ENV + echo "CFLAGS=-arch x86_64 -arch arm64" >> $GITHUB_ENV - name: Configure run: ./configure --disable-symbols --disable-shared --enable-zipfs working-directory: unix diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 93f389a..1f0dbdf 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath \- procedures for creating and using encodings +Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_UtfToExternalDStringEx, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternalDStringEx, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath \- procedures for creating and using encodings .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -26,13 +26,13 @@ char * \fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp size_t -\fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +\fBTcl_ExternalToUtfDStringEx\fR(\fIinterp, encoding, src, srcLen, flags, dstPtr, errorIdxPtr\fR) .sp char * \fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp size_t -\fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +\fBTcl_UtfToExternalDStringEx\fR(\fIinterp, encoding, src, srcLen, flags, dstPtr, errorIdxPtr\fR) .sp int \fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, @@ -93,7 +93,7 @@ encoding-specific length of the string is used. Pointer to an uninitialized or free \fBTcl_DString\fR in which the converted result will be stored. .AP int flags in -Various flag bits OR-ed together. +This is a bit mask passed in to control the operation of the encoding functions. \fBTCL_ENCODING_START\fR signifies that the source buffer is the first block in a (potentially multi-block) input stream, telling the conversion routine to reset to an initial state and @@ -101,9 +101,15 @@ perform any initialization that needs to occur before the first byte is converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last block in a (potentially multi-block) input stream, telling the conversion routine to perform any finalization that needs to occur after the last -byte is converted and then to reset to an initial state. +byte is converted and then to reset to an initial state. The +\fBTCL_PROFILE_*\fR bits defined in the \fBPROFILES\fR section below +control the encoding profile to be used for dealing with invalid data or +other errors in the encoding transform. The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, it only has meaning in Tcl 8.x. +.PP +Some flags bits may not be usable with some functions as noted in the +function descriptions below. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -129,6 +135,9 @@ buffer as a result of the conversion. May be NULL. .AP int *dstCharsPtr out Filled with the number of characters that correspond to the number of bytes stored in the output buffer. May be NULL. +.AP Tcl_Size *errorIdxPtr out +Filled with the index of the byte or character that caused the encoding transform +to fail. May be NULL. .AP Tcl_DString *bufPtr out Storage for the prescribed system encoding name. .AP "const Tcl_EncodingType" *typePtr in @@ -203,10 +212,27 @@ When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP -\fBTcl_ExternalToUtfDStringEx\fR is the same as \fBTcl_ExternalToUtfDString\fR, -but it has an additional flags parameter. The return value is the index of -the first byte in the input string causing a conversion error. -Or TCL_INDEX_NONE if all is OK. +\fBTcl_ExternalToUtfDStringEx\fR is a more flexible version of older +\fBTcl_ExternalToUtfDString\fR function. It takes three additional parameters, +\fBinterp\fR, \fBflags\fR and \fBerrorIdxPtr\fR. The \fBflags\fR parameter may +be used to specify the profile to be used for the transform. The +\fBTCL_ENCODING_START\fR and \fBTCL_ENCODING_END\fR bits in \fBflags\fR are +ignored as the function assumes the entire source string to be decoded is passed +into the function. On success, the function returns \fBTCL_ERROR\fR with the +converted string stored in \fB*dstPtr\fR. For errors other than conversion +errors, such as invalid flags, the function returns \fBTCL_OK\fR with an error +message in \fBinterp\fR if it is not NULL. +.PP +For conversion errors, \fBTcl_ExternalToUtfDStringEx\fR returns one +of the \fBTCL_CONVERT_*\fR errors listed below for \fBTcl_ExternalToUtf\fR. +When one of these conversion errors is returned, an error message is +stored in \fBinterp\fR only if \fBerrorIdxPtr\fR is NULL. Otherwise, no error message +is stored as the function expects the caller is interested whatever is +decoded to that point and not treating this as an immediate error condition. +The index of the error location is stored in \fB*errorIdxPtr\fR. +.PP +The caller must call \fBTcl_DStringFree\fR to free up the \fB*dstPtr\fR resources +irrespective of the return value from the function. .PP \fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified \fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the @@ -229,7 +255,7 @@ the unconverted bytes that remained in \fIsrc\fR plus some further bytes from the source stream to properly convert the formerly split-up multibyte sequence. .IP \fBTCL_CONVERT_SYNTAX\fR 29 -The source buffer contained an invalid character sequence. This may occur +The source buffer contained an invalid byte or character sequence. This may occur if the input stream has been damaged or if the input encoding method was misidentified. .IP \fBTCL_CONVERT_UNKNOWN\fR 29 @@ -246,10 +272,14 @@ characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP -\fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR, -but it has an additional flags parameter. The return value is the index of -the first byte of an utf-8 byte-sequence in the input string causing a -conversion error. Or TCL_INDEX_NONE if all is OK. +\fBTcl_UtfToExternalDStringEx\fR is an enhanced version of +\fBTcl_UtfToExternalDString\fR that transforms UTF-8 encoded source data to a specified +\fIencoding\fR. Except for the direction of the transform, the parameters and +return values are identical to those of \fBTcl_ExternalToUtfDStringEx\fR. See +that function above for details about the same. + +Irrespective of the return code from the function, the caller must free +resources associated with \fB*dstPtr\fR when the function returns. .PP \fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from @@ -552,6 +582,18 @@ to the object, it will be deleted. .PP \fBTcl_GetEncodingSearchPath\fR returns an object with a reference count of at least 1. +.SH "PROFILES" +Encoding profiles define the manner in which errors in the encoding transforms +are handled by the encoding functions. An application can specify the profile +to be used by OR-ing the \fBflags\fR parameter passed to the function +with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR, +\fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR. +These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles +respectively. If none are specified, a version-dependent default profile is used. +For Tcl 8.7, the default profile is \fBtcl8\fR. +.PP +For details about profiles, see the \fBPROFILES\fR section in +the documentation of the \fBencoding\fR command. .SH "SEE ALSO" encoding(n) .SH KEYWORDS @@ -150,6 +150,16 @@ the end of the data. The default value is the empty string. The acceptable range is \ex01 - \ex7f. A value outside this range results in an error. +.VS "TCL8.7 TIP656" +.TP +\fB\-profile\fR \fIprofile\fR +. +Specifies the encoding profile to be used on the channel. The encoding +transforms in use for the channel's input and output will then be subject to the +rules of that profile. Any failures will result in a channel error. See +\fBPROFILES\fR in the \fBencoding(n)\fR documentation for details about encoding +profiles. +.VE "TCL8.7 TIP656" .TP \fB\-translation\fR \fItranslation\fR .TP diff --git a/doc/encoding.n b/doc/encoding.n index 214ce19..8ede974 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -28,69 +28,41 @@ formats. Performs one of several encoding related operations, depending on \fIoption\fR. The legal \fIoption\fRs are: .TP -\fBencoding convertfrom\fR ?\fB-strict\fR? ?\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR -\fBencoding convertfrom\fR \fB-nocomplain\fR ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR ?\fIencoding\fR? \fIdata\fR +.TP +\fBencoding convertfrom\fR ?\fB-profile \fIprofile\fR? ?\fB-failindex var\fR? \fIencoding\fR \fIdata\fR . -Convert \fIdata\fR to a Unicode string from the specified \fIencoding\fR. The -characters in \fIdata\fR are 8 bit binary data. The resulting -sequence of bytes is a string created by applying the given \fIencoding\fR -to the data. If \fIencoding\fR is not specified, the current +Converts \fIdata\fR, which should be in binary string encoded as per +\fIencoding\fR, to a Tcl string. If \fIencoding\fR is not specified, the current system encoding is used. -.VS "TCL8.7 TIP346, TIP607, TIP601" -.PP -.RS -If the option \fB-nocomplain\fR is given, the command does not fail on -encoding errors. Instead, any not convertable bytes (like incomplete UTF-8 - sequences, see example below) are put as byte values into the output stream. -If the option \fB-nocomplain\fR is not given, the command will fail with an -appropriate error message. -.PP -If the option \fB-failindex\fR with a variable name is given, the error reporting -is changed in the following manner: -in case of a conversion error, the position of the input byte causing the error -is returned in the given variable. The return value of the command are the -converted characters until the first error position. No error condition is raised. -In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. -.PP -The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows invalid byte sequences and surrogates (which - -otherwise - are just passed through). This option may not be used together -with \fB-nocomplain\fR. -.VE "TCL8.7 TIP346, TIP607, TIP601" -.RE + +.VS "TCL8.7 TIP607, TIP656" +The \fB-profile\fR option determines the command behavior in the presence +of conversion errors. See the \fBPROFILES\fR section below for details. Any premature +termination of processing due to errors is reported through an exception if +the \fB-failindex\fR option is not specified. + +If the \fB-failindex\fR is specified, instead of an exception being raised +on premature termination, the result of the conversion up to the point of the +error is returned as the result of the command. In addition, the index +of the source byte triggering the error is stored in \fBvar\fR. If no +errors are encountered, the entire result of the conversion is returned and +the value \fB-1\fR is stored in \fBvar\fR. +.VE "TCL8.7 TIP607, TIP656" +.TP +\fBencoding convertto\fR ?\fIencoding\fR? \fIdata\fR .TP -\fBencoding convertto\fR ?\fB-strict\fR? ?\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR -\fBencoding convertto\fR \fB-nocomplain\fR ?\fIencoding\fR? \fIdata\fR +\fBencoding convertto\fR ?\fB-profile \fIprofile\fR? ?\fB-failindex var\fR? \fIencoding\fR \fIdata\fR . -Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. -The result is a sequence of bytes that represents the converted -string. Each byte is stored in the lower 8-bits of a Unicode -character (indeed, the resulting string is a binary string as far as -Tcl is concerned, at least initially). If \fIencoding\fR is not -specified, the current system encoding is used. -.VS "TCL8.7 TIP346, TIP607, TIP601" -.PP -.RS -If the option \fB-nocomplain\fR is given, the command does not fail on -encoding errors. Instead, the replacement character \fB?\fR is output -for any not representable character (like the dot \fB\\U2022\fR -in \fBiso-8859-1\fR encoding, see example below). -If the option \fB-nocomplain\fR is not given, the command will fail with an -appropriate error message. -.PP -If the option \fB-failindex\fR with a variable name is given, the error reporting -is changed in the following manner: -in case of a conversion error, the position of the input character causing the error -is returned in the given variable. The return value of the command are the -converted bytes until the first error position. No error condition is raised. -In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. -.PP -The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it has no effect. This option may not be used together with \fB-nocomplain\fR. -.VE "TCL8.7 TIP346, TIP607, TIP601" -.RE +Convert \fIstring\fR to the specified \fIencoding\fR. The result is a Tcl binary +string that contains the sequence of bytes representing the converted string in +the specified encoding. If \fIencoding\fR is not specified, the current system +encoding is used. + +.VS "TCL8.7 TIP607, TIP656" +The \fB-profile\fR and \fB-failindex\fR options have the same effect as +described for the \fBencoding convertfrom\fR command. +.VE "TCL8.7 TIP607, TIP656" .TP \fBencoding dirs\fR ?\fIdirectoryList\fR? . @@ -113,58 +85,146 @@ The encodings and .QW iso8859-1 are guaranteed to be present in the list. +.VS "TCL8.7 TIP656" +.TP +\fBencoding profiles\fR +Returns a list of the names of encoding profiles. See \fBPROFILES\fR below. +.VE "TCL8.7 TIP656" .TP \fBencoding system\fR ?\fIencoding\fR? . Set the system encoding to \fIencoding\fR. If \fIencoding\fR is omitted then the command returns the current system encoding. The system encoding is used whenever Tcl passes strings to system calls. -.SH EXAMPLE +\" Do not put .VS on whole section as that messes up the bullet list alignment +.SH PROFILES +.PP +.VS "TCL8.7 TIP656" +Operations involving encoding transforms may encounter several types of +errors such as invalid sequences in the source data, characters that +cannot be encoded in the target encoding and so on. +A \fIprofile\fR prescribes the strategy for dealing with such errors +in one of two ways: +.VE "TCL8.7 TIP656" +. +.IP \(bu +.VS "TCL8.7 TIP656" +Terminating further processing of the source data. The profile does not +determine how this premature termination is conveyed to the caller. By default, +this is signalled by raising an exception. If the \fB-failindex\fR option +is specified, errors are reported through that mechanism. +.VE "TCL8.7 TIP656" +.IP \(bu +.VS "TCL8.7 TIP656" +Continue further processing of the source data using a fallback strategy such +as replacing or discarding the offending bytes in a profile-defined manner. +.VE "TCL8.7 TIP656" +.PP +The following profiles are currently implemented with \fBtcl8\fR being +the default if the \fB-profile\fR is not specified. +.VS "TCL8.7 TIP656" +.TP +\fBtcl8\fR +. +The \fBtcl8\fR profile always follows the first strategy above and corresponds +to the behavior of encoding transforms in Tcl 8.6. When converting from an +external encoding \fBother than utf-8\fR to Tcl strings with the \fBencoding +convertfrom\fR command, invalid bytes are mapped to their numerically equivalent +code points. For example, the byte 0x80 which is invalid in ASCII would be +mapped to code point U+0080. When converting from \fButf-8\fR, invalid bytes +that are defined in CP1252 are mapped to their Unicode equivalents while those +that are not fall back to the numerical equivalents. For example, byte 0x80 is +defined by CP1252 and is therefore mapped to its Unicode equivalent U+20AC while +byte 0x81 which is not defined by CP1252 is mapped to U+0081. As an additional +special case, the sequence 0xC0 0x80 is mapped to U+0000. + +When converting from Tcl strings to an external encoding format using +\fBencoding convertto\fR, characters that cannot be represented in the +target encoding are replaced by an encoding-dependent character, usually +the question mark \fB?\fR. +.TP +\fBstrict\fR +. +The \fBstrict\fR profile always stops processing when an conversion error is +encountered. The error is signalled via an exception or the \fB-failindex\fR +option mechanism. The \fBstrict\fR profile implements a Unicode standard +conformant behavior. +.TP +\fBreplace\fR +. +Like the \fBtcl8\fR profile, the \fBreplace\fR profile always continues +processing on conversion errors but follows a Unicode standard conformant +method for substitution of invalid source data. + +When converting an encoded byte sequence to a Tcl string using +\fBencoding convertfrom\fR, invalid bytes +are replaced by the U+FFFD REPLACEMENT CHARACTER code point. + +When encoding a Tcl string with \fBencoding convertto\fR, +code points that cannot be represented in the +target encoding are transformed to an encoding-specific fallback character, +U+FFFD REPLACEMENT CHARACTER for UTF targets and generally `?` for other +encodings. +.VE "TCL8.7 TIP656" +.SH EXAMPLES +.PP +These examples use the utility proc below that prints the Unicode code points +comprising a Tcl string. +.PP +.CS +proc codepoints {s} {join [lmap c [split $s ""] { + string cat U+ [format %.6X [scan $c %c]]}] +} +.CE .PP Example 1: convert a byte sequence in Japanese euc-jp encoding to a TCL string: .PP .CS -set s [\fBencoding convertfrom\fR euc-jp "\exA4\exCF"] +% codepoints [\fBencoding convertfrom\fR euc-jp "\exA4\exCF"] +U+00306F .CE .PP -The result is the unicode codepoint: +The result is the unicode codepoint .QW "\eu306F" , which is the Hiragana letter HA. -.VS "TCL8.7 TIP346, TIP607, TIP601" +.VS "TCL8.7 TIP607, TIP656" .PP -Example 2: detect the error location in an incomplete UTF-8 sequence: +Example 2: Error handling based on profiles: .PP +The letter \fBA\fR is Unicode character U+0041 and the byte "\ex80" is invalid +in ASCII encoding. .CS -% set s [\fBencoding convertfrom\fR -failindex i utf-8 "A\exC3"] -A -% set i -1 -.CE -.PP -Example 3: return the incomplete UTF-8 sequence by raw bytes: .PP -.CS -% set s [\fBencoding convertfrom\fR -nocomplain utf-8 "A\exC3"] +% codepoints [encoding convertfrom -profile tcl8 ascii A\ex80] +U+000041 U+000080 +% codepoints [encoding convertfrom -profile replace ascii A\ex80] +U+000041 U+00FFFD +% codepoints [encoding convertfrom -profile strict ascii A\ex80] +unexpected byte sequence starting at index 1: '\ex80' .CE -The result is "A" followed by the byte \exC3. .PP -Example 4: detect the error location while transforming to ISO8859-1 -(ISO-Latin 1): +Example 3: Get partial data and the error location: .PP .CS -% set s [\fBencoding convertto\fR -failindex i iso8859-1 "A\eu0141"] -A -% set i -1 +% codepoints [encoding convertfrom -profile strict -failindex idx ascii AB\ex80] +U+000041 U+000042 +% set idx +2 .CE .PP -Example 5: replace a not representable character by the replacement character: +Example 4: Encode a character that is not representable in ISO8859-1: .PP .CS -% set s [\fBencoding convertto\fR -nocomplain iso8859-1 "A\eu0141"] +% encoding convertto iso8859-1 A\eu0141 A? +% encoding convertto -profile strict iso8859-1 A\eu0141 +unexpected character at index 1: 'U+000141' +% encoding convertto -profile strict -failindex idx iso8859-1 A\eu0141 +A +% set idx +1 .CE -.VE "TCL8.7 TIP346, TIP607, TIP601" +.VE "TCL8.7 TIP607, TIP656" .PP .SH "SEE ALSO" Tcl_GetEncoding(3), fconfigure(n) diff --git a/doc/fconfigure.n b/doc/fconfigure.n index 912702f..c2847cd 100644 --- a/doc/fconfigure.n +++ b/doc/fconfigure.n @@ -112,33 +112,16 @@ string. The acceptable range for \fB\-eofchar\fR values is \ex01 - \ex7F; attempting to set \fB\-eofchar\fR to a value outside of this range will generate an error. -.VS "TCL9.0 TIP633" +.VS "TCL8.7 TIP656" .TP -\fB\-nocomplainencoding\fR \fIbool\fR +\fB\-profile\fR \fIprofile\fR . -Reporting mode of encoding errors. -If set to a \fItrue\fR value, encoding errors are resolved by a replacement -character (output) or verbatim bytes (input). No error is thrown. -If set to a \fIfalse\fR value, errors are thrown in case of encoding errors. -.RS -.PP -The default value is \fIfalse\fR starting from TCL 9.0 and \fItrue\fR on TCL 8.7. -This option was introduced with TCL 8.7 and has the fix value \fItrue\fR. -.PP -See the \fI\-nocomplain\fR option of the \fBencoding\fR command for more information. -.RE -.VE "TCL9.0 TIP633" -.VS "TCL8.7 TIP346" -.TP -\fB\-strictencoding\fR \fIbool\fR -. -Activate additional stricter encoding application rules. -Default value is \fIfalse\fR. -.RS -.PP -See the \fI\-strict\fR option of the \fBencoding\fR command for more information. -.VE "TCL8.7 TIP346" -.RE +Specifies the encoding profile to be used on the channel. The encoding +transforms in use for the channel's input and output will then be subject to the +rules of that profile. Any failures will result in a channel error. See +\fBPROFILES\fR in the \fBencoding(n)\fR documentation for details about encoding +profiles. +.VE "TCL8.7 TIP656" .TP \fB\-translation\fR \fImode\fR .TP @@ -299,7 +282,7 @@ close(n), encoding(n), flush(n), gets(n), open(n), puts(n), read(n), socket(n), Tcl_StandardChannels(3) .SH KEYWORDS blocking, buffering, carriage return, end of line, encoding, flushing, linemode, -newline, nonblocking, platform, translation, encoding, filter, byte array, +newline, nonblocking, platform, profile, translation, encoding, filter, byte array, binary '\" Local Variables: '\" mode: nroff diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 5b68adf..17f7589 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2806,7 +2806,7 @@ Utf32ToUtfProc( * unsigned short-size data. */ - if ((ch > 0) && (ch < 0x80)) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); @@ -3036,7 +3036,7 @@ Utf16ToUtfProc( } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; @@ -3087,7 +3087,7 @@ Utf16ToUtfProc( if ((ch & ~0x3FF) == 0xD800) { if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; src -= 2; dst--; numChars--; diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index a77a108..278a4e0 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -1134,7 +1134,6 @@ proc tcltest::SafeFetch {n1 n2 op} { } } - # tcltest::Asciify -- # # Transforms the passed string to contain only printable ascii characters. @@ -1156,9 +1155,9 @@ proc tcltest::Asciify {s} { set i [scan $c %c] if {[string is print $c] && ($i <= 127)} { append print $c - } elseif {$i <= 0xff} { + } elseif {$i <= 0xFF} { append print \\x[format %02X $i] - } elseif {$i <= 0xffff} { + } elseif {$i <= 0xFFFF} { append print \\u[format %04X $i] } else { append print \\U[format %08X $i] diff --git a/macosx/README b/macosx/README index c1f9e87..ee1b7ea 100644 --- a/macosx/README +++ b/macosx/README @@ -113,7 +113,7 @@ your ${USER}.pbxuser file (located inside the Tcl.xcodeproj bundle directory) with a text editor. - To build universal binaries outside of the Xcode IDE, set CFLAGS as follows: - export CFLAGS="-arch x86_64 -arch arm64e" + export CFLAGS="-arch x86_64 -arch arm64" This requires Mac OS X 10.6 and Xcode 10.2 and will work on any architecture. Note that configure requires CFLAGS to contain a least one architecture that can be run on the build machine (i.e. x86_64 on Core2/Xeon). @@ -133,7 +133,7 @@ If you are building from CVS, omit this step (CVS source tree names usually do not contain a version number). - Setup environment variables as desired, e.g. for a universal build on 10.5: - CFLAGS="-arch x86_64 -arch arm64e -mmacosx-version-min=10.5" + CFLAGS="-arch x86_64 -arch arm64 -mmacosx-version-min=10.5" export CFLAGS - Change to the directory containing the Tcl source tree and build: diff --git a/macosx/Tcl.xcodeproj/project.pbxproj b/macosx/Tcl.xcodeproj/project.pbxproj index 872a8cd..fc00f74 100644 --- a/macosx/Tcl.xcodeproj/project.pbxproj +++ b/macosx/Tcl.xcodeproj/project.pbxproj @@ -2122,7 +2122,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; MACOSX_DEPLOYMENT_TARGET = 10.6; PREBINDING = NO; }; @@ -2507,7 +2507,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; GCC_VERSION = 4.0; MACOSX_DEPLOYMENT_TARGET = 10.6; PREBINDING = NO; @@ -2545,7 +2545,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; DEBUG_INFORMATION_FORMAT = dwarf; GCC = "llvm-gcc"; GCC_OPTIMIZATION_LEVEL = 4; @@ -2685,7 +2685,7 @@ ARCHS = ( "$(NATIVE_ARCH_64_BIT)", ); - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; DEBUG_INFORMATION_FORMAT = dwarf; GCC = clang; GCC_OPTIMIZATION_LEVEL = 4; @@ -2752,7 +2752,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; CPPFLAGS = "-isysroot $(SDKROOT) $(CPPFLAGS)"; MACOSX_DEPLOYMENT_TARGET = 10.5; PREBINDING = NO; diff --git a/tests/ioTrans.test b/tests/ioTrans.test index 9639576..9f6542c 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -643,6 +643,9 @@ namespace eval reflector { proc finalize {_ chan} { + foreach id [after info] { + after cancel $id + } namespace delete $_ } diff --git a/unix/configure b/unix/configure index 6a80e5c..3734dc3 100755 --- a/unix/configure +++ b/unix/configure @@ -6506,16 +6506,16 @@ then : do64bit_ok=yes fi;; - arm64|arm64e) - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if compiler accepts -arch arm64e flag" >&5 -printf %s "checking if compiler accepts -arch arm64e flag... " >&6; } -if test ${tcl_cv_cc_arch_arm64e+y} + arm64) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if compiler accepts -arch arm64 flag" >&5 +printf %s "checking if compiler accepts -arch arm64 flag... " >&6; } +if test ${tcl_cv_cc_arch_arm64+y} then : printf %s "(cached) " >&6 else $as_nop hold_cflags=$CFLAGS - CFLAGS="$CFLAGS -arch arm64e" + CFLAGS="$CFLAGS -arch arm64" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -6529,20 +6529,20 @@ main (void) _ACEOF if ac_fn_c_try_link "$LINENO" then : - tcl_cv_cc_arch_arm64e=yes + tcl_cv_cc_arch_arm64=yes else $as_nop - tcl_cv_cc_arch_arm64e=no + tcl_cv_cc_arch_arm64=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext CFLAGS=$hold_cflags fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $tcl_cv_cc_arch_arm64e" >&5 -printf "%s\n" "$tcl_cv_cc_arch_arm64e" >&6; } - if test $tcl_cv_cc_arch_arm64e = yes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $tcl_cv_cc_arch_arm64" >&5 +printf "%s\n" "$tcl_cv_cc_arch_arm64" >&6; } + if test $tcl_cv_cc_arch_arm64 = yes then : - CFLAGS="$CFLAGS -arch arm64e" + CFLAGS="$CFLAGS -arch arm64" do64bit_ok=yes fi;; @@ -6554,7 +6554,7 @@ printf "%s\n" "$as_me: WARNING: Don't know how enable 64-bit on architecture \`a else $as_nop # Check for combined 32-bit and 64-bit fat build - if echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64e) ' \ + if echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64) ' \ && echo "$CFLAGS " |grep -E -q -- '-arch (ppc|i386) ' then : diff --git a/unix/tcl.m4 b/unix/tcl.m4 index 17d3b06..cf9f711 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -1406,16 +1406,16 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ CFLAGS="$CFLAGS -arch x86_64" do64bit_ok=yes ]);; - arm64|arm64e) - AC_CACHE_CHECK([if compiler accepts -arch arm64e flag], - tcl_cv_cc_arch_arm64e, [ + arm64) + AC_CACHE_CHECK([if compiler accepts -arch arm64 flag], + tcl_cv_cc_arch_arm64, [ hold_cflags=$CFLAGS - CFLAGS="$CFLAGS -arch arm64e" + CFLAGS="$CFLAGS -arch arm64" AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])], - [tcl_cv_cc_arch_arm64e=yes],[tcl_cv_cc_arch_arm64e=no]) + [tcl_cv_cc_arch_arm64=yes],[tcl_cv_cc_arch_arm64=no]) CFLAGS=$hold_cflags]) - AS_IF([test $tcl_cv_cc_arch_arm64e = yes], [ - CFLAGS="$CFLAGS -arch arm64e" + AS_IF([test $tcl_cv_cc_arch_arm64 = yes], [ + CFLAGS="$CFLAGS -arch arm64" do64bit_ok=yes ]);; *) @@ -1423,7 +1423,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ esac ], [ # Check for combined 32-bit and 64-bit fat build - AS_IF([echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64e) ' \ + AS_IF([echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64) ' \ && echo "$CFLAGS " |grep -E -q -- '-arch (ppc|i386) '], [ fat_32_64=yes]) ]) |