From a09069115b1c5707f85d873526d0d7c9b2ace1fb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Feb 2012 15:21:27 +0000 Subject: [Bug 3466099] BOM in Unicode --- ChangeLog | 6 ++++++ generic/tclEncoding.c | 4 ++-- generic/tclIOUtil.c | 16 ++++++++++++---- tests/source.test | 15 +++++++++++++++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0dbd68c..2031fb5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2012-02-?? Jan Nijtmans + + * generic/tclIOUtil.c: [Bug 3466099] BOM in Unicode + * generic/tclEncoding.c: + * tests/source.test: + 2012-02-09 Don Porter * generic/tclStringObj.c: [Bug 3484402] Correct Off-By-One diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 9432c05..a4a7027 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -764,13 +764,13 @@ Tcl_GetEncodingNames(interp) int Tcl_SetSystemEncoding(interp, name) Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ - CONST char *name; /* The name of the desired encoding, or NULL + CONST char *name; /* The name of the desired encoding, or NULL/"" * to reset to default encoding. 
*/ { Tcl_Encoding encoding; Encoding *encodingPtr; - if (name == NULL) { + if (!name || !*name) { Tcl_MutexLock(&encodingMutex); encoding = defaultEncoding; encodingPtr = (Encoding *) encoding; diff --git a/generic/tclIOUtil.c b/generic/tclIOUtil.c index b54b76b..d5bb102 100644 --- a/generic/tclIOUtil.c +++ b/generic/tclIOUtil.c @@ -1755,11 +1755,19 @@ Tcl_FSEvalFile(interp, pathPtr) * [Bug: 2040] */ Tcl_SetChannelOption(interp, chan, "-eofchar", "\32"); - if (Tcl_ReadChars(chan, objPtr, -1, 0) < 0) { - Tcl_Close(interp, chan); + /* Try to read utf-8 BOM, if available */ + if (Tcl_ReadChars(chan, objPtr, 1, 0) < 0) { + Tcl_Close(interp, chan); Tcl_AppendResult(interp, "couldn't read file \"", - Tcl_GetString(pathPtr), - "\": ", Tcl_PosixError(interp), (char *) NULL); + Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL); + goto end; + } + string = Tcl_GetString(objPtr); + if (Tcl_ReadChars(chan, objPtr, -1, + memcmp(string, "\xef\xbf\xbe", 3)) < 0) { + Tcl_Close(interp, chan); + Tcl_AppendResult(interp, "couldn't read file \"", + Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL); goto end; } if (Tcl_Close(interp, chan) != TCL_OK) { diff --git a/tests/source.test b/tests/source.test index 3a1454c..5774a97 100644 --- a/tests/source.test +++ b/tests/source.test @@ -118,6 +118,21 @@ test source-2.6 {source error conditions} -setup { {couldn't read file "*_non_existent_": no such file or directory} \ {POSIX ENOENT {no such file or directory}}] +test source-2.7 {utf-8 with BOM} -setup { + set sourcefile [makeFile {} source.file] + set saveencoding [encoding system] + encoding system utf-8 + set out [open $sourcefile w] + puts $out "\ufffeset y new-y" + close $out +} -body { + set y old-y + source $sourcefile + set y +} -cleanup { + removeFile source.file + encoding system $saveencoding +} -result {new-y} test source-3.1 {return in middle of source file} -setup { set sourcefile [makeFile { -- cgit v0.12