diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-02-29 21:56:20 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-02-29 21:56:20 (GMT) |
commit | 336410f92239eecba4d739de6e5b67cf02990694 (patch) | |
tree | 982b5959dd17b61c8f3b21f473dfbe426b40cfb6 | |
parent | f3bdd208ac10cfe9a475b0689677acd542debee2 (diff) | |
parent | 6ad807858b1217d40a9e5a2c1d5bf241625971ca (diff) | |
download | tcl-336410f92239eecba4d739de6e5b67cf02990694.zip tcl-336410f92239eecba4d739de6e5b67cf02990694.tar.gz tcl-336410f92239eecba4d739de6e5b67cf02990694.tar.bz2 |
[Bug 3466099] BOM in Unicode
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | generic/tclEncoding.c | 4 | ||||
-rw-r--r-- | generic/tclIOUtil.c | 36 | ||||
-rw-r--r-- | tests/source.test | 13 |
4 files changed, 55 insertions, 4 deletions
@@ -1,3 +1,9 @@
+2012-02-29 Jan Nijtmans <nijtmans@users.sf.net>
+
+        * generic/tclIOUtil.c: [Bug 3466099] BOM in Unicode
+        * generic/tclEncoding.c:
+        * tests/source.test
+
 2012-02-23 Donal K. Fellows <dkf@users.sf.net>
 
         * tests/reg.test (14.21-23): Add tests relating to bug 1115587. Actual
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 15411d8..49418c9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -979,13 +979,13 @@ Tcl_GetEncodingNames(
 int
 Tcl_SetSystemEncoding(
     Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */
-    const char *name) /* The name of the desired encoding, or NULL
+    const char *name) /* The name of the desired encoding, or NULL/""
                        * to reset to default encoding. */
 {
     Tcl_Encoding encoding;
     Encoding *encodingPtr;
 
-    if (name == NULL) {
+    if (!name || !*name) {
         Tcl_MutexLock(&encodingMutex);
         encoding = defaultEncoding;
         encodingPtr = (Encoding *) encoding;
diff --git a/generic/tclIOUtil.c b/generic/tclIOUtil.c
index fa616b3..62553f2 100644
--- a/generic/tclIOUtil.c
+++ b/generic/tclIOUtil.c
@@ -1729,7 +1729,22 @@ Tcl_FSEvalFileEx(
 
     objPtr = Tcl_NewObj();
     Tcl_IncrRefCount(objPtr);
-    if (Tcl_ReadChars(chan, objPtr, -1, 0) < 0) {
+    /* Try to read first character of stream, so we can
+     * check for utf-8 BOM to be handled especially.
+     */
+    if (Tcl_ReadChars(chan, objPtr, 1, 0) < 0) {
+        Tcl_Close(interp, chan);
+        Tcl_AppendResult(interp, "couldn't read file \"",
+                Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL);
+        goto end;
+    }
+    string = Tcl_GetString(objPtr);
+    /*
+     * If first character is not a BOM, append the remaining characters,
+     * otherwise replace them [Bug 3466099].
+     */
+    if (Tcl_ReadChars(chan, objPtr, -1,
+            memcmp(string, "\xef\xbf\xbe", 3)) < 0) {
         Tcl_Close(interp, chan);
         Tcl_AppendResult(interp, "couldn't read file \"",
                 Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL);
@@ -1798,6 +1813,7 @@ TclNREvalFile(
     Tcl_Obj *oldScriptFile, *objPtr;
     Interp *iPtr;
     Tcl_Channel chan;
+    const char *string;
 
     if (Tcl_FSGetNormalizedPath(interp, pathPtr) == NULL) {
         return TCL_ERROR;
@@ -1839,13 +1855,29 @@ TclNREvalFile(
 
     objPtr = Tcl_NewObj();
     Tcl_IncrRefCount(objPtr);
-    if (Tcl_ReadChars(chan, objPtr, -1, 0) < 0) {
+    /* Try to read first character of stream, so we can
+     * check for utf-8 BOM to be handled especially.
+     */
+    if (Tcl_ReadChars(chan, objPtr, 1, 0) < 0) {
         Tcl_Close(interp, chan);
         Tcl_AppendResult(interp, "couldn't read file \"",
                 Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL);
         Tcl_DecrRefCount(objPtr);
         return TCL_ERROR;
     }
+    string = Tcl_GetString(objPtr);
+    /*
+     * If first character is not a BOM, append the remaining characters,
+     * otherwise replace them [Bug 3466099].
+     */
+    if (Tcl_ReadChars(chan, objPtr, -1,
+            memcmp(string, "\xef\xbf\xbe", 3)) < 0) {
+        Tcl_Close(interp, chan);
+        Tcl_AppendResult(interp, "couldn't read file \"",
+                Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL);
+        Tcl_DecrRefCount(objPtr);
+        return TCL_ERROR;
+    }
 
     if (Tcl_Close(interp, chan) != TCL_OK) {
         Tcl_DecrRefCount(objPtr);
diff --git a/tests/source.test b/tests/source.test
index 081a129..9d09429 100644
--- a/tests/source.test
+++ b/tests/source.test
@@ -107,6 +107,19 @@ test source-2.6 {source error conditions} -setup {
 } -match listGlob -result [list 1 \
         {couldn't read file "*_non_existent_": no such file or directory} \
         {POSIX ENOENT {no such file or directory}}]
+test source-2.7 {utf-8 with BOM} -setup {
+    set sourcefile [makeFile {} source.file]
+} -body {
+    set out [open $sourcefile w]
+    fconfigure $out -encoding utf-8
+    puts $out "\ufffeset y new-y"
+    close $out
+    set y old-y
+    source -encoding utf-8 $sourcefile
+    return $y
+} -cleanup {
+    removeFile $sourcefile
+} -result {new-y}
 
 test source-3.1 {return in middle of source file} -setup {
     set sourcefile [makeFile {
|