diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-02-29 21:34:46 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2012-02-29 21:34:46 (GMT) |
commit | 6ad807858b1217d40a9e5a2c1d5bf241625971ca (patch) | |
tree | 429c25c751c106db85c75db6345c91296dc73692 | |
parent | 9e0ab0841248c6ba0cf49036de9005c8d0120a28 (diff) | |
parent | 70ea61c893a776323211dbc1d3b5d1d10c7d745e (diff) | |
download | tcl-6ad807858b1217d40a9e5a2c1d5bf241625971ca.zip tcl-6ad807858b1217d40a9e5a2c1d5bf241625971ca.tar.gz tcl-6ad807858b1217d40a9e5a2c1d5bf241625971ca.tar.bz2 |
[Bug 3466099] BOM in Unicode
-rw-r--r-- | ChangeLog | 6 |
-rw-r--r-- | generic/tclEncoding.c | 4 |
-rw-r--r-- | generic/tclIOUtil.c | 17 |
-rw-r--r-- | tests/source.test | 13 |
4 files changed, 37 insertions, 3 deletions
```diff
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2012-02-29  Jan Nijtmans  <nijtmans@users.sf.net>
+
+        * generic/tclIOUtil.c: [Bug 3466099] BOM in Unicode
+        * generic/tclEncoding.c:
+        * tests/source.test
+
 2012-02-23  Donal K. Fellows  <dkf@users.sf.net>
 
         * tests/reg.test (14.21-23): Add tests relating to bug 1115587. Actual
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 2e0d51f..a4bea31 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -984,13 +984,13 @@ Tcl_GetEncodingNames(
 int
 Tcl_SetSystemEncoding(
     Tcl_Interp *interp,         /* Interp for error reporting, if not NULL. */
-    CONST char *name)           /* The name of the desired encoding, or NULL
+    CONST char *name)           /* The name of the desired encoding, or NULL/""
                                  * to reset to default encoding. */
 {
     Tcl_Encoding encoding;
     Encoding *encodingPtr;
 
-    if (name == NULL) {
+    if (!name || !*name) {
         Tcl_MutexLock(&encodingMutex);
         encoding = defaultEncoding;
         encodingPtr = (Encoding *) encoding;
diff --git a/generic/tclIOUtil.c b/generic/tclIOUtil.c
index e714471..44849d1 100644
--- a/generic/tclIOUtil.c
+++ b/generic/tclIOUtil.c
@@ -1788,7 +1788,22 @@ Tcl_FSEvalFileEx(
 
     objPtr = Tcl_NewObj();
     Tcl_IncrRefCount(objPtr);
-    if (Tcl_ReadChars(chan, objPtr, -1, 0) < 0) {
+    /* Try to read first character of stream, so we can
+     * check for utf-8 BOM to be handled especially.
+     */
+    if (Tcl_ReadChars(chan, objPtr, 1, 0) < 0) {
+        Tcl_Close(interp, chan);
+        Tcl_AppendResult(interp, "couldn't read file \"",
+                Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL);
+        goto end;
+    }
+    string = Tcl_GetString(objPtr);
+    /*
+     * If first character is not a BOM, append the remaining characters,
+     * otherwise replace them [Bug 3466099].
+     */
+    if (Tcl_ReadChars(chan, objPtr, -1,
+            memcmp(string, "\xef\xbf\xbe", 3)) < 0) {
         Tcl_Close(interp, chan);
         Tcl_AppendResult(interp, "couldn't read file \"",
                 Tcl_GetString(pathPtr), "\": ", Tcl_PosixError(interp), NULL);
diff --git a/tests/source.test b/tests/source.test
index 9162e8e..e79b211 100644
--- a/tests/source.test
+++ b/tests/source.test
@@ -107,6 +107,19 @@ test source-2.6 {source error conditions} -setup {
 } -match listGlob -result [list 1 \
         {couldn't read file "*_non_existent_": no such file or directory} \
         {POSIX ENOENT {no such file or directory}}]
+test source-2.7 {utf-8 with BOM} -setup {
+    set sourcefile [makeFile {} source.file]
+} -body {
+    set out [open $sourcefile w]
+    fconfigure $out -encoding utf-8
+    puts $out "\ufffeset y new-y"
+    close $out
+    set y old-y
+    source -encoding utf-8 $sourcefile
+    return $y
+} -cleanup {
+    removeFile $sourcefile
+} -result {new-y}
 
 test source-3.1 {return in middle of source file} -setup {
     set sourcefile [makeFile {
```