From 844792f3bb8eea9124be41d436c7462f1daa19b9 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Mar 2023 17:10:51 +0000 Subject: Cherrypick yip-656. Start on Tcl_ExternalToUtf/Tcl_UtfToExternal tests --- generic/tclTest.c | 124 +++++++++++++++++++++++++++++++++++++++++++++--------- tests/utfext.test | 96 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+), 21 deletions(-) create mode 100644 tests/utfext.test diff --git a/generic/tclTest.c b/generic/tclTest.c index 33205fb..e2d8b3b 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1996,12 +1996,21 @@ static void SpecialFree( * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. * * Side effects: + * * The result in the interpreter is a list of the return code from the * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and - * the encoded binary string. If any of the srcreadvar, dstwrotevar and + * an encoded binary string of length dstLen. Note the string is the + * entire output buffer, not just the part containing the decoded + * portion. This allows for additional checks at test script level. + * + * If any of the srcreadvar, dstwrotevar and * dstcharsvar are specified and not empty, they are treated as names * of variables where the *srcRead, *dstWrote and *dstChars output * from the functions are stored. + * + * The function also checks internally whether nuls are correctly + * appended as requested but the TCL_ENCODING_NO_TERMINATE flag + * and that no buffer overflows occur. *------------------------------------------------------------------------ */ typedef int @@ -2013,13 +2022,15 @@ static int UtfExtWrapper( Tcl_Encoding encoding; int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ Tcl_EncodingState encState; - int flags; Tcl_Size srcLen, bufLen; const unsigned char *bytes; unsigned char *bufPtr; int srcRead, dstLen, dstWrote, dstChars; Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; int result; + int flags; + Tcl_Obj **flagObjs; + int nflags; if (objc < 7 || objc > 10) { Tcl_WrongNumArgs(interp, @@ -2031,9 +2042,48 @@ static int UtfExtWrapper( if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { return TCL_ERROR; } - if (Tcl_GetIntFromObj(interp, objv[4], &flags) != TCL_OK) { - return TCL_ERROR; + + /* Flags may be specified as list of integers and keywords */ + flags = 0; + if (Tcl_ListObjGetElements(interp, objv[4], &nflags, &flagObjs) != TCL_OK) { + return TCL_ERROR; + } + + struct { + const char *flagKey; + int flag; + } flagMap[] = { + {"start", TCL_ENCODING_START}, + {"end", TCL_ENCODING_END}, + {"stoponerror", TCL_ENCODING_STOPONERROR}, + {"noterminate", TCL_ENCODING_NO_TERMINATE}, + {"charlimit", TCL_ENCODING_CHAR_LIMIT}, + {"profiletcl8", TCL_ENCODING_PROFILE_TCL8}, + {"profilestrict", TCL_ENCODING_PROFILE_STRICT}, + {"profilereplace", TCL_ENCODING_PROFILE_REPLACE}, + {NULL, 0} + }; + int i; + for (i = 0; i < nflags; ++i) { + int flag; + if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { + flags |= flag; + } + else { + int idx; + if (Tcl_GetIndexFromObjStruct(interp, + flagObjs[i], + flagMap, + sizeof(flagMap[0]), + "flag", + 0, + &idx) != TCL_OK) { + return TCL_ERROR; + } + flags |= flagMap[idx].flag; + } } + /* Assumes state is integer if not "" */ if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { encState = (Tcl_EncodingState)&encStateValue; @@ -2061,27 +2111,47 @@ static int UtfExtWrapper( if (objc > 9) { if (Tcl_GetCharLength(objv[9])) { dstCharsVar = objv[9]; - } + } } } } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + /* Caller should have specified the dest char limit */ + Tcl_Obj *valueObj; + if (dstCharsVar == NULL || + (valueObj = Tcl_ObjGetVar2(interp, dstCharsVar, NULL, 0)) == NULL + ) { + Tcl_SetResult(interp, + "dstCharsVar must be specified with integer value if " + "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, dstCharsVar, &dstChars) != TCL_OK) { + return TCL_ERROR; + } + } else { + dstChars = 0; /* Only used for output */ + } bufLen = dstLen + 4; /* 4 -> overflow detection */ bufPtr = Tcl_Alloc(bufLen); - memmove(bufPtr + dstLen, "\xAB\xCD\xEF\x00", 4); /* overflow detection */ + memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ result = (*transformer)(interp, encoding, bytes, srcLen, flags, &encState, bufPtr, dstLen, srcReadVar ? &srcRead : NULL, &dstWrote, dstCharsVar ? &dstChars : NULL); - if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\x00", 4)) { + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) { Tcl_SetResult(interp, "Tcl_ExternalToUtf wrote past output buffer", TCL_STATIC); result = TCL_ERROR; - } else { + } else if (result != TCL_ERROR) { + Tcl_Obj *resultObjs[3]; + switch (result) { case TCL_OK: resultObjs[0] = Tcl_NewStringObj("ok", -1); @@ -2105,22 +2175,34 @@ static int UtfExtWrapper( result = TCL_OK; resultObjs[1] = encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); - resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstWrote); + resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { - if (Tcl_ObjSetVar2(interp, srcReadVar, NULL, Tcl_NewIntObj(srcRead), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + srcReadVar, + NULL, + Tcl_NewIntObj(srcRead), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } if (dstWroteVar) { - if (Tcl_ObjSetVar2(interp, dstWroteVar, NULL, Tcl_NewIntObj(dstWrote), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + dstWroteVar, + NULL, + Tcl_NewIntObj(dstWrote), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } if (dstCharsVar) { - if (Tcl_ObjSetVar2(interp, dstCharsVar, NULL, Tcl_NewIntObj(dstChars), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + dstCharsVar, + NULL, + Tcl_NewIntObj(dstChars), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); } diff --git a/tests/utfext.test b/tests/utfext.test new file mode 100644 index 0000000..61e36b8 --- /dev/null +++ b/tests/utfext.test @@ -0,0 +1,96 @@ +# This file contains a collection of tests for Tcl_UtfToExternal and +# Tcl_UtfToExternal. Sourcing this file into Tcl runs the tests and generates +# errors. No output means no errors found. +# +# Copyright (c) 2023 Ashok P. Nadkarni +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. + +if {"::tcltest" ni [namespace children]} { + package require tcltest 2.5 + namespace import -force ::tcltest::* +} + +::tcltest::loadTestedCommands +catch [list package require -exact tcl::test [info patchlevel]] + +testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testencoding [llength [info commands testencoding]] + +# Maps encoded bytes string to utf-8 equivalents, both in hex +# encoding utf-8 encdata +lappend utfExtMap {*}{ + ascii 414243 414243 +} + +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + +# Simple test with basic flags +proc testbasic {direction enc hexin hexout {flags {start end}}} { + if {$direction eq "toutf"} { + set cmd Tcl_ExternalToUtf + } else { + set cmd Tcl_UtfToExternal + } + set in [binary decode hex $hexin] + set out [binary decode hex $hexout] + set dstlen 40 ;# Should be enough for all encoding tests + + # The C wrapper fills entire destination buffer with FF. + # Anything beyond expected output should have FF's + set filler [string repeat \xFF $dstlen] + set result [string range "$out$filler" 0 $dstlen-1] + test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \ + [list testencoding $cmd $enc $in $flags {} $dstlen] \ + -result [list ok {} $result] + foreach profile [encoding profiles] { + set flags2 [linsert $flags end profile$profile] + test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \ + [list testencoding $cmd $enc $in $flags2 {} $dstlen] \ + -result [list ok {} $result] + } +} + +# +# Basic tests +foreach {enc utfhex hex} $utfExtMap { + # Basic test - TCL_ENCODING_START|TCL_ENCODING_END + # Note by default output should be terminated with \0 + testbasic toutf $enc $hex ${utfhex}00 {start end} + testbasic fromutf $enc $utfhex ${hex}00 {start end} + + # Test TCL_ENCODING_NO_TERMINATE + testbasic toutf $enc $hex $utfhex {start end noterminate} + # knownBug - noterminate not obeyed by fromutf + # testbasic fromutf $enc $utfhex $hex {start end noterminate} +} + +# Test for insufficient space +test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { + testencoding Tcl_UtfToExternal unicode A {start end} {} 1 +} -result {nospace {} {}} + +::tcltest::cleanupTests +return + +# Local Variables: +# mode: tcl +# End: -- cgit v0.12