summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2022-07-12 12:31:57 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2022-07-12 12:31:57 (GMT)
commit1e6f410bd20b8576dbd0be31a153d56d48a1dae4 (patch)
tree4d1d271f0b2ebd658230ba2acb4eb32da025102a
parenta6ed76df7c2b1d8c8f3296a99023f88fc76c3f04 (diff)
downloadtcl-1e6f410bd20b8576dbd0be31a153d56d48a1dae4.zip
tcl-1e6f410bd20b8576dbd0be31a153d56d48a1dae4.tar.gz
tcl-1e6f410bd20b8576dbd0be31a153d56d48a1dae4.tar.bz2
Fix [b79df322a9]. Tcl_GetUnicode/Tcl_NewUnicodeObj crash. Add tests
-rw-r--r--generic/tclStringObj.c31
-rw-r--r--generic/tclTest.c41
-rw-r--r--tests/string.test12
3 files changed, 75 insertions, 9 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 86b3937..852c4ff 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -68,6 +68,7 @@ static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr);
static void SetUnicodeObj(Tcl_Obj *objPtr,
const Tcl_UniChar *unicode, int numChars);
static int UnicodeLength(const Tcl_UniChar *unicode);
+static int UTF16Length(const unsigned short *unicode);
static void UpdateStringOfString(Tcl_Obj *objPtr);
#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED)
static void DupUTF16StringInternalRep(Tcl_Obj *objPtr,
@@ -562,6 +563,10 @@ Tcl_NewUnicodeObj(
TclNewObj(objPtr);
TclInvalidateStringRep(objPtr);
+ if (numChars < 0) {
+ numChars = UTF16Length(unicode);
+ }
+
String *stringPtr = (String *)ckalloc((offsetof(String, unicode)
+ sizeof(unsigned short)) + numChars * sizeof(unsigned short));
memcpy(stringPtr->unicode, unicode, numChars * sizeof(unsigned short));
@@ -984,7 +989,7 @@ TclGetUnicodeFromObj(
{
String *stringPtr;
- SetStringFromAny(NULL, objPtr);
+ SetUTF16StringFromAny(NULL, objPtr);
stringPtr = GET_STRING(objPtr);
if (lengthPtr != NULL) {
@@ -1451,14 +1456,7 @@ Tcl_SetUnicodeObj(
String *stringPtr;
if (numChars < 0) {
- numChars = 0;
-
- if (unicode) {
- while (numChars >= 0 && unicode[numChars] != 0) {
- numChars++;
- }
- }
- stringCheckLimits(numChars);
+ numChars = UTF16Length(unicode);
}
/*
@@ -1482,6 +1480,21 @@ Tcl_SetUnicodeObj(
#endif
static int
+UTF16Length(
+ const unsigned short *ucs2Ptr)
+{
+ int numChars = 0;
+
+ if (ucs2Ptr) {
+ while (numChars >= 0 && ucs2Ptr[numChars] != 0) {
+ numChars++;
+ }
+ }
+ stringCheckLimits(numChars);
+ return numChars;
+}
+
+static int
UnicodeLength(
const Tcl_UniChar *unicode)
{
diff --git a/generic/tclTest.c b/generic/tclTest.c
index b2632f0..bf5741c 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -222,6 +222,7 @@ static Tcl_ObjCmdProc TestbytestringObjCmd;
static Tcl_ObjCmdProc TestsetbytearraylengthObjCmd;
static Tcl_ObjCmdProc TestpurebytesobjObjCmd;
static Tcl_ObjCmdProc TeststringbytesObjCmd;
+static Tcl_ObjCmdProc Testutf16stringObjCmd;
static Tcl_CmdProc TestcmdinfoCmd;
static Tcl_CmdProc TestcmdtokenCmd;
static Tcl_CmdProc TestcmdtraceCmd;
@@ -560,6 +561,7 @@ Tcltest_Init(
Tcl_CreateObjCommand(interp, "testsetbytearraylength", TestsetbytearraylengthObjCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "testbytestring", TestbytestringObjCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "teststringbytes", TeststringbytesObjCmd, NULL, NULL);
+ Tcl_CreateObjCommand(interp, "testutf16string", Testutf16stringObjCmd, NULL, NULL);
Tcl_CreateObjCommand(interp, "testwrongnumargs", TestWrongNumArgsObjCmd,
NULL, NULL);
Tcl_CreateObjCommand(interp, "testfilesystem", TestFilesystemObjCmd,
@@ -5176,6 +5178,45 @@ TestbytestringObjCmd(
/*
*----------------------------------------------------------------------
*
+ * Testutf16stringObjCmd --
+ *
+ * This specifically tests the Tcl_GetUnicode and Tcl_NewUnicodeObj
+ * C functions which broke in Tcl 8.7 and were undetected by the
+ * existing test suite. Bug [b79df322a9]
+ *
+ * Results:
+ * Returns the TCL_OK result code.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static int
+Testutf16stringObjCmd(
+ ClientData dummy, /* Not used. */
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* The argument objects. */
+{
+ int n = 0;
+ const Tcl_UniChar *p;
+ (void)dummy;
+
+ if (objc != 2) {
+ Tcl_WrongNumArgs(interp, 1, objv, "string");
+ return TCL_ERROR;
+ }
+
+ p = Tcl_GetUnicode(objv[1]);
+ Tcl_SetObjResult(interp, Tcl_NewUnicodeObj(p, -1));
+ return TCL_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* TestsetCmd --
*
* Implements the "testset{err,noerr}" cmds that are used when testing
diff --git a/tests/string.test b/tests/string.test
index d497b42..d128a0b 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -34,6 +34,7 @@ testConstraint testindexobj [expr {[info commands testindexobj] ne {}}]
testConstraint testevalex [expr {[info commands testevalex] ne {}}]
testConstraint utf16 [expr {[string length \U010000] == 2}]
testConstraint testbytestring [llength [info commands testbytestring]]
+testConstraint testutf16string [llength [info commands testutf16string]]
# Used for constraining memory leak tests
testConstraint memory [llength [info commands memory]]
@@ -2635,6 +2636,17 @@ test string-32.17.$noComp {string is dict, valid dict packed in invalid dict} {
} 0
}; # foreach noComp {0 1}
+
+test string-bug-b79df322a9 {Tcl_GetUnicode/Tcl_NewUnicodeObj api} -constraints {
+ testutf16string
+} -body {
+ # This simple test suffices because the bug has nothing to do with
+ # the actual encoding conversion. The test was added because these
+ # functions are no longer called within the Tcl core and thus
+ # not tested by either `string`, not `encoding` tests.
+ testutf16string "abcde"
+} -result abcde
+
# cleanup
rename MemStress {}