From e87d0bcdf463e1858295c83daaee867dd695dba0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 29 Nov 2017 12:27:22 +0000 Subject: Fix Tcl_UtfFindFirst()/Tcl_UtfFindLast(), which were broken by [83c0c569d6]. Not detected, because those functions aren't used anywhere in Tcl. So, added new test-cases, makeing sure this doesn't happen again. --- generic/tclTest.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclUtf.c | 14 +++++++------- tests/utf.test | 11 +++++++++-- 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index e8539e8..8a59b83 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -407,6 +407,12 @@ static Tcl_FSMatchInDirectoryProc SimpleMatchInDirectory; static int TestNumUtfCharsCmd(ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); +static int TestFindFirstCmd(ClientData clientData, + Tcl_Interp *interp, int objc, + Tcl_Obj *const objv[]); +static int TestFindLastCmd(ClientData clientData, + Tcl_Interp *interp, int objc, + Tcl_Obj *const objv[]); static int TestHashSystemHashCmd(ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); @@ -668,6 +674,10 @@ Tcltest_Init( TestsetobjerrorcodeCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testnumutfchars", TestNumUtfCharsCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testfindfirst", + TestFindFirstCmd, NULL, NULL); + Tcl_CreateObjCommand(interp, "testfindlast", + TestFindLastCmd, NULL, NULL); Tcl_CreateCommand(interp, "testsetplatform", TestsetplatformCmd, NULL, NULL); Tcl_CreateCommand(interp, "teststaticpkg", TeststaticpkgCmd, @@ -6680,6 +6690,50 @@ TestNumUtfCharsCmd( return TCL_OK; } +/* + * Used to check correct operation of Tcl_UtfFindFirst + */ + +static int +TestFindFirstCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + if (objc > 1) { + int len = -1; + + if (objc > 2) { + (void) Tcl_GetIntFromObj(interp, objv[2], &len); + } + Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_UtfFindFirst(Tcl_GetString(objv[1]), len), -1)); + } + return TCL_OK; +} + +/* + * Used to check correct operation of Tcl_UtfFindLast + */ + +static int +TestFindLastCmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *const objv[]) +{ + if (objc > 1) { + int len = -1; + + if (objc > 2) { + (void) Tcl_GetIntFromObj(interp, objv[2], &len); + } + Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_UtfFindLast(Tcl_GetString(objv[1]), len), -1)); + } + return TCL_OK; +} + #if defined(HAVE_CPUID) || defined(_WIN32) /* *---------------------------------------------------------------------- diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 17f769d..c60e99e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -100,7 +100,7 @@ static int UtfCount(int ch); static inline int UtfCount( - int ch) /* The Tcl_UniChar whose size is returned. */ + int ch) /* The Unicode character whose size is returned. */ { if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) { return 1; @@ -556,10 +556,10 @@ Tcl_UtfFindFirst( #if TCL_UTF_MAX == 4 if (!len) { len += TclUtfToUniChar(src, &find); - fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } #endif - if (find == fullchar) { + if (fullchar == ch) { return src; } if (*src == '\0') { @@ -579,7 +579,7 @@ Tcl_UtfFindFirst( * part of the UTF-8 string. Equivalent to Plan 9 utfrrune(). * * Results: - * As above. If the Tcl_UniChar does not exist in the given string, the + * As above. If the Unicode character does not exist in the given string, the * return value is NULL. * * Side effects: @@ -604,10 +604,10 @@ Tcl_UtfFindLast( #if TCL_UTF_MAX == 4 if (!len) { len += TclUtfToUniChar(src, &find); - fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } #endif - if (find == fullchar) { + if (fullchar == ch) { last = src; } if (*src == '\0') { @@ -707,7 +707,7 @@ Tcl_UtfPrev( * * Tcl_UniCharAtIndex -- * - * Returns the Unicode character represented at the specified character + * Returns the Tcl_UniChar represented at the specified character * (not byte) position in the UTF-8 string. * * Results: diff --git a/tests/utf.test b/tests/utf.test index 45f9c0c..d0fa7be 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -86,6 +86,9 @@ test utf-3.1 {Tcl_UtfCharComplete} { } {} testConstraint testnumutfchars [llength [info commands testnumutfchars]] +testConstraint testfindfirst [llength [info commands testfindfirst]] +testConstraint testfindlast [llength [info commands testfindlast]] + test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { testnumutfchars "" } {0} @@ -118,8 +121,12 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testnumutfchars [testbytestring "\x00"] 2 } {2} -test utf-5.1 {Tcl_UtfFindFirsts} { -} {} +test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { + testfindfirst [testbytestring "abcbc"] 98 +} {bcbc} +test utf-5.1 {Tcl_UtfFindLast} {testfindlast testbytestring} { + testfindlast [testbytestring "abcbc"] 98 +} {bc} test utf-6.1 {Tcl_UtfNext} { } {} -- cgit v0.12