diff options
author | culler <culler> | 2019-11-18 00:31:48 (GMT) |
---|---|---|
committer | culler <culler> | 2019-11-18 00:31:48 (GMT) |
commit | 48cd8747a366cb72f8622bb65e6579499c6bf5a3 (patch) | |
tree | 92986b6b8bbfb7085962c8c6bd3d0f4c6e668f5f | |
parent | 31e614ca16b2ba0cd71b928754679adecc80be80 (diff) | |
download | tk-48cd8747a366cb72f8622bb65e6579499c6bf5a3.zip tk-48cd8747a366cb72f8622bb65e6579499c6bf5a3.tar.gz tk-48cd8747a366cb72f8622bb65e6579499c6bf5a3.tar.bz2 |
Implement non-BMP unicode for macOS when TCL_UTF_MAX = 3 by encoding surrogates as 3-byte UTF-8-ish sequences.
-rw-r--r-- | macosx/tkMacOSXClipboard.c | 35 | ||||
-rw-r--r-- | macosx/tkMacOSXFont.c | 156 | ||||
-rw-r--r-- | macosx/tkMacOSXKeyEvent.c | 52 | ||||
-rw-r--r-- | macosx/tkMacOSXPrivate.h | 4 |
4 files changed, 166 insertions, 81 deletions
diff --git a/macosx/tkMacOSXClipboard.c b/macosx/tkMacOSXClipboard.c index 696b70e..a5f0ba1 100644 --- a/macosx/tkMacOSXClipboard.c +++ b/macosx/tkMacOSXClipboard.c @@ -35,10 +35,7 @@ static Tk_Window clipboardOwner = NULL; targetPtr->type == dispPtr->utf8Atom) { for (TkClipboardBuffer *cbPtr = targetPtr->firstBufferPtr; cbPtr; cbPtr = cbPtr->nextPtr) { - NSString *s = [[NSString alloc] initWithBytesNoCopy: - cbPtr->buffer length:cbPtr->length - encoding:NSUTF8StringEncoding freeWhenDone:NO]; - + NSString *s = TclUniToNSString(cbPtr->buffer, cbPtr->length); [string appendString:s]; [s release]; } @@ -126,11 +123,11 @@ TkSelGetSelection( int haveExternalClip = ([[NSPasteboard generalPasteboard] changeCount] != changeCount); + printf("TkSelGetSelection\n"); if (dispPtr && (haveExternalClip || dispPtr->clipboardActive) && selection == dispPtr->clipboardAtom && (target == XA_STRING || target == dispPtr->utf8Atom)) { NSString *string = nil; - NSString *clean; NSPasteboard *pb = [NSPasteboard generalPasteboard]; NSString *type = [pb availableTypeFromArray:[NSArray arrayWithObject: NSStringPboardType]]; @@ -139,25 +136,19 @@ TkSelGetSelection( string = [pb stringForType:type]; } if (string) { + int utfSize; + char *tclUni = NSStringToTclUni(string, &utfSize); + /* - * Replace all non-BMP characters by the replacement character 0xfffd. - * This is a workaround until Tcl supports TCL_UTF_MAX > 3. + * Re-encode the string using the encoding which is used in Tcl + * when TCL_UTF_MAX = 3. This replaces each UTF-16 surrogate with + * a 3-byte sequence generated using the UTF-8 algorithm. (Even + * though UTF-8 does not allow encoding surrogates, the algorithm + * does produce a 3-byte sequence.) 
*/ - int i, j, len = [string length]; - CFRange all = CFRangeMake(0, len); - UniChar *buffer = ckalloc(len*sizeof(UniChar)); - CFStringGetCharacters((CFStringRef) string, all, buffer); - for (i = 0, j = 0 ; j < len ; i++, j++) { - if (CFStringIsSurrogateHighCharacter(buffer[j])) { - buffer[i] = 0xfffd; - j++; - } else { - buffer[i] = buffer[j]; - } - } - clean = (NSString *)CFStringCreateWithCharacters(NULL, buffer, i); - ckfree(buffer); - result = proc(clientData, interp, [clean UTF8String]); + + result = proc(clientData, interp, tclUni); + ckfree(tclUni); } } else { Tcl_SetObjResult(interp, Tcl_ObjPrintf( diff --git a/macosx/tkMacOSXFont.c b/macosx/tkMacOSXFont.c index df7f770..8350908 100644 --- a/macosx/tkMacOSXFont.c +++ b/macosx/tkMacOSXFont.c @@ -101,6 +101,132 @@ static void DrawCharsInContext(Display *display, Drawable drawable, #pragma mark - #pragma mark Font Helpers:
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclUniToNSString --
+ *
+ *    Creates an NSString from bytes in the encoding Tcl uses when
+ *    TCL_UTF_MAX = 3 (the default for 8.6): UTF-8, except that a non-BMP
+ *    character is 6 bytes, its two UTF-16 surrogates each run through the
+ *    UTF-8 algorithm.  NSString rejects that as invalid UTF-8, so such
+ *    input is decoded to UTF-16 with Tcl_UtfToUniChar instead.
+ *
+ * Results:
+ *    A new NSString, owned by the caller, which may be nil.
+ *
+ * Side effects:
+ *    None.
+ *
+ */
+
+MODULE_SCOPE NSString*
+TclUniToNSString(
+    const char *source,
+    int numBytes)
+{
+    NSString *string = [[NSString alloc] initWithBytesNoCopy:(void *)source
+            length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO];
+    if (!string) {
+        unichar *characters = ckalloc(numBytes*sizeof(unichar));
+        const char *in = source, *end = source + numBytes;
+        unichar *out = characters;
+        while (in < end) {
+            if (Tcl_UtfCharComplete(in, (int)(end - in))) {
+                in += Tcl_UtfToUniChar(in, out++);
+            } else {
+                *out++ = (unsigned char) *in++; /* never read past end */
+            }
+        }
+        string = [[NSString alloc] initWithCharacters:characters
+                length:(out - characters)];
+        ckfree(characters);
+    }
+    return string;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclUniAtIndex --
+ *
+ *    Writes the Tcl encoding (at most 6 bytes) of the character starting at
+ *    a UTF-16 index of an NSString.  A surrogate pair gives one non-BMP code
+ *    point; an unpaired surrogate is encoded alone as its own code point.
+ *
+ * Results:
+ *    Returns the number of bytes written.
+ *
+ * Side effects:
+ *    Bytes are written at uni and the code point is written to *code.
+ *
+ */
+
+MODULE_SCOPE int
+TclUniAtIndex(
+    NSString *string,
+    int index,
+    char *uni,
+    unsigned int *code)
+{
+    char *ptr = uni;
+    UniChar uniChar = [string characterAtIndex: index];
+    if (CFStringIsSurrogateHighCharacter(uniChar)
+            && (NSUInteger) index + 1 < [string length]) {
+        UniChar lowChar = [string characterAtIndex: index + 1];
+        if (CFStringIsSurrogateLowCharacter(lowChar)) {
+            *code = CFStringGetLongCharacterForSurrogatePair(uniChar, lowChar);
+            ptr += Tcl_UniCharToUtf(uniChar, ptr);
+            ptr += Tcl_UniCharToUtf(lowChar, ptr);
+            return ptr - uni;
+        }
+    }
+    *code = uniChar;
+    if ((uniChar & 0xF800) == 0xD800) { /* unpaired: not valid in UTF-8 */
+        ptr += Tcl_UniCharToUtf(uniChar, ptr);
+        *ptr = '\0';
+        return ptr - uni;
+    }
+    [[string substringWithRange: NSMakeRange(index, 1)] getCString: uni
+            maxLength: XMaxTransChars encoding: NSUTF8StringEncoding];
+    return strlen(uni);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * NSStringToTclUni --
+ *
+ *    Encodes an NSString in the Tcl encoding used when TCL_UTF_MAX = 3, in
+ *    which each UTF-16 surrogate becomes its own 3-byte sequence.
+ *
+ * Results:
+ *    A NUL-terminated ckalloc'ed buffer which the caller must ckfree; its
+ *    byte length is stored in *numBytes unless numBytes is NULL.
+ *
+ */
+
+MODULE_SCOPE char*
+NSStringToTclUni(
+    NSString *string,
+    int *numBytes)
+{
+    unsigned int code;
+    int i, length = [string length];
+    char *ptr, *result = ckalloc(6*length + 1);
+    for (i = 0, ptr = result; i < length; i++) {
+        ptr += TclUniAtIndex(string, i, ptr, &code);
+        if (code > 0xffff){
+            i++;
+        }
+    }
+    *ptr = '\0';
+    if (numBytes) {
+        *numBytes = ptr - result;
+    }
+    return result;
+}
+
#define GetNSFontTraitsFromTkFontAttributes(faPtr) \ ((faPtr)->weight == TK_FW_BOLD ? NSBoldFontMask : NSUnboldFontMask) | \ ((faPtr)->slant == TK_FS_ITALIC ? 
NSItalicFontMask : NSUnitalicFontMask) @@ -844,8 +970,7 @@ TkpMeasureCharsInContext( if (maxLength > 32767) { maxLength = 32767; } - string = [[NSString alloc] initWithBytesNoCopy:(void*)source - length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO]; + string = TclUniToNSString((const char *)source, numBytes); if (!string) { length = 0; fit = rangeLength; @@ -1124,33 +1249,10 @@ DrawCharsInContext( !TkMacOSXSetupDrawingContext(drawable, gc, 1, &drawingContext)) { return; } - string = [[NSString alloc] initWithBytesNoCopy:(void*)source - length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO]; + string = TclUniToNSString((const char *)source, numBytes); if (!string) { - - /* - * The decoding might have failed because we got a fake UTF-8 byte - * array in which UTF-16 surrogates had been encoded using the UTF-8 - * algorithm, even though UTF-8 does not allow encoding surrogates. - * (When Tcl is compiled with TCL_UTF_MAX = 3 Tk uses this encoding - * internally.) We can attempt to decode the source using this - * encoding and see if Apple accepts the result as UTF-16. 
- */ - - const unichar *characters = ckalloc(numBytes*sizeof(unichar)); - const char *in = source; - unichar *out = (unichar *) characters; - while (in < source + numBytes) { - in += Tcl_UtfToUniChar(in, out++); - } - string = [[NSString alloc] initWithCharacters:characters - length:(out - characters)]; - ckfree(characters); - if (!string) { - return; - } + return; } - context = drawingContext.context; fg = TkMacOSXCreateCGColor(gc, gc->foreground); attributes = [fontPtr->nsAttributes mutableCopy]; diff --git a/macosx/tkMacOSXKeyEvent.c b/macosx/tkMacOSXKeyEvent.c index 677f77e..025cccb 100644 --- a/macosx/tkMacOSXKeyEvent.c +++ b/macosx/tkMacOSXKeyEvent.c @@ -14,7 +14,7 @@ */ #include "tkMacOSXPrivate.h" -#include "tkMacOSXEvent.h" +#include "tkMacOSXInt.h" #include "tkMacOSXConstants.h" /* @@ -331,42 +331,30 @@ static unsigned isFunctionKey(unsigned int code); } /* - * NSString represents a non-BMP character as a string of length 2 where - * the first character is the high surrogate and the second character is - * the low surrogate. We could record this in the XEvent by setting the - * keycode to the unicode code point and setting the trans_chars to the - * 4-byte UTF-8 string. However, that will not help as long as TCL_UTF_MAX - * is set to 3. Until that changes, we just replace non-BMP characters by - * the "replacement character" U+FFFD. + * Next we generate an XEvent for each unicode character in our string. + * + * NSString uses UTF-16 internally, which means that a non-BMP character is + * represented by a sequence of two 16-bit "surrogates". In principle we + * could record this in the XEvent by setting the keycode to the 32-bit + * unicode code point and setting the trans_chars string to the 4-byte + * UTF-8 string for the non-BMP character. However, that will not work + * when TCL_UTF_MAX is set to 3, as is the case for Tcl 8.6. 
A workaround + * used internally by Tcl 8.6 is to encode each surrogate as a 3-byte + * sequence using the UTF-8 algorithm (ignoring the fact that the UTF-8 + * encoding specification does not allow encoding UTF-16 surrogates). + * This gives a 6-byte encoding of the non-BMP character which we write into + * the trans_chars field of the XEvent. */ for (i = 0; i < len; i++) { - UniChar nextChar = [str characterAtIndex: i]; - if (CFStringIsSurrogateHighCharacter(nextChar)) { -#if 0 - UniChar lowChar = [str characterAtIndex: ++i]; - xEvent.xkey.keycode = CFStringGetLongCharacterForSurrogatePair( - nextChar, lowChar); - xEvent.xkey.nbytes = TkUniCharToUtf(xEvent.xkey.keycode, - &xEvent.xkey.trans_chars); -#else + xEvent.xkey.nbytes = TclUniAtIndex(str, i, xEvent.xkey.trans_chars, + &xEvent.xkey.keycode); + if (xEvent.xkey.keycode > 0xffff){ i++; - xEvent.xkey.keycode = 0xfffd; - strcpy(xEvent.xkey.trans_chars, "\xef\xbf\xbd"); - xEvent.xkey.nbytes = strlen(xEvent.xkey.trans_chars); -#endif - } else { - xEvent.xkey.keycode = (int) nextChar; - [[str substringWithRange: NSMakeRange(i,1)] - getCString: xEvent.xkey.trans_chars - maxLength: XMaxTransChars encoding: NSUTF8StringEncoding]; - xEvent.xkey.nbytes = strlen(xEvent.xkey.trans_chars); } - xEvent.xany.type = KeyPress; - releaseCode = (UInt16) nextChar; - Tk_QueueWindowEvent(&xEvent, TCL_QUEUE_TAIL); + xEvent.xany.type = KeyPress; + Tk_QueueWindowEvent(&xEvent, TCL_QUEUE_TAIL); } - releaseCode = (UInt16) [str characterAtIndex: 0]; } @@ -642,7 +630,7 @@ XGrabKeyboard( Time time) { keyboardGrabWinPtr = Tk_IdToWindow(display, grab_window); - TkWindow *captureWinPtr = (TkWindow *) TkMacOSXGetCapture(); + TkWindow *captureWinPtr = (TkWindow *) TkpGetCapture(); if (keyboardGrabWinPtr && captureWinPtr) { NSWindow *w = TkMacOSXDrawableWindow(grab_window); diff --git a/macosx/tkMacOSXPrivate.h b/macosx/tkMacOSXPrivate.h index 68bad41..a285bba 100644 --- a/macosx/tkMacOSXPrivate.h +++ b/macosx/tkMacOSXPrivate.h @@ -239,6 +239,10 
@@ MODULE_SCOPE int TkMacOSXServices_Init(Tcl_Interp *interp); MODULE_SCOPE int TkMacOSXRegisterServiceWidgetObjCmd(ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); +MODULE_SCOPE int TclUniAtIndex(NSString *string, int index, char *uni, + unsigned int *code); +MODULE_SCOPE NSString* TclUniToNSString(const char *source, int numBytes); +MODULE_SCOPE char* NSStringToTclUni(NSString *string, int *numBytes); #pragma mark Private Objective-C Classes |