summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorculler <culler>2019-11-18 00:31:48 (GMT)
committerculler <culler>2019-11-18 00:31:48 (GMT)
commit48cd8747a366cb72f8622bb65e6579499c6bf5a3 (patch)
tree92986b6b8bbfb7085962c8c6bd3d0f4c6e668f5f
parent31e614ca16b2ba0cd71b928754679adecc80be80 (diff)
downloadtk-48cd8747a366cb72f8622bb65e6579499c6bf5a3.zip
tk-48cd8747a366cb72f8622bb65e6579499c6bf5a3.tar.gz
tk-48cd8747a366cb72f8622bb65e6579499c6bf5a3.tar.bz2
Implement non-BMP unicode for macOS when TCL_UTF_MAX = 3 by encoding surrogates as 3-byte UTF-8-ish sequences.
-rw-r--r--macosx/tkMacOSXClipboard.c35
-rw-r--r--macosx/tkMacOSXFont.c156
-rw-r--r--macosx/tkMacOSXKeyEvent.c52
-rw-r--r--macosx/tkMacOSXPrivate.h4
4 files changed, 166 insertions, 81 deletions
diff --git a/macosx/tkMacOSXClipboard.c b/macosx/tkMacOSXClipboard.c
index 696b70e..a5f0ba1 100644
--- a/macosx/tkMacOSXClipboard.c
+++ b/macosx/tkMacOSXClipboard.c
@@ -35,10 +35,7 @@ static Tk_Window clipboardOwner = NULL;
targetPtr->type == dispPtr->utf8Atom) {
for (TkClipboardBuffer *cbPtr = targetPtr->firstBufferPtr;
cbPtr; cbPtr = cbPtr->nextPtr) {
- NSString *s = [[NSString alloc] initWithBytesNoCopy:
- cbPtr->buffer length:cbPtr->length
- encoding:NSUTF8StringEncoding freeWhenDone:NO];
-
+ NSString *s = TclUniToNSString(cbPtr->buffer, cbPtr->length);
[string appendString:s];
[s release];
}
@@ -126,11 +123,11 @@ TkSelGetSelection(
int haveExternalClip =
([[NSPasteboard generalPasteboard] changeCount] != changeCount);
+ printf("TkSelGetSelection\n");
if (dispPtr && (haveExternalClip || dispPtr->clipboardActive)
&& selection == dispPtr->clipboardAtom
&& (target == XA_STRING || target == dispPtr->utf8Atom)) {
NSString *string = nil;
- NSString *clean;
NSPasteboard *pb = [NSPasteboard generalPasteboard];
NSString *type = [pb availableTypeFromArray:[NSArray arrayWithObject:
NSStringPboardType]];
@@ -139,25 +136,19 @@ TkSelGetSelection(
string = [pb stringForType:type];
}
if (string) {
+ int utfSize;
+ char *tclUni = NSStringToTclUni(string, &utfSize);
+
/*
- * Replace all non-BMP characters by the replacement character 0xfffd.
- * This is a workaround until Tcl supports TCL_UTF_MAX > 3.
+ * Re-encode the string using the encoding which is used in Tcl
+ * when TCL_UTF_MAX = 3. This replaces each UTF-16 surrogate with
+ * a 3-byte sequence generated using the UTF-8 algorithm. (Even
+ * though UTF-8 does not allow encoding surrogates, the algorithm
+ * does produce a 3-byte sequence.)
*/
- int i, j, len = [string length];
- CFRange all = CFRangeMake(0, len);
- UniChar *buffer = ckalloc(len*sizeof(UniChar));
- CFStringGetCharacters((CFStringRef) string, all, buffer);
- for (i = 0, j = 0 ; j < len ; i++, j++) {
- if (CFStringIsSurrogateHighCharacter(buffer[j])) {
- buffer[i] = 0xfffd;
- j++;
- } else {
- buffer[i] = buffer[j];
- }
- }
- clean = (NSString *)CFStringCreateWithCharacters(NULL, buffer, i);
- ckfree(buffer);
- result = proc(clientData, interp, [clean UTF8String]);
+
+ result = proc(clientData, interp, tclUni);
+ ckfree(tclUni);
}
} else {
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
diff --git a/macosx/tkMacOSXFont.c b/macosx/tkMacOSXFont.c
index df7f770..8350908 100644
--- a/macosx/tkMacOSXFont.c
+++ b/macosx/tkMacOSXFont.c
@@ -101,6 +101,132 @@ static void DrawCharsInContext(Display *display, Drawable drawable,
#pragma mark -
#pragma mark Font Helpers:
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclUniToNSString --
+ *
+ * When Tcl is compiled with TCL_UTF_MAX = 3 (the default for 8.6) it cannot
+ * deal directly with UTF-8 encoded non-BMP characters, since their UTF-8
+ * encoding requires 4 bytes.
+ *
+ * As a workaround, these versions of Tcl encode non-BMP characters as a string
+ * of length 6 in which the high and low UTF-16 surrogates have been encoded
+ * using the UTF-8 algorithm. The UTF-8 encoding does not allow encoding
+ * surrogates, so these 6-byte strings are not valid UTF-8, and hence Apple's
+ * NSString class will refuse to instantiate an NSString from the 6-byte
+ * encoding. This function allows creating an NSString from a C-string which
+ * has been encoded using this scheme.
+ *
+ * Results:
+ * An NSString, which may be nil.
+ *
+ * Side effects:
+ * None.
+ *---------------------------------------------------------------------------
+ */
+
+MODULE_SCOPE NSString*
+TclUniToNSString(
+    const char *source,		/* Bytes in Tcl's internal UTF-8-like encoding. */
+    int numBytes)		/* Number of bytes to read from source. */
+{
+    /*
+     * Fast path: if the bytes are valid UTF-8, let NSString wrap them
+     * directly. No copy is requested and the buffer is not freed by the
+     * string (freeWhenDone:NO). The result comes from alloc/init, so the
+     * caller owns it and must release it.
+     */
+
+    NSString *string = [[NSString alloc] initWithBytesNoCopy:(void *)source
+            length:numBytes
+            encoding:NSUTF8StringEncoding
+            freeWhenDone:NO];
+
+    if (!string) {
+        /*
+         * The bytes were rejected as UTF-8, which is what happens when
+         * surrogates have been encoded as 3-byte sequences. Decode them to
+         * UTF-16 code units ourselves. Each iteration consumes at least
+         * one byte and emits exactly one code unit, so numBytes code units
+         * is always enough room.
+         */
+
+        unichar *characters = ckalloc(numBytes*sizeof(unichar));
+        const char *in = source;
+        const char *end = source + numBytes;
+        unichar *out = characters;
+
+        while (in < end) {
+            if (Tcl_UtfCharComplete(in, (int)(end - in))) {
+                in += Tcl_UtfToUniChar(in, out++);
+            } else {
+                /*
+                 * A multi-byte sequence is cut off by the end of the
+                 * buffer. Tcl_UtfToUniChar would read past the end, so
+                 * pass the remaining bytes through one at a time, the
+                 * same way Tcl treats a malformed lead byte.
+                 */
+
+                *out++ = (unsigned char) *in++;
+            }
+        }
+        string = [[NSString alloc] initWithCharacters:characters
+                length:(out - characters)];
+        ckfree(characters);
+    }
+    return string;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * TclUniAtIndex --
+ *
+ * Write a sequence of bytes up to length 6 which is an encoding of a UTF-16
+ * character in an NSString. Also record the unicode code point of the character.
+ * This may be a non-BMP character constructed by reading two surrogates from
+ * the NSString.
+ *
+ * Results:
+ * Returns the number of bytes written.
+ *
+ * Side effects:
+ * Bytes are written to the address uni and the unicode code point is written
+ * to the integer at address code.
+ *
+ */
+
+MODULE_SCOPE int
+TclUniAtIndex(
+    NSString *string,		/* String to read from. */
+    int index,			/* Index of the UTF-16 code unit to encode; the
+				 * caller must keep it within the string. */
+    char *uni,			/* Output buffer; up to 6 bytes are written for
+				 * a surrogate pair, otherwise the encoded
+				 * bytes plus a terminating NUL. */
+    unsigned int *code)		/* Receives the unicode code point. */
+{
+    char *ptr = uni;
+    UniChar uniChar = [string characterAtIndex: index];
+
+    /*
+     * Only treat a high surrogate as the start of a non-BMP character when
+     * a low surrogate really follows it. Reading index + 1 unconditionally
+     * raises a range exception when the high surrogate is the last code
+     * unit, and combining it with a non-surrogate yields a bogus code point
+     * which makes the caller skip the following character.
+     */
+
+    if (CFStringIsSurrogateHighCharacter(uniChar)
+            && (NSUInteger)(index + 1) < [string length]) {
+        UniChar lowChar = [string characterAtIndex: index + 1];
+
+        if (CFStringIsSurrogateLowCharacter(lowChar)) {
+            *code = CFStringGetLongCharacterForSurrogatePair(
+                    uniChar, lowChar);
+            ptr += Tcl_UniCharToUtf(uniChar, ptr);
+            ptr += Tcl_UniCharToUtf(lowChar, ptr);
+            return ptr - uni;
+        }
+    }
+
+    /*
+     * A single code unit: a BMP character or an unpaired surrogate. The
+     * reported code point is at most 0xffff, so callers will not skip the
+     * next code unit.
+     */
+
+    *code = uniChar;
+    if (![[string substringWithRange: NSMakeRange(index, 1)]
+            getCString: uni
+            maxLength: XMaxTransChars
+            encoding: NSUTF8StringEncoding]) {
+        /*
+         * The conversion failed (e.g. for an unpaired surrogate), so the
+         * buffer contents cannot be trusted and strlen must not be used.
+         * Encode the code unit with Tcl's own algorithm instead and
+         * terminate the result the same way getCString would have.
+         */
+
+        ptr += Tcl_UniCharToUtf(uniChar, ptr);
+        *ptr = '\0';
+        return ptr - uni;
+    }
+    return strlen(uni);
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * NSStringToTclUni --
+ *
+ * Encodes the unicode string represented by an NSString object using the
+ * special internal Tcl encoding used when TCL_UTF_MAX = 3. This encoding
+ * is similar to UTF-8 except that non-BMP characters are encoded as two
+ * successive 3-byte sequences which are constructed from UTF-16 surrogates
+ * by applying the UTF-8 algorithm. Even though the UTF-8 encoding does not
+ * allow encoding surrogates, the algorithm does produce a well-defined
+ * 3-byte sequence.
+ *
+ */
+
+MODULE_SCOPE char*
+NSStringToTclUni(
+    NSString *string,		/* String to encode. */
+    int *numBytes)		/* If not NULL, receives the number of bytes
+				 * written, not counting the terminating
+				 * NUL. */
+{
+    unsigned int code;
+    int i, length = [string length];
+
+    /*
+     * Allocate 6 bytes per UTF-16 code unit plus one for the terminating
+     * NUL. The result is allocated with ckalloc, so the caller must free it
+     * with ckfree.
+     */
+
+    char *ptr, *result = ckalloc(6*length + 1);
+
+    for (i = 0, ptr = result; i < length; i++) {
+        ptr += TclUniAtIndex(string, i, ptr, &code);
+        if (code > 0xffff){
+            /*
+             * A non-BMP code point means two code units (a surrogate
+             * pair) were consumed, so step over the second one.
+             */
+
+            i++;
+        }
+    }
+    *ptr = '\0';
+
+    /*
+     * Report the encoded length. Previously this out-parameter was never
+     * written, leaving the caller's variable uninitialized.
+     */
+
+    if (numBytes) {
+        *numBytes = ptr - result;
+    }
+    return result;
+}
+
#define GetNSFontTraitsFromTkFontAttributes(faPtr) \
((faPtr)->weight == TK_FW_BOLD ? NSBoldFontMask : NSUnboldFontMask) | \
((faPtr)->slant == TK_FS_ITALIC ? NSItalicFontMask : NSUnitalicFontMask)
@@ -844,8 +970,7 @@ TkpMeasureCharsInContext(
if (maxLength > 32767) {
maxLength = 32767;
}
- string = [[NSString alloc] initWithBytesNoCopy:(void*)source
- length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO];
+ string = TclUniToNSString((const char *)source, numBytes);
if (!string) {
length = 0;
fit = rangeLength;
@@ -1124,33 +1249,10 @@ DrawCharsInContext(
!TkMacOSXSetupDrawingContext(drawable, gc, 1, &drawingContext)) {
return;
}
- string = [[NSString alloc] initWithBytesNoCopy:(void*)source
- length:numBytes encoding:NSUTF8StringEncoding freeWhenDone:NO];
+ string = TclUniToNSString((const char *)source, numBytes);
if (!string) {
-
- /*
- * The decoding might have failed because we got a fake UTF-8 byte
- * array in which UTF-16 surrogates had been encoded using the UTF-8
- * algorithm, even though UTF-8 does not allow encoding surrogates.
- * (When Tcl is compiled with TCL_UTF_MAX = 3 Tk uses this encoding
- * internally.) We can attempt to decode the source using this
- * encoding and see if Apple accepts the result as UTF-16.
- */
-
- const unichar *characters = ckalloc(numBytes*sizeof(unichar));
- const char *in = source;
- unichar *out = (unichar *) characters;
- while (in < source + numBytes) {
- in += Tcl_UtfToUniChar(in, out++);
- }
- string = [[NSString alloc] initWithCharacters:characters
- length:(out - characters)];
- ckfree(characters);
- if (!string) {
- return;
- }
+ return;
}
-
context = drawingContext.context;
fg = TkMacOSXCreateCGColor(gc, gc->foreground);
attributes = [fontPtr->nsAttributes mutableCopy];
diff --git a/macosx/tkMacOSXKeyEvent.c b/macosx/tkMacOSXKeyEvent.c
index 677f77e..025cccb 100644
--- a/macosx/tkMacOSXKeyEvent.c
+++ b/macosx/tkMacOSXKeyEvent.c
@@ -14,7 +14,7 @@
*/
#include "tkMacOSXPrivate.h"
-#include "tkMacOSXEvent.h"
+#include "tkMacOSXInt.h"
#include "tkMacOSXConstants.h"
/*
@@ -331,42 +331,30 @@ static unsigned isFunctionKey(unsigned int code);
}
/*
- * NSString represents a non-BMP character as a string of length 2 where
- * the first character is the high surrogate and the second character is
- * the low surrogate. We could record this in the XEvent by setting the
- * keycode to the unicode code point and setting the trans_chars to the
- * 4-byte UTF-8 string. However, that will not help as long as TCL_UTF_MAX
- * is set to 3. Until that changes, we just replace non-BMP characters by
- * the "replacement character" U+FFFD.
+ * Next we generate an XEvent for each unicode character in our string.
+ *
+ * NSString uses UTF-16 internally, which means that a non-BMP character is
+ * represented by a sequence of two 16-bit "surrogates". In principle we
+ * could record this in the XEvent by setting the keycode to the 32-bit
+ * unicode code point and setting the trans_chars string to the 4-byte
+ * UTF-8 string for the non-BMP character. However, that will not work
+ * when TCL_UTF_MAX is set to 3, as is the case for Tcl 8.6. A workaround
+ * used internally by Tcl 8.6 is to encode each surrogate as a 3-byte
+ * sequence using the UTF-8 algorithm (ignoring the fact that the UTF-8
+ * encoding specification does not allow encoding UTF-16 surrogates).
+ * This gives a 6-byte encoding of the non-BMP character which we write into
+ * the trans_chars field of the XEvent.
*/
for (i = 0; i < len; i++) {
- UniChar nextChar = [str characterAtIndex: i];
- if (CFStringIsSurrogateHighCharacter(nextChar)) {
-#if 0
- UniChar lowChar = [str characterAtIndex: ++i];
- xEvent.xkey.keycode = CFStringGetLongCharacterForSurrogatePair(
- nextChar, lowChar);
- xEvent.xkey.nbytes = TkUniCharToUtf(xEvent.xkey.keycode,
- &xEvent.xkey.trans_chars);
-#else
+ xEvent.xkey.nbytes = TclUniAtIndex(str, i, xEvent.xkey.trans_chars,
+ &xEvent.xkey.keycode);
+ if (xEvent.xkey.keycode > 0xffff){
i++;
- xEvent.xkey.keycode = 0xfffd;
- strcpy(xEvent.xkey.trans_chars, "\xef\xbf\xbd");
- xEvent.xkey.nbytes = strlen(xEvent.xkey.trans_chars);
-#endif
- } else {
- xEvent.xkey.keycode = (int) nextChar;
- [[str substringWithRange: NSMakeRange(i,1)]
- getCString: xEvent.xkey.trans_chars
- maxLength: XMaxTransChars encoding: NSUTF8StringEncoding];
- xEvent.xkey.nbytes = strlen(xEvent.xkey.trans_chars);
}
- xEvent.xany.type = KeyPress;
- releaseCode = (UInt16) nextChar;
- Tk_QueueWindowEvent(&xEvent, TCL_QUEUE_TAIL);
+ xEvent.xany.type = KeyPress;
+ Tk_QueueWindowEvent(&xEvent, TCL_QUEUE_TAIL);
}
-
releaseCode = (UInt16) [str characterAtIndex: 0];
}
@@ -642,7 +630,7 @@ XGrabKeyboard(
Time time)
{
keyboardGrabWinPtr = Tk_IdToWindow(display, grab_window);
- TkWindow *captureWinPtr = (TkWindow *) TkMacOSXGetCapture();
+ TkWindow *captureWinPtr = (TkWindow *) TkpGetCapture();
if (keyboardGrabWinPtr && captureWinPtr) {
NSWindow *w = TkMacOSXDrawableWindow(grab_window);
diff --git a/macosx/tkMacOSXPrivate.h b/macosx/tkMacOSXPrivate.h
index 68bad41..a285bba 100644
--- a/macosx/tkMacOSXPrivate.h
+++ b/macosx/tkMacOSXPrivate.h
@@ -239,6 +239,10 @@ MODULE_SCOPE int TkMacOSXServices_Init(Tcl_Interp *interp);
MODULE_SCOPE int TkMacOSXRegisterServiceWidgetObjCmd(ClientData clientData,
Tcl_Interp *interp, int objc,
Tcl_Obj *const objv[]);
+MODULE_SCOPE int TclUniAtIndex(NSString *string, int index, char *uni,
+ unsigned int *code);
+MODULE_SCOPE NSString* TclUniToNSString(const char *source, int numBytes);
+MODULE_SCOPE char* NSStringToTclUni(NSString *string, int *numBytes);
#pragma mark Private Objective-C Classes