summaryrefslogtreecommitdiffstats
path: root/generic/tkTextLineBreak.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tkTextLineBreak.c')
-rw-r--r--generic/tkTextLineBreak.c1010
1 files changed, 1010 insertions, 0 deletions
diff --git a/generic/tkTextLineBreak.c b/generic/tkTextLineBreak.c
new file mode 100644
index 0000000..cc51dd8
--- /dev/null
+++ b/generic/tkTextLineBreak.c
@@ -0,0 +1,1010 @@
+/*
+ * tkTextLineBreak.c --
+ *
+ * This module provides line break computation for line wrapping.
+ * It uses the library "libunibreak" (from Wu Yongwei) for the
+ * computation, but only if available (currently only UNIX), and if
+ * the language support is enabled, otherwise our own line break
+ * algorithm is used (it's a simplified version of the recommendation
+ * at http://www.unicode.org/reports/tr14/tr14-26.html).
+ *
+ * The alternative would be to use the ICU library (http://site.icu-project.org/)
+ * instead of libunibreak, but this would require supporting the very
+ * complex interface of a dynamically loaded library; in other words, we
+ * would need dozens of function pointers. This is not really a drawback,
+ * and the ICU library is probably the better choice, but I think that a
+ * change to the ICU library is reasonable only if the Tcl/Tk developer team
+ * decides to use this library for complete Unicode support as well (character
+ * conversion, for instance).
+ *
+ * Copyright (c) 2015-2017 Gregor Cramer
+ *
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ */
+
+#include "tkText.h"
+
+#include <ctype.h>
+#include <assert.h>
+
+/*
+ * Integer maximum: the operands are compared after conversion to 'int'.
+ * Every macro argument is parenthesized, so that a compound expression is
+ * cast and selected as a whole. Note that the selected argument is evaluated
+ * twice, so arguments with side effects should be avoided.
+ */
+#ifndef MAX
+# define MAX(a,b) (((int) (a)) < ((int) (b)) ? (b) : (a))
+#endif
+
+
+/*
+ * Signature of a line break routine: it receives the text, its length,
+ * an optional language code, and the buffer receiving the break values.
+ */
+typedef void (*ComputeBreakLocationsFunc)(
+    const unsigned char *text,
+    size_t len,
+    const char *lang,
+    char *brks);
+
+/*
+ * Forward declaration of the built-in line break routine (file-local, hence
+ * defined elsewhere in this file).
+ */
+static void ComputeBreakLocations(
+    const unsigned char *text,
+    size_t len,
+    const char *lang,
+    char *brks);
+
+/*
+ * The currently selected line break routine. Initially this is the built-in
+ * routine; LoadLibUnibreak replaces it when the external library was loaded.
+ */
+static ComputeBreakLocationsFunc libLinebreakFunc = ComputeBreakLocations;
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * GetLineBreakFunc --
+ *
+ * Return the appropriate line break function. If argument 'lang'
+ * is NULL, then our own line break algorithm will be used (fast,
+ * but a bit simple). If 'lang' is not NULL, then this function
+ * tries to load the library "libunibreak" (currently only UNIX).
+ * If the load succeeds, then set_linebreaks_utf8 will be returned,
+ * otherwise ComputeBreakLocations will be returned.
+ *
+ * Note that "libunibreak" has language specific support, but
+ * currently only for zh, ja, and ko. Nevertheless any non-NULL
+ * value for 'lang' tries to use this library.
+ *
+ * Results:
+ * The line break function to be used for the computation.
+ *
+ * Side effects:
+ * The "libunibreak" library may be loaded, if available.
+ *
+ *----------------------------------------------------------------------
+ */
+
+#ifdef __UNIX__
+
+/*
+ * LoadFile --
+ *
+ * Load the shared library denoted by 'pathPtr' and resolve the symbols
+ * listed in 'symbols' into 'funcs', using the loader function which is
+ * selected at compile time by the Tcl version. The Tcl 8.5 variant passes
+ * only the first two entries of 'symbols' and 'funcs'.
+ *
+ * Results:
+ * The status code of the underlying Tcl load function.
+ */
+static int
+LoadFile(
+    Tcl_Interp *interp,
+    Tcl_Obj *pathPtr,
+    Tcl_LoadHandle *handle,
+    char const **symbols,
+    void **funcs)
+{
+    int status;
+
+#if TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION == 5
+    /* Keep backward compatibility to 8.5 */
+    status = Tcl_FSLoadFile(interp, pathPtr, symbols[0], symbols[1],
+            (void *) &funcs[0], (void *) &funcs[1], handle, NULL);
+#else
+    status = Tcl_LoadFile(interp, pathPtr, symbols, TCL_LOAD_GLOBAL, funcs, handle);
+#endif
+
+    return status;
+}
+
+/*
+ * LoadLibUnibreak --
+ *
+ * Try to load the external line break library, first under the name
+ * "libunibreak.so.1", then under the name of its predecessor
+ * "liblinebreak.so.2". On success the initialization function of the
+ * library is invoked, and 'libLinebreakFunc' is set to the library's
+ * "set_linebreaks_utf8" function. On failure 'libLinebreakFunc' is left
+ * unchanged, and the error message is removed from the interpreter.
+ *
+ * The load handle is not released by this function.
+ */
+static void
+LoadLibUnibreak(
+    Tcl_Interp *interp)
+{
+    typedef void (*InitFunc)(void);
+
+    static char const *Symbols[3] = {
+        "init_linebreak",
+        "set_linebreaks_utf8",
+        NULL
+    };
+    static char const *const LibNames[2] = {
+        "libunibreak.so.1",
+        "liblinebreak.so.2"	/* the predecessor of libunibreak */
+    };
+
+    void *Funcs[sizeof(Symbols)/sizeof(Symbols[0])];
+    Tcl_LoadHandle handle;
+    int rc = TCL_ERROR;		/* a Tcl status code, not a boolean */
+    unsigned k;
+
+    for (k = 0; rc != TCL_OK && k < sizeof(LibNames)/sizeof(LibNames[0]); ++k) {
+        Tcl_Obj *pathPtr = Tcl_NewStringObj(LibNames[k], -1);
+
+        Tcl_IncrRefCount(pathPtr);
+        rc = LoadFile(interp, pathPtr, &handle, Symbols, Funcs);
+        Tcl_DecrRefCount(pathPtr);
+
+        if (rc != TCL_OK) {
+            /* A missing library is not an error, so drop the error message. */
+            Tcl_ResetResult(interp);
+        }
+    }
+
+    if (rc == TCL_OK) {
+        ((InitFunc) Funcs[0])();
+        libLinebreakFunc = (ComputeBreakLocationsFunc) Funcs[1];
+    }
+}
+
+#endif /* __UNIX__ */
+
+/*
+ * Select the line break routine for the given language: the built-in routine
+ * if 'lang' is NULL, otherwise the routine of the external library if that
+ * library could be loaded (the built-in one if not). Loading is attempted at
+ * most once. The result is never NULL.
+ */
+static ComputeBreakLocationsFunc
+GetLineBreakFunc(
+    Tcl_Interp *interp,
+    char const *lang)
+{
+#if TCL_UTF_MAX > 4 /* exclude non-standard encodings */
+    /*
+     * IMPORTANT NOTE:
+     *
+     * TCL_UTF_MAX > 4 is a severe violation of the standard. It's not possible
+     * anymore to use external libraries.
+     *
+     * Furthermore it is causing security problems for all systems, even for
+     * TCL itself. Example: an application is storing a pseudo UTF-8 string
+     * (generated by a TCL library with TCL_UTF_MAX > 4) into a database. Later
+     * the reader of the database fails to read/display this string, or is even
+     * crashing, because the stored string is not UTF-8 conform. In other words:
+     * the database is damaged. It is possible that with the introduction of
+     * TCL_UTF_MAX > 4 the TCL library will get the general assessment to be a
+     * safety-endangering system.
+     *
+     * Another fact: the whole UTF-8 string support of the TCL library is not
+     * reliable anymore. Example:
+     *
+     * int ch, isWordchar;
+     * TkUtfToUniChar(string, &ch);
+     * isWordchar = Tcl_UniCharIsWordChar(ch);
+     *
+     * The result of Tcl_UniCharIsWordChar() might be incorrect, because type
+     * 'int' (32 bit) will be truncated to type 'Tcl_UniChar' (16 bit). With
+     * the introduction of TCL_UTF_MAX > 4 the string handling is not reliable
+     * anymore.
+     *
+     * The whole thing with TCL_UTF_MAX > 4 is nothing else than ignorance of
+     * standards, and the willingness to endanger the safety of data and applications.
+     * In general it's even not possible anymore to work with UTF-8 string in a
+     * proper way.
+     */
+
+    /*
+     * The external library cannot be used in this configuration, so hand out
+     * the built-in routine. Returning NULL is not an option, because the
+     * caller invokes the returned function unconditionally.
+     */
+
+    return ComputeBreakLocations;
+#endif /* TCL_UTF_MAX > 4 */
+#ifdef __UNIX__
+    if (lang) {
+        static bool loaded = false;
+
+        if (!loaded) {
+            /*
+             * Remember the attempt, regardless of the outcome, otherwise every
+             * call would try to load (and to initialize) the library again.
+             */
+            loaded = true;
+            LoadLibUnibreak(interp);
+        }
+    }
+#endif
+    /* Without language the built-in algorithm is used, even if the library is loaded. */
+    return lang ? libLinebreakFunc : ComputeBreakLocations;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TkTextComputeBreakLocations --
+ *
+ * Compute break locations in UTF-8 text. This function expects
+ * a nul-terminated string (this means that the character at position
+ * 'len' must be NUL). Thus it is also required that the break buffer
+ * 'brks' has at least size 'len+1'. If 'lang' is not NULL, then the
+ * external library libunibreak will be used for the line break
+ * computation, but only if this library is loadable, otherwise the
+ * internal algorithm will be used.
+ *
+ * Results:
+ * The computed break locations. This function returns 'true' if
+ * the external linebreak library has been used for the computation,
+ * otherwise 'false' will be returned.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+/*
+ * HyphenHasLetterContext --
+ *
+ * The HYPHEN-MINUS (U+002D) needs special context treatment. For simplicity
+ * we only check whether the hyphen at position 'hyphen' has two preceding
+ * and two succeeding letters. Reading behind the hyphen is safe as long as
+ * 'text' is nul-terminated: the NUL character is not a letter, so the scan
+ * stops there.
+ *
+ * Results:
+ * 'true' if the hyphen is enclosed by two letters on either side.
+ */
+static bool
+HyphenHasLetterContext(
+    const char *text,		/* start of the nul-terminated string */
+    const char *hyphen)		/* position of the hyphen inside 'text' */
+{
+    const char *prev, *prevPrev, *next;
+    Tcl_UniChar uc;
+
+    prev = Tcl_UtfPrev(hyphen, text);
+    if (prev == hyphen) {
+        return false;		/* hyphen is the first character */
+    }
+    Tcl_UtfToUniChar(prev, &uc);
+    if (!Tcl_UniCharIsAlpha(uc)) {
+        return false;
+    }
+
+    prevPrev = Tcl_UtfPrev(prev, text);
+    if (prevPrev == prev) {
+        return false;		/* only one character before the hyphen */
+    }
+    Tcl_UtfToUniChar(prevPrev, &uc);
+    if (!Tcl_UniCharIsAlpha(uc)) {
+        return false;
+    }
+
+    next = hyphen + 1;
+    next += Tcl_UtfToUniChar(next, &uc);
+    if (!Tcl_UniCharIsAlpha(uc)) {
+        return false;
+    }
+    Tcl_UtfToUniChar(next, &uc);
+    return Tcl_UniCharIsAlpha(uc) != 0;
+}
+
+/*
+ * Compute the break values for 'text' into 'brks' (which needs room for
+ * 'len + 1' values), and post-process the allowed breaks behind '-' and '/'.
+ * Returns 'true' if the external line break library did the computation.
+ */
+bool
+TkTextComputeBreakLocations(
+    Tcl_Interp *interp,
+    const char *text,		/* must be nul-terminated */
+    unsigned len,		/* without trailing nul byte */
+    const char *lang,		/* can be NULL */
+    char *brks)
+{
+    ComputeBreakLocationsFunc func;
+    int lastBreakablePos = -1;
+    unsigned i;
+
+    assert(text);
+    assert(brks);
+    assert(text[len] == '\0');
+    /* <ctype.h> functions require a value in the range of 'unsigned char'. */
+    assert(!lang || (isalpha((unsigned char) lang[0])
+            && isalpha((unsigned char) lang[1]) && !lang[2]));
+
+    func = GetLineBreakFunc(interp, lang);
+    if (!func) {
+        /* No function has been provided, so use the built-in algorithm. */
+        func = ComputeBreakLocations;
+    }
+
+    /*
+     * The algorithm doesn't give us a break value for the last character if we do
+     * not include the final nul char into the computation.
+     */
+
+    (*func)((const unsigned char *) text, (size_t) len + 1, lang, brks);
+
+    for (i = 0; i < len; ++i) {
+        if (brks[i] != LINEBREAK_ALLOWBREAK) {
+            /* Only allowed (non-mandatory) breaks are subject to the fixes below. */
+            continue;
+        }
+
+        if (text[i] == '-') {
+            /*
+             * Fix the problem with the contextual hyphen-minus sign, the implementation
+             * of libunibreak has (possibly) forgotten this case.
+             * TODO: Is there a better method for the decision?
+             */
+
+            if (!HyphenHasLetterContext(text, text + i)) {
+                /* The break is revoked, so this is not a breakable position. */
+                brks[i] = LINEBREAK_NOBREAK;
+                continue;
+            }
+        } else if (text[i] == '/' && i > 8) {
+            /*
+             * Ignore the breaking chance if there is a chance immediately before:
+             * no break inside "c/o", and no break after "http://" in a long line
+             * (a suggestion from Wu Yongwei). The break value must be revoked,
+             * because 'brks' is the only result which is seen by the caller.
+             */
+
+            if (lastBreakablePos >= (int) i - 2
+                    || (i > 40u && lastBreakablePos >= (int) i - 7 && text[i - 1] == '/')) {
+                brks[i] = LINEBREAK_NOBREAK;
+                continue;
+            }
+
+            /*
+             * Special rule to treat Unix paths more nicely (a suggestion from Wu
+             * Yongwei): do not break behind the leading slash of a path which is
+             * following a space.
+             */
+
+            if (i < len - 1 && text[i + 1] != ' ' && text[i - 1] == ' ') {
+                brks[i] = LINEBREAK_NOBREAK;
+                lastBreakablePos = i - 1;
+                continue;
+            }
+        }
+
+        lastBreakablePos = i;
+    }
+
+    return func != ComputeBreakLocations;
+}
+
+/*
+ * The following implements the recommendations at
+ * http://www.unicode.org/reports/tr14/tr14-26.html, but simplified -
+ * no language specific support, not all of the rules (especially no
+ * combining marks), and mostly restricted to Latin-1 and relevant
+ * letters not belonging to specific languages. For a more sophisticated
+ * line break algorithm the library "libunibreak" (from Wu Yongwei)
+ * should be used.
+ */
+
+/*
+ * The line break classes used by the tables of the built-in algorithm. The
+ * first enumerator (AI) has the value zero.
+ */
+typedef enum {
+    /* CR, LF, and NL are interpreted as BK, hence only BK is listed here. */
+    AI, AL, B2, BA, BB, BK,
+    CL, CP, EX, GL, HY, IN,
+    IS, NS, NU, OP, PO, PR,
+    QU, SP, SY, WJ, ZW
+} LBClass;
+
+/* Filler for the table entries below which have no explicit class; expands to AI. */
+#define __ AI
+
+/*
+ * Changes in table below (different from Unicode recommendation):
+ *
+ * 0a: CB -> BK (LINE FEED)
+ * 0d: CR -> BK (CARRIAGE RETURN)
+ * 0e: XX -> BK (SHIFT OUT)
+ * 23: AL -> IN (NUMBER SIGN)
+ * 26: AL -> BB (AMPERSAND)
+ * 3d: AL -> GL (EQUALS SIGN)
+ * 60: CM -> AL (GRAVE ACCENT)
+ */
+
+/*
+ * Line break classes (LBClass values) for 256 entries; the row label is the
+ * high nibble of the index, the column label is the low nibble.
+ * NOTE(review): presumably indexed by the code points U+0000..U+00FF - the
+ * lookup code is not visible here, please confirm.
+ */
+static const char Table_0000[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, BA, BK, BK, BK, BK, BK, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ SP, EX, QU, IN, PR, PO, BB, QU, OP, CP, AL, PR, IS, HY, IS, SY, /* 20 - 2f */
+/* 3 */ NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, IS, IS, AL, GL, AL, EX, /* 30 - 3f */
+/* 4 */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 40 - 4f */
+/* 5 */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, OP, PR, CP, AL, AL, /* 50 - 5f */
+/* 6 */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, /* 60 - 6f */
+/* 7 */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, OP, BA, CL, AL, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ GL, OP, PO, PR, PR, PR, AL, AL, AL, AL, __, QU, __, __, AL, AL, /* a0 - af */
+/* b */ PO, PR, AL, AL, BB, __, AL, AL, AL, AL, __, __, AL, AL, AL, OP, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Changes in table below (different from Unicode recommendation):
+ *
+ * e2 80 89: BA -> WJ (THIN SPACE)
+ * e2 80 8a: BA -> WJ (HAIR SPACE)
+ */
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 8 - b
+ * contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 80, indexed by the final byte - the lookup code is not visible here.
+ * NOTE(review): the change list preceding this table names WJ for THIN SPACE
+ * and HAIR SPACE (final bytes 89 and 8a), yet both entries below are __ -
+ * please confirm which one is intended.
+ */
+static const char Table_E280[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ BA, BA, BA, BA, BA, BA, BA, GL, BA, __, __, ZW, __, __, __, __, /* 80 - 8f */
+/* 9 */ BA, AL, BA, BA, B2, AL, AL, AL, QU, QU, OP, QU, QU, QU, OP, QU, /* 90 - 9f */
+/* a */ AL, AL, AL, AL, IN, IN, IN, BA, BK, BK, __, __, __, __, __, GL, /* a0 - af */
+/* b */ PO, PO, PO, PO, PO, PO, PO, PO, AL, QU, QU, AL, NS, NS, AL, AL, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 8 - b
+ * contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 81, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E281[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ AL, AL, AL, AL, IS, OP, CL, NS, NS, NS, AL, AL, AL, AL, AL, AL, /* 80 - 8f */
+/* 9 */ AL, AL, __, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, __, /* 90 - 9f */
+/* a */ WJ, AL, AL, AL, AL, __, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, __, OP, CL, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 8 - b
+ * contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 82, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E282[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, __, __, __, __, __, CL, CL, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ PR, PR, PR, PR, PR, PR, PR, PO, PR, PR, PR, PR, PR, PR, PR, PR, /* a0 - af */
+/* b */ PR, PR, PR, PR, PR, PR, PR, PR, PR, PR, PR, PR, PR, PR, PR, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only a few opening
+ * (OP) and closing (CL) entries in the rows 8 and a are explicit.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 8C, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E28C[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, OP, CL, OP, CL, __, __, __, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, __, __, __, OP, CL, __, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only alternating
+ * opening (OP) and closing (CL) entries in the rows a and b are explicit.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 9D, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E29D[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, __, __, OP, CL, OP, CL, OP, CL, OP, CL, /* a0 - af */
+/* b */ OP, CL, OP, CL, OP, CL, __, __, __, __, __, __, __, __, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only opening (OP) and
+ * closing (CL) entries in the rows 8 and a are explicit.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 9F, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E29F[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, OP, CL, __, __, __, __, __, __, __, __, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, OP, CL, OP, CL, OP, CL, OP, CL, OP, CL, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only alternating
+ * opening (OP) and closing (CL) entries in the rows 8 and 9 are explicit.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 A6, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E2A6[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, OP, CL, OP, CL, OP, CL, OP, CL, OP, CL, OP, CL, OP, /* 80 - 8f */
+/* 9 */ CL, OP, CL, OP, CL, OP, CL, OP, CL, __, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only opening (OP) and
+ * closing (CL) entries in the rows 9 and b are explicit.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 A7, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E2A7[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, OP, CL, OP, CL, __, __, __, __, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, OP, CL, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 8 - b
+ * contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E2 B8, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E2B8[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ AL, AL, QU, QU, QU, QU, AL, AL, AL, QU, QU, AL, QU, QU, AL, AL, /* 80 - 8f */
+/* 9 */ AL, AL, AL, AL, AL, AL, AL, AL, OP, AL, AL, AL, QU, QU, AL, AL, /* 90 - 9f */
+/* a */ QU, QU, OP, CL, OP, CL, OP, CL, OP, CL, AL, AL, AL, AL, AL, __, /* a0 - af */
+/* b */ AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, B2, B2, AL, AL, AL, AL, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 8, 9,
+ * and b contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * E3 80, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_E380[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, CL, CL, AL, __, NS, __, __, OP, CL, OP, CL, OP, CL, OP, CL, /* 80 - 8f */
+/* 9 */ OP, CL, __, __, OP, CL, OP, CL, OP, CL, OP, CL, NS, OP, CL, CL, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ AL, __, __, __, __, __, __, __, __, __, __, NS, NS, AL, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 9 and b
+ * contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * EF B8, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_EFB8[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 80 - 8f */
+/* 9 */ IS, CL, CL, IS, IS, AL, AL, OP, CL, IN, __, __, __, __, __, __, /* 90 - 9f */
+/* a */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ AL, AL, AL, AL, AL, OP, CL, OP, CL, OP, CL, OP, CL, OP, CL, OP, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; only the rows 8 - a
+ * contain explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * EF B9, indexed by the final byte - the lookup code is not visible here.
+ */
+static const char Table_EFB9[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ CL, OP, CL, OP, CL, AL, AL, OP, CL, AL, AL, AL, AL, AL, AL, AL, /* 80 - 8f */
+/* 9 */ CL, CL, CL, __, NS, NS, AL, AL, B2, OP, CL, OP, CL, OP, CL, AL, /* 90 - 9f */
+/* a */ AL, AL, __, B2, __, __, __, __, AL, PR, PO, AL, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+/*
+ * Line break classes (LBClass values) for 256 entries; the rows 8 - b contain
+ * explicit classes.
+ * NOTE(review): presumably for the UTF-8 sequences starting with the bytes
+ * EF BC, indexed by the final byte - the lookup code is not visible here.
+ * NOTE(review): the single AL entry at index 3f lies outside of the rows
+ * 8 - b which are populated in the sibling tables - please confirm that it
+ * is intended.
+ */
+static const char Table_EFBC[256] = {
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, AL, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, EX, AL, AL, PR, PO, AL, AL, OP, CL, AL, __, CL, B2, CL, AL, /* 80 - 8f */
+/* 9 */ NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, NS, NS, __, __, __, EX, /* 90 - 9f */
+/* a */ AL, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, OP, AL, CL, __, __, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+static const char Table_EFBD[256] = {
+/*
+ * Line break classes for the three-byte UTF-8 sequences EF BD xx, i.e. the
+ * code points U+FF40..U+FF7F when xx is a continuation byte 80..bf.
+ * ComputeBreakLocations indexes this table with the third byte of the
+ * sequence. '__' is a placeholder macro defined earlier in this file.
+ *
+ * NOTE(review): index bf (U+FF7F) is classified as WJ -- verify this against
+ * the Unicode LineBreak.txt data.
+ *
+ * Column offsets: 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
+ */
+/* 0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 00 - 0f */
+/* 1 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 10 - 1f */
+/* 2 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 20 - 2f */
+/* 3 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 30 - 3f */
+/* 4 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 40 - 4f */
+/* 5 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 50 - 5f */
+/* 6 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 60 - 6f */
+/* 7 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 70 - 7f */
+/* 8 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* 80 - 8f */
+/* 9 */ __, __, __, __, __, __, __, __, __, __, __, OP, __, CL, __, OP, /* 90 - 9f */
+/* a */ CL, CL, OP, CL, CL, AL, __, __, __, __, __, __, __, __, __, __, /* a0 - af */
+/* b */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, WJ, /* b0 - bf */
+/* c */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* c0 - cf */
+/* d */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* d0 - df */
+/* e */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* e0 - ef */
+/* f */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, /* f0 - ff */
+/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+};
+
+#undef __
+
+#define PROHIBITED LINEBREAK_NOBREAK /* pair action: no break is stored for this pair */
+#define DIRECT LINEBREAK_ALLOWBREAK /* pair action: a break opportunity is stored for this pair */
+#define INDIRECT ((char) (~LINEBREAK_NOBREAK & ~LINEBREAK_ALLOWBREAK & 0x7f)) /* marker built from the low 7 bits set in neither constant; resolved to one of them at lookup time */
+
+#define X PROHIBITED /* B ^ A === B SP* × A: never break before A, even after spaces */
+#define i INDIRECT /* B % A === B × A and B SP+ ÷ A: break only if spaces precede A */
+#define _ DIRECT /* B ÷ A: break allowed directly between B and A */
+
+/*
+ * Pair table of break actions: BrkPairTable[B][A] is the action (X, i or _,
+ * see the macros above) for a character of class B (row) followed by a
+ * character of class A (column). The row and column labels presumably follow
+ * the order of the LBClass enumeration -- verify against its declaration.
+ *
+ * Note that the BK and SP columns and the SP row will not be used for lookup:
+ * ComputeBreakLocations handles a following BK or SP before consulting the
+ * table, and it never stores SP as the class of the preceding character.
+ *
+ * The single-letter macros X, i and _ are undefined again right after the
+ * table so that they cannot leak into the following code.
+ */
+static const char BrkPairTable[23][23] = {
+/* AI AL B2 BA BB BK CL CP EX GL HY IN IS NS NU OP PO PR QU SP SY WJ ZW */
+/* AI */ { X, X, _, i, _, _, X, X, X, i, i, i, X, i, i, i, _, _, i, _, X, X, X }, /* AI */
+/* AL */ { i, i, _, i, _, _, X, X, X, i, i, i, X, i, i, i, _, _, i, _, X, X, X }, /* AL */
+/* B2 */ { _, _, _, i, _, _, X, X, X, i, i, _, X, i, _, _, _, _, i, _, X, X, X }, /* B2 */
+/* BA */ { _, _, _, i, _, _, X, X, X, i, i, _, X, i, _, _, _, _, i, _, X, X, X }, /* BA */
+/* BB */ { i, i, i, i, i, _, X, X, X, _, i, i, X, i, i, i, i, i, i, _, X, X, X }, /* BB */
+/* BK */ { _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ }, /* BK */
+/* CL */ { _, _, _, i, _, _, X, X, X, i, i, _, X, X, _, _, i, i, i, _, X, X, X }, /* CL */
+/* CP */ { i, i, _, i, _, _, X, X, X, i, i, _, X, X, i, _, i, i, i, _, X, X, X }, /* CP */
+/* EX */ { _, _, _, i, _, _, X, X, X, i, i, _, X, i, _, _, _, _, i, _, X, X, X }, /* EX */
+/* GL */ { i, i, i, i, i, _, X, X, X, i, i, i, X, i, i, i, i, i, i, _, X, X, X }, /* GL */
+/* HY */ { _, _, _, i, _, _, X, X, X, _, i, _, X, i, i, _, _, _, i, _, X, X, X }, /* HY */
+/* IN */ { _, _, _, i, _, _, X, X, X, i, i, i, X, i, _, _, _, _, i, _, X, X, X }, /* IN */
+/* IS */ { i, i, _, i, _, _, X, X, X, i, i, _, X, i, i, _, _, _, i, _, X, X, X }, /* IS */
+/* NS */ { _, _, _, i, _, _, X, X, X, i, i, _, X, i, _, _, _, _, i, _, X, X, X }, /* NS */
+/* NU */ { i, i, _, i, _, _, X, X, X, i, i, i, X, i, i, i, i, i, i, _, X, X, X }, /* NU */
+/* OP */ { X, X, X, X, X, _, X, X, X, X, X, X, X, X, X, X, X, X, X, _, X, X, X }, /* OP */
+/* PO */ { i, i, _, i, _, _, X, X, X, i, i, _, X, i, i, i, _, _, i, _, X, X, X }, /* PO */
+/* PR */ { _, i, _, i, _, _, X, X, X, i, i, _, X, i, i, i, _, _, i, _, X, X, X }, /* PR */
+/* QU */ { i, i, i, i, i, _, X, X, X, i, i, i, X, i, i, X, i, i, i, _, X, X, X }, /* QU */
+/* SP */ { _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ }, /* SP */
+/* SY */ { _, _, _, i, _, _, X, X, X, i, i, _, X, i, i, _, _, _, i, _, X, X, X }, /* SY */
+/* WJ */ { i, i, i, i, i, _, X, X, X, i, i, i, X, i, i, i, i, i, i, _, X, X, X }, /* WJ */
+/* ZW */ { _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, X }, /* ZW */
+/* AI AL B2 BA BB BK CL CP EX GL HY IN IS NS NU OP PO PR QU SP SY WJ ZW */
+};
+
+#undef _
+#undef i
+#undef X
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * ComputeBreakLocations --
+ *
+ *    Compute break locations in UTF-8 text. This function does the same
+ *    job as set_linebreaks_utf8 (from "libunibreak"), but it uses a
+ *    simplified line break algorithm that follows the recommendations at
+ *    http://www.unicode.org/reports/tr14/tr14-26.html.
+ *
+ *    Note that this function expects that the whole line is passed in a
+ *    single call. This interface corresponds to the interface of the
+ *    linebreak library, even though such a design is a bit unfortunate.
+ *
+ *    Parameters:
+ *      text  The UTF-8 encoded bytes to analyse.
+ *      len   Number of bytes in 'text' (and of entries in 'brks').
+ *      lang  Not used by this implementation; it is only present so that
+ *            the signature matches ComputeBreakLocationsFunc.
+ *      brks  Output array, one entry per input byte.
+ *
+ * Results:
+ *    The computed break locations, in 'brks'. This array must be as
+ *    large as the input length (specified by 'len'). For every character
+ *    all bytes except the last one are marked LINEBREAK_INSIDEACHAR; the
+ *    entry of the last byte tells whether a break is possible after the
+ *    character (LINEBREAK_NOBREAK or LINEBREAK_ALLOWBREAK), and it is
+ *    written when the following character is classified. The last entry
+ *    is always LINEBREAK_MUSTBREAK. If a character of class BK is found
+ *    at an offset greater than zero the function returns immediately,
+ *    and the entries behind that position (except the last one) are not
+ *    written. Nothing is written if 'len' is zero.
+ *
+ *    Multi-byte sequences that are cut off by the end of the input are
+ *    treated as one character of class AI; no byte at or behind
+ *    text[len] is classified, and no entry at or behind brks[len] is
+ *    written.
+ *
+ * Side effects:
+ *    None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static void
+ComputeBreakLocations(
+    const unsigned char *text,
+    size_t len,
+    const char *lang,
+    char *brks)
+{
+    size_t i;
+    size_t nbytes;
+    size_t nletters;
+    size_t brkIndex;
+    LBClass cls;
+    LBClass prevCls;
+
+    (void) lang; /* not used by the built-in algorithm */
+
+    if (len == 0) {
+        return;
+    }
+
+    i = 0;
+    nletters = 0;    /* number of consecutive AL characters seen so far */
+    brkIndex = 0;    /* pending break slot in front of a hyphen, 0 means none */
+    cls = BK;        /* class of the preceding character (spaces excluded) */
+    prevCls = WJ;    /* SP if the preceding character was a space */
+    brks[len - 1] = LINEBREAK_MUSTBREAK;
+
+    while (i < len) {
+        unsigned char ch;
+        LBClass pcls;
+        size_t avail;
+
+        ch = text[i];
+        avail = len - i;    /* bytes left, including the current one */
+
+        /*
+         * Step 1: determine the line break class (pcls) and the byte length
+         * (nbytes) of the character starting at offset i, and mark all of
+         * its bytes except the last one as LINEBREAK_INSIDEACHAR. The
+         * multi-byte branches are entered only if the complete sequence is
+         * inside the input, so text[i + 1] and text[i + 2] are valid there.
+         */
+
+        if (ch < 0x80) {
+            pcls = Table_0000[ch];
+            nbytes = 1;
+        } else if ((ch & 0xe0) == 0xc0 && avail >= 2) {
+            pcls = AI;
+            switch (ch) {
+            case 0xc2:
+                switch (UCHAR(text[i + 1])) {
+                case 0x85: pcls = BK; break; /* NL */
+                case 0xac: pcls = AL; break;
+                case 0xad: pcls = BA; break;
+                case 0xb1: pcls = AL; break;
+                case 0xbb: pcls = QU; break;
+                }
+                break;
+            case 0xc3:
+            case 0xc4:
+            case 0xc5:
+            case 0xc6:
+            case 0xc7:
+            case 0xc8:
+            case 0xc9:
+                ch = text[i + 1];
+                if (0x80 <= ch && ch <= 0xbf) {
+                    pcls = AL;
+                }
+                break;
+            case 0xca:
+                ch = text[i + 1];
+                if (0x80 <= ch && ch <= 0xaf) {
+                    pcls = AL;
+                }
+                break;
+            case 0xcb:
+                switch (UCHAR(text[i + 1])) {
+                case 0x88: /* fallthru */
+                case 0x8c: /* fallthru */
+                case 0x9f: pcls = BB; break;
+                }
+                break;
+            case 0xcd:
+                if (UCHAR(text[i + 1]) == 0x8f) {
+                    pcls = GL;
+                }
+                break;
+            case 0xd7:
+                if (UCHAR(text[i + 1]) == 0x86) {
+                    pcls = EX;
+                }
+                break;
+            case 0xdf:
+                if (UCHAR(text[i + 1]) == 0xb8) {
+                    pcls = IS;
+                }
+                break;
+            }
+            nbytes = 2;
+            brks[i] = LINEBREAK_INSIDEACHAR;
+        } else if ((ch & 0xf0) == 0xe0 && avail >= 3) {
+            pcls = AI;
+            switch (ch) {
+            case 0xe2:
+                switch (UCHAR(text[i + 1])) {
+                case 0x80: pcls = Table_E280[UCHAR(text[i + 2])]; break;
+                case 0x81: pcls = Table_E281[UCHAR(text[i + 2])]; break;
+                case 0x82: pcls = Table_E282[UCHAR(text[i + 2])]; break;
+                case 0x8c: pcls = Table_E28C[UCHAR(text[i + 2])]; break;
+                case 0x9d: pcls = Table_E29D[UCHAR(text[i + 2])]; break;
+                case 0x9f: pcls = Table_E29F[UCHAR(text[i + 2])]; break;
+                case 0xa6: pcls = Table_E2A6[UCHAR(text[i + 2])]; break;
+                case 0xa7: pcls = Table_E2A7[UCHAR(text[i + 2])]; break;
+                case 0xb8: pcls = Table_E2B8[UCHAR(text[i + 2])]; break;
+                case 0x84:
+                    switch (UCHAR(text[i + 2])) {
+                    case 0x83: /* fallthru */
+                    case 0x89: pcls = PO; break;
+                    case 0x96: pcls = PR; break;
+                    }
+                    break;
+                case 0x88:
+                    switch (UCHAR(text[i + 2])) {
+                    case 0x92: /* fallthru */
+                    case 0x93: pcls = PR; break;
+                    }
+                    break;
+                case 0xb9:
+                    switch (UCHAR(text[i + 2])) {
+                    case 0x80: pcls = B2; break;
+                    case 0x81: pcls = AL; break;
+                    case 0x82: pcls = OP; break;
+                    }
+                    break;
+                }
+                break;
+            case 0xe3:
+                if (UCHAR(text[i + 1]) == 0x80) {
+                    pcls = Table_E380[UCHAR(text[i + 2])];
+                }
+                break;
+            case 0xef:
+                switch (UCHAR(text[i + 1])) {
+                case 0xb8: pcls = Table_EFB8[UCHAR(text[i + 2])]; break;
+                case 0xb9: pcls = Table_EFB9[UCHAR(text[i + 2])]; break;
+                case 0xbc: pcls = Table_EFBC[UCHAR(text[i + 2])]; break;
+                case 0xbd: pcls = Table_EFBD[UCHAR(text[i + 2])]; break;
+                case 0xb4:
+                    switch (UCHAR(text[i + 2])) {
+                    case 0xbe: pcls = CL; break;
+                    case 0xbf: pcls = OP; break;
+                    }
+                    break;
+                case 0xbb:
+                    if (UCHAR(text[i + 2]) == 0xbf) {
+                        pcls = WJ; /* ZWNBSP (deprecated word joiner) */
+                    }
+                    break;
+                case 0xbf:
+                    switch (UCHAR(text[i + 2])) {
+                    case 0xa0: pcls = PO; break;
+                    case 0xa1: /* fallthru */
+                    case 0xa5: /* fallthru */
+                    case 0xa6: pcls = PR; break;
+                    }
+                    break;
+                }
+                break;
+            }
+            nbytes = 3;
+            brks[i + 0] = LINEBREAK_INSIDEACHAR;
+            brks[i + 1] = LINEBREAK_INSIDEACHAR;
+        } else if ((ch & 0xf8) == 0xf0 && avail >= 4) {
+            pcls = AI;
+            nbytes = 4;
+            brks[i + 0] = LINEBREAK_INSIDEACHAR;
+            brks[i + 1] = LINEBREAK_INSIDEACHAR;
+            brks[i + 2] = LINEBREAK_INSIDEACHAR;
+        } else {
+            /*
+             * Fallback for everything that is not a complete, regular UTF-8
+             * sequence:
+             *
+             * - a lead byte of a 2-, 3- or 4-byte sequence which is cut off
+             *   by the end of the input; the remaining bytes are consumed as
+             *   one character without looking behind the input;
+             *
+             * - stray continuation bytes and lead bytes >= 0xf8. According
+             *   to the notes of the original author the latter can occur
+             *   with TCL_UTF_MAX > 4 (sequences longer than 4 bytes do not
+             *   conform to the UTF-8 standard), and the character
+             *   conversion of the Tcl library may produce overlong
+             *   sequences. The length is taken from Tcl_UtfNext.
+             *   NOTE(review): Tcl_UtfNext may inspect the bytes following
+             *   'p'; this presumably relies on 'text' being NUL-terminated,
+             *   as Tcl strings are -- confirm against the callers.
+             *
+             * In any case the character gets class AI, never extends beyond
+             * the input, and (like in the regular branches) only the bytes
+             * in front of its last byte are marked as inside a character, so
+             * that the LINEBREAK_MUSTBREAK mark of the last entry survives.
+             */
+
+            size_t k;
+
+            pcls = AI;
+
+            if (0xc0 <= ch && ch < 0xf8) {
+                nbytes = avail;
+            } else {
+                const char *p = (const char *) text + i;
+
+                nbytes = (size_t) (Tcl_UtfNext(p) - p);
+                if (nbytes == 0) {
+                    nbytes = 1; /* always make progress */
+                } else if (nbytes > avail) {
+                    nbytes = avail;
+                }
+            }
+            for (k = 0; k + 1 < nbytes; ++k) {
+                brks[i + k] = LINEBREAK_INSIDEACHAR;
+            }
+        }
+
+        /*
+         * Step 2: decide whether a break is possible between the preceding
+         * character and this one; the result belongs to the last byte of the
+         * preceding character, which is brks[i - 1].
+         */
+
+        if (i == 0) {
+            cls = pcls;
+            if (cls == SP) {
+                /* treat SP at start of input as if it followed a WJ */
+                prevCls = cls = WJ;
+            }
+        } else {
+            switch (pcls) {
+            case BK:
+                /*
+                 * Hard line break: stop here.
+                 * NOTE(review): brks[i] is the first byte of the break
+                 * character; for the two-byte NL (C2 85) this replaces the
+                 * LINEBREAK_INSIDEACHAR mark of the lead byte -- confirm
+                 * whether the mark should go to the last byte instead.
+                 */
+                brks[i - 1] = LINEBREAK_NOBREAK;
+                brks[i] = LINEBREAK_MUSTBREAK;
+                return;
+            case SP:
+                /* handle spaces explicitly; do not update cls */
+                brks[i - 1] = LINEBREAK_NOBREAK;
+                prevCls = SP;
+                nletters = 0;
+                break;
+            case HY: {
+                char brk = BrkPairTable[cls][HY];
+
+                /*
+                 * The HYPHEN-MINUS (U+002D) needs special context treatment. For
+                 * simplicity we will only check whether we have two preceding, and
+                 * two succeeding letters: the slot in front of the hyphen is
+                 * remembered in brkIndex, and it is changed to a break opportunity
+                 * as soon as two letters have followed (see the default case).
+                 * TODO: Is there a better method for the decision?
+                 */
+
+                brks[i - 1] = LINEBREAK_NOBREAK;
+                cls = pcls;
+
+                if (brk == INDIRECT) {
+                    prevCls = pcls;
+                } else {
+                    prevCls = WJ;
+
+                    if (brk == LINEBREAK_ALLOWBREAK && nletters >= 2) {
+                        brkIndex = i - 1;
+                    }
+                }
+                nletters = 0;
+                break;
+            }
+            default: {
+                char brk = BrkPairTable[cls][pcls];
+
+                if (brk == INDIRECT) {
+                    /* an indirect break is possible only behind spaces */
+                    brk = (prevCls == SP) ? LINEBREAK_ALLOWBREAK : LINEBREAK_NOBREAK;
+                    prevCls = pcls;
+                } else {
+                    prevCls = WJ;
+                }
+                brks[i - 1] = brk;
+                cls = pcls;
+
+                if (pcls == AL) {
+                    nletters += 1;
+
+                    if (brkIndex && nletters >= 2) {
+                        /* two letters behind the hyphen: release the pending break */
+                        brks[brkIndex] = LINEBREAK_ALLOWBREAK;
+                        brkIndex = 0;
+                    }
+                } else {
+                    nletters = 0;
+                }
+                break;
+            }
+            }
+        }
+
+        i += nbytes;
+    }
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 105
+ * End:
+ * vi:set ts=8 sw=4:
+ */