From 249442f44ac5ffabc91276307f8ae7a749f74eb1 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Mon, 14 Jun 2010 14:45:01 +0200 Subject: Fix issues with thai line breaking Merge latest harfbuzz: commit cce760d41f115fecd5b9b6b20b62883b10a9c204 Author: Lars Knoll Date: Mon Jun 14 14:14:59 2010 +0200 Fixes for thai linebreaking * Load libthai.so.0 since libthai.so is not there on all systems * Remove dependency on codecs. Unicode->TIS620 is so simple we can simply hardcode it in harfbuzz-thai.c * Speed up detection of word boundaries * Fallback when libthai is not found is now to not break instead of breaking after every character (in line with recommendations from unicode.org linebreaking algorithm) Reviewed-by: Simon Hausmann Adapt qharfbuzz.cpp to the changes in harfbuzz. Reviewed-by: Simon Hausmann --- src/3rdparty/harfbuzz/src/harfbuzz-external.h | 6 +- src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp | 13 ++--- src/3rdparty/harfbuzz/src/harfbuzz-thai.c | 64 +++++++++++++++------- .../harfbuzz/tests/linebreaking/harfbuzz-qt.cpp | 25 +-------- src/corelib/tools/qharfbuzz.cpp | 34 +----------- 5 files changed, 53 insertions(+), 89 deletions(-) diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-external.h b/src/3rdparty/harfbuzz/src/harfbuzz-external.h index 760749b..7644f0d 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-external.h +++ b/src/3rdparty/harfbuzz/src/harfbuzz-external.h @@ -146,11 +146,7 @@ HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch); int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch); HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch); -void *HB_Library_Resolve(const char *library, const char *symbol); - -void *HB_TextCodecForMib(int mib); -char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength); -void HB_TextCodec_FreeResult(char *); +void *HB_Library_Resolve(const char *library, int version, const char *symbol); HB_END_HEADER diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp 
b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp index bfc7bd4..ff69304 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp +++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp @@ -183,18 +183,15 @@ static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttribute if (ncls >= HB_LineBreak_CR) goto next; - // two complex chars (thai or lao), thai_attributes might override, but here we do a best guess - if (cls == HB_LineBreak_SA && ncls == HB_LineBreak_SA) { - lineBreakType = HB_Break; - goto next; - } - { int tcls = ncls; + // for south east asian chars that require a complex (dictionary analysis), the unicode + // standard recommends to treat them as AL. thai_attributes and other attribute methods that + // do dictionary analysis can override if (tcls >= HB_LineBreak_SA) - tcls = HB_LineBreak_ID; + tcls = HB_LineBreak_AL; if (cls >= HB_LineBreak_SA) - cls = HB_LineBreak_ID; + cls = HB_LineBreak_AL; int brk = breakTable[cls][tcls]; switch (brk) { diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-thai.c b/src/3rdparty/harfbuzz/src/harfbuzz-thai.c index 1d1aa2f..fc2bdbf 100644 --- a/src/3rdparty/harfbuzz/src/harfbuzz-thai.c +++ b/src/3rdparty/harfbuzz/src/harfbuzz-thai.c @@ -27,57 +27,79 @@ #include "harfbuzz-external.h" #include +#include + +typedef int (*th_brk_def)(const char*, int[], int); +static th_brk_def th_brk = 0; +static int libthai_resolved = 0; + +static void resolve_libthai() +{ + if (!th_brk) + th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk"); + libthai_resolved = 1; +} + +static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr) +{ + hb_uint32 i; + unsigned char *result = (unsigned char *)cstr; + + for (i = 0; i < len; ++i) { + if (string[i] <= 0xa0) + result[i] = (unsigned char)string[i]; + if (string[i] >= 0xe01 && string[i] <= 0xe5b) + result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0); + else + result[i] = '?'; + } +} static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, 
HB_CharAttributes *attributes) { - typedef int (*th_brk_def)(const char*, int[], int); - static void *thaiCodec = 0; - static th_brk_def th_brk = 0; - char *cstr = 0; + char s[128]; + char *cstr = s; int brp[128]; int *break_positions = brp; hb_uint32 numbreaks; hb_uint32 i; - if (!thaiCodec) - thaiCodec = HB_TextCodecForMib(2259); - - /* load libthai dynamically */ - if (!th_brk && thaiCodec) { - th_brk = (th_brk_def)HB_Library_Resolve("thai", "th_brk"); - if (!th_brk) - thaiCodec = 0; - } + if (!libthai_resolved) + resolve_libthai(); if (!th_brk) return; - cstr = HB_TextCodec_ConvertFromUnicode(thaiCodec, string, len, 0); - if (!cstr) - return; + if (len > 128) + cstr = (char *)malloc(len*sizeof(char)); + + to_tis620(string, len, cstr); - break_positions = brp; numbreaks = th_brk(cstr, break_positions, 128); if (numbreaks > 128) { break_positions = (int *)malloc(numbreaks * sizeof(int)); numbreaks = th_brk(cstr, break_positions, numbreaks); } - for (i = 0; i < len; ++i) + for (i = 0; i < len; ++i) { attributes[i].lineBreakType = HB_NoBreak; + attributes[i].wordBoundary = FALSE; + } for (i = 0; i < numbreaks; ++i) { - if (break_positions[i] > 0) + if (break_positions[i] > 0) { attributes[break_positions[i]-1].lineBreakType = HB_Break; + attributes[i].wordBoundary = TRUE; + } } if (break_positions != brp) free(break_positions); - HB_TextCodec_FreeResult(cstr); + if (len > 128) + free(cstr); } - void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) { assert(script == HB_Script_Thai); diff --git a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp b/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp index ea03052..f0048b7 100644 --- a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp +++ b/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp @@ -79,30 +79,9 @@ void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme, *lineBreak = (HB_LineBreakClass) 
prop->line_break_class; } -void *HB_Library_Resolve(const char *library, const char *symbol) +void *HB_Library_Resolve(const char *library, int version, const char *symbol) { - return QLibrary::resolve(library, symbol); -} - -void *HB_TextCodecForMib(int mib) -{ - return QTextCodec::codecForMib(mib); -} - -char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) -{ - QByteArray data = reinterpret_cast(codec)->fromUnicode((const QChar *)unicode, length); - // ### suboptimal - char *output = (char *)malloc(data.length() + 1); - memcpy(output, data.constData(), data.length() + 1); - if (outputLength) - *outputLength = data.length(); - return output; -} - -void HB_TextCodec_FreeResult(char *string) -{ - free(string); + return QLibrary::resolve(library, version, symbol); } } diff --git a/src/corelib/tools/qharfbuzz.cpp b/src/corelib/tools/qharfbuzz.cpp index 1b6d334..9166a14 100644 --- a/src/corelib/tools/qharfbuzz.cpp +++ b/src/corelib/tools/qharfbuzz.cpp @@ -102,45 +102,15 @@ HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch) return QChar::mirroredChar(ch); } -void *HB_Library_Resolve(const char *library, const char *symbol) +void *HB_Library_Resolve(const char *library, int version, const char *symbol) { #ifdef QT_NO_LIBRARY return 0; #else - return QLibrary::resolve(QLatin1String(library), symbol); + return QLibrary::resolve(QLatin1String(library), version, symbol); #endif } -void *HB_TextCodecForMib(int mib) -{ -#ifndef QT_NO_TEXTCODEC - return QTextCodec::codecForMib(mib); -#else - return 0; -#endif -} - -char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) -{ -#ifndef QT_NO_TEXTCODEC - QByteArray data = reinterpret_cast(codec)->fromUnicode((const QChar *)unicode, length); - // ### suboptimal - char *output = (char *)malloc(data.length() + 1); - Q_CHECK_PTR(output); - memcpy(output, data.constData(), data.length() + 1); - if (outputLength) 
- *outputLength = data.length(); - return output; -#else - return 0; -#endif -} - -void HB_TextCodec_FreeResult(char *string) -{ - free(string); -} - } // extern "C" QT_BEGIN_NAMESPACE -- cgit v0.12