summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@nokia.com>2010-06-14 12:45:01 (GMT)
committerLars Knoll <lars.knoll@nokia.com>2010-06-14 12:54:08 (GMT)
commit249442f44ac5ffabc91276307f8ae7a749f74eb1 (patch)
tree32edeba93b21773668f89dfacbf03e441bbbd7b2
parent9d8723745d19985409548f47da85a45c15ce5b0e (diff)
downloadQt-249442f44ac5ffabc91276307f8ae7a749f74eb1.zip
Qt-249442f44ac5ffabc91276307f8ae7a749f74eb1.tar.gz
Qt-249442f44ac5ffabc91276307f8ae7a749f74eb1.tar.bz2
Fix issues with Thai line breaking
Merge latest harfbuzz: commit cce760d41f115fecd5b9b6b20b62883b10a9c204 Author: Lars Knoll <lars.knoll@nokia.com> Date: Mon Jun 14 14:14:59 2010 +0200 Fixes for Thai linebreaking * Load libthai.so.0 since libthai.so is not there on all systems * Remove dependency on codecs. Unicode->TIS620 is so simple we can simply hardcode it in harfbuzz-thai.c * Speed up detection of word boundaries * Fallback when libthai is not found is now to not break instead of breaking after every character (in line with recommendations from unicode.org linebreaking algorithm) Reviewed-by: Simon Hausmann Adapt qharfbuzz.cpp to the changes in harfbuzz. Reviewed-by: Simon Hausmann
-rw-r--r--src/3rdparty/harfbuzz/src/harfbuzz-external.h6
-rw-r--r--src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp13
-rw-r--r--src/3rdparty/harfbuzz/src/harfbuzz-thai.c64
-rw-r--r--src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp25
-rw-r--r--src/corelib/tools/qharfbuzz.cpp34
5 files changed, 53 insertions, 89 deletions
diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-external.h b/src/3rdparty/harfbuzz/src/harfbuzz-external.h
index 760749b..7644f0d 100644
--- a/src/3rdparty/harfbuzz/src/harfbuzz-external.h
+++ b/src/3rdparty/harfbuzz/src/harfbuzz-external.h
@@ -146,11 +146,7 @@ HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
-void *HB_Library_Resolve(const char *library, const char *symbol);
-
-void *HB_TextCodecForMib(int mib);
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength);
-void HB_TextCodec_FreeResult(char *);
+void *HB_Library_Resolve(const char *library, int version, const char *symbol);
HB_END_HEADER
diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
index bfc7bd4..ff69304 100644
--- a/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
+++ b/src/3rdparty/harfbuzz/src/harfbuzz-shaper.cpp
@@ -183,18 +183,15 @@ static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttribute
if (ncls >= HB_LineBreak_CR)
goto next;
- // two complex chars (thai or lao), thai_attributes might override, but here we do a best guess
- if (cls == HB_LineBreak_SA && ncls == HB_LineBreak_SA) {
- lineBreakType = HB_Break;
- goto next;
- }
-
{
int tcls = ncls;
+ // for south east asian chars that require a complex (dictionary analysis), the unicode
+ // standard recommends to treat them as AL. thai_attributes and other attribute methods that
+ // do dictionary analysis can override
if (tcls >= HB_LineBreak_SA)
- tcls = HB_LineBreak_ID;
+ tcls = HB_LineBreak_AL;
if (cls >= HB_LineBreak_SA)
- cls = HB_LineBreak_ID;
+ cls = HB_LineBreak_AL;
int brk = breakTable[cls][tcls];
switch (brk) {
diff --git a/src/3rdparty/harfbuzz/src/harfbuzz-thai.c b/src/3rdparty/harfbuzz/src/harfbuzz-thai.c
index 1d1aa2f..fc2bdbf 100644
--- a/src/3rdparty/harfbuzz/src/harfbuzz-thai.c
+++ b/src/3rdparty/harfbuzz/src/harfbuzz-thai.c
@@ -27,57 +27,79 @@
#include "harfbuzz-external.h"
#include <assert.h>
+#include <stdio.h>
+
+typedef int (*th_brk_def)(const char*, int[], int);
+static th_brk_def th_brk = 0;
+static int libthai_resolved = 0;
+
+static void resolve_libthai()
+{
+ if (!th_brk)
+ th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
+ libthai_resolved = 1;
+}
+
+static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
+{
+ hb_uint32 i;
+ unsigned char *result = (unsigned char *)cstr;
+
+ for (i = 0; i < len; ++i) {
+ if (string[i] <= 0xa0)
+ result[i] = (unsigned char)string[i];
+ if (string[i] >= 0xe01 && string[i] <= 0xe5b)
+ result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
+ else
+ result[i] = '?';
+ }
+}
static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
{
- typedef int (*th_brk_def)(const char*, int[], int);
- static void *thaiCodec = 0;
- static th_brk_def th_brk = 0;
- char *cstr = 0;
+ char s[128];
+ char *cstr = s;
int brp[128];
int *break_positions = brp;
hb_uint32 numbreaks;
hb_uint32 i;
- if (!thaiCodec)
- thaiCodec = HB_TextCodecForMib(2259);
-
- /* load libthai dynamically */
- if (!th_brk && thaiCodec) {
- th_brk = (th_brk_def)HB_Library_Resolve("thai", "th_brk");
- if (!th_brk)
- thaiCodec = 0;
- }
+ if (!libthai_resolved)
+ resolve_libthai();
if (!th_brk)
return;
- cstr = HB_TextCodec_ConvertFromUnicode(thaiCodec, string, len, 0);
- if (!cstr)
- return;
+ if (len > 128)
+ cstr = (char *)malloc(len*sizeof(char));
+
+ to_tis620(string, len, cstr);
- break_positions = brp;
numbreaks = th_brk(cstr, break_positions, 128);
if (numbreaks > 128) {
break_positions = (int *)malloc(numbreaks * sizeof(int));
numbreaks = th_brk(cstr, break_positions, numbreaks);
}
- for (i = 0; i < len; ++i)
+ for (i = 0; i < len; ++i) {
attributes[i].lineBreakType = HB_NoBreak;
+ attributes[i].wordBoundary = FALSE;
+ }
for (i = 0; i < numbreaks; ++i) {
- if (break_positions[i] > 0)
+ if (break_positions[i] > 0) {
attributes[break_positions[i]-1].lineBreakType = HB_Break;
+ attributes[i].wordBoundary = TRUE;
+ }
}
if (break_positions != brp)
free(break_positions);
- HB_TextCodec_FreeResult(cstr);
+ if (len > 128)
+ free(cstr);
}
-
void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
assert(script == HB_Script_Thai);
diff --git a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp b/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp
index ea03052..f0048b7 100644
--- a/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp
+++ b/src/3rdparty/harfbuzz/tests/linebreaking/harfbuzz-qt.cpp
@@ -79,30 +79,9 @@ void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme,
*lineBreak = (HB_LineBreakClass) prop->line_break_class;
}
-void *HB_Library_Resolve(const char *library, const char *symbol)
+void *HB_Library_Resolve(const char *library, int version, const char *symbol)
{
- return QLibrary::resolve(library, symbol);
-}
-
-void *HB_TextCodecForMib(int mib)
-{
- return QTextCodec::codecForMib(mib);
-}
-
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength)
-{
- QByteArray data = reinterpret_cast<QTextCodec *>(codec)->fromUnicode((const QChar *)unicode, length);
- // ### suboptimal
- char *output = (char *)malloc(data.length() + 1);
- memcpy(output, data.constData(), data.length() + 1);
- if (outputLength)
- *outputLength = data.length();
- return output;
-}
-
-void HB_TextCodec_FreeResult(char *string)
-{
- free(string);
+ return QLibrary::resolve(library, version, symbol);
}
}
diff --git a/src/corelib/tools/qharfbuzz.cpp b/src/corelib/tools/qharfbuzz.cpp
index 1b6d334..9166a14 100644
--- a/src/corelib/tools/qharfbuzz.cpp
+++ b/src/corelib/tools/qharfbuzz.cpp
@@ -102,45 +102,15 @@ HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch)
return QChar::mirroredChar(ch);
}
-void *HB_Library_Resolve(const char *library, const char *symbol)
+void *HB_Library_Resolve(const char *library, int version, const char *symbol)
{
#ifdef QT_NO_LIBRARY
return 0;
#else
- return QLibrary::resolve(QLatin1String(library), symbol);
+ return QLibrary::resolve(QLatin1String(library), version, symbol);
#endif
}
-void *HB_TextCodecForMib(int mib)
-{
-#ifndef QT_NO_TEXTCODEC
- return QTextCodec::codecForMib(mib);
-#else
- return 0;
-#endif
-}
-
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength)
-{
-#ifndef QT_NO_TEXTCODEC
- QByteArray data = reinterpret_cast<QTextCodec *>(codec)->fromUnicode((const QChar *)unicode, length);
- // ### suboptimal
- char *output = (char *)malloc(data.length() + 1);
- Q_CHECK_PTR(output);
- memcpy(output, data.constData(), data.length() + 1);
- if (outputLength)
- *outputLength = data.length();
- return output;
-#else
- return 0;
-#endif
-}
-
-void HB_TextCodec_FreeResult(char *string)
-{
- free(string);
-}
-
} // extern "C"
QT_BEGIN_NAMESPACE