From 08dbc36a09e79fa07204164281030320db96926c Mon Sep 17 00:00:00 2001 From: John Tapsell Date: Wed, 8 Feb 2012 10:12:16 +0000 Subject: QTextEngine - treat a fullstop (0x2E) as the same script as the preceding text when dividing up strings Many languages use a fullstop to indicate an abbreviation, making the fullstop part of the word. For languages like thai, it is required to pass the fullstop along for correct word breaking. Change-Id: Ideded63432d06a1ab3b786a7bd13356f2cc1a090 Reviewed-by: Lars Knoll --- src/gui/text/qtextengine.cpp | 15 ++++++- .../qtextscriptengine/tst_qtextscriptengine.cpp | 49 +++++++++++++++++++--- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/src/gui/text/qtextengine.cpp b/src/gui/text/qtextengine.cpp index bd66689..effb6e1 100644 --- a/src/gui/text/qtextengine.cpp +++ b/src/gui/text/qtextengine.cpp @@ -114,7 +114,20 @@ private: return; const int end = start + length; for (int i = start + 1; i < end; ++i) { - if ((m_analysis[i] == m_analysis[start]) + // According to the unicode spec we should be treating characters in the Common script + // (punctuation, spaces, etc) as being the same script as the surrounding text for the + // purpose of splitting up text. This is important because, for example, a fullstop + // (0x2E) can be used to indicate an abbreviation and so must be treated as part of a + // word. Thus it must be passed along with the word in languages that have to calculate + // word breaks. For example the thai word "ครม." has no word breaks but the word "ครม" + // does. + // Unfortunately because we split up the strings for both wordwrapping and for setting + // the font and because Japanese and Chinese are also aliases of the script "Common", + // doing this would break too many things. So instead we only pass the full stop + // along, and nothing else. 
+ if (m_analysis[i].bidiLevel == m_analysis[start].bidiLevel + && m_analysis[i].flags == m_analysis[start].flags + && (m_analysis[i].script == m_analysis[start].script || m_string[i] == QLatin1Char('.')) && m_analysis[i].flags < QScriptAnalysis::SpaceTabOrObject && i - start < MaxItemLength) continue; diff --git a/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp b/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp index 83f8cd0..3c20490 100644 --- a/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp +++ b/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp @@ -114,6 +114,8 @@ private slots: void thaiIsolatedSaraAm(); void thaiWithZWJ(); + void thaiLineSplitting(); + void thaiSaraAM(); }; tst_QTextScriptEngine::tst_QTextScriptEngine() @@ -1267,7 +1269,7 @@ void tst_QTextScriptEngine::thaiIsolatedSaraAm() void tst_QTextScriptEngine::thaiWithZWJ() { - QString s(QString::fromUtf8("ร‍ร‌ร“ร…ร”ร\xA0ร本ร") + QChar(0x0363)/*superscript 'a', for testing Inherited class*/); + QString s(QString::fromUtf8("ร‍ร‌.ร.“ร…ร”ร\xA0ร本ร") + QChar(0x0363)/*superscript 'a', for testing Inherited class*/); QTextLayout layout(s); layout.beginLayout(); layout.createLine(); @@ -1280,7 +1282,7 @@ void tst_QTextScriptEngine::thaiWithZWJ() // The current implementation hides them, so we test for that. 
// But make sure that we don't hide anything else QCOMPARE(e->layoutData->items.size(), 11); - QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(5)); // Thai: The ZWJ and ZWNJ characters are inherited, so should be part of the thai script + QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(7)); // Thai: The ZWJ and ZWNJ characters are inherited, so should be part of the thai script QCOMPARE(e->layoutData->items[1].num_glyphs, ushort(1)); // Common: The smart quotes cannot be handled by thai, so should be a seperate item QCOMPARE(e->layoutData->items[2].num_glyphs, ushort(1)); // Thai: Thai character QCOMPARE(e->layoutData->items[3].num_glyphs, ushort(1)); // Common: Ellipsis @@ -1294,17 +1296,52 @@ void tst_QTextScriptEngine::thaiWithZWJ() //A quick sanity check - check all the characters are individual clusters unsigned short *logClusters = e->layoutData->logClustersPtr; - for (int i = 0; i < 5; i++) + for (int i = 0; i < 7; i++) QCOMPARE(logClusters[i], ushort(i)); for (int i = 0; i < 10; i++) - QCOMPARE(logClusters[i+5], ushort(0)); - QCOMPARE(logClusters[15], ushort(1)); + QCOMPARE(logClusters[i+7], ushort(0)); + QCOMPARE(logClusters[17], ushort(1)); // The only characters that we should be hiding are the ZWJ and ZWNJ characters in position 1 // and 3. - for (int i = 0; i < 16; i++) + for (int i = 0; i < 18; i++) QCOMPARE((bool)e->layoutData->glyphLayout.attributes[i].dontPrint, (i == 1 || i == 3)); } +void tst_QTextScriptEngine::thaiLineSplitting() +{ + //Test that a word with full stops is treated as a single item + QString s(QString::fromUtf8("ม.ค.")); + QTextLayout layout(s); + layout.beginLayout(); + layout.createLine(); + layout.endLayout(); + + QTextEngine *e = layout.engine(); + e->width(0, s.length()); //force itemize and shape + + // The full stops here mark an abbreviation, so itemization must not split the text at them; + // they have to stay in the same item as the surrounding Thai characters. 
+ // The whole string should therefore come out as exactly one item + QCOMPARE(e->layoutData->items.size(), 1); + QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(4)); // Thai: It's important that the whole string is counted as one string +} + +void tst_QTextScriptEngine::thaiSaraAM() +{ + //U+0E33 (SARA AM, ำ) gets counted as two characters, so make sure it does not throw off the word boundaries + QString s(QString::fromUtf8("ฟงคำตดสนคด")); + QTextLayout layout(s); + layout.beginLayout(); + layout.createLine(); + layout.endLayout(); + + QTextEngine *e = layout.engine(); + e->width(0, s.length()); //force itemize and shape + + QCOMPARE(e->layoutData->items.size(), 1); + QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(11)); //Note that it's 11, not 10, because the SARA AM counts as two + +} QTEST_MAIN(tst_QTextScriptEngine) #include "tst_qtextscriptengine.moc" -- cgit v0.12