2 files changed, 57 insertions, 7 deletions
diff --git a/src/gui/text/qtextengine.cpp b/src/gui/text/qtextengine.cpp
index bd66689..effb6e1 100644
--- a/src/gui/text/qtextengine.cpp
+++ b/src/gui/text/qtextengine.cpp
@@ -114,7 +114,20 @@ private:
             return;
         const int end = start + length;
         for (int i = start + 1; i < end; ++i) {
-            if ((m_analysis[i] == m_analysis[start])
+            // According to the Unicode spec we should be treating characters in the Common script
+            // (punctuation, spaces, etc.) as being the same script as the surrounding text for the
+            // purpose of splitting up text. This is important because, for example, a full stop
+            // (0x2E) can be used to indicate an abbreviation and so must be treated as part of a
+            // word.  Thus it must be passed along with the word in languages that have to calculate
+            // word breaks.  For example the Thai word "ครม." has no word breaks but the word "ครม"
+            // does.
+            // Unfortunately, because we split up the strings for both word wrapping and for setting
+            // the font, and because Japanese and Chinese are also aliases of the script "Common",
+            // doing this would break too many things.  So instead we only pass the full stop
+            // along, and nothing else.
+            if (m_analysis[i].bidiLevel == m_analysis[start].bidiLevel
+                && m_analysis[i].flags == m_analysis[start].flags
+                && (m_analysis[i].script == m_analysis[start].script || m_string[i] == QLatin1Char('.'))
                 && m_analysis[i].flags < QScriptAnalysis::SpaceTabOrObject
                 && i - start < MaxItemLength)
                 continue;
diff --git a/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp b/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp
index 83f8cd0..3c20490 100644
--- a/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp
+++ b/tests/auto/qtextscriptengine/tst_qtextscriptengine.cpp
@@ -114,6 +114,8 @@ private slots:
 
     void thaiIsolatedSaraAm();
     void thaiWithZWJ();
+    void thaiLineSplitting();
+    void thaiSaraAM();
 };
 
 tst_QTextScriptEngine::tst_QTextScriptEngine()
@@ -1267,7 +1269,7 @@ void tst_QTextScriptEngine::thaiIsolatedSaraAm()
 
 void tst_QTextScriptEngine::thaiWithZWJ()
 {
-    QString s(QString::fromUtf8("ร‍ร‌ร“ร…ร”ร\xA0ร本ร") + QChar(0x0363)/*superscript 'a', for testing Inherited class*/);
+    QString s(QString::fromUtf8("ร‍ร‌.ร.“ร…ร”ร\xA0ร本ร") + QChar(0x0363)/*superscript 'a', for testing Inherited class*/);
     QTextLayout layout(s);
     layout.beginLayout();
     layout.createLine();
@@ -1280,7 +1282,7 @@ void tst_QTextScriptEngine::thaiWithZWJ()
     // The current implementation hides them, so we test for that.
     // But make sure that we don't hide anything else
     QCOMPARE(e->layoutData->items.size(), 11);
-    QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(5));  // Thai: The ZWJ and ZWNJ characters are inherited, so should be part of the thai script
+    QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(7));  // Thai: The ZWJ and ZWNJ characters are inherited, so should be part of the thai script
     QCOMPARE(e->layoutData->items[1].num_glyphs, ushort(1));  // Common: The smart quotes cannot be handled by thai, so should be a seperate item
     QCOMPARE(e->layoutData->items[2].num_glyphs, ushort(1));  // Thai: Thai character
     QCOMPARE(e->layoutData->items[3].num_glyphs, ushort(1));  // Common: Ellipsis
@@ -1294,17 +1296,52 @@ void tst_QTextScriptEngine::thaiWithZWJ()
 
     //A quick sanity check - check all the characters are individual clusters
     unsigned short *logClusters = e->layoutData->logClustersPtr;
-    for (int i = 0; i < 5; i++)
+    for (int i = 0; i < 7; i++)
         QCOMPARE(logClusters[i], ushort(i));
     for (int i = 0; i < 10; i++)
-        QCOMPARE(logClusters[i+5], ushort(0));
-    QCOMPARE(logClusters[15], ushort(1));
+        QCOMPARE(logClusters[i+7], ushort(0));
+    QCOMPARE(logClusters[17], ushort(1));
 
     // The only characters that we should be hiding are the ZWJ and ZWNJ characters in position 1
     // and 3.
-    for (int i = 0; i < 16; i++)
+    for (int i = 0; i < 18; i++)
         QCOMPARE((bool)e->layoutData->glyphLayout.attributes[i].dontPrint, (i == 1 || i == 3));
 }
 
+void tst_QTextScriptEngine::thaiLineSplitting()
+{
+    //Test that a word with full stops is treated as a single item
+    QString s(QString::fromUtf8("ม.ค."));
+    QTextLayout layout(s);
+    layout.beginLayout();
+    layout.createLine();
+    layout.endLayout();
+
+    QTextEngine *e = layout.engine();
+    e->width(0, s.length()); //force itemize and shape
+
+    // A full stop (U+002E) belongs to the Common script, but here it marks an abbreviation.
+    // The itemizer must therefore keep it in the same item as the surrounding Thai text
+    // instead of splitting the string at every full stop.
+    QCOMPARE(e->layoutData->items.size(), 1);
+    QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(4));  // Thai: It's important that the whole string is counted as one string
+}
+
+void tst_QTextScriptEngine::thaiSaraAM()
+{
+    //U+0E33 (SARA AM, ำ) gets counted as two characters, so make sure it does not throw off the word boundaries
+    QString s(QString::fromUtf8("ฟงคำตดสนคด"));
+    QTextLayout layout(s);
+    layout.beginLayout();
+    layout.createLine();
+    layout.endLayout();
+
+    QTextEngine *e = layout.engine();
+    e->width(0, s.length()); //force itemize and shape
+
+    QCOMPARE(e->layoutData->items.size(), 1);
+    QCOMPARE(e->layoutData->items[0].num_glyphs, ushort(11));  //Note that it's 11, not 10, because the SARA AM counts as two
+
+}
 QTEST_MAIN(tst_QTextScriptEngine)
 #include "tst_qtextscriptengine.moc"