From 60fd302e8d88b92ade59d68872c99310128c3a6c Mon Sep 17 00:00:00 2001
From: Benjamin Poulain <benjamin.poulain@nokia.com>
Date: Tue, 23 Feb 2010 17:06:41 +0100
Subject: Implement QString::fromLatin1_helper() with SSE2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When there are at least 16 characters, use SSE2 to convert
from 8 bits to 16 bits.

Reviewed-by: Samuel Rødal
Reviewed-by: Thiago Macieira
---
 src/corelib/tools/qsimd_p.h   |  9 +++++++++
 src/corelib/tools/qstring.cpp | 24 ++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index d535762..d0a057e 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -60,6 +60,15 @@ QT_MODULE(Core)
 #undef QT_HAVE_MMX
 #endif
 
+
+#if defined(__x86_64__) || defined(Q_OS_WIN64) || defined(__ia64__) || defined(__SSE2__)
+#if defined(QT_HAVE_SSE2)
+// Defined for small fast functions that can take advantage of SSE2 intrinsics
+#define QT_ALWAYS_HAVE_SSE2
+#endif
+#endif
+
+
 // SSE intrinsics
 #if defined(QT_HAVE_SSE2) || defined(QT_HAVE_SSE) || defined(QT_HAVE_MMX)
 #if defined(QT_LINUXBASE)
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index dec59b7..571a015 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -46,6 +46,7 @@
 #include <qtextcodec.h>
 #endif
 #include <private/qutfcodec_p.h>
+#include "qsimd_p.h"
 #include <qdatastream.h>
 #include <qlist.h>
 #include "qlocale.h"
@@ -3612,10 +3613,29 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
         d->alloc = d->size = size;
         d->clean = d->asciiCache = d->simpletext = d->righttoleft = d->capacity = 0;
         d->data = d->array;
-        ushort *i = d->data;
         d->array[size] = '\0';
+        ushort *dst = d->data;
+#if defined(QT_ALWAYS_HAVE_SSE2)
+        if (size >= 16) {
+            int chunkCount = size >> 4; // divided by 16
+            const __m128i nullMask = _mm_set1_epi32(0);
+            for (int i = 0; i < chunkCount; ++i) {
+                const __m128i chunk = _mm_loadu_si128((__m128i*)str);
+                str += 16;
+
+                const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+                _mm_storeu_si128((__m128i*)dst, firstHalf);
+                dst += 8;
+
+                const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+                _mm_storeu_si128((__m128i*)dst, secondHalf);
+                dst += 8;
+            }
+            size = size % 16;
+        }
+#endif
         while (size--)
-            *i++ = (uchar)*str++;
+            *dst++ = (uchar)*str++;
     }
     return d;
 }
-- 
cgit v0.12