summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-02-23 16:06:41 (GMT)
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-02-23 17:06:30 (GMT)
commit60fd302e8d88b92ade59d68872c99310128c3a6c (patch)
tree870288ddc921f1fb68178b8d17200abeb46545d9
parentd7181ae996f9add10bf4e956ddbedff99cb19378 (diff)
downloadQt-60fd302e8d88b92ade59d68872c99310128c3a6c.zip
Qt-60fd302e8d88b92ade59d68872c99310128c3a6c.tar.gz
Qt-60fd302e8d88b92ade59d68872c99310128c3a6c.tar.bz2
Implement QString::fromLatin1_helper() with SSE2
When there are at least 16 characters, use SSE2 to convert from 8 bits to 16 bits. Reviewed-by: Samuel Rødal Reviewed-by: Thiago Macieira
-rw-r--r-- src/corelib/tools/qsimd_p.h 9
-rw-r--r-- src/corelib/tools/qstring.cpp 24
2 files changed, 31 insertions, 2 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index d535762..d0a057e 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -60,6 +60,15 @@ QT_MODULE(Core)
#undef QT_HAVE_MMX
#endif
+
+#if defined(__x86_64__) || defined(Q_OS_WIN64) || defined(__ia64__) || defined(__SSE2__)
+#if defined(QT_HAVE_SSE2)
+// Defined for small, fast functions that can take advantage of SSE2 intrinsics
+#define QT_ALWAYS_HAVE_SSE2
+#endif
+#endif
+
+
// SSE intrinsics
#if defined(QT_HAVE_SSE2) || defined(QT_HAVE_SSE) || defined(QT_HAVE_MMX)
#if defined(QT_LINUXBASE)
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index dec59b7..571a015 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -46,6 +46,7 @@
#include <qtextcodec.h>
#endif
#include <private/qutfcodec_p.h>
+#include "qsimd_p.h"
#include <qdatastream.h>
#include <qlist.h>
#include "qlocale.h"
@@ -3612,10 +3613,29 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
d->alloc = d->size = size;
d->clean = d->asciiCache = d->simpletext = d->righttoleft = d->capacity = 0;
d->data = d->array;
- ushort *i = d->data;
d->array[size] = '\0';
+ ushort *dst = d->data;
+#if defined(QT_ALWAYS_HAVE_SSE2)
+ if (size >= 16) {
+ int chunkCount = size >> 4; // divided by 16
+ const __m128i nullMask = _mm_set1_epi32(0);
+ for (int i = 0; i < chunkCount; ++i) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)str);
+ str += 16;
+
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)dst, firstHalf);
+ dst += 8;
+
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)dst, secondHalf);
+ dst += 8;
+ }
+ size = size % 16;
+ }
+#endif
while (size--)
- *i++ = (uchar)*str++;
+ *dst++ = (uchar)*str++;
}
return d;
}