summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gui/image/qpixmap.cpp4
-rw-r--r--src/gui/painting/qdrawhelper.cpp133
-rw-r--r--src/s60main/newallocator_hook.cpp12
3 files changed, 120 insertions, 29 deletions
diff --git a/src/gui/image/qpixmap.cpp b/src/gui/image/qpixmap.cpp
index c5d9a7e..3a7be1d 100644
--- a/src/gui/image/qpixmap.cpp
+++ b/src/gui/image/qpixmap.cpp
@@ -775,8 +775,8 @@ QBitmap QPixmap::createHeuristicMask(bool clipTight) const
/*!
Creates and returns a mask for this pixmap based on the given \a
maskColor. If the \a mode is Qt::MaskInColor, all pixels matching the
- maskColor will be opaque. If \a mode is Qt::MaskOutColor, all pixels
- matching the maskColor will be transparent.
+ maskColor will be transparent. If \a mode is Qt::MaskOutColor, all pixels
+ matching the maskColor will be opaque.
This function is slow because it involves converting to/from a
QImage.
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 62af212..fdb686d 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -712,6 +712,38 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, i
}
#endif
+#if defined(QT_ALWAYS_HAVE_NEON)
+#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
+{ \
+ const int16x8_t dxdy = vmulq_s16(distx, disty); \
+ const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
+ const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
+ const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
+ const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
+ \
+ int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
+ int16x8_t tlRB = vandq_s16(tl, colorMask); \
+ int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
+ int16x8_t trRB = vandq_s16(tr, colorMask); \
+ int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
+ int16x8_t blRB = vandq_s16(bl, colorMask); \
+ int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
+ int16x8_t brRB = vandq_s16(br, colorMask); \
+ \
+ int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
+ int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
+ rAG = vmlaq_s16(rAG, trAG, dxidy); \
+ rRB = vmlaq_s16(rRB, trRB, dxidy); \
+ rAG = vmlaq_s16(rAG, blAG, idxdy); \
+ rRB = vmlaq_s16(rRB, blRB, idxdy); \
+ rAG = vmlaq_s16(rAG, brAG, dxdy); \
+ rRB = vmlaq_s16(rRB, brRB, dxdy); \
+ \
+ rAG = vandq_s16(invColorMask, rAG); \
+ rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
+ vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
+}
+#endif
template<TextureBlendType blendType>
Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2)
@@ -920,35 +952,36 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
const uchar *s2 = data->texture.scanLine(y2);
int disty = (fy & 0x0000ffff) >> 12;
-#if defined(QT_ALWAYS_HAVE_SSE2)
if (blendType != BlendTransformedBilinearTiled &&
(format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) {
- //prolog to get into the bounds
- while (b < end) {
- int x1 = (fx >> 16);
- int x2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- if (x1 != x2) //break if we are insided the bounds.
- break;
- uint tl = fetch(s1, x1, data->texture.colorTable);
- uint tr = fetch(s1, x2, data->texture.colorTable);
- uint bl = fetch(s2, x1, data->texture.colorTable);
- uint br = fetch(s2, x2, data->texture.colorTable);
- int distx = (fx & 0x0000ffff) >> 12;
- *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
- fx += fdx;
- ++b;
- }
- uint *boundedEnd;
- if (fdx > 0)
- boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11));
- else
- boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11));
+#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \
+ while (b < end) { \
+ int x1 = (fx >> 16); \
+ int x2; \
+ fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \
+ if (x1 != x2) \
+ break; \
+ uint tl = fetch(s1, x1, data->texture.colorTable); \
+ uint tr = fetch(s1, x2, data->texture.colorTable); \
+ uint bl = fetch(s2, x1, data->texture.colorTable); \
+ uint br = fetch(s2, x2, data->texture.colorTable); \
+ int distx = (fx & 0x0000ffff) >> 12; \
+ *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \
+ fx += fdx; \
+ ++b; \
+ } \
+ uint *boundedEnd; \
+ if (fdx > 0) \
+ boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); \
+ else \
+ boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); \
boundedEnd -= 3;
+#if defined(QT_ALWAYS_HAVE_SSE2)
+ BILINEAR_DOWNSCALE_BOUNDS_PROLOG
+
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
- //const __m128i distShuffleMask = _mm_set_epi8(13, 12, 13, 12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0);
const __m128i v_256 = _mm_set1_epi16(256);
const __m128i v_disty = _mm_set1_epi16(disty);
__m128i v_fdx = _mm_set1_epi32(fdx*4);
@@ -976,8 +1009,7 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
bl.i[i] = *(addr_tl+secondLine);
br.i[i] = *(addr_tr+secondLine);
}
- __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); //distx = (fx & 0x0000ffff) >> 12;
- //v_distx = _mm_shuffle_epi8(v_disty, distShuffleMask); //distx |= distx << 16;
+ __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12);
v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
@@ -986,8 +1018,57 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *
v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx);
}
fx = v_fx.i[0];
- }
+#elif defined(QT_ALWAYS_HAVE_NEON)
+ BILINEAR_DOWNSCALE_BOUNDS_PROLOG
+
+ const int16x8_t colorMask = vdupq_n_s16(0x00ff);
+ const int16x8_t invColorMask = vmvnq_s16(colorMask);
+ const int16x8_t v_256 = vdupq_n_s16(256);
+ const int16x8_t v_disty = vdupq_n_s16(disty);
+ const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
+ int32x4_t v_fdx = vdupq_n_s32(fdx*4);
+
+ ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1);
+
+ union Vect_buffer { int32x4_t vect; quint32 i[4]; };
+ Vect_buffer v_fx;
+
+ for (int i = 0; i < 4; i++) {
+ v_fx.i[i] = fx;
+ fx += fdx;
+ }
+
+ const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
+
+ while (b < boundedEnd) {
+
+ Vect_buffer tl, tr, bl, br;
+
+ Vect_buffer v_fx_shifted;
+ v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16);
+
+ int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12);
+
+ for (int i = 0; i < 4; i++) {
+ int x1 = v_fx_shifted.i[i];
+ const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1;
+ const uint *addr_tr = addr_tl + 1;
+ tl.i[i] = *addr_tl;
+ tr.i[i] = *addr_tr;
+ bl.i[i] = *(addr_tl+secondLine);
+ br.i[i] = *(addr_tr+secondLine);
+ }
+
+ v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
+
+ interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b);
+ b+=4;
+ v_fx.vect = vaddq_s32(v_fx.vect, v_fdx);
+ }
+ fx = v_fx.i[0];
#endif
+ }
+
while (b < end) {
int x1 = (fx >> 16);
int x2;
diff --git a/src/s60main/newallocator_hook.cpp b/src/s60main/newallocator_hook.cpp
index 9ea2ef0..3e259c2 100644
--- a/src/s60main/newallocator_hook.cpp
+++ b/src/s60main/newallocator_hook.cpp
@@ -69,6 +69,16 @@ TInt UserHeap::SetupThreadHeap(TBool aNotFirst, SStdEpocThreadCreateInfo& aInfo)
// So the function is found and called dynamically, by library lookup. If it is not found, we
// use the OS allocator creation functions instead.
+#if defined(QT_LIBINFIX)
+# define QT_LSTRING2(x) L##x
+# define QT_LSTRING(x) QT_LSTRING2(x)
+# define QT_LIBINFIX_UNICODE QT_LSTRING(QT_LIBINFIX)
+#else
+# define QT_LIBINFIX_UNICODE L""
+#endif
+
+_LIT(QtCoreLibName, "qtcore" QT_LIBINFIX_UNICODE L".dll");
+
struct SThreadCreateInfo
{
TAny* iHandle;
@@ -106,7 +116,7 @@ TInt UserHeap::SetupThreadHeap(TBool aNotFirst, SStdEpocThreadCreateInfo& aInfo)
#ifndef __WINS__
// attempt to create the fast allocator through a known export ordinal in qtcore.dll
RLibrary qtcore;
- if (qtcore.Load(_L("qtcore.dll")) == KErrNone)
+ if (qtcore.Load(QtCoreLibName) == KErrNone)
{
const int qt_symbian_SetupThreadHeap_eabi_ordinal = 3713;
TLibraryFunction libFunc = qtcore.Lookup(qt_symbian_SetupThreadHeap_eabi_ordinal);