5#include <private/qdrawhelper_x86_p.h>
7#ifdef QT_COMPILER_SUPPORTS_SSE2
9#include <private/qdrawingprimitive_sse2_p.h>
10#include <private/qpaintengine_raster_p.h>
14#ifndef QDRAWHELPER_AVX
16void qt_blend_argb32_on_argb32_sse2(
uchar *destPixels,
int dbpl,
17 const uchar *srcPixels,
int sbpl,
23 if (const_alpha == 256) {
24 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
25 const __m128i nullVector = _mm_set1_epi32(0);
26 const __m128i half = _mm_set1_epi16(0x80);
27 const __m128i one = _mm_set1_epi16(0xff);
28 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
29 for (
int y = 0;
y <
h; ++
y) {
30 BLEND_SOURCE_OVER_ARGB32_SSE2(
dst,
src,
w, nullVector, half, one, colorMask, alphaMask);
34 }
else if (const_alpha != 0) {
38 const_alpha = (const_alpha * 255) >> 8;
39 const __m128i nullVector = _mm_set1_epi32(0);
40 const __m128i half = _mm_set1_epi16(0x80);
41 const __m128i one = _mm_set1_epi16(0xff);
42 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
43 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
44 for (
int y = 0;
y <
h; ++
y) {
45 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(
dst,
src,
w, nullVector, half, one, colorMask, constAlphaVector)
55 const uchar *srcPixels,
int sbpl,
59void qt_blend_rgb32_on_rgb32_sse2(
uchar *destPixels,
int dbpl,
60 const uchar *srcPixels,
int sbpl,
66 if (const_alpha != 256) {
67 if (const_alpha != 0) {
68 const __m128i half = _mm_set1_epi16(0x80);
69 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
71 const_alpha = (const_alpha * 255) >> 8;
72 int one_minus_const_alpha = 255 - const_alpha;
73 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
74 const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
75 for (
int y = 0;
y <
h; ++
y) {
83 for (;
x <
w-3;
x += 4) {
84 __m128i srcVector = _mm_loadu_si128((
const __m128i *)&
src[
x]);
85 const __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
87 INTERPOLATE_PIXEL_255_SSE2(
result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half);
108 const __m128i nullVector = _mm_set1_epi32(0);
109 const __m128i half = _mm_set1_epi16(0x80);
110 const __m128i one = _mm_set1_epi16(0xff);
111 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
112 if (const_alpha == 255) {
113 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
114 BLEND_SOURCE_OVER_ARGB32_SSE2(
dst,
src,
length, nullVector, half, one, colorMask, alphaMask);
116 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
117 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(
dst,
src,
length, nullVector, half, one, colorMask, constAlphaVector);
125 if (const_alpha == 255) {
132 const __m128i srcVector = _mm_loadu_si128((
const __m128i *)&
src[
x]);
133 const __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
135 const __m128i
result = _mm_adds_epu8(srcVector, dstVector);
143 const int one_minus_const_alpha = 255 - const_alpha;
144 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
145 const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
151 const __m128i half = _mm_set1_epi16(0x80);
152 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
155 const __m128i srcVector = _mm_loadu_si128((
const __m128i *)&
src[
x]);
156 const __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
158 __m128i
result = _mm_adds_epu8(srcVector, dstVector);
159 INTERPOLATE_PIXEL_255_SSE2(
result,
result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
171 if (const_alpha == 255) {
174 const int ialpha = 255 - const_alpha;
183 const __m128i half = _mm_set1_epi16(0x80);
184 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
185 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
186 const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha);
188 const __m128i srcVector = _mm_loadu_si128((
const __m128i *)&
src[
x]);
189 __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
190 INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
191 _mm_store_si128((__m128i *)&
dst[
x], dstVector);
204 __m128i *dst128 =
reinterpret_cast<__m128i *
>(dest);
205 __m128i *end128 =
reinterpret_cast<__m128i *
>(
static_cast<uchar *
>(dest) + bytecount);
207 while (dst128 + 4 <= end128) {
208 _mm_store_si128(dst128 + 0, value128);
209 _mm_store_si128(dst128 + 1, value128);
210 _mm_store_si128(dst128 + 2, value128);
211 _mm_store_si128(dst128 + 3, value128);
215 bytecount %= 4 *
sizeof(__m128i);
216 switch (bytecount /
sizeof(__m128i)) {
217 case 3: _mm_store_si128(dst128++, value128);
Q_FALLTHROUGH();
218 case 2: _mm_store_si128(dst128++, value128);
Q_FALLTHROUGH();
219 case 1: _mm_store_si128(dst128++, value128);
226 if (misaligned &&
count) {
227#if defined(Q_PROCESSOR_X86_32)
261 case 1: *dest =
value;
266 const int align = (
quintptr)(dest) & 0xf;
273 const int rest =
count & 0x3;
288 if (const_alpha == 255) {
291 const quint32 ialpha = 255 - const_alpha;
296 const __m128i colorVector = _mm_set1_epi32(
color);
297 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
298 const __m128i half = _mm_set1_epi16(0x80);
299 const __m128i iAlphaVector = _mm_set1_epi16(ialpha);
305 __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
306 BYTE_MUL_SSE2(dstVector, dstVector, iAlphaVector, colorMask, half);
307 dstVector = _mm_add_epi8(colorVector, dstVector);
308 _mm_store_si128((__m128i *)&
dst[
x], dstVector);
320 if (const_alpha != 255)
327 const __m128i colorVector = _mm_set1_epi32(
color);
328 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
329 const __m128i half = _mm_set1_epi16(0x80);
330 const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
336 __m128i dstVector = _mm_load_si128((__m128i *)&
dst[
x]);
337 BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
338 dstVector = _mm_add_epi8(colorVector, dstVector);
339 _mm_store_si128((__m128i *)&
dst[
x], dstVector);
346void qt_bitmapblit32_sse2_base(
QRasterBuffer *rasterBuffer,
int x,
int y,
351 const int destStride = rasterBuffer->stride<
quint32>();
353 const __m128i c128 = _mm_set1_epi32(
color);
354 const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020,
355 0x40404040, 0x80808080);
356 const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060,
357 0x40404040, 0x00000000);
360 const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202,
361 0x04040404, 0x08080808);
362 const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e,
363 0x7c7c7c7c, 0x78787878);
365 for (
int x = 0;
x <
width;
x += 8) {
369 __m128i mask1 = _mm_set1_epi8(
s);
370 __m128i mask2 = mask1;
372 mask1 = _mm_and_si128(mask1, maskmask1);
373 mask1 = _mm_add_epi8(mask1, maskadd1);
374 _mm_maskmoveu_si128(c128, mask1, (
char*)(dest +
x));
375 mask2 = _mm_and_si128(mask2, maskmask2);
376 mask2 = _mm_add_epi8(mask2, maskadd2);
377 _mm_maskmoveu_si128(c128, mask2, (
char*)(dest +
x + 4));
386 __m128i mask1 = _mm_set1_epi8(
s);
387 mask1 = _mm_and_si128(mask1, maskmask1);
388 mask1 = _mm_add_epi8(mask1, maskadd1);
389 _mm_maskmoveu_si128(c128, mask1, (
char*)(dest));
419 const __m128i c128 = _mm_set1_epi16(
c);
421 const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808,
422 0x1010, 0x2020, 0x4040, 0x8080);
423 const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878,
424 0x7070, 0x6060, 0x4040, 0x0000);
427 for (
int x = 0;
x <
width;
x += 8) {
431 __m128i
mask = _mm_set1_epi8(
s);
432 mask = _mm_and_si128(
mask, maskmask);
434 _mm_maskmoveu_si128(c128,
mask, (
char*)(dest +
x));
// Vector aliases used by the helpers below: a 128-bit integer register and a
// 128-bit register of four single-precision floats.
typedef __m128i Int32x4;
typedef __m128 Float32x4;

// Overlays an integer vector with an array of four ints so that individual
// 32-bit lanes can be read or written from scalar code.
union Vect_buffer_i {
    Int32x4 v;
    int i[4];
};

// Overlays a float vector with an array of four floats so that individual
// lanes can be read or written from scalar code.
union Vect_buffer_f {
    Float32x4 v;
    float f[4];
};
455 static inline Float32x4
Q_DECL_VECTORCALL v_add(Float32x4
a, Float32x4
b) {
return _mm_add_ps(
a,
b); }
458 static inline Float32x4
Q_DECL_VECTORCALL v_max(Float32x4
a, Float32x4
b) {
return _mm_max_ps(
a,
b); }
459 static inline Float32x4
Q_DECL_VECTORCALL v_min(Float32x4
a, Float32x4
b) {
return _mm_min_ps(
a,
b); }
460 static inline Int32x4
Q_DECL_VECTORCALL v_min_16(Int32x4
a, Int32x4
b) {
return _mm_min_epi16(
a,
b); }
464 static inline Float32x4
Q_DECL_VECTORCALL v_sub(Float32x4
a, Float32x4
b) {
return _mm_sub_ps(
a,
b); }
467 static inline Float32x4
Q_DECL_VECTORCALL v_mul(Float32x4
a, Float32x4
b) {
return _mm_mul_ps(
a,
b); }
471 static inline Int32x4
Q_DECL_VECTORCALL v_toInt(Float32x4
x) {
return _mm_cvttps_epi32(
x); }
473 static inline Int32x4
Q_DECL_VECTORCALL v_greaterOrEqual(Float32x4
a, Float32x4
b) {
return _mm_castps_si128(_mm_cmpgt_ps(
a,
b)); }
479 return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2>,
uint>(
buffer, op,
data,
y,
x,
length);
482void qt_scale_image_argb32_on_argb32_sse2(
uchar *destPixels,
int dbpl,
483 const uchar *srcPixels,
int sbpl,
int srch,
489 if (const_alpha != 256) {
492 const uchar *srcPixels,
int sbpl,
int srch,
503 const int ix = 0x00010000 * sx;
504 const int iy = 0x00010000 * sy;
507 tr =
tr.intersected(clip);
510 const int tx1 =
tr.left();
511 const int ty1 =
tr.top();
519 int dstx =
qFloor((tx1 +
qreal(0.5) - targetRect.
right()) * sx * 65536) + 1;
522 int dstx =
qCeil((tx1 +
qreal(0.5) - targetRect.
left()) * sx * 65536) - 1;
529 int dsty =
qCeil((ty1 +
qreal(0.5) - targetRect.
top()) * sy * 65536) - 1;
530 srcy =
quint32(sourceRect.
top() * 65536) + dsty;
535 const __m128i nullVector = _mm_setzero_si128();
536 const __m128i half = _mm_set1_epi16(0x80);
537 const __m128i one = _mm_set1_epi16(0xff);
538 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
539 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
540 const __m128i ixVector = _mm_set1_epi32(4*ix);
544 const int ystart = srcy >> 16;
545 if (ystart >= srch && iy < 0) {
549 const int xstart = basex >> 16;
550 if (xstart >= (
int)(sbpl/
sizeof(
quint32)) && ix < 0) {
554 int yend = (srcy + iy * (
h - 1)) >> 16;
555 if (yend < 0 || yend >= srch)
557 int xend = (basex + ix * (
w - 1)) >> 16;
558 if (xend < 0 || xend >= (
int)(sbpl/
sizeof(
quint32)))
562 const uint *
src = (
const quint32 *) (srcPixels + (srcy >> 16) * sbpl);
572 __m128i srcxVector = _mm_set_epi32(srcx, srcx + ix, srcx + ix + ix, srcx + ix + ix + ix);
574 for (;
x < (
w - 3);
x += 4) {
575 const int idx0 = _mm_extract_epi16(srcxVector, 1);
576 const int idx1 = _mm_extract_epi16(srcxVector, 3);
577 const int idx2 = _mm_extract_epi16(srcxVector, 5);
578 const int idx3 = _mm_extract_epi16(srcxVector, 7);
579 srcxVector = _mm_add_epi32(srcxVector, ixVector);
581 const __m128i srcVector = _mm_set_epi32(
src[idx0],
src[idx1],
src[idx2],
src[idx3]);
582 BLEND_SOURCE_OVER_ARGB32_SSE2_helper(
dst, srcVector, nullVector, half, one, colorMask, alphaMask);
\inmodule QtCore\reentrant
constexpr qreal bottom() const noexcept
Returns the y-coordinate of the rectangle's bottom edge.
constexpr qreal height() const noexcept
Returns the height of the rectangle.
constexpr qreal width() const noexcept
Returns the width of the rectangle.
constexpr qreal left() const noexcept
Returns the x-coordinate of the rectangle's left edge.
QRectF normalized() const noexcept
Returns a normalized rectangle; i.e., a rectangle that has a non-negative width and height.
constexpr QRect toRect() const noexcept
Returns a QRect based on the values of this rectangle.
constexpr qreal top() const noexcept
Returns the y-coordinate of the rectangle's top edge.
constexpr qreal right() const noexcept
Returns the x-coordinate of the rectangle's right edge.
\inmodule QtCore\reentrant
while(i.hasNext()) QString s
Combined button and popup list for selecting options.
void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int srch, const QRectF &targetRect, const QRectF &sourceRect, const QRect &clip, int const_alpha)
void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha)
#define QT_WARNING_DISABLE_MSVC(number)
void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
uint comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha)
ushort qConvertRgb32To16(uint c)
static uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b)
static uint BYTE_MUL(uint x, uint a)
uint comp_func_Plus_one_pixel(uint d, const uint s)
void qt_memfill_template(T *dest, T color, qsizetype count)
#define Q_DECL_VECTORCALL
EGLOutputLayerEXT EGLint EGLAttrib value
[5]
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * v
[13]
GLint GLint GLint GLint GLint x
[0]
GLfloat GLfloat GLfloat w
[0]
GLint GLsizei GLsizei height
GLboolean GLboolean GLboolean GLboolean a
[7]
GLenum GLuint GLenum GLsizei length
GLenum GLenum GLsizei count
const void GLsizei GLsizei stride
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
GLfloat GLfloat GLfloat GLfloat h
static quint32 ARGB2RGBA(quint32 x)
constexpr int qAlpha(QRgb rgb)
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length)
#define SIMD_EPILOGUE(i, length, max)
unsigned long long quint64