6#include <private/qdrawhelper_x86_p.h>
7#include <private/qsimd_p.h>
9#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
12#include <private/qthreadpool_p.h>
15#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
24#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
33 threadPool->
start([&,
y, yn]() {
34 scaleSection(
y,
y + yn);
49 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*
pix));
50 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
52 for (
i = (1 << 14) - xyap;
i > Cxy;
i -= Cxy) {
54 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*
pix));
55 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
58 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*
pix));
59 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(
i)));
// NOTE: this listing is incomplete. The template header, the declarations of
// xapoints/yapoints, the statements that compute vx and vr, and all closing
// braces are not present in this view.
//
// For every destination row y in [yStart, yEnd) and every column x in
// [0, dw), produces one 32-bit pixel from the source position
// ypoints[y] + xpoints[x] and stores it through dptr.
// NOTE(review): the name suggests x is upscaled (interpolated between
// neighbours) and y is downscaled (accumulated over rows) -- confirm against
// the full source.
64void qt_qimageScaleAARGBA_up_x_down_y_sse4(
QImageScaleInfo *isi,
unsigned int *dest,
65 int dw,
int dh,
int dow,
int sow)
// Lookup tables taken from the scale info.
67 const unsigned int **ypoints = isi->
ypoints;
68 const int *xpoints = isi->
xpoints;
// Constant 256 in all four 32-bit lanes; used below to form (256 - xap).
72 const __m128i v256 = _mm_set1_epi32(256);
// Processes the destination rows [yStart, yEnd). Captures everything by
// reference.
75 auto scaleSection = [&] (
int yStart,
int yEnd) {
76 for (
int y = yStart;
y < yEnd; ++
y) {
// yapoints[y] packs two values: bits 16 and up give Cy, the low 16 bits
// give yap. Both are broadcast to all four lanes.
77 const int Cy = yapoints[
y] >> 16;
78 const int yap = yapoints[
y] & 0xffff;
79 const __m128i vCy = _mm_set1_epi32(Cy);
80 const __m128i vyap = _mm_set1_epi32(yap);
// Start of destination row y; dow is the row pitch in unsigned int units.
82 unsigned int *dptr = dest + (
y * dow);
83 for (
int x = 0;
x < dw;
x++) {
// Source pixel for this destination pixel.
84 const unsigned int *sptr = ypoints[
y] + xpoints[
x];
87 const int xap = xapoints[
x];
// xap and its complement (256 - xap); the two weights sum to 256.
89 const __m128i vxap = _mm_set1_epi32(xap);
90 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
// vx = (vx * (256 - xap) + vr * xap) >> 8, per 32-bit lane.
// NOTE(review): the statements that produce vx and vr, and any condition
// guarding this mix, are not visible here.
93 vx = _mm_mullo_epi32(vx, vinvxap);
94 vr = _mm_mullo_epi32(vr, vxap);
95 vx = _mm_add_epi32(vx, vr);
96 vx = _mm_srli_epi32(vx, 8);
// Drop the low 14 bits of every lane.
// NOTE(review): presumably this removes a 1 << 14 fixed-point scale from the
// accumulated value -- confirm.
98 vx = _mm_srli_epi32(vx, 14);
// Narrow the four 32-bit lanes to 16 and then 8 bits with unsigned
// saturation, and store the low 32 bits as one destination pixel.
99 vx = _mm_packus_epi32(vx, vx);
100 vx = _mm_packus_epi16(vx, vx);
101 *dptr = _mm_cvtsi128_si32(vx);
// NOTE: this listing is incomplete. The template header, the declarations of
// xpoints/xapoints/yapoints, the statements that compute vx and vr, and all
// closing braces are not present in this view.
//
// For every destination row y in [yStart, yEnd) and every column x in
// [0, dw), produces one 32-bit pixel from the source position
// ypoints[y] + xpoints[x] and stores it through dptr.
// NOTE(review): the name suggests x is downscaled (accumulated over columns)
// and y is upscaled (interpolated between rows) -- confirm against the full
// source.
112void qt_qimageScaleAARGBA_down_x_up_y_sse4(
QImageScaleInfo *isi,
unsigned int *dest,
113 int dw,
int dh,
int dow,
int sow)
115 const unsigned int **ypoints = isi->
ypoints;
// Constant 256 in all four 32-bit lanes; used below to form (256 - yap).
120 const __m128i v256 = _mm_set1_epi32(256);
// Processes the destination rows [yStart, yEnd). Captures everything by
// reference.
123 auto scaleSection = [&] (
int yStart,
int yEnd) {
124 for (
int y = yStart;
y < yEnd; ++
y) {
// Start of destination row y; dow is the row pitch in unsigned int units.
125 unsigned int *dptr = dest + (
y * dow);
126 for (
int x = 0;
x < dw;
x++) {
// xapoints[x] packs two values: bits 16 and up give Cx, the low 16 bits
// give xap. Both are broadcast to all four lanes.
127 int Cx = xapoints[
x] >> 16;
128 int xap = xapoints[
x] & 0xffff;
129 const __m128i vCx = _mm_set1_epi32(Cx);
130 const __m128i vxap = _mm_set1_epi32(xap);
// Source pixel for this destination pixel.
132 const unsigned int *sptr = ypoints[
y] + xpoints[
x];
// Unlike xapoints[x], yapoints[y] is used whole here (no shift or mask).
135 int yap = yapoints[
y];
// yap and its complement (256 - yap); the two weights sum to 256.
137 const __m128i vyap = _mm_set1_epi32(yap);
138 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
// vx = (vx * (256 - yap) + vr * yap) >> 8, per 32-bit lane.
// NOTE(review): the statements that produce vx and vr, and any condition
// guarding this mix, are not visible here.
141 vx = _mm_mullo_epi32(vx, vinvyap);
142 vr = _mm_mullo_epi32(vr, vyap);
143 vx = _mm_add_epi32(vx, vr);
144 vx = _mm_srli_epi32(vx, 8);
// Drop the low 14 bits of every lane.
// NOTE(review): presumably this removes a 1 << 14 fixed-point scale from the
// accumulated value -- confirm.
146 vx = _mm_srli_epi32(vx, 14);
// Narrow the four 32-bit lanes to 16 and then 8 bits with unsigned
// saturation, and store the low 32 bits as one destination pixel.
147 vx = _mm_packus_epi32(vx, vx);
148 vx = _mm_packus_epi16(vx, vx);
149 *dptr = _mm_cvtsi128_si32(vx);
// NOTE: this listing is incomplete. The template header, the declarations of
// xpoints/xapoints/yapoints and j, the statements that compute vx, and all
// closing braces are not present in this view.
//
// For every destination row y in [yStart, yEnd) and every column x in
// [0, dw), accumulates a weighted sum into vr and stores it as one 32-bit
// pixel through dptr.
// NOTE(review): the name suggests both axes are downscaled -- confirm against
// the full source.
160void qt_qimageScaleAARGBA_down_xy_sse4(
QImageScaleInfo *isi,
unsigned int *dest,
161 int dw,
int dh,
int dow,
int sow)
163 const unsigned int **ypoints = isi->
ypoints;
// Processes the destination rows [yStart, yEnd). Captures everything by
// reference.
168 auto scaleSection = [&] (
int yStart,
int yEnd) {
169 for (
int y = yStart;
y < yEnd; ++
y) {
// yapoints[y] packs two values: bits 16 and up give Cy, the low 16 bits
// give yap. Both are broadcast to all four lanes.
170 int Cy = yapoints[
y] >> 16;
171 int yap = yapoints[
y] & 0xffff;
172 const __m128i vCy = _mm_set1_epi32(Cy);
173 const __m128i vyap = _mm_set1_epi32(yap);
// Start of destination row y; dow is the row pitch in unsigned int units.
175 unsigned int *dptr = dest + (
y * dow);
176 for (
int x = 0;
x < dw;
x++) {
// xapoints[x] is split the same way: bits 16 and up give Cx, the low
// 16 bits give xap.
177 const int Cx = xapoints[
x] >> 16;
178 const int xap = xapoints[
x] & 0xffff;
179 const __m128i vCx = _mm_set1_epi32(Cx);
180 const __m128i vxap = _mm_set1_epi32(xap);
// Source pixel for this destination pixel.
182 const unsigned int *sptr = ypoints[
y] + xpoints[
x];
// First contribution: (vx >> 4) weighted by yap.
// NOTE(review): the statement that produces vx is not visible here.
184 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
// j starts at (1 << 14) - yap and drops by Cy per iteration while it is
// still greater than Cy; each iteration adds (vx >> 4) * Cy to vr.
// NOTE(review): vx is presumably recomputed for the next source row inside
// the loop; those statements are not visible here.
187 for (
j = (1 << 14) - yap;
j > Cy;
j -= Cy) {
190 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
// Final contribution: (vx >> 4) weighted by the value left in j.
194 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(
j)));
// Keep only the bits from bit 24 up in every lane, narrow 32 -> 16 -> 8 bits
// with unsigned saturation (upper half filled with zeros), and store the low
// 32 bits as one destination pixel.
196 vr = _mm_srli_epi32(vr, 24);
197 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
198 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
199 *dptr = _mm_cvtsi128_si32(vr);
// Explicit instantiations of the three scaling function templates, each for
// both values (false and true) of their bool template argument.
// NOTE(review): the meaning of the bool argument is not visible in this
// listing -- confirm against the template declarations.

// up_x_down_y: <false> and <true>.
209template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(
QImageScaleInfo *isi,
unsigned int *dest,
210 int dw,
int dh,
int dow,
int sow);
212template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(
QImageScaleInfo *isi,
unsigned int *dest,
213 int dw,
int dh,
int dow,
int sow);
// down_x_up_y: <false> and <true>.
215template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(
QImageScaleInfo *isi,
unsigned int *dest,
216 int dw,
int dh,
int dow,
int sow);
218template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(
QImageScaleInfo *isi,
unsigned int *dest,
219 int dw,
int dh,
int dow,
int sow);
// down_xy: <false> and <true>.
221template void qt_qimageScaleAARGBA_down_xy_sse4<false>(
QImageScaleInfo *isi,
unsigned int *dest,
222 int dw,
int dh,
int dow,
int sow);
224template void qt_qimageScaleAARGBA_down_xy_sse4<true>(
QImageScaleInfo *isi,
unsigned int *dest,
225 int dw,
int dh,
int dow,
int sow);
void acquire(int n=1)
Tries to acquire n resources guarded by the semaphore.
void release(int n=1)
Releases n resources guarded by the semaphore.
static QThreadPool * qtGuiInstance()
Returns the QThreadPool instance for Qt Gui.
void start(QRunnable *runnable, int priority=0)
Reserves a thread and uses it to run runnable, unless this thread will make the current thread count exceed maxThreadCount().
bool contains(const QThread *thread) const
static QThread * currentThread()
Combined button and popup list for selecting options.
#define Q_DECL_VECTORCALL
static void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
static void qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, int &r, int &g, int &b, int &a)
GLint GLint GLint GLint GLint x
[0]
const unsigned int ** ypoints