// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qimagescale_p.h"
5#include "qimage.h"
6#include <private/qdrawhelper_x86_p.h>
7#include <private/qsimd_p.h>
8
9#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
10#include <qsemaphore.h>
11#include <qthreadpool.h>
12#include <private/qthreadpool_p.h>
13#endif
14
15#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
16
17QT_BEGIN_NAMESPACE
18
19using namespace QImageScale;
20
21template<typename T>
22static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
23{
24#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
25 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
26 segments = std::min(a: segments, b: dh);
27 QThreadPool *threadPool = QThreadPoolPrivate::qtGuiInstance();
28 if (segments > 1 && threadPool && !threadPool->contains(thread: QThread::currentThread())) {
29 QSemaphore semaphore;
30 int y = 0;
31 for (int i = 0; i < segments; ++i) {
32 int yn = (dh - y) / (segments - i);
33 threadPool->start([&, y, yn]() {
34 scaleSection(y, y + yn);
35 semaphore.release(n: 1);
36 });
37 y += yn;
38 }
39 semaphore.acquire(n: segments);
40 return;
41 }
42#endif
43 scaleSection(0, dh);
44}
45
46inline static __m128i Q_DECL_VECTORCALL
47qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
48{
49 __m128i vpix = _mm_cvtepu8_epi32(V: _mm_cvtsi32_si128(a: *pix));
50 __m128i vx = _mm_mullo_epi32(V1: vpix, V2: vxyap);
51 int i;
52 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
53 pix += step;
54 vpix = _mm_cvtepu8_epi32(V: _mm_cvtsi32_si128(a: *pix));
55 vx = _mm_add_epi32(a: vx, b: _mm_mullo_epi32(V1: vpix, V2: vCxy));
56 }
57 pix += step;
58 vpix = _mm_cvtepu8_epi32(V: _mm_cvtsi32_si128(a: *pix));
59 vx = _mm_add_epi32(a: vx, b: _mm_mullo_epi32(V1: vpix, V2: _mm_set1_epi32(i: i)));
60 return vx;
61}
62
63template<bool RGB>
64void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
65 int dw, int dh, int dow, int sow)
66{
67 const unsigned int **ypoints = isi->ypoints;
68 const int *xpoints = isi->xpoints;
69 const int *xapoints = isi->xapoints;
70 const int *yapoints = isi->yapoints;
71
72 const __m128i v256 = _mm_set1_epi32(i: 256);
73
74 /* go through every scanline in the output buffer */
75 auto scaleSection = [&] (int yStart, int yEnd) {
76 for (int y = yStart; y < yEnd; ++y) {
77 const int Cy = yapoints[y] >> 16;
78 const int yap = yapoints[y] & 0xffff;
79 const __m128i vCy = _mm_set1_epi32(i: Cy);
80 const __m128i vyap = _mm_set1_epi32(i: yap);
81
82 unsigned int *dptr = dest + (y * dow);
83 for (int x = 0; x < dw; x++) {
84 const unsigned int *sptr = ypoints[y] + xpoints[x];
85 __m128i vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: yap, Cxy: Cy, step: sow, vxyap: vyap, vCxy: vCy);
86
87 const int xap = xapoints[x];
88 if (xap > 0) {
89 const __m128i vxap = _mm_set1_epi32(i: xap);
90 const __m128i vinvxap = _mm_sub_epi32(a: v256, b: vxap);
91 __m128i vr = qt_qimageScaleAARGBA_helper(pix: sptr + 1, xyap: yap, Cxy: Cy, step: sow, vxyap: vyap, vCxy: vCy);
92
93 vx = _mm_mullo_epi32(V1: vx, V2: vinvxap);
94 vr = _mm_mullo_epi32(V1: vr, V2: vxap);
95 vx = _mm_add_epi32(a: vx, b: vr);
96 vx = _mm_srli_epi32(a: vx, count: 8);
97 }
98 vx = _mm_srli_epi32(a: vx, count: 14);
99 vx = _mm_packus_epi32(V1: vx, V2: vx);
100 vx = _mm_packus_epi16(a: vx, b: vx);
101 *dptr = _mm_cvtsi128_si32(a: vx);
102 if (RGB)
103 *dptr |= 0xff000000;
104 dptr++;
105 }
106 }
107 };
108 multithread_pixels_function(isi, dh, scaleSection);
109}
110
111template<bool RGB>
112void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
113 int dw, int dh, int dow, int sow)
114{
115 const unsigned int **ypoints = isi->ypoints;
116 int *xpoints = isi->xpoints;
117 int *xapoints = isi->xapoints;
118 int *yapoints = isi->yapoints;
119
120 const __m128i v256 = _mm_set1_epi32(i: 256);
121
122 /* go through every scanline in the output buffer */
123 auto scaleSection = [&] (int yStart, int yEnd) {
124 for (int y = yStart; y < yEnd; ++y) {
125 unsigned int *dptr = dest + (y * dow);
126 for (int x = 0; x < dw; x++) {
127 int Cx = xapoints[x] >> 16;
128 int xap = xapoints[x] & 0xffff;
129 const __m128i vCx = _mm_set1_epi32(i: Cx);
130 const __m128i vxap = _mm_set1_epi32(i: xap);
131
132 const unsigned int *sptr = ypoints[y] + xpoints[x];
133 __m128i vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: 1, vxyap: vxap, vCxy: vCx);
134
135 int yap = yapoints[y];
136 if (yap > 0) {
137 const __m128i vyap = _mm_set1_epi32(i: yap);
138 const __m128i vinvyap = _mm_sub_epi32(a: v256, b: vyap);
139 __m128i vr = qt_qimageScaleAARGBA_helper(pix: sptr + sow, xyap: xap, Cxy: Cx, step: 1, vxyap: vxap, vCxy: vCx);
140
141 vx = _mm_mullo_epi32(V1: vx, V2: vinvyap);
142 vr = _mm_mullo_epi32(V1: vr, V2: vyap);
143 vx = _mm_add_epi32(a: vx, b: vr);
144 vx = _mm_srli_epi32(a: vx, count: 8);
145 }
146 vx = _mm_srli_epi32(a: vx, count: 14);
147 vx = _mm_packus_epi32(V1: vx, V2: vx);
148 vx = _mm_packus_epi16(a: vx, b: vx);
149 *dptr = _mm_cvtsi128_si32(a: vx);
150 if (RGB)
151 *dptr |= 0xff000000;
152 dptr++;
153 }
154 }
155 };
156 multithread_pixels_function(isi, dh, scaleSection);
157}
158
159template<bool RGB>
160void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
161 int dw, int dh, int dow, int sow)
162{
163 const unsigned int **ypoints = isi->ypoints;
164 int *xpoints = isi->xpoints;
165 int *xapoints = isi->xapoints;
166 int *yapoints = isi->yapoints;
167
168 auto scaleSection = [&] (int yStart, int yEnd) {
169 for (int y = yStart; y < yEnd; ++y) {
170 int Cy = yapoints[y] >> 16;
171 int yap = yapoints[y] & 0xffff;
172 const __m128i vCy = _mm_set1_epi32(i: Cy);
173 const __m128i vyap = _mm_set1_epi32(i: yap);
174
175 unsigned int *dptr = dest + (y * dow);
176 for (int x = 0; x < dw; x++) {
177 const int Cx = xapoints[x] >> 16;
178 const int xap = xapoints[x] & 0xffff;
179 const __m128i vCx = _mm_set1_epi32(i: Cx);
180 const __m128i vxap = _mm_set1_epi32(i: xap);
181
182 const unsigned int *sptr = ypoints[y] + xpoints[x];
183 __m128i vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: 1, vxyap: vxap, vCxy: vCx);
184 __m128i vr = _mm_mullo_epi32(V1: _mm_srli_epi32(a: vx, count: 4), V2: vyap);
185
186 int j;
187 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
188 sptr += sow;
189 vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: 1, vxyap: vxap, vCxy: vCx);
190 vr = _mm_add_epi32(a: vr, b: _mm_mullo_epi32(V1: _mm_srli_epi32(a: vx, count: 4), V2: vCy));
191 }
192 sptr += sow;
193 vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: 1, vxyap: vxap, vCxy: vCx);
194 vr = _mm_add_epi32(a: vr, b: _mm_mullo_epi32(V1: _mm_srli_epi32(a: vx, count: 4), V2: _mm_set1_epi32(i: j)));
195
196 vr = _mm_srli_epi32(a: vr, count: 24);
197 vr = _mm_packus_epi32(V1: vr, V2: _mm_setzero_si128());
198 vr = _mm_packus_epi16(a: vr, b: _mm_setzero_si128());
199 *dptr = _mm_cvtsi128_si32(a: vr);
200 if (RGB)
201 *dptr |= 0xff000000;
202 dptr++;
203 }
204 }
205 };
206 multithread_pixels_function(isi, dh, scaleSection);
207}
208
209template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
210 int dw, int dh, int dow, int sow);
211
212template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
213 int dw, int dh, int dow, int sow);
214
215template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
216 int dw, int dh, int dow, int sow);
217
218template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
219 int dw, int dh, int dow, int sow);
220
221template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
222 int dw, int dh, int dow, int sow);
223
224template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
225 int dw, int dh, int dow, int sow);
226
227QT_END_NAMESPACE
228
229#endif
230

// Source: qtbase/src/gui/painting/qimagescale_sse4.cpp