qimagescale_sse4.cpp source code [qtbase/src/gui/painting/qimagescale_sse4.cpp]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 The Qt Company Ltd.
4	** Contact: https://www.qt.io/licensing/
5	**
6	** This file is part of the QtGui module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial License Usage
10	** Licensees holding valid commercial Qt licenses may use this file in
11	** accordance with the commercial license agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and The Qt Company. For licensing terms
14	** and conditions see https://www.qt.io/terms-conditions. For further
15	** information use the contact form at https://www.qt.io/contact-us.
16	**
17	** GNU Lesser General Public License Usage
18	** Alternatively, this file may be used under the terms of the GNU Lesser
19	** General Public License version 3 as published by the Free Software
20	** Foundation and appearing in the file LICENSE.LGPL3 included in the
21	** packaging of this file. Please review the following information to
22	** ensure the GNU Lesser General Public License version 3 requirements
23	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24	**
25	** GNU General Public License Usage
26	** Alternatively, this file may be used under the terms of the GNU
27	** General Public License version 2.0 or (at your option) the GNU General
28	** Public license version 3 or any later version approved by the KDE Free
29	** Qt Foundation. The licenses are as published by the Free Software
30	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31	** included in the packaging of this file. Please review the following
32	** information to ensure the GNU General Public License requirements will
33	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34	** https://www.gnu.org/licenses/gpl-3.0.html.
35	**
36	** $QT_END_LICENSE$
37	**
38	****************************************************************************/
39
40	#include "qimagescale_p.h"
41	#include "qimage.h"
42	#include <private/qdrawhelper_x86_p.h>
43	#include <private/qsimd_p.h>
44
45	#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
46	#include "qsemaphore.h"
47	#include "qthreadpool.h"
48	#endif
49
50	#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
51
52	QT_BEGIN_NAMESPACE
53
54	using namespace QImageScale;
55
56	template<typename T>
57	static inline void multithread_pixels_function(QImageScaleInfo isi, int* dh, const T &scaleSection)
58	{
59	#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
60	int segments = (qsizetype(isi->sh) * isi->sw) / (`1`<<`16`);
61	segments = std::min(a: segments, b: dh);
62	QThreadPool *threadPool = QThreadPool::globalInstance();
63	if (segments > `1` && threadPool && !threadPool->contains(thread: QThread::currentThread())) {
64	QSemaphore semaphore;
65	int y = `0`;
66	for (int i = `0`; i < segments; ++i) {
67	int yn = (dh - y) / (segments - i);
68	threadPool->start([&, y, yn]() {
69	scaleSection(y, y + yn);
70	semaphore.release(n: `1`);
71	});
72	y += yn;
73	}
74	semaphore.acquire(n: segments);
75	return;
76	}
77	#endif
78	scaleSection(`0`, dh);
79	}
80
81	inline static __m128i Q_DECL_VECTORCALL
82	qt_qimageScaleAARGBA_helper(const unsigned int pix, int* xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
83	{
84	__m128i vpix = _mm_cvtepu8_epi32(V: _mm_cvtsi32_si128(a: *pix));
85	__m128i vx = _mm_mullo_epi32(V1: vpix, V2: vxyap);
86	int i;
87	for (i = (`1` << `14`) - xyap; i > Cxy; i -= Cxy) {
88	pix += step;
89	vpix = _mm_cvtepu8_epi32(V: _mm_cvtsi32_si128(a: *pix));
90	vx = _mm_add_epi32(a: vx, b: _mm_mullo_epi32(V1: vpix, V2: vCxy));
91	}
92	pix += step;
93	vpix = _mm_cvtepu8_epi32(V: _mm_cvtsi32_si128(a: *pix));
94	vx = _mm_add_epi32(a: vx, b: _mm_mullo_epi32(V1: vpix, V2: _mm_set1_epi32(i: i)));
95	return vx;
96	}
97
98	template<bool RGB>
99	void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo isi, unsigned* int *dest,
100	int dw, int dh, int dow, int sow)
101	{
102	const unsigned int **ypoints = isi->ypoints;
103	const int *xpoints = isi->xpoints;
104	const int *xapoints = isi->xapoints;
105	const int *yapoints = isi->yapoints;
106
107	const __m128i v256 = _mm_set1_epi32(i: `256`);
108
109	/ go through every scanline in the output buffer /
110	auto scaleSection = [&] (int yStart, int yEnd) {
111	for (int y = yStart; y < yEnd; ++y) {
112	const int Cy = yapoints[y] >> `16`;
113	const int yap = yapoints[y] & `0xffff`;
114	const __m128i vCy = _mm_set1_epi32(i: Cy);
115	const __m128i vyap = _mm_set1_epi32(i: yap);
116
117	unsigned int dptr = dest + (y dow);
118	for (int x = `0`; x < dw; x++) {
119	const unsigned int *sptr = ypoints[y] + xpoints[x];
120	__m128i vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: yap, Cxy: Cy, step: sow, vxyap: vyap, vCxy: vCy);
121
122	const int xap = xapoints[x];
123	if (xap > `0`) {
124	const __m128i vxap = _mm_set1_epi32(i: xap);
125	const __m128i vinvxap = _mm_sub_epi32(a: v256, b: vxap);
126	__m128i vr = qt_qimageScaleAARGBA_helper(pix: sptr + `1`, xyap: yap, Cxy: Cy, step: sow, vxyap: vyap, vCxy: vCy);
127
128	vx = _mm_mullo_epi32(V1: vx, V2: vinvxap);
129	vr = _mm_mullo_epi32(V1: vr, V2: vxap);
130	vx = _mm_add_epi32(a: vx, b: vr);
131	vx = _mm_srli_epi32(a: vx, count: `8`);
132	}
133	vx = _mm_srli_epi32(a: vx, count: `14`);
134	vx = _mm_packus_epi32(V1: vx, V2: vx);
135	vx = _mm_packus_epi16(a: vx, b: vx);
136	*dptr = _mm_cvtsi128_si32(a: vx);
137	if (RGB)
138	*dptr \|= `0xff000000`;
139	dptr++;
140	}
141	}
142	};
143	multithread_pixels_function(isi, dh, scaleSection);
144	}
145
146	template<bool RGB>
147	void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo isi, unsigned* int *dest,
148	int dw, int dh, int dow, int sow)
149	{
150	const unsigned int **ypoints = isi->ypoints;
151	int *xpoints = isi->xpoints;
152	int *xapoints = isi->xapoints;
153	int *yapoints = isi->yapoints;
154
155	const __m128i v256 = _mm_set1_epi32(i: `256`);
156
157	/ go through every scanline in the output buffer /
158	auto scaleSection = [&] (int yStart, int yEnd) {
159	for (int y = yStart; y < yEnd; ++y) {
160	unsigned int dptr = dest + (y dow);
161	for (int x = `0`; x < dw; x++) {
162	int Cx = xapoints[x] >> `16`;
163	int xap = xapoints[x] & `0xffff`;
164	const __m128i vCx = _mm_set1_epi32(i: Cx);
165	const __m128i vxap = _mm_set1_epi32(i: xap);
166
167	const unsigned int *sptr = ypoints[y] + xpoints[x];
168	__m128i vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: `1`, vxyap: vxap, vCxy: vCx);
169
170	int yap = yapoints[y];
171	if (yap > `0`) {
172	const __m128i vyap = _mm_set1_epi32(i: yap);
173	const __m128i vinvyap = _mm_sub_epi32(a: v256, b: vyap);
174	__m128i vr = qt_qimageScaleAARGBA_helper(pix: sptr + sow, xyap: xap, Cxy: Cx, step: `1`, vxyap: vxap, vCxy: vCx);
175
176	vx = _mm_mullo_epi32(V1: vx, V2: vinvyap);
177	vr = _mm_mullo_epi32(V1: vr, V2: vyap);
178	vx = _mm_add_epi32(a: vx, b: vr);
179	vx = _mm_srli_epi32(a: vx, count: `8`);
180	}
181	vx = _mm_srli_epi32(a: vx, count: `14`);
182	vx = _mm_packus_epi32(V1: vx, V2: vx);
183	vx = _mm_packus_epi16(a: vx, b: vx);
184	*dptr = _mm_cvtsi128_si32(a: vx);
185	if (RGB)
186	*dptr \|= `0xff000000`;
187	dptr++;
188	}
189	}
190	};
191	multithread_pixels_function(isi, dh, scaleSection);
192	}
193
194	template<bool RGB>
195	void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo isi, unsigned* int *dest,
196	int dw, int dh, int dow, int sow)
197	{
198	const unsigned int **ypoints = isi->ypoints;
199	int *xpoints = isi->xpoints;
200	int *xapoints = isi->xapoints;
201	int *yapoints = isi->yapoints;
202
203	auto scaleSection = [&] (int yStart, int yEnd) {
204	for (int y = yStart; y < yEnd; ++y) {
205	int Cy = yapoints[y] >> `16`;
206	int yap = yapoints[y] & `0xffff`;
207	const __m128i vCy = _mm_set1_epi32(i: Cy);
208	const __m128i vyap = _mm_set1_epi32(i: yap);
209
210	unsigned int dptr = dest + (y dow);
211	for (int x = `0`; x < dw; x++) {
212	const int Cx = xapoints[x] >> `16`;
213	const int xap = xapoints[x] & `0xffff`;
214	const __m128i vCx = _mm_set1_epi32(i: Cx);
215	const __m128i vxap = _mm_set1_epi32(i: xap);
216
217	const unsigned int *sptr = ypoints[y] + xpoints[x];
218	__m128i vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: `1`, vxyap: vxap, vCxy: vCx);
219	__m128i vr = _mm_mullo_epi32(V1: _mm_srli_epi32(a: vx, count: `4`), V2: vyap);
220
221	int j;
222	for (j = (`1` << `14`) - yap; j > Cy; j -= Cy) {
223	sptr += sow;
224	vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: `1`, vxyap: vxap, vCxy: vCx);
225	vr = _mm_add_epi32(a: vr, b: _mm_mullo_epi32(V1: _mm_srli_epi32(a: vx, count: `4`), V2: vCy));
226	}
227	sptr += sow;
228	vx = qt_qimageScaleAARGBA_helper(pix: sptr, xyap: xap, Cxy: Cx, step: `1`, vxyap: vxap, vCxy: vCx);
229	vr = _mm_add_epi32(a: vr, b: _mm_mullo_epi32(V1: _mm_srli_epi32(a: vx, count: `4`), V2: _mm_set1_epi32(i: j)));
230
231	vr = _mm_srli_epi32(a: vr, count: `24`);
232	vr = _mm_packus_epi32(V1: vr, V2: _mm_setzero_si128());
233	vr = _mm_packus_epi16(a: vr, b: _mm_setzero_si128());
234	*dptr = _mm_cvtsi128_si32(a: vr);
235	if (RGB)
236	*dptr \|= `0xff000000`;
237	dptr++;
238	}
239	}
240	};
241	multithread_pixels_function(isi, dh, scaleSection);
242	}
243
244	template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo isi, unsigned* int *dest,
245	int dw, int dh, int dow, int sow);
246
247	template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo isi, unsigned* int *dest,
248	int dw, int dh, int dow, int sow);
249
250	template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo isi, unsigned* int *dest,
251	int dw, int dh, int dow, int sow);
252
253	template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo isi, unsigned* int *dest,
254	int dw, int dh, int dow, int sow);
255
256	template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo isi, unsigned* int *dest,
257	int dw, int dh, int dow, int sow);
258
259	template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo isi, unsigned* int *dest,
260	int dw, int dh, int dow, int sow);
261
262	QT_END_NAMESPACE
263
264	#endif
265

source code of qtbase/src/gui/painting/qimagescale_sse4.cpp