1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qimagescale_p.h"
41#include "qimage.h"
42#include <private/qdrawhelper_x86_p.h>
43#include <private/qsimd_p.h>
44
45#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
46
47QT_BEGIN_NAMESPACE
48
49using namespace QImageScale;
50
51inline static __m128i Q_DECL_VECTORCALL
52qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
53{
54 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
55 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
56 int i;
57 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
58 pix += step;
59 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
60 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
61 }
62 pix += step;
63 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
64 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
65 return vx;
66}
67
68template<bool RGB>
69void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
70 int dw, int dh, int dow, int sow)
71{
72 const unsigned int **ypoints = isi->ypoints;
73 int *xpoints = isi->xpoints;
74 int *xapoints = isi->xapoints;
75 int *yapoints = isi->yapoints;
76
77 const __m128i v256 = _mm_set1_epi32(256);
78
79 /* go through every scanline in the output buffer */
80 for (int y = 0; y < dh; y++) {
81 int Cy = yapoints[y] >> 16;
82 int yap = yapoints[y] & 0xffff;
83 const __m128i vCy = _mm_set1_epi32(Cy);
84 const __m128i vyap = _mm_set1_epi32(yap);
85
86 unsigned int *dptr = dest + (y * dow);
87 for (int x = 0; x < dw; x++) {
88 const unsigned int *sptr = ypoints[y] + xpoints[x];
89 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
90
91 int xap = xapoints[x];
92 if (xap > 0) {
93 const __m128i vxap = _mm_set1_epi32(xap);
94 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
95 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
96
97 vx = _mm_mullo_epi32(vx, vinvxap);
98 vr = _mm_mullo_epi32(vr, vxap);
99 vx = _mm_add_epi32(vx, vr);
100 vx = _mm_srli_epi32(vx, 8);
101 }
102 vx = _mm_srli_epi32(vx, 14);
103 vx = _mm_packus_epi32(vx, _mm_setzero_si128());
104 vx = _mm_packus_epi16(vx, _mm_setzero_si128());
105 *dptr = _mm_cvtsi128_si32(vx);
106 if (RGB)
107 *dptr |= 0xff000000;
108 dptr++;
109 }
110 }
111}
112
113template<bool RGB>
114void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
115 int dw, int dh, int dow, int sow)
116{
117 const unsigned int **ypoints = isi->ypoints;
118 int *xpoints = isi->xpoints;
119 int *xapoints = isi->xapoints;
120 int *yapoints = isi->yapoints;
121
122 const __m128i v256 = _mm_set1_epi32(256);
123
124 /* go through every scanline in the output buffer */
125 for (int y = 0; y < dh; y++) {
126 unsigned int *dptr = dest + (y * dow);
127 for (int x = 0; x < dw; x++) {
128 int Cx = xapoints[x] >> 16;
129 int xap = xapoints[x] & 0xffff;
130 const __m128i vCx = _mm_set1_epi32(Cx);
131 const __m128i vxap = _mm_set1_epi32(xap);
132
133 const unsigned int *sptr = ypoints[y] + xpoints[x];
134 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
135
136 int yap = yapoints[y];
137 if (yap > 0) {
138 const __m128i vyap = _mm_set1_epi32(yap);
139 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
140 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
141
142 vx = _mm_mullo_epi32(vx, vinvyap);
143 vr = _mm_mullo_epi32(vr, vyap);
144 vx = _mm_add_epi32(vx, vr);
145 vx = _mm_srli_epi32(vx, 8);
146 }
147 vx = _mm_srli_epi32(vx, 14);
148 vx = _mm_packus_epi32(vx, _mm_setzero_si128());
149 vx = _mm_packus_epi16(vx, _mm_setzero_si128());
150 *dptr = _mm_cvtsi128_si32(vx);
151 if (RGB)
152 *dptr |= 0xff000000;
153 dptr++;
154 }
155 }
156}
157
158template<bool RGB>
159void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
160 int dw, int dh, int dow, int sow)
161{
162 const unsigned int **ypoints = isi->ypoints;
163 int *xpoints = isi->xpoints;
164 int *xapoints = isi->xapoints;
165 int *yapoints = isi->yapoints;
166
167 for (int y = 0; y < dh; y++) {
168 int Cy = yapoints[y] >> 16;
169 int yap = yapoints[y] & 0xffff;
170 const __m128i vCy = _mm_set1_epi32(Cy);
171 const __m128i vyap = _mm_set1_epi32(yap);
172
173 unsigned int *dptr = dest + (y * dow);
174 for (int x = 0; x < dw; x++) {
175 const int Cx = xapoints[x] >> 16;
176 const int xap = xapoints[x] & 0xffff;
177 const __m128i vCx = _mm_set1_epi32(Cx);
178 const __m128i vxap = _mm_set1_epi32(xap);
179
180 const unsigned int *sptr = ypoints[y] + xpoints[x];
181 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
182 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
183
184 int j;
185 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
186 sptr += sow;
187 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
188 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
189 }
190 sptr += sow;
191 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
192 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
193
194 vr = _mm_srli_epi32(vr, 24);
195 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
196 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
197 *dptr = _mm_cvtsi128_si32(vr);
198 if (RGB)
199 *dptr |= 0xff000000;
200 dptr++;
201 }
202 }
203}
204
205template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
206 int dw, int dh, int dow, int sow);
207
208template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
209 int dw, int dh, int dow, int sow);
210
211template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
212 int dw, int dh, int dow, int sow);
213
214template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
215 int dw, int dh, int dow, int sow);
216
217template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
218 int dw, int dh, int dow, int sow);
219
220template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
221 int dw, int dh, int dow, int sow);
222
223QT_END_NAMESPACE
224
225#endif
226