1 | /* |
2 | * Copyright © 2008 Fredrik Höglund <fredrik@kde.org> |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | * of this software and associated documentation files (the "Software"), to deal |
6 | * in the Software without restriction, including without limitation the rights |
7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | * copies of the Software, and to permit persons to whom the Software is |
9 | * furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
18 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
19 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
20 | */ |
21 | |
22 | #include <QThread> |
23 | #include <QWidget> |
24 | #include <QPixmap> |
25 | #include <QTimer> |
26 | #include <QX11Info> |
27 | #include <QDebug> |
28 | |
29 | #include <solid/device.h> |
30 | #include <solid/processor.h> |
31 | |
32 | #include <X11/Xlib.h> |
33 | #include <X11/Xutil.h> |
34 | |
35 | #include <string> |
36 | |
37 | #if defined(__INTEL_COMPILER) |
38 | # define HAVE_MMX |
39 | # define HAVE_SSE2 |
40 | #elif defined(__GNUC__) |
41 | # if defined(__MMX__) |
42 | # define HAVE_MMX |
43 | # endif |
44 | # if defined(__SSE2__) && __GNUC__ > 3 |
45 | # define HAVE_SSE2 |
46 | # endif |
47 | #endif |
48 | |
49 | #ifdef HAVE_MMX |
50 | # include <mmintrin.h> |
51 | #endif |
52 | |
53 | #ifdef HAVE_SSE2 |
54 | # include <emmintrin.h> |
55 | #endif |
56 | |
57 | #include "fadeeffect.h" |
58 | #include "fadeeffect.moc" |
59 | |
60 | |
#ifndef HAVE_SSE2
// Fallbacks used when the SSE2 intrinsics header is not included, so the rest of the
// file can call _mm_malloc()/_mm_free() unconditionally.

// Allocates size bytes with plain malloc(). The alignment argument is ignored.
static inline void *_mm_malloc(size_t size, int /* alignment */)
{
    void *const block = malloc(size);
    return block;
}

// Releases a block obtained from the fallback _mm_malloc().
static inline void _mm_free(void *p)
{
    free(p);
}
#endif
72 | |
73 | |
// Computes a * b / 255 using only integer adds and shifts: 0x80 is added to the product,
// then the biased value plus its own high byte is shifted down by eight bits.
static inline int multiply(int a, int b)
{
    const int biased = a * b + 0x80;
    const int folded = biased + (biased >> 8);
    return folded >> 8;
}
79 | |
80 | |
81 | static inline void load(const quint32 src, int *r, int *g, int *b) |
82 | { |
83 | *r = (src >> 16) & 0xff; |
84 | *g = (src >> 8) & 0xff; |
85 | *b = src & 0xff; |
86 | } |
87 | |
88 | |
89 | static inline void load16(const quint16 src, int *r, int *g, int *b) |
90 | { |
91 | *r = ((src >> 8) & 0x00f8) | ((src >> 13) & 0x0007); |
92 | *g = ((src >> 3) & 0x00fc) | ((src >> 9) & 0x0003); |
93 | *b = ((src << 3) & 0x00f8) | ((src >> 2) & 0x0007); |
94 | } |
95 | |
96 | |
97 | static inline quint32 store(const int r, const int g, const int b) |
98 | { |
99 | return (r << 16) | (g << 8) | b | 0xff000000; |
100 | } |
101 | |
102 | |
103 | static inline quint16 store16(const int r, const int g, const int b) |
104 | { |
105 | return (((r << 8) | (b >> 3)) & 0xf81f) | ((g << 3) & 0x07e0); |
106 | } |
107 | |
108 | |
109 | static void scanline_blend(const quint32 *over, const quint8 alpha, const quint32 *under, |
110 | quint32 *result, uint length) |
111 | { |
112 | for (uint i = 0; i < length; ++i) |
113 | { |
114 | int sr, sg, sb, dr, dg, db; |
115 | |
116 | load(over[i], &sr, &sg, &sb); |
117 | load(under[i], &dr, &dg, &db); |
118 | |
119 | dr = multiply((sr - dr), alpha) + dr; |
120 | dg = multiply((sg - dg), alpha) + dg; |
121 | db = multiply((sb - db), alpha) + db; |
122 | |
123 | result[i] = store(dr, dg, db); |
124 | } |
125 | } |
126 | |
127 | |
128 | static void scanline_blend_16(const quint16 *over, const quint8 alpha, const quint16 *under, |
129 | quint16 *result, uint length) |
130 | { |
131 | for (uint i = 0; i < length; ++i) |
132 | { |
133 | int sr, sg, sb, dr, dg, db; |
134 | |
135 | load16(over[i], &sr, &sg, &sb); |
136 | load16(under[i], &dr, &dg, &db); |
137 | |
138 | dr = multiply((sr - dr), alpha) + dr; |
139 | dg = multiply((sg - dg), alpha) + dg; |
140 | db = multiply((sb - db), alpha) + db; |
141 | |
142 | result[i] = store16(dr, dg, db); |
143 | } |
144 | } |
145 | |
146 | |
147 | |
148 | // ---------------------------------------------------------------------------- |
149 | |
150 | |
151 | |
152 | #ifdef HAVE_MMX |
153 | static inline __m64 multiply(const __m64 m1, const __m64 m2) |
154 | { |
155 | __m64 res = _mm_mullo_pi16(m1, m2); |
156 | res = _mm_adds_pi16(res, _mm_set1_pi16 (0x0080)); |
157 | res = _mm_adds_pi16(res, _mm_srli_pi16 (res, 8)); |
158 | return _mm_srli_pi16(res, 8); |
159 | } |
160 | |
161 | |
162 | static inline __m64 add(const __m64 m1, const __m64 m2) |
163 | { |
164 | return _mm_adds_pi16(m1, m2); |
165 | } |
166 | |
167 | |
168 | static inline __m64 load(const quint32 pixel, const __m64 zero) |
169 | { |
170 | __m64 m = _mm_cvtsi32_si64(pixel); |
171 | return _mm_unpacklo_pi8(m, zero); |
172 | } |
173 | |
174 | static inline quint32 store(const __m64 pixel, const __m64 zero) |
175 | { |
176 | __m64 packed = _mm_packs_pu16(pixel, zero); |
177 | return _mm_cvtsi64_si32(packed); |
178 | } |
179 | |
180 | |
181 | static void scanline_blend_mmx(const quint32 *over, const quint8 a, const quint32 *under, |
182 | quint32 *result, uint length) |
183 | { |
184 | register const __m64 alpha = _mm_set1_pi16(quint16 (a)); |
185 | register const __m64 negalpha = _mm_xor_si64(alpha, _mm_set1_pi16 (0x00ff)); |
186 | register const __m64 zero = _mm_setzero_si64(); |
187 | |
188 | for (uint i = 0; i < length; ++i) |
189 | { |
190 | __m64 src = load(over[i], zero); |
191 | __m64 dst = load(under[i], zero); |
192 | |
193 | src = multiply(src, alpha); |
194 | dst = multiply(dst, negalpha); |
195 | dst = add(src, dst); |
196 | |
197 | result[i] = store(dst, zero); |
198 | } |
199 | |
200 | _mm_empty(); |
201 | } |
202 | #endif // HAVE_MMX |
203 | |
204 | |
205 | // ---------------------------------------------------------------------------- |
206 | |
207 | |
208 | #ifdef HAVE_SSE2 |
209 | static inline __m128i multiply(const __m128i m1, const __m128i m2) |
210 | { |
211 | __m128i res = _mm_mullo_epi16(m1, m2); |
212 | res = _mm_adds_epi16(res, _mm_set1_epi16 (0x0080)); |
213 | res = _mm_adds_epi16(res, _mm_srli_epi16 (res, 8)); |
214 | return _mm_srli_epi16(res, 8); |
215 | } |
216 | |
217 | |
218 | static inline __m128i add(const __m128i m1, const __m128i m2) |
219 | { |
220 | return _mm_adds_epi16(m1, m2); |
221 | } |
222 | |
223 | |
224 | static inline __m128i lower(__m128i m) |
225 | { |
226 | return _mm_unpacklo_epi8(m, _mm_setzero_si128 ()); |
227 | } |
228 | |
229 | |
230 | static inline __m128i upper(__m128i m) |
231 | { |
232 | return _mm_unpackhi_epi8(m, _mm_setzero_si128 ()); |
233 | } |
234 | |
235 | |
236 | void scanline_blend_sse2(const __m128i *over, const quint8 a, const __m128i *under, |
237 | __m128i *result, uint length) |
238 | { |
239 | length = (length + 15) >> 4; |
240 | register const __m128i alpha = _mm_set1_epi16(__uint16_t (a)); |
241 | register const __m128i negalpha = _mm_xor_si128(alpha, _mm_set1_epi16 (0x00ff)); |
242 | |
243 | for (uint i = 0; i < length; i++) |
244 | { |
245 | __m128i squad = _mm_load_si128(over + i); |
246 | __m128i dquad = _mm_load_si128(under + i); |
247 | |
248 | __m128i src1 = lower(squad); |
249 | __m128i dst1 = lower(dquad); |
250 | __m128i src2 = upper(squad); |
251 | __m128i dst2 = upper(dquad); |
252 | |
253 | squad = add(multiply(src1, alpha), multiply(dst1, negalpha)); |
254 | dquad = add(multiply(src2, alpha), multiply(dst2, negalpha)); |
255 | |
256 | dquad = _mm_packus_epi16(squad, dquad); |
257 | _mm_store_si128(result + i, dquad); |
258 | } |
259 | } |
260 | #endif // HAVE_SSE2 |
261 | |
262 | |
263 | |
264 | // ---------------------------------------------------------------------------- |
265 | |
266 | |
267 | |
// Worker thread that blends the original screenshot with a grayscale copy of it and
// writes the result into the XImage supplied with setImage(). The blend is performed
// once per start() of the thread, using the alpha value set with setAlpha().
class BlendingThread : public QThread
{
public:
    BlendingThread(QObject *parent);
    ~BlendingThread();

    // Sets the image to operate on; copies its pixel data into the two internal buffers.
    void setImage(XImage *image);
    // Sets the weight of the original image used by the next run of the thread.
    void setAlpha(int alpha) { m_alpha = alpha; }

private:
    // In-place grayscale conversion of a buffer laid out like m_image (16 / 32 bit pixels).
    void toGray16(quint8 *data);
    void toGray32(quint8 *data);

    // Blend implementations; run() picks one based on the image depth and CPU features.
    void blend16();
    void blend32();
    void blend32_mmx();
    void blend32_sse2();

protected:
    // Thread entry point: performs a single blend pass into m_image->data.
    void run();

private:
    bool have_mmx;      // CPU reports MMX support
    bool have_sse2;     // CPU reports SSE2 support
    int m_alpha;        // weight of m_original in the blend
    XImage *m_image;    // destination image; not freed by this class
    quint8 *m_original; // copy of the image data as passed to setImage()
    quint8 *m_final;    // grayscale copy of the image data
};
297 | |
298 | |
299 | BlendingThread::BlendingThread(QObject *parent) |
300 | : QThread(parent) |
301 | { |
302 | // Check if the CPU supports MMX and SSE2. |
303 | // We only check the first CPU on an SMP system, and assume all CPU's support the same features. |
304 | QList<Solid::Device> list = Solid::Device::listFromType(Solid::DeviceInterface::Processor, QString()); |
305 | if (list.size() > 0) |
306 | { |
307 | Solid::Processor::InstructionSets features = list[0].as<Solid::Processor>()->instructionSets(); |
308 | have_mmx = features & Solid::Processor::IntelMmx; |
309 | have_sse2 = features & Solid::Processor::IntelSse2; |
310 | } |
311 | else |
312 | { |
313 | // Can happen if e.g. there is no usable backend for Solid. Err on the side of caution. |
314 | // (c.f. bug:163112) |
315 | have_mmx = false; |
316 | have_sse2 = false; |
317 | } |
318 | |
319 | m_final = NULL; |
320 | m_original = NULL; |
321 | } |
322 | |
323 | |
324 | BlendingThread::~BlendingThread() |
325 | { |
326 | _mm_free(m_final); |
327 | _mm_free(m_original); |
328 | } |
329 | |
330 | |
331 | void BlendingThread::setImage(XImage *image) |
332 | { |
333 | m_image = image; |
334 | int size = m_image->bytes_per_line * m_image->height; |
335 | |
336 | // We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2 |
337 | m_original = (quint8*) _mm_malloc(size, 16); |
338 | m_final = (quint8*) _mm_malloc(size, 16); |
339 | |
340 | memcpy((void*)m_original, (const void*)m_image->data, size); |
341 | memcpy((void*)m_final, (const void*)m_image->data, size); |
342 | |
343 | if (m_image->depth > 16) { |
344 | // Make sure that the alpha channel is initialized to 0xff |
345 | for (int y = 0; y < image->height; y++) { |
346 | quint32 *pixels = (quint32*)(m_original + (m_image->bytes_per_line * y)); |
347 | for (int x = 0; x < image->width; x++) |
348 | pixels[x] |= 0xff000000; |
349 | } |
350 | } |
351 | |
352 | if (m_image->depth != 16) |
353 | toGray32(m_final); |
354 | else |
355 | toGray16(m_final); |
356 | } |
357 | |
358 | |
359 | void BlendingThread::toGray16(quint8 *data) |
360 | { |
361 | for (int y = 0; y < m_image->height; y++) |
362 | { |
363 | quint16 *pixels = (quint16*)(data + (m_image->bytes_per_line * y)); |
364 | for (int x = 0; x < m_image->width; x++) |
365 | { |
366 | int red, green, blue; |
367 | load16(pixels[x], &red, &green, &blue); |
368 | |
369 | // Make sure the 3 least significant bits are 0, so the red, green and blue |
370 | // channels really have the same value when packed in a 5/6/5 representation. |
371 | int val = int(red * .299 + green * .587 + blue * .114) & 0xf8; |
372 | pixels[x] = store16(val, val, val); |
373 | } |
374 | } |
375 | } |
376 | |
377 | |
378 | void BlendingThread::toGray32(quint8 *data) |
379 | { |
380 | for (int y = 0; y < m_image->height; y++) |
381 | { |
382 | quint32 *pixels = (quint32*)(data + (m_image->bytes_per_line * y)); |
383 | for (int x = 0; x < m_image->width; x++) |
384 | { |
385 | int red, green, blue; |
386 | load(pixels[x], &red, &green, &blue); |
387 | |
388 | int val = int(red * .299 + green * .587 + blue * .114); |
389 | pixels[x] = store(val, val, val); |
390 | } |
391 | } |
392 | } |
393 | |
394 | |
395 | void BlendingThread::blend16() |
396 | { |
397 | for (int y = 0; y < m_image->height; y++) |
398 | { |
399 | uint start = m_image->bytes_per_line * y; |
400 | quint16 *over = (quint16*)(m_original + start); |
401 | quint16 *under = (quint16*)(m_final + start); |
402 | quint16 *result = (quint16*)(m_image->data + start); |
403 | |
404 | scanline_blend_16(over, m_alpha, under, result, m_image->width); |
405 | } |
406 | } |
407 | |
408 | |
409 | void BlendingThread::blend32() |
410 | { |
411 | for (int y = 0; y < m_image->height; y++) |
412 | { |
413 | int start = m_image->bytes_per_line * y; |
414 | quint32 *over = (quint32*)(m_original + start); |
415 | quint32 *under = (quint32*)(m_final + start); |
416 | quint32 *result = (quint32*)(m_image->data + start); |
417 | |
418 | scanline_blend(over, m_alpha, under, result, m_image->width); |
419 | } |
420 | } |
421 | |
422 | |
423 | void BlendingThread::blend32_mmx() |
424 | { |
425 | #ifdef HAVE_MMX |
426 | for (int y = 0; y < m_image->height; y++) |
427 | { |
428 | int start = m_image->bytes_per_line * y; |
429 | quint32 *over = (quint32*)(m_original + start); |
430 | quint32 *under = (quint32*)(m_final + start); |
431 | quint32 *result = (quint32*)(m_image->data + start); |
432 | |
433 | scanline_blend_mmx(over, m_alpha, under, result, m_image->width); |
434 | } |
435 | #endif |
436 | } |
437 | |
438 | |
439 | void BlendingThread::blend32_sse2() |
440 | { |
441 | #ifdef HAVE_SSE2 |
442 | uint length = m_image->bytes_per_line * m_image->height; |
443 | |
444 | __m128i *over = (__m128i*)(m_original); |
445 | __m128i *under = (__m128i*)(m_final); |
446 | __m128i *result = (__m128i*)(m_image->data); |
447 | |
448 | scanline_blend_sse2(over, m_alpha, under, result, length); |
449 | #endif |
450 | } |
451 | |
452 | |
453 | void BlendingThread::run() |
454 | { |
455 | if (m_image->depth != 16) |
456 | { |
457 | #ifdef HAVE_SSE2 |
458 | if (have_sse2) |
459 | blend32_sse2(); |
460 | else |
461 | #endif |
462 | #ifdef HAVE_MMX |
463 | if (have_mmx) |
464 | blend32_mmx(); |
465 | else |
466 | #endif |
467 | blend32(); |
468 | } |
469 | else |
470 | blend16(); |
471 | } |
472 | |
473 | |
474 | |
475 | // ---------------------------------------------------------------------------- |
476 | |
477 | |
478 | |
479 | FadeEffect::FadeEffect(QWidget *parent, QPixmap *pixmap) |
480 | : LogoutEffect(parent, pixmap), blender(NULL) |
481 | { |
482 | Display *dpy = parent->x11Info().display(); |
483 | |
484 | image = XCreateImage(dpy, (Visual*)pixmap->x11Info().visual(), pixmap->depth(), |
485 | ZPixmap, 0, NULL, pixmap->width(), pixmap->height(), 32, 0); |
486 | |
487 | // Allocate the image data on 16 byte boundary for SSE2 |
488 | image->data = (char*)_mm_malloc(image->bytes_per_line * image->height, 16); |
489 | |
490 | gc = XCreateGC(dpy, pixmap->handle(), 0, NULL); |
491 | |
492 | blender = new BlendingThread(this); |
493 | currentY = 0; |
494 | } |
495 | |
496 | |
497 | FadeEffect::~FadeEffect() |
498 | { |
499 | blender->wait(); |
500 | _mm_free(image->data); |
501 | image->data = NULL; |
502 | XDestroyImage(image); |
503 | XFreeGC(QX11Info::display(), gc); |
504 | } |
505 | |
506 | |
507 | void FadeEffect::start() |
508 | { |
509 | done = false; |
510 | alpha = 255; |
511 | |
512 | // Start by grabbing the screenshot |
513 | grabImageSection(); |
514 | } |
515 | |
516 | |
517 | void FadeEffect::grabImageSection() |
518 | { |
519 | const int sectionHeight = 64; |
520 | int h = (currentY + sectionHeight > image->height) ? image->height - currentY : sectionHeight; |
521 | |
522 | XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), 0, currentY, image->width, h, |
523 | AllPlanes, ZPixmap, image, 0, currentY); |
524 | |
525 | // Continue until we have the whole image |
526 | currentY += sectionHeight; |
527 | if (currentY < image->height) |
528 | { |
529 | QTimer::singleShot(1, this, SLOT(grabImageSection())); |
530 | return; |
531 | } |
532 | |
533 | // Let the owner know we're done. |
534 | emit initialized(); |
535 | |
536 | // Start the fade effect |
537 | blender->setImage(image); |
538 | blender->setAlpha(alpha); |
539 | blender->start(); |
540 | time.start(); |
541 | |
542 | QTimer::singleShot(10, this, SLOT(nextFrame())); |
543 | } |
544 | |
545 | |
546 | void FadeEffect::nextFrame() |
547 | { |
548 | const qreal runTime = 2000; // milliseconds |
549 | |
550 | if (!blender->isFinished()) |
551 | { |
552 | QTimer::singleShot(10, this, SLOT(nextFrame())); |
553 | return; |
554 | } |
555 | |
556 | XPutImage(QX11Info::display(), pixmap->handle(), gc, image, 0, 0, 0, 0, image->width, image->height); |
557 | parent->update(); |
558 | |
559 | alpha = qRound(qMax(255. - (255. * (qreal(time.elapsed() / runTime))), 0.0)); |
560 | |
561 | if (!done) |
562 | { |
563 | blender->setAlpha(alpha); |
564 | blender->start(); |
565 | |
566 | // Make sure we don't send frames faster than the X server can process them |
567 | XSync(QX11Info::display(), False); |
568 | QTimer::singleShot(1, this, SLOT(nextFrame())); |
569 | } |
570 | |
571 | if (alpha == 0) |
572 | done = true; |
573 | } |
574 | |
575 | |