1/*
2 * Copyright © 2008 Fredrik Höglund <fredrik@kde.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22#include <QThread>
23#include <QWidget>
24#include <QPixmap>
25#include <QTimer>
26#include <QX11Info>
27#include <QDebug>
28
29#include <solid/device.h>
30#include <solid/processor.h>
31
32#include <X11/Xlib.h>
33#include <X11/Xutil.h>
34
35#include <string>
36
37#if defined(__INTEL_COMPILER)
38# define HAVE_MMX
39# define HAVE_SSE2
40#elif defined(__GNUC__)
41# if defined(__MMX__)
42# define HAVE_MMX
43# endif
44# if defined(__SSE2__) && __GNUC__ > 3
45# define HAVE_SSE2
46# endif
47#endif
48
49#ifdef HAVE_MMX
50# include <mmintrin.h>
51#endif
52
53#ifdef HAVE_SSE2
54# include <emmintrin.h>
55#endif
56
57#include "fadeeffect.h"
58#include "fadeeffect.moc"
59
60
61#ifndef HAVE_SSE2
// Stand-in for the intrinsic allocator when the SSE2 headers are not used.
// The second (alignment) argument is accepted for source compatibility and
// ignored: the memory comes straight from malloc().
static inline void *_mm_malloc(size_t size, int /* alignment */)
{
    void *const block = malloc(size);
    return block;
}
66
// Counterpart of the fallback _mm_malloc(): hands the block back to free().
static inline void _mm_free(void *p)
{
    ::free(p);
}
71#endif
72
73
// Computes a * b / 255 with rounding, using shifts instead of a division.
// Adding 0x80 rounds, and folding in the high byte corrects for dividing
// by 256 rather than 255.
static inline int multiply(int a, int b)
{
    const int biased = a * b + 0x80;
    const int folded = biased + (biased >> 8);
    return folded >> 8;
}
79
80
81static inline void load(const quint32 src, int *r, int *g, int *b)
82{
83 *r = (src >> 16) & 0xff;
84 *g = (src >> 8) & 0xff;
85 *b = src & 0xff;
86}
87
88
89static inline void load16(const quint16 src, int *r, int *g, int *b)
90{
91 *r = ((src >> 8) & 0x00f8) | ((src >> 13) & 0x0007);
92 *g = ((src >> 3) & 0x00fc) | ((src >> 9) & 0x0003);
93 *b = ((src << 3) & 0x00f8) | ((src >> 2) & 0x0007);
94}
95
96
97static inline quint32 store(const int r, const int g, const int b)
98{
99 return (r << 16) | (g << 8) | b | 0xff000000;
100}
101
102
103static inline quint16 store16(const int r, const int g, const int b)
104{
105 return (((r << 8) | (b >> 3)) & 0xf81f) | ((g << 3) & 0x07e0);
106}
107
108
109static void scanline_blend(const quint32 *over, const quint8 alpha, const quint32 *under,
110 quint32 *result, uint length)
111{
112 for (uint i = 0; i < length; ++i)
113 {
114 int sr, sg, sb, dr, dg, db;
115
116 load(over[i], &sr, &sg, &sb);
117 load(under[i], &dr, &dg, &db);
118
119 dr = multiply((sr - dr), alpha) + dr;
120 dg = multiply((sg - dg), alpha) + dg;
121 db = multiply((sb - db), alpha) + db;
122
123 result[i] = store(dr, dg, db);
124 }
125}
126
127
128static void scanline_blend_16(const quint16 *over, const quint8 alpha, const quint16 *under,
129 quint16 *result, uint length)
130{
131 for (uint i = 0; i < length; ++i)
132 {
133 int sr, sg, sb, dr, dg, db;
134
135 load16(over[i], &sr, &sg, &sb);
136 load16(under[i], &dr, &dg, &db);
137
138 dr = multiply((sr - dr), alpha) + dr;
139 dg = multiply((sg - dg), alpha) + dg;
140 db = multiply((sb - db), alpha) + db;
141
142 result[i] = store16(dr, dg, db);
143 }
144}
145
146
147
148// ----------------------------------------------------------------------------
149
150
151
152#ifdef HAVE_MMX
// Per 16-bit lane: computes m1 * m2 / 255 with rounding, for operands in
// the range 0..255.
//
// The product of two 8-bit values uses the whole unsigned 16-bit range, so
// the two rounding additions must saturate as unsigned. A signed saturating
// add clamps any sum that crosses 0x7fff, which made e.g. 128 * 255 / 255
// come out as 127. With unsigned saturation no intermediate value can
// saturate (255 * 255 + 0x80 + 0xfe < 0xffff).
static inline __m64 multiply(const __m64 m1, const __m64 m2)
{
    __m64 res = _mm_mullo_pi16(m1, m2);
    res = _mm_adds_pu16(res, _mm_set1_pi16 (0x0080));
    res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
    return _mm_srli_pi16(res, 8);
}
160
161
// Adds the four 16-bit lanes of m1 and m2 with signed saturation.
static inline __m64 add(const __m64 m1, const __m64 m2)
{
    const __m64 sum = _mm_adds_pi16(m1, m2);
    return sum;
}
166
167
168static inline __m64 load(const quint32 pixel, const __m64 zero)
169{
170 __m64 m = _mm_cvtsi32_si64(pixel);
171 return _mm_unpacklo_pi8(m, zero);
172}
173
174static inline quint32 store(const __m64 pixel, const __m64 zero)
175{
176 __m64 packed = _mm_packs_pu16(pixel, zero);
177 return _mm_cvtsi64_si32(packed);
178}
179
180
181static void scanline_blend_mmx(const quint32 *over, const quint8 a, const quint32 *under,
182 quint32 *result, uint length)
183{
184 register const __m64 alpha = _mm_set1_pi16(quint16 (a));
185 register const __m64 negalpha = _mm_xor_si64(alpha, _mm_set1_pi16 (0x00ff));
186 register const __m64 zero = _mm_setzero_si64();
187
188 for (uint i = 0; i < length; ++i)
189 {
190 __m64 src = load(over[i], zero);
191 __m64 dst = load(under[i], zero);
192
193 src = multiply(src, alpha);
194 dst = multiply(dst, negalpha);
195 dst = add(src, dst);
196
197 result[i] = store(dst, zero);
198 }
199
200 _mm_empty();
201}
202#endif // HAVE_MMX
203
204
205// ----------------------------------------------------------------------------
206
207
208#ifdef HAVE_SSE2
// Per 16-bit lane: computes m1 * m2 / 255 with rounding, for operands in
// the range 0..255.
//
// The product of two 8-bit values uses the whole unsigned 16-bit range, so
// the two rounding additions must saturate as unsigned. A signed saturating
// add clamps any sum that crosses 0x7fff, which made e.g. 128 * 255 / 255
// come out as 127. With unsigned saturation no intermediate value can
// saturate (255 * 255 + 0x80 + 0xfe < 0xffff).
static inline __m128i multiply(const __m128i m1, const __m128i m2)
{
    __m128i res = _mm_mullo_epi16(m1, m2);
    res = _mm_adds_epu16(res, _mm_set1_epi16 (0x0080));
    res = _mm_adds_epu16(res, _mm_srli_epi16 (res, 8));
    return _mm_srli_epi16(res, 8);
}
216
217
// Adds the eight 16-bit lanes of m1 and m2 with signed saturation.
static inline __m128i add(const __m128i m1, const __m128i m2)
{
    const __m128i sum = _mm_adds_epi16(m1, m2);
    return sum;
}
222
223
// Widens the low eight bytes of m to eight zero-extended 16-bit lanes.
static inline __m128i lower(__m128i m)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpacklo_epi8(m, zero);
}
228
229
// Widens the high eight bytes of m to eight zero-extended 16-bit lanes.
static inline __m128i upper(__m128i m)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpackhi_epi8(m, zero);
}
234
235
236void scanline_blend_sse2(const __m128i *over, const quint8 a, const __m128i *under,
237 __m128i *result, uint length)
238{
239 length = (length + 15) >> 4;
240 register const __m128i alpha = _mm_set1_epi16(__uint16_t (a));
241 register const __m128i negalpha = _mm_xor_si128(alpha, _mm_set1_epi16 (0x00ff));
242
243 for (uint i = 0; i < length; i++)
244 {
245 __m128i squad = _mm_load_si128(over + i);
246 __m128i dquad = _mm_load_si128(under + i);
247
248 __m128i src1 = lower(squad);
249 __m128i dst1 = lower(dquad);
250 __m128i src2 = upper(squad);
251 __m128i dst2 = upper(dquad);
252
253 squad = add(multiply(src1, alpha), multiply(dst1, negalpha));
254 dquad = add(multiply(src2, alpha), multiply(dst2, negalpha));
255
256 dquad = _mm_packus_epi16(squad, dquad);
257 _mm_store_si128(result + i, dquad);
258 }
259}
260#endif // HAVE_SSE2
261
262
263
264// ----------------------------------------------------------------------------
265
266
267
268class BlendingThread : public QThread
269{
270public:
271 BlendingThread(QObject *parent);
272 ~BlendingThread();
273
274 void setImage(XImage *image);
275 void setAlpha(int alpha) { m_alpha = alpha; }
276
277private:
278 void toGray16(quint8 *data);
279 void toGray32(quint8 *data);
280
281 void blend16();
282 void blend32();
283 void blend32_mmx();
284 void blend32_sse2();
285
286protected:
287 void run();
288
289private:
290 bool have_mmx;
291 bool have_sse2;
292 int m_alpha;
293 XImage *m_image;
294 quint8 *m_original;
295 quint8 *m_final;
296};
297
298
299BlendingThread::BlendingThread(QObject *parent)
300 : QThread(parent)
301{
302 // Check if the CPU supports MMX and SSE2.
303 // We only check the first CPU on an SMP system, and assume all CPU's support the same features.
304 QList<Solid::Device> list = Solid::Device::listFromType(Solid::DeviceInterface::Processor, QString());
305 if (list.size() > 0)
306 {
307 Solid::Processor::InstructionSets features = list[0].as<Solid::Processor>()->instructionSets();
308 have_mmx = features & Solid::Processor::IntelMmx;
309 have_sse2 = features & Solid::Processor::IntelSse2;
310 }
311 else
312 {
313 // Can happen if e.g. there is no usable backend for Solid. Err on the side of caution.
314 // (c.f. bug:163112)
315 have_mmx = false;
316 have_sse2 = false;
317 }
318
319 m_final = NULL;
320 m_original = NULL;
321}
322
323
324BlendingThread::~BlendingThread()
325{
326 _mm_free(m_final);
327 _mm_free(m_original);
328}
329
330
331void BlendingThread::setImage(XImage *image)
332{
333 m_image = image;
334 int size = m_image->bytes_per_line * m_image->height;
335
336 // We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2
337 m_original = (quint8*) _mm_malloc(size, 16);
338 m_final = (quint8*) _mm_malloc(size, 16);
339
340 memcpy((void*)m_original, (const void*)m_image->data, size);
341 memcpy((void*)m_final, (const void*)m_image->data, size);
342
343 if (m_image->depth > 16) {
344 // Make sure that the alpha channel is initialized to 0xff
345 for (int y = 0; y < image->height; y++) {
346 quint32 *pixels = (quint32*)(m_original + (m_image->bytes_per_line * y));
347 for (int x = 0; x < image->width; x++)
348 pixels[x] |= 0xff000000;
349 }
350 }
351
352 if (m_image->depth != 16)
353 toGray32(m_final);
354 else
355 toGray16(m_final);
356}
357
358
359void BlendingThread::toGray16(quint8 *data)
360{
361 for (int y = 0; y < m_image->height; y++)
362 {
363 quint16 *pixels = (quint16*)(data + (m_image->bytes_per_line * y));
364 for (int x = 0; x < m_image->width; x++)
365 {
366 int red, green, blue;
367 load16(pixels[x], &red, &green, &blue);
368
369 // Make sure the 3 least significant bits are 0, so the red, green and blue
370 // channels really have the same value when packed in a 5/6/5 representation.
371 int val = int(red * .299 + green * .587 + blue * .114) & 0xf8;
372 pixels[x] = store16(val, val, val);
373 }
374 }
375}
376
377
378void BlendingThread::toGray32(quint8 *data)
379{
380 for (int y = 0; y < m_image->height; y++)
381 {
382 quint32 *pixels = (quint32*)(data + (m_image->bytes_per_line * y));
383 for (int x = 0; x < m_image->width; x++)
384 {
385 int red, green, blue;
386 load(pixels[x], &red, &green, &blue);
387
388 int val = int(red * .299 + green * .587 + blue * .114);
389 pixels[x] = store(val, val, val);
390 }
391 }
392}
393
394
395void BlendingThread::blend16()
396{
397 for (int y = 0; y < m_image->height; y++)
398 {
399 uint start = m_image->bytes_per_line * y;
400 quint16 *over = (quint16*)(m_original + start);
401 quint16 *under = (quint16*)(m_final + start);
402 quint16 *result = (quint16*)(m_image->data + start);
403
404 scanline_blend_16(over, m_alpha, under, result, m_image->width);
405 }
406}
407
408
409void BlendingThread::blend32()
410{
411 for (int y = 0; y < m_image->height; y++)
412 {
413 int start = m_image->bytes_per_line * y;
414 quint32 *over = (quint32*)(m_original + start);
415 quint32 *under = (quint32*)(m_final + start);
416 quint32 *result = (quint32*)(m_image->data + start);
417
418 scanline_blend(over, m_alpha, under, result, m_image->width);
419 }
420}
421
422
423void BlendingThread::blend32_mmx()
424{
425#ifdef HAVE_MMX
426 for (int y = 0; y < m_image->height; y++)
427 {
428 int start = m_image->bytes_per_line * y;
429 quint32 *over = (quint32*)(m_original + start);
430 quint32 *under = (quint32*)(m_final + start);
431 quint32 *result = (quint32*)(m_image->data + start);
432
433 scanline_blend_mmx(over, m_alpha, under, result, m_image->width);
434 }
435#endif
436}
437
438
439void BlendingThread::blend32_sse2()
440{
441#ifdef HAVE_SSE2
442 uint length = m_image->bytes_per_line * m_image->height;
443
444 __m128i *over = (__m128i*)(m_original);
445 __m128i *under = (__m128i*)(m_final);
446 __m128i *result = (__m128i*)(m_image->data);
447
448 scanline_blend_sse2(over, m_alpha, under, result, length);
449#endif
450}
451
452
453void BlendingThread::run()
454{
455 if (m_image->depth != 16)
456 {
457#ifdef HAVE_SSE2
458 if (have_sse2)
459 blend32_sse2();
460 else
461#endif
462#ifdef HAVE_MMX
463 if (have_mmx)
464 blend32_mmx();
465 else
466#endif
467 blend32();
468 }
469 else
470 blend16();
471}
472
473
474
475// ----------------------------------------------------------------------------
476
477
478
479FadeEffect::FadeEffect(QWidget *parent, QPixmap *pixmap)
480 : LogoutEffect(parent, pixmap), blender(NULL)
481{
482 Display *dpy = parent->x11Info().display();
483
484 image = XCreateImage(dpy, (Visual*)pixmap->x11Info().visual(), pixmap->depth(),
485 ZPixmap, 0, NULL, pixmap->width(), pixmap->height(), 32, 0);
486
487 // Allocate the image data on 16 byte boundary for SSE2
488 image->data = (char*)_mm_malloc(image->bytes_per_line * image->height, 16);
489
490 gc = XCreateGC(dpy, pixmap->handle(), 0, NULL);
491
492 blender = new BlendingThread(this);
493 currentY = 0;
494}
495
496
497FadeEffect::~FadeEffect()
498{
499 blender->wait();
500 _mm_free(image->data);
501 image->data = NULL;
502 XDestroyImage(image);
503 XFreeGC(QX11Info::display(), gc);
504}
505
506
507void FadeEffect::start()
508{
509 done = false;
510 alpha = 255;
511
512 // Start by grabbing the screenshot
513 grabImageSection();
514}
515
516
517void FadeEffect::grabImageSection()
518{
519 const int sectionHeight = 64;
520 int h = (currentY + sectionHeight > image->height) ? image->height - currentY : sectionHeight;
521
522 XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), 0, currentY, image->width, h,
523 AllPlanes, ZPixmap, image, 0, currentY);
524
525 // Continue until we have the whole image
526 currentY += sectionHeight;
527 if (currentY < image->height)
528 {
529 QTimer::singleShot(1, this, SLOT(grabImageSection()));
530 return;
531 }
532
533 // Let the owner know we're done.
534 emit initialized();
535
536 // Start the fade effect
537 blender->setImage(image);
538 blender->setAlpha(alpha);
539 blender->start();
540 time.start();
541
542 QTimer::singleShot(10, this, SLOT(nextFrame()));
543}
544
545
546void FadeEffect::nextFrame()
547{
548 const qreal runTime = 2000; // milliseconds
549
550 if (!blender->isFinished())
551 {
552 QTimer::singleShot(10, this, SLOT(nextFrame()));
553 return;
554 }
555
556 XPutImage(QX11Info::display(), pixmap->handle(), gc, image, 0, 0, 0, 0, image->width, image->height);
557 parent->update();
558
559 alpha = qRound(qMax(255. - (255. * (qreal(time.elapsed() / runTime))), 0.0));
560
561 if (!done)
562 {
563 blender->setAlpha(alpha);
564 blender->start();
565
566 // Make sure we don't send frames faster than the X server can process them
567 XSync(QX11Info::display(), False);
568 QTimer::singleShot(1, this, SLOT(nextFrame()));
569 }
570
571 if (alpha == 0)
572 done = true;
573}
574
575