fadeeffect.cpp [kde-workspace/ksmserver/fadeeffect.cpp]

1	/*
2	* Copyright © 2008 Fredrik Höglund <fredrik@kde.org>
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a copy
5	* of this software and associated documentation files (the "Software"), to deal
6	* in the Software without restriction, including without limitation the rights
7	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8	* copies of the Software, and to permit persons to whom the Software is
9	* furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17	* AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20	*/
21
22	#include <QThread>
23	#include <QWidget>
24	#include <QPixmap>
25	#include <QTimer>
26	#include <QX11Info>
27	#include <QDebug>
28
29	#include <solid/device.h>
30	#include <solid/processor.h>
31
32	#include <X11/Xlib.h>
33	#include <X11/Xutil.h>
34
35	#include <string>
36
37	#if defined(__INTEL_COMPILER)
38	# define HAVE_MMX
39	# define HAVE_SSE2
40	#elif defined(__GNUC__)
41	# if defined(__MMX__)
42	# define HAVE_MMX
43	# endif
44	# if defined(__SSE2__) && __GNUC__ > 3
45	# define HAVE_SSE2
46	# endif
47	#endif
48
49	#ifdef HAVE_MMX
50	# include <mmintrin.h>
51	#endif
52
53	#ifdef HAVE_SSE2
54	# include <emmintrin.h>
55	#endif
56
57	#include "fadeeffect.h"
58	#include "fadeeffect.moc"
59
60
61	#ifndef HAVE_SSE2
// Fallback for builds without SSE2, where <emmintrin.h> (and with it the real
// _mm_malloc) is not included. The alignment argument exists only so callers
// can use the same call syntax; it is ignored and plain malloc() is used.
static inline void *_mm_malloc(size_t size, int /*alignment*/)
{
    return malloc(size);
}
66
// Fallback for builds without SSE2: releases a block through free().
static inline void _mm_free(void *block)
{
    free(block);
}
71	#endif
72
73
// Returns a * b / 255, rounded, using only shifts and adds instead of a division.
// For a and b in 0..255 the result is in 0..255. scanline_blend() also passes a
// negative 'a' (a channel difference), which relies on >> being an arithmetic shift.
static inline int multiply(int a, int b)
{
    const int res = a * b + 0x80;
    return (res + (res >> 8)) >> 8;
}
79
80
81	static inline void load(const quint32 src, int r, int* g, int* *b)
82	{
83	*r = (src >> `16`) & `0xff`;
84	*g = (src >> `8`) & `0xff`;
85	*b = src & `0xff`;
86	}
87
88
89	static inline void load16(const quint16 src, int r, int* g, int* *b)
90	{
91	*r = ((src >> `8`) & `0x00f8`) \| ((src >> `13`) & `0x0007`);
92	*g = ((src >> `3`) & `0x00fc`) \| ((src >> `9`) & `0x0003`);
93	*b = ((src << `3`) & `0x00f8`) \| ((src >> `2`) & `0x0007`);
94	}
95
96
97	static inline quint32 store(const int r, const int g, const int b)
98	{
99	return (r << `16`) \| (g << `8`) \| b \| `0xff000000`;
100	}
101
102
103	static inline quint16 store16(const int r, const int g, const int b)
104	{
105	return (((r << `8`) \| (b >> `3`)) & `0xf81f`) \| ((g << `3`) & `0x07e0`);
106	}
107
108
109	static void scanline_blend(const quint32 over, const* quint8 alpha, const quint32 *under,
110	quint32 *result, uint length)
111	{
112	for (uint i = `0`; i < length; ++i)
113	{
114	int sr, sg, sb, dr, dg, db;
115
116	load(over[i], &sr, &sg, &sb);
117	load(under[i], &dr, &dg, &db);
118
119	dr = multiply((sr - dr), alpha) + dr;
120	dg = multiply((sg - dg), alpha) + dg;
121	db = multiply((sb - db), alpha) + db;
122
123	result[i] = store(dr, dg, db);
124	}
125	}
126
127
128	static void scanline_blend_16(const quint16 over, const* quint8 alpha, const quint16 *under,
129	quint16 *result, uint length)
130	{
131	for (uint i = `0`; i < length; ++i)
132	{
133	int sr, sg, sb, dr, dg, db;
134
135	load16(over[i], &sr, &sg, &sb);
136	load16(under[i], &dr, &dg, &db);
137
138	dr = multiply((sr - dr), alpha) + dr;
139	dg = multiply((sg - dg), alpha) + dg;
140	db = multiply((sb - db), alpha) + db;
141
142	result[i] = store16(dr, dg, db);
143	}
144	}
145
146
147
148	// ----------------------------------------------------------------------------
149
150
151
152	#ifdef HAVE_MMX
// MMX counterpart of the scalar multiply(): for each of the four 16 bit lanes
// computes (p + (p >> 8)) >> 8 with p = m1 * m2 + 0x80, i.e. roughly
// m1 * m2 / 255.
// NOTE(review): the additions are signed saturating adds, so a product just
// below 0x8000 gets clamped when the rounding bias is added - confirm that the
// resulting off-by-one is acceptable.
static inline __m64 multiply(const __m64 m1, const __m64 m2)
{
    __m64 res = _mm_mullo_pi16(m1, m2);
    res = _mm_adds_pi16(res, _mm_set1_pi16(0x0080));
    res = _mm_adds_pi16(res, _mm_srli_pi16(res, 8));
    return _mm_srli_pi16(res, 8);
}
160
161
// Lane-wise sum of two vectors of four 16 bit values, using the signed
// saturating add instruction.
static inline __m64 add(const __m64 m1, const __m64 m2)
{
    const __m64 sum = _mm_adds_pi16(m1, m2);
    return sum;
}
166
167
168	static inline __m64 load(const quint32 pixel, const __m64 zero)
169	{
170	__m64 m = _mm_cvtsi32_si64(pixel);
171	return _mm_unpacklo_pi8(m, zero);
172	}
173
174	static inline quint32 store(const __m64 pixel, const __m64 zero)
175	{
176	__m64 packed = _mm_packs_pu16(pixel, zero);
177	return _mm_cvtsi64_si32(packed);
178	}
179
180
181	static void scanline_blend_mmx(const quint32 over, const* quint8 a, const quint32 *under,
182	quint32 *result, uint length)
183	{
184	register const __m64 alpha = _mm_set1_pi16(quint16 (a));
185	register const __m64 negalpha = _mm_xor_si64(alpha, _mm_set1_pi16 (`0x00ff`));
186	register const __m64 zero = _mm_setzero_si64();
187
188	for (uint i = `0`; i < length; ++i)
189	{
190	__m64 src = load(over[i], zero);
191	__m64 dst = load(under[i], zero);
192
193	src = multiply(src, alpha);
194	dst = multiply(dst, negalpha);
195	dst = add(src, dst);
196
197	result[i] = store(dst, zero);
198	}
199
200	_mm_empty();
201	}
202	#endif // HAVE_MMX
203
204
205	// ----------------------------------------------------------------------------
206
207
208	#ifdef HAVE_SSE2
// SSE2 counterpart of the scalar multiply(): for each of the eight 16 bit lanes
// computes (p + (p >> 8)) >> 8 with p = m1 * m2 + 0x80, i.e. roughly
// m1 * m2 / 255.
// NOTE(review): the additions are signed saturating adds, so a product just
// below 0x8000 gets clamped when the rounding bias is added - confirm that the
// resulting off-by-one is acceptable.
static inline __m128i multiply(const __m128i m1, const __m128i m2)
{
    __m128i res = _mm_mullo_epi16(m1, m2);
    res = _mm_adds_epi16(res, _mm_set1_epi16(0x0080));
    res = _mm_adds_epi16(res, _mm_srli_epi16(res, 8));
    return _mm_srli_epi16(res, 8);
}
216
217
// Lane-wise sum of two vectors of eight 16 bit values, using the signed
// saturating add instruction.
static inline __m128i add(const __m128i m1, const __m128i m2)
{
    const __m128i sum = _mm_adds_epi16(m1, m2);
    return sum;
}
222
223
// Widens the low eight bytes of 'm' to eight zero-extended 16 bit lanes.
static inline __m128i lower(__m128i m)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpacklo_epi8(m, zero);
}
228
229
// Widens the high eight bytes of 'm' to eight zero-extended 16 bit lanes.
static inline __m128i upper(__m128i m)
{
    const __m128i zero = _mm_setzero_si128();
    return _mm_unpackhi_epi8(m, zero);
}
234
235
236	void scanline_blend_sse2(const __m128i over, const* quint8 a, const __m128i *under,
237	__m128i *result, uint length)
238	{
239	length = (length + `15`) >> `4`;
240	register const __m128i alpha = _mm_set1_epi16(__uint16_t (a));
241	register const __m128i negalpha = _mm_xor_si128(alpha, _mm_set1_epi16 (`0x00ff`));
242
243	for (uint i = `0`; i < length; i++)
244	{
245	__m128i squad = _mm_load_si128(over + i);
246	__m128i dquad = _mm_load_si128(under + i);
247
248	__m128i src1 = lower(squad);
249	__m128i dst1 = lower(dquad);
250	__m128i src2 = upper(squad);
251	__m128i dst2 = upper(dquad);
252
253	squad = add(multiply(src1, alpha), multiply(dst1, negalpha));
254	dquad = add(multiply(src2, alpha), multiply(dst2, negalpha));
255
256	dquad = _mm_packus_epi16(squad, dquad);
257	_mm_store_si128(result + i, dquad);
258	}
259	}
260	#endif // HAVE_SSE2
261
262
263
264	// ----------------------------------------------------------------------------
265
266
267
268	class BlendingThread : public QThread
269	{
270	public:
271	BlendingThread(QObject *parent);
272	~BlendingThread();
273
274	void setImage(XImage *image);
275	void setAlpha(int alpha) { m_alpha = alpha; }
276
277	private:
278	void toGray16(quint8 *data);
279	void toGray32(quint8 *data);
280
281	void blend16();
282	void blend32();
283	void blend32_mmx();
284	void blend32_sse2();
285
286	protected:
287	void run();
288
289	private:
290	bool have_mmx;
291	bool have_sse2;
292	int m_alpha;
293	XImage *m_image;
294	quint8 *m_original;
295	quint8 *m_final;
296	};
297
298
299	BlendingThread::BlendingThread(QObject *parent)
300	: QThread (parent)
301	{
302	// Check if the CPU supports MMX and SSE2.
303	// We only check the first CPU on an SMP system, and assume all CPU's support the same features.
304	QList<Solid::Device> list = Solid::Device::listFromType(Solid::DeviceInterface::Processor, QString ());
305	if (list.size() > `0`)
306	{
307	Solid::Processor::InstructionSets features = list [`0`].as<Solid::Processor>()->instructionSets();
308	have_mmx = features & Solid::Processor::IntelMmx;
309	have_sse2 = features & Solid::Processor::IntelSse2;
310	}
311	else
312	{
313	// Can happen if e.g. there is no usable backend for Solid. Err on the side of caution.
314	// (c.f. bug:163112)
315	have_mmx = false;
316	have_sse2 = false;
317	}
318
319	m_final = NULL;
320	m_original = NULL;
321	}
322
323
324	BlendingThread::~BlendingThread()
325	{
326	_mm_free(m_final);
327	_mm_free(m_original);
328	}
329
330
331	void BlendingThread::setImage(XImage *image)
332	{
333	m_image = image;
334	int size = m_image->bytes_per_line * m_image->height;
335
336	// We need the data to be aligned on a 128 bit (16 byte) boundary for SSE2
337	m_original = (quint8*) _mm_malloc(size, `16`);
338	m_final = (quint8*) _mm_malloc(size, `16`);
339
340	memcpy((void)m_original, (const* void*)m_image->data, size);
341	memcpy((void)m_final, (const* void*)m_image->data, size);
342
343	if (m_image->depth > `16`) {
344	// Make sure that the alpha channel is initialized to 0xff
345	for (int y = `0`; y < image->height; y++) {
346	quint32 pixels = (quint32)(m_original + (m_image->bytes_per_line * y));
347	for (int x = `0`; x < image->width; x++)
348	pixels[x] \|= `0xff000000`;
349	}
350	}
351
352	if (m_image->depth != `16`)
353	toGray32(m_final);
354	else
355	toGray16(m_final);
356	}
357
358
359	void BlendingThread::toGray16(quint8 *data)
360	{
361	for (int y = `0`; y < m_image->height; y++)
362	{
363	quint16 pixels = (quint16)(data + (m_image->bytes_per_line * y));
364	for (int x = `0`; x < m_image->width; x++)
365	{
366	int red, green, blue;
367	load16(pixels[x], &red, &green, &blue);
368
369	// Make sure the 3 least significant bits are 0, so the red, green and blue
370	// channels really have the same value when packed in a 5/6/5 representation.
371	int val = int(red * `.299` + green * `.587` + blue * `.114`) & `0xf8`;
372	pixels[x] = store16(val, val, val);
373	}
374	}
375	}
376
377
378	void BlendingThread::toGray32(quint8 *data)
379	{
380	for (int y = `0`; y < m_image->height; y++)
381	{
382	quint32 pixels = (quint32)(data + (m_image->bytes_per_line * y));
383	for (int x = `0`; x < m_image->width; x++)
384	{
385	int red, green, blue;
386	load(pixels[x], &red, &green, &blue);
387
388	int val = int(red * `.299` + green * `.587` + blue * `.114`);
389	pixels[x] = store(val, val, val);
390	}
391	}
392	}
393
394
395	void BlendingThread::blend16()
396	{
397	for (int y = `0`; y < m_image->height; y++)
398	{
399	uint start = m_image->bytes_per_line * y;
400	quint16 over = (quint16)(m_original + start);
401	quint16 under = (quint16)(m_final + start);
402	quint16 result = (quint16)(m_image->data + start);
403
404	scanline_blend_16(over, m_alpha, under, result, m_image->width);
405	}
406	}
407
408
409	void BlendingThread::blend32()
410	{
411	for (int y = `0`; y < m_image->height; y++)
412	{
413	int start = m_image->bytes_per_line * y;
414	quint32 over = (quint32)(m_original + start);
415	quint32 under = (quint32)(m_final + start);
416	quint32 result = (quint32)(m_image->data + start);
417
418	scanline_blend(over, m_alpha, under, result, m_image->width);
419	}
420	}
421
422
423	void BlendingThread::blend32_mmx()
424	{
425	#ifdef HAVE_MMX
426	for (int y = `0`; y < m_image->height; y++)
427	{
428	int start = m_image->bytes_per_line * y;
429	quint32 over = (quint32)(m_original + start);
430	quint32 under = (quint32)(m_final + start);
431	quint32 result = (quint32)(m_image->data + start);
432
433	scanline_blend_mmx(over, m_alpha, under, result, m_image->width);
434	}
435	#endif
436	}
437
438
439	void BlendingThread::blend32_sse2()
440	{
441	#ifdef HAVE_SSE2
442	uint length = m_image->bytes_per_line * m_image->height;
443
444	__m128i over = (__m128i)(m_original);
445	__m128i under = (__m128i)(m_final);
446	__m128i result = (__m128i)(m_image->data);
447
448	scanline_blend_sse2(over, m_alpha, under, result, length);
449	#endif
450	}
451
452
453	void BlendingThread::run()
454	{
455	if (m_image->depth != `16`)
456	{
457	#ifdef HAVE_SSE2
458	if (have_sse2)
459	blend32_sse2();
460	else
461	#endif
462	#ifdef HAVE_MMX
463	if (have_mmx)
464	blend32_mmx();
465	else
466	#endif
467	blend32();
468	}
469	else
470	blend16();
471	}
472
473
474
475	// ----------------------------------------------------------------------------
476
477
478
479	FadeEffect::FadeEffect(QWidget parent, QPixmap pixmap)
480	: LogoutEffect (parent, pixmap), blender(NULL)
481	{
482	Display *dpy = parent->x11Info().display();
483
484	image = XCreateImage(dpy, (Visual*)pixmap->x11Info().visual(), pixmap->depth(),
485	ZPixmap, `0`, NULL, pixmap->width(), pixmap->height(), `32`, `0`);
486
487	// Allocate the image data on 16 byte boundary for SSE2
488	image->data = (char)_mm_malloc(image->bytes_per_line image->height, `16`);
489
490	gc = XCreateGC(dpy, pixmap->handle(), `0`, NULL);
491
492	blender = new BlendingThread (this);
493	currentY = `0`;
494	}
495
496
497	FadeEffect::~FadeEffect()
498	{
499	blender->wait();
500	_mm_free(image->data);
501	image->data = NULL;
502	XDestroyImage(image);
503	XFreeGC(QX11Info::display(), gc);
504	}
505
506
507	void FadeEffect::start()
508	{
509	done = false;
510	alpha = `255`;
511
512	// Start by grabbing the screenshot
513	grabImageSection();
514	}
515
516
517	void FadeEffect::grabImageSection()
518	{
519	const int sectionHeight = `64`;
520	int h = (currentY + sectionHeight > image->height) ? image->height - currentY : sectionHeight;
521
522	XGetSubImage(QX11Info::display(), QX11Info::appRootWindow(), `0`, currentY, image->width, h,
523	AllPlanes, ZPixmap, image, `0`, currentY);
524
525	// Continue until we have the whole image
526	currentY += sectionHeight;
527	if (currentY < image->height)
528	{
529	QTimer::singleShot(`1`, this, SLOT(grabImageSection()));
530	return;
531	}
532
533	// Let the owner know we're done.
534	emit initialized();
535
536	// Start the fade effect
537	blender->setImage(image);
538	blender->setAlpha(alpha);
539	blender->start();
540	time.start();
541
542	QTimer::singleShot(`10`, this, SLOT(nextFrame()));
543	}
544
545
546	void FadeEffect::nextFrame()
547	{
548	const qreal runTime = `2000`; // milliseconds
549
550	if (!blender->isFinished())
551	{
552	QTimer::singleShot(`10`, this, SLOT(nextFrame()));
553	return;
554	}
555
556	XPutImage(QX11Info::display(), pixmap->handle(), gc, image, `0`, `0`, `0`, `0`, image->width, image->height);
557	parent->update();
558
559	alpha = qRound(qMax(`255.` - (`255.` * (qreal(time.elapsed() / runTime))), `0.0`));
560
561	if (!done)
562	{
563	blender->setAlpha(alpha);
564	blender->start();
565
566	// Make sure we don't send frames faster than the X server can process them
567	XSync(QX11Info::display(), False);
568	QTimer::singleShot(`1`, this, SLOT(nextFrame()));
569	}
570
571	if (alpha == `0`)
572	done = true;
573	}
574
575