source: trunk/src/gui/painting/qdrawhelper_mmx_p.h@ 713

Last change on this file since 713 was 651, checked in by Dmitry A. Kuminov, 16 years ago

trunk: Merged in qt 4.6.2 sources.

File size: 28.6 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the QtGui module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#ifndef QDRAWHELPER_MMX_P_H
43#define QDRAWHELPER_MMX_P_H
44
45//
46// W A R N I N G
47// -------------
48//
49// This file is not part of the Qt API. It exists purely as an
50// implementation detail. This header file may change from version to
51// version without notice, or even be removed.
52//
53// We mean it.
54//
55
56#include <private/qdrawhelper_p.h>
57#include <private/qdrawhelper_x86_p.h>
58#include <private/qpaintengine_raster_p.h>
59
60#ifdef QT_HAVE_MMX
61#include <mmintrin.h>
62#endif
63
// Each C_xx macro declares one local MMX constant. The function-like helper
// macros in this file (negate, byte_mul, load, ...) refer to these locals by
// name, so the matching C_xx has to be expanded in a function before the
// helper is used there.
//   C_FF -> mmx_0x00ff : 0x00ff in every 16-bit lane
//   C_80 -> mmx_0x0080 : 0x0080 in every 16-bit lane
//   C_00 -> mmx_0x0000 : all bits zero
#define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0x00ff)
#define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x0080)
#define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()

#ifdef Q_CC_MSVC
// No EMMS at end of function
#  pragma warning(disable: 4799)
#endif

// Short alias used throughout this file for the 64-bit MMX register type.
typedef __m64 m64;
73
74QT_BEGIN_NAMESPACE
75
76struct QMMXCommonIntrinsics
77{
78 static inline m64 alpha(m64 x) {
79 x = _mm_unpackhi_pi16(x, x);
80 x = _mm_unpackhi_pi16(x, x);
81 return x;
82 }
83
84 static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff) {
85 return _mm_xor_si64(x, mmx_0x00ff);
86 }
87
88 static inline m64 add(const m64 &a, const m64 &b) {
89 return _mm_adds_pu16 (a, b);
90 }
91
92 static inline m64 _byte_mul(const m64 &a, const m64 &b,
93 const m64 &mmx_0x0080)
94 {
95 m64 res = _mm_mullo_pi16(a, b);
96 res = _mm_adds_pu16(res, mmx_0x0080);
97 res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
98 return _mm_srli_pi16(res, 8);
99 }
100
101 static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a,
102 const m64 &y, const m64 &b)
103 {
104 m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
105 return _mm_srli_pi16(res, 8);
106 }
107
108 static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a,
109 const m64 &y, const m64 &b,
110 const m64 &mmx_0x0080)
111 {
112 m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
113 res = _mm_adds_pu16(res, mmx_0x0080);
114 res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
115 return _mm_srli_pi16(res, 8);
116 }
117
118 static inline m64 _premul(m64 x, const m64 &mmx_0x0080) {
119 m64 a = alpha(x);
120 return _byte_mul(x, a, mmx_0x0080);
121 }
122
123 static inline m64 _load(uint x, const m64 &mmx_0x0000) {
124 return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
125 }
126
127 static inline m64 _load_alpha(uint x, const m64 &) {
128 x |= (x << 16);
129 return _mm_set1_pi32(x);
130 }
131
132 static inline uint _store(const m64 &x, const m64 &mmx_0x0000) {
133 return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000));
134 }
135};
136
// Convenience wrappers around the underscore-prefixed intrinsics helpers.
// Each one appends the local constant the helper needs, so the caller must
// have expanded the corresponding C_FF / C_80 / C_00 macro beforehand.
#define negate(x)                          _negate((x), mmx_0x00ff)
#define byte_mul(a, b)                     _byte_mul((a), (b), mmx_0x0080)
#define interpolate_pixel_255(x, a, y, b)  _interpolate_pixel_255((x), (a), (y), (b), mmx_0x0080)
#define premul(x)                          _premul((x), mmx_0x0080)
#define load(x)                            _load((x), mmx_0x0000)
#define load_alpha(x)                      _load_alpha((x), mmx_0x0000)
#define store(x)                           _store((x), mmx_0x0000)
144
145/*
146 result = 0
147 d = d * cia
148*/
149template <class MM>
150static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
151{
152 if (!length)
153 return;
154
155 if (const_alpha == 255) {
156 qt_memfill(static_cast<quint32*>(dest), quint32(0), length);
157 } else {
158 C_FF; C_80; C_00;
159 m64 ia = MM::negate(MM::load_alpha(const_alpha));
160 for (int i = 0; i < length; ++i) {
161 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia));
162 }
163 }
164 MM::end();
165}
166
167template <class MM>
168static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
169{
170 if (const_alpha == 255) {
171 qt_memfill(static_cast<quint32*>(dest), quint32(0), length);
172 } else {
173 C_FF; C_80; C_00;
174 m64 ia = MM::negate(MM::load_alpha(const_alpha));
175 for (int i = 0; i < length; ++i)
176 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia));
177 }
178 MM::end();
179}
180
181/*
182 result = s
183 dest = s * ca + d * cia
184*/
185template <class MM>
186static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha)
187{
188 if (const_alpha == 255) {
189 qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
190 } else {
191 C_FF; C_80; C_00;
192 const m64 a = MM::load_alpha(const_alpha);
193 const m64 ia = MM::negate(a);
194 const m64 s = MM::byte_mul(MM::load(src), a);
195 for (int i = 0; i < length; ++i) {
196 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
197 }
198 MM::end();
199 }
200}
201
202template <class MM>
203static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha)
204{
205 if (const_alpha == 255) {
206 ::memcpy(dest, src, length * sizeof(uint));
207 } else {
208 C_FF; C_80; C_00;
209 const m64 a = MM::load_alpha(const_alpha);
210 const m64 ia = MM::negate(a);
211 for (int i = 0; i < length; ++i)
212 dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), a,
213 MM::load(dest[i]), ia));
214 }
215 MM::end();
216}
217
218/*
219 result = s + d * sia
220 dest = (s + d * sia) * ca + d * cia
221 = s * ca + d * (sia * ca + cia)
222 = s * ca + d * (1 - sa*ca)
223*/
224template <class MM>
225static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha)
226{
227 if ((const_alpha & qAlpha(src)) == 255) {
228 qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
229 } else {
230 C_FF; C_80; C_00;
231 m64 s = MM::load(src);
232 if (const_alpha != 255) {
233 m64 ca = MM::load_alpha(const_alpha);
234 s = MM::byte_mul(s, ca);
235 }
236 m64 a = MM::negate(MM::alpha(s));
237 for (int i = 0; i < length; ++i)
238 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), a)));
239 MM::end();
240 }
241}
242
243template <class MM>
244static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha)
245{
246 C_FF; C_80; C_00;
247 if (const_alpha == 255) {
248 for (int i = 0; i < length; ++i) {
249 if ((0xff000000 & src[i]) == 0xff000000) {
250 dest[i] = src[i];
251 } else {
252 m64 s = MM::load(src[i]);
253 m64 ia = MM::negate(MM::alpha(s));
254 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
255 }
256 }
257 } else {
258 m64 ca = MM::load_alpha(const_alpha);
259 for (int i = 0; i < length; ++i) {
260 m64 s = MM::byte_mul(MM::load(src[i]), ca);
261 m64 ia = MM::negate(MM::alpha(s));
262 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
263 }
264 }
265 MM::end();
266}
267
268/*
269 result = d + s * dia
270 dest = (d + s * dia) * ca + d * cia
271 = d + s * dia * ca
272*/
273template <class MM>
274static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha)
275{
276 C_FF; C_80; C_00;
277 m64 s = MM::load(src);
278 if (const_alpha != 255)
279 s = MM::byte_mul(s, MM::load_alpha(const_alpha));
280
281 for (int i = 0; i < length; ++i) {
282 m64 d = MM::load(dest[i]);
283 m64 dia = MM::negate(MM::alpha(d));
284 dest[i] = MM::store(MM::add(d, MM::byte_mul(s, dia)));
285 }
286 MM::end();
287}
288
289template <class MM>
290static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
291{
292 C_FF; C_80; C_00;
293 if (const_alpha == 255) {
294 for (int i = 0; i < length; ++i) {
295 m64 d = MM::load(dest[i]);
296 m64 ia = MM::negate(MM::alpha(d));
297 dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), ia)));
298 }
299 } else {
300 m64 ca = MM::load_alpha(const_alpha);
301 for (int i = 0; i < length; ++i) {
302 m64 d = MM::load(dest[i]);
303 m64 dia = MM::negate(MM::alpha(d));
304 dia = MM::byte_mul(dia, ca);
305 dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), dia)));
306 }
307 }
308 MM::end();
309}
310
311/*
312 result = s * da
313 dest = s * da * ca + d * cia
314*/
315template <class MM>
316static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha)
317{
318 C_80; C_00;
319 if (const_alpha == 255) {
320 m64 s = MM::load(src);
321 for (int i = 0; i < length; ++i) {
322 m64 da = MM::alpha(MM::load(dest[i]));
323 dest[i] = MM::store(MM::byte_mul(s, da));
324 }
325 } else {
326 C_FF;
327 m64 s = MM::load(src);
328 m64 ca = MM::load_alpha(const_alpha);
329 s = MM::byte_mul(s, ca);
330 m64 cia = MM::negate(ca);
331 for (int i = 0; i < length; ++i) {
332 m64 d = MM::load(dest[i]);
333 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, cia));
334 }
335 }
336 MM::end();
337}
338
339template <class MM>
340static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
341{
342 C_FF; C_80; C_00;
343 if (const_alpha == 255) {
344 for (int i = 0; i < length; ++i) {
345 m64 a = MM::alpha(MM::load(dest[i]));
346 dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), a));
347 }
348 } else {
349 m64 ca = MM::load_alpha(const_alpha);
350 m64 cia = MM::negate(ca);
351 for (int i = 0; i < length; ++i) {
352 m64 d = MM::load(dest[i]);
353 m64 da = MM::byte_mul(MM::alpha(d), ca);
354 dest[i] = MM::store(MM::interpolate_pixel_255(
355 MM::load(src[i]), da, d, cia));
356 }
357 }
358 MM::end();
359}
360
361/*
362 result = d * sa
363 dest = d * sa * ca + d * cia
364 = d * (sa * ca + cia)
365*/
366template <class MM>
367static void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint src, uint const_alpha)
368{
369 C_80; C_00;
370 m64 a = MM::alpha(MM::load(src));
371 if (const_alpha != 255) {
372 C_FF;
373 m64 ca = MM::load_alpha(const_alpha);
374 m64 cia = MM::negate(ca);
375 a = MM::byte_mul(a, ca);
376 a = MM::add(a, cia);
377 }
378 for (int i = 0; i < length; ++i)
379 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
380 MM::end();
381}
382
383template <class MM>
384static void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha)
385{
386 C_FF; C_80; C_00;
387 if (const_alpha == 255) {
388 for (int i = 0; i < length; ++i) {
389 m64 a = MM::alpha(MM::load(src[i]));
390 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
391 }
392 } else {
393 m64 ca = MM::load_alpha(const_alpha);
394 m64 cia = MM::negate(ca);
395 for (int i = 0; i < length; ++i) {
396 m64 d = MM::load(dest[i]);
397 m64 a = MM::alpha(MM::load(src[i]));
398 a = MM::byte_mul(a, ca);
399 a = MM::add(a, cia);
400 dest[i] = MM::store(MM::byte_mul(d, a));
401 }
402 }
403 MM::end();
404}
405
406/*
407 result = s * dia
408 dest = s * dia * ca + d * cia
409*/
410template <class MM>
411static void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint src, uint const_alpha)
412{
413 C_FF; C_80; C_00;
414 m64 s = MM::load(src);
415 if (const_alpha == 255) {
416 for (int i = 0; i < length; ++i) {
417 m64 dia = MM::negate(MM::alpha(MM::load(dest[i])));
418 dest[i] = MM::store(MM::byte_mul(s, dia));
419 }
420 } else {
421 m64 ca = MM::load_alpha(const_alpha);
422 m64 cia = MM::negate(ca);
423 s = MM::byte_mul(s, ca);
424 for (int i = 0; i < length; ++i) {
425 m64 d = MM::load(dest[i]);
426 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, cia));
427 }
428 }
429 MM::end();
430}
431
432template <class MM>
433static void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha)
434{
435 C_FF; C_80; C_00;
436 if (const_alpha == 255) {
437 for (int i = 0; i < length; ++i) {
438 m64 ia = MM::negate(MM::alpha(MM::load(dest[i])));
439 dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), ia));
440 }
441 } else {
442 m64 ca = MM::load_alpha(const_alpha);
443 m64 cia = MM::negate(ca);
444 for (int i = 0; i < length; ++i) {
445 m64 d = MM::load(dest[i]);
446 m64 dia = MM::byte_mul(MM::negate(MM::alpha(d)), ca);
447 dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), dia, d, cia));
448 }
449 }
450 MM::end();
451}
452
453/*
454 result = d * sia
455 dest = d * sia * ca + d * cia
456 = d * (sia * ca + cia)
457*/
458template <class MM>
459static void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint src, uint const_alpha)
460{
461 C_FF; C_80; C_00;
462 m64 a = MM::negate(MM::alpha(MM::load(src)));
463 if (const_alpha != 255) {
464 m64 ca = MM::load_alpha(const_alpha);
465 a = MM::byte_mul(a, ca);
466 a = MM::add(a, MM::negate(ca));
467 }
468 for (int i = 0; i < length; ++i)
469 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
470 MM::end();
471}
472
473template <class MM>
474static void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha)
475{
476 C_FF; C_80; C_00;
477 if (const_alpha == 255) {
478 for (int i = 0; i < length; ++i) {
479 m64 a = MM::negate(MM::alpha(MM::load(src[i])));
480 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
481 }
482 } else {
483 m64 ca = MM::load_alpha(const_alpha);
484 m64 cia = MM::negate(ca);
485 for (int i = 0; i < length; ++i) {
486 m64 d = MM::load(dest[i]);
487 m64 a = MM::negate(MM::alpha(MM::load(src[i])));
488 a = MM::byte_mul(a, ca);
489 a = MM::add(a, cia);
490 dest[i] = MM::store(MM::byte_mul(d, a));
491 }
492 }
493 MM::end();
494}
495
496/*
497 result = s*da + d*sia
498 dest = s*da*ca + d*sia*ca + d *cia
499 = s*ca * da + d * (sia*ca + cia)
500 = s*ca * da + d * (1 - sa*ca)
501*/
502template <class MM>
503static void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint src, uint const_alpha)
504{
505 C_FF; C_80; C_00;
506 m64 s = MM::load(src);
507 if (const_alpha != 255) {
508 m64 ca = MM::load_alpha(const_alpha);
509 s = MM::byte_mul(s, ca);
510 }
511 m64 a = MM::negate(MM::alpha(s));
512 for (int i = 0; i < length; ++i) {
513 m64 d = MM::load(dest[i]);
514 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, a));
515 }
516 MM::end();
517}
518
519template <class MM>
520static void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha)
521{
522 C_FF; C_80; C_00;
523 if (const_alpha == 255) {
524 for (int i = 0; i < length; ++i) {
525 m64 s = MM::load(src[i]);
526 m64 d = MM::load(dest[i]);
527 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
528 MM::negate(MM::alpha(s))));
529 }
530 } else {
531 m64 ca = MM::load_alpha(const_alpha);
532 for (int i = 0; i < length; ++i) {
533 m64 s = MM::load(src[i]);
534 s = MM::byte_mul(s, ca);
535 m64 d = MM::load(dest[i]);
536 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
537 MM::negate(MM::alpha(s))));
538 }
539 }
540 MM::end();
541}
542
543/*
544 result = d*sa + s*dia
545 dest = d*sa*ca + s*dia*ca + d *cia
546 = s*ca * dia + d * (sa*ca + cia)
547*/
548template <class MM>
549static void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint src, uint const_alpha)
550{
551 C_FF; C_80; C_00;
552 m64 s = MM::load(src);
553 m64 a = MM::alpha(s);
554 if (const_alpha != 255) {
555 m64 ca = MM::load_alpha(const_alpha);
556 s = MM::byte_mul(s, ca);
557 a = MM::alpha(s);
558 a = MM::add(a, MM::negate(ca));
559 }
560 for (int i = 0; i < length; ++i) {
561 m64 d = MM::load(dest[i]);
562 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, a));
563 }
564 MM::end();
565}
566
567template <class MM>
568static void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha)
569{
570 C_FF; C_80; C_00;
571 if (const_alpha == 255) {
572 for (int i = 0; i < length; ++i) {
573 m64 s = MM::load(src[i]);
574 m64 d = MM::load(dest[i]);
575 dest[i] = MM::store(MM::interpolate_pixel_255(d, MM::alpha(s), s,
576 MM::negate(MM::alpha(d))));
577 }
578 } else {
579 m64 ca = MM::load_alpha(const_alpha);
580 for (int i = 0; i < length; ++i) {
581 m64 s = MM::load(src[i]);
582 s = MM::byte_mul(s, ca);
583 m64 d = MM::load(dest[i]);
584 m64 a = MM::alpha(s);
585 a = MM::add(a, MM::negate(ca));
586 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
587 d, a));
588 }
589 }
590 MM::end();
591}
592
593/*
594 result = d*sia + s*dia
595 dest = d*sia*ca + s*dia*ca + d *cia
596 = s*ca * dia + d * (sia*ca + cia)
597 = s*ca * dia + d * (1 - sa*ca)
598*/
599template <class MM>
600static void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint src, uint const_alpha)
601{
602 C_FF; C_80; C_00;
603 m64 s = MM::load(src);
604 if (const_alpha != 255) {
605 m64 ca = MM::load_alpha(const_alpha);
606 s = MM::byte_mul(s, ca);
607 }
608 m64 a = MM::negate(MM::alpha(s));
609 for (int i = 0; i < length; ++i) {
610 m64 d = MM::load(dest[i]);
611 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
612 d, a));
613 }
614 MM::end();
615}
616
617template <class MM>
618static void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha)
619{
620 C_FF; C_80; C_00;
621 if (const_alpha == 255) {
622 for (int i = 0; i < length; ++i) {
623 m64 s = MM::load(src[i]);
624 m64 d = MM::load(dest[i]);
625 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
626 d, MM::negate(MM::alpha(s))));
627 }
628 } else {
629 m64 ca = MM::load_alpha(const_alpha);
630 for (int i = 0; i < length; ++i) {
631 m64 s = MM::load(src[i]);
632 s = MM::byte_mul(s, ca);
633 m64 d = MM::load(dest[i]);
634 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
635 d, MM::negate(MM::alpha(s))));
636 }
637 }
638 MM::end();
639}
640
641template <class MM>
642static void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest,
643 int length,
644 uint color,
645 uint const_alpha)
646{
647 Q_UNUSED(const_alpha);
648
649 if ((quintptr)(dest) & 0x7) {
650 *dest++ |= color;
651 --length;
652 }
653
654 const int length64 = length / 2;
655 if (length64) {
656 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
657 const __m64 color64 = _mm_set_pi32(color, color);
658
659 int n = (length64 + 3) / 4;
660 switch (length64 & 0x3) {
661 case 0: do { *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
662 case 3: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
663 case 2: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
664 case 1: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
665 } while (--n > 0);
666 }
667 }
668
669 if (length & 0x1) {
670 dest[length - 1] |= color;
671 }
672
673 MM::end();
674}
675
676template <class MM>
677static void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest,
678 int length,
679 uint color,
680 uint const_alpha)
681{
682 Q_UNUSED(const_alpha);
683
684 color |= 0xff000000;
685
686 if ((quintptr)(dest) & 0x7) { // align
687 *dest++ &= color;
688 --length;
689 }
690
691 const int length64 = length / 2;
692 if (length64) {
693 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
694 const __m64 color64 = _mm_set_pi32(color, color);
695
696 int n = (length64 + 3) / 4;
697 switch (length64 & 0x3) {
698 case 0: do { *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
699 case 3: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
700 case 2: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
701 case 1: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
702 } while (--n > 0);
703 }
704 }
705
706 if (length & 0x1) {
707 dest[length - 1] &= color;
708 }
709
710 MM::end();
711}
712
713template <class MM>
714static void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest,
715 int length,
716 uint color,
717 uint const_alpha)
718{
719 Q_UNUSED(const_alpha);
720
721 color &= 0x00ffffff;
722
723 if ((quintptr)(dest) & 0x7) {
724 *dest++ ^= color;
725 --length;
726 }
727
728 const int length64 = length / 2;
729 if (length64) {
730 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
731 const __m64 color64 = _mm_set_pi32(color, color);
732
733 int n = (length64 + 3) / 4;
734 switch (length64 & 0x3) {
735 case 0: do { *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
736 case 3: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
737 case 2: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
738 case 1: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
739 } while (--n > 0);
740 }
741 }
742
743 if (length & 0x1) {
744 dest[length - 1] ^= color;
745 }
746
747 MM::end();
748}
749
750template <class MM>
751static void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest,
752 int length,
753 uint color,
754 uint const_alpha)
755{
756
757 Q_UNUSED(const_alpha);
758
759 if ((quintptr)(dest) & 0x7) {
760 *dest = (color & ~(*dest)) | 0xff000000;
761 ++dest;
762 --length;
763 }
764
765 const int length64 = length / 2;
766 if (length64) {
767 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
768 const __m64 color64 = _mm_set_pi32(color, color);
769 const m64 mmx_0xff000000 = _mm_set1_pi32(0xff000000);
770 __m64 tmp1, tmp2, tmp3, tmp4;
771
772 int n = (length64 + 3) / 4;
773 switch (length64 & 0x3) {
774 case 0: do { tmp1 = _mm_andnot_si64(*dst64, color64);
775 *dst64++ = _mm_or_si64(tmp1, mmx_0xff000000);
776 case 3: tmp2 = _mm_andnot_si64(*dst64, color64);
777 *dst64++ = _mm_or_si64(tmp2, mmx_0xff000000);
778 case 2: tmp3 = _mm_andnot_si64(*dst64, color64);
779 *dst64++ = _mm_or_si64(tmp3, mmx_0xff000000);
780 case 1: tmp4 = _mm_andnot_si64(*dst64, color64);
781 *dst64++ = _mm_or_si64(tmp4, mmx_0xff000000);
782 } while (--n > 0);
783 }
784 }
785
786 if (length & 0x1) {
787 dest[length - 1] = (color & ~(dest[length - 1])) | 0xff000000;
788 }
789
790 MM::end();
791}
792
793template <class MM>
794static void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest,
795 int length,
796 uint color,
797 uint const_alpha)
798{
799 rasterop_solid_SourceAndNotDestination<MM>(dest, length,
800 ~color, const_alpha);
801}
802
803template <class MM>
804static void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest,
805 int length,
806 uint color,
807 uint const_alpha)
808{
809 Q_UNUSED(const_alpha);
810 color = ~color | 0xff000000;
811 while (length--) {
812 *dest = color | ~(*dest);
813 ++dest;
814 }
815}
816
817template <class MM>
818static void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest,
819 int length,
820 uint color,
821 uint const_alpha)
822{
823 rasterop_solid_SourceXorDestination<MM>(dest, length, ~color, const_alpha);
824}
825
826template <class MM>
827static void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length,
828 uint color, uint const_alpha)
829{
830 Q_UNUSED(const_alpha);
831 qt_memfill((quint32*)dest, ~color | 0xff000000, length);
832}
833
834template <class MM>
835static void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest,
836 int length,
837 uint color,
838 uint const_alpha)
839{
840 rasterop_solid_SourceAndDestination<MM>(dest, length,
841 ~color, const_alpha);
842}
843
844template <class MM>
845static inline void qt_blend_color_argb_x86(int count, const QSpan *spans,
846 void *userData,
847 CompositionFunctionSolid *solidFunc)
848{
849 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
850 if (data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source
851 || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver
852 && qAlpha(data->solid.color) == 255)) {
853 // inline for performance
854 C_FF; C_80; C_00;
855 while (count--) {
856 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
857 if (spans->coverage == 255) {
858 qt_memfill(static_cast<quint32*>(target), quint32(data->solid.color), spans->len);
859 } else {
860 // dest = s * ca + d * (1 - sa*ca) --> dest = s * ca + d * (1-ca)
861 m64 ca = MM::load_alpha(spans->coverage);
862 m64 s = MM::byte_mul(MM::load(data->solid.color), ca);
863 m64 ica = MM::negate(ca);
864 for (int i = 0; i < spans->len; ++i)
865 target[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(target[i]), ica)));
866 }
867 ++spans;
868 }
869 MM::end();
870 return;
871 }
872 CompositionFunctionSolid func = solidFunc[data->rasterBuffer->compositionMode];
873 while (count--) {
874 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
875 func(target, spans->len, data->solid.color, spans->coverage);
876 ++spans;
877 }
878}
879
#ifdef QT_HAVE_MMX
/*
    MMX flavour of the intrinsics set: the common arithmetic plus the end()
    hook that the compositing functions call when they are done.
*/
struct QMMXIntrinsics : public QMMXCommonIntrinsics
{
    // Issues _mm_empty(), except on Windows CE builds that do not define _X86_.
    static inline void end() {
#if defined(_X86_) || !defined(Q_OS_WINCE)
        _mm_empty();
#endif
    }
};
#endif // QT_HAVE_MMX
890
891QT_END_NAMESPACE
892
893#endif // QDRAWHELPER_MMX_P_H
Note: See TracBrowser for help on using the repository browser.