1 module CPUblit.composing.specblt;
2 
3 import CPUblit.composing.common;
4 
/**
 * Two-operand text blitter: tints the source mask with `color` and composes it onto `dest` in place.
 *
 * Mainly intended for single color texts, but can work in other applications as long as the source
 * is correctly formatted, meaning: transparent pixels = 0, colored pixels = T.max.
 *
 * Params:
 *   src = source mask; `length` elements are read.
 *   dest = destination; `length` elements are read and overwritten.
 *   length = number of elements to process.
 *   color = value AND-ed with every source element before it is written.
 *
 * When USE_INTEL_INTRINSICS is set, T must be ubyte, ushort or uint (anything else trips the static assert).
 */
public void textBlitter(T)(T* src, T* dest, size_t length, T color) @nogc pure nothrow {
	static if(USE_INTEL_INTRINSICS){
		// Select the vector type used to splat the color and the element counts of the
		// 128 bit, 64 bit and 32 bit processing steps.
		static if(T.stringof == "ubyte"){
			byte16 colorV;
			static enum MAINLOOP_LENGTH = 16;
			static enum HALFLOAD_LENGTH = 8;
			static enum QUTRLOAD_LENGTH = 4;
		}else static if(T.stringof == "ushort"){
			short8 colorV;
			static enum MAINLOOP_LENGTH = 8;
			static enum HALFLOAD_LENGTH = 4;
			static enum QUTRLOAD_LENGTH = 2;
		}else static if(T.stringof == "uint"){
			int4 colorV;
			static enum MAINLOOP_LENGTH = 4;
			static enum HALFLOAD_LENGTH = 2;
			static enum QUTRLOAD_LENGTH = 1;
		}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
		// Replicate the color into every lane of the vector.
		static foreach(i; 0 .. (MAINLOOP_LENGTH)){
			colorV[i] = color;
		}
		// NOTE(review): the vector steps derive the transparency mask from `src & color`, while the
		// scalar loops below derive it from `*src` alone. The two only differ when a non-zero source
		// element ANDs to zero with `color` (e.g. color == 0) -- confirm which behavior is intended.
		// Full 128 bit steps.
		while(length >= MAINLOOP_LENGTH){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src) & cast(__m128i)colorV;
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			// mask is all ones in the lanes where the tinted source is zero, so dest is kept there.
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storeu_si128(cast(__m128i*)dest, destV);
			src += MAINLOOP_LENGTH;
			dest += MAINLOOP_LENGTH;
			length -= MAINLOOP_LENGTH;
		}
		// At most one 64 bit step can remain.
		if(length >= HALFLOAD_LENGTH){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src) & cast(__m128i)colorV;
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storel_epi64(cast(__m128i*)dest, destV);
			src += HALFLOAD_LENGTH;
			dest += HALFLOAD_LENGTH;
			length -= HALFLOAD_LENGTH;
		}
		// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
		if(length >= QUTRLOAD_LENGTH){
			__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src)) & cast(__m128i)colorV;
			__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storeu_si32(dest, destV);
			// For uint this was the last element, so no bookkeeping is needed afterwards.
			static if(T.stringof != "uint"){
				src += QUTRLOAD_LENGTH;
				dest += QUTRLOAD_LENGTH;
				length -= QUTRLOAD_LENGTH;
			}
		}
		// Scalar tail: up to three ubytes or a single ushort.
		static if(T.stringof == "ubyte"){
			while(length){
				const ubyte mask = *src ? ubyte.min : ubyte.max;
				*dest = (*src & color) | (*dest & mask);
				src++;
				dest++;
				length--;
			}
		}else static if(T.stringof == "ushort"){
			if(length){
				const ushort mask = *src ? ushort.min : ushort.max;
				// The color has to be applied here too, otherwise the last odd pixel is written
				// with the raw mask value instead of the requested color.
				*dest = (*src & color) | (*dest & mask);
			}
		}
	}else{
		// Portable fallback, one element at a time.
		while(length){
			// The mask must have the element type; a ubyte mask cannot hold T.max for ushort/uint.
			const T mask = *src ? T.min : T.max;
			*dest = (*src & color) | (*dest & mask);
			src++;
			dest++;
			length--;
		}
	}
}
/**
 * Three-operand text blitter: tints the source mask with `color`, composes it over `dest`
 * and writes the result to `dest0`.
 *
 * Mainly intended for single color texts; the source is expected to be a mask where
 * transparent pixels = 0 and colored pixels = T.max.
 *
 * Params:
 *   src = source mask; `length` elements are read.
 *   dest = background; `length` elements are read.
 *   dest0 = output; `length` elements are written.
 *   length = number of elements to process.
 *   color = value AND-ed with every source element before it is written.
 */
public void textBlitter(T)(T* src, T* dest, T* dest0, size_t length, T color) @nogc pure nothrow {
	static if(USE_INTEL_INTRINSICS){
		// Vector type for the color splat and the element count of one 128 bit step.
		static if(is(T == ubyte)){
			byte16 splat;
			enum int FULL_STEP = 16;
		}else static if(is(T == ushort)){
			short8 splat;
			enum int FULL_STEP = 8;
		}else static if(is(T == uint)){
			int4 splat;
			enum int FULL_STEP = 4;
		}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
		enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
		enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
		static foreach(lane; 0 .. FULL_STEP){
			splat[lane] = color;
		}
		__m128i colorBits = cast(__m128i)splat;
		// Full 128 bit steps.
		for(; length >= FULL_STEP; length -= FULL_STEP){
			__m128i pixels = _mm_loadu_si128(cast(__m128i*)src) & colorBits;
			__m128i background = _mm_loadu_si128(cast(__m128i*)dest);
			// keep is all ones in every lane where the tinted source is zero.
			static if(is(T == ubyte))
				__m128i keep = _mm_cmpeq_epi8(pixels, SSE2_NULLVECT);
			else static if(is(T == ushort))
				__m128i keep = _mm_cmpeq_epi16(pixels, SSE2_NULLVECT);
			else
				__m128i keep = _mm_cmpeq_epi32(pixels, SSE2_NULLVECT);
			_mm_storeu_si128(cast(__m128i*)dest0, pixels | (background & keep));
			src += FULL_STEP;
			dest += FULL_STEP;
			dest0 += FULL_STEP;
		}
		// At most one 64 bit step can remain.
		if(length >= HALF_STEP){
			__m128i pixels = _mm_loadl_epi64(cast(__m128i*)src) & colorBits;
			__m128i background = _mm_loadl_epi64(cast(__m128i*)dest);
			static if(is(T == ubyte))
				__m128i keep = _mm_cmpeq_epi8(pixels, SSE2_NULLVECT);
			else static if(is(T == ushort))
				__m128i keep = _mm_cmpeq_epi16(pixels, SSE2_NULLVECT);
			else
				__m128i keep = _mm_cmpeq_epi32(pixels, SSE2_NULLVECT);
			_mm_storel_epi64(cast(__m128i*)dest0, pixels | (background & keep));
			src += HALF_STEP;
			dest += HALF_STEP;
			dest0 += HALF_STEP;
			length -= HALF_STEP;
		}
		// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
		if(length >= QUARTER_STEP){
			__m128i pixels = _mm_cvtsi32_si128(*cast(int*)src) & colorBits;
			__m128i background = _mm_cvtsi32_si128(*cast(int*)dest);
			static if(is(T == ubyte))
				__m128i keep = _mm_cmpeq_epi8(pixels, SSE2_NULLVECT);
			else static if(is(T == ushort))
				__m128i keep = _mm_cmpeq_epi16(pixels, SSE2_NULLVECT);
			else
				__m128i keep = _mm_cmpeq_epi32(pixels, SSE2_NULLVECT);
			_mm_storeu_si32(dest0, pixels | (background & keep));
			// For uint this was the last element, nothing is left to advance to.
			static if(!is(T == uint)){
				src += QUARTER_STEP;
				dest += QUARTER_STEP;
				dest0 += QUARTER_STEP;
				length -= QUARTER_STEP;
			}
		}
		// Scalar tail: up to three ubytes or a single ushort.
		static if(is(T == ubyte)){
			for(; length; length--){
				const T keep = *src ? T.min : T.max;
				*dest0 = (*src & color) | (*dest & keep);
				src++;
				dest++;
				dest0++;
			}
		}else static if(is(T == ushort)){
			if(length){
				const T keep = *src ? T.min : T.max;
				*dest0 = (*src & color) | (*dest & keep);
			}
		}
	}else{
		// Portable fallback, one element at a time.
		for(; length; length--){
			const T keep = *src ? T.min : T.max;
			*dest0 = (*src & color) | (*dest & keep);
			src++;
			dest++;
			dest0++;
		}
	}
}
/**
 * Three-operand XOR blitter with a constant color: writes `dest ^ color` to `dest0`.
 * Popularly used for selection and pseudo-transparency.
 *
 * Params:
 *   dest = input; `length` elements are read.
 *   dest0 = output; `length` elements are written.
 *   length = number of elements to process.
 *   color = value XOR-ed with every input element.
 */
public @nogc void xorBlitter(T)(T* dest, T* dest0, size_t length, T color){
	static if(USE_INTEL_INTRINSICS){
		// Vector type for the color splat and the element count of one 128 bit step.
		static if(is(T == ubyte)){
			byte16 splat;
			enum int FULL_STEP = 16;
		}else static if(is(T == ushort)){
			short8 splat;
			enum int FULL_STEP = 8;
		}else static if(is(T == uint)){
			int4 splat;
			enum int FULL_STEP = 4;
		}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
		enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
		enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
		foreach(lane; 0 .. FULL_STEP){
			splat[lane] = color;
		}
		__m128i colorBits = cast(__m128i)splat;
		// Full 128 bit steps.
		for(; length >= FULL_STEP; length -= FULL_STEP){
			__m128i pixels = _mm_loadu_si128(cast(__m128i*)dest);
			_mm_storeu_si128(cast(__m128i*)dest0, _mm_xor_si128(pixels, colorBits));
			dest += FULL_STEP;
			dest0 += FULL_STEP;
		}
		// At most one 64 bit step can remain.
		if(length >= HALF_STEP){
			__m128i pixels = _mm_loadl_epi64(cast(__m128i*)dest);
			_mm_storel_epi64(cast(__m128i*)dest0, _mm_xor_si128(pixels, colorBits));
			dest += HALF_STEP;
			dest0 += HALF_STEP;
			length -= HALF_STEP;
		}
		// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
		if(length >= QUARTER_STEP){
			__m128i pixels = _mm_cvtsi32_si128(*cast(int*)dest);
			_mm_storeu_si32(dest0, _mm_xor_si128(pixels, colorBits));
			// For uint this was the last element, nothing is left to advance to.
			static if(!is(T == uint)){
				dest += QUARTER_STEP;
				dest0 += QUARTER_STEP;
				length -= QUARTER_STEP;
			}
		}
		// Scalar tail: up to three ubytes or a single ushort.
		static if(is(T == ubyte)){
			for(; length; length--){
				*dest0 = color ^ *dest;
				dest++;
				dest0++;
			}
		}else static if(is(T == ushort)){
			if(length){
				*dest0 = color ^ *dest;
			}
		}
	}else{
		// Portable fallback, one element at a time.
		for(; length; length--){
			*dest0 = color ^ *dest;
			dest++;
			dest0++;
		}
	}
}
/**
 * In-place XOR blitter with a constant color: every element of `dest` becomes `dest ^ color`.
 * Popularly used for selection and pseudo-transparency.
 *
 * Params:
 *   dest = buffer; `length` elements are read and overwritten.
 *   length = number of elements to process.
 *   color = value XOR-ed with every element.
 */
public void xorBlitter(T)(T* dest, size_t length, T color) @nogc pure nothrow {
	// Vector type for the color splat and the element count of one 128 bit step.
	static if(is(T == ubyte)){
		byte16 splat;
		enum int FULL_STEP = 16;
	}else static if(is(T == ushort)){
		short8 splat;
		enum int FULL_STEP = 8;
	}else static if(is(T == uint)){
		int4 splat;
		enum int FULL_STEP = 4;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
	enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
	foreach(lane; 0 .. FULL_STEP){
		splat[lane] = color;
	}
	__m128i colorBits = cast(__m128i)splat;
	// Full 128 bit steps.
	for(; length >= FULL_STEP; length -= FULL_STEP){
		__m128i pixels = _mm_loadu_si128(cast(__m128i*)dest);
		_mm_storeu_si128(cast(__m128i*)dest, _mm_xor_si128(pixels, colorBits));
		dest += FULL_STEP;
	}
	// At most one 64 bit step can remain.
	if(length >= HALF_STEP){
		__m128i pixels = _mm_loadl_epi64(cast(__m128i*)dest);
		_mm_storel_epi64(cast(__m128i*)dest, _mm_xor_si128(pixels, colorBits));
		dest += HALF_STEP;
		length -= HALF_STEP;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUARTER_STEP){
		__m128i pixels = _mm_cvtsi32_si128(*cast(int*)dest);
		_mm_storeu_si32(dest, _mm_xor_si128(pixels, colorBits));
		// For uint this was the last element, nothing is left to advance to.
		static if(!is(T == uint)){
			dest += QUARTER_STEP;
			length -= QUARTER_STEP;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(is(T == ubyte)){
		for(; length; length--){
			*dest = color ^ *dest;
			dest++;
		}
	}else static if(is(T == ushort)){
		if(length){
			*dest = color ^ *dest;
		}
	}
}
/**
 * Two-operand XOR blitter: every element of `dest` becomes `dest ^ src`.
 * Popularly used for selection and pseudo-transparency.
 *
 * Params:
 *   src = source; `length` elements are read.
 *   dest = destination; `length` elements are read and overwritten.
 *   length = number of elements to process.
 */
public void xorBlitter(T)(T* src, T* dest, size_t length) @nogc pure nothrow {
	// Element count of one 128 bit step for the supported element types.
	static if(is(T == ubyte)){
		enum int FULL_STEP = 16;
	}else static if(is(T == ushort)){
		enum int FULL_STEP = 8;
	}else static if(is(T == uint)){
		enum int FULL_STEP = 4;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
	enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
	// Full 128 bit steps.
	for(; length >= FULL_STEP; length -= FULL_STEP){
		__m128i operand = _mm_loadu_si128(cast(__m128i*)src);
		__m128i target = _mm_loadu_si128(cast(__m128i*)dest);
		_mm_storeu_si128(cast(__m128i*)dest, _mm_xor_si128(target, operand));
		dest += FULL_STEP;
		src += FULL_STEP;
	}
	// At most one 64 bit step can remain.
	if(length >= HALF_STEP){
		__m128i operand = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i target = _mm_loadl_epi64(cast(__m128i*)dest);
		_mm_storel_epi64(cast(__m128i*)dest, _mm_xor_si128(target, operand));
		dest += HALF_STEP;
		src += HALF_STEP;
		length -= HALF_STEP;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUARTER_STEP){
		__m128i operand = _mm_cvtsi32_si128(*cast(int*)src);
		__m128i target = _mm_cvtsi32_si128(*cast(int*)dest);
		_mm_storeu_si32(dest, _mm_xor_si128(target, operand));
		// For uint this was the last element, nothing is left to advance to.
		static if(!is(T == uint)){
			dest += QUARTER_STEP;
			src += QUARTER_STEP;
			length -= QUARTER_STEP;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(is(T == ubyte)){
		for(; length; length--){
			*dest = *src ^ *dest;
			dest++;
			src++;
		}
	}else static if(is(T == ushort)){
		if(length){
			*dest = *src ^ *dest;
		}
	}
}
/**
 * Three-operand XOR blitter: writes `src ^ dest` to `dest0`.
 * Popularly used for selection and pseudo-transparency.
 *
 * Params:
 *   src = first input; `length` elements are read.
 *   dest = second input; `length` elements are read.
 *   dest0 = output; `length` elements are written.
 *   length = number of elements to process.
 *
 * T must be ubyte, ushort or uint (anything else trips the static assert).
 */
public void xorBlitter(T)(T* src, T* dest, T* dest0, size_t length) @nogc pure nothrow {
	// Element counts of the 128 bit, 64 bit and 32 bit processing steps.
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	// Full 128 bit steps.
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest0, destV);
		dest += MAINLOOP_LENGTH;
		dest0 += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	// At most one 64 bit step can remain.
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest0, destV);
		dest += HALFLOAD_LENGTH;
		dest0 += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_xor_si128(destV, srcV);
		_mm_storeu_si32(dest0, destV);
		// For uint this was the last element, so no bookkeeping is needed afterwards.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			dest0 += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			// The result belongs in dest0; writing it to dest would corrupt the input and leave
			// the last output bytes untouched.
			*dest0 = *src ^ *dest;
			dest++;
			dest0++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest0 = *src ^ *dest;
		}
	}
}
/**
 * Two-operand AND blitter for misc. usage: every element of `dest` becomes `dest & src`.
 *
 * Params:
 *   src = source; `length` elements are read.
 *   dest = destination; `length` elements are read and overwritten.
 *   length = number of elements to process.
 */
public void andBlitter(T)(T* src, T* dest, size_t length) {
	// Element count of one 128 bit step for the supported element types.
	static if(is(T == ubyte)){
		enum int FULL_STEP = 16;
	}else static if(is(T == ushort)){
		enum int FULL_STEP = 8;
	}else static if(is(T == uint)){
		enum int FULL_STEP = 4;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
	enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
	// Full 128 bit steps.
	for(; length >= FULL_STEP; length -= FULL_STEP){
		__m128i operand = _mm_loadu_si128(cast(__m128i*)src);
		__m128i target = _mm_loadu_si128(cast(__m128i*)dest);
		_mm_storeu_si128(cast(__m128i*)dest, _mm_and_si128(target, operand));
		dest += FULL_STEP;
		src += FULL_STEP;
	}
	// At most one 64 bit step can remain.
	if(length >= HALF_STEP){
		__m128i operand = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i target = _mm_loadl_epi64(cast(__m128i*)dest);
		_mm_storel_epi64(cast(__m128i*)dest, _mm_and_si128(target, operand));
		dest += HALF_STEP;
		src += HALF_STEP;
		length -= HALF_STEP;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUARTER_STEP){
		__m128i operand = _mm_cvtsi32_si128(*cast(int*)src);
		__m128i target = _mm_cvtsi32_si128(*cast(int*)dest);
		_mm_storeu_si32(dest, _mm_and_si128(target, operand));
		// For uint this was the last element, nothing is left to advance to.
		static if(!is(T == uint)){
			dest += QUARTER_STEP;
			src += QUARTER_STEP;
			length -= QUARTER_STEP;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(is(T == ubyte)){
		for(; length; length--){
			*dest = *src & *dest;
			dest++;
			src++;
		}
	}else static if(is(T == ushort)){
		if(length){
			*dest = *src & *dest;
		}
	}
}
/**
 * Three-operand AND blitter for misc. usage: writes `src & dest` to `dest0`.
 *
 * Params:
 *   src = first input; `length` elements are read.
 *   dest = second input; `length` elements are read.
 *   dest0 = output; `length` elements are written.
 *   length = number of elements to process.
 */
public void andBlitter(T)(T* src, T* dest, T* dest0, size_t length) {
	// Element count of one 128 bit step for the supported element types.
	static if(is(T == ubyte)){
		enum int FULL_STEP = 16;
	}else static if(is(T == ushort)){
		enum int FULL_STEP = 8;
	}else static if(is(T == uint)){
		enum int FULL_STEP = 4;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
	enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
	// Full 128 bit steps.
	for(; length >= FULL_STEP; length -= FULL_STEP){
		__m128i operand = _mm_loadu_si128(cast(__m128i*)src);
		__m128i target = _mm_loadu_si128(cast(__m128i*)dest);
		_mm_storeu_si128(cast(__m128i*)dest0, _mm_and_si128(target, operand));
		dest += FULL_STEP;
		dest0 += FULL_STEP;
		src += FULL_STEP;
	}
	// At most one 64 bit step can remain.
	if(length >= HALF_STEP){
		__m128i operand = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i target = _mm_loadl_epi64(cast(__m128i*)dest);
		_mm_storel_epi64(cast(__m128i*)dest0, _mm_and_si128(target, operand));
		dest += HALF_STEP;
		dest0 += HALF_STEP;
		src += HALF_STEP;
		length -= HALF_STEP;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUARTER_STEP){
		__m128i operand = _mm_cvtsi32_si128(*cast(int*)src);
		__m128i target = _mm_cvtsi32_si128(*cast(int*)dest);
		_mm_storeu_si32(dest0, _mm_and_si128(target, operand));
		// For uint this was the last element, nothing is left to advance to.
		static if(!is(T == uint)){
			dest += QUARTER_STEP;
			dest0 += QUARTER_STEP;
			src += QUARTER_STEP;
			length -= QUARTER_STEP;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(is(T == ubyte)){
		for(; length; length--){
			*dest0 = *src & *dest;
			dest++;
			dest0++;
			src++;
		}
	}else static if(is(T == ushort)){
		if(length){
			*dest0 = *src & *dest;
		}
	}
}
/**
 * Two-operand OR blitter for misc. usage: every element of `dest` becomes `dest | src`.
 *
 * Params:
 *   src = source; `length` elements are read.
 *   dest = destination; `length` elements are read and overwritten.
 *   length = number of elements to process.
 */
public void orBlitter(T)(T* src, T* dest, size_t length) {
	// Element count of one 128 bit step for the supported element types.
	static if(is(T == ubyte)){
		enum int FULL_STEP = 16;
	}else static if(is(T == ushort)){
		enum int FULL_STEP = 8;
	}else static if(is(T == uint)){
		enum int FULL_STEP = 4;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	enum int HALF_STEP = FULL_STEP / 2;		// elements in a 64 bit step
	enum int QUARTER_STEP = FULL_STEP / 4;	// elements in a 32 bit step
	// Full 128 bit steps.
	for(; length >= FULL_STEP; length -= FULL_STEP){
		__m128i operand = _mm_loadu_si128(cast(__m128i*)src);
		__m128i target = _mm_loadu_si128(cast(__m128i*)dest);
		_mm_storeu_si128(cast(__m128i*)dest, _mm_or_si128(target, operand));
		dest += FULL_STEP;
		src += FULL_STEP;
	}
	// At most one 64 bit step can remain.
	if(length >= HALF_STEP){
		__m128i operand = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i target = _mm_loadl_epi64(cast(__m128i*)dest);
		_mm_storel_epi64(cast(__m128i*)dest, _mm_or_si128(target, operand));
		dest += HALF_STEP;
		src += HALF_STEP;
		length -= HALF_STEP;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUARTER_STEP){
		__m128i operand = _mm_cvtsi32_si128(*cast(int*)src);
		__m128i target = _mm_cvtsi32_si128(*cast(int*)dest);
		_mm_storeu_si32(dest, _mm_or_si128(target, operand));
		// For uint this was the last element, nothing is left to advance to.
		static if(!is(T == uint)){
			dest += QUARTER_STEP;
			src += QUARTER_STEP;
			length -= QUARTER_STEP;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(is(T == ubyte)){
		for(; length; length--){
			*dest = *src | *dest;
			dest++;
			src++;
		}
	}else static if(is(T == ushort)){
		if(length){
			*dest = *src | *dest;
		}
	}
}
/**
 * Three-operand OR blitter for misc. usage: writes `src | dest` to `dest0`.
 *
 * Params:
 *   src = first input; `length` elements are read.
 *   dest = second input; `length` elements are read.
 *   dest0 = output; `length` elements are written.
 *   length = number of elements to process.
 *
 * T must be ubyte, ushort or uint (anything else trips the static assert).
 */
public void orBlitter(T)(T* src, T* dest, T* dest0, size_t length) {
	// Element counts of the 128 bit, 64 bit and 32 bit processing steps.
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	// Full 128 bit steps. The result goes to dest0 and dest0 advances together with the
	// inputs; storing to dest here would overwrite the input and leave dest0 unwritten.
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_or_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest0, destV);
		dest += MAINLOOP_LENGTH;
		dest0 += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	// At most one 64 bit step can remain.
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_or_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest0, destV);
		dest += HALFLOAD_LENGTH;
		dest0 += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	// At most one 32 bit step can remain: 4 ubytes, 2 ushorts or 1 uint.
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_or_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si32(dest0, destV);
		// For uint this was the last element, so no bookkeeping is needed afterwards.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			dest0 += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	// Scalar tail: up to three ubytes or a single ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest0 = *src | *dest;
			dest++;
			dest0++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest0 = *src | *dest;
		}
	}
}
unittest {
	// Zero correctness: every blitter is run on default-initialized 255 element arrays of the
	// given element type and the written array is handed to testArrayForValue afterwards.
	static void zeroCheck(T)(){
		T[255] a, b, c;
		textBlitter(a.ptr, b.ptr, 255, 0);
		testArrayForValue(b);
		textBlitter(a.ptr, b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);
		xorBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		xorBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
		xorBlitter(b.ptr, 255, 0);
		testArrayForValue(b);
		xorBlitter(b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);

		andBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		andBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);

		orBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		orBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
	}
	// Same sequence for each supported element type, in the original order.
	zeroCheck!ubyte();
	zeroCheck!ushort();
	zeroCheck!uint();
}