module CPUblit.composing.alphablend;

import CPUblit.composing.common;

/*
 * CPUblit
 * Alpha-blending functions.
 * Author: Laszlo Szeremi
 *
 * Alpha-blending composes two images together using the following formula:
 * dest0[rgba] = ((1.0 - mask[aaaa]) * dest[rgba]) + (mask[aaaa] * src[rgba])
 * where `mask` is either a separate mask (a master value or a per-pixel one), or the alpha channel
 * extracted from src.
 * For speed's sake, these functions use integer arithmetic, so the following formula is used instead:
 * dest0[rgba] = (((256 - mask[aaaa]) * dest[rgba]) + ((1 + mask[aaaa]) * src[rgba]))>>>8
 * This approach should have no downsides, especially as some workarounds have been put in place to
 * avoid precision issues.
 * When a master alpha is used, it's multiplied with whichever mask is in effect.
 *
 * These functions only work with 8 bit channels, and many require 32 bit values.
 * Masks can be either 8 bit per pixel, or 32 bit per pixel with the ability to process up to 4 channels
 * independently (only when using vectors).
 *
 * Note on the differences between the vector and non-vector implementations: vector implementations
 * process all four channels to save on complexity; non-vector implementations only process the three
 * color channels to save on processing speed.
 */
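/*
 * Worked example of the integer formula above: blending src = 255 over dest = 0 at mask = 128 gives
 * ((256 - 128) * 0 + (1 + 128) * 255) >>> 8 = 32895 >>> 8 = 128, which matches the roughly 50% mix
 * the floating-point formula would produce.
 */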
@nogc pure nothrow {
/**
 * 2 operator alpha-blending function. Blends `length` pixels of `src` onto `dest` in place, using
 * the alpha channel extracted from `src`.
 */
public void alphaBlend(uint* src, uint* dest, size_t length) {
	static if(USE_INTEL_INTRINSICS){
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;	//extract the alpha channel
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);	//1 + mask
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);	//256 - mask
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);	//(src * (1 + mask) + dest * (256 - mask)) >>> 8
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			const int src1 = 1 + lsrc.a;
			const int src256 = 256 - lsrc.a;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest = ldest;
			dest++;
			length--;
		}
	}
}
/**
 * 3 operator alpha-blending function. Blends `src` onto `dest` using the alpha channel extracted
 * from `src`, and writes the result to `dest0`.
 */
public void alphaBlend(uint* src, uint* dest, uint* dest0, size_t length) {
	static if (USE_INTEL_INTRINSICS) {
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			dest0 += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			dest0 += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			const int src1 = 1 + lsrc.a;
			const int src256 = 256 - lsrc.a;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest0 = ldest;
			dest++;
			dest0++;
			length--;
		}
	}
}
/**
 * 3 operator alpha-blending function. Blends `src` onto `dest` in place, using a separate mask.
 * Mask is either 8 or 32 bit per pixel.
 */
public void alphaBlend(M)(uint* src, uint* dest, size_t length, M* mask) {
	static if(USE_INTEL_INTRINSICS){
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			mask += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			mask += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			static if (is(M == uint)) {
				const int src1 = 1 + (*mask & 0xFF);
				const int src256 = 256 - (*mask & 0xFF);
			} else static if (is(M == ubyte)) {
				const int src1 = 1 + *mask;
				const int src256 = 256 - *mask;
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest = ldest;
			dest++;
			mask++;
			length--;
		}
	}
}
/**
 * 4 operator alpha-blending function. Blends `src` onto `dest` using a separate mask, and writes
 * the result to `dest0`.
 * Mask is either 8 or 32 bit per pixel.
 */
public void alphaBlend(M)(uint* src, uint* dest, uint* dest0, size_t length, M* mask) {
	static if(USE_INTEL_INTRINSICS){
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			dest0 += 4;
			mask += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			dest0 += 2;
			mask += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			static if (is(M == uint)) {
				const int src1 = 1 + (*mask & 0xFF);
				const int src256 = 256 - (*mask & 0xFF);
			} else static if (is(M == ubyte)) {
				const int src1 = 1 + *mask;
				const int src256 = 256 - *mask;
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest0 = ldest;
			dest++;
			mask++;
			dest0++;
			length--;
		}
	}
}
/**
 * Fixed-value alpha-blending, 3 operator. Blends `src` onto `dest` in place, using `value` as a
 * constant alpha for every pixel.
 */
public void alphaBlendFV(V)(uint* src, uint* dest, size_t length, V value) {
	static if(USE_INTEL_INTRINSICS){
		__m128i maskV;
		static if (is(V == uint)) {
			maskV[0] = value;
			maskV[1] = value;
			//maskV[2] = value;
			//maskV[3] = value;
		} else static if (is(V == ubyte)) {
			maskV[0] = value;
			maskV[1] = value;
			//maskV[2] = value;
			//maskV[3] = value;
			maskV |= _mm_slli_epi32(maskV, 8);
			maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
		} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
		__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
		//__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
		__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
		//__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
		mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
		//mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		static if (is(V == uint)) {
			const int src1 = 1 + (value & 0xFF);
			const int src256 = 256 - (value & 0xFF);
		} else static if (is(V == ubyte)) {
			const int src1 = 1 + value;
			const int src256 = 256 - value;
		} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest = ldest;
			dest++;
			length--;
		}
	}
}
/**
 * Fixed-value alpha-blending, 4 operator. Blends `src` onto `dest` using `value` as a constant
 * alpha for every pixel, and writes the result to `dest0`.
 */
public void alphaBlendFV(V)(uint* src, uint* dest, uint* dest0, size_t length, V value) {
	static if(USE_INTEL_INTRINSICS){
		__m128i maskV;
		static if (is(V == uint)) {
			maskV[0] = value;
			maskV[1] = value;
			//maskV[2] = value;
			//maskV[3] = value;
		} else static if (is(V == ubyte)) {
			maskV[0] = value;
			maskV[1] = value;
			maskV |= _mm_slli_epi32(maskV, 8);
			maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
		} else static assert (0, "Value must be either 8 or 32 bits!");
		__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
		//__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
		__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
		//__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
		mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
		//mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			dest0 += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			dest0 += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		static if (is(V == uint)) {
			const int src1 = 1 + (value & 0xFF);
			const int src256 = 256 - (value & 0xFF);
		} else static if (is(V == ubyte)) {
			const int src1 = 1 + value;
			const int src256 = 256 - value;
		} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest0 = ldest;
			dest++;
			dest0++;
			length--;
		}
	}
}
/**
 * Alpha-blending with per-pixel + fixed master value alpha.
 * `value` controls the overall alpha through an extra multiplication on the alpha extracted from
 * the pixels.
 * 2 operator.
 */
public void alphaBlendMV(V)(uint* src, uint* dest, size_t length, V value) {
	static if(USE_INTEL_INTRINSICS) {
		__m128i masterV;
		static if (is(V == uint)) {
			masterV[0] = value;
			masterV[1] = value;
			masterV[2] = value;
			masterV[3] = value;
		} else static if (is(V == ubyte)) {
			masterV[0] = value;
			masterV[1] = value;
			masterV[2] = value;
			masterV[3] = value;
			masterV |= _mm_slli_epi32(masterV, 8);
			masterV |= _mm_slli_epi32(masterV, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			const int a = (lsrc.a * ((value & 0xFF) + 1)) >>> 8;
			const int src1 = 1 + a;
			const int src256 = 256 - a;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest = ldest;
			dest++;
			length--;
		}
	}
}
/**
 * Alpha-blending with per-pixel + fixed master value alpha.
 * `value` controls the overall alpha through an extra multiplication on the alpha extracted from
 * the pixels.
 * 3 operator.
 */
public void alphaBlendMV(V)(uint* src, uint* dest, uint* dest0, size_t length, V value) {
	static if(USE_INTEL_INTRINSICS) {
		__m128i masterV;
		static if (is(V == uint)) {
			masterV[0] = value;
			masterV[1] = value;
			//masterV[2] = value;
			//masterV[3] = value;
		} else static if (is(V == ubyte)) {
			masterV[0] = value;
			masterV[1] = value;
			//masterV[2] = value;
			//masterV[3] = value;
			masterV |= _mm_slli_epi32(masterV, 8);
			masterV |= _mm_slli_epi32(masterV, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			dest0 += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			dest0 += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
			version (cpublit_revalpha) {
				maskV |= _mm_srli_epi32(maskV, 8);
				maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
			} else {
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			const int a = (lsrc.a * ((value & 0xFF) + 1)) >>> 8;
			const int src1 = 1 + a;
			const int src256 = 256 - a;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest0 = ldest;
			dest++;
			dest0++;
			length--;
		}
	}
}
/**
 * Alpha-blending with per-pixel + fixed master value alpha.
 * `value` controls the overall alpha through an extra multiplication on the per-pixel mask.
 * Mask is either 8 or 32 bit per pixel.
 * 3 operator.
 */
public void alphaBlendMV(V,M)(uint* src, uint* dest, size_t length, M* mask, V value) {
	static if(USE_INTEL_INTRINSICS) {
		__m128i masterV;
		static if (is(V == uint)) {
			masterV[0] = value;
			masterV[1] = value;
			//masterV[2] = value;
			//masterV[3] = value;
		} else static if (is(V == ubyte)) {
			masterV[0] = value;
			masterV[1] = value;
			//masterV[2] = value;
			//masterV[3] = value;
			masterV |= _mm_slli_epi32(masterV, 8);
			masterV |= _mm_slli_epi32(masterV, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			mask += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			mask += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			static if (is(M == uint)) {
				const int m = *mask & 0xFF;
			} else static if (is(M == ubyte)) {
				const int m = *mask;
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			const int a = (m * ((value & 0xFF) + 1)) >>> 8;
			const int src1 = 1 + a;
			const int src256 = 256 - a;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest = ldest;
			dest++;
			mask++;
			length--;
		}
	}
}
/**
 * Alpha-blending with per-pixel + fixed master value alpha.
 * `value` controls the overall alpha through an extra multiplication on the per-pixel mask.
 * Mask is either 8 or 32 bit per pixel.
 * 4 operator.
 */
public void alphaBlendMV(V,M)(uint* src, uint* dest, uint* dest0, size_t length, M* mask, V value) {
	static if(USE_INTEL_INTRINSICS) {
		__m128i masterV;
		static if (is(V == uint)) {
			masterV[0] = value;
			masterV[1] = value;
			//masterV[2] = value;
			//masterV[3] = value;
		} else static if (is(V == ubyte)) {
			masterV[0] = value;
			masterV[1] = value;
			//masterV[2] = value;
			//masterV[3] = value;
			masterV |= _mm_slli_epi32(masterV, 8);
			masterV |= _mm_slli_epi32(masterV, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while(length >= 4){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			__m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			__m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
			_mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
			src += 4;
			dest += 4;
			dest0 += 4;
			mask += 4;
			length -= 4;
		}
		if(length >= 2){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
			src += 2;
			dest += 2;
			dest0 += 2;
			mask += 2;
			length -= 2;
		}
		if(length){
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			}
			__m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
			mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
			__m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
			mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
			__m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
			__m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
			src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
			_mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
		}
	} else {
		while (length) {
			Color32Bit lsrc = *cast(Color32Bit*)src, ldest = *cast(Color32Bit*)dest;
			static if (is(M == uint)) {
				const int m = *mask & 0xFF;
			} else static if (is(M == ubyte)) {
				const int m = *mask;
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			const int a = (m * ((value & 0xFF) + 1)) >>> 8;
			const int src1 = 1 + a;
			const int src256 = 256 - a;
			ldest.r = cast(ubyte)((lsrc.r * src1 + ldest.r * src256)>>>8);
			ldest.g = cast(ubyte)((lsrc.g * src1 + ldest.g * src256)>>>8);
			ldest.b = cast(ubyte)((lsrc.b * src1 + ldest.b * src256)>>>8);
			src++;
			*cast(Color32Bit*)dest0 = ldest;
			dest++;
			dest0++;
			mask++;
			length--;
		}
	}
}
}
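/*
 * Sanity check: with a master value of 255, the per-pixel alpha passes through unchanged, since
 * (a * (255 + 1)) >>> 8 == a, so alphaBlendMV should behave exactly like the plain 2 operator
 * alphaBlend. The pixel values below are arbitrary test data.
 */
unittest {
	uint[4] src = [0x00_00_00_00, 0x40_80_C0_FF, 0xFF_FF_FF_FF, 0x7F_20_60_A0];
	uint[4] dest1 = [0x12_34_56_78, 0x12_34_56_78, 0x12_34_56_78, 0x12_34_56_78];
	uint[4] dest2 = dest1;
	alphaBlend(src.ptr, dest1.ptr, 4);
	alphaBlendMV!ubyte(src.ptr, dest2.ptr, 4, ubyte.max);
	assert(dest1 == dest2);
}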
unittest {
	uint[255] a, b, c, d;
	ubyte[255] e;
	//0 values should stay 0
	alphaBlend(a.ptr, b.ptr, 255);
	testArrayForValue(b);
	alphaBlend(a.ptr, b.ptr, 255, d.ptr);
	testArrayForValue(b);
	alphaBlend(a.ptr, b.ptr, c.ptr, 255);
	testArrayForValue(c);
	alphaBlend(a.ptr, b.ptr, c.ptr, 255, d.ptr);
	testArrayForValue(c);
	alphaBlend(a.ptr, b.ptr, 255, e.ptr);
	testArrayForValue(b);
	alphaBlend(a.ptr, b.ptr, c.ptr, 255, e.ptr);
	testArrayForValue(c);
	alphaBlendFV!ubyte(a.ptr, b.ptr, 255, 0x0F);
	testArrayForValue(b);
	alphaBlendFV!ubyte(a.ptr, b.ptr, c.ptr, 255, 0x0F);
	testArrayForValue(c);
	alphaBlendFV!uint(a.ptr, b.ptr, 255, 0x0F0F0F0F);
	testArrayForValue(b);
	alphaBlendFV!uint(a.ptr, b.ptr, c.ptr, 255, 0x0F0F0F0F);
	testArrayForValue(c);
	alphaBlendMV!ubyte(a.ptr, b.ptr, 255, ubyte.max);
	testArrayForValue(b);
	alphaBlendMV!ubyte(a.ptr, b.ptr, 255, d.ptr, ubyte.max);
	testArrayForValue(b);
	alphaBlendMV!ubyte(a.ptr, b.ptr, c.ptr, 255, ubyte.max);
	testArrayForValue(c);
	alphaBlendMV!ubyte(a.ptr, b.ptr, c.ptr, 255, d.ptr, ubyte.max);
	testArrayForValue(c);
	alphaBlendMV!uint(a.ptr, b.ptr, 255, uint.max);
	testArrayForValue(b);
	alphaBlendMV!uint(a.ptr, b.ptr, 255, d.ptr, uint.max);
	testArrayForValue(b);
	alphaBlendMV!uint(a.ptr, b.ptr, c.ptr, 255, uint.max);
	testArrayForValue(c);
	alphaBlendMV!uint(a.ptr, b.ptr, c.ptr, 255, d.ptr, uint.max);
	testArrayForValue(c);
}
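/*
 * Identity check: for any 8 bit x and any alpha a, ((1 + a) * x + (256 - a) * x) >>> 8 ==
 * (257 * x) >>> 8 == x, so blending a pixel over an identical pixel should leave it unchanged,
 * regardless of its alpha. The fill value below is arbitrary test data.
 */
unittest {
	uint[255] a, b;
	a[] = 0x80_40_C0_20;
	b[] = 0x80_40_C0_20;
	alphaBlend(a.ptr, b.ptr, 255);
	foreach (px ; b) {
		assert(px == 0x80_40_C0_20);
	}
}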