module CPUblit.composing.diff;

import CPUblit.composing.common;

/*
 * CPUblit
 * Difference functions.
 * Author: Laszlo Szeremi
 *
 * These functions compose two images together using the following function:
 * dest0[rgba] = max(dest[rgba], src[rgba]) - min(dest[rgba], src[rgba])
 * If the alpha channel is enabled in the template or a mask is used, then the function will be the following:
 * dest0[rgba] = ((1.0 - mask[aaaa]) * dest[rgba]) + (mask[aaaa] * (max(dest[rgba], src[rgba]) - min(dest[rgba], src[rgba])))
 * which translates to the integer implementation:
 * dest0[rgba] = (((256 - mask[aaaa]) * dest[rgba]) + ((1 + mask[aaaa]) * (max(dest[rgba], src[rgba]) - min(dest[rgba], src[rgba])))) >>> 8
 *
 * These functions only work with 8 bit channels, and many require 32 bit values.
 * Masks can be either 8 bits per pixel, or 32 bits per pixel with the ability of processing up to 4 channels
 * independently.
 */
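// For reference, a minimal scalar sketch of the per-channel operation described above.
// It is illustrative only: the SIMD routines below do not call it, and the helper name
// `diffBlendScalar` is not part of the original API.
private ubyte diffBlendScalar(ubyte dest, ubyte src, ubyte alpha) @nogc @safe pure nothrow {
    const int d = dest >= src ? dest - src : src - dest;                        // |dest - src|
    return cast(ubyte)((((256 - alpha) * dest) + ((1 + alpha) * d)) >>> 8);
}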
@nogc pure nothrow {
    /**
     * 2 operator difference function without alpha.
     */
    public void diff(uint* src, uint* dest, size_t length) {
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            destV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            _mm_storeu_si128(cast(__m128i*)dest, destV);
            src += 4;
            dest += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            destV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            _mm_storel_epi64(cast(__m128i*)dest, destV);
            src += 2;
            dest += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            destV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            _mm_storeu_si32(dest, destV);//*cast(int*)dest = destV[0];
        }
    }
    /**
     * 3 operator difference function with separate destination without alpha.
     */
    public void diff(uint* src, uint* dest, uint* dest0, size_t length) {
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            destV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            _mm_storeu_si128(cast(__m128i*)dest0, destV);
            src += 4;
            dest += 4;
            dest0 += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            destV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            _mm_storel_epi64(cast(__m128i*)dest0, destV);
            src += 2;
            dest += 2;
            dest0 += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            destV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            _mm_storeu_si32(dest0, destV);//*cast(int*)dest0 = destV[0];
        }
    }
    /**
     * 2 operator difference function with alpha.
     */
    public void diffBl(uint* src, uint* dest, size_t length) {
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 3 operator difference function with separate destination and alpha.
     */
    public void diffBl(uint* src, uint* dest, uint* dest0, size_t length) {
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            dest0 += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            dest0 += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 3 operator difference function with masking.
     */
    public void diff(M)(uint* src, uint* dest, size_t length, M* mask) {
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            static if (is(M == uint)) {
                __m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
            } else static if (is(M == ubyte)) {
                __m128i maskV;
                maskV[0] = mask[0];
                maskV[1] = mask[1];
                maskV[2] = mask[2];
                maskV[3] = mask[3];
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            } else static assert (0, "Alpha mask must be either 8 or 32 bits!");
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            mask += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            static if (is(M == uint)) {
                __m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
            } else static if (is(M == ubyte)) {
                __m128i maskV;
                maskV[0] = mask[0];
                maskV[1] = mask[1];
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            } else static assert (0, "Alpha mask must be either 8 or 32 bits!");
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            mask += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            static if (is(M == uint)) {
                __m128i maskV = _mm_loadu_si32(cast(__m128i*)mask);
            } else static if (is(M == ubyte)) {
                __m128i maskV;
                maskV[0] = mask[0];
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            } else static assert (0, "Alpha mask must be either 8 or 32 bits!");
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 4 operator difference function with separate destination and masking.
     */
    public void diff(M)(uint* src, uint* dest, uint* dest0, size_t length, M* mask) {
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            static if (is(M == uint)) {
                __m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
            } else static if (is(M == ubyte)) {
                __m128i maskV;
                maskV[0] = mask[0];
                maskV[1] = mask[1];
                maskV[2] = mask[2];
                maskV[3] = mask[3];
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            } else static assert (0, "Alpha mask must be either 8 or 32 bits!");
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            dest0 += 4;
            mask += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            static if (is(M == uint)) {
                __m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
            } else static if (is(M == ubyte)) {
                __m128i maskV;
                maskV[0] = mask[0];
                maskV[1] = mask[1];
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            } else static assert (0, "Alpha mask must be either 8 or 32 bits!");
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            dest0 += 2;
            mask += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            static if (is(M == uint)) {
                __m128i maskV = _mm_loadu_si32(cast(__m128i*)mask);
            } else static if (is(M == ubyte)) {
                __m128i maskV;
                maskV[0] = mask[0];
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            } else static assert (0, "Alpha mask must be either 8 or 32 bits!");
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 2 operator difference function with master alpha value.
     * The master value acts as a constant alpha applied to the whole operation; per-pixel
     * alpha is not used here (see `diffMVBl` for that).
     */
    public void diffMV(V)(uint* src, uint* dest, size_t length, V value) {
        __m128i masterV;
        static if (is(V == uint)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
        } else static if (is(V == ubyte)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
            masterV |= _mm_slli_epi32(masterV, 8);
            masterV |= _mm_slli_epi32(masterV, 16);
        } else static assert (0, "Value must be either 8 or 32 bits!");
        __m128i master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        __m128i master_256 = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, _mm_unpacklo_epi8(masterV, SSE2_NULLVECT));
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), master_256);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), master_256);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), master_256);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), master_256);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
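    /*
     * Worked example of the master-value blend (supplementary note): with dest = 0x40,
     * src = 0x60, and value = 0x80, the difference is 0x20, and the blended result is
     * (0x20 * (0x80 + 1) + 0x40 * (0x100 - 0x80)) >>> 8 = (0x1020 + 0x2000) >>> 8 = 0x30,
     * i.e. halfway between the destination (0x40) and the full difference (0x20).
     */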
    /**
     * 3 operator difference function with separate destination and master alpha value.
     */
    public void diffMV(V)(uint* src, uint* dest, uint* dest0, size_t length, V value) {
        __m128i masterV;
        static if (is(V == uint)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
        } else static if (is(V == ubyte)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
            masterV |= _mm_slli_epi32(masterV, 8);
            masterV |= _mm_slli_epi32(masterV, 16);
        } else static assert (0, "Value must be either 8 or 32 bits!");
        __m128i master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        __m128i master_256 = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, _mm_unpacklo_epi8(masterV, SSE2_NULLVECT));
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), master_256);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), master_256);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            dest0 += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), master_256);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            dest0 += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), master_256);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 2 operator difference function with master alpha value and per-pixel alpha.
     */
    public void diffMVBl(V)(uint* src, uint* dest, size_t length, V value) {
        __m128i masterV;
        static if (is(V == uint)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
        } else static if (is(V == ubyte)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
            masterV |= _mm_slli_epi32(masterV, 8);
            masterV |= _mm_slli_epi32(masterV, 16);
        } else static assert (0, "Value must be either 8 or 32 bits!");
        masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_256 = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, _mm_unpacklo_epi8(masterV, SSE2_NULLVECT));
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 3 operator difference function with separate destination, master alpha value, and per-pixel alpha.
     */
    public void diffMVBl(V)(uint* src, uint* dest, uint* dest0, size_t length, V value) {
        __m128i masterV;
        static if (is(V == uint)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
        } else static if (is(V == ubyte)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
            masterV |= _mm_slli_epi32(masterV, 8);
            masterV |= _mm_slli_epi32(masterV, 16);
        } else static assert (0, "Value must be either 8 or 32 bits!");
        masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_256 = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, _mm_unpacklo_epi8(masterV, SSE2_NULLVECT));
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            dest0 += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            dest0 += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            __m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 3 operator difference function with masking, per-pixel alpha, and master alpha value.
     */
    public void diffMV(M, V)(uint* src, uint* dest, size_t length, M* mask, V value) {
        __m128i masterV;
        static if (is(V == uint)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
        } else static if (is(V == ubyte)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
            masterV |= _mm_slli_epi32(masterV, 8);
            masterV |= _mm_slli_epi32(masterV, 16);
        } else static assert (0, "Value must be either 8 or 32 bits!");
        masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_256 = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, _mm_unpacklo_epi8(masterV, SSE2_NULLVECT));
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            __m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            mask += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            __m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            mask += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            __m128i maskV = _mm_loadu_si32(mask);
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
    /**
     * 4 operator difference function with masking, separate destination, per-pixel alpha, and master alpha value.
     */
    public void diffMV(M, V)(uint* src, uint* dest, uint* dest0, size_t length, M* mask, V value) {
        __m128i masterV;
        static if (is(V == uint)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
        } else static if (is(V == ubyte)) {
            masterV[0] = value;
            masterV[1] = value;
            //masterV[2] = value;
            //masterV[3] = value;
            masterV |= _mm_slli_epi32(masterV, 8);
            masterV |= _mm_slli_epi32(masterV, 16);
        } else static assert (0, "Value must be either 8 or 32 bits!");
        masterV = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(masterV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
        //__m128i master_256 = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, _mm_unpacklo_epi8(masterV, SSE2_NULLVECT));
        while (length >= 4) {
            __m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
            __m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
            __m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            __m128i mask_hi = _mm_unpackhi_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            mask_hi = _mm_srli_epi16(_mm_mullo_epi16(mask_hi, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            __m128i mask0_hi = _mm_adds_epu16(mask_hi, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            mask_hi = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_hi);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i src_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask0_hi);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            __m128i dest_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(destV, SSE2_NULLVECT), mask_hi);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            src_hi = _mm_srli_epi16(_mm_adds_epu16(src_hi, dest_hi), 8);
            _mm_storeu_si128(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, src_hi));
            src += 4;
            dest += 4;
            dest0 += 4;
            mask += 4;
            length -= 4;
        }
        if (length >= 2) {
            __m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
            __m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
            __m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storel_epi64(cast(__m128i*)dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
            src += 2;
            dest += 2;
            dest0 += 2;
            mask += 2;
            length -= 2;
        }
        if (length) {
            __m128i srcV = _mm_loadu_si32(src);
            __m128i destV = _mm_loadu_si32(dest);
            __m128i maskV = _mm_loadu_si32(mask);
            version (cpublit_revalpha) {
                maskV |= _mm_srli_epi32(maskV, 8);
                maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
            } else {
                maskV |= _mm_slli_epi32(maskV, 8);
                maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
            }
            srcV = _mm_subs_epu8(_mm_max_epu8(destV, srcV), _mm_min_epu8(destV, srcV));
            __m128i mask_lo = _mm_unpacklo_epi8(maskV, SSE2_NULLVECT);
            mask_lo = _mm_srli_epi16(_mm_mullo_epi16(mask_lo, masterV), 8);
            __m128i mask0_lo = _mm_adds_epu16(mask_lo, cast(__m128i)ALPHABLEND_SSE2_CONST1);
            mask_lo = _mm_subs_epu16(cast(__m128i)ALPHABLEND_SSE2_CONST256, mask_lo);
            __m128i src_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask0_lo);
            __m128i dest_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(destV, SSE2_NULLVECT), mask_lo);
            src_lo = _mm_srli_epi16(_mm_adds_epu16(src_lo, dest_lo), 8);
            _mm_storeu_si32(dest0, _mm_packus_epi16(src_lo, SSE2_NULLVECT));
        }
    }
}
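// Usage sketch: composing one ARGB32 buffer onto another scanline by scanline.
// Illustrative only; the helper name `diffScanlines` and the pitch-in-pixels
// convention are assumptions, not part of the original API.
private void diffScanlines(uint* src, size_t srcPitch, uint* dest, size_t destPitch, size_t width,
        size_t height) @nogc pure nothrow {
    foreach (y; 0 .. height) {
        diff(src, dest, width);     // per-scanline 2 operator difference
        src += srcPitch;
        dest += destPitch;
    }
}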
unittest {
    uint[] src, dest, dest0, mask;
    ubyte[] mask0;
    src.length = 255;
    dest.length = 255;
    dest0.length = 255;
    mask.length = 255;
    mask0.length = 255;
    fillWithSingleValue(src, 0x0f010fFF);
    fillWithSingleValue(dest, 0x010f01FF);

    //test basic functions
    diff(src.ptr, dest.ptr, 255);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diff(src.ptr, dest.ptr, dest0.ptr, 255);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    //test functions with blend
    diffBl(src.ptr, dest.ptr, 255);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffBl(src.ptr, dest.ptr, dest0.ptr, 255);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    fillWithSingleValue(src, 0x0f010f00);

    diffBl(src.ptr, dest.ptr, 255);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffBl(src.ptr, dest.ptr, dest0.ptr, 255);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    fillWithSingleValue(src, 0x0f010fFF);

    //test functions with masking
    diff(src.ptr, dest.ptr, 255, mask.ptr);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diff(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diff(src.ptr, dest.ptr, 255, mask0.ptr);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diff(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    fillWithSingleValue(mask, uint.max);
    fillWithSingleValue(mask0, ubyte.max);

    diff(src.ptr, dest.ptr, 255, mask.ptr);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diff(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diff(src.ptr, dest.ptr, 255, mask0.ptr);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diff(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    //test master value functions without blend
    diffMV(src.ptr, dest.ptr, 255, ubyte.max);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.max);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, ubyte.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, uint.max);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, uint.max);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, uint.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, uint.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    //test master value functions with blend
    //255 alpha values
    diffMVBl(src.ptr, dest.ptr, 255, ubyte.max);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.max);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diffMVBl(src.ptr, dest.ptr, 255, ubyte.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMVBl(src.ptr, dest.ptr, 255, uint.max);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, uint.max);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diffMVBl(src.ptr, dest.ptr, 255, uint.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, uint.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);
    //0 alpha values
    fillWithSingleValue(src, 0x0f010f00);

    diffMVBl(src.ptr, dest.ptr, 255, ubyte.max);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.max);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMVBl(src.ptr, dest.ptr, 255, ubyte.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMVBl(src.ptr, dest.ptr, 255, uint.max);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, uint.max);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMVBl(src.ptr, dest.ptr, 255, uint.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMVBl(src.ptr, dest.ptr, dest0.ptr, 255, uint.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    //test master value functions with masking
    fillWithSingleValue(src, 0x0f010fFF);
    fillWithSingleValue(mask, uint.max);
    diffMV(src.ptr, dest.ptr, 255, mask.ptr, ubyte.max);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, ubyte.max);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, ubyte.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, ubyte.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, uint.max);
    testArrayForValue(dest, 0x0e0e0e00);
    fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, uint.max);
    testArrayForValue(dest0, 0x0e0e0e00);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, uint.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, uint.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);
    //0 alpha values
    fillWithSingleValue(mask, uint.min);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, ubyte.max);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, ubyte.max);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, ubyte.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, ubyte.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, uint.max);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, uint.max);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);

    diffMV(src.ptr, dest.ptr, 255, mask.ptr, uint.min);
    testArrayForValue(dest, 0x010f01FF);
    //fillWithSingleValue(dest, 0x010f01FF);
    diffMV(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr, uint.min);
    testArrayForValue(dest0, 0x010f01FF);
    fillWithSingleValue(dest0, 0);
}
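// Illustrative cross-check of `diff` against the scalar sketch near the top of this
// module, on a few hand-picked pixels (supplementary example, not part of the
// original test suite).
unittest {
    uint[4] src  = [0x10203040, 0xFF000000, 0x00FF00FF, 0x7F7F7F7F];
    uint[4] dest = [0x08102030, 0x00FF0000, 0xFF00FF00, 0x80808080];
    uint[4] expected;
    foreach (i; 0 .. 4) {
        foreach (b; 0 .. 4) {
            const ubyte s = cast(ubyte)(src[i] >>> (8 * b));
            const ubyte d = cast(ubyte)(dest[i] >>> (8 * b));
            // With full coverage (alpha = 255) the blend reduces to |dest - src|.
            expected[i] |= cast(uint)diffBlendScalar(d, s, 255) << (8 * b);
        }
    }
    diff(src.ptr, dest.ptr, 4);
    assert(dest == expected);
}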