module CPUblit.composing.sub;

import CPUblit.composing.common;

/*
 * CPUblit
 * Subtract with saturation functions.
 * Author: Laszlo Szeremi
 *
 * These functions compose two images together using the following function:
 * dest0[rgba] = dest[rgba] - src[rgba]
 * If alpha channel is enabled in the template or a mask is used, then the function will be the following:
 * dest0[rgba] = dest[rgba] - (mask[aaaa] * src[rgba])
 * which translates to the integer implementation:
 * dest0[rgba] = dest[rgba] - ((1 + mask[aaaa]) * src[rgba])>>>8
 *
 * These functions only work with 8 bit channels, and many require 32 bit values.
 * Masks can be either 8 bit per pixel, or 32 bit per pixel with the ability of processing up to 4 channels
 * independently.
 */
@nogc pure nothrow {
	/**
	 * 2 operator subtraction function.
	 * dest[rgba] = saturate(dest[rgba] - src[rgba]); if `UseAlpha` is set, src is first scaled by its own
	 * alpha channel ((1 + alpha) * src >>> 8). Processes 4 pixels per iteration, then a 2 pixel and a
	 * 1 pixel tail.
	 */
	public void sub(bool UseAlpha = false)(uint* src, uint* dest, size_t length) {
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				//+1 to the mask so a mask of 255 yields an unchanged src after the >>>8
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
				srcV = _mm_packus_epi16(src_lo, src_hi);
			}
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest, destV);
			src += 4;
			dest += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			}
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest, destV);
			src += 2;
			dest += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			}
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest, destV);
		}

	}
	/**
	 * 3 operator subtraction function with separate destination.
	 * dest0[rgba] = saturate(dest[rgba] - src[rgba]); `dest` is only read, the result goes to `dest0`.
	 * If `UseAlpha` is set, src is first scaled by its own alpha channel.
	 */
	public void sub(bool UseAlpha = false)(uint* src, uint* dest, uint* dest0, size_t length) {
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
				srcV = _mm_packus_epi16(src_lo, src_hi);
			}
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest0, destV);
			src += 4;
			dest += 4;
			dest0 += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			}
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest0, destV);
			src += 2;
			dest += 2;
			dest0 += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			}
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest0, destV);
		}
	}
	/**
	 * 3 operator subtraction function with masking.
	 * dest[rgba] = saturate(dest[rgba] - ((1 + mask) * src[rgba]) >>> 8).
	 * `M` selects the mask layout: `uint` = 32 bit per pixel (per-channel), `ubyte` = 8 bit per pixel
	 * (replicated to all four channels).
	 */
	public void sub(M)(uint* src, uint* dest, size_t length, M* mask) {
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
			srcV = _mm_packus_epi16(src_lo, src_hi);

			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest, destV);
			src += 4;
			dest += 4;
			mask += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest, destV);
			src += 2;
			dest += 2;
			mask += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest, destV);
		}
	}
	/**
	 * 3 operator subtraction function with separate destination and masking.
	 * dest0[rgba] = saturate(dest[rgba] - ((1 + mask) * src[rgba]) >>> 8).
	 * `M` selects the mask layout (see the masked 2 operator overload).
	 */
	public void sub(M)(uint* src, uint* dest, uint* dest0, size_t length, M* mask) {
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
			srcV = _mm_packus_epi16(src_lo, src_hi);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest0, destV);
			src += 4;
			dest += 4;
			dest0 += 4;
			mask += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest0, destV);
			src += 2;
			dest += 2;
			dest0 += 2;
			mask += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest0, destV);
		}
	}
	/**
	 * 2 operator subtraction function with master alpha value.
	 * `UseAlpha` determines whether the src's alpha channel will be used or not.
	 * `V` selects the master value layout: `uint` = per-channel, `ubyte` = replicated to all channels.
	 */
	public void subMV(bool UseAlpha = false, V)(uint* src, uint* dest, size_t length, V value) {
		__m128i master_1;
		//only the low two 32 bit lanes are filled: _mm_unpacklo_epi8 below reads just the low 8 bytes
		static if (is(V == uint)) {
			master_1[0] = value;
			master_1[1] = value;
		} else static if (is(V == ubyte)) {
			master_1[0] = value;
			master_1[1] = value;
			master_1 |= _mm_slli_epi32(master_1, 8);
			master_1 |= _mm_slli_epi32(master_1, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(master_1, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
				src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
				src_hi = _mm_srli_epi16(_mm_mullo_epi16(src_hi, master_1), 8);
			} else {
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1), 8);
				__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), master_1), 8);
			}
			srcV = _mm_packus_epi16(src_lo, src_hi);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest, destV);
			src += 4;
			dest += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			} else {
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1), 8);
			}
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest, destV);
			src += 2;
			dest += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			} else {
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1), 8);
			}
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest, destV);
		}

	}
	/**
	 * 3 operator subtraction function with separate destination and master alpha value.
	 */
	public void subMV(bool UseAlpha = false, V)(uint* src, uint* dest, uint* dest0, size_t length, V value) {
		__m128i master_1;
		//only the low two 32 bit lanes are filled: _mm_unpacklo_epi8 below reads just the low 8 bytes
		static if (is(V == uint)) {
			master_1[0] = value;
			master_1[1] = value;
		} else static if (is(V == ubyte)) {
			master_1[0] = value;
			master_1[1] = value;
			master_1 |= _mm_slli_epi32(master_1, 8);
			master_1 |= _mm_slli_epi32(master_1, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(master_1, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
				src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
				src_hi = _mm_srli_epi16(_mm_mullo_epi16(src_hi, master_1), 8);
			} else {
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1), 8);
				__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), master_1), 8);
			}
			srcV = _mm_packus_epi16(src_lo, src_hi);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest0, destV);
			src += 4;
			dest += 4;
			dest0 += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			} else {
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1), 8);
			}
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest0, destV);
			src += 2;
			dest += 2;
			dest0 += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (UseAlpha) {
				__m128i maskV = srcV & cast(__m128i)ALPHABLEND_SSE2_AMASK;
				version (cpublit_revalpha) {
					maskV |= _mm_srli_epi32(maskV, 8);
					maskV |= _mm_srli_epi32(maskV, 16);//[A,A,A,A]
				} else {
					maskV |= _mm_slli_epi32(maskV, 8);
					maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
				}
				__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
				src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			} else {
				__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), master_1), 8);
			}
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest0, destV);
		}
	}
	/**
	 * 3 operator subtraction function with masking and master alpha value.
	 */
	public void subMV(M, V)(uint* src, uint* dest, size_t length, M* mask, V value) {
		__m128i master_1;
		//only the low two 32 bit lanes are filled: _mm_unpacklo_epi8 below reads just the low 8 bytes
		static if (is(V == uint)) {
			master_1[0] = value;
			master_1[1] = value;
		} else static if (is(V == ubyte)) {
			master_1[0] = value;
			master_1[1] = value;
			master_1 |= _mm_slli_epi32(master_1, 8);
			master_1 |= _mm_slli_epi32(master_1, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(master_1, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
			src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			src_hi = _mm_srli_epi16(_mm_mullo_epi16(src_hi, master_1), 8);
			srcV = _mm_packus_epi16(src_lo, src_hi);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest, destV);
			src += 4;
			dest += 4;
			mask += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest, destV);
			src += 2;
			dest += 2;
			mask += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				//BUGFIX: was a 64 bit load (_mm_loadl_epi64) with one pixel left, which could read
				//past the end of the mask buffer; only 32 bits are needed here, matching the
				//equivalent branch of every other masked overload in this module.
				__m128i maskV = _mm_loadu_si32(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				//BUGFIX: only mask[0] is read; the former mask[1] access was out of bounds for the
				//last element and its value never reached the stored 32 bit result anyway.
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest, destV);
		}
	}
	/**
	 * 3 operator subtraction function with separate destination, masking, and master value.
	 */
	public void subMV(M, V)(uint* src, uint* dest, uint* dest0, size_t length, M* mask, V value) {
		__m128i master_1;
		//only the low two 32 bit lanes are filled: _mm_unpacklo_epi8 below reads just the low 8 bytes
		static if (is(V == uint)) {
			master_1[0] = value;
			master_1[1] = value;
		} else static if (is(V == ubyte)) {
			master_1[0] = value;
			master_1[1] = value;
			master_1 |= _mm_slli_epi32(master_1, 8);
			master_1 |= _mm_slli_epi32(master_1, 16);
		} else static assert (0, "Value must be either 8 or 32 bits!");
		master_1 = _mm_adds_epu16(_mm_unpacklo_epi8(master_1, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
		while (length >= 4) {
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si128(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV[2] = mask[2];
				maskV[3] = mask[3];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i mask_hi = _mm_adds_epu16(_mm_unpackhi_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			__m128i src_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpackhi_epi8(srcV, SSE2_NULLVECT), mask_hi), 8);
			src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			src_hi = _mm_srli_epi16(_mm_mullo_epi16(src_hi, master_1), 8);
			srcV = _mm_packus_epi16(src_lo, src_hi);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si128(cast(__m128i*)dest0, destV);
			src += 4;
			dest += 4;
			dest0 += 4;
			mask += 4;
			length -= 4;
		}
		if (length >= 2) {
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadl_epi64(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV[1] = mask[1];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storel_epi64(cast(__m128i*)dest0, destV);
			src += 2;
			dest += 2;
			dest0 += 2;
			mask += 2;
			length -= 2;
		}
		if (length) {
			__m128i srcV = _mm_loadu_si32(src);
			__m128i destV = _mm_loadu_si32(dest);
			static if (is(M == uint)) {
				__m128i maskV = _mm_loadu_si32(cast(__m128i*)mask);
			} else static if (is(M == ubyte)) {
				__m128i maskV;
				maskV[0] = mask[0];
				maskV |= _mm_slli_epi32(maskV, 8);
				maskV |= _mm_slli_epi32(maskV, 16);//[A,A,A,A]
			} else static assert (0, "Alpha mask must be either 8 or 32 bits!");
			__m128i mask_lo = _mm_adds_epu16(_mm_unpacklo_epi8(maskV, SSE2_NULLVECT), cast(__m128i)ALPHABLEND_SSE2_CONST1);
			__m128i src_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_unpacklo_epi8(srcV, SSE2_NULLVECT), mask_lo), 8);
			src_lo = _mm_srli_epi16(_mm_mullo_epi16(src_lo, master_1), 8);
			srcV = _mm_packus_epi16(src_lo, SSE2_NULLVECT);
			destV = _mm_subs_epu8(destV, srcV);
			_mm_storeu_si32(dest0, destV);
		}
	}
}
unittest {
	uint[] src, dest, dest0, mask;
	ubyte[] mask0;
	src.length = 255;
	dest.length = 255;
	dest0.length = 255;
	mask.length = 255;
	mask0.length = 255;
	fillWithSingleValue(src, 0x05050505);
	fillWithSingleValue(dest, 0x10101010);
	sub!false(src.ptr, dest.ptr, 255);
	testArrayForValue(dest, 0x0b0b0b0b);
	fillWithSingleValue(dest, 0x10101010);
	sub!false(src.ptr, dest.ptr, dest0.ptr, 255);
	testArrayForValue(dest0, 0x0b0b0b0b);
	fillWithSingleValue(dest0, 0);

	//mask value of 0 should generate no change in the output
	sub(src.ptr, dest.ptr, 255, mask.ptr);
	testArrayForValue(dest, 0x10101010);
	sub(src.ptr, dest.ptr, 255, mask0.ptr);
	testArrayForValue(dest, 0x10101010);
	sub(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr);
	testArrayForValue(dest0, 0x10101010);
	fillWithSingleValue(dest0, 0);
	sub(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr);
	testArrayForValue(dest0, 0x10101010);
	fillWithSingleValue(dest0, 0);

	//mask value of 255 should generate maximum change in the output
	fillWithSingleValue(mask, uint.max);
	fillWithSingleValue(mask0, ubyte.max);
	sub(src.ptr, dest.ptr, 255, mask.ptr);
	testArrayForValue(dest, 0x0b0b0b0b);
	fillWithSingleValue(dest, 0x10101010);
	sub(src.ptr, dest.ptr, 255, mask0.ptr);
	testArrayForValue(dest, 0x0b0b0b0b);
	fillWithSingleValue(dest, 0x10101010);
	sub(src.ptr, dest.ptr, dest0.ptr, 255, mask.ptr);
	testArrayForValue(dest0, 0x0b0b0b0b);
	fillWithSingleValue(dest0, 0);
	sub(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr);
	testArrayForValue(dest0, 0x0b0b0b0b);
	fillWithSingleValue(dest0, 0);

	//test with alpha channel

	//the least significant byte of a 32 bit pixel is the alpha
	fillWithSingleValue(src, 0x050505FF);
	fillWithSingleValue(dest, 0x101010FF);
	sub!true(src.ptr, dest.ptr, 255);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	sub!true(src.ptr, dest.ptr, dest0.ptr, 255);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);
	//with alpha value of zero, the destination shouldn't be affected
	fillWithSingleValue(src, 0x05050500);
	sub!true(src.ptr, dest.ptr, 255);
	testArrayForValue(dest, 0x101010FF);
	sub!true(src.ptr, dest.ptr, dest0.ptr, 255);
	testArrayForValue(dest0, 0x101010FF);
	fillWithSingleValue(dest0, 0);

	//test master value functions

	//master value of zero shouldn't affect anything
	fillWithSingleValue(src, 0x050505FF);
	subMV!false(src.ptr, dest.ptr, 255, ubyte.min);
	testArrayForValue(dest, 0x101010FF);
	subMV!true(src.ptr, dest.ptr, 255, uint.min);
	testArrayForValue(dest, 0x101010FF);
	subMV!true(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.min);
	testArrayForValue(dest0, 0x101010FF);
	fillWithSingleValue(dest0, 0);
	subMV!false(src.ptr, dest.ptr, dest0.ptr, 255, uint.min);
	testArrayForValue(dest0, 0x101010FF);
	fillWithSingleValue(dest0, 0);

	//masks should be also "ignored"
	subMV(src.ptr, dest.ptr, 255, mask.ptr, ubyte.min);
	testArrayForValue(dest, 0x101010FF);
	subMV(src.ptr, dest.ptr, 255, mask.ptr, uint.min);
	testArrayForValue(dest, 0x101010FF);
	subMV(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr, ubyte.min);
	testArrayForValue(dest0, 0x101010FF);
	fillWithSingleValue(dest0, 0);
	subMV(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr, uint.min);
	testArrayForValue(dest0, 0x101010FF);
	fillWithSingleValue(dest0, 0);

	//master value of 255 should generate maximum change in the output
	subMV!true(src.ptr, dest.ptr, 255, ubyte.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV!true(src.ptr, dest.ptr, 255, uint.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV!true(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);
	subMV!true(src.ptr, dest.ptr, dest0.ptr, 255, uint.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);

	subMV!false(src.ptr, dest.ptr, 255, ubyte.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV!false(src.ptr, dest.ptr, 255, uint.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV!false(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);
	subMV!false(src.ptr, dest.ptr, dest0.ptr, 255, uint.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);

	subMV!true(src.ptr, dest.ptr, 255, ubyte.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV!true(src.ptr, dest.ptr, 255, uint.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV!true(src.ptr, dest.ptr, dest0.ptr, 255, ubyte.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);
	subMV!true(src.ptr, dest.ptr, dest0.ptr, 255, uint.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);

	//ditto with masks of maximum value
	subMV(src.ptr, dest.ptr, 255, mask.ptr, ubyte.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV(src.ptr, dest.ptr, 255, mask.ptr, uint.max);
	testArrayForValue(dest, 0x0b0b0b00);
	fillWithSingleValue(dest, 0x101010FF);
	subMV(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr, ubyte.max);
	testArrayForValue(dest0, 0x0b0b0b00);
	fillWithSingleValue(dest0, 0);
	subMV(src.ptr, dest.ptr, dest0.ptr, 255, mask0.ptr, uint.max);
	testArrayForValue(dest0, 0x0b0b0b00);
}