module CPUblit.composing.specblt;

import CPUblit.composing.common;

/**
 * Text blitter, mainly intended for single color texts. It can work in other applications as long as the
 * source is correctly formatted, meaning: transparent pixels = 0, colored pixels = T.max.
 *
 * The result is written back to `dest` (in-place composing).
 * Params:
 *   src = source pixels (0 = transparent, T.max = colored).
 *   dest = destination pixels, read and overwritten.
 *   length = number of elements of type T to process.
 *   color = value written wherever the source is colored.
 */
public void textBlitter(T)(T* src, T* dest, size_t length, T color) @nogc pure nothrow {
	static if(USE_INTEL_INTRINSICS){
		//Number of T elements handled by a full (128 bit), half (64 bit), and quarter (32 bit) load.
		static if(T.stringof == "ubyte"){
			byte16 colorV;
			static enum MAINLOOP_LENGTH = 16;
			static enum HALFLOAD_LENGTH = 8;
			static enum QUTRLOAD_LENGTH = 4;
		}else static if(T.stringof == "ushort"){
			short8 colorV;
			static enum MAINLOOP_LENGTH = 8;
			static enum HALFLOAD_LENGTH = 4;
			static enum QUTRLOAD_LENGTH = 2;
		}else static if(T.stringof == "uint"){
			int4 colorV;
			static enum MAINLOOP_LENGTH = 4;
			static enum HALFLOAD_LENGTH = 2;
			static enum QUTRLOAD_LENGTH = 1;
		}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
		//Broadcast the color into every lane.
		static foreach(i; 0 .. (MAINLOOP_LENGTH)){
			colorV[i] = color;
		}
		while(length >= MAINLOOP_LENGTH){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src) & cast(__m128i)colorV;
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			//mask is all ones in the lanes where the colored source is zero, i.e. where dest must be kept.
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storeu_si128(cast(__m128i*)dest, destV);
			src += MAINLOOP_LENGTH;
			dest += MAINLOOP_LENGTH;
			length -= MAINLOOP_LENGTH;
		}
		if(length >= HALFLOAD_LENGTH){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src) & cast(__m128i)colorV;
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storel_epi64(cast(__m128i*)dest, destV);
			src += HALFLOAD_LENGTH;
			dest += HALFLOAD_LENGTH;
			length -= HALFLOAD_LENGTH;
		}
		if(length >= QUTRLOAD_LENGTH){
			__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src)) & cast(__m128i)colorV;
			__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storeu_si32(dest, destV);
			//For uint this was the last possible element, so nothing needs to be advanced.
			static if(T.stringof != "uint"){
				src += QUTRLOAD_LENGTH;
				dest += QUTRLOAD_LENGTH;
				length -= QUTRLOAD_LENGTH;
			}
		}
		//Scalar tail: up to three remaining bytes, or a single remaining ushort.
		static if(T.stringof == "ubyte"){
			while(length){
				const ubyte mask = *src ? ubyte.min : ubyte.max;
				*dest = (*src & color) | (*dest & mask);
				src++;
				dest++;
				length--;
			}
		}else static if(T.stringof == "ushort"){
			if(length){
				const ushort mask = *src ? ushort.min : ushort.max;
				//The color must be applied here too, otherwise the last pixel is written as T.max.
				*dest = (*src & color) | (*dest & mask);
			}
		}
	}else{
		while(length){
			//The mask has to be as wide as T, or the upper bits of dest are lost for ushort and uint.
			const T mask = *src ? T.min : T.max;
			*dest = (*src & color) | (*dest & mask);
			src++;
			dest++;
			length--;
		}
	}
}
/**
 * Text blitter, mainly intended for single color texts. It can work in other applications as long as the
 * source is correctly formatted, meaning: transparent pixels = 0, colored pixels = T.max.
 *
 * The result is written to `dest0`, `dest` is only read.
 * Params:
 *   src = source pixels (0 = transparent, T.max = colored).
 *   dest = background pixels, only read.
 *   dest0 = output pixels.
 *   length = number of elements of type T to process.
 *   color = value written wherever the source is colored.
 */
public void textBlitter(T)(T* src, T* dest, T* dest0, size_t length, T color) @nogc pure nothrow {
	static if(USE_INTEL_INTRINSICS){
		static if(T.stringof == "ubyte"){
			byte16 colorV;
			static enum MAINLOOP_LENGTH = 16;
			static enum HALFLOAD_LENGTH = 8;
			static enum QUTRLOAD_LENGTH = 4;
		}else static if(T.stringof == "ushort"){
			short8 colorV;
			static enum MAINLOOP_LENGTH = 8;
			static enum HALFLOAD_LENGTH = 4;
			static enum QUTRLOAD_LENGTH = 2;
		}else static if(T.stringof == "uint"){
			int4 colorV;
			static enum MAINLOOP_LENGTH = 4;
			static enum HALFLOAD_LENGTH = 2;
			static enum QUTRLOAD_LENGTH = 1;
		}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
		//Broadcast the color into every lane.
		static foreach(i; 0 .. (MAINLOOP_LENGTH)){
			colorV[i] = color;
		}
		while(length >= MAINLOOP_LENGTH){
			__m128i srcV = _mm_loadu_si128(cast(__m128i*)src) & cast(__m128i)colorV;
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			//mask is all ones in the lanes where the colored source is zero, i.e. where dest must be kept.
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storeu_si128(cast(__m128i*)dest0, destV);
			src += MAINLOOP_LENGTH;
			dest += MAINLOOP_LENGTH;
			dest0 += MAINLOOP_LENGTH;
			length -= MAINLOOP_LENGTH;
		}
		if(length >= HALFLOAD_LENGTH){
			__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src) & cast(__m128i)colorV;
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storel_epi64(cast(__m128i*)dest0, destV);
			src += HALFLOAD_LENGTH;
			dest += HALFLOAD_LENGTH;
			dest0 += HALFLOAD_LENGTH;
			length -= HALFLOAD_LENGTH;
		}
		if(length >= QUTRLOAD_LENGTH){
			__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src)) & cast(__m128i)colorV;
			__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
			static if(T.stringof == "ubyte")
				__m128i mask = _mm_cmpeq_epi8(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "ushort")
				__m128i mask = _mm_cmpeq_epi16(srcV, SSE2_NULLVECT);
			else static if(T.stringof == "uint")
				__m128i mask = _mm_cmpeq_epi32(srcV, SSE2_NULLVECT);
			destV = srcV | (destV & mask);
			_mm_storeu_si32(dest0, destV);
			//For uint this was the last possible element, so nothing needs to be advanced.
			static if(T.stringof != "uint"){
				src += QUTRLOAD_LENGTH;
				dest += QUTRLOAD_LENGTH;
				dest0 += QUTRLOAD_LENGTH;
				length -= QUTRLOAD_LENGTH;
			}
		}
		//Scalar tail: up to three remaining bytes, or a single remaining ushort.
		static if(T.stringof == "ubyte"){
			while(length){
				const T mask = *src ? T.min : T.max;
				*dest0 = (*src & color) | (*dest & mask);
				src++;
				dest++;
				dest0++;
				length--;
			}
		}else static if(T.stringof == "ushort"){
			if(length){
				const T mask = *src ? T.min : T.max;
				*dest0 = (*src & color) | (*dest & mask);
			}
		}
	}else{
		while(length){
			const T mask = *src ? T.min : T.max;
			*dest0 = (*src & color) | (*dest & mask);
			src++;
			dest++;
			dest0++;
			length--;
		}
	}
}
/**
 * XOR blitter. Popularly used for selection and pseudo-transparency.
 *
 * XORs every element of `dest` with `color` and writes the result to `dest0`.
 * Params:
 *   dest = input pixels, only read.
 *   dest0 = output pixels.
 *   length = number of elements of type T to process.
 *   color = value every element is XORed with.
 */
public @nogc void xorBlitter(T)(T* dest, T* dest0, size_t length, T color){
	static if(USE_INTEL_INTRINSICS){
		static if(T.stringof == "ubyte"){
			byte16 colorV;
			static enum MAINLOOP_LENGTH = 16;
			static enum HALFLOAD_LENGTH = 8;
			static enum QUTRLOAD_LENGTH = 4;
		}else static if(T.stringof == "ushort"){
			short8 colorV;
			static enum MAINLOOP_LENGTH = 8;
			static enum HALFLOAD_LENGTH = 4;
			static enum QUTRLOAD_LENGTH = 2;
		}else static if(T.stringof == "uint"){
			int4 colorV;
			static enum MAINLOOP_LENGTH = 4;
			static enum HALFLOAD_LENGTH = 2;
			static enum QUTRLOAD_LENGTH = 1;
		}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
		//Broadcast the color into every lane.
		for (int i ; i < MAINLOOP_LENGTH ; i++){
			colorV[i] = color;
		}
		while(length >= MAINLOOP_LENGTH){
			__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
			destV = _mm_xor_si128(destV, cast(__m128i)colorV);
			_mm_storeu_si128(cast(__m128i*)dest0, destV);
			dest += MAINLOOP_LENGTH;
			dest0 += MAINLOOP_LENGTH;
			length -= MAINLOOP_LENGTH;
		}
		if(length >= HALFLOAD_LENGTH){
			__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
			destV = _mm_xor_si128(destV, cast(__m128i)colorV);
			_mm_storel_epi64(cast(__m128i*)dest0, destV);
			dest += HALFLOAD_LENGTH;
			dest0 += HALFLOAD_LENGTH;
			length -= HALFLOAD_LENGTH;
		}
		if(length >= QUTRLOAD_LENGTH){
			__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
			destV = _mm_xor_si128(destV, cast(__m128i)colorV);
			_mm_storeu_si32(dest0, destV);
			//For uint this was the last possible element, so nothing needs to be advanced.
			static if(T.stringof != "uint"){
				dest += QUTRLOAD_LENGTH;
				dest0 += QUTRLOAD_LENGTH;
				length -= QUTRLOAD_LENGTH;
			}
		}
		//Scalar tail: up to three remaining bytes, or a single remaining ushort.
		static if(T.stringof == "ubyte"){
			while(length){
				*dest0 = color ^ *dest;
				dest++;
				dest0++;
				length--;
			}
		}else static if(T.stringof == "ushort"){
			if(length){
				*dest0 = color ^ *dest;
			}
		}
	}else{
		while(length){
			*dest0 = color ^ *dest;
			dest++;
			dest0++;
			length--;
		}
	}
}
/**
 * XOR blitter. Popularly used for selection and pseudo-transparency.
 *
 * XORs every element of `dest` with `color` in place.
 * Params:
 *   dest = pixels to be modified.
 *   length = number of elements of type T to process.
 *   color = value every element is XORed with.
 */
public void xorBlitter(T)(T* dest, size_t length, T color) @nogc pure nothrow {
	static if(T.stringof == "ubyte"){
		byte16 colorV;
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		short8 colorV;
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		int4 colorV;
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	//Broadcast the color into every lane.
	for (int i ; i < MAINLOOP_LENGTH ; i++){
		colorV[i] = color;
	}
	while(length >= MAINLOOP_LENGTH){
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, cast(__m128i)colorV);
		_mm_storeu_si128(cast(__m128i*)dest, destV);
		dest += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, cast(__m128i)colorV);
		_mm_storel_epi64(cast(__m128i*)dest, destV);
		dest += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_xor_si128(destV, cast(__m128i)colorV);
		_mm_storeu_si32(dest, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest = color ^ *dest;
			dest++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest = color ^ *dest;
		}
	}
}
/**
 * XOR blitter. Popularly used for selection and pseudo-transparency.
 *
 * XORs every element of `dest` with the matching element of `src` in place.
 * Params:
 *   src = source pixels, only read.
 *   dest = pixels to be modified.
 *   length = number of elements of type T to process.
 */
public void xorBlitter(T)(T* src, T* dest, size_t length) @nogc pure nothrow {
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest, destV);
		dest += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest, destV);
		dest += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_xor_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si32(dest, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest = *src ^ *dest;
			dest++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest = *src ^ *dest;
		}
	}
}
/**
 * XOR blitter. Popularly used for selection and pseudo-transparency.
 *
 * XORs every element of `dest` with the matching element of `src` and writes the result to `dest0`.
 * Params:
 *   src = source pixels, only read.
 *   dest = background pixels, only read.
 *   dest0 = output pixels.
 *   length = number of elements of type T to process.
 */
public void xorBlitter(T)(T* src, T* dest, T* dest0, size_t length) @nogc pure nothrow {
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest0, destV);
		dest += MAINLOOP_LENGTH;
		dest0 += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_xor_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest0, destV);
		dest += HALFLOAD_LENGTH;
		dest0 += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_xor_si128(destV, srcV);
		_mm_storeu_si32(dest0, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			dest0 += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			//The output goes to dest0, dest must stay untouched just like in the vector paths.
			*dest0 = *src ^ *dest;
			dest++;
			dest0++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest0 = *src ^ *dest;
		}
	}
}
/**
 * AND blitter for misc. usage.
 *
 * ANDs every element of `dest` with the matching element of `src` in place.
 * Params:
 *   src = source pixels, only read.
 *   dest = pixels to be modified.
 *   length = number of elements of type T to process.
 */
public void andBlitter(T)(T* src, T* dest, size_t length) {
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_and_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest, destV);
		dest += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_and_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest, destV);
		dest += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_and_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si32(dest, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest = *src & *dest;
			dest++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest = *src & *dest;
		}
	}
}
/**
 * AND blitter for misc. usage.
 *
 * ANDs every element of `dest` with the matching element of `src` and writes the result to `dest0`.
 * Params:
 *   src = source pixels, only read.
 *   dest = background pixels, only read.
 *   dest0 = output pixels.
 *   length = number of elements of type T to process.
 */
public void andBlitter(T)(T* src, T* dest, T* dest0, size_t length) {
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_and_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest0, destV);
		dest += MAINLOOP_LENGTH;
		dest0 += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_and_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest0, destV);
		dest += HALFLOAD_LENGTH;
		dest0 += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_and_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si32(dest0, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			dest0 += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest0 = *src & *dest;
			dest++;
			dest0++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest0 = *src & *dest;
		}
	}
}
/**
 * OR blitter for misc. usage.
 *
 * ORs every element of `dest` with the matching element of `src` in place.
 * Params:
 *   src = source pixels, only read.
 *   dest = pixels to be modified.
 *   length = number of elements of type T to process.
 */
public void orBlitter(T)(T* src, T* dest, size_t length) {
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_or_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest, destV);
		dest += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_or_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest, destV);
		dest += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_or_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si32(dest, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest = *src | *dest;
			dest++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest = *src | *dest;
		}
	}
}
/**
 * OR blitter for misc. usage.
 *
 * ORs every element of `dest` with the matching element of `src` and writes the result to `dest0`.
 * Params:
 *   src = source pixels, only read.
 *   dest = background pixels, only read.
 *   dest0 = output pixels.
 *   length = number of elements of type T to process.
 */
public void orBlitter(T)(T* src, T* dest, T* dest0, size_t length) {
	static if(T.stringof == "ubyte"){
		static enum MAINLOOP_LENGTH = 16;
		static enum HALFLOAD_LENGTH = 8;
		static enum QUTRLOAD_LENGTH = 4;
	}else static if(T.stringof == "ushort"){
		static enum MAINLOOP_LENGTH = 8;
		static enum HALFLOAD_LENGTH = 4;
		static enum QUTRLOAD_LENGTH = 2;
	}else static if(T.stringof == "uint"){
		static enum MAINLOOP_LENGTH = 4;
		static enum HALFLOAD_LENGTH = 2;
		static enum QUTRLOAD_LENGTH = 1;
	}else static assert(0, "Template parameter '"~ T.stringof ~"' not supported!");
	//Every store below targets dest0 and dest0 advances together with dest, so that the vector paths
	//and the scalar tail write to the same, contiguous output.
	while(length >= MAINLOOP_LENGTH){
		__m128i srcV = _mm_loadu_si128(cast(__m128i*)src);
		__m128i destV = _mm_loadu_si128(cast(__m128i*)dest);
		destV = _mm_or_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si128(cast(__m128i*)dest0, destV);
		dest += MAINLOOP_LENGTH;
		dest0 += MAINLOOP_LENGTH;
		src += MAINLOOP_LENGTH;
		length -= MAINLOOP_LENGTH;
	}
	if(length >= HALFLOAD_LENGTH){
		__m128i srcV = _mm_loadl_epi64(cast(__m128i*)src);
		__m128i destV = _mm_loadl_epi64(cast(__m128i*)dest);
		destV = _mm_or_si128(destV, srcV);
		_mm_storel_epi64(cast(__m128i*)dest0, destV);
		dest += HALFLOAD_LENGTH;
		dest0 += HALFLOAD_LENGTH;
		src += HALFLOAD_LENGTH;
		length -= HALFLOAD_LENGTH;
	}
	if(length >= QUTRLOAD_LENGTH){
		__m128i srcV = _mm_cvtsi32_si128((*cast(int*)src));
		__m128i destV = _mm_cvtsi32_si128((*cast(int*)dest));
		destV = _mm_or_si128(destV, cast(__m128i)srcV);
		_mm_storeu_si32(dest0, destV);
		//For uint this was the last possible element, so nothing needs to be advanced.
		static if(T.stringof != "uint"){
			dest += QUTRLOAD_LENGTH;
			dest0 += QUTRLOAD_LENGTH;
			src += QUTRLOAD_LENGTH;
			length -= QUTRLOAD_LENGTH;
		}
	}
	//Scalar tail: up to three remaining bytes, or a single remaining ushort.
	static if(T.stringof == "ubyte"){
		while(length){
			*dest0 = *src | *dest;
			dest++;
			dest0++;
			src++;
			length--;
		}
	}else static if(T.stringof == "ushort"){
		if(length){
			*dest0 = *src | *dest;
		}
	}
}
unittest {
	//test for zero correctness.
	{
		ubyte[255] a, b, c;
		textBlitter(a.ptr, b.ptr, 255, 0);
		testArrayForValue(b);
		textBlitter(a.ptr, b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);
		xorBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		xorBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
		xorBlitter(b.ptr, 255, 0);
		testArrayForValue(b);
		xorBlitter(b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);

		andBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		andBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);

		orBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		orBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
	}
	{
		ushort[255] a, b, c;
		textBlitter(a.ptr, b.ptr, 255, 0);
		testArrayForValue(b);
		textBlitter(a.ptr, b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);
		xorBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		xorBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
		xorBlitter(b.ptr, 255, 0);
		testArrayForValue(b);
		xorBlitter(b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);

		andBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		andBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);

		orBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		orBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
	}
	{
		uint[255] a, b, c;
		textBlitter(a.ptr, b.ptr, 255, 0);
		testArrayForValue(b);
		textBlitter(a.ptr, b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);
		xorBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		xorBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
		xorBlitter(b.ptr, 255, 0);
		testArrayForValue(b);
		xorBlitter(b.ptr, c.ptr, 255, 0);
		testArrayForValue(c);

		andBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		andBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);

		orBlitter(a.ptr, b.ptr, 255);
		testArrayForValue(b);
		orBlitter(a.ptr, b.ptr, c.ptr, 255);
		testArrayForValue(c);
	}
}
unittest {
	//Regression tests with non-zero data. A length of 255 exercises the main loop, the half and quarter
	//loads, and the scalar tail of every tested function.
	{
		//The last (odd) ushort pixel must receive the color too.
		ushort[255] a, b;
		ushort color = 0x1234;
		a[] = ushort.max;
		textBlitter(a.ptr, b.ptr, 255, color);
		foreach(v; b) assert(v == 0x1234);
	}
	{
		//The three pointer XOR blitter must write everything to dest0 and leave dest untouched.
		ubyte[255] a, b, c;
		a[] = 0x0F;
		b[] = 0xF0;
		xorBlitter(a.ptr, b.ptr, c.ptr, 255);
		foreach(v; c) assert(v == 0xFF);
		foreach(v; b) assert(v == 0xF0);
	}
	{
		//The three pointer OR blitter must write everything to dest0 and leave dest untouched.
		ubyte[255] a, b, c;
		a[] = 0x0F;
		b[] = 0xF0;
		orBlitter(a.ptr, b.ptr, c.ptr, 255);
		foreach(v; c) assert(v == 0xFF);
		foreach(v; b) assert(v == 0xF0);
	}
	{
		ushort[255] a, b, c;
		a[] = 0x00FF;
		b[] = 0xFF00;
		orBlitter(a.ptr, b.ptr, c.ptr, 255);
		foreach(v; c) assert(v == 0xFFFF);
		foreach(v; b) assert(v == 0xFF00);
	}
}