/**
 * The core.internal.atomic module contains the low-level atomic features available in hardware.
 * This module may be a routing layer for compiler intrinsics.
 *
 * Copyright: Copyright Manu Evans 2019.
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Sean Kelly, Alex Rønne Petersen, Manu Evans
 * Source: $(DRUNTIMESRC core/internal/_atomic.d)
 */

module core.internal.atomic;

import core.atomic : MemoryOrder, has128BitCAS;

version (DigitalMars)
{
    private
    {
        enum : int
        {
            AX, BX, CX, DX, DI, SI, R8, R9
        }

        immutable string[4][8] registerNames = [
            [ "AL", "AX", "EAX", "RAX" ],
            [ "BL", "BX", "EBX", "RBX" ],
            [ "CL", "CX", "ECX", "RCX" ],
            [ "DL", "DX", "EDX", "RDX" ],
            [ "DIL", "DI", "EDI", "RDI" ],
            [ "SIL", "SI", "ESI", "RSI" ],
            [ "R8B", "R8W", "R8D", "R8" ],
            [ "R9B", "R9W", "R9D", "R9" ],
        ];

        template RegIndex(T)
        {
            static if (T.sizeof == 1)
                enum RegIndex = 0;
            else static if (T.sizeof == 2)
                enum RegIndex = 1;
            else static if (T.sizeof == 4)
                enum RegIndex = 2;
            else static if (T.sizeof == 8)
                enum RegIndex = 3;
            else
                static assert(false, "Invalid type");
        }

        enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T];
    }
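
    // For reference, SizedReg above resolves to the register name matching the operand size;
    // on a 64-bit target (size_t.sizeof == 8), for example:
    //   SizedReg!CX          -> "RCX"
    //   SizedReg!(CX, int)   -> "ECX"
    //   SizedReg!(AX, ubyte) -> "AL"
    // The resulting names are spliced into the asm templates below via simpleFormat().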

    inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");

        static if (T.sizeof == size_t.sizeof * 2)
        {
            version (D_InlineAsm_X86)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    mov EBX, 0;
                    mov ECX, 0;
                    mov EAX, 0;
                    mov EDX, 0;
                    mov EDI, src;
                    lock; cmpxchg8b [EDI];
                    pop EBX;
                    pop EDI;
                }
            }
            else version (D_InlineAsm_X86_64)
            {
                version (Windows)
                {
                    static if (RegisterReturn!T)
                    {
                        enum SrcPtr = SizedReg!CX;
                        enum RetPtr = null;
                    }
                    else
                    {
                        enum SrcPtr = SizedReg!DX;
                        enum RetPtr = SizedReg!CX;
                    }

                    mixin (simpleFormat(q{
                        asm pure nothrow @nogc @trusted
                        {
                            naked;
                            push RBX;
                            mov R8, %0;
                        ?1  mov R9, %1;
                            mov RBX, 0;
                            mov RCX, 0;
                            mov RAX, 0;
                            mov RDX, 0;
                            lock; cmpxchg16b [R8];
                        ?1  mov [R9], RAX;
                        ?1  mov 8[R9], RDX;
                            pop RBX;
                            ret;
                        }
                    }, [SrcPtr, RetPtr]));
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RBX, 0;
                        mov RCX, 0;
                        mov RAX, 0;
                        mov RDX, 0;
                        lock; cmpxchg16b [RDI];
                        pop RBX;
                        ret;
                    }
                }
            }
        }
        else static if (needsLoadBarrier!order)
        {
            version (D_InlineAsm_X86)
            {
                enum SrcReg = SizedReg!CX;
                enum ZeroReg = SizedReg!(DX, T);
                enum ResReg = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        mov %1, 0;
                        mov %2, 0;
                        mov %0, src;
                        lock; cmpxchg [%0], %1;
                    }
                }, [SrcReg, ZeroReg, ResReg]));
            }
            else version (D_InlineAsm_X86_64)
            {
                version (Windows)
                    enum SrcReg = SizedReg!CX;
                else
                    enum SrcReg = SizedReg!DI;
                enum ZeroReg = SizedReg!(DX, T);
                enum ResReg = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        mov %1, 0;
                        mov %2, 0;
                        lock; cmpxchg [%0], %1;
                        ret;
                    }
                }, [SrcReg, ZeroReg, ResReg]));
            }
        }
        else
            return *src;
    }

    void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()");

        static if (T.sizeof == size_t.sizeof * 2)
        {
            version (D_InlineAsm_X86)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    lea EDI, value;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    mov EDI, dest;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                L1: lock; cmpxchg8b [EDI];
                    jne L1;
                    pop EBX;
                    pop EDI;
                }
            }
            else version (D_InlineAsm_X86_64)
            {
                version (Windows)
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov R8, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, [RCX];
                        mov RCX, 8[RCX];
                    L1: lock; cmpxchg16b [R8];
                        jne L1;
                        pop RBX;
                        ret;
                    }
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RBX, RDI;
                        mov RCX, RSI;
                        mov RDI, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                    L1: lock; cmpxchg16b [RDI];
                        jne L1;
                        pop RBX;
                        ret;
                    }
                }
            }
        }
        else static if (needsStoreBarrier!order)
            atomicExchange!(order, false)(dest, value);
        else
            *dest = value;
    }

    T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        version (D_InlineAsm_X86)
        {
            static assert(T.sizeof <= 4, "64bit atomicFetchAdd not supported on 32bit target.");

            enum DestReg = SizedReg!DX;
            enum ValReg = SizedReg!(AX, T);

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    mov %1, value;
                    mov %0, dest;
                    lock; xadd[%0], %1;
                }
            }, [DestReg, ValReg]));
        }
        else version (D_InlineAsm_X86_64)
        {
            version (Windows)
            {
                enum DestReg = SizedReg!DX;
                enum ValReg = SizedReg!(CX, T);
            }
            else
            {
                enum DestReg = SizedReg!SI;
                enum ValReg = SizedReg!(DI, T);
            }
            enum ResReg = result ? SizedReg!(AX, T) : null;

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    naked;
                    lock; xadd[%0], %1;
                ?2  mov %2, %1;
                    ret;
                }
            }, [DestReg, ValReg, ResReg]));
        }
        else
            static assert(false, "Unsupported architecture.");
    }

    T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        return atomicFetchAdd(dest, cast(T)-cast(IntOrLong!T)value);
    }
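
    // For example, in atomicFetchSub above with T == ubyte and value == 5, the argument passed
    // to atomicFetchAdd is cast(ubyte)-5 == 251, and adding 251 wraps modulo 256, which
    // subtracts 5.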

    T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        version (D_InlineAsm_X86)
        {
            static assert(T.sizeof <= 4, "64bit atomicExchange not supported on 32bit target.");

            enum DestReg = SizedReg!CX;
            enum ValReg = SizedReg!(AX, T);

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    mov %1, value;
                    mov %0, dest;
                    xchg [%0], %1;
                }
            }, [DestReg, ValReg]));
        }
        else version (D_InlineAsm_X86_64)
        {
            version (Windows)
            {
                enum DestReg = SizedReg!DX;
                enum ValReg = SizedReg!(CX, T);
            }
            else
            {
                enum DestReg = SizedReg!SI;
                enum ValReg = SizedReg!(DI, T);
            }
            enum ResReg = result ? SizedReg!(AX, T) : null;

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    naked;
                    xchg [%0], %1;
                ?2  mov %2, %1;
                    ret;
                }
            }, [DestReg, ValReg, ResReg]));
        }
        else
            static assert(false, "Unsupported architecture.");
    }

    alias atomicCompareExchangeWeak = atomicCompareExchangeStrong;
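    // atomicCompareExchangeStrong (and the Weak alias above) returns true when the exchange
    // took place; on failure it returns false and stores the value observed at *dest into
    // *compare.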

    bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        version (D_InlineAsm_X86)
        {
            static if (T.sizeof <= 4)
            {
                enum DestAddr = SizedReg!CX;
                enum CmpAddr = SizedReg!DI;
                enum Val = SizedReg!(DX, T);
                enum Cmp = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        push %1;
                        mov %2, value;
                        mov %1, compare;
                        mov %3, [%1];
                        mov %0, dest;
                        lock; cmpxchg [%0], %2;
                        mov [%1], %3;
                        setz AL;
                        pop %1;
                    }
                }, [DestAddr, CmpAddr, Val, Cmp]));
            }
            else static if (T.sizeof == 8)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    lea EDI, value;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    mov EDI, compare;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                    mov EDI, dest;
                    lock; cmpxchg8b [EDI];
                    mov EDI, compare;
                    mov [EDI], EAX;
                    mov 4[EDI], EDX;
                    setz AL;
                    pop EBX;
                    pop EDI;
                }
            }
            else
                static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target.");
        }
        else version (D_InlineAsm_X86_64)
        {
            static if (T.sizeof <= 8)
            {
                version (Windows)
                {
                    enum DestAddr = SizedReg!R8;
                    enum CmpAddr = SizedReg!DX;
                    enum Val = SizedReg!(CX, T);
                }
                else
                {
                    enum DestAddr = SizedReg!DX;
                    enum CmpAddr = SizedReg!SI;
                    enum Val = SizedReg!(DI, T);
                }
                enum Res = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        mov %3, [%1];
                        lock; cmpxchg [%0], %2;
                        jne compare_fail;
                        mov AL, 1;
                        ret;
                    compare_fail:
                        mov [%1], %3;
                        xor AL, AL;
                        ret;
                    }
                }, [DestAddr, CmpAddr, Val, Res]));
            }
            else
            {
                version (Windows)
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov R9, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, [RCX];
                        mov RCX, 8[RCX];
                        lock; cmpxchg16b [R8];
                        pop RBX;
                        jne compare_fail;
                        mov AL, 1;
                        ret;
                    compare_fail:
                        mov [R9], RAX;
                        mov 8[R9], RDX;
                        xor AL, AL;
                        ret;
                    }
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov R8, RCX;
                        mov R9, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, RDI;
                        mov RCX, RSI;
                        lock; cmpxchg16b [R8];
                        pop RBX;
                        jne compare_fail;
                        mov AL, 1;
                        ret;
                    compare_fail:
                        mov [R9], RAX;
                        mov 8[R9], RDX;
                        xor AL, AL;
                        ret;
                    }
                }
            }
        }
        else
            static assert(false, "Unsupported architecture.");
    }

    alias atomicCompareExchangeWeakNoResult = atomicCompareExchangeStrongNoResult;

    bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        version (D_InlineAsm_X86)
        {
            static if (T.sizeof <= 4)
            {
                enum DestAddr = SizedReg!CX;
                enum Cmp = SizedReg!(AX, T);
                enum Val = SizedReg!(DX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        mov %2, value;
                        mov %1, compare;
                        mov %0, dest;
                        lock; cmpxchg [%0], %2;
                        setz AL;
                    }
                }, [DestAddr, Cmp, Val]));
            }
            else static if (T.sizeof == 8)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    lea EDI, value;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    lea EDI, compare;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                    mov EDI, dest;
                    lock; cmpxchg8b [EDI];
                    setz AL;
                    pop EBX;
                    pop EDI;
                }
            }
            else
                static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target.");
        }
        else version (D_InlineAsm_X86_64)
        {
            static if (T.sizeof <= 8)
            {
                version (Windows)
                {
                    enum DestAddr = SizedReg!R8;
                    enum Cmp = SizedReg!(DX, T);
                    enum Val = SizedReg!(CX, T);
                }
                else
                {
                    enum DestAddr = SizedReg!DX;
                    enum Cmp = SizedReg!(SI, T);
                    enum Val = SizedReg!(DI, T);
                }
                enum AXReg = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        mov %3, %1;
                        lock; cmpxchg [%0], %2;
                        setz AL;
                        ret;
                    }
                }, [DestAddr, Cmp, Val, AXReg]));
            }
            else
            {
                version (Windows)
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, [RCX];
                        mov RCX, 8[RCX];
                        lock; cmpxchg16b [R8];
                        setz AL;
                        pop RBX;
                        ret;
                    }
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RAX, RDX;
                        mov RDX, RCX;
                        mov RBX, RDI;
                        mov RCX, RSI;
                        lock; cmpxchg16b [R8];
                        setz AL;
                        pop RBX;
                        ret;
                    }
                }
            }
        }
        else
            static assert(false, "Unsupported architecture.");
    }

    void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
    {
        // TODO: `mfence` should only be required for seq_cst operations, but this depends on
        //       the compiler's backend knowledge to not reorder code inappropriately,
        //       so we'll apply it conservatively.
        static if (order != MemoryOrder.raw)
        {
            version (D_InlineAsm_X86)
            {
                import core.cpuid;

                // TODO: review this implementation; it seems way overly complicated
                asm pure nothrow @nogc @trusted
                {
                    naked;

                    call sse2;
                    test AL, AL;
                    jne Lcpuid;

                    // Fast path: We have SSE2, so just use mfence.
                    mfence;
                    jmp Lend;

                Lcpuid:

                    // Slow path: We use cpuid to serialize. This is
                    // significantly slower than mfence, but is the
                    // only serialization facility we have available
                    // on older non-SSE2 chips.
                    push EBX;

                    mov EAX, 0;
                    cpuid;

                    pop EBX;

                Lend:

                    ret;
                }
            }
            else version (D_InlineAsm_X86_64)
            {
                asm pure nothrow @nogc @trusted
                {
                    naked;
                    mfence;
                    ret;
                }
            }
            else
                static assert(false, "Unsupported architecture.");
        }
    }

    void pause() pure nothrow @nogc @trusted
    {
        version (D_InlineAsm_X86)
        {
            asm pure nothrow @nogc @trusted
            {
                naked;
                rep; nop;
                ret;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc @trusted
            {
                naked;
                // pause; // TODO: DMD should add this opcode to its inline asm
                rep; nop;
                ret;
            }
        }
        else
        {
            // ARM should `yield`
            // other architectures? otherwise some sort of nop...
        }
    }
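
    // A minimal usage sketch (an illustrative addition, not part of the original module)
    // touching the word-sized code paths above.
    unittest
    {
        int x = 5;
        atomicStore(&x, 7);
        assert(atomicLoad(&x) == 7);
        assert(atomicFetchAdd(&x, 3) == 7);   // returns the previous value; x is now 10
        assert(atomicFetchSub(&x, 10) == 10); // x is now 0
        int expected = 0;
        assert(atomicCompareExchangeStrong(&x, &expected, 42));
        assert(atomicLoad!(MemoryOrder.acq)(&x) == 42);
    }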
}
else version (GNU)
{
    import gcc.builtins;
    import gcc.config;

    inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                ubyte value = __atomic_load_1(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                ushort value = __atomic_load_2(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == uint.sizeof)
            {
                uint value = __atomic_load_4(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong value = __atomic_load_8(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T value;
                __atomic_load(T.sizeof, cast(shared)src, &value, order);
                return *cast(typeof(return)*)&value;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            return *cast(typeof(return)*)src;
        }
    }

    void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                __atomic_store_1(cast(shared)dest, *cast(ubyte*)&value, order);
            else static if (T.sizeof == ushort.sizeof)
                __atomic_store_2(cast(shared)dest, *cast(ushort*)&value, order);
            else static if (T.sizeof == uint.sizeof)
                __atomic_store_4(cast(shared)dest, *cast(uint*)&value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                __atomic_store_8(cast(shared)dest, *cast(ulong*)&value, order);
            else static if (GNU_Have_LibAtomic)
                __atomic_store(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            *dest = value;
            getAtomicMutex.unlock();
        }
    }

    T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                return __atomic_fetch_add_1(cast(shared)dest, value, order);
            else static if (T.sizeof == ushort.sizeof)
                return __atomic_fetch_add_2(cast(shared)dest, value, order);
            else static if (T.sizeof == uint.sizeof)
                return __atomic_fetch_add_4(cast(shared)dest, value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                return __atomic_fetch_add_8(cast(shared)dest, value, order);
            else static if (GNU_Have_LibAtomic)
                return __atomic_fetch_add(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            T tmp = *dest;
            *dest += value;
            return tmp;
        }
    }

    T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                return __atomic_fetch_sub_1(cast(shared)dest, value, order);
            else static if (T.sizeof == ushort.sizeof)
                return __atomic_fetch_sub_2(cast(shared)dest, value, order);
            else static if (T.sizeof == uint.sizeof)
                return __atomic_fetch_sub_4(cast(shared)dest, value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                return __atomic_fetch_sub_8(cast(shared)dest, value, order);
            else static if (GNU_Have_LibAtomic)
                return __atomic_fetch_sub(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            T tmp = *dest;
            *dest -= value;
            return tmp;
        }
    }

    T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong) || is(T == class) || is(T == interface) || is(T U : U*))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
            {
                ubyte res = __atomic_exchange_1(cast(shared)dest, *cast(ubyte*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == short.sizeof)
            {
                ushort res = __atomic_exchange_2(cast(shared)dest, *cast(ushort*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == int.sizeof)
            {
                uint res = __atomic_exchange_4(cast(shared)dest, *cast(uint*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong res = __atomic_exchange_8(cast(shared)dest, *cast(ulong*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T res = void;
                __atomic_exchange(T.sizeof, cast(shared)dest, cast(void*)&value, &res, order);
                return res;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            T res = *dest;
            *dest = value;
            return res;
        }
    }

    bool atomicCompareExchangeWeak(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, true)(dest, compare, value);
    }

    bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, false)(dest, compare, value);
    }

    bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, false)(dest, cast(T*)&compare, value);
    }

    bool atomicCompareExchangeWeakNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, true)(dest, cast(T*)&compare, value);
    }
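
    // Note: the Weak variants forward `true` for the `weak` flag of the
    // __atomic_compare_exchange_N builtins used below, which permits spurious failure;
    // the Strong variants pass `false` and fail only when the compared values differ.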

    private bool atomicCompareExchangeImpl(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, bool weak, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        bool res = void;

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
                res = __atomic_compare_exchange_1(cast(shared)dest, compare, *cast(ubyte*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == short.sizeof)
                res = __atomic_compare_exchange_2(cast(shared)dest, compare, *cast(ushort*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == int.sizeof)
                res = __atomic_compare_exchange_4(cast(shared)dest, compare, *cast(uint*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
                res = __atomic_compare_exchange_8(cast(shared)dest, compare, *cast(ulong*)&value,
                                                  weak, succ, fail);
            else static if (GNU_Have_LibAtomic)
                res = __atomic_compare_exchange(T.sizeof, cast(shared)dest, compare, cast(void*)&value,
                                                succ, fail);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            static if (T.sizeof == byte.sizeof)
                alias U = byte;
            else static if (T.sizeof == short.sizeof)
                alias U = short;
            else static if (T.sizeof == int.sizeof)
                alias U = int;
            else static if (T.sizeof == long.sizeof)
                alias U = long;
            else
                static assert(0, "Invalid template type specified.");

            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            if (*cast(U*)dest == *cast(U*)compare)
            {
                *dest = value;
                res = true;
            }
            else
            {
                *compare = *dest;
                res = false;
            }
        }

        return res;
    }

    void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
            __atomic_thread_fence(order);
        else
        {
            getAtomicMutex.lock();
            getAtomicMutex.unlock();
        }
    }

    void pause() pure nothrow @nogc @trusted
    {
        version (X86)
        {
            __builtin_ia32_pause();
        }
        else version (X86_64)
        {
            __builtin_ia32_pause();
        }
        else
        {
            // Other architectures? Some sort of nop or barrier.
        }
    }

    static if (!GNU_Have_Atomics && !GNU_Have_LibAtomic)
    {
        // Use system mutex for atomics, faking the purity of the functions so
        // that they can be used in pure/nothrow/@safe code.
        extern (C) private pure @trusted @nogc nothrow
        {
            static if (GNU_Thread_Model == ThreadModel.Posix)
            {
                import core.sys.posix.pthread;
                alias atomicMutexHandle = pthread_mutex_t;

                pragma(mangle, "pthread_mutex_init") int fakePureMutexInit(pthread_mutex_t*, pthread_mutexattr_t*);
                pragma(mangle, "pthread_mutex_lock") int fakePureMutexLock(pthread_mutex_t*);
                pragma(mangle, "pthread_mutex_unlock") int fakePureMutexUnlock(pthread_mutex_t*);
            }
            else static if (GNU_Thread_Model == ThreadModel.Win32)
            {
                import core.sys.windows.winbase;
                alias atomicMutexHandle = CRITICAL_SECTION;

                pragma(mangle, "InitializeCriticalSection") int fakePureMutexInit(CRITICAL_SECTION*);
                pragma(mangle, "EnterCriticalSection") void fakePureMutexLock(CRITICAL_SECTION*);
                pragma(mangle, "LeaveCriticalSection") int fakePureMutexUnlock(CRITICAL_SECTION*);
            }
            else
            {
                alias atomicMutexHandle = int;
            }
        }

        // Implements lock/unlock operations.
        private struct AtomicMutex
        {
            int lock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                {
                    if (!_inited)
                    {
                        fakePureMutexInit(&_handle, null);
                        _inited = true;
                    }
                    return fakePureMutexLock(&_handle);
                }
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                    {
                        if (!_inited)
                        {
                            fakePureMutexInit(&_handle);
                            _inited = true;
                        }
                        fakePureMutexLock(&_handle);
                    }
                    return 0;
                }
            }

            int unlock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                    return fakePureMutexUnlock(&_handle);
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                        fakePureMutexUnlock(&_handle);
                    return 0;
                }
            }

        private:
            atomicMutexHandle _handle;
            bool _inited;
        }

        // Internal static mutex reference.
        private AtomicMutex* _getAtomicMutex() @trusted @nogc nothrow
        {
            __gshared static AtomicMutex mutex;
            return &mutex;
        }

        // Pure alias for _getAtomicMutex.
        pragma(mangle, _getAtomicMutex.mangleof)
        private AtomicMutex* getAtomicMutex() pure @trusted @nogc nothrow @property;
    }
}

private:

version (Windows)
{
    enum RegisterReturn(T) = is(T : U[], U) || is(T : R delegate(A), R, A...);
}

enum CanCAS(T) = is(T : ulong) ||
                 is(T == class) ||
                 is(T == interface) ||
                 is(T : U*, U) ||
                 is(T : U[], U) ||
                 is(T : R delegate(A), R, A...) ||
                 (is(T == struct) && __traits(isPOD, T) &&
                  (T.sizeof <= size_t.sizeof*2 ||        // no more than 2 words
                   (T.sizeof == 16 && has128BitCAS)) &&  // or supports 128-bit CAS
                  (T.sizeof & (T.sizeof - 1)) == 0       // is power of 2
                 );
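
// For illustration, CanCAS is satisfied by e.g. int, ulong, Object, void*, int[], delegates,
// and POD structs of at most two words (or exactly 16 bytes when has128BitCAS) whose size
// is a power of two.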

template IntOrLong(T)
{
    static if (T.sizeof > 4)
        alias IntOrLong = long;
    else
        alias IntOrLong = int;
}

// NOTE: x86 loads implicitly have acquire semantics so a memory
//       barrier is only necessary on releases.
template needsLoadBarrier( MemoryOrder ms )
{
    enum bool needsLoadBarrier = ms == MemoryOrder.seq;
}


// NOTE: x86 stores implicitly have release semantics so a memory
//       barrier is only necessary on acquires.
template needsStoreBarrier( MemoryOrder ms )
{
    enum bool needsStoreBarrier = ms == MemoryOrder.seq;
}

// this is a helper to build asm blocks
string simpleFormat(string format, scope string[] args)
{
    string result;
    outer: while (format.length)
    {
        foreach (i; 0 .. format.length)
        {
            if (format[i] == '%' || format[i] == '?')
            {
                bool isQ = format[i] == '?';
                result ~= format[0 .. i++];
                assert (i < format.length, "Invalid format string");
                if (format[i] == '%' || format[i] == '?')
                {
                    assert(!isQ, "Invalid format string");
                    result ~= format[i++];
                }
                else
                {
                    int index = 0;
                    assert (format[i] >= '0' && format[i] <= '9', "Invalid format string");
                    while (i < format.length && format[i] >= '0' && format[i] <= '9')
                        index = index * 10 + (ubyte(format[i++]) - ubyte('0'));
                    if (!isQ)
                        result ~= args[index];
                    else if (!args[index])
                    {
                        size_t j = i;
                        for (; j < format.length;)
                        {
                            if (format[j++] == '\n')
                                break;
                        }
                        i = j;
                    }
                }
                format = format[i .. $];
                continue outer;
            }
        }
        result ~= format;
        break;
    }
    return result;
}
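
// A minimal sanity-check sketch of the simpleFormat mini-language (an illustrative addition,
// not part of the original module): '%n' substitutes args[n], while a '?n' prefix drops the
// rest of that line (through the newline) when args[n] is null.
unittest
{
    assert(simpleFormat("mov %0, %1;", ["RAX", "RBX"]) == "mov RAX, RBX;");
    assert(simpleFormat("?1 mov [%1], RAX;\nret;", ["RCX", "R9"]) == " mov [R9], RAX;\nret;");
    assert(simpleFormat("?1 mov [%1], RAX;\nret;", ["RCX", null]) == "ret;");
}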