1 /** 2 * Top level code for the code generator. 3 * 4 * Copyright: Copyright (C) 1985-1998 by Symantec 5 * Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved 6 * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 7 * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 8 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d) 9 * Documentation: https://dlang.org/phobos/dmd_backend_cgcod.html 10 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d 11 */ 12 13 module dmd.backend.cgcod; 14 15 version = FRAMEPTR; 16 17 version (SCPP) 18 version = COMPILE; 19 version (MARS) 20 version = COMPILE; 21 22 version (COMPILE) 23 { 24 25 import core.bitop; 26 import core.stdc.stdio; 27 import core.stdc.stdlib; 28 import core.stdc.string; 29 30 import dmd.backend.backend; 31 import dmd.backend.cc; 32 import dmd.backend.cdef; 33 import dmd.backend.code; 34 import dmd.backend.cgcse; 35 import dmd.backend.code_x86; 36 import dmd.backend.codebuilder; 37 import dmd.backend.disasm86; 38 import dmd.backend.dlist; 39 import dmd.backend.dvec; 40 import dmd.backend.melf; 41 import dmd.backend.mem; 42 import dmd.backend.el; 43 import dmd.backend.exh; 44 import dmd.backend.global; 45 import dmd.backend.obj; 46 import dmd.backend.oper; 47 import dmd.backend.rtlsym; 48 import dmd.backend.symtab; 49 import dmd.backend.ty; 50 import dmd.backend.type; 51 import dmd.backend.xmm; 52 53 import dmd.backend.barray; 54 55 version (SCPP) 56 { 57 import parser; 58 import precomp; 59 } 60 61 extern (C++): 62 63 nothrow: 64 @safe: 65 66 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*); 67 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar); 68 69 version (MARS) 70 enum MARS = true; 71 else 72 enum MARS = false; 73 74 void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset); 75 76 private extern (D) uint mask(uint m) { return 1 << m; } 77 78 79 __gshared 80 { 81 bool floatreg; // !=0 if floating register is required 82 83 int hasframe; // !=0 if this function has a stack frame 84 bool enforcealign; // enforced stack alignment 85 targ_size_t spoff; 86 targ_size_t Foff; // BP offset of floating register 87 targ_size_t CSoff; // offset of common sub expressions 88 targ_size_t NDPoff; // offset of saved 8087 registers 89 targ_size_t pushoff; // offset of saved registers 90 bool pushoffuse; // using pushoff 91 int BPoff; // offset from BP 92 int EBPtoESP; // add to EBP offset to get ESP offset 93 LocalSection Para; // section of function parameters 94 LocalSection Auto; // section of automatics and registers 95 LocalSection Fast; // section of fastpar 96 LocalSection EEStack; // offset of SCstack variables from ESP 97 LocalSection Alloca; // data for alloca() temporary 98 99 REGSAVE regsave; 100 101 CGstate cgstate; // state of code generator 102 103 regm_t BYTEREGS = BYTEREGS_INIT; 104 regm_t ALLREGS = ALLREGS_INIT; 105 106 107 /************************************ 108 * # of bytes that SP is beyond BP. 109 */ 110 111 uint stackpush; 112 113 int stackchanged; /* set to !=0 if any use of the stack 114 other than accessing parameters. Used 115 to see if we can address parameters 116 with ESP rather than EBP. 117 */ 118 int refparam; // !=0 if we referenced any parameters 119 int reflocal; // !=0 if we referenced any locals 120 bool anyiasm; // !=0 if any inline assembler 121 char calledafunc; // !=0 if we called a function 122 char needframe; // if true, then we will need the frame 123 // pointer (BP for the 8088) 124 char gotref; // !=0 if the GOTsym was referenced 125 uint usednteh; // if !=0, then used NT exception handling 126 bool calledFinally; // true if called a BC_finally block 127 128 /* Register contents */ 129 con_t regcon; 130 131 BackendPass pass; 132 133 private Symbol *retsym; // set to symbol that should be placed in 134 // register AX 135 136 /**************************** 137 * Register masks. 138 */ 139 140 regm_t msavereg; // Mask of registers that we would like to save. 141 // they are temporaries (set by scodelem()) 142 regm_t mfuncreg; // Mask of registers preserved by a function 143 144 regm_t allregs; // ALLREGS optionally including mBP 145 146 int dfoidx; /* which block we are in */ 147 148 targ_size_t funcoffset; // offset of start of function 149 targ_size_t prolog_allocoffset; // offset past adj of stack allocation 150 targ_size_t startoffset; // size of function entry code 151 targ_size_t retoffset; /* offset from start of func to ret code */ 152 targ_size_t retsize; /* size of function return */ 153 154 private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs; 155 156 } 157 158 /********************************* 159 * Generate code for a function. 160 * Note at the end of this routine mfuncreg will contain the mask 161 * of registers not affected by the function. Some minor optimization 162 * possibilities are here. 163 * Params: 164 * sfunc = function to generate code for 165 */ 166 @trusted 167 void codgen(Symbol *sfunc) 168 { 169 bool flag; 170 block *btry; 171 172 // Register usage. If a bit is on, the corresponding register is live 173 // in that basic block. 174 175 //printf("codgen('%s')\n",funcsym_p.Sident.ptr); 176 assert(sfunc == funcsym_p); 177 assert(cseg == funcsym_p.Sseg); 178 179 cgreg_init(); 180 CSE.initialize(); 181 tym_t functy = tybasic(sfunc.ty()); 182 cod3_initregs(); 183 allregs = ALLREGS; 184 pass = BackendPass.initial; 185 Alloca.initialize(); 186 anyiasm = 0; 187 188 if (config.ehmethod == EHmethod.EH_DWARF) 189 { 190 /* The dwarf unwinder relies on the function epilog to exist 191 */ 192 for (block* b = startblock; b; b = b.Bnext) 193 { 194 if (b.BC == BCexit) 195 b.BC = BCret; 196 } 197 } 198 199 tryagain: 200 debug 201 if (debugr) 202 printf("------------------ PASS%s -----------------\n", 203 (pass == BackendPass.initial) ? "init".ptr : ((pass == BackendPass.reg) ? "reg".ptr : "final".ptr)); 204 205 lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0; 206 207 // if no parameters, assume we don't need a stack frame 208 needframe = 0; 209 enforcealign = false; 210 gotref = 0; 211 stackchanged = 0; 212 stackpush = 0; 213 refparam = 0; 214 calledafunc = 0; 215 retsym = null; 216 217 cgstate.stackclean = 1; 218 cgstate.funcarg.initialize(); 219 cgstate.funcargtos = ~0; 220 cgstate.accessedTLS = false; 221 STACKALIGN = TARGET_STACKALIGN; 222 223 regsave.reset(); 224 memset(global87.stack.ptr,0,global87.stack.sizeof); 225 226 calledFinally = false; 227 usednteh = 0; 228 229 static if (MARS) 230 { 231 if (sfunc.Sfunc.Fflags3 & Fjmonitor && 232 config.exe & EX_windos) 233 usednteh |= NTEHjmonitor; 234 } 235 else version (SCPP) 236 { 237 if (CPP) 238 { 239 if (config.exe == EX_WIN32 && 240 (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec)) 241 usednteh |= NTEHexcspec; 242 except_reset(); 243 } 244 } 245 246 // Set on a trial basis, turning it off if anything might throw 247 sfunc.Sfunc.Fflags3 |= Fnothrow; 248 249 floatreg = false; 250 assert(global87.stackused == 0); /* nobody in 8087 stack */ 251 252 CSE.start(); 253 memset(®con,0,regcon.sizeof); 254 regcon.cse.mval = regcon.cse.mops = 0; // no common subs yet 255 msavereg = 0; 256 uint nretblocks = 0; 257 mfuncreg = fregsaved; // so we can see which are used 258 // (bit is cleared each time 259 // we use one) 260 assert(!(needframe && mfuncreg & mBP)); // needframe needs mBP 261 262 for (block* b = startblock; b; b = b.Bnext) 263 { 264 memset(&b.Bregcon,0,b.Bregcon.sizeof); // Clear out values in registers 265 if (b.Belem) 266 resetEcomsub(b.Belem); // reset all the Ecomsubs 267 if (b.BC == BCasm) 268 anyiasm = 1; // we have inline assembler 269 if (b.BC == BCret || b.BC == BCretexp) 270 nretblocks++; 271 } 272 273 if (!config.fulltypes || (config.flags4 & CFG4optimized)) 274 { 275 regm_t noparams = 0; 276 for (int i = 0; i < globsym.length; i++) 277 { 278 Symbol *s = globsym[i]; 279 s.Sflags &= ~SFLread; 280 switch (s.Sclass) 281 { 282 case SC.fastpar: 283 case SC.shadowreg: 284 regcon.params |= s.Spregm(); 285 goto case SC.parameter; 286 287 case SC.parameter: 288 if (s.Sfl == FLreg) 289 noparams |= s.Sregm; 290 break; 291 292 default: 293 break; 294 } 295 } 296 regcon.params &= ~noparams; 297 } 298 299 if (config.flags4 & CFG4optimized) 300 { 301 if (nretblocks == 0 && // if no return blocks in function 302 !(sfunc.ty() & mTYnaked)) // naked functions may have hidden veys of returning 303 sfunc.Sflags |= SFLexit; // mark function as never returning 304 305 assert(dfo); 306 307 cgreg_reset(); 308 for (dfoidx = 0; dfoidx < dfo.length; dfoidx++) 309 { 310 regcon.used = msavereg | regcon.cse.mval; // registers already in use 311 block* b = dfo[dfoidx]; 312 blcodgen(b); // gen code in depth-first order 313 //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used)); 314 cgreg_used(dfoidx, b.Bregcon.used); // gather register used information 315 } 316 } 317 else 318 { 319 pass = BackendPass.final_; 320 for (block* b = startblock; b; b = b.Bnext) 321 blcodgen(b); // generate the code for each block 322 } 323 regcon.immed.mval = 0; 324 assert(!regcon.cse.mops); // should have all been used 325 326 // See which variables we can put into registers 327 if (pass != BackendPass.final_ && 328 !anyiasm) // possible LEA or LES opcodes 329 { 330 allregs |= cod3_useBP(); // see if we can use EBP 331 332 // If pic code, but EBX was never needed 333 if (!(allregs & mask(PICREG)) && !gotref) 334 { 335 allregs |= mask(PICREG); // EBX can now be used 336 cgreg_assign(retsym); 337 pass = BackendPass.reg; 338 } 339 else if (cgreg_assign(retsym)) // if we found some registers 340 pass = BackendPass.reg; 341 else 342 pass = BackendPass.final_; 343 for (block* b = startblock; b; b = b.Bnext) 344 { 345 code_free(b.Bcode); 346 b.Bcode = null; 347 } 348 goto tryagain; 349 } 350 cgreg_term(); 351 352 version (SCPP) 353 { 354 if (CPP) 355 cgcod_eh(); 356 } 357 358 // See if we need to enforce a particular stack alignment 359 foreach (i; 0 .. globsym.length) 360 { 361 Symbol *s = globsym[i]; 362 363 if (Symbol_Sisdead(*s, anyiasm)) 364 continue; 365 366 switch (s.Sclass) 367 { 368 case SC.register: 369 case SC.auto_: 370 case SC.fastpar: 371 if (s.Sfl == FLreg) 372 break; 373 374 const sz = type_alignsize(s.Stype); 375 if (sz > STACKALIGN && (I64 || config.exe == EX_OSX)) 376 { 377 STACKALIGN = sz; 378 enforcealign = true; 379 } 380 break; 381 382 default: 383 break; 384 } 385 } 386 387 stackoffsets(globsym, false); // compute final offsets of stack variables 388 cod5_prol_epi(); // see where to place prolog/epilog 389 CSE.finish(); // compute addresses and sizes of CSE saves 390 391 if (configv.addlinenumbers) 392 objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg)); 393 394 // Otherwise, jmp's to startblock will execute the prolog again 395 assert(!startblock.Bpred); 396 397 CodeBuilder cdbprolog; cdbprolog.ctor(); 398 prolog(cdbprolog); // gen function start code 399 code *cprolog = cdbprolog.finish(); 400 if (cprolog) 401 pinholeopt(cprolog,null); // optimize 402 403 funcoffset = Offset(sfunc.Sseg); 404 targ_size_t coffset = Offset(sfunc.Sseg); 405 406 if (eecontext.EEelem) 407 genEEcode(); 408 409 for (block* b = startblock; b; b = b.Bnext) 410 { 411 // We couldn't do this before because localsize was unknown 412 switch (b.BC) 413 { 414 case BCret: 415 if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked)) 416 { 417 CodeBuilder cdb; cdb.ctor(); 418 cdb.append(b.Bcode); 419 cdb.genlinnum(b.Bsrcpos); 420 b.Bcode = cdb.finish(); 421 } 422 goto case BCretexp; 423 424 case BCretexp: 425 epilog(b); 426 break; 427 428 default: 429 if (b.Bflags & BFLepilog) 430 epilog(b); 431 break; 432 } 433 assignaddr(b); // assign addresses 434 pinholeopt(b.Bcode,b); // do pinhole optimization 435 if (b.Bflags & BFLprolog) // do function prolog 436 { 437 startoffset = coffset + calcblksize(cprolog) - funcoffset; 438 b.Bcode = cat(cprolog,b.Bcode); 439 } 440 cgsched_block(b); 441 b.Bsize = calcblksize(b.Bcode); // calculate block size 442 if (b.Balign) 443 { 444 targ_size_t u = b.Balign - 1; 445 coffset = (coffset + u) & ~u; 446 } 447 b.Boffset = coffset; /* offset of this block */ 448 coffset += b.Bsize; /* offset of following block */ 449 } 450 451 debug 452 debugw && printf("code addr complete\n"); 453 454 // Do jump optimization 455 do 456 { 457 flag = false; 458 for (block* b = startblock; b; b = b.Bnext) 459 { 460 if (b.Bflags & BFLjmpoptdone) /* if no more jmp opts for this blk */ 461 continue; 462 int i = branch(b,0); // see if jmp => jmp short 463 if (i) // if any bytes saved 464 { targ_size_t offset; 465 466 b.Bsize -= i; 467 offset = b.Boffset + b.Bsize; 468 for (block* bn = b.Bnext; bn; bn = bn.Bnext) 469 { 470 if (bn.Balign) 471 { targ_size_t u = bn.Balign - 1; 472 473 offset = (offset + u) & ~u; 474 } 475 bn.Boffset = offset; 476 offset += bn.Bsize; 477 } 478 coffset = offset; 479 flag = true; 480 } 481 } 482 if (!I16 && !(config.flags4 & CFG4optimized)) 483 break; // use the long conditional jmps 484 } while (flag); // loop till no more bytes saved 485 486 debug 487 debugw && printf("code jump optimization complete\n"); 488 489 version (MARS) 490 { 491 if (usednteh & NTEH_try) 492 { 493 // Do this before code is emitted because we patch some instructions 494 nteh_filltables(); 495 } 496 } 497 498 // Compute starting offset for switch tables 499 targ_size_t swoffset; 500 int jmpseg = -1; 501 if (config.flags & CFGromable) 502 { 503 jmpseg = 0; 504 swoffset = coffset; 505 } 506 507 // Emit the generated code 508 if (eecontext.EEcompile == 1) 509 { 510 codout(sfunc.Sseg,eecontext.EEcode,null); 511 code_free(eecontext.EEcode); 512 version (SCPP) 513 { 514 el_free(eecontext.EEelem); 515 } 516 } 517 else 518 { 519 __gshared Barray!ubyte disasmBuf; 520 disasmBuf.reset(); 521 522 for (block* b = startblock; b; b = b.Bnext) 523 { 524 if (b.BC == BCjmptab || b.BC == BCswitch) 525 { 526 if (jmpseg == -1) 527 { 528 jmpseg = objmod.jmpTableSegment(sfunc); 529 swoffset = Offset(jmpseg); 530 } 531 swoffset = _align(0,swoffset); 532 b.Btableoffset = swoffset; /* offset of sw tab */ 533 swoffset += b.Btablesize; 534 } 535 jmpaddr(b.Bcode); /* assign jump addresses */ 536 537 debug 538 if (debugc) 539 { 540 printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n", 541 cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg)); 542 if (b.Bcode) 543 printf( "First opcode of block is: %0x\n", b.Bcode.Iop ); 544 } 545 546 if (b.Balign) 547 { uint u = b.Balign; 548 uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1); 549 550 cod3_align_bytes(sfunc.Sseg, nalign); 551 } 552 assert(b.Boffset == Offset(sfunc.Sseg)); 553 554 version (SCPP) 555 { 556 if (CPP && !(config.exe == EX_WIN32)) 557 { 558 //printf("b = %p, index = %d\n",b,b.Bindex); 559 //except_index_set(b.Bindex); 560 561 if (btry != b.Btry) 562 { 563 btry = b.Btry; 564 except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset); 565 } 566 if (b.BC == BCtry) 567 { 568 btry = b; 569 except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset); 570 } 571 } 572 } 573 574 codout(sfunc.Sseg,b.Bcode,configv.vasm ? &disasmBuf : null); // output code 575 } 576 if (coffset != Offset(sfunc.Sseg)) 577 { 578 debug 579 printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg)); 580 581 assert(0); 582 } 583 sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset; // size of function 584 585 if (configv.vasm) 586 disassemble(disasmBuf[]); // disassemble the code 587 588 static if (NTEXCEPTIONS || MARS) 589 { 590 version (MARS) 591 const nteh = usednteh & NTEH_try; 592 else static if (NTEXCEPTIONS) 593 const nteh = usednteh & NTEHcpp; 594 else 595 enum nteh = true; 596 if (nteh) 597 { 598 assert(!(config.flags & CFGromable)); 599 //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset); 600 objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset); 601 } 602 } 603 604 // Write out switch tables 605 flag = false; // true if last active block was a ret 606 for (block* b = startblock; b; b = b.Bnext) 607 { 608 switch (b.BC) 609 { 610 case BCjmptab: /* if jump table */ 611 outjmptab(b); /* write out jump table */ 612 goto Ldefault; 613 614 case BCswitch: 615 outswitab(b); /* write out switch table */ 616 goto Ldefault; 617 618 case BCret: 619 case BCretexp: 620 /* Compute offset to return code from start of function */ 621 retoffset = b.Boffset + b.Bsize - retsize - funcoffset; 622 version (MARS) 623 { 624 /* Add 3 bytes to retoffset in case we have an exception 625 * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT 626 * IT FIXES THE PROBLEM HERE AS WELL. 627 */ 628 if (usednteh & NTEH_try) 629 retoffset += 3; 630 } 631 flag = true; 632 break; 633 634 default: 635 Ldefault: 636 retoffset = b.Boffset + b.Bsize - funcoffset; 637 break; 638 } 639 } 640 if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked)) 641 /* put line number at end of function on the 642 start of the last instruction 643 */ 644 /* Instead, try offset to cleanup code */ 645 if (retoffset < sfunc.Ssize) 646 objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset); 647 648 static if (MARS) 649 { 650 if (config.exe == EX_WIN64) 651 win64_pdata(sfunc); 652 } 653 654 static if (MARS) 655 { 656 if (usednteh & NTEH_try) 657 { 658 // Do this before code is emitted because we patch some instructions 659 nteh_gentables(sfunc); 660 } 661 if (usednteh & (EHtry | EHcleanup) && // saw BCtry or BC_try or OPddtor 662 config.ehmethod == EHmethod.EH_DM) 663 { 664 except_gentables(); 665 } 666 if (config.ehmethod == EHmethod.EH_DWARF) 667 { 668 sfunc.Sfunc.Fstartblock = startblock; 669 dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset); 670 sfunc.Sfunc.Fstartblock = null; 671 } 672 } 673 674 version (SCPP) 675 { 676 // Write out frame handler 677 if (NTEXCEPTIONS && usednteh & NTEHcpp) 678 { 679 nteh_framehandler(sfunc, except_gentables()); 680 } 681 else 682 { 683 if (NTEXCEPTIONS && usednteh & NTEH_try) 684 { 685 nteh_gentables(sfunc); 686 } 687 else 688 { 689 if (CPP) 690 except_gentables(); 691 } 692 } 693 } 694 695 for (block* b = startblock; b; b = b.Bnext) 696 { 697 code_free(b.Bcode); 698 b.Bcode = null; 699 } 700 } 701 702 // Mask of regs saved 703 // BUG: do interrupt functions save BP? 704 sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved); 705 706 debug 707 if (global87.stackused != 0) 708 printf("stackused = %d\n",global87.stackused); 709 710 assert(global87.stackused == 0); /* nobody in 8087 stack */ 711 712 global87.save.dtor(); // clean up ndp save array 713 } 714 715 /********************************************* 716 * Align sections on the stack. 717 * base negative offset of section from frame pointer 718 * alignment alignment to use 719 * bias difference between where frame pointer points and the STACKALIGNed 720 * part of the stack 721 * Returns: 722 * base revised downward so it is aligned 723 */ 724 @trusted 725 targ_size_t alignsection(targ_size_t base, uint alignment, int bias) 726 { 727 assert(cast(long)base <= 0); 728 if (alignment > STACKALIGN) 729 alignment = STACKALIGN; 730 if (alignment) 731 { 732 long sz = cast(long)(-base + bias); 733 assert(sz >= 0); 734 sz &= (alignment - 1); 735 if (sz) 736 base -= alignment - sz; 737 } 738 return base; 739 } 740 741 /******************************* 742 * Generate code for a function start. 743 * Input: 744 * Offset(cseg) address of start of code 745 * Auto.alignment 746 * Output: 747 * Offset(cseg) adjusted for size of code generated 748 * EBPtoESP 749 * hasframe 750 * BPoff 751 */ 752 @trusted 753 void prolog(ref CodeBuilder cdb) 754 { 755 bool enter; 756 757 //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment); 758 debug debugw && printf("funcstart()\n"); 759 regcon.immed.mval = 0; /* no values in registers yet */ 760 version (FRAMEPTR) 761 EBPtoESP = 0; 762 else 763 EBPtoESP = -REGSIZE; 764 hasframe = 0; 765 bool pushds = false; 766 BPoff = 0; 767 bool pushalloc = false; 768 tym_t tyf = funcsym_p.ty(); 769 tym_t tym = tybasic(tyf); 770 const farfunc = tyfarfunc(tym) != 0; 771 772 // Special Intel 64 bit ABI prolog setup for variadic functions 773 Symbol *sv64 = null; // set to __va_argsave 774 if (I64 && variadic(funcsym_p.Stype)) 775 { 776 /* The Intel 64 bit ABI scheme. 777 * abi_sysV_amd64.pdf 778 * Load arguments passed in registers into the varargs save area 779 * so they can be accessed by va_arg(). 780 */ 781 /* Look for __va_argsave 782 */ 783 for (SYMIDX si = 0; si < globsym.length; si++) 784 { 785 Symbol *s = globsym[si]; 786 if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0) 787 { 788 if (!(s.Sflags & SFLdead)) 789 sv64 = s; 790 break; 791 } 792 } 793 } 794 795 if (config.flags & CFGalwaysframe || 796 funcsym_p.Sfunc.Fflags3 & Ffakeeh || 797 /* The exception stack unwinding mechanism relies on the EBP chain being intact, 798 * so need frame if function can possibly throw 799 */ 800 !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) || 801 cgstate.accessedTLS || 802 sv64 803 ) 804 needframe = 1; 805 806 CodeBuilder cdbx; cdbx.ctor(); 807 808 Lagain: 809 spoff = 0; 810 char guessneedframe = needframe; 811 int cfa_offset = 0; 812 // if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru))) 813 // usednteh |= NTEHpassthru; 814 815 /* Compute BP offsets for variables on stack. 816 * The organization is: 817 * Para.size parameters 818 * -------- stack is aligned to STACKALIGN 819 * seg of return addr (if far function) 820 * IP of return addr 821 * BP. caller's BP 822 * DS (if Windows prolog/epilog) 823 * exception handling context symbol 824 * Fast.size fastpar 825 * Auto.size autos and regs 826 * regsave.off any saved registers 827 * Foff floating register 828 * Alloca.size alloca temporary 829 * CSoff common subs 830 * NDPoff any 8087 saved registers 831 * monitor context record 832 * any saved registers 833 */ 834 835 if (tym == TYifunc) 836 Para.size = 26; // how is this number derived? 837 else 838 { 839 version (FRAMEPTR) 840 { 841 bool frame = needframe || tyf & mTYnaked; 842 Para.size = ((farfunc ? 2 : 1) + frame) * REGSIZE; 843 if (frame) 844 EBPtoESP = -REGSIZE; 845 } 846 else 847 Para.size = ((farfunc ? 2 : 1) + 1) * REGSIZE; 848 } 849 850 /* The real reason for the FAST section is because the implementation of contracts 851 * requires a consistent stack frame location for the 'this' pointer. But if varying 852 * stuff in Auto.offset causes different alignment for that section, the entire block can 853 * shift around, causing a crash in the contracts. 854 * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their 855 * own FAST section, which is never aligned at a size bigger than REGSIZE, and so 856 * its alignment never shifts around. 857 * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned 858 * individually rather than as a group. 859 */ 860 Fast.size = 0; 861 static if (NTEXCEPTIONS == 2) 862 { 863 Fast.size -= nteh_contextsym_size(); 864 version (MARS) 865 { 866 if (config.exe & EX_windos) 867 { 868 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0) 869 Fast.size -= 5 * 4; 870 } 871 } 872 } 873 874 /* Despite what the comment above says, aligning Fast section to size greater 875 * than REGSIZE does not break contract implementation. Fast.offset and 876 * Fast.alignment must be the same for the overriding and 877 * the overridden function, since they have the same parameters. Fast.size 878 * must be the same because otherwise, contract inheritance wouldn't work 879 * even if we didn't align Fast section to size greater than REGSIZE. Therefore, 880 * the only way aligning the section could cause problems with contract 881 * inheritance is if bias (declared below) differed for the overridden 882 * and the overriding function. 883 * 884 * Bias depends on Para.size and needframe. The value of Para.size depends on 885 * whether the function is an interrupt handler and whether it is a farfunc. 886 * DMD does not have _interrupt attribute and D does not make a distinction 887 * between near and far functions, so Para.size should always be 2 * REGSIZE 888 * for D. 889 * 890 * The value of needframe depends on a global setting that is only set 891 * during backend's initialization and on function flag Ffakeeh. On Windows, 892 * that flag is always set for virtual functions, for which contracts are 893 * defined and on other platforms, it is never set. Because of that 894 * the value of neadframe should always be the same for the overridden 895 * and the overriding function, and so bias should be the same too. 896 */ 897 898 version (FRAMEPTR) 899 int bias = enforcealign ? 0 : cast(int)(Para.size); 900 else 901 int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE)); 902 903 if (Fast.alignment < REGSIZE) 904 Fast.alignment = REGSIZE; 905 906 Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias); 907 908 if (Auto.alignment < REGSIZE) 909 Auto.alignment = REGSIZE; // necessary because localsize must be REGSIZE aligned 910 Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias); 911 912 regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias); 913 //printf("regsave.off = x%x, size = x%x, alignment = %x\n", 914 //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment); 915 916 if (floatreg) 917 { 918 uint floatregsize = config.fpxmmregs || I32 ? 16 : DOUBLESIZE; 919 Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias); 920 //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize); 921 } 922 else 923 Foff = regsave.off; 924 925 Alloca.alignment = REGSIZE; 926 Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias); 927 928 CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias); 929 //printf("CSoff = x%x, size = x%x, alignment = %x\n", 930 //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment); 931 932 NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias); 933 934 regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving 935 pushoffuse = false; 936 pushoff = NDPoff; 937 /* We don't keep track of all the pushes and pops in a function. Hence, 938 * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder 939 * won't be setting ESP correctly. With pushoffuse, the registers are restored 940 * from EBP, which is kept track of properly. 941 */ 942 if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64)) 943 { 944 /* Instead of pushing the registers onto the stack one by one, 945 * allocate space in the stack frame and copy/restore them there. 946 */ 947 int xmmtopush = popcnt(topush & XMMREGS); // XMM regs take 16 bytes 948 int gptopush = popcnt(topush) - xmmtopush; // general purpose registers to save 949 if (NDPoff || xmmtopush || cgstate.funcarg.size) 950 { 951 pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16), 952 xmmtopush ? STACKALIGN : REGSIZE, bias); 953 pushoffuse = true; // tell others we're using this strategy 954 } 955 } 956 957 //printf("Fast.size = x%x, Auto.size = x%x\n", cast(int)Fast.size, cast(int)Auto.size); 958 959 cgstate.funcarg.alignment = STACKALIGN; 960 /* If the function doesn't need the extra alignment, don't do it. 961 * Can expand on this by allowing for locals that don't need extra alignment 962 * and calling functions that don't need it. 963 */ 964 if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64)) 965 { 966 cgstate.funcarg.alignment = I64 ? 8 : 4; 967 } 968 969 //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias); 970 cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias); 971 972 localsize = -cgstate.funcarg.offset; 973 974 //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n", 975 //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize); 976 assert(cast(targ_ptrdiff_t)localsize >= 0); 977 978 // Keep the stack aligned by 8 for any subsequent function calls 979 if (!I16 && calledafunc && 980 (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 981 { 982 int npush = popcnt(topush); // number of registers that need saving 983 npush += popcnt(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice 984 if (pushoffuse) 985 npush = 0; 986 987 //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n", 988 //npush, Para.size, needframe, localsize); 989 990 int sz = cast(int)(localsize + npush * REGSIZE); 991 if (!enforcealign) 992 { 993 version (FRAMEPTR) 994 sz += Para.size; 995 else 996 sz += Para.size + (needframe ? 0 : -REGSIZE); 997 } 998 if (sz & (STACKALIGN - 1)) 999 localsize += STACKALIGN - (sz & (STACKALIGN - 1)); 1000 } 1001 cgstate.funcarg.offset = -localsize; 1002 1003 //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n", 1004 //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize); 1005 1006 uint xlocalsize = cast(uint)localsize; // amount to subtract from ESP to make room for locals 1007 1008 if (tyf & mTYnaked) // if no prolog/epilog for function 1009 { 1010 hasframe = 1; 1011 return; 1012 } 1013 1014 if (tym == TYifunc) 1015 { 1016 prolog_ifunc(cdbx,&tyf); 1017 hasframe = 1; 1018 cdb.append(cdbx); 1019 goto Lcont; 1020 } 1021 1022 /* Determine if we need BP set up */ 1023 if (enforcealign) 1024 { 1025 // we need BP to reset the stack before return 1026 // otherwise the return address is lost 1027 needframe = 1; 1028 } 1029 else if (config.flags & CFGalwaysframe) 1030 needframe = 1; 1031 else 1032 { 1033 if (localsize) 1034 { 1035 if (I16 || 1036 !(config.flags4 & CFG4speed) || 1037 config.target_cpu < TARGET_Pentium || 1038 farfunc || 1039 config.flags & CFGstack || 1040 xlocalsize >= 0x1000 || 1041 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 1042 anyiasm || 1043 Alloca.size 1044 ) 1045 { 1046 needframe = 1; 1047 } 1048 } 1049 if (refparam && (anyiasm || I16)) 1050 needframe = 1; 1051 } 1052 1053 if (needframe) 1054 { 1055 assert(mfuncreg & mBP); // shouldn't have used mBP 1056 1057 if (!guessneedframe) // if guessed wrong 1058 goto Lagain; 1059 } 1060 1061 if (I16 && config.wflags & WFwindows && farfunc) 1062 { 1063 prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds); 1064 enter = false; // don't use ENTER instruction 1065 hasframe = 1; // we have a stack frame 1066 } 1067 else if (needframe) // if variables or parameters 1068 { 1069 prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset); 1070 hasframe = 1; 1071 } 1072 1073 /* Align the stack if necessary */ 1074 prolog_stackalign(cdbx); 1075 1076 /* Subtract from stack pointer the size of the local stack frame 1077 */ 1078 if (config.flags & CFGstack) // if stack overflow check 1079 { 1080 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1081 if (Alloca.size) 1082 prolog_setupalloca(cdbx); 1083 } 1084 else if (needframe) /* if variables or parameters */ 1085 { 1086 if (xlocalsize) /* if any stack offset */ 1087 { 1088 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1089 if (Alloca.size) 1090 prolog_setupalloca(cdbx); 1091 } 1092 else 1093 assert(Alloca.size == 0); 1094 } 1095 else if (xlocalsize) 1096 { 1097 assert(I32 || I64); 1098 prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc); 1099 version (FRAMEPTR) { } else 1100 BPoff += REGSIZE; 1101 } 1102 else 1103 assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor)); 1104 EBPtoESP += xlocalsize; 1105 if (hasframe) 1106 EBPtoESP += REGSIZE; 1107 1108 /* Win64 unwind needs the amount of code generated so far 1109 */ 1110 if (config.exe == EX_WIN64) 1111 { 1112 code *c = cdbx.peek(); 1113 pinholeopt(c, null); 1114 prolog_allocoffset = calcblksize(c); 1115 } 1116 1117 version (SCPP) 1118 { 1119 /* The idea is to generate trace for all functions if -Nc is not thrown. 1120 * If -Nc is thrown, generate trace only for global COMDATs, because those 1121 * are relevant to the FUNCTIONS statement in the linker .DEF file. 1122 * This same logic should be in epilog(). 1123 */ 1124 if (config.flags & CFGtrace && 1125 (!(config.flags4 & CFG4allcomdat) || 1126 funcsym_p.Sclass == SC.comdat || 1127 funcsym_p.Sclass == SC.global || 1128 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 1129 ) 1130 ) 1131 { 1132 uint spalign = 0; 1133 int sz = cast(int)localsize; 1134 if (!enforcealign) 1135 { 1136 version (FRAMEPTR) 1137 sz += Para.size; 1138 else 1139 sz += Para.size + (needframe ? 0 : -REGSIZE); 1140 } 1141 if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1))) 1142 spalign = STACKALIGN - (sz & (STACKALIGN - 1)); 1143 1144 if (spalign) 1145 { /* This could be avoided by moving the function call to after the 1146 * registers are saved. But I don't remember why the call is here 1147 * and not there. 1148 */ 1149 cod3_stackadj(cdbx, spalign); 1150 } 1151 1152 uint regsaved; 1153 prolog_trace(cdbx, farfunc, ®saved); 1154 1155 if (spalign) 1156 cod3_stackadj(cdbx, -spalign); 1157 useregs((ALLREGS | mBP | mES) & ~regsaved); 1158 } 1159 } 1160 1161 version (MARS) 1162 { 1163 if (usednteh & NTEHjmonitor) 1164 { Symbol *sthis; 1165 1166 for (SYMIDX si = 0; 1; si++) 1167 { assert(si < globsym.length); 1168 sthis = globsym[si]; 1169 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0) 1170 break; 1171 } 1172 nteh_monitor_prolog(cdbx,sthis); 1173 EBPtoESP += 3 * 4; 1174 } 1175 } 1176 1177 cdb.append(cdbx); 1178 prolog_saveregs(cdb, topush, cfa_offset); 1179 1180 Lcont: 1181 1182 if (config.exe == EX_WIN64) 1183 { 1184 if (variadic(funcsym_p.Stype)) 1185 prolog_gen_win64_varargs(cdb); 1186 regm_t namedargs; 1187 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1188 return; 1189 } 1190 1191 prolog_ifunc2(cdb, tyf, tym, pushds); 1192 1193 static if (NTEXCEPTIONS == 2) 1194 { 1195 if (usednteh & NTEH_except) 1196 nteh_setsp(cdb, 0x89); // MOV __context[EBP].esp,ESP 1197 } 1198 1199 // Load register parameters off of the stack. Do not use 1200 // assignaddr(), as it will replace the stack reference with 1201 // the register! 1202 regm_t namedargs; 1203 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1204 1205 if (sv64) 1206 prolog_genvarargs(cdb, sv64, namedargs); 1207 1208 /* Alignment checks 1209 */ 1210 //assert(Auto.alignment <= STACKALIGN); 1211 //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0); 1212 } 1213 1214 /************************************ 1215 * Predicate for sorting auto symbols for qsort(). 1216 * Returns: 1217 * < 0 s1 goes farther from frame pointer 1218 * > 0 s1 goes nearer the frame pointer 1219 * = 0 no difference 1220 */ 1221 1222 @trusted 1223 extern (C) int 1224 autosort_cmp(scope const void *ps1, scope const void *ps2) 1225 { 1226 Symbol *s1 = *cast(Symbol **)ps1; 1227 Symbol *s2 = *cast(Symbol **)ps2; 1228 1229 /* Largest align size goes furthest away from frame pointer, 1230 * so they get allocated first. 1231 */ 1232 uint alignsize1 = Symbol_Salignsize(*s1); 1233 uint alignsize2 = Symbol_Salignsize(*s2); 1234 if (alignsize1 < alignsize2) 1235 return 1; 1236 else if (alignsize1 > alignsize2) 1237 return -1; 1238 1239 /* move variables nearer the frame pointer that have higher Sweights 1240 * because addressing mode is fewer bytes. Grouping together high Sweight 1241 * variables also may put them in the same cache 1242 */ 1243 if (s1.Sweight < s2.Sweight) 1244 return -1; 1245 else if (s1.Sweight > s2.Sweight) 1246 return 1; 1247 1248 /* More: 1249 * 1. put static arrays nearest the frame pointer, so buffer overflows 1250 * can't change other variable contents 1251 * 2. Do the coloring at the byte level to minimize stack usage 1252 */ 1253 return 0; 1254 } 1255 1256 /****************************** 1257 * Compute stack frame offsets for local variables. 1258 * that did not make it into registers. 1259 * Params: 1260 * symtab = function's symbol table 1261 * estimate = true for do estimate only, false for final 1262 */ 1263 @trusted 1264 void stackoffsets(ref symtab_t symtab, bool estimate) 1265 { 1266 //printf("stackoffsets() %s\n", funcsym_p.Sident.ptr); 1267 1268 Para.initialize(); // parameter offset 1269 Fast.initialize(); // SCfastpar offset 1270 Auto.initialize(); // automatic & register offset 1271 EEStack.initialize(); // for SCstack's 1272 1273 // Set if doing optimization of auto layout 1274 bool doAutoOpt = estimate && config.flags4 & CFG4optimized; 1275 1276 // Put autos in another array so we can do optimizations on the stack layout 1277 Symbol*[10] autotmp = void; 1278 Symbol **autos = null; 1279 if (doAutoOpt) 1280 { 1281 if (symtab.length <= autotmp.length) 1282 autos = autotmp.ptr; 1283 else 1284 { autos = cast(Symbol **)malloc(symtab.length * (*autos).sizeof); 1285 assert(autos); 1286 } 1287 } 1288 size_t autosi = 0; // number used in autos[] 1289 1290 for (int si = 0; si < symtab.length; si++) 1291 { Symbol *s = symtab[si]; 1292 1293 /* Don't allocate space for dead or zero size parameters 1294 */ 1295 switch (s.Sclass) 1296 { 1297 case SC.fastpar: 1298 if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh)) 1299 goto Ldefault; // don't need consistent stack frame 1300 break; 1301 1302 case SC.parameter: 1303 if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty))) 1304 { 1305 Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary 1306 s.Soffset = Para.offset; 1307 continue; 1308 } 1309 break; // allocate even if it's dead 1310 1311 case SC.shadowreg: 1312 break; // allocate even if it's dead 1313 1314 default: 1315 Ldefault: 1316 if (Symbol_Sisdead(*s, anyiasm)) 1317 continue; // don't allocate space 1318 break; 1319 } 1320 1321 targ_size_t sz = type_size(s.Stype); 1322 if (sz == 0) 1323 sz++; // can't handle 0 length structs 1324 1325 uint alignsize = Symbol_Salignsize(*s); 1326 if (alignsize > STACKALIGN) 1327 alignsize = STACKALIGN; // no point if the stack is less aligned 1328 1329 //printf("symbol '%s', size = %d, alignsize = %d, read = %x\n",s.Sident.ptr, cast(int)sz, cast(int)alignsize, s.Sflags & SFLread); 1330 assert(cast(int)sz >= 0); 1331 1332 switch (s.Sclass) 1333 { 1334 case SC.fastpar: 1335 /* Get these 1336 * right next to the stack frame pointer, EBP. 1337 * Needed so we can call nested contract functions 1338 * frequire and fensure. 1339 */ 1340 if (s.Sfl == FLreg) // if allocated in register 1341 continue; 1342 /* Needed because storing fastpar's on the stack in prolog() 1343 * does the entire register 1344 */ 1345 if (sz < REGSIZE) 1346 sz = REGSIZE; 1347 1348 Fast.offset = _align(sz,Fast.offset); 1349 s.Soffset = Fast.offset; 1350 Fast.offset += sz; 1351 //printf("fastpar '%s' sz = %d, fast offset = x%x, %p\n", s.Sident, cast(int) sz, cast(int) s.Soffset, s); 1352 1353 if (alignsize > Fast.alignment) 1354 Fast.alignment = alignsize; 1355 break; 1356 1357 case SC.register: 1358 case SC.auto_: 1359 if (s.Sfl == FLreg) // if allocated in register 1360 break; 1361 1362 if (doAutoOpt) 1363 { autos[autosi++] = s; // deal with later 1364 break; 1365 } 1366 1367 Auto.offset = _align(sz,Auto.offset); 1368 s.Soffset = Auto.offset; 1369 Auto.offset += sz; 1370 //printf("auto '%s' sz = %d, auto offset = x%lx\n", s.Sident,sz, cast(long) s.Soffset); 1371 1372 if (alignsize > Auto.alignment) 1373 Auto.alignment = alignsize; 1374 break; 1375 1376 case SC.stack: 1377 EEStack.offset = _align(sz,EEStack.offset); 1378 s.Soffset = EEStack.offset; 1379 //printf("EEStack.offset = x%lx\n",cast(long)s.Soffset); 1380 EEStack.offset += sz; 1381 break; 1382 1383 case SC.shadowreg: 1384 case SC.parameter: 1385 if (config.exe == EX_WIN64) 1386 { 1387 assert((Para.offset & 7) == 0); 1388 s.Soffset = Para.offset; 1389 Para.offset += 8; 1390 break; 1391 } 1392 /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general, 1393 * but are 4 byte aligned on the OSX 32 stack. 1394 */ 1395 Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */ 1396 if (alignsize >= 16 && 1397 (I64 || (config.exe == EX_OSX && 1398 (tyaggregate(s.ty()) || tyvector(s.ty()))))) 1399 Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1); 1400 s.Soffset = Para.offset; 1401 //printf("%s param offset = x%lx, alignsize = %d\n", s.Sident, cast(long) s.Soffset, cast(int) alignsize); 1402 Para.offset += (s.Sflags & SFLdouble) 1403 ? type_size(tstypes[TYdouble]) // float passed as double 1404 : type_size(s.Stype); 1405 break; 1406 1407 case SC.pseudo: 1408 case SC.static_: 1409 case SC.bprel: 1410 break; 1411 default: 1412 symbol_print(s); 1413 assert(0); 1414 } 1415 } 1416 1417 if (autosi) 1418 { 1419 qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp); 1420 1421 vec_t tbl = vec_calloc(autosi); 1422 1423 for (size_t si = 0; si < autosi; si++) 1424 { 1425 Symbol *s = autos[si]; 1426 1427 targ_size_t sz = type_size(s.Stype); 1428 if (sz == 0) 1429 sz++; // can't handle 0 length structs 1430 1431 uint alignsize = Symbol_Salignsize(*s); 1432 if (alignsize > STACKALIGN) 1433 alignsize = STACKALIGN; // no point if the stack is less aligned 1434 1435 /* See if we can share storage with another variable 1436 * if their live ranges do not overlap. 1437 */ 1438 if (// Don't share because could stomp on variables 1439 // used in finally blocks 1440 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) && 1441 s.Srange && !(s.Sflags & SFLspill)) 1442 { 1443 for (size_t i = 0; i < si; i++) 1444 { 1445 if (!vec_testbit(i,tbl)) 1446 continue; 1447 Symbol *sp = autos[i]; 1448 //printf("auto s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange)); 1449 if (vec_disjoint(s.Srange,sp.Srange) && 1450 !(sp.Soffset & (alignsize - 1)) && 1451 sz <= type_size(sp.Stype)) 1452 { 1453 vec_or(sp.Srange,sp.Srange,s.Srange); 1454 //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident); 1455 s.Soffset = sp.Soffset; 1456 goto L2; 1457 } 1458 } 1459 } 1460 Auto.offset = _align(sz,Auto.offset); 1461 s.Soffset = Auto.offset; 1462 //printf("auto '%s' sz = %d, auto offset = x%lx\n", s.Sident, sz, cast(long) s.Soffset); 1463 Auto.offset += sz; 1464 if (s.Srange && !(s.Sflags & SFLspill)) 1465 vec_setbit(si,tbl); 1466 1467 if (alignsize > Auto.alignment) 1468 Auto.alignment = alignsize; 1469 L2: { } 1470 } 1471 1472 vec_free(tbl); 1473 1474 if (autos != autotmp.ptr) 1475 free(autos); 1476 } 1477 } 1478 1479 /**************************** 1480 * Generate code for a block. 1481 */ 1482 1483 @trusted 1484 private void blcodgen(block *bl) 1485 { 1486 regm_t mfuncregsave = mfuncreg; 1487 1488 //dbg_printf("blcodgen(%p)\n",bl); 1489 1490 /* Determine existing immediate values in registers by ANDing 1491 together the values from all the predecessors of b. 1492 */ 1493 assert(bl.Bregcon.immed.mval == 0); 1494 regcon.immed.mval = 0; // assume no previous contents in registers 1495 // regcon.cse.mval = 0; 1496 foreach (bpl; ListRange(bl.Bpred)) 1497 { 1498 block *bp = list_block(bpl); 1499 1500 if (bpl == bl.Bpred) 1501 { regcon.immed = bp.Bregcon.immed; 1502 regcon.params = bp.Bregcon.params; 1503 // regcon.cse = bp.Bregcon.cse; 1504 } 1505 else 1506 { 1507 int i; 1508 1509 regcon.params &= bp.Bregcon.params; 1510 if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0) 1511 // Actual values must match, too 1512 for (i = 0; i < REGMAX; i++) 1513 { 1514 if (regcon.immed.value[i] != bp.Bregcon.immed.value[i]) 1515 regcon.immed.mval &= ~mask(i); 1516 } 1517 } 1518 } 1519 regcon.cse.mops &= regcon.cse.mval; 1520 1521 // Set regcon.mvar according to what variables are in registers for this block 1522 CodeBuilder cdb; cdb.ctor(); 1523 regcon.mvar = 0; 1524 regcon.mpvar = 0; 1525 regcon.indexregs = 1; 1526 int anyspill = 0; 1527 char *sflsave = null; 1528 if (config.flags4 & CFG4optimized) 1529 { 1530 CodeBuilder cdbload; cdbload.ctor(); 1531 CodeBuilder cdbstore; cdbstore.ctor(); 1532 1533 sflsave = cast(char *) alloca(globsym.length * char.sizeof); 1534 for (SYMIDX i = 0; i < globsym.length; i++) 1535 { 1536 Symbol *s = globsym[i]; 1537 1538 sflsave[i] = s.Sfl; 1539 if (regParamInPreg(s) && 1540 regcon.params & s.Spregm() && 1541 vec_testbit(dfoidx,s.Srange)) 1542 { 1543 // regcon.used |= s.Spregm(); 1544 } 1545 1546 if (s.Sfl == FLreg) 1547 { 1548 if (vec_testbit(dfoidx,s.Srange)) 1549 { 1550 regcon.mvar |= s.Sregm; 1551 if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) 1552 regcon.mpvar |= s.Sregm; 1553 } 1554 } 1555 else if (s.Sflags & SFLspill) 1556 { 1557 if (vec_testbit(dfoidx,s.Srange)) 1558 { 1559 anyspill = cast(int)(i + 1); 1560 cgreg_spillreg_prolog(bl,s,cdbstore,cdbload); 1561 if (vec_testbit(dfoidx,s.Slvreg)) 1562 { 1563 s.Sfl = FLreg; 1564 regcon.mvar |= s.Sregm; 1565 regcon.cse.mval &= ~s.Sregm; 1566 regcon.immed.mval &= ~s.Sregm; 1567 regcon.params &= ~s.Sregm; 1568 if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) 1569 regcon.mpvar |= s.Sregm; 1570 } 1571 } 1572 } 1573 } 1574 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 1575 { 1576 cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval); 1577 } 1578 cdb.append(cdbstore); 1579 cdb.append(cdbload); 1580 mfuncreg &= ~regcon.mvar; // use these registers 1581 regcon.used |= regcon.mvar; 1582 1583 // Determine if we have more than 1 uncommitted index register 1584 regcon.indexregs = IDXREGS & ~regcon.mvar; 1585 regcon.indexregs &= regcon.indexregs - 1; 1586 } 1587 1588 /* This doesn't work when calling the BC_finally function, 1589 * as it is one block calling another. 1590 */ 1591 //regsave.idx = 0; 1592 1593 reflocal = 0; 1594 int refparamsave = refparam; 1595 refparam = 0; 1596 assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops); 1597 1598 outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave); 1599 bl.Bcode = cdb.finish(); 1600 1601 for (int i = 0; i < anyspill; i++) 1602 { 1603 Symbol *s = globsym[i]; 1604 s.Sfl = sflsave[i]; // undo block register assignments 1605 } 1606 1607 if (reflocal) 1608 bl.Bflags |= BFLreflocal; 1609 if (refparam) 1610 bl.Bflags |= BFLrefparam; 1611 refparam |= refparamsave; 1612 bl.Bregcon.immed = regcon.immed; 1613 bl.Bregcon.cse = regcon.cse; 1614 bl.Bregcon.used = regcon.used; 1615 bl.Bregcon.params = regcon.params; 1616 1617 debug 1618 debugw && printf("code gen complete\n"); 1619 } 1620 1621 /***************************************** 1622 * Add in exception handling code. 1623 */ 1624 1625 version (SCPP) 1626 { 1627 1628 private void cgcod_eh() 1629 { 1630 list_t stack; 1631 int idx; 1632 int tryidx; 1633 1634 if (!(usednteh & (EHtry | EHcleanup))) 1635 return; 1636 1637 // Compute Bindex for each block 1638 for (block *b = startblock; b; b = b.Bnext) 1639 { 1640 b.Bindex = -1; 1641 b.Bflags &= ~BFLvisited; /* mark as unvisited */ 1642 } 1643 block *btry = null; 1644 int lastidx = 0; 1645 startblock.Bindex = 0; 1646 for (block *b = startblock; b; b = b.Bnext) 1647 { 1648 if (btry == b.Btry && b.BC == BCcatch) // if don't need to pop try block 1649 { 1650 block *br = list_block(b.Bpred); // find corresponding try block 1651 assert(br.BC == BCtry); 1652 b.Bindex = br.Bindex; 1653 } 1654 else if (btry != b.Btry && b.BC != BCcatch || 1655 !(b.Bflags & BFLvisited)) 1656 b.Bindex = lastidx; 1657 b.Bflags |= BFLvisited; 1658 1659 debug 1660 if (debuge) 1661 { 1662 printf("%s block (%p) Btry=%p Bindex=%d\n",bc_str(b.BC),b,b.Btry,b.Bindex); 1663 } 1664 1665 except_index_set(b.Bindex); 1666 if (btry != b.Btry) // exited previous try block 1667 { 1668 except_pop(b,null,btry); 1669 btry = b.Btry; 1670 } 1671 if (b.BC == BCtry) 1672 { 1673 except_push(b,null,b); 1674 btry = b; 1675 tryidx = except_index_get(); 1676 CodeBuilder cdb; cdb.ctor(); 1677 nteh_gensindex(cdb,tryidx - 1); 1678 cdb.append(b.Bcode); 1679 b.Bcode = cdb.finish(); 1680 } 1681 1682 stack = null; 1683 for (code *c = b.Bcode; c; c = code_next(c)) 1684 { 1685 if ((c.Iop & ESCAPEmask) == ESCAPE) 1686 { 1687 code *c1 = null; 1688 switch (c.Iop & 0xFFFF00) 1689 { 1690 case ESCctor: 1691 //printf("ESCctor\n"); 1692 except_push(c,c.IEV1.Vtor,null); 1693 goto L1; 1694 1695 case ESCdtor: 1696 //printf("ESCdtor\n"); 1697 except_pop(c,c.IEV1.Vtor,null); 1698 L1: if (config.exe == EX_WIN32) 1699 { 1700 CodeBuilder cdb; cdb.ctor(); 1701 nteh_gensindex(cdb,except_index_get() - 1); 1702 c1 = cdb.finish(); 1703 c1.next = code_next(c); 1704 c.next = c1; 1705 } 1706 break; 1707 1708 case ESCmark: 1709 //printf("ESCmark\n"); 1710 idx = except_index_get(); 1711 list_prependdata(&stack,idx); 1712 except_mark(); 1713 break; 1714 1715 case ESCrelease: 1716 //printf("ESCrelease\n"); 1717 version (SCPP) 1718 { 1719 idx = list_data(stack); 1720 list_pop(&stack); 1721 if (idx != except_index_get()) 1722 { 1723 if (config.exe == EX_WIN32) 1724 { 1725 CodeBuilder cdb; cdb.ctor(); 1726 nteh_gensindex(cdb,idx - 1); 1727 c1 = cdb.finish(); 1728 c1.next = code_next(c); 1729 c.next = c1; 1730 } 1731 else 1732 { except_pair_append(c,idx - 1); 1733 c.Iop = ESCAPE | ESCoffset; 1734 } 1735 } 1736 except_release(); 1737 } 1738 break; 1739 1740 case ESCmark2: 1741 //printf("ESCmark2\n"); 1742 except_mark(); 1743 break; 1744 1745 case ESCrelease2: 1746 //printf("ESCrelease2\n"); 1747 version (SCPP) 1748 { 1749 except_release(); 1750 } 1751 break; 1752 1753 default: 1754 break; 1755 } 1756 } 1757 } 1758 assert(stack == null); 1759 b.Bendindex = except_index_get(); 1760 1761 if (b.BC != BCret && b.BC != BCretexp) 1762 lastidx = b.Bendindex; 1763 1764 // Set starting index for each of the successors 1765 int i = 0; 1766 foreach (bl; ListRange(b.Bsucc)) 1767 { 1768 block *bs = list_block(bl); 1769 if (b.BC == BCtry) 1770 { 1771 switch (i) 1772 { 1773 case 0: // block after catches 1774 bs.Bindex = b.Bendindex; 1775 break; 1776 1777 case 1: // 1st catch block 1778 bs.Bindex = tryidx; 1779 break; 1780 1781 default: // subsequent catch blocks 1782 bs.Bindex = b.Bindex; 1783 break; 1784 } 1785 1786 debug 1787 if (debuge) 1788 { 1789 printf(" 1setting %p to %d\n",bs,bs.Bindex); 1790 } 1791 } 1792 else if (!(bs.Bflags & BFLvisited)) 1793 { 1794 bs.Bindex = b.Bendindex; 1795 1796 debug 1797 if (debuge) 1798 { 1799 printf(" 2setting %p to %d\n",bs,bs.Bindex); 1800 } 1801 } 1802 bs.Bflags |= BFLvisited; 1803 i++; 1804 } 1805 } 1806 1807 if (config.exe == EX_WIN32) 1808 for (block *b = startblock; b; b = b.Bnext) 1809 { 1810 if (/*!b.Bcount ||*/ b.BC == BCtry) 1811 continue; 1812 foreach (bl; ListRange(b.Bpred)) 1813 { 1814 int pi = list_block(bl).Bendindex; 1815 if (b.Bindex != pi) 1816 { 1817 CodeBuilder cdb; cdb.ctor(); 1818 nteh_gensindex(cdb,b.Bindex - 1); 1819 cdb.append(b.Bcode); 1820 b.Bcode = cdb.finish(); 1821 break; 1822 } 1823 } 1824 } 1825 } 1826 1827 } 1828 1829 /****************************** 1830 * Given a register mask, find and return the number 1831 * of the first register that fits. 1832 */ 1833 1834 @trusted 1835 reg_t findreg(regm_t regm) 1836 { 1837 return findreg(regm, __LINE__, __FILE__); 1838 } 1839 1840 @trusted 1841 reg_t findreg(regm_t regm, int line, const(char)* file) 1842 { 1843 debug 1844 regm_t regmsave = regm; 1845 1846 reg_t i = 0; 1847 while (1) 1848 { 1849 if (!(regm & 0xF)) 1850 { 1851 regm >>= 4; 1852 i += 4; 1853 if (!regm) 1854 break; 1855 } 1856 if (regm & 1) 1857 return i; 1858 regm >>= 1; 1859 i++; 1860 } 1861 1862 debug 1863 printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regmsave),line,file,funcsym_p.Sident.ptr); 1864 fflush(stdout); 1865 1866 // *(char*)0=0; 1867 assert(0); 1868 } 1869 1870 /*************** 1871 * Free element (but not its leaves! (assume they are already freed)) 1872 * Don't decrement Ecount! This is so we can detect if the common subexp 1873 * has already been evaluated. 1874 * If common subexpression is not required anymore, eliminate 1875 * references to it. 1876 */ 1877 1878 @trusted 1879 void freenode(elem *e) 1880 { 1881 elem_debug(e); 1882 //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount); 1883 if (e.Ecomsub--) return; /* usage count */ 1884 if (e.Ecount) /* if it was a CSE */ 1885 { 1886 for (size_t i = 0; i < regcon.cse.value.length; i++) 1887 { 1888 if (regcon.cse.value[i] == e) /* if a register is holding it */ 1889 { 1890 regcon.cse.mval &= ~mask(cast(uint)i); 1891 regcon.cse.mops &= ~mask(cast(uint)i); /* free masks */ 1892 } 1893 } 1894 CSE.remove(e); 1895 } 1896 } 1897 1898 /********************************* 1899 * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode(). 1900 */ 1901 1902 @trusted 1903 private void resetEcomsub(elem *e) 1904 { 1905 while (1) 1906 { 1907 elem_debug(e); 1908 e.Ecomsub = e.Ecount; 1909 const op = e.Eoper; 1910 if (!OTleaf(op)) 1911 { 1912 if (OTbinary(op)) 1913 resetEcomsub(e.EV.E2); 1914 e = e.EV.E1; 1915 } 1916 else 1917 break; 1918 } 1919 } 1920 1921 /********************************* 1922 * Determine if elem e is a register variable. 1923 * If so: 1924 * *pregm = mask of registers that make up the variable 1925 * *preg = the least significant register 1926 * returns true 1927 * Else 1928 * returns false 1929 */ 1930 1931 @trusted 1932 int isregvar(elem *e,regm_t *pregm,reg_t *preg) 1933 { 1934 Symbol *s; 1935 uint u; 1936 regm_t m; 1937 regm_t regm; 1938 reg_t reg; 1939 1940 elem_debug(e); 1941 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 1942 { 1943 s = e.EV.Vsym; 1944 switch (s.Sfl) 1945 { 1946 case FLreg: 1947 if (s.Sclass == SC.parameter) 1948 { refparam = true; 1949 reflocal = true; 1950 } 1951 reg = e.EV.Voffset == REGSIZE ? s.Sregmsw : s.Sreglsw; 1952 regm = s.Sregm; 1953 //assert(tyreg(s.ty())); 1954 static if (0) 1955 { 1956 // Let's just see if there is a CSE in a reg we can use 1957 // instead. This helps avoid AGI's. 1958 if (e.Ecount && e.Ecount != e.Ecomsub) 1959 { int i; 1960 1961 for (i = 0; i < arraysize(regcon.cse.value); i++) 1962 { 1963 if (regcon.cse.value[i] == e) 1964 { reg = i; 1965 break; 1966 } 1967 } 1968 } 1969 } 1970 assert(regm & regcon.mvar && !(regm & ~regcon.mvar)); 1971 goto Lreg; 1972 1973 case FLpseudo: 1974 version (MARS) 1975 { 1976 u = s.Sreglsw; 1977 m = mask(u); 1978 if (m & ALLREGS && (u & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H 1979 { 1980 reg = u & 7; 1981 regm = m; 1982 goto Lreg; 1983 } 1984 } 1985 else 1986 { 1987 u = s.Sreglsw; 1988 m = pseudomask[u]; 1989 if (m & ALLREGS && (u & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H 1990 { 1991 reg = pseudoreg[u] & 7; 1992 regm = m; 1993 goto Lreg; 1994 } 1995 } 1996 break; 1997 1998 default: 1999 break; 2000 } 2001 } 2002 return false; 2003 2004 Lreg: 2005 if (preg) 2006 *preg = reg; 2007 if (pregm) 2008 *pregm = regm; 2009 return true; 2010 } 2011 2012 /********************************* 2013 * Allocate some registers. 2014 * Input: 2015 * pretregs Pointer to mask of registers to make selection from. 2016 * tym Mask of type we will store in registers. 2017 * Output: 2018 * *pretregs Mask of allocated registers. 2019 * *preg Register number of first allocated register. 2020 * msavereg,mfuncreg retregs bits are cleared. 2021 * regcon.cse.mval,regcon.cse.mops updated 2022 * Returns: 2023 * pointer to code generated if necessary to save any regcon.cse.mops on the 2024 * stack. 2025 */ 2026 2027 void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym) 2028 { 2029 allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__); 2030 } 2031 2032 @trusted 2033 void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym 2034 ,int line,const(char)* file) 2035 { 2036 reg_t reg; 2037 2038 static if (0) 2039 { 2040 if (pass == BackendPass.final_) 2041 { 2042 printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym %s\n", 2043 file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval), 2044 regm_str(msavereg),regm_str(*pretregs),tym_str(tym)); 2045 } 2046 } 2047 tym = tybasic(tym); 2048 uint size = _tysize[tym]; 2049 *pretregs &= mES | allregs | XMMREGS; 2050 regm_t retregs = *pretregs; 2051 2052 debug if (retregs == 0) 2053 printf("allocreg: file %s(%d)\n", file, line); 2054 2055 if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars 2056 { 2057 if (size <= REGSIZE || (retregs & XMMREGS)) 2058 { 2059 *preg = findreg(retregs); 2060 assert(retregs == mask(*preg)); /* no more bits are set */ 2061 } 2062 else if (size <= 2 * REGSIZE) 2063 { 2064 *preg = findregmsw(retregs); 2065 assert(retregs & mLSW); 2066 } 2067 else 2068 assert(0); 2069 getregs(cdb,retregs); 2070 return; 2071 } 2072 int count = 0; 2073 L1: 2074 //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs)); 2075 assert(++count < 20); /* fail instead of hanging if blocked */ 2076 assert(retregs); 2077 reg_t msreg = NOREG, lsreg = NOREG; /* no value assigned yet */ 2078 L3: 2079 //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs)); 2080 regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params); 2081 if (!r) 2082 { 2083 r = retregs & ~(msavereg | regcon.cse.mval); 2084 if (!r) 2085 { 2086 r = retregs & ~(msavereg | regcon.cse.mops); 2087 if (!r) 2088 { r = retregs & ~msavereg; 2089 if (!r) 2090 r = retregs; 2091 } 2092 } 2093 } 2094 2095 if (size <= REGSIZE || retregs & XMMREGS) 2096 { 2097 if (r & ~mBP) 2098 r &= ~mBP; 2099 2100 // If only one index register, prefer to not use LSW registers 2101 if (!regcon.indexregs && r & ~mLSW) 2102 r &= ~mLSW; 2103 2104 if (pass == BackendPass.final_ && r & ~lastretregs && !I16) 2105 { // Try not to always allocate the same register, 2106 // to schedule better 2107 2108 r &= ~lastretregs; 2109 if (r & ~last2retregs) 2110 { 2111 r &= ~last2retregs; 2112 if (r & ~last3retregs) 2113 { 2114 r &= ~last3retregs; 2115 if (r & ~last4retregs) 2116 { 2117 r &= ~last4retregs; 2118 // if (r & ~last5retregs) 2119 // r &= ~last5retregs; 2120 } 2121 } 2122 } 2123 if (r & ~mfuncreg) 2124 r &= ~mfuncreg; 2125 } 2126 reg = findreg(r); 2127 retregs = mask(reg); 2128 } 2129 else if (size <= 2 * REGSIZE) 2130 { 2131 /* Select pair with both regs free. Failing */ 2132 /* that, select pair with one reg free. */ 2133 2134 if (r & mBP) 2135 { 2136 retregs &= ~mBP; 2137 goto L3; 2138 } 2139 2140 if (r & mMSW) 2141 { 2142 if (r & mDX) 2143 msreg = DX; /* prefer to use DX over CX */ 2144 else 2145 msreg = findregmsw(r); 2146 r &= mLSW; /* see if there's an LSW also */ 2147 if (r) 2148 lsreg = findreg(r); 2149 else if (lsreg == NOREG) /* if don't have LSW yet */ 2150 { 2151 retregs &= mLSW; 2152 goto L3; 2153 } 2154 } 2155 else 2156 { 2157 if (I64 && !(r & mLSW)) 2158 { 2159 retregs = *pretregs & (mMSW | mLSW); 2160 assert(retregs); 2161 goto L1; 2162 } 2163 lsreg = findreglsw(r); 2164 if (msreg == NOREG) 2165 { 2166 retregs &= mMSW; 2167 assert(retregs); 2168 goto L3; 2169 } 2170 } 2171 reg = (msreg == ES) ? lsreg : msreg; 2172 retregs = mask(msreg) | mask(lsreg); 2173 } 2174 else if (I16 && (tym == TYdouble || tym == TYdouble_alias)) 2175 { 2176 debug 2177 if (retregs != DOUBLEREGS) 2178 printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs)); 2179 2180 assert(retregs == DOUBLEREGS); 2181 reg = AX; 2182 } 2183 else 2184 { 2185 debug 2186 { 2187 printf("%s\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n", 2188 tym_str(tym),file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym); 2189 } 2190 assert(0); 2191 } 2192 if (retregs & regcon.mvar) // if conflict with reg vars 2193 { 2194 if (!(size > REGSIZE && *pretregs == (mAX | mDX))) 2195 { 2196 retregs = (*pretregs &= ~(retregs & regcon.mvar)); 2197 goto L1; // try other registers 2198 } 2199 } 2200 *preg = reg; 2201 *pretregs = retregs; 2202 2203 //printf("Allocating %s\n",regm_str(retregs)); 2204 last5retregs = last4retregs; 2205 last4retregs = last3retregs; 2206 last3retregs = last2retregs; 2207 last2retregs = lastretregs; 2208 lastretregs = retregs; 2209 getregs(cdb, retregs); 2210 } 2211 2212 2213 /***************************************** 2214 * Allocate a scratch register. 2215 * Params: 2216 * cdb = where to write any generated code to 2217 * regm = mask of registers to pick one from 2218 * Returns: 2219 * selected register 2220 */ 2221 @trusted 2222 reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm) 2223 { 2224 reg_t r; 2225 allocreg(cdb, ®m, &r, TYoffset); 2226 return r; 2227 } 2228 2229 2230 /****************************** 2231 * Determine registers that should be destroyed upon arrival 2232 * to code entry point for exception handling. 2233 */ 2234 @trusted 2235 regm_t lpadregs() 2236 { 2237 regm_t used; 2238 if (config.ehmethod == EHmethod.EH_DWARF) 2239 used = allregs & ~mfuncreg; 2240 else 2241 used = (I32 | I64) ? allregs : (ALLREGS | mES); 2242 //printf("lpadregs(): used=%s, allregs=%s, mfuncreg=%s\n", regm_str(used), regm_str(allregs), regm_str(mfuncreg)); 2243 return used; 2244 } 2245 2246 2247 /************************* 2248 * Mark registers as used. 2249 */ 2250 2251 @trusted 2252 void useregs(regm_t regm) 2253 { 2254 //printf("useregs(x%x) %s\n", regm, regm_str(regm)); 2255 mfuncreg &= ~regm; 2256 regcon.used |= regm; // registers used in this block 2257 regcon.params &= ~regm; 2258 if (regm & regcon.mpvar) // if modified a fastpar register variable 2259 regcon.params = 0; // toss them all out 2260 } 2261 2262 /************************* 2263 * We are going to use the registers in mask r. 2264 * Generate any code necessary to save any regs. 2265 */ 2266 2267 @trusted 2268 void getregs(ref CodeBuilder cdb, regm_t r) 2269 { 2270 //printf("getregs(x%x) %s\n", r, regm_str(r)); 2271 regm_t ms = r & regcon.cse.mops; // mask of common subs we must save 2272 useregs(r); 2273 regcon.cse.mval &= ~r; 2274 msavereg &= ~r; // regs that are destroyed 2275 regcon.immed.mval &= ~r; 2276 if (ms) 2277 cse_save(cdb, ms); 2278 } 2279 2280 /************************* 2281 * We are going to use the registers in mask r. 2282 * Same as getregs(), but assert if code is needed to be generated. 2283 */ 2284 @trusted 2285 void getregsNoSave(regm_t r) 2286 { 2287 //printf("getregsNoSave(x%x) %s\n", r, regm_str(r)); 2288 assert(!(r & regcon.cse.mops)); // mask of common subs we must save 2289 useregs(r); 2290 regcon.cse.mval &= ~r; 2291 msavereg &= ~r; // regs that are destroyed 2292 regcon.immed.mval &= ~r; 2293 } 2294 2295 /***************************************** 2296 * Copy registers in cse.mops into memory. 2297 */ 2298 2299 @trusted 2300 private void cse_save(ref CodeBuilder cdb, regm_t ms) 2301 { 2302 assert((ms & regcon.cse.mops) == ms); 2303 regcon.cse.mops &= ~ms; 2304 2305 /* Skip CSEs that are already saved */ 2306 for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1) 2307 { 2308 if (regm & ms) 2309 { 2310 const e = regcon.cse.value[findreg(regm)]; 2311 const sz = tysize(e.Ety); 2312 foreach (const ref cse; CSE.filter(e)) 2313 { 2314 if (sz <= REGSIZE || 2315 sz <= 2 * REGSIZE && 2316 (regm & mMSW && cse.regm & mMSW || 2317 regm & mLSW && cse.regm & mLSW) || 2318 sz == 4 * REGSIZE && regm == cse.regm 2319 ) 2320 { 2321 ms &= ~regm; 2322 if (!ms) 2323 return; 2324 break; 2325 } 2326 } 2327 } 2328 } 2329 2330 while (ms) 2331 { 2332 auto cse = CSE.add(); 2333 reg_t reg = findreg(ms); /* the register to save */ 2334 cse.e = regcon.cse.value[reg]; 2335 cse.regm = mask(reg); 2336 2337 ms &= ~mask(reg); /* turn off reg bit in ms */ 2338 2339 // If we can simply reload the CSE, we don't need to save it 2340 if (cse_simple(&cse.csimple, cse.e)) 2341 cse.flags |= CSEsimple; 2342 else 2343 { 2344 CSE.updateSizeAndAlign(cse.e); 2345 gen_storecse(cdb, cse.e.Ety, reg, cse.slot); 2346 reflocal = true; 2347 } 2348 } 2349 } 2350 2351 /****************************************** 2352 * Getregs without marking immediate register values as gone. 2353 */ 2354 2355 @trusted 2356 void getregs_imm(ref CodeBuilder cdb, regm_t r) 2357 { 2358 regm_t save = regcon.immed.mval; 2359 getregs(cdb,r); 2360 regcon.immed.mval = save; 2361 } 2362 2363 /****************************************** 2364 * Flush all CSE's out of registers and into memory. 2365 * Input: 2366 * do87 !=0 means save 87 registers too 2367 */ 2368 2369 @trusted 2370 void cse_flush(ref CodeBuilder cdb, int do87) 2371 { 2372 //dbg_printf("cse_flush()\n"); 2373 cse_save(cdb,regcon.cse.mops); // save any CSEs to memory 2374 if (do87) 2375 save87(cdb); // save any 8087 temporaries 2376 } 2377 2378 /************************* 2379 * Common subexpressions exist in registers. Note this in regcon.cse.mval. 2380 * Input: 2381 * e the subexpression 2382 * regm mask of registers holding it 2383 * opsflag if != 0 then regcon.cse.mops gets set too 2384 * Returns: 2385 * false not saved as a CSE 2386 * true saved as a CSE 2387 */ 2388 2389 @trusted 2390 bool cssave(elem *e,regm_t regm,uint opsflag) 2391 { 2392 bool result = false; 2393 2394 /*if (e.Ecount && e.Ecount == e.Ecomsub)*/ 2395 if (e.Ecount && e.Ecomsub) 2396 { 2397 if (!opsflag && pass != BackendPass.final_ && (I32 || I64)) 2398 return false; 2399 2400 //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag); 2401 regm &= mBP | ALLREGS | mES | XMMREGS; /* just to be sure */ 2402 2403 /+ 2404 /* Do not register CSEs if they are register variables and */ 2405 /* are not operator nodes. This forces the register allocation */ 2406 /* to go through allocreg(), which will prevent using register */ 2407 /* variables for scratch. */ 2408 if (opsflag || !(regm & regcon.mvar)) 2409 +/ 2410 for (uint i = 0; regm; i++) 2411 { 2412 regm_t mi = mask(i); 2413 if (regm & mi) 2414 { 2415 regm &= ~mi; 2416 2417 // If we don't need this CSE, and the register already 2418 // holds a CSE that we do need, don't mark the new one 2419 if (regcon.cse.mval & mi && regcon.cse.value[i] != e && 2420 !opsflag && regcon.cse.mops & mi) 2421 continue; 2422 2423 regcon.cse.mval |= mi; 2424 if (opsflag) 2425 regcon.cse.mops |= mi; 2426 //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e); 2427 regcon.cse.value[i] = e; 2428 result = true; 2429 } 2430 } 2431 } 2432 return result; 2433 } 2434 2435 /************************************* 2436 * Determine if a computation should be done into a register. 2437 */ 2438 2439 @trusted 2440 bool evalinregister(elem *e) 2441 { 2442 if (config.exe == EX_WIN64 && e.Eoper == OPrelconst) 2443 return true; 2444 2445 if (e.Ecount == 0) /* elem is not a CSE, therefore */ 2446 /* we don't need to evaluate it */ 2447 /* in a register */ 2448 return false; 2449 if (!OTleaf(e.Eoper)) /* operators are always in register */ 2450 return true; 2451 2452 // Need to rethink this code if float or double can be CSE'd 2453 uint sz = tysize(e.Ety); 2454 if (e.Ecount == e.Ecomsub) /* elem is a CSE that needs */ 2455 /* to be generated */ 2456 { 2457 if ((I32 || I64) && 2458 //pass == BackendPass.final_ && // bug 8987 2459 sz <= REGSIZE) 2460 { 2461 // Do it only if at least 2 registers are available 2462 regm_t m = allregs & ~regcon.mvar; 2463 if (sz == 1) 2464 m &= BYTEREGS; 2465 if (m & (m - 1)) // if more than one register 2466 { // Need to be at least 3 registers available, as 2467 // addressing modes can use up 2. 2468 while (!(m & 1)) 2469 m >>= 1; 2470 m >>= 1; 2471 if (m & (m - 1)) 2472 return true; 2473 } 2474 } 2475 return false; 2476 } 2477 2478 /* Elem is now a CSE that might have been generated. If so, and */ 2479 /* it's in a register already, the computation should be done */ 2480 /* using that register. */ 2481 regm_t emask = 0; 2482 for (uint i = 0; i < regcon.cse.value.length; i++) 2483 if (regcon.cse.value[i] == e) 2484 emask |= mask(i); 2485 emask &= regcon.cse.mval; // mask of available CSEs 2486 if (sz <= REGSIZE) 2487 return emask != 0; /* the CSE is in a register */ 2488 else if (sz <= 2 * REGSIZE) 2489 return (emask & mMSW) && (emask & mLSW); 2490 return true; /* cop-out for now */ 2491 } 2492 2493 /******************************************************* 2494 * Return mask of scratch registers. 2495 */ 2496 2497 @trusted 2498 regm_t getscratch() 2499 { 2500 regm_t scratch = 0; 2501 if (pass == BackendPass.final_) 2502 { 2503 scratch = allregs & ~(regcon.mvar | regcon.mpvar | regcon.cse.mval | 2504 regcon.immed.mval | regcon.params | mfuncreg); 2505 } 2506 return scratch; 2507 } 2508 2509 /****************************** 2510 * Evaluate an elem that is a common subexp that has been encountered 2511 * before. 2512 * Look first to see if it is already in a register. 2513 */ 2514 2515 @trusted 2516 private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2517 { 2518 tym_t tym; 2519 regm_t regm,emask; 2520 reg_t reg; 2521 uint byte_,sz; 2522 2523 //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 2524 elem_debug(e); 2525 2526 debug 2527 { 2528 if (e.Ecomsub > e.Ecount) 2529 elem_print(e); 2530 } 2531 2532 assert(e.Ecomsub <= e.Ecount); 2533 2534 if (*pretregs == 0) // no possible side effects anyway 2535 { 2536 return; 2537 } 2538 2539 /* First construct a mask, emask, of all the registers that 2540 * have the right contents. 2541 */ 2542 emask = 0; 2543 for (uint i = 0; i < regcon.cse.value.length; i++) 2544 { 2545 //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]); 2546 if (regcon.cse.value[i] == e) // if contents are right 2547 emask |= mask(i); // turn on bit for reg 2548 } 2549 emask &= regcon.cse.mval; // make sure all bits are valid 2550 2551 if (emask & XMMREGS && *pretregs == mPSW) 2552 { } 2553 else if (tyxmmreg(e.Ety) && config.fpxmmregs) 2554 { 2555 if (*pretregs & (mST0 | mST01)) 2556 { 2557 regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1; 2558 comsub(cdb, e, &retregs); 2559 fixresult(cdb,e,retregs,pretregs); 2560 return; 2561 } 2562 } 2563 else if (tyfloating(e.Ety) && config.inline8087) 2564 { 2565 comsub87(cdb,e,pretregs); 2566 return; 2567 } 2568 2569 2570 /* create mask of CSEs */ 2571 regm_t csemask = CSE.mask(e); 2572 csemask &= ~emask; // stuff already in registers 2573 2574 debug if (debugw) 2575 { 2576 printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n", 2577 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask), 2578 regm_str(regcon.cse.mval),regm_str(regcon.mvar)); 2579 if (regcon.cse.mval & 1) 2580 elem_print(regcon.cse.value[0]); 2581 } 2582 2583 tym = tybasic(e.Ety); 2584 sz = _tysize[tym]; 2585 byte_ = sz == 1; 2586 2587 if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register 2588 { 2589 /* First see if it is already in a correct register */ 2590 2591 regm = emask & *pretregs; 2592 if (regm == 0) 2593 regm = emask; /* try any other register */ 2594 if (regm) /* if it's in a register */ 2595 { 2596 if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs) 2597 { 2598 regm = mask(findreg(regm)); 2599 fixresult(cdb,e,regm,pretregs); 2600 return; 2601 } 2602 } 2603 2604 if (OTleaf(e.Eoper)) /* if not op or func */ 2605 goto reload; /* reload data */ 2606 2607 foreach (ref cse; CSE.filter(e)) 2608 { 2609 regm_t retregs; 2610 2611 if (cse.flags & CSEsimple) 2612 { 2613 retregs = *pretregs; 2614 if (byte_ && !(retregs & BYTEREGS)) 2615 retregs = BYTEREGS; 2616 else if (!(retregs & allregs)) 2617 retregs = allregs; 2618 allocreg(cdb,&retregs,®,tym); 2619 code *cr = &cse.csimple; 2620 cr.setReg(reg); 2621 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1) 2622 cr.Irex |= REX; 2623 cdb.gen(cr); 2624 goto L10; 2625 } 2626 else 2627 { 2628 reflocal = true; 2629 cse.flags |= CSEload; 2630 if (*pretregs == mPSW) // if result in CCs only 2631 { 2632 if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety))) 2633 { 2634 retregs = XMMREGS; 2635 allocreg(cdb,&retregs,®,tym); 2636 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2637 regcon.cse.mval |= mask(reg); // cs is in a reg 2638 regcon.cse.value[reg] = e; 2639 fixresult(cdb,e,retregs,pretregs); 2640 } 2641 else 2642 { 2643 // CMP cs[BP],0 2644 gen_testcse(cdb, cse.e.Ety, sz, cse.slot); 2645 } 2646 } 2647 else 2648 { 2649 retregs = *pretregs; 2650 if (byte_ && !(retregs & BYTEREGS)) 2651 retregs = BYTEREGS; 2652 allocreg(cdb,&retregs,®,tym); 2653 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2654 L10: 2655 regcon.cse.mval |= mask(reg); // cs is in a reg 2656 regcon.cse.value[reg] = e; 2657 fixresult(cdb,e,retregs,pretregs); 2658 } 2659 } 2660 return; 2661 } 2662 2663 debug 2664 { 2665 printf("couldn't find cse e = %p, pass = %d\n",e,pass); 2666 elem_print(e); 2667 } 2668 assert(0); /* should have found it */ 2669 } 2670 else /* reg pair is req'd */ 2671 if (sz <= 2 * REGSIZE) 2672 { 2673 reg_t msreg,lsreg; 2674 2675 /* see if we have both */ 2676 if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP))) 2677 { /* we don't have both */ 2678 debug if (!OTleaf(e.Eoper)) 2679 { 2680 printf("e = %p, op = x%x, emask = %s, csemask = %s\n", 2681 e,e.Eoper,regm_str(emask),regm_str(csemask)); 2682 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW); 2683 elem_print(e); 2684 } 2685 2686 assert(OTleaf(e.Eoper)); /* must have both for operators */ 2687 goto reload; 2688 } 2689 2690 /* Look for right vals in any regs */ 2691 regm = *pretregs & mMSW; 2692 if (emask & regm) 2693 msreg = findreg(emask & regm); 2694 else if (emask & mMSW) 2695 msreg = findregmsw(emask); 2696 else /* reload from cse array */ 2697 { 2698 if (!regm) 2699 regm = mMSW & ALLREGS; 2700 allocreg(cdb,®m,&msreg,TYint); 2701 loadcse(cdb,e,msreg,mMSW); 2702 } 2703 2704 regm = *pretregs & (mLSW | mBP); 2705 if (emask & regm) 2706 lsreg = findreg(emask & regm); 2707 else if (emask & (mLSW | mBP)) 2708 lsreg = findreglsw(emask); 2709 else 2710 { 2711 if (!regm) 2712 regm = mLSW; 2713 allocreg(cdb,®m,&lsreg,TYint); 2714 loadcse(cdb,e,lsreg,mLSW | mBP); 2715 } 2716 2717 regm = mask(msreg) | mask(lsreg); /* mask of result */ 2718 fixresult(cdb,e,regm,pretregs); 2719 return; 2720 } 2721 else if (tym == TYdouble || tym == TYdouble_alias) // double 2722 { 2723 assert(I16); 2724 if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16) 2725 { 2726 static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d 2727 for (reg = 0; reg != NOREG; reg = dblreg[reg]) 2728 { 2729 assert(cast(int) reg >= 0 && reg <= 7); 2730 if (mask(reg) & csemask) 2731 loadcse(cdb,e,reg,mask(reg)); 2732 } 2733 regm = DOUBLEREGS_16; 2734 fixresult(cdb,e,regm,pretregs); 2735 return; 2736 } 2737 if (OTleaf(e.Eoper)) goto reload; 2738 2739 debug 2740 printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask)); 2741 2742 assert(0); 2743 } 2744 else 2745 { 2746 debug 2747 printf("e = %p, tym = x%x\n",e,tym); 2748 2749 assert(0); 2750 } 2751 2752 reload: /* reload result from memory */ 2753 switch (e.Eoper) 2754 { 2755 case OPrelconst: 2756 cdrelconst(cdb,e,pretregs); 2757 break; 2758 2759 case OPgot: 2760 if (config.exe & EX_posix) 2761 { 2762 cdgot(cdb,e,pretregs); 2763 break; 2764 } 2765 goto default; 2766 2767 default: 2768 if (*pretregs == mPSW && 2769 config.fpxmmregs && 2770 (tyxmmreg(tym) || tysimd(tym))) 2771 { 2772 regm_t retregs = XMMREGS | mPSW; 2773 loaddata(cdb,e,&retregs); 2774 cssave(e,retregs,false); 2775 return; 2776 } 2777 loaddata(cdb,e,pretregs); 2778 break; 2779 } 2780 cssave(e,*pretregs,false); 2781 } 2782 2783 2784 /***************************** 2785 * Load reg from cse save area on stack. 2786 */ 2787 2788 @trusted 2789 private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm) 2790 { 2791 foreach (ref cse; CSE.filter(e)) 2792 { 2793 //printf("CSE[%d] = %p, regm = %s\n", i, cse.e, regm_str(cse.regm)); 2794 if (cse.regm & regm) 2795 { 2796 reflocal = true; 2797 cse.flags |= CSEload; /* it was loaded */ 2798 regcon.cse.value[reg] = e; 2799 regcon.cse.mval |= mask(reg); 2800 getregs(cdb,mask(reg)); 2801 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2802 return; 2803 } 2804 } 2805 debug 2806 { 2807 printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm)); 2808 elem_print(e); 2809 } 2810 assert(0); 2811 } 2812 2813 /*************************** 2814 * Generate code sequence for an elem. 2815 * Input: 2816 * pretregs = mask of possible registers to return result in 2817 * Note: longs are in AX,BX or CX,DX or SI,DI 2818 * doubles are AX,BX,CX,DX only 2819 * constflag = 1 for user of result will not modify the 2820 * registers returned in *pretregs. 2821 * 2 for freenode() not called. 2822 * Output: 2823 * *pretregs mask of registers result is returned in 2824 * Returns: 2825 * pointer to code sequence generated 2826 */ 2827 2828 @trusted 2829 void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op) 2830 { 2831 (*cdxxx[op])(cdb,e,pretregs); 2832 } 2833 2834 // jump table 2835 private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx = 2836 [ 2837 OPunde: &cderr, 2838 OPadd: &cdorth, 2839 OPmul: &cdmul, 2840 OPand: &cdorth, 2841 OPmin: &cdorth, 2842 OPnot: &cdnot, 2843 OPcom: &cdcom, 2844 OPcond: &cdcond, 2845 OPcomma: &cdcomma, 2846 OPremquo: &cddiv, 2847 OPdiv: &cddiv, 2848 OPmod: &cddiv, 2849 OPxor: &cdorth, 2850 OPstring: &cderr, 2851 OPrelconst: &cdrelconst, 2852 OPinp: &cdport, 2853 OPoutp: &cdport, 2854 OPasm: &cdasm, 2855 OPinfo: &cdinfo, 2856 OPdctor: &cddctor, 2857 OPddtor: &cdddtor, 2858 OPctor: &cdctor, 2859 OPdtor: &cddtor, 2860 OPmark: &cdmark, 2861 OPvoid: &cdvoid, 2862 OPhalt: &cdhalt, 2863 OPnullptr: &cderr, 2864 OPpair: &cdpair, 2865 OPrpair: &cdpair, 2866 2867 OPor: &cdorth, 2868 OPoror: &cdloglog, 2869 OPandand: &cdloglog, 2870 OProl: &cdshift, 2871 OPror: &cdshift, 2872 OPshl: &cdshift, 2873 OPshr: &cdshift, 2874 OPashr: &cdshift, 2875 OPbit: &cderr, 2876 OPind: &cdind, 2877 OPaddr: &cderr, 2878 OPneg: &cdneg, 2879 OPuadd: &cderr, 2880 OPabs: &cdabs, 2881 OPtoprec: &cdtoprec, 2882 OPsqrt: &cdneg, 2883 OPsin: &cdneg, 2884 OPcos: &cdneg, 2885 OPscale: &cdscale, 2886 OPyl2x: &cdscale, 2887 OPyl2xp1: &cdscale, 2888 OPcmpxchg: &cdcmpxchg, 2889 OPrint: &cdneg, 2890 OPrndtol: &cdrndtol, 2891 OPstrlen: &cdstrlen, 2892 OPstrcpy: &cdstrcpy, 2893 OPmemcpy: &cdmemcpy, 2894 OPmemset: &cdmemset, 2895 OPstrcat: &cderr, 2896 OPstrcmp: &cdstrcmp, 2897 OPmemcmp: &cdmemcmp, 2898 OPsetjmp: &cdsetjmp, 2899 OPnegass: &cdaddass, 2900 OPpreinc: &cderr, 2901 OPpredec: &cderr, 2902 OPstreq: &cdstreq, 2903 OPpostinc: &cdpost, 2904 OPpostdec: &cdpost, 2905 OPeq: &cdeq, 2906 OPaddass: &cdaddass, 2907 OPminass: &cdaddass, 2908 OPmulass: &cdmulass, 2909 OPdivass: &cddivass, 2910 OPmodass: &cddivass, 2911 OPshrass: &cdshass, 2912 OPashrass: &cdshass, 2913 OPshlass: &cdshass, 2914 OPandass: &cdaddass, 2915 OPxorass: &cdaddass, 2916 OPorass: &cdaddass, 2917 2918 OPle: &cdcmp, 2919 OPgt: &cdcmp, 2920 OPlt: &cdcmp, 2921 OPge: &cdcmp, 2922 OPeqeq: &cdcmp, 2923 OPne: &cdcmp, 2924 2925 OPunord: &cdcmp, 2926 OPlg: &cdcmp, 2927 OPleg: &cdcmp, 2928 OPule: &cdcmp, 2929 OPul: &cdcmp, 2930 OPuge: &cdcmp, 2931 OPug: &cdcmp, 2932 OPue: &cdcmp, 2933 OPngt: &cdcmp, 2934 OPnge: &cdcmp, 2935 OPnlt: &cdcmp, 2936 OPnle: &cdcmp, 2937 OPord: &cdcmp, 2938 OPnlg: &cdcmp, 2939 OPnleg: &cdcmp, 2940 OPnule: &cdcmp, 2941 OPnul: &cdcmp, 2942 OPnuge: &cdcmp, 2943 OPnug: &cdcmp, 2944 OPnue: &cdcmp, 2945 2946 OPvp_fp: &cdcnvt, 2947 OPcvp_fp: &cdcnvt, 2948 OPoffset: &cdlngsht, 2949 OPnp_fp: &cdshtlng, 2950 OPnp_f16p: &cdfar16, 2951 OPf16p_np: &cdfar16, 2952 2953 OPs16_32: &cdshtlng, 2954 OPu16_32: &cdshtlng, 2955 OPd_s32: &cdcnvt, 2956 OPb_8: &cdcnvt, 2957 OPs32_d: &cdcnvt, 2958 OPd_s16: &cdcnvt, 2959 OPs16_d: &cdcnvt, 2960 OPd_u16: &cdcnvt, 2961 OPu16_d: &cdcnvt, 2962 OPd_u32: &cdcnvt, 2963 OPu32_d: &cdcnvt, 2964 OP32_16: &cdlngsht, 2965 OPd_f: &cdcnvt, 2966 OPf_d: &cdcnvt, 2967 OPd_ld: &cdcnvt, 2968 OPld_d: &cdcnvt, 2969 OPc_r: &cdconvt87, 2970 OPc_i: &cdconvt87, 2971 OPu8_16: &cdbyteint, 2972 OPs8_16: &cdbyteint, 2973 OP16_8: &cdlngsht, 2974 OPu32_64: &cdshtlng, 2975 OPs32_64: &cdshtlng, 2976 OP64_32: &cdlngsht, 2977 OPu64_128: &cdshtlng, 2978 OPs64_128: &cdshtlng, 2979 OP128_64: &cdlngsht, 2980 OPmsw: &cdmsw, 2981 2982 OPd_s64: &cdcnvt, 2983 OPs64_d: &cdcnvt, 2984 OPd_u64: &cdcnvt, 2985 OPu64_d: &cdcnvt, 2986 OPld_u64: &cdcnvt, 2987 OPparam: &cderr, 2988 OPsizeof: &cderr, 2989 OParrow: &cderr, 2990 OParrowstar: &cderr, 2991 OPcolon: &cderr, 2992 OPcolon2: &cderr, 2993 OPbool: &cdnot, 2994 OPcall: &cdfunc, 2995 OPucall: &cdfunc, 2996 OPcallns: &cdfunc, 2997 OPucallns: &cdfunc, 2998 OPstrpar: &cderr, 2999 OPstrctor: &cderr, 3000 OPstrthis: &cdstrthis, 3001 OPconst: &cderr, 3002 OPvar: &cderr, 3003 OPnew: &cderr, 3004 OPanew: &cderr, 3005 OPdelete: &cderr, 3006 OPadelete: &cderr, 3007 OPbrack: &cderr, 3008 OPframeptr: &cdframeptr, 3009 OPgot: &cdgot, 3010 3011 OPbsf: &cdbscan, 3012 OPbsr: &cdbscan, 3013 OPbtst: &cdbtst, 3014 OPbt: &cdbt, 3015 OPbtc: &cdbt, 3016 OPbtr: &cdbt, 3017 OPbts: &cdbt, 3018 3019 OPbswap: &cdbswap, 3020 OPpopcnt: &cdpopcnt, 3021 OPvector: &cdvector, 3022 OPvecsto: &cdvecsto, 3023 OPvecfill: &cdvecfill, 3024 OPva_start: &cderr, 3025 OPprefetch: &cdprefetch, 3026 ]; 3027 3028 3029 @trusted 3030 void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag) 3031 { 3032 Symbol *s; 3033 3034 debug if (debugw) 3035 { 3036 printf("+codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs),oper_str(e.Eoper)); 3037 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3038 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3039 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 3040 } 3041 3042 assert(e); 3043 elem_debug(e); 3044 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 3045 { 3046 debug 3047 { 3048 printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs)); 3049 elem_print(e); 3050 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3051 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3052 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 3053 } 3054 assert(0); 3055 } 3056 3057 if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar) 3058 *pretregs &= ~regcon.mvar; /* can't use register vars */ 3059 3060 uint op = e.Eoper; 3061 if (e.Ecount && e.Ecount != e.Ecomsub) // if common subexp 3062 { 3063 comsub(cdb,e,pretregs); 3064 goto L1; 3065 } 3066 3067 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3068 cdb.genlinnum(e.Esrcpos); 3069 3070 switch (op) 3071 { 3072 default: 3073 if (e.Ecount) /* if common subexp */ 3074 { 3075 /* if no return value */ 3076 if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0) 3077 { 3078 if (*pretregs & (mST0 | mST01)) 3079 { 3080 //printf("generate ST0 comsub for:\n"); 3081 //elem_print(e); 3082 3083 regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1; 3084 (*cdxxx[op])(cdb,e,&retregs); 3085 cssave(e,retregs,!OTleaf(op)); 3086 fixresult(cdb, e, retregs, pretregs); 3087 goto L1; 3088 } 3089 if (tysize(e.Ety) == 1) 3090 *pretregs |= BYTEREGS; 3091 else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs) 3092 *pretregs |= XMMREGS; 3093 else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias) 3094 *pretregs |= DOUBLEREGS; 3095 else 3096 *pretregs |= ALLREGS; /* make one */ 3097 } 3098 3099 /* BUG: For CSEs, make sure we have both an MSW */ 3100 /* and an LSW specified in *pretregs */ 3101 } 3102 assert(op <= OPMAX); 3103 (*cdxxx[op])(cdb,e,pretregs); 3104 break; 3105 3106 case OPrelconst: 3107 cdrelconst(cdb,e,pretregs); 3108 break; 3109 3110 case OPvar: 3111 if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg && 3112 (s.Sregm & *pretregs) == s.Sregm) 3113 { 3114 if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE) 3115 *pretregs &= mPSW | (s.Sregm & mLSW); 3116 else 3117 *pretregs &= mPSW | s.Sregm; 3118 } 3119 goto case OPconst; 3120 3121 case OPconst: 3122 if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile)) 3123 { 3124 switch (tybasic(e.Ety)) 3125 { 3126 case TYbool: 3127 case TYchar: 3128 case TYschar: 3129 case TYuchar: 3130 *pretregs |= BYTEREGS; 3131 break; 3132 3133 case TYnref: 3134 case TYnptr: 3135 case TYsptr: 3136 case TYcptr: 3137 case TYfgPtr: 3138 case TYimmutPtr: 3139 case TYsharePtr: 3140 case TYrestrictPtr: 3141 *pretregs |= I16 ? IDXREGS : ALLREGS; 3142 break; 3143 3144 case TYshort: 3145 case TYushort: 3146 case TYint: 3147 case TYuint: 3148 case TYlong: 3149 case TYulong: 3150 case TYllong: 3151 case TYullong: 3152 case TYcent: 3153 case TYucent: 3154 case TYfptr: 3155 case TYhptr: 3156 case TYvptr: 3157 *pretregs |= ALLREGS; 3158 break; 3159 3160 default: 3161 break; 3162 } 3163 } 3164 loaddata(cdb,e,pretregs); 3165 break; 3166 } 3167 cssave(e,*pretregs,!OTleaf(op)); 3168 L1: 3169 if (!(constflag & 2)) 3170 freenode(e); 3171 3172 debug if (debugw) 3173 { 3174 printf("-codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs), oper_str(op)); 3175 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3176 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3177 } 3178 } 3179 3180 /******************************* 3181 * Same as codelem(), but do not destroy the registers in keepmsk. 3182 * Use scratch registers as much as possible, then use stack. 3183 * Input: 3184 * constflag true if user of result will not modify the 3185 * registers returned in *pretregs. 3186 */ 3187 3188 @trusted 3189 void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag) 3190 { 3191 regm_t touse; 3192 3193 debug if (debugw) 3194 printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3195 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3196 3197 elem_debug(e); 3198 if (constflag) 3199 { 3200 regm_t regm; 3201 reg_t reg; 3202 3203 if (isregvar(e,®m,®) && // if e is a register variable 3204 (regm & *pretregs) == regm && // in one of the right regs 3205 e.EV.Voffset == 0 3206 ) 3207 { 3208 uint sz1 = tysize(e.Ety); 3209 uint sz2 = tysize(e.EV.Vsym.Stype.Tty); 3210 if (sz1 <= REGSIZE && sz2 > REGSIZE) 3211 regm &= mLSW | XMMREGS; 3212 fixresult(cdb,e,regm,pretregs); 3213 cssave(e,regm,0); 3214 freenode(e); 3215 3216 debug if (debugw) 3217 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3218 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3219 3220 return; 3221 } 3222 } 3223 regm_t overlap = msavereg & keepmsk; 3224 msavereg |= keepmsk; /* add to mask of regs to save */ 3225 regm_t oldregcon = regcon.cse.mval; 3226 regm_t oldregimmed = regcon.immed.mval; 3227 regm_t oldmfuncreg = mfuncreg; /* remember old one */ 3228 mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar; 3229 uint stackpushsave = stackpush; 3230 char calledafuncsave = calledafunc; 3231 calledafunc = 0; 3232 CodeBuilder cdbx; cdbx.ctor(); 3233 codelem(cdbx,e,pretregs,constflag); // generate code for the elem 3234 3235 regm_t tosave = keepmsk & ~msavereg; /* registers to save */ 3236 if (tosave) 3237 { 3238 cgstate.stackclean++; 3239 genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg); 3240 cgstate.stackclean--; 3241 } 3242 3243 /* Assert that no new CSEs are generated that are not reflected */ 3244 /* in mfuncreg. */ 3245 debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0) 3246 printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n", 3247 regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar)); 3248 3249 assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0); 3250 3251 /* bugzilla 3521 3252 * The problem is: 3253 * reg op (reg = exp) 3254 * where reg must be preserved (in keepregs) while the expression to be evaluated 3255 * must change it. 3256 * The only solution is to make this variable not a register. 3257 */ 3258 if (regcon.mvar & tosave) 3259 { 3260 //elem_print(e); 3261 //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave)); 3262 cgreg_unregister(regcon.mvar & tosave); 3263 } 3264 3265 /* which registers can we use to save other registers in? */ 3266 if (config.flags4 & CFG4space || // if optimize for space 3267 config.target_cpu >= TARGET_80486) // PUSH/POP ops are 1 cycle 3268 touse = 0; // PUSH/POP pairs are always shorter 3269 else 3270 { 3271 touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval); 3272 /* Don't use registers we'll have to save/restore */ 3273 touse &= ~(fregsaved & oldmfuncreg); 3274 /* Don't use registers that have constant values in them, since 3275 the code generated might have used the value. 3276 */ 3277 touse &= ~oldregimmed; 3278 } 3279 3280 CodeBuilder cdbs1; cdbs1.ctor(); 3281 code *cs2 = null; 3282 int adjesp = 0; 3283 3284 for (uint i = 0; tosave; i++) 3285 { 3286 regm_t mi = mask(i); 3287 3288 assert(i < REGMAX); 3289 if (mi & tosave) /* i = register to save */ 3290 { 3291 if (touse) /* if any scratch registers */ 3292 { 3293 uint j; 3294 for (j = 0; j < 8; j++) 3295 { 3296 regm_t mj = mask(j); 3297 3298 if (touse & mj) 3299 { 3300 genmovreg(cdbs1,j,i); 3301 cs2 = cat(genmovreg(i,j),cs2); 3302 touse &= ~mj; 3303 mfuncreg &= ~mj; 3304 regcon.used |= mj; 3305 break; 3306 } 3307 } 3308 assert(j < 8); 3309 } 3310 else // else use memory 3311 { 3312 CodeBuilder cdby; cdby.ctor(); 3313 uint size = gensaverestore(mask(i), cdbs1, cdby); 3314 cs2 = cat(cdby.finish(),cs2); 3315 if (size) 3316 { 3317 stackchanged = 1; 3318 adjesp += size; 3319 } 3320 } 3321 getregs(cdbx,mi); 3322 tosave &= ~mi; 3323 } 3324 } 3325 CodeBuilder cdbs2; cdbs2.ctor(); 3326 if (adjesp) 3327 { 3328 // If this is done an odd number of times, it 3329 // will throw off the 8 byte stack alignment. 3330 // We should *only* worry about this if a function 3331 // was called in the code generation by codelem(). 3332 int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1); 3333 if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 3334 { 3335 regm_t mval_save = regcon.immed.mval; 3336 regcon.immed.mval = 0; // prevent reghasvalue() optimizations 3337 // because c hasn't been executed yet 3338 cod3_stackadj(cdbs1, sz); 3339 regcon.immed.mval = mval_save; 3340 cdbs1.genadjesp(sz); 3341 3342 cod3_stackadj(cdbs2, -sz); 3343 cdbs2.genadjesp(-sz); 3344 } 3345 cdbs2.append(cs2); 3346 3347 3348 cdbs1.genadjesp(adjesp); 3349 cdbs2.genadjesp(-adjesp); 3350 } 3351 else 3352 cdbs2.append(cs2); 3353 3354 calledafunc |= calledafuncsave; 3355 msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save */ 3356 mfuncreg &= oldmfuncreg; /* update original */ 3357 3358 debug if (debugw) 3359 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3360 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3361 3362 cdb.append(cdbs1); 3363 cdb.append(cdbx); 3364 cdb.append(cdbs2); 3365 return; 3366 } 3367 3368 /********************************************* 3369 * Turn register mask into a string suitable for printing. 3370 */ 3371 3372 @trusted 3373 const(char)* regm_str(regm_t rm) 3374 { 3375 enum NUM = 10; 3376 enum SMAX = 128; 3377 __gshared char[SMAX + 1][NUM] str; 3378 __gshared int i; 3379 3380 if (rm == 0) 3381 return "0"; 3382 if (rm == ALLREGS) 3383 return "ALLREGS"; 3384 if (rm == BYTEREGS) 3385 return "BYTEREGS"; 3386 if (rm == allregs) 3387 return "allregs"; 3388 if (rm == XMMREGS) 3389 return "XMMREGS"; 3390 char *p = str[i].ptr; 3391 if (++i == NUM) 3392 i = 0; 3393 *p = 0; 3394 for (size_t j = 0; j < 32; j++) 3395 { 3396 if (mask(cast(uint)j) & rm) 3397 { 3398 strcat(p,regstring[j]); 3399 rm &= ~mask(cast(uint)j); 3400 if (rm) 3401 strcat(p,"|"); 3402 } 3403 } 3404 if (rm) 3405 { 3406 const pstrlen = strlen(p); 3407 char *s = p + pstrlen; 3408 snprintf(s, SMAX - pstrlen, "x%02x",rm); 3409 } 3410 assert(strlen(p) <= SMAX); 3411 return strdup(p); 3412 } 3413 3414 /********************************* 3415 * Scan down comma-expressions. 3416 * Output: 3417 * *pe = first elem down right side that is not an OPcomma 3418 * Returns: 3419 * code generated for left branches of comma-expressions 3420 */ 3421 3422 @trusted 3423 void docommas(ref CodeBuilder cdb,elem **pe) 3424 { 3425 uint stackpushsave = stackpush; 3426 int stackcleansave = cgstate.stackclean; 3427 cgstate.stackclean = 0; 3428 elem* e = *pe; 3429 while (1) 3430 { 3431 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3432 { 3433 cdb.genlinnum(e.Esrcpos); 3434 //e.Esrcpos.Slinnum = 0; // don't do it twice 3435 } 3436 if (e.Eoper != OPcomma) 3437 break; 3438 regm_t retregs = 0; 3439 codelem(cdb,e.EV.E1,&retregs,true); 3440 elem* eold = e; 3441 e = e.EV.E2; 3442 freenode(eold); 3443 } 3444 *pe = e; 3445 assert(cgstate.stackclean == 0); 3446 cgstate.stackclean = stackcleansave; 3447 genstackclean(cdb,stackpush - stackpushsave,0); 3448 } 3449 3450 /************************** 3451 * For elems in regcon that don't match regconsave, 3452 * clear the corresponding bit in regcon.cse.mval. 3453 * Do same for regcon.immed. 3454 */ 3455 3456 @trusted 3457 void andregcon(con_t *pregconsave) 3458 { 3459 regm_t m = ~1; 3460 for (int i = 0; i < REGMAX; i++) 3461 { 3462 if (pregconsave.cse.value[i] != regcon.cse.value[i]) 3463 regcon.cse.mval &= m; 3464 if (pregconsave.immed.value[i] != regcon.immed.value[i]) 3465 regcon.immed.mval &= m; 3466 m <<= 1; 3467 m |= 1; 3468 } 3469 //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval)); 3470 regcon.used |= pregconsave.used; 3471 regcon.cse.mval &= pregconsave.cse.mval; 3472 regcon.immed.mval &= pregconsave.immed.mval; 3473 regcon.params &= pregconsave.params; 3474 //printf("regcon.cse.mval®con.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops)); 3475 regcon.cse.mops &= regcon.cse.mval; 3476 } 3477 3478 3479 /********************************************** 3480 * Disassemble the code instruction bytes 3481 * Params: 3482 * code = array of instruction bytes 3483 */ 3484 @trusted 3485 private extern (D) 3486 void disassemble(ubyte[] code) 3487 { 3488 printf("%s:\n", funcsym_p.Sident.ptr); 3489 const model = I16 ? 16 : I32 ? 32 : 64; // 16/32/64 3490 size_t i = 0; 3491 while (i < code.length) 3492 { 3493 printf("%04x:", cast(int)i); 3494 uint pc; 3495 const sz = dmd.backend.disasm86.calccodsize(code, cast(uint)i, pc, model); 3496 3497 void put(char c) { printf("%c", c); } 3498 3499 dmd.backend.disasm86.getopstring(&put, code, cast(uint)i, sz, model, model == 16, true, 3500 null, null, null, null); 3501 printf("\n"); 3502 i += sz; 3503 } 3504 } 3505 3506 }