/**
 * Top level code for the code generator.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cgcod.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
 */

module dmd.backend.cgcod;

version = FRAMEPTR;

import core.bitop;
import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.cgcse;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.disasm86;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.pdata : win64_pdata;
import dmd.backend.rtlsym;
import dmd.backend.symtab;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

import dmd.backend.barray;


nothrow:
@safe:

alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

enum MARS = true;

import dmd.backend.dwarfdbginf : dwarf_except_gentables;

__gshared
{
bool floatreg;                  // !=0 if floating register is required

int hasframe;                   // !=0 if this function has a stack frame
bool enforcealign;              // enforced stack alignment
targ_size_t spoff;
targ_size_t Foff;               // BP offset of floating register
targ_size_t CSoff;              // offset of common sub expressions
targ_size_t NDPoff;             // offset of saved 8087 registers
targ_size_t pushoff;            // offset of saved registers
bool pushoffuse;                // using pushoff
int BPoff;                      // offset from BP
int EBPtoESP;                   // add to EBP offset to get ESP offset
LocalSection Para;              // section of function parameters
LocalSection Auto;              // section of automatics and registers
LocalSection Fast;              // section of fastpar
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for alloca() temporary

REGSAVE regsave;

CGstate cgstate;                // state of code generator

regm_t BYTEREGS = BYTEREGS_INIT;
regm_t ALLREGS = ALLREGS_INIT;


/************************************
 * # of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               /* set to !=0 if any use of the stack
                                   other than accessing parameters. Used
                                   to see if we can address parameters
                                   with ESP rather than EBP.
                                 */
int refparam;           // !=0 if we referenced any parameters
int reflocal;           // !=0 if we referenced any locals
bool anyiasm;           // !=0 if any inline assembler
char calledafunc;       // !=0 if we called a function
char needframe;         // if true, then we will need the frame
                        // pointer (BP for the 8088)
char gotref;            // !=0 if the GOTsym was referenced
uint usednteh;          // if !=0, then used NT exception handling
bool calledFinally;     // true if called a BC_finally block

/* Register contents    */
con_t regcon;

BackendPass pass;

private Symbol *retsym;          // set to symbol that should be placed in
                                 // register AX

/****************************
 * Register masks.
 */

regm_t msavereg;        // Mask of registers that we would like to save.
                        // they are temporaries (set by scodelem())
regm_t mfuncreg;        // Mask of registers preserved by a function

regm_t allregs;         // ALLREGS optionally including mBP

int dfoidx;                     /* which block we are in                */

targ_size_t funcoffset;         // offset of start of function
targ_size_t prolog_allocoffset; // offset past adj of stack allocation
targ_size_t startoffset;        // size of function entry code
targ_size_t retoffset;          /* offset from start of func to ret code */
targ_size_t retsize;            /* size of function return              */

private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs;

}

/*********************************
 * Generate code for a function.
 * Note at the end of this routine mfuncreg will contain the mask
 * of registers not affected by the function. Some minor optimization
 * possibilities are here.
 *
 * Code for the blocks may be generated more than once: while `pass` is not
 * BackendPass.final_ (and there is no inline assembler), register assignment
 * is attempted, the generated block code is freed, and generation restarts
 * at `tryagain`.
 * Params:
 *      sfunc = function to generate code for
 */
@trusted
void codgen(Symbol *sfunc)
{
    //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
    assert(sfunc == funcsym_p);
    assert(cseg == funcsym_p.Sseg);

    cgreg_init();
    CSE.initialize();
    cod3_initregs();
    allregs = ALLREGS;
    pass = BackendPass.initial;
    Alloca.initialize();
    anyiasm = 0;

    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        /* The dwarf unwinder relies on the function epilog to exist
         */
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCexit)
                b.BC = BCret;
        }
    }

tryagain:
    debug
    if (debugr)
        printf("------------------ PASS%s -----------------\n",
               (pass == BackendPass.initial) ? "init".ptr : ((pass == BackendPass.reg) ? "reg".ptr : "final".ptr));

    lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;

    // if no parameters, assume we don't need a stack frame
    needframe = 0;
    enforcealign = false;
    gotref = 0;
    stackchanged = 0;
    stackpush = 0;
    refparam = 0;
    calledafunc = 0;
    retsym = null;

    cgstate.stackclean = 1;
    cgstate.funcarg.initialize();
    cgstate.funcargtos = ~0;
    cgstate.accessedTLS = false;
    STACKALIGN = TARGET_STACKALIGN;

    regsave.reset();
    memset(global87.stack.ptr,0,global87.stack.sizeof);

    calledFinally = false;
    usednteh = 0;

    if (sfunc.Sfunc.Fflags3 & Fjmonitor &&
        config.exe & EX_windos)
        usednteh |= NTEHjmonitor;

    // Set on a trial basis, turning it off if anything might throw
    sfunc.Sfunc.Fflags3 |= Fnothrow;

    floatreg = false;
    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    CSE.start();
    memset(&regcon,0,regcon.sizeof);            // forget all register contents
    regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
    msavereg = 0;
    uint nretblocks = 0;
    mfuncreg = fregsaved;               // so we can see which are used
                                        // (bit is cleared each time
                                        //  we use one)
    assert(!(needframe && mfuncreg & mBP)); // needframe needs mBP

    for (block* b = startblock; b; b = b.Bnext)
    {
        memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
        if (b.Belem)
            resetEcomsub(b.Belem);     // reset all the Ecomsubs
        if (b.BC == BCasm)
            anyiasm = 1;                // we have inline assembler
        if (b.BC == BCret || b.BC == BCretexp)
            nretblocks++;
    }

    if (!config.fulltypes || (config.flags4 & CFG4optimized))
    {
        regm_t noparams = 0;
        foreach (s; globsym[])
        {
            s.Sflags &= ~SFLread;
            switch (s.Sclass)
            {
                case SC.fastpar:
                case SC.shadowreg:
                    regcon.params |= s.Spregm();
                    goto case SC.parameter;

                case SC.parameter:
                    if (s.Sfl == FLreg)
                        noparams |= s.Sregm;
                    break;

                default:
                    break;
            }
        }
        regcon.params &= ~noparams;
    }

    if (config.flags4 & CFG4optimized)
    {
        if (nretblocks == 0 &&                  // if no return blocks in function
            !(sfunc.ty() & mTYnaked))      // naked functions may have hidden ways of returning
            sfunc.Sflags |= SFLexit;       // mark function as never returning

        assert(dfo);

        cgreg_reset();
        foreach (i, b; dfo[])
        {
            dfoidx = cast(int)i;
            regcon.used = msavereg | regcon.cse.mval;   // registers already in use
            blcodgen(b);                        // gen code in depth-first order
            //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
            cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
        }
    }
    else
    {
        pass = BackendPass.final_;
        for (block* b = startblock; b; b = b.Bnext)
            blcodgen(b);                // generate the code for each block
    }
    regcon.immed.mval = 0;
    assert(!regcon.cse.mops);           // should have all been used

    // See which variables we can put into registers
    if (pass != BackendPass.final_ &&
        !anyiasm)                               // possible LEA or LES opcodes
    {
        allregs |= cod3_useBP();                // see if we can use EBP

        // If pic code, but EBX was never needed
        if (!(allregs & mask(PICREG)) && !gotref)
        {
            allregs |= mask(PICREG);            // EBX can now be used
            cgreg_assign(retsym);
            pass = BackendPass.reg;
        }
        else if (cgreg_assign(retsym))          // if we found some registers
            pass = BackendPass.reg;
        else
            pass = BackendPass.final_;
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
        goto tryagain;
    }
    cgreg_term();

    // See if we need to enforce a particular stack alignment
    foreach (s; globsym[])
    {
        if (Symbol_Sisdead(*s, anyiasm))
            continue;

        switch (s.Sclass)
        {
            case SC.register:
            case SC.auto_:
            case SC.fastpar:
                if (s.Sfl == FLreg)
                    break;

                const sz = type_alignsize(s.Stype);
                if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
                {
                    STACKALIGN = sz;
                    enforcealign = true;
                }
                break;

            default:
                break;
        }
    }

    stackoffsets(globsym, false);  // compute final offsets of stack variables
    cod5_prol_epi();            // see where to place prolog/epilog
    CSE.finish();               // compute addresses and sizes of CSE saves

    if (configv.addlinenumbers)
        objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));

    // Otherwise, jmp's to startblock will execute the prolog again
    assert(!startblock.Bpred);

    CodeBuilder cdbprolog; cdbprolog.ctor();
    prolog(cdbprolog);           // gen function start code
    code *cprolog = cdbprolog.finish();
    if (cprolog)
        pinholeopt(cprolog,null);       // optimize

    funcoffset = Offset(sfunc.Sseg);
    targ_size_t coffset = Offset(sfunc.Sseg);

    if (eecontext.EEelem)
        genEEcode();

    for (block* b = startblock; b; b = b.Bnext)
    {
        // We couldn't do this before because localsize was unknown
        switch (b.BC)
        {
            case BCret:
                if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
                {
                    CodeBuilder cdb; cdb.ctor();
                    cdb.append(b.Bcode);
                    cdb.genlinnum(b.Bsrcpos);
                    b.Bcode = cdb.finish();
                }
                goto case BCretexp;

            case BCretexp:
                epilog(b);
                break;

            default:
                if (b.Bflags & BFLepilog)
                    epilog(b);
                break;
        }
        assignaddr(b);                  // assign addresses
        pinholeopt(b.Bcode,b);         // do pinhole optimization
        if (b.Bflags & BFLprolog)      // do function prolog
        {
            startoffset = coffset + calcblksize(cprolog) - funcoffset;
            b.Bcode = cat(cprolog,b.Bcode);
        }
        cgsched_block(b);
        b.Bsize = calcblksize(b.Bcode);       // calculate block size
        if (b.Balign)
        {
            targ_size_t u = b.Balign - 1;
            coffset = (coffset + u) & ~u;
        }
        b.Boffset = coffset;           /* offset of this block         */
        coffset += b.Bsize;            /* offset of following block    */
    }

    debug
    debugw && printf("code addr complete\n");

    // Do jump optimization
    bool flag;
    do
    {
        flag = false;
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
                continue;
            int i = branch(b,0);            // see if jmp => jmp short
            if (i)                          // if any bytes saved
            {
                b.Bsize -= i;
                // Shrinking this block shifts every following block
                auto offset = b.Boffset + b.Bsize;
                for (block* bn = b.Bnext; bn; bn = bn.Bnext)
                {
                    if (bn.Balign)
                    {
                        targ_size_t u = bn.Balign - 1;
                        offset = (offset + u) & ~u;
                    }
                    bn.Boffset = offset;
                    offset += bn.Bsize;
                }
                coffset = offset;
                flag = true;
            }
        }
        if (!I16 && !(config.flags4 & CFG4optimized))
            break;                      // use the long conditional jmps
    } while (flag);                     // loop till no more bytes saved

    debug
    debugw && printf("code jump optimization complete\n");

    if (usednteh & NTEH_try)
    {
        // Do this before code is emitted because we patch some instructions
        nteh_filltables();
    }

    // Compute starting offset for switch tables
    targ_size_t swoffset;
    int jmpseg = -1;
    if (config.flags & CFGromable)
    {
        jmpseg = 0;
        swoffset = coffset;
    }

    // Emit the generated code
    if (eecontext.EEcompile == 1)
    {
        codout(sfunc.Sseg,eecontext.EEcode,null);
        code_free(eecontext.EEcode);
    }
    else
    {
        __gshared Barray!ubyte disasmBuf;
        disasmBuf.reset();

        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCjmptab || b.BC == BCswitch)
            {
                if (jmpseg == -1)
                {
                    jmpseg = objmod.jmpTableSegment(sfunc);
                    swoffset = Offset(jmpseg);
                }
                swoffset = _align(0,swoffset);
                b.Btableoffset = swoffset;     /* offset of sw tab */
                swoffset += b.Btablesize;
            }
            jmpaddr(b.Bcode);          /* assign jump addresses        */

            debug
            if (debugc)
            {
                printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
                    cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
                if (b.Bcode)
                    printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
            }

            if (b.Balign)
            {   uint u = b.Balign;
                uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1);

                cod3_align_bytes(sfunc.Sseg, nalign);
            }
            assert(b.Boffset == Offset(sfunc.Sseg));

            codout(sfunc.Sseg,b.Bcode,configv.vasm ? &disasmBuf : null);   // output code
        }
        if (coffset != Offset(sfunc.Sseg))
        {
            debug
            printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));

            assert(0);
        }
        sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function

        if (configv.vasm)
            disassemble(disasmBuf[]);                   // disassemble the code

        const nteh = usednteh & NTEH_try;
        if (nteh)
        {
            assert(!(config.flags & CFGromable));
            //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
            objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
        }

        // Write out switch tables
        for (block* b = startblock; b; b = b.Bnext)
        {
            switch (b.BC)
            {
                case BCjmptab:              /* if jump table                */
                    outjmptab(b);           /* write out jump table         */
                    goto default;

                case BCswitch:
                    outswitab(b);           /* write out switch table       */
                    goto default;

                case BCret:
                case BCretexp:
                    /* Compute offset to return code from start of function */
                    retoffset = b.Boffset + b.Bsize - retsize - funcoffset;

                    /* Add 3 bytes to retoffset in case we have an exception
                     * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
                     * IT FIXES THE PROBLEM HERE AS WELL.
                     */
                    if (usednteh & NTEH_try)
                        retoffset += 3;
                    break;

                default:
                    retoffset = b.Boffset + b.Bsize - funcoffset;
                    break;
            }
        }
        if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
            /* put line number at end of function on the
               start of the last instruction
             */
            /* Instead, try offset to cleanup code  */
            if (retoffset < sfunc.Ssize)
                objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);

        static if (MARS)
        {
            if (config.exe == EX_WIN64)
                win64_pdata(sfunc);
        }

        static if (MARS)
        {
            if (usednteh & NTEH_try)
            {
                // Do this before code is emitted because we patch some instructions
                nteh_gentables(sfunc);
            }
            if (usednteh & (EHtry | EHcleanup) &&   // saw BCtry or BC_try or OPddtor
                config.ehmethod == EHmethod.EH_DM)
            {
                except_gentables();
            }
            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                sfunc.Sfunc.Fstartblock = startblock;
                dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
                sfunc.Sfunc.Fstartblock = null;
            }
        }

        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
    }

    // Mask of regs saved
    // BUG: do interrupt functions save BP?
    tym_t functy = tybasic(sfunc.ty());
    sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved);

    debug
    if (global87.stackused != 0)
      printf("stackused = %d\n",global87.stackused);

    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    global87.save.dtor();       // clean up ndp save array
}

/*********************************************
 * Align sections on the stack.
607 * base negative offset of section from frame pointer 608 * alignment alignment to use 609 * bias difference between where frame pointer points and the STACKALIGNed 610 * part of the stack 611 * Returns: 612 * base revised downward so it is aligned 613 */ 614 @trusted 615 targ_size_t alignsection(targ_size_t base, uint alignment, int bias) 616 { 617 assert(cast(long)base <= 0); 618 if (alignment > STACKALIGN) 619 alignment = STACKALIGN; 620 if (alignment) 621 { 622 long sz = cast(long)(-base + bias); 623 assert(sz >= 0); 624 sz &= (alignment - 1); 625 if (sz) 626 base -= alignment - sz; 627 } 628 return base; 629 } 630 631 /******************************* 632 * Generate code for a function start. 633 * Input: 634 * Offset(cseg) address of start of code 635 * Auto.alignment 636 * Output: 637 * Offset(cseg) adjusted for size of code generated 638 * EBPtoESP 639 * hasframe 640 * BPoff 641 */ 642 @trusted 643 void prolog(ref CodeBuilder cdb) 644 { 645 bool enter; 646 647 //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment); 648 debug debugw && printf("funcstart()\n"); 649 regcon.immed.mval = 0; /* no values in registers yet */ 650 version (FRAMEPTR) 651 EBPtoESP = 0; 652 else 653 EBPtoESP = -REGSIZE; 654 hasframe = 0; 655 bool pushds = false; 656 BPoff = 0; 657 bool pushalloc = false; 658 tym_t tyf = funcsym_p.ty(); 659 tym_t tym = tybasic(tyf); 660 const farfunc = tyfarfunc(tym) != 0; 661 662 if (config.flags3 & CFG3ibt && !I16) 663 cdb.gen1(I32 ? ENDBR32 : ENDBR64); 664 665 // Special Intel 64 bit ABI prolog setup for variadic functions 666 Symbol *sv64 = null; // set to __va_argsave 667 if (I64 && variadic(funcsym_p.Stype)) 668 { 669 /* The Intel 64 bit ABI scheme. 670 * abi_sysV_amd64.pdf 671 * Load arguments passed in registers into the varargs save area 672 * so they can be accessed by va_arg(). 
673 */ 674 /* Look for __va_argsave 675 */ 676 for (SYMIDX si = 0; si < globsym.length; si++) 677 { 678 Symbol *s = globsym[si]; 679 if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0) 680 { 681 if (!(s.Sflags & SFLdead)) 682 sv64 = s; 683 break; 684 } 685 } 686 } 687 688 if (config.flags & CFGalwaysframe || 689 funcsym_p.Sfunc.Fflags3 & Ffakeeh || 690 /* The exception stack unwinding mechanism relies on the EBP chain being intact, 691 * so need frame if function can possibly throw 692 */ 693 !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) || 694 cgstate.accessedTLS || 695 sv64 696 ) 697 needframe = 1; 698 699 CodeBuilder cdbx; cdbx.ctor(); 700 701 Lagain: 702 spoff = 0; 703 char guessneedframe = needframe; 704 int cfa_offset = 0; 705 // if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru))) 706 // usednteh |= NTEHpassthru; 707 708 /* Compute BP offsets for variables on stack. 709 * The organization is: 710 * Para.size parameters 711 * -------- stack is aligned to STACKALIGN 712 * seg of return addr (if far function) 713 * IP of return addr 714 * BP. caller's BP 715 * DS (if Windows prolog/epilog) 716 * exception handling context symbol 717 * Fast.size fastpar 718 * Auto.size autos and regs 719 * regsave.off any saved registers 720 * Foff floating register 721 * Alloca.size alloca temporary 722 * CSoff common subs 723 * NDPoff any 8087 saved registers 724 * monitor context record 725 * any saved registers 726 */ 727 728 if (tym == TYifunc) 729 Para.size = 26; // how is this number derived? 730 else 731 { 732 version (FRAMEPTR) 733 { 734 bool frame = needframe || tyf & mTYnaked; 735 Para.size = ((farfunc ? 2 : 1) + frame) * REGSIZE; 736 if (frame) 737 EBPtoESP = -REGSIZE; 738 } 739 else 740 Para.size = ((farfunc ? 
2 : 1) + 1) * REGSIZE; 741 } 742 743 /* The real reason for the FAST section is because the implementation of contracts 744 * requires a consistent stack frame location for the 'this' pointer. But if varying 745 * stuff in Auto.offset causes different alignment for that section, the entire block can 746 * shift around, causing a crash in the contracts. 747 * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their 748 * own FAST section, which is never aligned at a size bigger than REGSIZE, and so 749 * its alignment never shifts around. 750 * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned 751 * individually rather than as a group. 752 */ 753 Fast.size = 0; 754 static if (NTEXCEPTIONS == 2) 755 { 756 Fast.size -= nteh_contextsym_size(); 757 if (config.exe & EX_windos) 758 { 759 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0) 760 Fast.size -= 5 * 4; 761 } 762 } 763 764 /* Despite what the comment above says, aligning Fast section to size greater 765 * than REGSIZE does not break contract implementation. Fast.offset and 766 * Fast.alignment must be the same for the overriding and 767 * the overridden function, since they have the same parameters. Fast.size 768 * must be the same because otherwise, contract inheritance wouldn't work 769 * even if we didn't align Fast section to size greater than REGSIZE. Therefore, 770 * the only way aligning the section could cause problems with contract 771 * inheritance is if bias (declared below) differed for the overridden 772 * and the overriding function. 773 * 774 * Bias depends on Para.size and needframe. The value of Para.size depends on 775 * whether the function is an interrupt handler and whether it is a farfunc. 776 * DMD does not have _interrupt attribute and D does not make a distinction 777 * between near and far functions, so Para.size should always be 2 * REGSIZE 778 * for D. 
779 * 780 * The value of needframe depends on a global setting that is only set 781 * during backend's initialization and on function flag Ffakeeh. On Windows, 782 * that flag is always set for virtual functions, for which contracts are 783 * defined and on other platforms, it is never set. Because of that 784 * the value of neadframe should always be the same for the overridden 785 * and the overriding function, and so bias should be the same too. 786 */ 787 788 version (FRAMEPTR) 789 int bias = enforcealign ? 0 : cast(int)(Para.size); 790 else 791 int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE)); 792 793 if (Fast.alignment < REGSIZE) 794 Fast.alignment = REGSIZE; 795 796 Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias); 797 798 if (Auto.alignment < REGSIZE) 799 Auto.alignment = REGSIZE; // necessary because localsize must be REGSIZE aligned 800 Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias); 801 802 regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias); 803 //printf("regsave.off = x%x, size = x%x, alignment = %x\n", 804 //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment); 805 806 if (floatreg) 807 { 808 uint floatregsize = config.fpxmmregs || I32 ? 
16 : DOUBLESIZE; 809 Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias); 810 //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize); 811 } 812 else 813 Foff = regsave.off; 814 815 Alloca.alignment = REGSIZE; 816 Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias); 817 818 CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias); 819 //printf("CSoff = x%x, size = x%x, alignment = %x\n", 820 //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment); 821 822 NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias); 823 824 regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving 825 pushoffuse = false; 826 pushoff = NDPoff; 827 /* We don't keep track of all the pushes and pops in a function. Hence, 828 * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder 829 * won't be setting ESP correctly. With pushoffuse, the registers are restored 830 * from EBP, which is kept track of properly. 831 */ 832 if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64)) 833 { 834 /* Instead of pushing the registers onto the stack one by one, 835 * allocate space in the stack frame and copy/restore them there. 836 */ 837 int xmmtopush = popcnt(topush & XMMREGS); // XMM regs take 16 bytes 838 int gptopush = popcnt(topush) - xmmtopush; // general purpose registers to save 839 if (NDPoff || xmmtopush || cgstate.funcarg.size) 840 { 841 pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16), 842 xmmtopush ? STACKALIGN : REGSIZE, bias); 843 pushoffuse = true; // tell others we're using this strategy 844 } 845 } 846 847 //printf("Fast.size = x%x, Auto.size = x%x\n", cast(int)Fast.size, cast(int)Auto.size); 848 849 cgstate.funcarg.alignment = STACKALIGN; 850 /* If the function doesn't need the extra alignment, don't do it. 
851 * Can expand on this by allowing for locals that don't need extra alignment 852 * and calling functions that don't need it. 853 */ 854 if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64)) 855 { 856 cgstate.funcarg.alignment = I64 ? 8 : 4; 857 } 858 859 //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias); 860 cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias); 861 862 localsize = -cgstate.funcarg.offset; 863 864 //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n", 865 //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize); 866 assert(cast(targ_ptrdiff_t)localsize >= 0); 867 868 // Keep the stack aligned by 8 for any subsequent function calls 869 if (!I16 && calledafunc && 870 (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 871 { 872 int npush = popcnt(topush); // number of registers that need saving 873 npush += popcnt(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice 874 if (pushoffuse) 875 npush = 0; 876 877 //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n", 878 //npush, Para.size, needframe, localsize); 879 880 int sz = cast(int)(localsize + npush * REGSIZE); 881 if (!enforcealign) 882 { 883 version (FRAMEPTR) 884 sz += Para.size; 885 else 886 sz += Para.size + (needframe ? 
0 : -REGSIZE); 887 } 888 if (sz & (STACKALIGN - 1)) 889 localsize += STACKALIGN - (sz & (STACKALIGN - 1)); 890 } 891 cgstate.funcarg.offset = -localsize; 892 893 //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n", 894 //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize); 895 896 uint xlocalsize = cast(uint)localsize; // amount to subtract from ESP to make room for locals 897 898 if (tyf & mTYnaked) // if no prolog/epilog for function 899 { 900 hasframe = 1; 901 return; 902 } 903 904 if (tym == TYifunc) 905 { 906 prolog_ifunc(cdbx,&tyf); 907 hasframe = 1; 908 cdb.append(cdbx); 909 goto Lcont; 910 } 911 912 /* Determine if we need BP set up */ 913 if (enforcealign) 914 { 915 // we need BP to reset the stack before return 916 // otherwise the return address is lost 917 needframe = 1; 918 } 919 else if (config.flags & CFGalwaysframe) 920 needframe = 1; 921 else 922 { 923 if (localsize) 924 { 925 if (I16 || 926 !(config.flags4 & CFG4speed) || 927 config.target_cpu < TARGET_Pentium || 928 farfunc || 929 config.flags & CFGstack || 930 xlocalsize >= 0x1000 || 931 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 932 anyiasm || 933 Alloca.size 934 ) 935 { 936 needframe = 1; 937 } 938 } 939 if (refparam && (anyiasm || I16)) 940 needframe = 1; 941 } 942 943 if (needframe) 944 { 945 assert(mfuncreg & mBP); // shouldn't have used mBP 946 947 if (!guessneedframe) // if guessed wrong 948 goto Lagain; 949 } 950 951 if (I16 && config.wflags & WFwindows && farfunc) 952 { 953 prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds); 954 enter = false; // don't use ENTER instruction 955 hasframe = 1; // we have a stack frame 956 } 957 else if (needframe) // if variables or parameters 958 { 959 prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset); 960 hasframe = 1; 961 } 962 963 /* Align the stack if necessary */ 964 prolog_stackalign(cdbx); 965 966 /* Subtract from stack 
pointer the size of the local stack frame 967 */ 968 if (config.flags & CFGstack) // if stack overflow check 969 { 970 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 971 if (Alloca.size) 972 prolog_setupalloca(cdbx); 973 } 974 else if (needframe) /* if variables or parameters */ 975 { 976 if (xlocalsize) /* if any stack offset */ 977 { 978 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 979 if (Alloca.size) 980 prolog_setupalloca(cdbx); 981 } 982 else 983 assert(Alloca.size == 0); 984 } 985 else if (xlocalsize) 986 { 987 assert(I32 || I64); 988 prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc); 989 version (FRAMEPTR) { } else 990 BPoff += REGSIZE; 991 } 992 else 993 assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor)); 994 EBPtoESP += xlocalsize; 995 if (hasframe) 996 EBPtoESP += REGSIZE; 997 998 /* Win64 unwind needs the amount of code generated so far 999 */ 1000 if (config.exe == EX_WIN64) 1001 { 1002 code *c = cdbx.peek(); 1003 pinholeopt(c, null); 1004 prolog_allocoffset = calcblksize(c); 1005 } 1006 1007 if (usednteh & NTEHjmonitor) 1008 { Symbol *sthis; 1009 1010 for (SYMIDX si = 0; 1; si++) 1011 { assert(si < globsym.length); 1012 sthis = globsym[si]; 1013 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0) 1014 break; 1015 } 1016 nteh_monitor_prolog(cdbx,sthis); 1017 EBPtoESP += 3 * 4; 1018 } 1019 1020 cdb.append(cdbx); 1021 prolog_saveregs(cdb, topush, cfa_offset); 1022 1023 Lcont: 1024 1025 if (config.exe == EX_WIN64) 1026 { 1027 if (variadic(funcsym_p.Stype)) 1028 prolog_gen_win64_varargs(cdb); 1029 prolog_loadparams(cdb, tyf, pushalloc); 1030 return; 1031 } 1032 1033 prolog_ifunc2(cdb, tyf, tym, pushds); 1034 1035 static if (NTEXCEPTIONS == 2) 1036 { 1037 if (usednteh & NTEH_except) 1038 nteh_setsp(cdb, 0x89); // MOV __context[EBP].esp,ESP 1039 } 1040 1041 // Load register parameters off of the stack. Do not use 1042 // assignaddr(), as it will replace the stack reference with 1043 // the register! 
1044 prolog_loadparams(cdb, tyf, pushalloc); 1045 1046 if (sv64) 1047 prolog_genvarargs(cdb, sv64); 1048 1049 /* Alignment checks 1050 */ 1051 //assert(Auto.alignment <= STACKALIGN); 1052 //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0); 1053 } 1054 1055 /************************************ 1056 * Predicate for sorting auto symbols for qsort(). 1057 * Returns: 1058 * < 0 s1 goes farther from frame pointer 1059 * > 0 s1 goes nearer the frame pointer 1060 * = 0 no difference 1061 */ 1062 1063 @trusted 1064 extern (C) int 1065 autosort_cmp(scope const void *ps1, scope const void *ps2) 1066 { 1067 Symbol *s1 = *cast(Symbol **)ps1; 1068 Symbol *s2 = *cast(Symbol **)ps2; 1069 1070 /* Largest align size goes furthest away from frame pointer, 1071 * so they get allocated first. 1072 */ 1073 uint alignsize1 = Symbol_Salignsize(*s1); 1074 uint alignsize2 = Symbol_Salignsize(*s2); 1075 if (alignsize1 < alignsize2) 1076 return 1; 1077 else if (alignsize1 > alignsize2) 1078 return -1; 1079 1080 /* move variables nearer the frame pointer that have higher Sweights 1081 * because addressing mode is fewer bytes. Grouping together high Sweight 1082 * variables also may put them in the same cache 1083 */ 1084 if (s1.Sweight < s2.Sweight) 1085 return -1; 1086 else if (s1.Sweight > s2.Sweight) 1087 return 1; 1088 1089 /* More: 1090 * 1. put static arrays nearest the frame pointer, so buffer overflows 1091 * can't change other variable contents 1092 * 2. Do the coloring at the byte level to minimize stack usage 1093 */ 1094 return 0; 1095 } 1096 1097 /****************************** 1098 * Compute stack frame offsets for local variables. 1099 * that did not make it into registers. 
* Params:
 *      symtab = function's symbol table
 *      estimate = true for do estimate only, false for final
 *
 * Resets the Para, Fast, Auto and EEStack sections and then assigns
 * `Soffset` to each symbol according to its storage class. When
 * `estimate` is set and CFG4optimized is on, autos are collected first,
 * sorted with autosort_cmp(), and may share a slot with an earlier auto
 * whose live range (`Srange`) is disjoint.
 */
@trusted
void stackoffsets(ref symtab_t symtab, bool estimate)
{
    //printf("stackoffsets() %s\n", funcsym_p.Sident.ptr);

    Para.initialize();          // parameter offset
    Fast.initialize();          // SCfastpar offset
    Auto.initialize();          // automatic & register offset
    EEStack.initialize();       // for SCstack's

    // Set if doing optimization of auto layout
    bool doAutoOpt = estimate && config.flags4 & CFG4optimized;

    // Put autos in another array so we can do optimizations on the stack layout
    Symbol*[10] autotmp = void;
    Symbol **autos = null;
    if (doAutoOpt)
    {
        if (symtab.length <= autotmp.length)
            autos = autotmp.ptr;
        else
        {   autos = cast(Symbol **)malloc(symtab.length * (*autos).sizeof);
            assert(autos);
        }
    }
    size_t autosi = 0;  // number used in autos[]

    for (int si = 0; si < symtab.length; si++)
    {   Symbol *s = symtab[si];

        /* Don't allocate space for dead or zero size parameters
         */
        switch (s.Sclass)
        {
            case SC.fastpar:
                if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
                    goto Ldefault;   // don't need consistent stack frame
                break;

            case SC.parameter:
                if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
                {
                    Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
                    s.Soffset = Para.offset;
                    continue;
                }
                break;          // allocate even if it's dead

            case SC.shadowreg:
                break;          // allocate even if it's dead

            default:
            Ldefault:
                if (Symbol_Sisdead(*s, anyiasm))
                    continue;       // don't allocate space
                break;
        }

        targ_size_t sz = type_size(s.Stype);
        if (sz == 0)
            sz++;               // can't handle 0 length structs

        uint alignsize = Symbol_Salignsize(*s);
        if (alignsize > STACKALIGN)
            alignsize = STACKALIGN;         // no point if the stack is less aligned

        //printf("symbol '%s', size = %d, alignsize = %d, read = %x\n",s.Sident.ptr, cast(int)sz, cast(int)alignsize, s.Sflags & SFLread);
        assert(cast(int)sz >= 0);

        switch (s.Sclass)
        {
            case SC.fastpar:
                /* Get these
                 * right next to the stack frame pointer, EBP.
                 * Needed so we can call nested contract functions
                 * frequire and fensure.
                 */
                if (s.Sfl == FLreg)        // if allocated in register
                    continue;
                /* Needed because storing fastpar's on the stack in prolog()
                 * does the entire register
                 */
                if (sz < REGSIZE)
                    sz = REGSIZE;

                Fast.offset = _align(sz,Fast.offset);
                s.Soffset = Fast.offset;
                Fast.offset += sz;
                //printf("fastpar '%s' sz = %d, fast offset = x%x, %p\n", s.Sident, cast(int) sz, cast(int) s.Soffset, s);

                if (alignsize > Fast.alignment)
                    Fast.alignment = alignsize;
                break;

            case SC.register:
            case SC.auto_:
                if (s.Sfl == FLreg)        // if allocated in register
                    break;

                if (doAutoOpt)
                {   autos[autosi++] = s;  // deal with later
                    break;
                }

                Auto.offset = _align(sz,Auto.offset);
                s.Soffset = Auto.offset;
                Auto.offset += sz;
                //printf("auto    '%s' sz = %d, auto offset =  x%lx\n", s.Sident,sz, cast(long) s.Soffset);

                if (alignsize > Auto.alignment)
                    Auto.alignment = alignsize;
                break;

            case SC.stack:
                EEStack.offset = _align(sz,EEStack.offset);
                s.Soffset = EEStack.offset;
                //printf("EEStack.offset =  x%lx\n",cast(long)s.Soffset);
                EEStack.offset += sz;
                break;

            case SC.shadowreg:
            case SC.parameter:
                if (config.exe == EX_WIN64)
                {
                    // Win64: every parameter slot is 8 bytes
                    assert((Para.offset & 7) == 0);
                    s.Soffset = Para.offset;
                    Para.offset += 8;
                    break;
                }
                /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
                 * but are 4 byte aligned on the OSX 32 stack.
                 */
                Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
                if (alignsize >= 16 &&
                    (I64 || (config.exe == EX_OSX &&
                         (tyaggregate(s.ty()) || tyvector(s.ty())))))
                    Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
                s.Soffset = Para.offset;
                //printf("%s param offset =  x%lx, alignsize = %d\n", s.Sident, cast(long) s.Soffset, cast(int) alignsize);
                Para.offset += (s.Sflags & SFLdouble)
                            ? type_size(tstypes[TYdouble])   // float passed as double
                            : type_size(s.Stype);
                break;

            case SC.pseudo:
            case SC.static_:
            case SC.bprel:
                break;
            default:
                symbol_print(s);
                assert(0);
        }
    }

    if (autosi)
    {
        qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);

        // tbl bit i is set when autos[i] owns a slot that later autos may share
        vec_t tbl = vec_calloc(autosi);

        for (size_t si = 0; si < autosi; si++)
        {
            Symbol *s = autos[si];

            targ_size_t sz = type_size(s.Stype);
            if (sz == 0)
                sz++;               // can't handle 0 length structs

            uint alignsize = Symbol_Salignsize(*s);
            if (alignsize > STACKALIGN)
                alignsize = STACKALIGN;         // no point if the stack is less aligned

            /* See if we can share storage with another variable
             * if their live ranges do not overlap.
             */
            if (// Don't share because could stomp on variables
                // used in finally blocks
                !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
                s.Srange && !(s.Sflags & SFLspill))
            {
                for (size_t i = 0; i < si; i++)
                {
                    if (!vec_testbit(i,tbl))
                        continue;
                    Symbol *sp = autos[i];
                    //printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
                    if (vec_disjoint(s.Srange,sp.Srange) &&
                        !(sp.Soffset & (alignsize - 1)) &&
                        sz <= type_size(sp.Stype))
                    {
                        // merge the ranges so the slot is not handed out
                        // again to a symbol overlapping either occupant
                        vec_or(sp.Srange,sp.Srange,s.Srange);
                        //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
                        s.Soffset = sp.Soffset;
                        goto L2;
                    }
                }
            }
            Auto.offset = _align(sz,Auto.offset);
            s.Soffset = Auto.offset;
            //printf("auto    '%s' sz = %d, auto offset =  x%lx\n", s.Sident, sz, cast(long) s.Soffset);
            Auto.offset += sz;
            if (s.Srange && !(s.Sflags & SFLspill))
                vec_setbit(si,tbl);

            if (alignsize > Auto.alignment)
                Auto.alignment = alignsize;
        L2: { }
        }

        vec_free(tbl);
    }

    /* Release the heap copy of autos[] even when no auto was queued
     * (autosi == 0); otherwise the malloc'd buffer would leak.
     * free(null) is a no-op, which covers the !doAutoOpt case.
     */
    if (autos != autotmp.ptr)
        free(autos);
}

/****************************
 * Generate code for a block.
 * Params:
 *      bl = block to generate code for; on return bl.Bcode holds the
 *           generated code and bl.Bregcon the register state at block exit
 */

@trusted
private void blcodgen(block *bl)
{
    regm_t mfuncregsave = mfuncreg;

    //dbg_printf("blcodgen(%p)\n",bl);

    /* Determine existing immediate values in registers by ANDing
        together the values from all the predecessors of b.
     */
    assert(bl.Bregcon.immed.mval == 0);
    regcon.immed.mval = 0;      // assume no previous contents in registers
//    regcon.cse.mval = 0;
    foreach (bpl; ListRange(bl.Bpred))
    {
        block *bp = list_block(bpl);

        if (bpl == bl.Bpred)
        {   // first predecessor: start from its state
            regcon.immed = bp.Bregcon.immed;
            regcon.params = bp.Bregcon.params;
//          regcon.cse = bp.Bregcon.cse;
        }
        else
        {
            int i;

            regcon.params &= bp.Bregcon.params;
            if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0)
                // Actual values must match, too
                for (i = 0; i < REGMAX; i++)
                {
                    if (regcon.immed.value[i] != bp.Bregcon.immed.value[i])
                        regcon.immed.mval &= ~mask(i);
                }
        }
    }
    regcon.cse.mops &= regcon.cse.mval;

    // Set regcon.mvar according to what variables are in registers for this block
    CodeBuilder cdb; cdb.ctor();
    regcon.mvar = 0;
    regcon.mpvar = 0;
    regcon.indexregs = 1;
    int anyspill = 0;           // 1 + index of the last spilled symbol live in this block
    char *sflsave = null;       // saved Sfl of every globsym entry
    if (config.flags4 & CFG4optimized)
    {
        CodeBuilder cdbload; cdbload.ctor();
        CodeBuilder cdbstore; cdbstore.ctor();

        sflsave = cast(char *) alloca(globsym.length * char.sizeof);
        for (SYMIDX i = 0; i < globsym.length; i++)
        {
            Symbol *s = globsym[i];

            sflsave[i] = s.Sfl;
            if (regParamInPreg(s) &&
                regcon.params & s.Spregm() &&
                vec_testbit(dfoidx,s.Srange))
            {
//                regcon.used |= s.Spregm();
            }

            if (s.Sfl == FLreg)
            {
                if (vec_testbit(dfoidx,s.Srange))
                {
                    regcon.mvar |= s.Sregm;
                    if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                        regcon.mpvar |= s.Sregm;
                }
            }
            else if (s.Sflags & SFLspill)
            {
                if (vec_testbit(dfoidx,s.Srange))
                {
                    anyspill = cast(int)(i + 1);
                    cgreg_spillreg_prolog(bl,s,cdbstore,cdbload);
                    if (vec_testbit(dfoidx,s.Slvreg))
                    {
                        // treat the symbol as a register variable for this block
                        s.Sfl = FLreg;
                        regcon.mvar |= s.Sregm;
                        regcon.cse.mval &= ~s.Sregm;
                        regcon.immed.mval &= ~s.Sregm;
                        regcon.params &= ~s.Sregm;
                        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                            regcon.mpvar |= s.Sregm;
                    }
                }
            }
        }
        if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
        {
            cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval);
        }
        cdb.append(cdbstore);
        cdb.append(cdbload);
        mfuncreg &= ~regcon.mvar;               // use these registers
        regcon.used |= regcon.mvar;

        // Determine if we have more than 1 uncommitted index register
        regcon.indexregs = IDXREGS & ~regcon.mvar;
        regcon.indexregs &= regcon.indexregs - 1;
    }

    /* This doesn't work when calling the BC_finally function,
     * as it is one block calling another.
     */
    //regsave.idx = 0;

    reflocal = 0;
    int refparamsave = refparam;
    refparam = 0;
    assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);

    outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
    bl.Bcode = cdb.finish();

    for (int i = 0; i < anyspill; i++)
    {
        Symbol *s = globsym[i];
        s.Sfl = sflsave[i];     // undo block register assignments
    }

    if (reflocal)
        bl.Bflags |= BFLreflocal;
    if (refparam)
        bl.Bflags |= BFLrefparam;
    refparam |= refparamsave;   // keep the function-wide flag accumulated
    bl.Bregcon.immed = regcon.immed;
    bl.Bregcon.cse = regcon.cse;
    bl.Bregcon.used = regcon.used;
    bl.Bregcon.params = regcon.params;

    debug
    debugw && printf("code gen complete\n");
}

/******************************
 * Given a register mask, find and return the number
 * of the first register that fits.
*/

@trusted
reg_t findreg(regm_t regm)
{
    return findreg(regm, __LINE__, __FILE__);
}

/******************************
 * Worker for findreg(): returns the index of the lowest bit set in regm.
 * Params:
 *      regm = register mask to search; must not be empty
 *      line = line number, printed in debug builds on failure
 *      file = file name, printed in debug builds on failure
 * Returns:
 *      number of the lowest register present in regm; asserts if regm is 0
 */
@trusted
reg_t findreg(regm_t regm, int line, const(char)* file)
{
  debug
    regm_t regmsave = regm;

    reg_t idx = 0;
    for (;;)
    {
        if ((regm & 0xF) == 0)
        {
            // low nibble is empty: step over four registers at once
            regm >>= 4;
            idx += 4;
            if (regm == 0)
                break;          // nothing left: the mask was empty
        }
        if (regm & 1)
            return idx;
        regm >>= 1;
        ++idx;
    }

  debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regmsave),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

//    *(char*)0=0;
    assert(0);
}

/***************
 * Release one use of an elem (its leaves are assumed to be freed already).
 * Ecount is deliberately left alone so it stays possible to detect that
 * the common subexpression has already been evaluated.
 * Once the last use of a CSE is gone, every reference to it in the
 * register contents and in the CSE table is dropped.
 */

@trusted
void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // usage count: decrement always, bail out while uses were outstanding
    const bool hadUses = e.Ecomsub != 0;
    e.Ecomsub--;
    if (hadUses)
        return;

    if (!e.Ecount)              // not a CSE, nothing registered for it
        return;

    for (size_t i = 0; i < regcon.cse.value.length; i++)
    {
        if (regcon.cse.value[i] != e)
            continue;
        // a register is holding it: free masks
        const regm_t bit = mask(cast(uint)i);
        regcon.cse.mval &= ~bit;
        regcon.cse.mops &= ~bit;
    }
    CSE.remove(e);
}

/*********************************
 * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode().
*/

@trusted
private void resetEcomsub(elem *e)
{
    // Recurse into the right operand of binary nodes, iterate down the left.
    for (;;)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;
        const op = e.Eoper;
        if (OTleaf(op))
            return;
        if (OTbinary(op))
            resetEcomsub(e.EV.E2);
        e = e.EV.E1;
    }
}

/*********************************
 * Determine if elem e is a register variable.
 * Params:
 *      e = elem to test
 *      pregm = set to mask of registers that make up the variable,
 *              otherwise not changed
 *      preg = set to the selected register of the variable (Sregmsw when
 *             the elem's offset is REGSIZE, else Sreglsw), otherwise not changed
 * Returns:
 *      true if register variable
 */

@trusted
bool isregvar(elem *e, ref regm_t pregm, ref reg_t preg)
{
    regm_t regm;
    reg_t reg;

    elem_debug(e);
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol* sym = e.EV.Vsym;
    if (sym.Sfl == FLreg)
    {
        if (sym.Sclass == SC.parameter)
        {
            refparam = true;
            reflocal = true;
        }
        reg = e.EV.Voffset == REGSIZE ? sym.Sregmsw : sym.Sreglsw;
        regm = sym.Sregm;
        //assert(tyreg(sym.ty()));
static if (0)
{
        // Let's just see if there is a CSE in a reg we can use
        // instead. This helps avoid AGI's.
        if (e.Ecount && e.Ecount != e.Ecomsub)
        {
            foreach (i; 0 .. arraysize(regcon.cse.value))
            {
                if (regcon.cse.value[i] == e)
                {   reg = i;
                    break;
                }
            }
        }
}
        // the variable's registers must all be register-variable registers
        assert(regm & regcon.mvar && !(regm & ~regcon.mvar));
        preg = reg;
        pregm = regm;
        return true;
    }

    if (sym.Sfl == FLpseudo)
    {
        uint regnum = sym.Sreglsw;
        regm_t regbit = mask(regnum);
        if (regbit & ALLREGS && (regnum & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H
        {
            preg = regnum & 7;
            pregm = regbit;
            return true;
        }
    }
    return false;
}

/*********************************
 * Allocate some registers.
* Input:
 *      cdb             Sink for any code generated (via getregs()) to save
 *                      regcon.cse.mops registers on the stack.
 *      pretregs        Pointer to mask of registers to make selection from.
 *      tym             Mask of type we will store in registers.
 * Output:
 *      *pretregs       Mask of allocated registers.
 *      *preg           Register number of first allocated register.
 *      msavereg,mfuncreg       retregs bits are cleared.
 *      regcon.cse.mval,regcon.cse.mops updated
 */

void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}

/*********************************
 * Same as the four-argument allocreg(); `line` and `file` are only used
 * in diagnostic output.
 */
@trusted
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
        reg_t reg;

static if (0)
{
        if (pass == BackendPass.final_)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym %s\n",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs),tym_str(tym));
        }
}
        tym = tybasic(tym);
        uint size = _tysize[tym];
        *pretregs &= mES | allregs | XMMREGS;
        regm_t retregs = *pretregs;

        debug if (retregs == 0)
            printf("allocreg: file %s(%d)\n", file, line);

        // Fast path: the request names only register-variable registers,
        // so there is nothing to choose between.
        if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
        {
            if (size <= REGSIZE || (retregs & XMMREGS))
            {
                *preg = findreg(retregs);
                assert(retregs == mask(*preg)); /* no more bits are set */
            }
            else if (size <= 2 * REGSIZE)
            {
                *preg = findregmsw(retregs);
                assert(retregs & mLSW);
            }
            else
                assert(0);
            getregs(cdb,retregs);
            return;
        }
        int count = 0;
L1:     // restart the whole selection with a (possibly reduced) retregs
        //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        assert(++count < 20);           /* fail instead of hanging if blocked */
        assert(retregs);
        reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
L3:     // retry keeping any msreg/lsreg already chosen
        //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        /* Pick candidates in order of preference, progressively allowing
         * registers holding parameters, CSE values, needed CSEs, and
         * finally registers we would like to save.
         */
        regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
        if (!r)
        {
            r = retregs & ~(msavereg | regcon.cse.mval);
            if (!r)
            {
                r = retregs & ~(msavereg | regcon.cse.mops);
                if (!r)
                {   r = retregs & ~msavereg;
                    if (!r)
                        r = retregs;
                }
            }
        }

        if (size <= REGSIZE || retregs & XMMREGS)
        {
            // avoid BP when there is any alternative
            if (r & ~mBP)
                r &= ~mBP;

            // If only one index register, prefer to not use LSW registers
            if (!regcon.indexregs && r & ~mLSW)
                r &= ~mLSW;

            if (pass == BackendPass.final_ && r & ~lastretregs && !I16)
            {   // Try not to always allocate the same register,
                // to schedule better

                r &= ~lastretregs;
                if (r & ~last2retregs)
                {
                    r &= ~last2retregs;
                    if (r & ~last3retregs)
                    {
                        r &= ~last3retregs;
                        if (r & ~last4retregs)
                        {
                            r &= ~last4retregs;
//                          if (r & ~last5retregs)
//                              r &= ~last5retregs;
                        }
                    }
                }
                if (r & ~mfuncreg)
                    r &= ~mfuncreg;
            }
            reg = findreg(r);
            retregs = mask(reg);
        }
        else if (size <= 2 * REGSIZE)
        {
            /* Select pair with both regs free. Failing */
            /* that, select pair with one reg free.             */

            if (r & mBP)
            {
                retregs &= ~mBP;
                goto L3;
            }

            if (r & mMSW)
            {
                if (r & mDX)
                    msreg = DX;                 /* prefer to use DX over CX */
                else
                    msreg = findregmsw(r);
                r &= mLSW;                      /* see if there's an LSW also */
                if (r)
                    lsreg = findreg(r);
                else if (lsreg == NOREG)   /* if don't have LSW yet */
                {
                    retregs &= mLSW;
                    goto L3;
                }
            }
            else
            {
                if (I64 && !(r & mLSW))
                {
                    retregs = *pretregs & (mMSW | mLSW);
                    assert(retregs);
                    goto L1;
                }
                lsreg = findreglsw(r);
                if (msreg == NOREG)
                {
                    retregs &= mMSW;
                    assert(retregs);
                    goto L3;
                }
            }
            reg = (msreg == ES) ? lsreg : msreg;
            retregs = mask(msreg) | mask(lsreg);
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
        {
            debug
            if (retregs != DOUBLEREGS)
                printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

            assert(retregs == DOUBLEREGS);
            reg = AX;
        }
        else
        {
            debug
            {
                printf("%s\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                    tym_str(tym),file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
            }
            assert(0);
        }
        if (retregs & regcon.mvar)              // if conflict with reg vars
        {
            if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
            {
                // remove the conflicting registers from the request and retry
                retregs = (*pretregs &= ~(retregs & regcon.mvar));
                goto L1;                // try other registers
            }
        }
        *preg = reg;
        *pretregs = retregs;

        //printf("Allocating %s\n",regm_str(retregs));
        // shift the history of recently allocated masks
        last5retregs = last4retregs;
        last4retregs = last3retregs;
        last3retregs = last2retregs;
        last2retregs = lastretregs;
        lastretregs = retregs;
        getregs(cdb, retregs);
}


/*****************************************
 * Allocate a scratch register.
* Params:
 *      cdb = where to write any generated code to
 *      regm = mask of registers to pick one from
 * Returns:
 *      selected register
 */
@trusted
reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm)
{
    reg_t r;
    // allocreg() narrows the (by-value) mask in place and stores the pick in r
    allocreg(cdb, &regm, &r, TYoffset);
    return r;
}


/******************************
 * Determine registers that should be destroyed upon arrival
 * to code entry point for exception handling.
 * Returns:
 *      for DWARF EH, the registers of allregs no longer in mfuncreg;
 *      otherwise allregs (32/64 bit) or ALLREGS | mES
 */
@trusted
regm_t lpadregs()
{
    regm_t used;
    if (config.ehmethod == EHmethod.EH_DWARF)
        used = allregs & ~mfuncreg;
    else
        used = (I32 | I64) ? allregs : (ALLREGS | mES);
    //printf("lpadregs(): used=%s, allregs=%s, mfuncreg=%s\n", regm_str(used), regm_str(allregs), regm_str(mfuncreg));
    return used;
}


/*************************
 * Mark registers as used.
 * Params:
 *      regm = mask of registers being used
 */

@trusted
void useregs(regm_t regm)
{
    //printf("useregs(x%x) %s\n", regm, regm_str(regm));
    mfuncreg &= ~regm;
    regcon.used |= regm;                // registers used in this block
    regcon.params &= ~regm;
    if (regm & regcon.mpvar)            // if modified a fastpar register variable
        regcon.params = 0;              // toss them all out
}

/*************************
 * We are going to use the registers in mask r.
 * Generate any code necessary to save any regs.
 * Params:
 *      cdb = sink for the code that saves live common subexpressions
 *      r = mask of registers about to be overwritten
 */

@trusted
void getregs(ref CodeBuilder cdb, regm_t r)
{
    //printf("getregs(x%x) %s\n", r, regm_str(r));
    regm_t ms = r & regcon.cse.mops;           // mask of common subs we must save
    useregs(r);
    regcon.cse.mval &= ~r;
    msavereg &= ~r;                     // regs that are destroyed
    regcon.immed.mval &= ~r;
    if (ms)
        cse_save(cdb, ms);
}

/*************************
 * We are going to use the registers in mask r.
 * Same as getregs(), but assert if code is needed to be generated.
*/
@trusted
void getregsNoSave(regm_t r)
{
    //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));

    // none of the registers may hold a common sub that would need saving
    assert((r & regcon.cse.mops) == 0);
    useregs(r);

    const regm_t survivors = ~r;        // everything except the regs that are destroyed
    regcon.cse.mval &= survivors;
    msavereg &= survivors;
    regcon.immed.mval &= survivors;
}

/*****************************************
 * Copy registers in cse.mops into memory.
 * Params:
 *      cdb = sink for the generated store instructions
 *      ms = mask of registers to save; must be a subset of regcon.cse.mops
 */

@trusted
private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Skip CSEs that are already saved */
    for (regm_t bit = 1; bit < mask(NUMREGS); bit <<= 1)
    {
        if (!(bit & ms))
            continue;

        const e = regcon.cse.value[findreg(bit)];
        const sz = tysize(e.Ety);
        foreach (const ref saved; CSE.filter(e))
        {
            // does this existing entry already cover the register's part of e?
            const bool covered =
                sz <= REGSIZE ||
                sz <= 2 * REGSIZE &&
                    (bit & mMSW && saved.regm & mMSW ||
                     bit & mLSW && saved.regm & mLSW) ||
                sz == 4 * REGSIZE && bit == saved.regm;
            if (!covered)
                continue;

            ms &= ~bit;
            if (!ms)
                return;         // nothing left to save
            break;
        }
    }

    while (ms)
    {
        auto entry = CSE.add();
        reg_t r = findreg(ms);          /* the register to save         */
        entry.e = regcon.cse.value[r];
        entry.regm = mask(r);

        ms &= ~mask(r);                 /* turn off reg bit in ms       */

        // If we can simply reload the CSE, we don't need to save it
        if (cse_simple(&entry.csimple, entry.e))
        {
            entry.flags |= CSEsimple;
            continue;
        }

        CSE.updateSizeAndAlign(entry.e);
        gen_storecse(cdb, entry.e.Ety, r, entry.slot);
        reflocal = true;
    }
}

/******************************************
 * Getregs without marking immediate register values as gone.
1963 */ 1964 1965 @trusted 1966 void getregs_imm(ref CodeBuilder cdb, regm_t r) 1967 { 1968 regm_t save = regcon.immed.mval; 1969 getregs(cdb,r); 1970 regcon.immed.mval = save; 1971 } 1972 1973 /****************************************** 1974 * Flush all CSE's out of registers and into memory. 1975 * Input: 1976 * do87 !=0 means save 87 registers too 1977 */ 1978 1979 @trusted 1980 void cse_flush(ref CodeBuilder cdb, int do87) 1981 { 1982 //dbg_printf("cse_flush()\n"); 1983 cse_save(cdb,regcon.cse.mops); // save any CSEs to memory 1984 if (do87) 1985 save87(cdb); // save any 8087 temporaries 1986 } 1987 1988 /************************* 1989 * Common subexpressions exist in registers. Note this in regcon.cse.mval. 1990 * Input: 1991 * e the subexpression 1992 * regm mask of registers holding it 1993 * opsflag if != 0 then regcon.cse.mops gets set too 1994 * Returns: 1995 * false not saved as a CSE 1996 * true saved as a CSE 1997 */ 1998 1999 @trusted 2000 bool cssave(elem *e,regm_t regm,uint opsflag) 2001 { 2002 bool result = false; 2003 2004 /*if (e.Ecount && e.Ecount == e.Ecomsub)*/ 2005 if (e.Ecount && e.Ecomsub) 2006 { 2007 if (!opsflag && pass != BackendPass.final_ && (I32 || I64)) 2008 return false; 2009 2010 //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag); 2011 regm &= mBP | ALLREGS | mES | XMMREGS; /* just to be sure */ 2012 2013 /+ 2014 /* Do not register CSEs if they are register variables and */ 2015 /* are not operator nodes. This forces the register allocation */ 2016 /* to go through allocreg(), which will prevent using register */ 2017 /* variables for scratch. 
*/ 2018 if (opsflag || !(regm & regcon.mvar)) 2019 +/ 2020 for (uint i = 0; regm; i++) 2021 { 2022 regm_t mi = mask(i); 2023 if (regm & mi) 2024 { 2025 regm &= ~mi; 2026 2027 // If we don't need this CSE, and the register already 2028 // holds a CSE that we do need, don't mark the new one 2029 if (regcon.cse.mval & mi && regcon.cse.value[i] != e && 2030 !opsflag && regcon.cse.mops & mi) 2031 continue; 2032 2033 regcon.cse.mval |= mi; 2034 if (opsflag) 2035 regcon.cse.mops |= mi; 2036 //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e); 2037 regcon.cse.value[i] = e; 2038 result = true; 2039 } 2040 } 2041 } 2042 return result; 2043 } 2044 2045 /************************************* 2046 * Determine if a computation should be done into a register. 2047 */ 2048 2049 @trusted 2050 bool evalinregister(elem *e) 2051 { 2052 if (config.exe == EX_WIN64 && e.Eoper == OPrelconst) 2053 return true; 2054 2055 if (e.Ecount == 0) /* elem is not a CSE, therefore */ 2056 /* we don't need to evaluate it */ 2057 /* in a register */ 2058 return false; 2059 if (!OTleaf(e.Eoper)) /* operators are always in register */ 2060 return true; 2061 2062 // Need to rethink this code if float or double can be CSE'd 2063 uint sz = tysize(e.Ety); 2064 if (e.Ecount == e.Ecomsub) /* elem is a CSE that needs */ 2065 /* to be generated */ 2066 { 2067 if ((I32 || I64) && 2068 //pass == BackendPass.final_ && // bug 8987 2069 sz <= REGSIZE) 2070 { 2071 // Do it only if at least 2 registers are available 2072 regm_t m = allregs & ~regcon.mvar; 2073 if (sz == 1) 2074 m &= BYTEREGS; 2075 if (m & (m - 1)) // if more than one register 2076 { // Need to be at least 3 registers available, as 2077 // addressing modes can use up 2. 2078 while (!(m & 1)) 2079 m >>= 1; 2080 m >>= 1; 2081 if (m & (m - 1)) 2082 return true; 2083 } 2084 } 2085 return false; 2086 } 2087 2088 /* Elem is now a CSE that might have been generated. 
If so, and */ 2089 /* it's in a register already, the computation should be done */ 2090 /* using that register. */ 2091 regm_t emask = 0; 2092 for (uint i = 0; i < regcon.cse.value.length; i++) 2093 if (regcon.cse.value[i] == e) 2094 emask |= mask(i); 2095 emask &= regcon.cse.mval; // mask of available CSEs 2096 if (sz <= REGSIZE) 2097 return emask != 0; /* the CSE is in a register */ 2098 else if (sz <= 2 * REGSIZE) 2099 return (emask & mMSW) && (emask & mLSW); 2100 return true; /* cop-out for now */ 2101 } 2102 2103 /******************************************************* 2104 * Return mask of scratch registers. 2105 */ 2106 2107 @trusted 2108 regm_t getscratch() 2109 { 2110 regm_t scratch = 0; 2111 if (pass == BackendPass.final_) 2112 { 2113 scratch = allregs & ~(regcon.mvar | regcon.mpvar | regcon.cse.mval | 2114 regcon.immed.mval | regcon.params | mfuncreg); 2115 } 2116 return scratch; 2117 } 2118 2119 /****************************** 2120 * Evaluate an elem that is a common subexp that has been encountered 2121 * before. 2122 * Look first to see if it is already in a register. 2123 * Params: 2124 * cdb = sink for generated code 2125 * e = the elem 2126 * pretregs = input is mask of registers, output is result register 2127 */ 2128 2129 @trusted 2130 private void comsub(ref CodeBuilder cdb,elem *e, ref regm_t pretregs) 2131 { 2132 tym_t tym; 2133 regm_t regm,emask; 2134 reg_t reg; 2135 uint byte_,sz; 2136 2137 //printf("comsub(e = %p, pretregs = %s)\n",e,regm_str(pretregs)); 2138 elem_debug(e); 2139 2140 debug 2141 { 2142 if (e.Ecomsub > e.Ecount) 2143 elem_print(e); 2144 } 2145 2146 assert(e.Ecomsub <= e.Ecount); 2147 2148 if (pretregs == 0) // no possible side effects anyway 2149 { 2150 return; 2151 } 2152 2153 /* First construct a mask, emask, of all the registers that 2154 * have the right contents. 
2155 */ 2156 emask = 0; 2157 for (uint i = 0; i < regcon.cse.value.length; i++) 2158 { 2159 //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]); 2160 if (regcon.cse.value[i] == e) // if contents are right 2161 emask |= mask(i); // turn on bit for reg 2162 } 2163 emask &= regcon.cse.mval; // make sure all bits are valid 2164 2165 if (emask & XMMREGS && pretregs == mPSW) 2166 { } 2167 else if (tyxmmreg(e.Ety) && config.fpxmmregs) 2168 { 2169 if (pretregs & (mST0 | mST01)) 2170 { 2171 regm_t retregs = pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1; 2172 comsub(cdb, e, retregs); 2173 fixresult(cdb,e,retregs,&pretregs); 2174 return; 2175 } 2176 } 2177 else if (tyfloating(e.Ety) && config.inline8087) 2178 { 2179 comsub87(cdb,e,&pretregs); 2180 return; 2181 } 2182 2183 2184 /* create mask of CSEs */ 2185 regm_t csemask = CSE.mask(e); 2186 csemask &= ~emask; // stuff already in registers 2187 2188 debug if (debugw) 2189 { 2190 printf("comsub(e=%p): pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n", 2191 e,regm_str(pretregs),regm_str(emask),regm_str(csemask), 2192 regm_str(regcon.cse.mval),regm_str(regcon.mvar)); 2193 if (regcon.cse.mval & 1) 2194 elem_print(regcon.cse.value[0]); 2195 } 2196 2197 tym = tybasic(e.Ety); 2198 sz = _tysize[tym]; 2199 byte_ = sz == 1; 2200 2201 if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register 2202 { 2203 /* First see if it is already in a correct register */ 2204 2205 regm = emask & pretregs; 2206 if (regm == 0) 2207 regm = emask; /* try any other register */ 2208 if (regm) /* if it's in a register */ 2209 { 2210 if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (pretregs & regcon.mvar) == pretregs) 2211 { 2212 regm = mask(findreg(regm)); 2213 fixresult(cdb,e,regm,&pretregs); 2214 return; 2215 } 2216 } 2217 2218 if (OTleaf(e.Eoper)) /* if not op or func */ 2219 goto reload; /* reload data */ 2220 2221 foreach (ref cse; CSE.filter(e)) 2222 { 2223 regm_t retregs; 2224 2225 
if (cse.flags & CSEsimple) 2226 { 2227 retregs = pretregs; 2228 if (byte_ && !(retregs & BYTEREGS)) 2229 retregs = BYTEREGS; 2230 else if (!(retregs & allregs)) 2231 retregs = allregs; 2232 allocreg(cdb,&retregs,®,tym); 2233 code *cr = &cse.csimple; 2234 cr.setReg(reg); 2235 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1) 2236 cr.Irex |= REX; 2237 cdb.gen(cr); 2238 goto L10; 2239 } 2240 else 2241 { 2242 reflocal = true; 2243 cse.flags |= CSEload; 2244 if (pretregs == mPSW) // if result in CCs only 2245 { 2246 if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety))) 2247 { 2248 retregs = XMMREGS; 2249 allocreg(cdb,&retregs,®,tym); 2250 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2251 regcon.cse.mval |= mask(reg); // cs is in a reg 2252 regcon.cse.value[reg] = e; 2253 fixresult(cdb,e,retregs,&pretregs); 2254 } 2255 else 2256 { 2257 // CMP cs[BP],0 2258 gen_testcse(cdb, cse.e.Ety, sz, cse.slot); 2259 } 2260 } 2261 else 2262 { 2263 retregs = pretregs; 2264 if (byte_ && !(retregs & BYTEREGS)) 2265 retregs = BYTEREGS; 2266 allocreg(cdb,&retregs,®,tym); 2267 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2268 L10: 2269 regcon.cse.mval |= mask(reg); // cs is in a reg 2270 regcon.cse.value[reg] = e; 2271 fixresult(cdb,e,retregs,&pretregs); 2272 } 2273 } 2274 return; 2275 } 2276 2277 debug 2278 { 2279 printf("couldn't find cse e = %p, pass = %d\n",e,pass); 2280 elem_print(e); 2281 } 2282 assert(0); /* should have found it */ 2283 } 2284 else /* reg pair is req'd */ 2285 if (sz <= 2 * REGSIZE) 2286 { 2287 reg_t msreg,lsreg; 2288 2289 /* see if we have both */ 2290 if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP))) 2291 { /* we don't have both */ 2292 debug if (!OTleaf(e.Eoper)) 2293 { 2294 printf("e = %p, op = x%x, emask = %s, csemask = %s\n", 2295 e,e.Eoper,regm_str(emask),regm_str(csemask)); 2296 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW); 2297 elem_print(e); 2298 } 2299 2300 assert(OTleaf(e.Eoper)); /* must have both for operators */ 2301 
goto reload; 2302 } 2303 2304 /* Look for right vals in any regs */ 2305 regm = pretregs & mMSW; 2306 if (emask & regm) 2307 msreg = findreg(emask & regm); 2308 else if (emask & mMSW) 2309 msreg = findregmsw(emask); 2310 else /* reload from cse array */ 2311 { 2312 if (!regm) 2313 regm = mMSW & ALLREGS; 2314 allocreg(cdb,®m,&msreg,TYint); 2315 loadcse(cdb,e,msreg,mMSW); 2316 } 2317 2318 regm = pretregs & (mLSW | mBP); 2319 if (emask & regm) 2320 lsreg = findreg(emask & regm); 2321 else if (emask & (mLSW | mBP)) 2322 lsreg = findreglsw(emask); 2323 else 2324 { 2325 if (!regm) 2326 regm = mLSW; 2327 allocreg(cdb,®m,&lsreg,TYint); 2328 loadcse(cdb,e,lsreg,mLSW | mBP); 2329 } 2330 2331 regm = mask(msreg) | mask(lsreg); /* mask of result */ 2332 fixresult(cdb,e,regm,&pretregs); 2333 return; 2334 } 2335 else if (tym == TYdouble || tym == TYdouble_alias) // double 2336 { 2337 assert(I16); 2338 if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16) 2339 { 2340 static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d 2341 for (reg = 0; reg != NOREG; reg = dblreg[reg]) 2342 { 2343 assert(cast(int) reg >= 0 && reg <= 7); 2344 if (mask(reg) & csemask) 2345 loadcse(cdb,e,reg,mask(reg)); 2346 } 2347 regm = DOUBLEREGS_16; 2348 fixresult(cdb,e,regm,&pretregs); 2349 return; 2350 } 2351 if (OTleaf(e.Eoper)) goto reload; 2352 2353 debug 2354 printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask)); 2355 2356 assert(0); 2357 } 2358 else 2359 { 2360 debug 2361 printf("e = %p, tym = x%x\n",e,tym); 2362 2363 assert(0); 2364 } 2365 2366 reload: /* reload result from memory */ 2367 switch (e.Eoper) 2368 { 2369 case OPrelconst: 2370 cdrelconst(cdb,e,&pretregs); 2371 break; 2372 2373 case OPgot: 2374 if (config.exe & EX_posix) 2375 { 2376 cdgot(cdb,e,&pretregs); 2377 break; 2378 } 2379 goto default; 2380 2381 default: 2382 if (pretregs == mPSW && 2383 config.fpxmmregs && 2384 (tyxmmreg(tym) || tysimd(tym))) 2385 { 2386 regm_t retregs = 
XMMREGS | mPSW;
                loaddata(cdb,e,&retregs);
                cssave(e,retregs,false);
                return;
            }
            loaddata(cdb,e,&pretregs);
            break;
    }
    cssave(e,pretregs,false);
}


/*****************************
 * Reload a common subexpression into a register from its CSE save slot.
 *
 * The first CSE entry for `e` whose register mask overlaps `regm` is used.
 * It is flagged as loaded, `reg` is recorded in `regcon.cse` as holding `e`,
 * and the load instruction is appended to `cdb`.
 * Asserts if no entry for `e` overlaps `regm`.
 * Params:
 *      cdb  = sink for the generated code
 *      e    = the common subexpression to reload
 *      reg  = register to load into
 *      regm = mask selecting which saved part of `e` is wanted
 */

@trusted
private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
{
    foreach (ref entry; CSE.filter(e))
    {
        if (!(entry.regm & regm))
            continue;                   // this entry holds a different part

        reflocal = true;                // a local (the save slot) is referenced
        entry.flags |= CSEload;         // remember that the slot was read back

        const regm_t regbit = mask(reg);
        regcon.cse.value[reg] = e;
        regcon.cse.mval |= regbit;
        getregs(cdb,regbit);
        gen_loadcse(cdb, entry.e.Ety, reg, entry.slot);
        return;
    }

    // No save slot matched: this is an internal error
    debug
    {
        printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
        elem_print(e);
    }
    assert(0);
}

/***************************
 * Generate code sequence for an elem.
 * Input:
 *      pretregs =      mask of possible registers to return result in
 *                      Note:   longs are in AX,BX or CX,DX or SI,DI
 *                              doubles are AX,BX,CX,DX only
 *      constflag =     1 for user of result will not modify the
 *                      registers returned in *pretregs.
 *                      2 for freenode() not called.
2436 * Output: 2437 * *pretregs mask of registers result is returned in 2438 * Returns: 2439 * pointer to code sequence generated 2440 */ 2441 2442 @trusted 2443 void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op) 2444 { 2445 (*cdxxx[op])(cdb,e,pretregs); 2446 } 2447 2448 // jump table 2449 private __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx = 2450 [ 2451 OPunde: &cderr, 2452 OPadd: &cdorth, 2453 OPmul: &cdmul, 2454 OPand: &cdorth, 2455 OPmin: &cdorth, 2456 OPnot: &cdnot, 2457 OPcom: &cdcom, 2458 OPcond: &cdcond, 2459 OPcomma: &cdcomma, 2460 OPremquo: &cddiv, 2461 OPdiv: &cddiv, 2462 OPmod: &cddiv, 2463 OPxor: &cdorth, 2464 OPstring: &cderr, 2465 OPrelconst: &cdrelconst, 2466 OPinp: &cdport, 2467 OPoutp: &cdport, 2468 OPasm: &cdasm, 2469 OPinfo: &cdinfo, 2470 OPdctor: &cddctor, 2471 OPddtor: &cdddtor, 2472 OPctor: &cdctor, 2473 OPdtor: &cddtor, 2474 OPmark: &cdmark, 2475 OPvoid: &cdvoid, 2476 OPhalt: &cdhalt, 2477 OPnullptr: &cderr, 2478 OPpair: &cdpair, 2479 OPrpair: &cdpair, 2480 2481 OPor: &cdorth, 2482 OPoror: &cdloglog, 2483 OPandand: &cdloglog, 2484 OProl: &cdshift, 2485 OPror: &cdshift, 2486 OPshl: &cdshift, 2487 OPshr: &cdshift, 2488 OPashr: &cdshift, 2489 OPbit: &cderr, 2490 OPind: &cdind, 2491 OPaddr: &cderr, 2492 OPneg: &cdneg, 2493 OPuadd: &cderr, 2494 OPabs: &cdabs, 2495 OPtoprec: &cdtoprec, 2496 OPsqrt: &cdneg, 2497 OPsin: &cdneg, 2498 OPcos: &cdneg, 2499 OPscale: &cdscale, 2500 OPyl2x: &cdscale, 2501 OPyl2xp1: &cdscale, 2502 OPcmpxchg: &cdcmpxchg, 2503 OPrint: &cdneg, 2504 OPrndtol: &cdrndtol, 2505 OPstrlen: &cdstrlen, 2506 OPstrcpy: &cdstrcpy, 2507 OPmemcpy: &cdmemcpy, 2508 OPmemset: &cdmemset, 2509 OPstrcat: &cderr, 2510 OPstrcmp: &cdstrcmp, 2511 OPmemcmp: &cdmemcmp, 2512 OPsetjmp: &cdsetjmp, 2513 OPnegass: &cdaddass, 2514 OPpreinc: &cderr, 2515 OPpredec: &cderr, 2516 OPstreq: &cdstreq, 2517 OPpostinc: &cdpost, 2518 OPpostdec: &cdpost, 2519 OPeq: &cdeq, 2520 OPaddass: &cdaddass, 2521 OPminass: 
&cdaddass, 2522 OPmulass: &cdmulass, 2523 OPdivass: &cddivass, 2524 OPmodass: &cddivass, 2525 OPshrass: &cdshass, 2526 OPashrass: &cdshass, 2527 OPshlass: &cdshass, 2528 OPandass: &cdaddass, 2529 OPxorass: &cdaddass, 2530 OPorass: &cdaddass, 2531 2532 OPle: &cdcmp, 2533 OPgt: &cdcmp, 2534 OPlt: &cdcmp, 2535 OPge: &cdcmp, 2536 OPeqeq: &cdcmp, 2537 OPne: &cdcmp, 2538 2539 OPunord: &cdcmp, 2540 OPlg: &cdcmp, 2541 OPleg: &cdcmp, 2542 OPule: &cdcmp, 2543 OPul: &cdcmp, 2544 OPuge: &cdcmp, 2545 OPug: &cdcmp, 2546 OPue: &cdcmp, 2547 OPngt: &cdcmp, 2548 OPnge: &cdcmp, 2549 OPnlt: &cdcmp, 2550 OPnle: &cdcmp, 2551 OPord: &cdcmp, 2552 OPnlg: &cdcmp, 2553 OPnleg: &cdcmp, 2554 OPnule: &cdcmp, 2555 OPnul: &cdcmp, 2556 OPnuge: &cdcmp, 2557 OPnug: &cdcmp, 2558 OPnue: &cdcmp, 2559 2560 OPvp_fp: &cdcnvt, 2561 OPcvp_fp: &cdcnvt, 2562 OPoffset: &cdlngsht, 2563 OPnp_fp: &cdshtlng, 2564 OPnp_f16p: &cdfar16, 2565 OPf16p_np: &cdfar16, 2566 2567 OPs16_32: &cdshtlng, 2568 OPu16_32: &cdshtlng, 2569 OPd_s32: &cdcnvt, 2570 OPb_8: &cdcnvt, 2571 OPs32_d: &cdcnvt, 2572 OPd_s16: &cdcnvt, 2573 OPs16_d: &cdcnvt, 2574 OPd_u16: &cdcnvt, 2575 OPu16_d: &cdcnvt, 2576 OPd_u32: &cdcnvt, 2577 OPu32_d: &cdcnvt, 2578 OP32_16: &cdlngsht, 2579 OPd_f: &cdcnvt, 2580 OPf_d: &cdcnvt, 2581 OPd_ld: &cdcnvt, 2582 OPld_d: &cdcnvt, 2583 OPc_r: &cdconvt87, 2584 OPc_i: &cdconvt87, 2585 OPu8_16: &cdbyteint, 2586 OPs8_16: &cdbyteint, 2587 OP16_8: &cdlngsht, 2588 OPu32_64: &cdshtlng, 2589 OPs32_64: &cdshtlng, 2590 OP64_32: &cdlngsht, 2591 OPu64_128: &cdshtlng, 2592 OPs64_128: &cdshtlng, 2593 OP128_64: &cdlngsht, 2594 OPmsw: &cdmsw, 2595 2596 OPd_s64: &cdcnvt, 2597 OPs64_d: &cdcnvt, 2598 OPd_u64: &cdcnvt, 2599 OPu64_d: &cdcnvt, 2600 OPld_u64: &cdcnvt, 2601 OPparam: &cderr, 2602 OPsizeof: &cderr, 2603 OParrow: &cderr, 2604 OParrowstar: &cderr, 2605 OPcolon: &cderr, 2606 OPcolon2: &cderr, 2607 OPbool: &cdnot, 2608 OPcall: &cdfunc, 2609 OPucall: &cdfunc, 2610 OPcallns: &cdfunc, 2611 OPucallns: &cdfunc, 2612 OPstrpar: &cderr, 2613 
OPstrctor: &cderr, 2614 OPstrthis: &cdstrthis, 2615 OPconst: &cderr, 2616 OPvar: &cderr, 2617 OPnew: &cderr, 2618 OPanew: &cderr, 2619 OPdelete: &cderr, 2620 OPadelete: &cderr, 2621 OPbrack: &cderr, 2622 OPframeptr: &cdframeptr, 2623 OPgot: &cdgot, 2624 2625 OPbsf: &cdbscan, 2626 OPbsr: &cdbscan, 2627 OPbtst: &cdbtst, 2628 OPbt: &cdbt, 2629 OPbtc: &cdbt, 2630 OPbtr: &cdbt, 2631 OPbts: &cdbt, 2632 2633 OPbswap: &cdbswap, 2634 OPpopcnt: &cdpopcnt, 2635 OPvector: &cdvector, 2636 OPvecsto: &cdvecsto, 2637 OPvecfill: &cdvecfill, 2638 OPva_start: &cderr, 2639 OPprefetch: &cdprefetch, 2640 ]; 2641 2642 2643 @trusted 2644 void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag) 2645 { 2646 Symbol *s; 2647 2648 debug if (debugw) 2649 { 2650 printf("+codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs),oper_str(e.Eoper)); 2651 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 2652 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 2653 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 2654 } 2655 2656 assert(e); 2657 elem_debug(e); 2658 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 2659 { 2660 debug 2661 { 2662 printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs)); 2663 elem_print(e); 2664 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 2665 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 2666 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 2667 } 2668 assert(0); 2669 } 2670 2671 if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar) 2672 *pretregs &= ~regcon.mvar; /* can't use register vars */ 2673 2674 uint op = e.Eoper; 2675 if (e.Ecount && e.Ecount != e.Ecomsub) // if common subexp 2676 { 2677 comsub(cdb,e, *pretregs); 2678 goto L1; 2679 } 2680 2681 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 2682 cdb.genlinnum(e.Esrcpos); 2683 2684 switch (op) 2685 { 2686 default: 2687 if (e.Ecount) /* if common subexp 
*/ 2688 { 2689 /* if no return value */ 2690 if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0) 2691 { 2692 if (*pretregs & (mST0 | mST01)) 2693 { 2694 //printf("generate ST0 comsub for:\n"); 2695 //elem_print(e); 2696 2697 regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1; 2698 (*cdxxx[op])(cdb,e,&retregs); 2699 cssave(e,retregs,!OTleaf(op)); 2700 fixresult(cdb, e, retregs, pretregs); 2701 goto L1; 2702 } 2703 if (tysize(e.Ety) == 1) 2704 *pretregs |= BYTEREGS; 2705 else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs) 2706 *pretregs |= XMMREGS; 2707 else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias) 2708 *pretregs |= DOUBLEREGS; 2709 else 2710 *pretregs |= ALLREGS; /* make one */ 2711 } 2712 2713 /* BUG: For CSEs, make sure we have both an MSW */ 2714 /* and an LSW specified in *pretregs */ 2715 } 2716 assert(op <= OPMAX); 2717 (*cdxxx[op])(cdb,e,pretregs); 2718 break; 2719 2720 case OPrelconst: 2721 cdrelconst(cdb,e,pretregs); 2722 break; 2723 2724 case OPvar: 2725 if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg && 2726 (s.Sregm & *pretregs) == s.Sregm) 2727 { 2728 if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE) 2729 *pretregs &= mPSW | (s.Sregm & mLSW); 2730 else 2731 *pretregs &= mPSW | s.Sregm; 2732 } 2733 goto case OPconst; 2734 2735 case OPconst: 2736 if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile)) 2737 { 2738 switch (tybasic(e.Ety)) 2739 { 2740 case TYbool: 2741 case TYchar: 2742 case TYschar: 2743 case TYuchar: 2744 *pretregs |= BYTEREGS; 2745 break; 2746 2747 case TYnref: 2748 case TYnptr: 2749 case TYsptr: 2750 case TYcptr: 2751 case TYfgPtr: 2752 case TYimmutPtr: 2753 case TYsharePtr: 2754 case TYrestrictPtr: 2755 *pretregs |= I16 ? 
IDXREGS : ALLREGS; 2756 break; 2757 2758 case TYshort: 2759 case TYushort: 2760 case TYint: 2761 case TYuint: 2762 case TYlong: 2763 case TYulong: 2764 case TYllong: 2765 case TYullong: 2766 case TYcent: 2767 case TYucent: 2768 case TYfptr: 2769 case TYhptr: 2770 case TYvptr: 2771 *pretregs |= ALLREGS; 2772 break; 2773 2774 default: 2775 break; 2776 } 2777 } 2778 loaddata(cdb,e,pretregs); 2779 break; 2780 } 2781 cssave(e,*pretregs,!OTleaf(op)); 2782 L1: 2783 if (!(constflag & 2)) 2784 freenode(e); 2785 2786 debug if (debugw) 2787 { 2788 printf("-codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs), oper_str(op)); 2789 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 2790 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 2791 } 2792 } 2793 2794 /******************************* 2795 * Same as codelem(), but do not destroy the registers in keepmsk. 2796 * Use scratch registers as much as possible, then use stack. 2797 * Input: 2798 * constflag true if user of result will not modify the 2799 * registers returned in *pretregs. 
 *      cdb             receives, in order: the register save code, the code
 *                      for e, then the register restore code
 *      e               elem to generate code for
 *      pretregs        in: acceptable result registers, out: registers the
 *                      result is in (passed through to codelem())
 *      keepmsk         mask of registers whose contents must survive
 */

@trusted
void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
{
    regm_t touse;

    debug if (debugw)
        printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    elem_debug(e);
    if (constflag)
    {
        regm_t regm;
        reg_t reg;

        // Shortcut: the value already sits in an acceptable register, so no
        // code for e itself is generated via codelem()
        if (isregvar(e, regm, reg) &&           // if e is a register variable
            (regm & *pretregs) == regm &&       // in one of the right regs
            e.EV.Voffset == 0
           )
        {
            uint sz1 = tysize(e.Ety);
            uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
            // Accessing only the low part of a multi-register variable
            if (sz1 <= REGSIZE && sz2 > REGSIZE)
                regm &= mLSW | XMMREGS;
            fixresult(cdb,e,regm,pretregs);
            cssave(e,regm,0);
            freenode(e);

            debug if (debugw)
                printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                        e,regm_str(*pretregs),regm_str(keepmsk),constflag);

            return;
        }
    }

    // Save the global code generator state that codelem() will disturb;
    // it is merged back / restored at the end of this function.
    regm_t overlap = msavereg & keepmsk;        // bits that were already set in msavereg
    msavereg |= keepmsk;          /* add to mask of regs to save          */
    regm_t oldregcon = regcon.cse.mval;
    regm_t oldregimmed = regcon.immed.mval;
    regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */
    mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
    uint stackpushsave = stackpush;
    char calledafuncsave = calledafunc;
    calledafunc = 0;                    // so we can tell whether *this* elem called a function
    CodeBuilder cdbx; cdbx.ctor();      // code for e goes here, save/restore code is wrapped around it
    codelem(cdbx,e,pretregs,constflag);    // generate code for the elem

    // NOTE(review): msavereg was OR-ed with keepmsk above, so this is non-zero
    // only if code generation cleared bits in msavereg - presumably getregs()
    // does that for each register it destroys; confirm against getregs().
    regm_t tosave = keepmsk & ~msavereg; /* registers to save                    */
    if (tosave)
    {
        cgstate.stackclean++;
        genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
        cgstate.stackclean--;
    }

    /* Assert that no new CSEs are generated that are not reflected       */
    /* in mfuncreg.                                                       */
    debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
        printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
                regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));

    assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);

    /* https://issues.dlang.org/show_bug.cgi?id=3521
     * The problem is:
     *    reg op (reg = exp)
     * where reg must be preserved (in keepregs) while the expression to be evaluated
     * must change it.
     * The only solution is to make this variable not a register.
     */
    if (regcon.mvar & tosave)
    {
        //elem_print(e);
        //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
        cgreg_unregister(regcon.mvar & tosave);
    }

    /* which registers can we use to save other registers in? */
    if (config.flags4 & CFG4space ||              // if optimize for space
        config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
        touse = 0;                              // PUSH/POP pairs are always shorter
    else
    {
        touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
        /* Don't use registers we'll have to save/restore                */
        touse &= ~(fregsaved & oldmfuncreg);
        /* Don't use registers that have constant values in them, since
           the code generated might have used the value.
         */
        touse &= ~oldregimmed;
    }

    CodeBuilder cdbs1; cdbs1.ctor();    // save code, emitted before the code for e
    code *cs2 = null;                   // restore code, built by prepending so it runs in reverse order of the saves
    int adjesp = 0;                     // total of the sizes returned by gensaverestore()

    // Walk the registers in ascending order until every one in tosave is handled
    for (uint i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            if (touse)          /* if any scratch registers             */
            {
                // Park register i in the lowest-numbered free scratch register j
                uint j;
                for (j = 0; j < 8; j++)
                {
                    regm_t mj = mask(j);

                    if (touse & mj)
                    {
                        genmovreg(cdbs1,j,i);
                        cs2 = cat(genmovreg(i,j),cs2);
                        touse &= ~mj;           // j is now occupied
                        mfuncreg &= ~mj;
                        regcon.used |= mj;
                        break;
                    }
                }
                assert(j < 8);
            }
            else                        // else use memory
            {
                CodeBuilder cdby; cdby.ctor();
                uint size = gensaverestore(mask(i), cdbs1, cdby);
                cs2 = cat(cdby.finish(),cs2);
                if (size)
                {
                    stackchanged = 1;
                    adjesp += size;
                }
            }
            getregs(cdbx,mi);
            tosave &= ~mi;
        }
    }
    CodeBuilder cdbs2; cdbs2.ctor();    // restore code, emitted after the code for e
    if (adjesp)
    {
        // If this is done an odd number of times, it
        // will throw off the 8 byte stack alignment.
        // We should *only* worry about this if a function
        // was called in the code generation by codelem().
        int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);      // padding that rounds adjesp up to a multiple of STACKALIGN
        if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
        {
            regm_t mval_save = regcon.immed.mval;
            regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
                                        // because c hasn't been executed yet
            cod3_stackadj(cdbs1, sz);
            regcon.immed.mval = mval_save;
            cdbs1.genadjesp(sz);

            cod3_stackadj(cdbs2, -sz);
            cdbs2.genadjesp(-sz);
        }
        cdbs2.append(cs2);


        cdbs1.genadjesp(adjesp);
        cdbs2.genadjesp(-adjesp);
    }
    else
        cdbs2.append(cs2);

    // Merge / restore the global state saved at the top
    calledafunc |= calledafuncsave;
    msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
    mfuncreg &= oldmfuncreg;        /* update original                    */

    debug if (debugw)
        printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    // Final order: saves, code for e, restores
    cdb.append(cdbs1);
    cdb.append(cdbx);
    cdb.append(cdbs2);
    return;
}

/*********************************************
 * Turn register mask into a string suitable for printing.
2984 */ 2985 2986 @trusted 2987 const(char)* regm_str(regm_t rm) 2988 { 2989 enum NUM = 10; 2990 enum SMAX = 128; 2991 __gshared char[SMAX + 1][NUM] str; 2992 __gshared int i; 2993 2994 if (rm == 0) 2995 return "0"; 2996 if (rm == ALLREGS) 2997 return "ALLREGS"; 2998 if (rm == BYTEREGS) 2999 return "BYTEREGS"; 3000 if (rm == allregs) 3001 return "allregs"; 3002 if (rm == XMMREGS) 3003 return "XMMREGS"; 3004 char *p = str[i].ptr; 3005 if (++i == NUM) 3006 i = 0; 3007 *p = 0; 3008 for (size_t j = 0; j < 32; j++) 3009 { 3010 if (mask(cast(uint)j) & rm) 3011 { 3012 strcat(p,regstring[j]); 3013 rm &= ~mask(cast(uint)j); 3014 if (rm) 3015 strcat(p,"|"); 3016 } 3017 } 3018 if (rm) 3019 { 3020 const pstrlen = strlen(p); 3021 char *s = p + pstrlen; 3022 snprintf(s, SMAX - pstrlen, "x%02x",rm); 3023 } 3024 assert(strlen(p) <= SMAX); 3025 return strdup(p); 3026 } 3027 3028 /********************************* 3029 * Scan down comma-expressions. 3030 * Output: 3031 * pe = first elem down right side that is not an OPcomma 3032 * Returns: 3033 * code generated for left branches of comma-expressions 3034 */ 3035 3036 @trusted 3037 void docommas(ref CodeBuilder cdb, ref elem *pe) 3038 { 3039 uint stackpushsave = stackpush; 3040 int stackcleansave = cgstate.stackclean; 3041 cgstate.stackclean = 0; 3042 elem* e = pe; 3043 while (1) 3044 { 3045 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3046 { 3047 cdb.genlinnum(e.Esrcpos); 3048 //e.Esrcpos.Slinnum = 0; // don't do it twice 3049 } 3050 if (e.Eoper != OPcomma) 3051 break; 3052 regm_t retregs = 0; 3053 codelem(cdb,e.EV.E1,&retregs,true); 3054 elem* eold = e; 3055 e = e.EV.E2; 3056 freenode(eold); 3057 } 3058 pe = e; 3059 assert(cgstate.stackclean == 0); 3060 cgstate.stackclean = stackcleansave; 3061 genstackclean(cdb,stackpush - stackpushsave,0); 3062 } 3063 3064 /************************** 3065 * For elems in regcon that don't match regconsave, 3066 * clear the corresponding bit in regcon.cse.mval. 3067 * Do same for regcon.immed. 
3068 */ 3069 3070 @trusted 3071 void andregcon(ref con_t pregconsave) 3072 { 3073 regm_t m = ~1; 3074 foreach (i; 0 ..REGMAX) 3075 { 3076 if (pregconsave.cse.value[i] != regcon.cse.value[i]) 3077 regcon.cse.mval &= m; 3078 if (pregconsave.immed.value[i] != regcon.immed.value[i]) 3079 regcon.immed.mval &= m; 3080 m <<= 1; 3081 m |= 1; 3082 } 3083 //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval)); 3084 regcon.used |= pregconsave.used; 3085 regcon.cse.mval &= pregconsave.cse.mval; 3086 regcon.immed.mval &= pregconsave.immed.mval; 3087 regcon.params &= pregconsave.params; 3088 //printf("regcon.cse.mval®con.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops)); 3089 regcon.cse.mops &= regcon.cse.mval; 3090 } 3091 3092 3093 /********************************************** 3094 * Disassemble the code instruction bytes 3095 * Params: 3096 * code = array of instruction bytes 3097 */ 3098 @trusted 3099 private extern (D) 3100 void disassemble(ubyte[] code) 3101 { 3102 printf("%s:\n", funcsym_p.Sident.ptr); 3103 const model = I16 ? 16 : I32 ? 32 : 64; // 16/32/64 3104 size_t i = 0; 3105 while (i < code.length) 3106 { 3107 printf("%04x:", cast(int)i); 3108 uint pc; 3109 const sz = dmd.backend.disasm86.calccodsize(code, cast(uint)i, pc, model); 3110 3111 void put(char c) { printf("%c", c); } 3112 3113 dmd.backend.disasm86.getopstring(&put, code, cast(uint)i, sz, model, model == 16, true, 3114 null, null, null, null); 3115 printf("\n"); 3116 i += sz; 3117 } 3118 }