/**
 * Code generation 1
 *
 * Handles function calls: putting arguments in registers / on the stack, and jumping to the function.
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
 */

module dmd.backend.cod1;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.bitop;
import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:
@safe:

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

/// Returns: a value with only bit number `m` set (`1 << m`)
private extern (D) uint mask(uint m) { return 1 << m; }

/// Append a register/register instruction with opcode 0x09 (the x86 `OR r/m,reg`
/// encoding) via genregs(), passing `f` then `t`.
/// NOTE(review): presumably this ORs `f` into `t` - confirm against genregs()' operand order.
private void genorreg(ref CodeBuilder c, uint t, uint f)         { genregs(c, 0x09, f, t); }

/* array to convert from index register to r/m field    */
/* (-1 entries mark registers for which the table holds no r/m value) */
                                       /* AX CX DX BX SP BP SI DI       */
private __gshared const byte[8] regtorm32 =   [  0, 1, 2, 3,-1, 5, 6, 7 ];
                 __gshared const byte[8] regtorm   =   [ -1,-1,-1, 7,-1, 6, 4, 5 ];

//void funccall(ref
//                      CodeBuilder cdb,elem *e,uint numpara,uint numalign,
//                      regm_t *pretregs,regm_t keepmsk, bool usefuncarg);

/*********************************
 * Determine if we should leave parameter `s` in the register it
 * came in, or allocate a register for it using the register
 * allocator.
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter and leave it in the register it came in
 */
@trusted
bool regParamInPreg(Symbol* s)
{
    //printf("regPAramInPreg %s\n", s.Sident.ptr);
    // Only fastpar/shadowreg parameters qualify, and only when either the
    // CFG4optimized flag is off or the symbol is not flagged GTregcand.
    return (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) &&
        (!(config.flags4 & CFG4optimized) || !(s.Sflags & GTregcand));
}


/**************************
 * Determine if e is a 32 bit scaled index addressing mode.
 *
 * Leading OPcomma nodes are skipped (their right operand is examined).
 * The elem qualifies when it is a non-CSE `OPshl` by a constant of at most 3.
 * A shift by 0 yields 0, i.e. it is reported as "not scaled".
 * Params:
 *      e = expression to examine (not modified)
 * Returns:
 *      0       not a scaled index addressing mode
 *      !=0     the value for ss in the SIB byte
 */

@trusted
int isscaledindex(elem *e)
{
    targ_uns ss;

    assert(!I16);
    while (e.Eoper == OPcomma)
        e = e.EV.E2;
    if (!(e.Eoper == OPshl && !e.Ecount &&
          e.EV.E2.Eoper == OPconst &&
          (ss = e.EV.E2.EV.Vuns) <= 3
         )
       )
        ss = 0;
    return ss;
}

/*********************************************
 * Generate code for which isscaledindex(e) returned a non-zero result.
 *
 * The left operand of every leading OPcomma is evaluated for its side
 * effects, then the operand being shifted is loaded into one of `*pidxregs`.
 * The comma nodes, the shift node and its constant are released with freenode().
 * Params:
 *      cdb = code sink
 *      e = expression accepted by isscaledindex()
 *      pidxregs = in: candidate registers, passed through to scodelem()
 *      keepmsk = mask of registers that must not be destroyed
 */

@trusted
/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Load index register with result of e.EV.E1
    while (e.Eoper == OPcomma)
    {
        regm_t r = 0;
        scodelem(cdb, e.EV.E1, &r, keepmsk, true);
        // Fetch the right operand first: `e` must not be read again once it
        // has been handed to freenode().
        elem* next = e.EV.E2;
        freenode(e);
        e = next;
    }
    assert(e.Eoper == OPshl);
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);
    freenode(e.EV.E2);
    freenode(e);
}

/***********************************
 * Determine index if we can do two LEA instructions as a multiply.
135 * Returns: 136 * 0 can't do it 137 */ 138 139 enum 140 { 141 SSFLnobp = 1, /// can't have EBP in relconst 142 SSFLnobase1 = 2, /// no base register for first LEA 143 SSFLnobase = 4, /// no base register 144 SSFLlea = 8, /// can do it in one LEA 145 } 146 147 struct Ssindex 148 { 149 targ_uns product; 150 ubyte ss1; 151 ubyte ss2; 152 ubyte ssflags; /// SSFLxxxx 153 } 154 155 private __gshared const Ssindex[21] ssindex_array = 156 [ 157 { 0, 0, 0 }, // [0] is a place holder 158 159 { 3, 1, 0, SSFLnobp | SSFLlea }, 160 { 5, 2, 0, SSFLnobp | SSFLlea }, 161 { 9, 3, 0, SSFLnobp | SSFLlea }, 162 163 { 6, 1, 1, SSFLnobase }, 164 { 12, 1, 2, SSFLnobase }, 165 { 24, 1, 3, SSFLnobase }, 166 { 10, 2, 1, SSFLnobase }, 167 { 20, 2, 2, SSFLnobase }, 168 { 40, 2, 3, SSFLnobase }, 169 { 18, 3, 1, SSFLnobase }, 170 { 36, 3, 2, SSFLnobase }, 171 { 72, 3, 3, SSFLnobase }, 172 173 { 15, 2, 1, SSFLnobp }, 174 { 25, 2, 2, SSFLnobp }, 175 { 27, 3, 1, SSFLnobp }, 176 { 45, 3, 2, SSFLnobp }, 177 { 81, 3, 3, SSFLnobp }, 178 179 { 16, 3, 1, SSFLnobase1 | SSFLnobase }, 180 { 32, 3, 2, SSFLnobase1 | SSFLnobase }, 181 { 64, 3, 3, SSFLnobase1 | SSFLnobase }, 182 ]; 183 184 int ssindex(OPER op,targ_uns product) 185 { 186 if (op == OPshl) 187 product = 1 << product; 188 for (size_t i = 1; i < ssindex_array.length; i++) 189 { 190 if (ssindex_array[i].product == product) 191 return cast(int)i; 192 } 193 return 0; 194 } 195 196 /*************************************** 197 * Build an EA of the form disp[base][index*scale]. 
198 * Input: 199 * c struct to fill in 200 * base base register (-1 if none) 201 * index index register (-1 if none) 202 * scale scale factor - 1,2,4,8 203 * disp displacement 204 */ 205 206 void buildEA(code *c,int base,int index,int scale,targ_size_t disp) 207 { 208 ubyte rm; 209 ubyte sib; 210 ubyte rex = 0; 211 212 sib = 0; 213 if (!I16) 214 { uint ss; 215 216 assert(index != SP); 217 218 switch (scale) 219 { case 1: ss = 0; break; 220 case 2: ss = 1; break; 221 case 4: ss = 2; break; 222 case 8: ss = 3; break; 223 default: assert(0); 224 } 225 226 if (base == -1) 227 { 228 if (index == -1) 229 rm = modregrm(0,0,5); 230 else 231 { 232 rm = modregrm(0,0,4); 233 sib = modregrm(ss,index & 7,5); 234 if (index & 8) 235 rex |= REX_X; 236 } 237 } 238 else if (index == -1) 239 { 240 if (base == SP) 241 { 242 rm = modregrm(2, 0, 4); 243 sib = modregrm(0, 4, SP); 244 } 245 else 246 { rm = modregrm(2, 0, base & 7); 247 if (base & 8) 248 { rex |= REX_B; 249 if (base == R12) 250 { 251 rm = modregrm(2, 0, 4); 252 sib = modregrm(0, 4, 4); 253 } 254 } 255 } 256 } 257 else 258 { 259 rm = modregrm(2, 0, 4); 260 sib = modregrm(ss,index & 7,base & 7); 261 if (index & 8) 262 rex |= REX_X; 263 if (base & 8) 264 rex |= REX_B; 265 } 266 } 267 else 268 { 269 // -1 AX CX DX BX SP BP SI DI 270 static immutable ubyte[9][9] EA16rm = 271 [ 272 [ 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, ], // -1 273 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // AX 274 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // CX 275 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // DX 276 [ 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, ], // BX 277 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // SP 278 [ 0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, ], // BP 279 [ 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, ], // SI 280 [ 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, ] // DI 281 ]; 282 283 assert(scale == 1); 284 rm = EA16rm[base + 1][index + 1]; 285 assert(rm != 9); 286 } 287 c.Irm = rm; 
288 c.Isib = sib; 289 c.Irex = rex; 290 c.IFL1 = FLconst; 291 c.IEV1.Vuns = cast(targ_uns)disp; 292 } 293 294 /********************************************* 295 * Build REX, modregrm and sib bytes 296 */ 297 298 uint buildModregrm(int mod, int reg, int rm) 299 { 300 uint m; 301 if (I16) 302 m = modregrm(mod, reg, rm); 303 else 304 { 305 if ((rm & 7) == SP && mod != 3) 306 m = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4); 307 else 308 m = modregrm(mod,reg & 7,rm & 7); 309 if (reg & 8) 310 m |= REX_R << 16; 311 if (rm & 8) 312 m |= REX_B << 16; 313 } 314 return m; 315 } 316 317 /**************************************** 318 * Generate code for eecontext 319 */ 320 321 @trusted 322 void genEEcode() 323 { 324 CodeBuilder cdb; 325 cdb.ctor(); 326 327 eecontext.EEin++; 328 regcon.immed.mval = 0; 329 regm_t retregs = 0; //regmask(eecontext.EEelem.Ety); 330 assert(EEStack.offset >= REGSIZE); 331 cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE)); 332 cdb.gen1(0x50 + SI); // PUSH ESI 333 cdb.genadjesp(cast(int)EEStack.offset); 334 gencodelem(cdb, eecontext.EEelem, &retregs, false); 335 code *c = cdb.finish(); 336 assignaddrc(c); 337 pinholeopt(c,null); 338 jmpaddr(c); 339 eecontext.EEcode = gen1(c, 0xCC); // INT 3 340 eecontext.EEin--; 341 } 342 343 344 /******************************************** 345 * Gen a save/restore sequence for mask of registers. 
346 * Params: 347 * regm = mask of registers to save 348 * cdbsave = save code appended here 349 * cdbrestore = restore code appended here 350 * Returns: 351 * amount of stack consumed 352 */ 353 @trusted 354 uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore) 355 { 356 //printf("gensaverestore2(%s)\n", regm_str(regm)); 357 regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01; 358 if (!regm) 359 return 0; 360 361 uint stackused = 0; 362 363 code *[regm.sizeof * 8] restore; 364 365 reg_t i; 366 for (i = 0; regm; i++) 367 { 368 if (regm & 1) 369 { 370 code *cs2; 371 if (i == ES && I16) 372 { 373 stackused += REGSIZE; 374 cdbsave.gen1(0x06); // PUSH ES 375 cs2 = gen1(null, 0x07); // POP ES 376 } 377 else if (i == ST0 || i == ST01) 378 { 379 CodeBuilder cdb; 380 cdb.ctor(); 381 gensaverestore87(1 << i, cdbsave, cdb); 382 cs2 = cdb.finish(); 383 } 384 else if (i >= XMM0 || I64 || cgstate.funcarg.size) 385 { uint idx; 386 regsave.save(cdbsave, i, &idx); 387 CodeBuilder cdb; 388 cdb.ctor(); 389 regsave.restore(cdb, i, idx); 390 cs2 = cdb.finish(); 391 } 392 else 393 { 394 stackused += REGSIZE; 395 cdbsave.gen1(0x50 + (i & 7)); // PUSH i 396 cs2 = gen1(null, 0x58 + (i & 7)); // POP i 397 if (i & 8) 398 { code_orrex(cdbsave.last(), REX_B); 399 code_orrex(cs2, REX_B); 400 } 401 } 402 restore[i] = cs2; 403 } 404 else 405 restore[i] = null; 406 regm >>= 1; 407 } 408 409 while (i) 410 { 411 code *c = restore[--i]; 412 if (c) 413 { 414 cdbrestore.append(c); 415 } 416 } 417 418 return stackused; 419 } 420 421 422 /**************************************** 423 * Clean parameters off stack. 
424 * Input: 425 * numpara amount to adjust stack pointer 426 * keepmsk mask of registers to not destroy 427 */ 428 429 @trusted 430 void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk) 431 { 432 //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); 433 if (numpara && (cgstate.stackclean || STACKALIGN >= 16)) 434 { 435 /+ 436 if (0 && // won't work if operand of scodelem 437 numpara == stackpush && // if this is all those pushed 438 needframe && // and there will be a BP 439 !config.windows && 440 !(regcon.mvar & fregsaved) // and no registers will be pushed 441 ) 442 genregs(cdb,0x89,BP,SP); // MOV SP,BP 443 else 444 +/ 445 { 446 regm_t scratchm = 0; 447 448 if (numpara == REGSIZE && config.flags4 & CFG4space) 449 { 450 scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; 451 } 452 453 if (scratchm) 454 { 455 reg_t r; 456 allocreg(cdb, &scratchm, &r, TYint); 457 cdb.gen1(0x58 + r); // POP r 458 } 459 else 460 cod3_stackadj(cdb, -numpara); 461 } 462 stackpush -= numpara; 463 cdb.genadjesp(-numpara); 464 } 465 } 466 467 /********************************* 468 * Generate code for a logical expression. 
469 * Input: 470 * e elem 471 * jcond 472 * bit 1 if true then goto jump address if e 473 * if false then goto jump address if !e 474 * 2 don't call save87() 475 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond 476 * targ either code or block pointer to destination 477 */ 478 479 @trusted 480 void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ) 481 { 482 //printf("logexp(e = %p, jcond = %d)\n", e, jcond); elem_print(e); 483 if (tybasic(e.Ety) == TYnoreturn) 484 { 485 con_t regconsave = regcon; 486 regm_t retregs = 0; 487 codelem(cdb,e,&retregs,0); 488 regconsave.used |= regcon.used; 489 regcon = regconsave; 490 return; 491 } 492 493 int no87 = (jcond & 2) == 0; 494 docommas(cdb, &e); // scan down commas 495 cgstate.stackclean++; 496 497 code* c, ce; 498 if (!OTleaf(e.Eoper) && !e.Ecount) // if operator and not common sub 499 { 500 switch (e.Eoper) 501 { 502 case OPoror: 503 { 504 con_t regconsave; 505 if (jcond & 1) 506 { 507 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 508 regconsave = regcon; 509 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 510 } 511 else 512 { 513 code *cnop = gennop(null); 514 logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop); 515 regconsave = regcon; 516 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 517 cdb.append(cnop); 518 } 519 andregcon(®consave); 520 freenode(e); 521 cgstate.stackclean--; 522 return; 523 } 524 525 case OPandand: 526 { 527 con_t regconsave; 528 if (jcond & 1) 529 { 530 code *cnop = gennop(null); // a dummy target address 531 logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop); 532 regconsave = regcon; 533 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 534 cdb.append(cnop); 535 } 536 else 537 { 538 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 539 regconsave = regcon; 540 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 541 } 542 andregcon(®consave); 543 freenode(e); 544 cgstate.stackclean--; 545 return; 546 } 547 548 case OPnot: 549 jcond ^= 1; 550 goto case OPbool; 551 552 case OPbool: 553 case OPs8_16: 
554 case OPu8_16: 555 case OPs16_32: 556 case OPu16_32: 557 case OPs32_64: 558 case OPu32_64: 559 case OPu32_d: 560 case OPd_ld: 561 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 562 freenode(e); 563 cgstate.stackclean--; 564 return; 565 566 case OPcond: 567 { 568 code *cnop2 = gennop(null); // addresses of start of leaves 569 code *cnop = gennop(null); 570 logexp(cdb, e.EV.E1, false, FLcode, cnop2); // eval condition 571 con_t regconold = regcon; 572 logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ); 573 genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf 574 575 con_t regconsave = regcon; 576 regcon = regconold; 577 578 cdb.append(cnop2); 579 logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ); 580 andregcon(®conold); 581 andregcon(®consave); 582 freenode(e.EV.E2); 583 freenode(e); 584 cdb.append(cnop); 585 cgstate.stackclean--; 586 return; 587 } 588 589 default: 590 break; 591 } 592 } 593 594 /* Special code for signed long compare. 595 * Not necessary for I64 until we do cents. 596 */ 597 if (OTrel2(e.Eoper) && // if < <= >= > 598 !e.Ecount && 599 ( (I16 && tybasic(e.EV.E1.Ety) == TYlong && tybasic(e.EV.E2.Ety) == TYlong) || 600 (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong)) 601 ) 602 { 603 longcmp(cdb, e, jcond != 0, fltarg, targ); 604 cgstate.stackclean--; 605 return; 606 } 607 608 regm_t retregs = mPSW; // return result in flags 609 opcode_t op = jmpopcode(e); // get jump opcode 610 if (!(jcond & 1)) 611 op ^= 0x101; // toggle jump condition(s) 612 codelem(cdb, e, &retregs, true); // evaluate elem 613 if (no87) 614 cse_flush(cdb,no87); // flush CSE's to memory 615 genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction 616 cgstate.stackclean--; 617 } 618 619 /****************************** 620 * Routine to aid in setting things up for gen(). 621 * Look for common subexpression. 622 * Can handle indirection operators, but not if they're common subs. 
623 * Input: 624 * e -> elem where we get some of the data from 625 * cs -> partially filled code to add 626 * op = opcode 627 * reg = reg field of (mod reg r/m) 628 * offset = data to be added to Voffset field 629 * keepmsk = mask of registers we must not destroy 630 * desmsk = mask of registers destroyed by executing the instruction 631 * Returns: 632 * pointer to code generated 633 */ 634 635 @trusted 636 void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset, 637 regm_t keepmsk,regm_t desmsk) 638 { 639 code* c, cg, cd; 640 641 debug 642 if (debugw) 643 printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n", 644 e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk)); 645 assert(e); 646 cs.Iflags = 0; 647 cs.Irex = 0; 648 cs.Iop = op; 649 tym_t tym = e.Ety; 650 int sz = tysize(tym); 651 652 /* Determine if location we want to get is in a register. If so, */ 653 /* substitute the register for the EA. */ 654 /* Note that operators don't go through this. CSE'd operators are */ 655 /* picked up by comsub(). 
*/ 656 if (e.Ecount && /* if cse */ 657 e.Ecount != e.Ecomsub && /* and cse was generated */ 658 op != LEA && op != 0xC4 && /* and not an LEA or LES */ 659 (op != 0xFF || reg != 3) && /* and not CALLF MEM16 */ 660 (op & 0xFFF8) != 0xD8) // and not 8087 opcode 661 { 662 assert(OTleaf(e.Eoper)); /* can't handle this */ 663 regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs 664 if (op == 0xFF && reg == 6) 665 rm &= ~XMMREGS; // can't PUSH an XMM register 666 if (sz > REGSIZE) // value is in 2 or 4 registers 667 { 668 if (I16 && sz == 8) // value is in 4 registers 669 { 670 static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ]; 671 rm &= rmask[cast(size_t)(offset >> 1)]; 672 } 673 else if (offset) 674 rm &= mMSW; /* only high words */ 675 else 676 rm &= mLSW; /* only low words */ 677 } 678 for (uint i = 0; rm; i++) 679 { 680 if (mask(i) & rm) 681 { 682 if (regcon.cse.value[i] == e && // if register has elem 683 /* watch out for a CWD destroying DX */ 684 !(i == DX && op == 0xF7 && desmsk & mDX)) 685 { 686 /* if ES, then it can only be a load */ 687 if (i == ES) 688 { 689 if (op != 0x8B) 690 break; // not a load 691 cs.Iop = 0x8C; /* MOV reg,ES */ 692 cs.Irm = modregrm(3, 0, reg & 7); 693 if (reg & 8) 694 code_orrex(cs, REX_B); 695 } 696 else // XXX reg,i 697 { 698 cs.Irm = modregrm(3, reg & 7, i & 7); 699 if (reg & 8) 700 cs.Irex |= REX_R; 701 if (i & 8) 702 cs.Irex |= REX_B; 703 if (sz == 1 && I64 && (i >= 4 || reg >= 4)) 704 cs.Irex |= REX; 705 if (I64 && (sz == 8 || sz == 16)) 706 cs.Irex |= REX_W; 707 } 708 goto L2; 709 } 710 rm &= ~mask(i); 711 } 712 } 713 } 714 715 getlvalue(cdb, cs, e, keepmsk); 716 if (offset == REGSIZE) 717 getlvalue_msw(cs); 718 else 719 cs.IEV1.Voffset += offset; 720 if (I64) 721 { 722 if (reg >= 4 && sz == 1) // if byte register 723 // Can only address those 8 bit registers if a REX byte is present 724 cs.Irex |= REX; 725 if ((op & 0xFFFFFFF8) == 0xD8) 726 cs.Irex &= ~REX_W; // not needed for x87 ops 727 if 
(mask(reg) & XMMREGS && 728 (op == LODSD || op == STOSD)) 729 cs.Irex &= ~REX_W; // not needed for xmm ops 730 } 731 code_newreg(cs, reg); // OR in reg field 732 if (!I16) 733 { 734 if (reg == 6 && op == 0xFF || /* don't PUSH a word */ 735 op == MOVZXw || op == MOVSXw || /* MOVZX/MOVSX */ 736 (op & 0xFFF8) == 0xD8 || /* 8087 instructions */ 737 op == LEA) /* LEA */ 738 { 739 cs.Iflags &= ~CFopsize; 740 if (reg == 6 && op == 0xFF) // if PUSH 741 cs.Irex &= ~REX_W; // REX is ignored for PUSH anyway 742 } 743 } 744 else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT()) 745 cs.Iflags |= CFwait; 746 L2: 747 getregs(cdb, desmsk); // save any regs we destroy 748 749 /* KLUDGE! fix up DX for divide instructions */ 750 if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */ 751 { 752 if (reg == 7) /* if IDIV */ 753 { 754 cdb.gen1(0x99); // CWD 755 if (I64 && sz == 8) 756 code_orrex(cdb.last(), REX_W); 757 } 758 else if (reg == 6) // if DIV 759 genregs(cdb, 0x33, DX, DX); // XOR DX,DX 760 } 761 762 // Eliminate MOV reg,reg 763 if ((cs.Iop & ~3) == 0x88 && 764 (cs.Irm & 0xC7) == modregrm(3,0,reg & 7)) 765 { 766 uint r = cs.Irm & 7; 767 if (cs.Irex & REX_B) 768 r |= 8; 769 if (r == reg) 770 cs.Iop = NOP; 771 } 772 773 // Eliminate MOV xmmreg,xmmreg 774 if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD && // detect LODSD, LODSS, STOSD, STOSS 775 (cs.Irm & 0xC7) == modregrm(3,0,reg & 7)) 776 { 777 reg_t r = cs.Irm & 7; 778 if (cs.Irex & REX_B) 779 r |= 8; 780 if (r == (reg - XMM0)) 781 cs.Iop = NOP; 782 } 783 784 cdb.gen(cs); 785 } 786 787 788 /************************** 789 * Get addressing mode. 790 */ 791 792 @trusted 793 uint getaddrmode(regm_t idxregs) 794 { 795 uint mode; 796 797 if (I16) 798 { 799 static ubyte error() { assert(0); } 800 801 mode = (idxregs & mBX) ? modregrm(2,0,7) : /* [BX] */ 802 (idxregs & mDI) ? modregrm(2,0,5): /* [DI] */ 803 (idxregs & mSI) ? 
modregrm(2,0,4): /* [SI] */ 804 error(); 805 } 806 else 807 { 808 const reg = findreg(idxregs & (ALLREGS | mBP)); 809 if (reg == R12) 810 mode = (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4); 811 else 812 mode = modregrmx(2,0,reg); 813 } 814 return mode; 815 } 816 817 void setaddrmode(code *c, regm_t idxregs) 818 { 819 uint mode = getaddrmode(idxregs); 820 c.Irm = mode & 0xFF; 821 c.Isib = (mode >> 8) & 0xFF; 822 c.Irex &= ~REX_B; 823 c.Irex |= mode >> 16; 824 } 825 826 /********************************************** 827 */ 828 829 @trusted 830 void getlvalue_msw(code *c) 831 { 832 if (c.IFL1 == FLreg) 833 { 834 const regmsw = c.IEV1.Vsym.Sregmsw; 835 c.Irm = (c.Irm & ~7) | (regmsw & 7); 836 if (regmsw & 8) 837 c.Irex |= REX_B; 838 else 839 c.Irex &= ~REX_B; 840 } 841 else 842 c.IEV1.Voffset += REGSIZE; 843 } 844 845 /********************************************** 846 */ 847 848 @trusted 849 void getlvalue_lsw(code *c) 850 { 851 if (c.IFL1 == FLreg) 852 { 853 const reglsw = c.IEV1.Vsym.Sreglsw; 854 c.Irm = (c.Irm & ~7) | (reglsw & 7); 855 if (reglsw & 8) 856 c.Irex |= REX_B; 857 else 858 c.Irex &= ~REX_B; 859 } 860 else 861 c.IEV1.Voffset -= REGSIZE; 862 } 863 864 /****************** 865 * Compute addressing mode. 866 * Generate & return sequence of code (if any). 867 * Return in cs the info on it. 
868 * Input: 869 * pcs -> where to store data about addressing mode 870 * e -> the lvalue elem 871 * keepmsk mask of registers we must not destroy or use 872 * if (keepmsk & RMstore), this will be only a store operation 873 * into the lvalue 874 * if (keepmsk & RMload), this will be a read operation only 875 */ 876 877 @trusted 878 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk) 879 { 880 uint fl, f, opsave; 881 elem* e1, e11, e12; 882 bool e1isadd, e1free; 883 reg_t reg; 884 tym_t e1ty; 885 Symbol* s; 886 887 //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 888 //elem_print(e); 889 assert(e); 890 elem_debug(e); 891 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 892 { 893 s = e.EV.Vsym; 894 fl = s.Sfl; 895 if (tyfloating(s.ty())) 896 objmod.fltused(); 897 } 898 else 899 fl = FLoper; 900 pcs.IFL1 = cast(ubyte)fl; 901 pcs.Iflags = CFoff; /* only want offsets */ 902 pcs.Irex = 0; 903 pcs.IEV1.Voffset = 0; 904 905 tym_t ty = e.Ety; 906 uint sz = tysize(ty); 907 if (tyfloating(ty)) 908 objmod.fltused(); 909 if (I64 && (sz == 8 || sz == 16) && !tyvector(ty)) 910 pcs.Irex |= REX_W; 911 if (!I16 && sz == SHORTSIZE) 912 pcs.Iflags |= CFopsize; 913 if (ty & mTYvolatile) 914 pcs.Iflags |= CFvolatile; 915 916 switch (fl) 917 { 918 case FLoper: 919 debug 920 if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 921 922 switch (e.Eoper) 923 { 924 case OPadd: // this way when we want to do LEA 925 e1 = e; 926 e1free = false; 927 e1isadd = true; 928 break; 929 930 case OPind: 931 case OPpostinc: // when doing (*p++ = ...) 932 case OPpostdec: // when doing (*p-- = ...) 
933 case OPbt: 934 case OPbtc: 935 case OPbtr: 936 case OPbts: 937 case OPvecfill: 938 e1 = e.EV.E1; 939 e1free = true; 940 e1isadd = e1.Eoper == OPadd; 941 break; 942 943 default: 944 printf("function: %s\n", funcsym_p.Sident.ptr); 945 elem_print(e); 946 assert(0); 947 } 948 e1ty = tybasic(e1.Ety); 949 if (e1isadd) 950 { 951 e12 = e1.EV.E2; 952 e11 = e1.EV.E1; 953 } 954 955 /* First see if we can replace *(e+&v) with 956 * MOV idxreg,e 957 * EA = [ES:] &v+idxreg 958 */ 959 f = FLconst; 960 961 /* Is address of `s` relative to RIP ? 962 */ 963 static bool relativeToRIP(Symbol* s) 964 { 965 if (!I64) 966 return false; 967 if (config.exe == EX_WIN64) 968 return true; 969 if (config.flags3 & CFG3pie) 970 { 971 if (s.Sfl == FLtlsdata || s.ty() & mTYthread) 972 { 973 if (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat) 974 return false; 975 } 976 return true; 977 } 978 else 979 return (config.flags3 & CFG3pic) != 0; 980 } 981 982 if (e1isadd && 983 ((e12.Eoper == OPrelconst && 984 !relativeToRIP(e12.EV.Vsym) && 985 (f = el_fl(e12)) != FLfardata 986 ) || 987 (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) && 988 e1.Ecount == e1.Ecomsub && 989 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 990 tysize(e11.Ety) == REGSIZE 991 ) 992 { 993 uint t; /* component of r/m field */ 994 int ss; 995 int ssi; 996 997 if (e12.Eoper == OPrelconst) 998 f = el_fl(e12); 999 /*assert(datafl[f]);*/ /* what if addr of func? */ 1000 if (!I16) 1001 { /* Any register can be an index register */ 1002 regm_t idxregs = allregs & ~keepmsk; 1003 assert(idxregs); 1004 1005 /* See if e1.EV.E1 can be a scaled index */ 1006 ss = isscaledindex(e11); 1007 if (ss) 1008 { 1009 /* Load index register with result of e11.EV.E1 */ 1010 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 1011 reg = findreg(idxregs); 1012 { 1013 t = stackfl[f] ? 
2 : 0; 1014 pcs.Irm = modregrm(t, 0, 4); 1015 pcs.Isib = modregrm(ss, reg & 7, 5); 1016 if (reg & 8) 1017 pcs.Irex |= REX_X; 1018 } 1019 } 1020 else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 1021 !e11.Ecount && 1022 e11.EV.E2.Eoper == OPconst && 1023 (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0 1024 ) 1025 { 1026 regm_t scratchm; 1027 1028 char ssflags = ssindex_array[ssi].ssflags; 1029 if (ssflags & SSFLnobp && stackfl[f]) 1030 goto L6; 1031 1032 // Load index register with result of e11.EV.E1 1033 scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true); 1034 reg = findreg(idxregs); 1035 1036 int ss1 = ssindex_array[ssi].ss1; 1037 if (ssflags & SSFLlea) 1038 { 1039 assert(!stackfl[f]); 1040 pcs.Irm = modregrm(2,0,4); 1041 pcs.Isib = modregrm(ss1, reg & 7, reg & 7); 1042 if (reg & 8) 1043 pcs.Irex |= REX_X | REX_B; 1044 } 1045 else 1046 { 1047 int rbase; 1048 reg_t r; 1049 1050 scratchm = ALLREGS & ~keepmsk; 1051 allocreg(cdb, &scratchm, &r, TYint); 1052 1053 if (ssflags & SSFLnobase1) 1054 { 1055 t = 0; 1056 rbase = 5; 1057 } 1058 else 1059 { 1060 t = 0; 1061 rbase = reg; 1062 if (rbase == BP || rbase == R13) 1063 { 1064 static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 1065 1066 // IMUL r,BP,imm32 1067 cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]); 1068 goto L7; 1069 } 1070 } 1071 1072 cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7)); 1073 if (reg & 8) 1074 code_orrex(cdb.last(), REX_X); 1075 if (rbase & 8) 1076 code_orrex(cdb.last(), REX_B); 1077 if (I64) 1078 code_orrex(cdb.last(), REX_W); 1079 1080 if (ssflags & SSFLnobase1) 1081 { 1082 cdb.last().IFL1 = FLconst; 1083 cdb.last().IEV1.Vuns = 0; 1084 } 1085 L7: 1086 if (ssflags & SSFLnobase) 1087 { 1088 t = stackfl[f] ? 
2 : 0; 1089 rbase = 5; 1090 } 1091 else 1092 { 1093 t = 2; 1094 rbase = r; 1095 assert(rbase != BP); 1096 } 1097 pcs.Irm = modregrm(t, 0, 4); 1098 pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7); 1099 if (r & 8) 1100 pcs.Irex |= REX_X; 1101 if (rbase & 8) 1102 pcs.Irex |= REX_B; 1103 } 1104 freenode(e11.EV.E2); 1105 freenode(e11); 1106 } 1107 else 1108 { 1109 L6: 1110 /* Load index register with result of e11 */ 1111 scodelem(cdb, e11, &idxregs, keepmsk, true); 1112 setaddrmode(pcs, idxregs); 1113 if (stackfl[f]) /* if we need [EBP] too */ 1114 { 1115 uint idx = pcs.Irm & 7; 1116 if (pcs.Irex & REX_B) 1117 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X; 1118 pcs.Isib = modregrm(0, idx, BP); 1119 pcs.Irm = modregrm(2, 0, 4); 1120 } 1121 } 1122 } 1123 else 1124 { 1125 regm_t idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 1126 assert(idxregs); 1127 if (stackfl[f]) /* if stack data type */ 1128 { 1129 idxregs &= mSI | mDI; /* BX can't index off stack */ 1130 if (!idxregs) goto L1; /* index regs aren't avail */ 1131 t = 6; /* [BP+SI+disp] */ 1132 } 1133 else 1134 t = 0; /* [SI + disp] */ 1135 scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg 1136 pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t); 1137 } 1138 if (f == FLpara) 1139 refparam = true; 1140 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast) 1141 reflocal = true; 1142 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr) 1143 pcs.Iflags |= CFcs; 1144 else 1145 assert(f != FLreg); 1146 pcs.IFL1 = cast(ubyte)f; 1147 if (f != FLconst) 1148 pcs.IEV1.Vsym = e12.EV.Vsym; 1149 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? 
*/ 1150 1151 /* If e1 is a CSE, we must generate an addressing mode */ 1152 /* but also leave EA in registers so others can use it */ 1153 if (e1.Ecount) 1154 { 1155 uint flagsave; 1156 1157 regm_t idxregs = IDXREGS & ~keepmsk; 1158 allocreg(cdb, &idxregs, ®, TYoffset); 1159 1160 /* If desired result is a far pointer, we'll have */ 1161 /* to load another register with the segment of v */ 1162 if (e1ty == TYfptr) 1163 { 1164 reg_t msreg; 1165 1166 idxregs |= mMSW & ALLREGS & ~keepmsk; 1167 allocreg(cdb, &idxregs, &msreg, TYfptr); 1168 msreg = findregmsw(idxregs); 1169 /* MOV msreg,segreg */ 1170 genregs(cdb, 0x8C, segfl[f], msreg); 1171 } 1172 opsave = pcs.Iop; 1173 flagsave = pcs.Iflags; 1174 ubyte rexsave = pcs.Irex; 1175 pcs.Iop = LEA; 1176 code_newreg(pcs, reg); 1177 if (!I16) 1178 pcs.Iflags &= ~CFopsize; 1179 if (I64) 1180 pcs.Irex |= REX_W; 1181 cdb.gen(pcs); // LEA idxreg,EA 1182 cssave(e1,idxregs,true); 1183 if (!I16) 1184 { 1185 pcs.Iflags = flagsave; 1186 pcs.Irex = rexsave; 1187 } 1188 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1189 pcs.Iflags |= CFss; // add SS: override 1190 pcs.Iop = opsave; 1191 pcs.IFL1 = FLoffset; 1192 pcs.IEV1.Vuns = 0; 1193 setaddrmode(pcs, idxregs); 1194 } 1195 freenode(e12); 1196 if (e1free) 1197 freenode(e1); 1198 goto Lptr; 1199 } 1200 1201 L1: 1202 1203 /* The rest of the cases could be a far pointer */ 1204 1205 regm_t idxregs; 1206 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1207 assert(idxregs); 1208 if (!I16 && 1209 (sz == REGSIZE || (I64 && sz == 4)) && 1210 keepmsk & RMstore) 1211 idxregs |= regcon.mvar; 1212 1213 switch (e1ty) 1214 { 1215 case TYfptr: /* if far pointer */ 1216 case TYhptr: 1217 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1218 assert(idxregs & mES); 1219 pcs.Iflags |= CFes; /* ES segment override */ 1220 break; 1221 1222 case TYsptr: /* if pointer to stack */ 1223 if (config.wflags & WFssneds) // if SS != DS 1224 pcs.Iflags |= CFss; /* then need SS: override */ 1225 break; 1226 1227 case TYfgPtr: 1228 if (I32) 1229 pcs.Iflags |= CFgs; 1230 else if (I64) 1231 pcs.Iflags |= CFfs; 1232 else 1233 assert(0); 1234 break; 1235 1236 case TYcptr: /* if pointer to code */ 1237 pcs.Iflags |= CFcs; /* then need CS: override */ 1238 break; 1239 1240 default: 1241 break; 1242 } 1243 pcs.IFL1 = FLoffset; 1244 pcs.IEV1.Vuns = 0; 1245 1246 /* see if we can replace *(e+c) with 1247 * MOV idxreg,e 1248 * [MOV ES,segment] 1249 * EA = [ES:]c[idxreg] 1250 */ 1251 if (e1isadd && e12.Eoper == OPconst && 1252 (!I64 || el_signx32(e12)) && 1253 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) && 1254 (!e1.Ecount || !e1free) 1255 ) 1256 { 1257 int ss; 1258 1259 pcs.IEV1.Vuns = e12.EV.Vuns; 1260 freenode(e12); 1261 if (e1free) freenode(e1); 1262 if (!I16 && e11.Eoper == OPadd && !e11.Ecount && 1263 tysize(e11.Ety) == REGSIZE) 1264 { 1265 e12 = e11.EV.E2; 1266 e11 = e11.EV.E1; 1267 e1 = e1.EV.E1; 1268 e1free = true; 1269 goto L4; 1270 } 1271 if (!I16 && (ss = isscaledindex(e11)) != 0) 1272 { // (v * scale) + const 1273 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 1274 reg = findreg(idxregs); 1275 pcs.Irm = modregrm(0, 0, 4); 1276 pcs.Isib = modregrm(ss, reg & 7, 5); 1277 if (reg & 8) 1278 pcs.Irex |= REX_X; 1279 } 1280 else 1281 { 1282 scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg 1283 setaddrmode(pcs, idxregs); 1284 } 1285 goto Lptr; 
1286 } 1287 1288 /* Look for *(v1 + v2) 1289 * EA = [v1][v2] 1290 */ 1291 1292 if (!I16 && e1isadd && (!e1.Ecount || !e1free) && 1293 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4))) 1294 { 1295 L4: 1296 regm_t idxregs2; 1297 uint base, index; 1298 1299 // Look for *(v1 + v2 << scale) 1300 int ss = isscaledindex(e12); 1301 if (ss) 1302 { 1303 scodelem(cdb, e11, &idxregs, keepmsk, true); 1304 idxregs2 = allregs & ~(idxregs | keepmsk); 1305 cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs); 1306 } 1307 1308 // Look for *(v1 << scale + v2) 1309 else if ((ss = isscaledindex(e11)) != 0) 1310 { 1311 idxregs2 = idxregs; 1312 cdisscaledindex(cdb, e11, &idxregs2, keepmsk); 1313 idxregs = allregs & ~(idxregs2 | keepmsk); 1314 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1315 } 1316 // Look for *(((v1 << scale) + c1) + v2) 1317 else if (e11.Eoper == OPadd && !e11.Ecount && 1318 e11.EV.E2.Eoper == OPconst && 1319 (ss = isscaledindex(e11.EV.E1)) != 0 1320 ) 1321 { 1322 pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns; 1323 idxregs2 = idxregs; 1324 cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk); 1325 idxregs = allregs & ~(idxregs2 | keepmsk); 1326 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1327 freenode(e11.EV.E2); 1328 freenode(e11); 1329 } 1330 else 1331 { 1332 scodelem(cdb, e11, &idxregs, keepmsk, true); 1333 idxregs2 = allregs & ~(idxregs | keepmsk); 1334 scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true); 1335 } 1336 base = findreg(idxregs); 1337 index = findreg(idxregs2); 1338 pcs.Irm = modregrm(2, 0, 4); 1339 pcs.Isib = modregrm(ss, index & 7, base & 7); 1340 if (index & 8) 1341 pcs.Irex |= REX_X; 1342 if (base & 8) 1343 pcs.Irex |= REX_B; 1344 if (e1free) 1345 freenode(e1); 1346 1347 goto Lptr; 1348 } 1349 1350 /* give up and replace *e1 with 1351 * MOV idxreg,e 1352 * EA = 0[idxreg] 1353 * pinholeopt() will usually correct the 0, we need it in case 1354 * we have a pointer to a long and need an offset to the second 1355 * word. 
1356 */ 1357 1358 assert(e1free); 1359 scodelem(cdb, e1, &idxregs, keepmsk, true); // load index register 1360 setaddrmode(pcs, idxregs); 1361 Lptr: 1362 if (config.flags3 & CFG3ptrchk) 1363 cod3_ptrchk(cdb, pcs, keepmsk); // validate pointer code 1364 break; 1365 1366 case FLdatseg: 1367 assert(0); 1368 static if (0) 1369 { 1370 pcs.Irm = modregrm(0, 0, BPRM); 1371 pcs.IEVpointer1 = e.EVpointer; 1372 break; 1373 } 1374 1375 case FLfltreg: 1376 reflocal = true; 1377 pcs.Irm = modregrm(2, 0, BPRM); 1378 pcs.IEV1.Vint = 0; 1379 break; 1380 1381 case FLreg: 1382 goto L2; 1383 1384 case FLpara: 1385 if (s.Sclass == SC.shadowreg) 1386 goto case FLfast; 1387 Lpara: 1388 refparam = true; 1389 pcs.Irm = modregrm(2, 0, BPRM); 1390 goto L2; 1391 1392 case FLauto: 1393 case FLfast: 1394 if (regParamInPreg(s)) 1395 { 1396 regm_t pregm = s.Spregm(); 1397 /* See if the parameter is still hanging about in a register, 1398 * and so can we load from that register instead. 1399 */ 1400 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/) 1401 { 1402 if (keepmsk & RMload && !anyiasm) 1403 { 1404 auto voffset = e.EV.Voffset; 1405 if (sz <= REGSIZE) 1406 { 1407 const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg; 1408 if (voffset >= REGSIZE) 1409 voffset -= REGSIZE; 1410 1411 /* preg could be NOREG if it's a variadic function and we're 1412 * in Win64 shadow regs and we're offsetting to get to the start 1413 * of the variadic args. 
1414 */ 1415 if (preg != NOREG && regcon.params & mask(preg)) 1416 { 1417 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset); 1418 if (mask(preg) & XMMREGS && sz != REGSIZE) 1419 { 1420 /* The following fails with this from std.math on Linux64: 1421 void main() 1422 { 1423 alias T = float; 1424 T x = T.infinity; 1425 T e = T.infinity; 1426 int eptr; 1427 T v = frexp(x, eptr); 1428 assert(isIdentical(e, v)); 1429 } 1430 */ 1431 } 1432 else if (voffset == 0) 1433 { 1434 pcs.Irm = modregrm(3, 0, preg & 7); 1435 if (preg & 8) 1436 pcs.Irex |= REX_B; 1437 if (I64 && sz == 1 && preg >= 4) 1438 pcs.Irex |= REX; 1439 regcon.used |= mask(preg); 1440 break; 1441 } 1442 else if (voffset == 1 && sz == 1 && preg < 4) 1443 { 1444 pcs.Irm = modregrm(3, 0, 4 | preg); // use H register 1445 regcon.used |= mask(preg); 1446 break; 1447 } 1448 } 1449 } 1450 } 1451 else 1452 regcon.params &= ~pregm; 1453 } 1454 } 1455 if (s.Sclass == SC.shadowreg) 1456 goto Lpara; 1457 goto case FLbprel; 1458 1459 case FLbprel: 1460 reflocal = true; 1461 pcs.Irm = modregrm(2, 0, BPRM); 1462 goto L2; 1463 1464 case FLextern: 1465 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 1466 { 1467 if (config.exe & EX_windos) 1468 { 1469 if (I64) 1470 { // GS:[88] 1471 pcs.Irm = modregrm(0, 0, 4); 1472 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1473 pcs.IFL1 = FLconst; 1474 pcs.IEV1.Vuns = 88; 1475 pcs.Iflags = CFgs; 1476 pcs.Irex |= REX_W; 1477 break; 1478 } 1479 else 1480 { 1481 pcs.Iflags |= CFfs; // add FS: override 1482 } 1483 } 1484 else if (config.exe & (EX_OSX | EX_OSX64)) 1485 { 1486 } 1487 else if (config.exe & EX_posix) 1488 assert(0); 1489 } 1490 if (s.ty() & mTYcs && cast(bool) LARGECODE) 1491 goto Lfardata; 1492 goto L3; 1493 1494 case FLtlsdata: 1495 if (config.exe & EX_posix) 1496 goto L3; 1497 assert(0); 1498 1499 case FLdata: 1500 case FLudata: 1501 case FLcsdata: 1502 case FLgot: 1503 case FLgotoff: 1504 L3: 
1505 pcs.Irm = modregrm(0, 0, BPRM); 1506 L2: 1507 if (fl == FLreg) 1508 { 1509 //printf("test: FLreg, %s %d regcon.mvar = %s\n", 1510 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar)); 1511 if (!(s.Sregm & regcon.mvar)) 1512 symbol_print(s); 1513 assert(s.Sregm & regcon.mvar); 1514 1515 /* Attempting to paint a float as an integer or an integer as a float 1516 * will cause serious problems since the EA is loaded separatedly from 1517 * the opcode. The only way to deal with this is to prevent enregistering 1518 * such variables. 1519 */ 1520 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) || 1521 !tyxmmreg(ty) && (s.Sregm & XMMREGS)) 1522 cgreg_unregister(s.Sregm); 1523 1524 if ( 1525 s.Sclass == SC.regpar || 1526 s.Sclass == SC.parameter) 1527 { refparam = true; 1528 reflocal = true; // kludge to set up prolog 1529 } 1530 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1531 if (s.Sreglsw & 8) 1532 pcs.Irex |= REX_B; 1533 if (e.EV.Voffset == REGSIZE && sz == REGSIZE) 1534 { 1535 pcs.Irm = modregrm(3, 0, s.Sregmsw & 7); 1536 if (s.Sregmsw & 8) 1537 pcs.Irex |= REX_B; 1538 else 1539 pcs.Irex &= ~REX_B; 1540 } 1541 else if (e.EV.Voffset == 1 && sz == 1) 1542 { 1543 assert(s.Sregm & BYTEREGS); 1544 assert(s.Sreglsw < 4); 1545 pcs.Irm |= 4; // use 2nd byte of register 1546 } 1547 else 1548 { 1549 assert(!e.EV.Voffset); 1550 if (I64 && sz == 1 && s.Sreglsw >= 4) 1551 pcs.Irex |= REX; 1552 } 1553 } 1554 else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1555 { 1556 pcs.Iflags |= CFcs | CFoff; 1557 } 1558 if (config.flags3 & CFG3pic && 1559 (fl == FLtlsdata || s.ty() & mTYthread)) 1560 { 1561 if (I32) 1562 { 1563 if (config.flags3 & CFG3pie) 1564 { 1565 pcs.Iflags |= CFgs; 1566 } 1567 } 1568 else if (I64) 1569 { 1570 if (config.flags3 & CFG3pie && 1571 (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat)) 1572 { 1573 pcs.Iflags |= CFfs; 1574 pcs.Irm = modregrm(0, 0, 4); 1575 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1576 } 
1577 else 1578 { 1579 //pcs.Iflags |= CFopsize; //I don't know what this was for 1580 pcs.Irex = 0x48; 1581 } 1582 } 1583 } 1584 pcs.IEV1.Vsym = s; 1585 pcs.IEV1.Voffset = e.EV.Voffset; 1586 if (sz == 1) 1587 { /* Don't use SI or DI for this variable */ 1588 s.Sflags |= GTbyte; 1589 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1) 1590 { 1591 debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr); 1592 s.Sflags &= ~GTregcand; 1593 } 1594 } 1595 else if (sz == 2 && tyxmmreg(s.ty()) && config.fpxmmregs) 1596 { 1597 debug if (debugr) printf("'%s' not XMM reg cand due to short access\n", s.Sident.ptr); 1598 s.Sflags &= ~GTregcand; 1599 } 1600 else if (e.EV.Voffset || sz > tysize(s.Stype.Tty)) 1601 { 1602 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr); 1603 s.Sflags &= ~GTregcand; 1604 } 1605 else if (tyvector(s.Stype.Tty) && sz < tysize(s.Stype.Tty)) 1606 { 1607 // https://issues.dlang.org/show_bug.cgi?id=21673 1608 // https://issues.dlang.org/show_bug.cgi?id=21676 1609 // https://issues.dlang.org/show_bug.cgi?id=23009 1610 // PR: https://github.com/dlang/dmd/pull/13977 1611 // cannot read or write to partial vector 1612 debug if (debugr) printf("'%s' not reg cand due to vector type\n", s.Sident.ptr); 1613 s.Sflags &= ~GTregcand; 1614 } 1615 1616 if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty)) 1617 { 1618 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr); 1619 // Can't successfully mix XMM register variables accessed as integers 1620 s.Sflags &= ~GTregcand; 1621 } 1622 1623 if (!(keepmsk & RMstore)) // if not store only 1624 s.Sflags |= SFLread; // assume we are doing a read 1625 break; 1626 1627 case FLpseudo: 1628 version (MARS) 1629 { 1630 { 1631 getregs(cdb, mask(s.Sreglsw)); 1632 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1633 if (s.Sreglsw & 8) 1634 pcs.Irex |= REX_B; 1635 if (e.EV.Voffset == 1 && sz == 1) 1636 { assert(s.Sregm & BYTEREGS); 1637 
/*****************************
 * Given an opcode and EA in `pcs`, generate that instruction once for
 * each integer register that holds a piece of a floating point value.
 *
 * The `reg` field of `pcs.Irm` is rewritten before every emission and
 * `pcs.IEV1.Voffset` is stepped through the pieces of the operand; on
 * return `Voffset` has its original value again.
 *
 * Not usable when generating 64 bit code (asserted).
 *
 * Params:
 *      cdb = code sink the instructions are appended to
 *      pcs = instruction template holding the opcode and the EA
 *      tym = either TYdouble or TYfloat
 */

@trusted
void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    tym = tybasic(tym);
    if (I32)
    {
        // float lives in EAX, anything else in EDX:EAX
        getregs(cdb, (tym == TYfloat) ? mAX : mAX | mDX);
        if (tym != TYfloat)
        {
            // upper half first, then restore the offset
            pcs.IEV1.Voffset += REGSIZE;
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= REGSIZE;
        }
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
    }
    else
    {
        // 16 bit code: walk from the most significant word downwards
        getregs(cdb, (tym == TYfloat) ? FLOATREGS_16 : DOUBLEREGS_16);
        pcs.IEV1.Voffset += (tym == TYfloat) ? 2 : 6;
        if (tym == TYfloat)
            NEWREG(pcs.Irm, DX);
        else
            NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
        pcs.IEV1.Voffset -= 2;
        if (tym == TYfloat)
            NEWREG(pcs.Irm, AX);
        else
            NEWREG(pcs.Irm, BX);
        cdb.gen(pcs);
        if (tym != TYfloat)
        {
            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, CX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= 2;     /* note that exit is with Voffset unaltered */
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
        }
    }
}


/*****************************
 * Given a result in registers, test it for true or false.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 *
 * Params:
 *      cdb      = code sink the test sequence is appended to
 *      regm     = mask of the register(s) holding the value; must contain
 *                 at least one of XMMREGS, mBP or ALLREGS
 *      tym      = type of the value (only the basic type is used)
 *      saveflag = if non-zero, a scratch register is allocated instead of
 *                 destroying the value's own register(s)
 */

@trusted
void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                       // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];

    // Byte sized value: a single TEST on the byte register
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);   // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);
        return;
    }

    // Value in XMM register(s): compare against a zeroed XMM scratch register
    if (regm & XMMREGS)
    {
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;              // operand size prefix selects the double precision forms
        cdb.gen2(op | XORPS, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0));     // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {
            // complex: the second register is only compared if the first compared equal
            code *cnop = gennop(null);
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE     L1
            genjmp(cdb, JP,  FLcode, cast(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask(reg));
            cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg
            cdb.append(cnop);
        }
        return;
    }

    // Value fits in one general purpose register
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {
                if (saveflag)
                {
                    scrregm = allregs & ~regm;                  // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);                // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                cdb.gen2(0xD1, modregrmx(3, 4, reg));           // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                                 // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;                  // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
            gentstreg(cdb, reg);                                // TEST reg,reg
        return;
    }

    // Value occupies more than one register: OR the pieces together
    if (saveflag || tyfv(tym))
    {
    L1:
        scrregm = ALLREGS & ~regm;                      // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset);     // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg);  // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);        // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);            // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg));     // SHL scrreg,1
            }
            reg = findreglsw(regm);
            genorreg(cdb, scrreg, reg);                 // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            genmovreg(cdb, scrreg, AX);                 // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg)); // SHL scrreg,1
            genorreg(cdb, scrreg, BX);                  // OR scrreg,BX
            genorreg(cdb, scrreg, CX);                  // OR scrreg,CX
            genorreg(cdb, scrreg, DX);                  // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))                // if register variable
                goto L1;                                // don't trash it
            getregs(cdb, mask(reg));                    // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));    // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));       // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
        }
        else if (sz == 8)
        {
            assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                           // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));     // SHL AX,1
            genorreg(cdb, AX, BX);                      // OR AX,BX
            genorreg(cdb, AX, CX);                      // OR AX,CX
            genorreg(cdb, AX, DX);                      // OR AX,DX
        }
        else
            assert(0);
    }
    // mark the last generated instruction with CFpsw
    code_orflag(cdb.last(),CFpsw);
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Params:
 *      cdb      = code sink the fix-up code is appended to
 *      e        = expression whose value is being moved; supplies the type
 *      retregs  = mask of where the value currently is; must be non-zero
 *      pretregs = on entry the mask of acceptable destinations (may include
 *                 mPSW to request the flags); nothing is generated if it is 0.
 *                 Updated to where the value ends up.
 */

@trusted
void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;           // if don't want result
    assert(e && retregs);                 // need something to work with
    regm_t forccs = *pretregs & mPSW;
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        // Second byte of a register variable: the flags must be set from
        // the H register here, and the generic flag test below is disabled
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        *pretregs = retregs;
    else if (forregs)                     // if return the result in registers
    {
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16; // for tstresult() below
            }
            else
            {
                // `debug` governs only the single statement that follows it, so
                // the diagnostic is kept separate: the assert must also fire in
                // non-debug builds rather than fall through and emit bad code.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);  // allocate return regs
            if (retregs & XMMREGS)
            {
                reg = findreg(retregs & XMMREGS);
                if (mask(rreg) & XMMREGS)
                    genmovreg(cdb, rreg, reg, tym);
                else
                {
                    // MOVSD floatreg, XMM?
                    cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg
            }
            else
            {
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);      // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        if (retregs & (mST01 | mST0))
        {
            *pretregs |= forccs;
            fixresult87(cdb, e, retregs, pretregs);
        }
        else
            tstresult(cdb, retregs, tym, forregs);
    }
}

/*******************************
 * Extra information about each CLIB runtime library function.
 * These are bit flags stored in `ClibInfo.flags`.
 */

enum
{
    INF32         = 1,      /// if 32 bit only
    INFfloat      = 2,      /// if this is floating point
    INFwkdone     = 4,      /// if weak extern is already done
    INF64         = 8,      /// if 64 bit only
    INFpushebx    = 0x10,   /// push EBX before load_localgot()
    INFpusheabcdx = 0x20,   /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}

/*******************************
 * Calling convention details of one CLIB runtime library function.
 */
struct ClibInfo
{
    regm_t retregs16;   /// registers that 16 bit result is returned in
    regm_t retregs32;   /// registers that 32 bit result is returned in
    ubyte pop;          /// # of bytes popped off of stack upon return
    ubyte flags;        /// INFxxx
    byte push87;        /// # of pushes onto the 8087 stack
    byte pop87;         /// # of pops off of the 8087 stack
}

__gshared int clib_inited = false;          // true if initialized

/*******************************
 * Create the Symbol for a runtime library function.
 *
 * The symbol is given type `tsclib`, storage class `SC.extern_` and
 * `FLfunc`.
 *
 * Params:
 *      name    = 0-terminated name of the function
 *      desregs = mask of registers the function destroys; every register of
 *                `mBP | mES | ALLREGS` not in this mask is recorded in
 *                `Sregsaved`
 * Returns:
 *      the newly allocated Symbol
 */
@trusted
Symbol* symboly(const(char)* name, regm_t desregs)
{
    Symbol *s = symbol_calloc(name[0 .. strlen(name)]);
    s.Stype = tsclib;
    s.Sclass = SC.extern_;
    s.Sfl = FLfunc;
    s.Ssymnum = 0;
    s.Sregsaved = ~desregs & (mBP | mES | ALLREGS);
    return s;
}
"__LDIV__" : "_LDIV@"; 2189 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2190 cinfo.retregs32 = mDX|mAX; 2191 } 2192 break; 2193 2194 case CLIB.lmod: 2195 cinfo.retregs16 = mCX|mBX; 2196 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD)) 2197 { 2198 s = symboly("__moddi3", mAX|mBX|mCX|mDX); 2199 cinfo.flags = INFpushebx; 2200 cinfo.retregs32 = mDX|mAX; 2201 } 2202 else if (config.exe & EX_SOLARIS) 2203 { 2204 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2205 cinfo.flags = INFpushebx; 2206 cinfo.retregs32 = mCX|mBX; 2207 } 2208 else if (I32 && config.objfmt == OBJ_MSCOFF) 2209 { 2210 s = symboly("_allrem", mAX|mBX|mCX|mDX); 2211 cinfo.flags = INFpusheabcdx; 2212 cinfo.retregs32 = mAX|mDX; 2213 } 2214 else 2215 { 2216 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2217 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2218 cinfo.retregs32 = mCX|mBX; 2219 } 2220 break; 2221 2222 case CLIB.uldiv: 2223 cinfo.retregs16 = mDX|mAX; 2224 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD)) 2225 { 2226 s = symboly("__udivdi3", mAX|mBX|mCX|mDX); 2227 cinfo.flags = INFpushebx; 2228 cinfo.retregs32 = mDX|mAX; 2229 } 2230 else if (config.exe & EX_SOLARIS) 2231 { 2232 s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX); 2233 cinfo.flags = INFpushebx; 2234 cinfo.retregs32 = mDX|mAX; 2235 } 2236 else if (I32 && config.objfmt == OBJ_MSCOFF) 2237 { 2238 s = symboly("_aulldiv", mAX|mBX|mCX|mDX); 2239 cinfo.flags = INFpusheabcdx; 2240 cinfo.retregs32 = mDX|mAX; 2241 } 2242 else 2243 { 2244 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2245 s = symboly(name, (config.exe & ex_unix) ? 
mAX|mBX|mCX|mDX : ALLREGS); 2246 cinfo.retregs32 = mDX|mAX; 2247 } 2248 break; 2249 2250 case CLIB.ulmod: 2251 cinfo.retregs16 = mCX|mBX; 2252 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD)) 2253 { 2254 s = symboly("__umoddi3", mAX|mBX|mCX|mDX); 2255 cinfo.flags = INFpushebx; 2256 cinfo.retregs32 = mDX|mAX; 2257 } 2258 else if (config.exe & EX_SOLARIS) 2259 { 2260 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2261 cinfo.flags = INFpushebx; 2262 cinfo.retregs32 = mCX|mBX; 2263 } 2264 else if (I32 && config.objfmt == OBJ_MSCOFF) 2265 { 2266 s = symboly("_aullrem", mAX|mBX|mCX|mDX); 2267 cinfo.flags = INFpusheabcdx; 2268 cinfo.retregs32 = mAX|mDX; 2269 } 2270 else 2271 { 2272 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2273 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2274 cinfo.retregs32 = mCX|mBX; 2275 } 2276 break; 2277 2278 // This section is only for Windows and DOS (i.e. machines without the x87 FPU) 2279 case CLIB.dmul: 2280 s = symboly("_DMUL@",mAX|mBX|mCX|mDX); 2281 cinfo.retregs16 = DOUBLEREGS_16; 2282 cinfo.retregs32 = DOUBLEREGS_32; 2283 cinfo.pop = 8; 2284 cinfo.flags = INFfloat; 2285 cinfo.push87 = 1; 2286 cinfo.pop87 = 1; 2287 break; 2288 2289 case CLIB.ddiv: 2290 s = symboly("_DDIV@",mAX|mBX|mCX|mDX); 2291 cinfo.retregs16 = DOUBLEREGS_16; 2292 cinfo.retregs32 = DOUBLEREGS_32; 2293 cinfo.pop = 8; 2294 cinfo.flags = INFfloat; 2295 cinfo.push87 = 1; 2296 cinfo.pop87 = 1; 2297 break; 2298 2299 case CLIB.dtst0: 2300 s = symboly("_DTST0@",0); 2301 cinfo.flags = INFfloat; 2302 break; 2303 2304 case CLIB.dtst0exc: 2305 s = symboly("_DTST0EXC@",0); 2306 cinfo.flags = INFfloat; 2307 break; 2308 2309 case CLIB.dcmp: 2310 s = symboly("_DCMP@",0); 2311 cinfo.pop = 8; 2312 cinfo.flags = INFfloat; 2313 cinfo.push87 = 1; 2314 cinfo.pop87 = 1; 2315 break; 2316 2317 case CLIB.dcmpexc: 2318 s = symboly("_DCMPEXC@",0); 2319 cinfo.pop = 8; 2320 cinfo.flags = INFfloat; 2321 cinfo.push87 = 1; 2322 cinfo.pop87 = 1; 2323 
break; 2324 2325 case CLIB.dneg: 2326 s = symboly("_DNEG@",I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2327 cinfo.retregs16 = DOUBLEREGS_16; 2328 cinfo.retregs32 = DOUBLEREGS_32; 2329 cinfo.flags = INFfloat; 2330 break; 2331 2332 case CLIB.dadd: 2333 s = symboly("_DADD@",mAX|mBX|mCX|mDX); 2334 cinfo.retregs16 = DOUBLEREGS_16; 2335 cinfo.retregs32 = DOUBLEREGS_32; 2336 cinfo.pop = 8; 2337 cinfo.flags = INFfloat; 2338 cinfo.push87 = 1; 2339 cinfo.pop87 = 1; 2340 break; 2341 2342 case CLIB.dsub: 2343 s = symboly("_DSUB@",mAX|mBX|mCX|mDX); 2344 cinfo.retregs16 = DOUBLEREGS_16; 2345 cinfo.retregs32 = DOUBLEREGS_32; 2346 cinfo.pop = 8; 2347 cinfo.flags = INFfloat; 2348 cinfo.push87 = 1; 2349 cinfo.pop87 = 1; 2350 break; 2351 2352 case CLIB.fmul: 2353 s = symboly("_FMUL@",mAX|mBX|mCX|mDX); 2354 cinfo.retregs16 = FLOATREGS_16; 2355 cinfo.retregs32 = FLOATREGS_32; 2356 cinfo.flags = INFfloat; 2357 cinfo.push87 = 1; 2358 cinfo.pop87 = 1; 2359 break; 2360 2361 case CLIB.fdiv: 2362 s = symboly("_FDIV@",mAX|mBX|mCX|mDX); 2363 cinfo.retregs16 = FLOATREGS_16; 2364 cinfo.retregs32 = FLOATREGS_32; 2365 cinfo.flags = INFfloat; 2366 cinfo.push87 = 1; 2367 cinfo.pop87 = 1; 2368 break; 2369 2370 case CLIB.ftst0: 2371 s = symboly("_FTST0@",0); 2372 cinfo.flags = INFfloat; 2373 break; 2374 2375 case CLIB.ftst0exc: 2376 s = symboly("_FTST0EXC@",0); 2377 cinfo.flags = INFfloat; 2378 break; 2379 2380 case CLIB.fcmp: 2381 s = symboly("_FCMP@",0); 2382 cinfo.flags = INFfloat; 2383 cinfo.push87 = 1; 2384 cinfo.pop87 = 1; 2385 break; 2386 2387 case CLIB.fcmpexc: 2388 s = symboly("_FCMPEXC@",0); 2389 cinfo.flags = INFfloat; 2390 cinfo.push87 = 1; 2391 cinfo.pop87 = 1; 2392 break; 2393 2394 case CLIB.fneg: 2395 s = symboly("_FNEG@",I16 ? 
FLOATREGS_16 : FLOATREGS_32); 2396 cinfo.retregs16 = FLOATREGS_16; 2397 cinfo.retregs32 = FLOATREGS_32; 2398 cinfo.flags = INFfloat; 2399 break; 2400 2401 case CLIB.fadd: 2402 s = symboly("_FADD@",mAX|mBX|mCX|mDX); 2403 cinfo.retregs16 = FLOATREGS_16; 2404 cinfo.retregs32 = FLOATREGS_32; 2405 cinfo.flags = INFfloat; 2406 cinfo.push87 = 1; 2407 cinfo.pop87 = 1; 2408 break; 2409 2410 case CLIB.fsub: 2411 s = symboly("_FSUB@",mAX|mBX|mCX|mDX); 2412 cinfo.retregs16 = FLOATREGS_16; 2413 cinfo.retregs32 = FLOATREGS_32; 2414 cinfo.flags = INFfloat; 2415 cinfo.push87 = 1; 2416 cinfo.pop87 = 1; 2417 break; 2418 2419 case CLIB.dbllng: 2420 { 2421 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@"; 2422 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2423 cinfo.retregs16 = mDX | mAX; 2424 cinfo.retregs32 = mAX; 2425 cinfo.flags = INFfloat; 2426 cinfo.push87 = 1; 2427 cinfo.pop87 = 1; 2428 break; 2429 } 2430 2431 case CLIB.lngdbl: 2432 { 2433 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@"; 2434 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2435 cinfo.retregs16 = DOUBLEREGS_16; 2436 cinfo.retregs32 = DOUBLEREGS_32; 2437 cinfo.flags = INFfloat; 2438 cinfo.push87 = 1; 2439 cinfo.pop87 = 1; 2440 break; 2441 } 2442 2443 case CLIB.dblint: 2444 { 2445 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@"; 2446 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2447 cinfo.retregs16 = mAX; 2448 cinfo.retregs32 = mAX; 2449 cinfo.flags = INFfloat; 2450 cinfo.push87 = 1; 2451 cinfo.pop87 = 1; 2452 break; 2453 } 2454 2455 case CLIB.intdbl: 2456 { 2457 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@"; 2458 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2459 cinfo.retregs16 = DOUBLEREGS_16; 2460 cinfo.retregs32 = DOUBLEREGS_32; 2461 cinfo.flags = INFfloat; 2462 cinfo.push87 = 1; 2463 cinfo.pop87 = 1; 2464 break; 2465 } 2466 2467 case CLIB.dbluns: 2468 { 2469 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@"; 2470 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2471 cinfo.retregs16 = mAX; 2472 cinfo.retregs32 = mAX; 2473 cinfo.flags = INFfloat; 2474 cinfo.push87 = 1; 2475 cinfo.pop87 = 1; 2476 break; 2477 } 2478 2479 case CLIB.unsdbl: 2480 // Y(DOUBLEREGS_32,"__UNSDBL"), // CLIB.unsdbl 2481 // Y(DOUBLEREGS_16,"_UNSDBL@"), 2482 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _UNSDBL@ unsdbl 2483 { 2484 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@"; 2485 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2486 cinfo.retregs16 = DOUBLEREGS_16; 2487 cinfo.retregs32 = DOUBLEREGS_32; 2488 cinfo.flags = INFfloat; 2489 cinfo.push87 = 1; 2490 cinfo.pop87 = 1; 2491 break; 2492 } 2493 2494 case CLIB.dblulng: 2495 { 2496 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@"; 2497 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2498 cinfo.retregs16 = mDX|mAX; 2499 cinfo.retregs32 = mAX; 2500 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat; 2501 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1; 2502 cinfo.pop87 = 1; 2503 break; 2504 } 2505 2506 case CLIB.ulngdbl: 2507 { 2508 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@"; 2509 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2510 cinfo.retregs16 = DOUBLEREGS_16; 2511 cinfo.retregs32 = DOUBLEREGS_32; 2512 cinfo.flags = INFfloat; 2513 cinfo.push87 = 1; 2514 cinfo.pop87 = 1; 2515 break; 2516 } 2517 2518 case CLIB.dblflt: 2519 { 2520 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@"; 2521 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2522 cinfo.retregs16 = FLOATREGS_16; 2523 cinfo.retregs32 = FLOATREGS_32; 2524 cinfo.flags = INFfloat; 2525 cinfo.push87 = 1; 2526 cinfo.pop87 = 1; 2527 break; 2528 } 2529 2530 case CLIB.fltdbl: 2531 { 2532 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@"; 2533 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32); 2534 cinfo.retregs16 = DOUBLEREGS_16; 2535 cinfo.retregs32 = DOUBLEREGS_32; 2536 cinfo.flags = INFfloat; 2537 cinfo.push87 = 1; 2538 cinfo.pop87 = 1; 2539 break; 2540 } 2541 2542 case CLIB.dblllng: 2543 { 2544 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@"; 2545 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2546 cinfo.retregs16 = DOUBLEREGS_16; 2547 cinfo.retregs32 = mDX|mAX; 2548 cinfo.flags = INFfloat; 2549 cinfo.push87 = 1; 2550 cinfo.pop87 = 1; 2551 break; 2552 } 2553 2554 case CLIB.llngdbl: 2555 { 2556 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@"; 2557 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2558 cinfo.retregs16 = DOUBLEREGS_16; 2559 cinfo.retregs32 = DOUBLEREGS_32; 2560 cinfo.flags = INFfloat; 2561 cinfo.push87 = 1; 2562 cinfo.pop87 = 1; 2563 break; 2564 } 2565 2566 case CLIB.dblullng: 2567 { 2568 if (config.exe == EX_WIN64) 2569 { 2570 s = symboly("__DBLULLNG", DOUBLEREGS_32); 2571 cinfo.retregs32 = mAX; 2572 cinfo.flags = INFfloat; 2573 cinfo.push87 = 2; 2574 cinfo.pop87 = 2; 2575 } 2576 else 2577 { 2578 const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@"; 2579 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2580 cinfo.retregs16 = DOUBLEREGS_16; 2581 cinfo.retregs32 = I64 ? mAX : mDX|mAX; 2582 cinfo.flags = INFfloat; 2583 cinfo.push87 = (config.exe & ex_unix) ? 2 : 1; 2584 cinfo.pop87 = (config.exe & ex_unix) ? 
2 : 1; 2585 } 2586 break; 2587 } 2588 2589 case CLIB.ullngdbl: 2590 { 2591 if (config.exe == EX_WIN64) 2592 { 2593 s = symboly("__ULLNGDBL", DOUBLEREGS_32); 2594 cinfo.retregs32 = mAX; 2595 cinfo.flags = INFfloat; 2596 cinfo.push87 = 1; 2597 cinfo.pop87 = 1; 2598 } 2599 else 2600 { 2601 const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@"; 2602 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2603 cinfo.retregs16 = DOUBLEREGS_16; 2604 cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32; 2605 cinfo.flags = INFfloat; 2606 cinfo.push87 = 1; 2607 cinfo.pop87 = 1; 2608 } 2609 break; 2610 } 2611 2612 case CLIB.dtst: 2613 { 2614 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@"; 2615 s = symboly(name, 0); 2616 cinfo.flags = INFfloat; 2617 break; 2618 } 2619 2620 case CLIB.vptrfptr: 2621 { 2622 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@"; 2623 s = symboly(name, mES|mBX); 2624 cinfo.retregs16 = mES|mBX; 2625 cinfo.retregs32 = mES|mBX; 2626 break; 2627 } 2628 2629 case CLIB.cvptrfptr: 2630 { 2631 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@"; 2632 s = symboly(name, mES|mBX); 2633 cinfo.retregs16 = mES|mBX; 2634 cinfo.retregs32 = mES|mBX; 2635 break; 2636 } 2637 2638 case CLIB._87topsw: 2639 { 2640 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@"; 2641 s = symboly(name, 0); 2642 cinfo.flags = INFfloat; 2643 break; 2644 } 2645 2646 case CLIB.fltto87: 2647 { 2648 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@"; 2649 s = symboly(name, mST0); 2650 cinfo.retregs16 = mST0; 2651 cinfo.retregs32 = mST0; 2652 cinfo.flags = INFfloat; 2653 cinfo.push87 = 1; 2654 break; 2655 } 2656 2657 case CLIB.dblto87: 2658 { 2659 const(char)* name = (config.exe & ex_unix) ? 
"__DBLTO87" : "_DBLTO87@"; 2660 s = symboly(name, mST0); 2661 cinfo.retregs16 = mST0; 2662 cinfo.retregs32 = mST0; 2663 cinfo.flags = INFfloat; 2664 cinfo.push87 = 1; 2665 break; 2666 } 2667 2668 case CLIB.dblint87: 2669 { 2670 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@"; 2671 s = symboly(name, mST0|mAX); 2672 cinfo.retregs16 = mAX; 2673 cinfo.retregs32 = mAX; 2674 cinfo.flags = INFfloat; 2675 break; 2676 } 2677 2678 case CLIB.dbllng87: 2679 { 2680 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@"; 2681 s = symboly(name, mST0|mAX|mDX); 2682 cinfo.retregs16 = mDX|mAX; 2683 cinfo.retregs32 = mAX; 2684 cinfo.flags = INFfloat; 2685 break; 2686 } 2687 2688 case CLIB.ftst: 2689 { 2690 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@"; 2691 s = symboly(name, 0); 2692 cinfo.flags = INFfloat; 2693 break; 2694 } 2695 2696 case CLIB.fcompp: 2697 { 2698 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@"; 2699 s = symboly(name, 0); 2700 cinfo.retregs16 = mPSW; 2701 cinfo.retregs32 = mPSW; 2702 cinfo.flags = INFfloat; 2703 cinfo.pop87 = 2; 2704 break; 2705 } 2706 2707 case CLIB.ftest: 2708 { 2709 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@"; 2710 s = symboly(name, 0); 2711 cinfo.retregs16 = mPSW; 2712 cinfo.retregs32 = mPSW; 2713 cinfo.flags = INFfloat; 2714 break; 2715 } 2716 2717 case CLIB.ftest0: 2718 { 2719 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@"; 2720 s = symboly(name, 0); 2721 cinfo.retregs16 = mPSW; 2722 cinfo.retregs32 = mPSW; 2723 cinfo.flags = INFfloat; 2724 break; 2725 } 2726 2727 case CLIB.fdiv87: 2728 { 2729 const(char)* name = (config.exe & ex_unix) ? 
"__FDIVP" : "_FDIVP"; 2730 s = symboly(name, mST0|mAX|mBX|mCX|mDX); 2731 cinfo.retregs16 = mST0; 2732 cinfo.retregs32 = mST0; 2733 cinfo.flags = INFfloat; 2734 cinfo.push87 = 1; 2735 cinfo.pop87 = 1; 2736 break; 2737 } 2738 2739 // Complex numbers 2740 case CLIB.cmul: 2741 { 2742 s = symboly("_Cmul", mST0|mST01); 2743 cinfo.retregs16 = mST01; 2744 cinfo.retregs32 = mST01; 2745 cinfo.flags = INF32|INFfloat; 2746 cinfo.push87 = 3; 2747 cinfo.pop87 = 5; 2748 break; 2749 } 2750 2751 case CLIB.cdiv: 2752 { 2753 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01); 2754 cinfo.retregs16 = mST01; 2755 cinfo.retregs32 = mST01; 2756 cinfo.flags = INF32|INFfloat; 2757 cinfo.push87 = 0; 2758 cinfo.pop87 = 2; 2759 break; 2760 } 2761 2762 case CLIB.ccmp: 2763 { 2764 s = symboly("_Ccmp", mAX|mST0|mST01); 2765 cinfo.retregs16 = mPSW; 2766 cinfo.retregs32 = mPSW; 2767 cinfo.flags = INF32|INFfloat; 2768 cinfo.push87 = 0; 2769 cinfo.pop87 = 4; 2770 break; 2771 } 2772 2773 case CLIB.u64_ldbl: 2774 { 2775 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL"; 2776 s = symboly(name, mST0); 2777 cinfo.retregs16 = mST0; 2778 cinfo.retregs32 = mST0; 2779 cinfo.flags = INF32|INF64|INFfloat; 2780 cinfo.push87 = 2; 2781 cinfo.pop87 = 1; 2782 break; 2783 } 2784 2785 case CLIB.ld_u64: 2786 { 2787 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF || 2788 config.objfmt == OBJ_MACH ? 2789 "__LDBLULLNG" : "___LDBLULLNG") 2790 : "__LDBLULLNG"; 2791 s = symboly(name, mST0|mAX|mDX); 2792 cinfo.retregs16 = 0; 2793 cinfo.retregs32 = mDX|mAX; 2794 cinfo.flags = INF32|INF64|INFfloat; 2795 cinfo.push87 = 1; 2796 cinfo.pop87 = 2; 2797 break; 2798 } 2799 2800 default: 2801 assert(0); 2802 } 2803 clibsyms[clib] = s; 2804 } 2805 2806 *ps = s; 2807 *pinfo = cinfo; 2808 } 2809 2810 /******************************** 2811 * Generate code sequence to call C runtime library support routine. 2812 * clib = CLIB.xxxx 2813 * keepmask = mask of registers not to destroy. 
Currently can 2814 * handle only 1. Should use a temporary rather than 2815 * push/pop for speed. 2816 */ 2817 2818 @trusted 2819 void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask) 2820 { 2821 //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask)); 2822 //elem_print(e); 2823 2824 Symbol* s; 2825 ClibInfo* cinfo; 2826 getClibInfo(clib, &s, &cinfo); 2827 2828 if (I16) 2829 assert(!(cinfo.flags & (INF32 | INF64))); 2830 getregs(cdb,(~s.Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed 2831 keepmask &= ~s.Sregsaved; 2832 int npushed = popcnt(keepmask); 2833 CodeBuilder cdbpop; 2834 cdbpop.ctor(); 2835 gensaverestore(keepmask, cdb, cdbpop); 2836 2837 save87regs(cdb,cinfo.push87); 2838 for (int i = 0; i < cinfo.push87; i++) 2839 push87(cdb); 2840 2841 for (int i = 0; i < cinfo.pop87; i++) 2842 pop87(); 2843 2844 if (config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32) 2845 { 2846 static immutable ubyte[23] lmul = 2847 [ 2848 0x66,0xc1,0xe1,0x10, // shl ECX,16 2849 0x8b,0xcb, // mov CX,BX ;ECX = CX,BX 2850 0x66,0xc1,0xe0,0x10, // shl EAX,16 2851 0x66,0x0f,0xac,0xd0,0x10, // shrd EAX,EDX,16 ;EAX = DX,AX 2852 0x66,0xf7,0xe1, // mul ECX 2853 0x66,0x0f,0xa4,0xc2,0x10, // shld EDX,EAX,16 ;DX,AX = EAX 2854 ]; 2855 2856 cdb.genasm(cast(char*)lmul.ptr, lmul.sizeof); 2857 } 2858 else 2859 { 2860 makeitextern(s); 2861 int nalign = 0; 2862 int pushebx = (cinfo.flags & INFpushebx) != 0; 2863 int pushall = (cinfo.flags & INFpusheabcdx) != 0; 2864 if (STACKALIGN >= 16) 2865 { // Align the stack (assume no args on stack) 2866 int npush = (npushed + pushebx + 4 * pushall) * REGSIZE + stackpush; 2867 if (npush & (STACKALIGN - 1)) 2868 { nalign = STACKALIGN - (npush & (STACKALIGN - 1)); 2869 cod3_stackadj(cdb, nalign); 2870 } 2871 } 2872 if (pushebx) 2873 { 2874 if (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | 
EX_OPENBSD64 | EX_DRAGONFLYBSD64)) 2875 { 2876 cdb.gen1(0x50 + CX); // PUSH ECX 2877 cdb.gen1(0x50 + BX); // PUSH EBX 2878 cdb.gen1(0x50 + DX); // PUSH EDX 2879 cdb.gen1(0x50 + AX); // PUSH EAX 2880 nalign += 4 * REGSIZE; 2881 } 2882 else 2883 { 2884 cdb.gen1(0x50 + BX); // PUSH EBX 2885 nalign += REGSIZE; 2886 } 2887 } 2888 if (pushall) 2889 { 2890 cdb.gen1(0x50 + CX); // PUSH ECX 2891 cdb.gen1(0x50 + BX); // PUSH EBX 2892 cdb.gen1(0x50 + DX); // PUSH EDX 2893 cdb.gen1(0x50 + AX); // PUSH EAX 2894 } 2895 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) 2896 { 2897 // Note: not for OSX 2898 /* Pass EBX on the stack instead, this is because EBX is used 2899 * for shared library function calls 2900 */ 2901 if (config.flags3 & CFG3pic) 2902 { 2903 load_localgot(cdb); // EBX gets set to this value 2904 } 2905 } 2906 2907 cdb.gencs(LARGECODE ? 0x9A : 0xE8,0,FLfunc,s); // CALL s 2908 if (nalign) 2909 cod3_stackadj(cdb, -nalign); 2910 calledafunc = 1; 2911 2912 version (SCPP) 2913 { 2914 if (I16 && // bug in Optlink for weak references 2915 config.flags3 & CFG3wkfloat && 2916 (cinfo.flags & (INFfloat | INFwkdone)) == INFfloat) 2917 { 2918 cinfo.flags |= INFwkdone; 2919 makeitextern(getRtlsym(RTLSYM.INTONLY)); 2920 objmod.wkext(s, getRtlsym(RTLSYM.INTONLY)); 2921 } 2922 } 2923 } 2924 if (I16) 2925 stackpush -= cinfo.pop; 2926 regm_t retregs = I16 ? cinfo.retregs16 : cinfo.retregs32; 2927 cdb.append(cdbpop); 2928 fixresult(cdb, e, retregs, pretregs); 2929 } 2930 2931 2932 /************************************************* 2933 * Helper function for converting OPparam's into array of Parameters. 
2934 */ 2935 struct Parameter { elem* e; reg_t reg; reg_t reg2; uint numalign; } 2936 2937 //void fillParameters(elem* e, Parameter* parameters, int* pi); 2938 2939 @trusted 2940 void fillParameters(elem* e, Parameter* parameters, int* pi) 2941 { 2942 if (e.Eoper == OPparam) 2943 { 2944 fillParameters(e.EV.E1, parameters, pi); 2945 fillParameters(e.EV.E2, parameters, pi); 2946 freenode(e); 2947 } 2948 else 2949 { 2950 parameters[*pi].e = e; 2951 (*pi)++; 2952 } 2953 } 2954 2955 /*********************************** 2956 * tyf: type of the function 2957 */ 2958 @trusted 2959 FuncParamRegs FuncParamRegs_create(tym_t tyf) 2960 { 2961 FuncParamRegs result; 2962 2963 result.tyf = tyf; 2964 2965 if (I16) 2966 { 2967 result.numintegerregs = 0; 2968 result.numfloatregs = 0; 2969 } 2970 else if (I32) 2971 { 2972 if (tyf == TYjfunc) 2973 { 2974 static immutable ubyte[1] reglist1 = [ AX ]; 2975 result.argregs = ®list1[0]; 2976 result.numintegerregs = reglist1.length; 2977 } 2978 else if (tyf == TYmfunc) 2979 { 2980 static immutable ubyte[1] reglist2 = [ CX ]; 2981 result.argregs = ®list2[0]; 2982 result.numintegerregs = reglist2.length; 2983 } 2984 else 2985 result.numintegerregs = 0; 2986 result.numfloatregs = 0; 2987 } 2988 else if (I64 && config.exe == EX_WIN64) 2989 { 2990 static immutable ubyte[4] reglist3 = [ CX,DX,R8,R9 ]; 2991 result.argregs = ®list3[0]; 2992 result.numintegerregs = reglist3.length; 2993 2994 static immutable ubyte[4] freglist3 = [ XMM0, XMM1, XMM2, XMM3 ]; 2995 result.floatregs = &freglist3[0]; 2996 result.numfloatregs = freglist3.length; 2997 } 2998 else if (I64) 2999 { 3000 static immutable ubyte[6] reglist4 = [ DI,SI,DX,CX,R8,R9 ]; 3001 result.argregs = ®list4[0]; 3002 result.numintegerregs = reglist4.length; 3003 3004 static immutable ubyte[8] freglist4 = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ]; 3005 result.floatregs = &freglist4[0]; 3006 result.numfloatregs = freglist4.length; 3007 } 3008 else 3009 assert(0); 3010 return result; 3011 } 

/*****************************************
 * Determine if a parameter of type `ty` is eligible for a general purpose
 * argument register.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type of the parameter
 * Returns:
 *      false   floating types, or anything larger than a near pointer;
 *              also structs/arrays whose size is not 1, 2, 4 or 8
 *              (that size restriction is not applied for Win64)
 *      true    otherwise
 */

//bool type_jparam2(type* t, tym_t ty);

@trusted
private bool type_jparam2(type* t, tym_t ty)
{
    ty = tybasic(ty);

    if (tyfloating(ty))
        return false;
    else if (ty == TYstruct || ty == TYarray)
    {
        type_debug(t);
        targ_size_t sz = type_size(t);
        return (sz <= _tysize[TYnptr]) &&
               (config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8);
    }
    else if (tysize(ty) <= _tysize[TYnptr])
        return true;
    return false;
}

/*****************************************
 * Allocate parameter of type t and ty to registers *preg1 and *preg2.
 * Params:
 *      fpr = allocation state; its register counters are advanced on success
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type of the parameter
 *      preg1 = set to the (first) allocated register, NOREG if none
 *      preg2 = set to the second register of a pair, NOREG if none
 * Returns:
 *      0       not allocated to any register
 *      1       *preg1, *preg2 set to allocated register pair
 */
@trusted
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    type* t2 = null;
    tym_t ty2 = TYMAX;          // TYMAX means "there is no second part"

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        ty2 = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    if (tybasic(ty) == TYarray && tybasic(t.Tty) == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile)
        && type_size(t.Tnext) > 1)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    ++fpr.i;

    // If struct or array
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            type* targ1, targ2;
            if (tybasic(t.Tty) == TYstruct)
            {
                targ1 = t.Ttag.Sstruct.Sarg1type;
                targ2 = t.Ttag.Sstruct.Sarg2type;
            }
            else if (tybasic(t.Tty) == TYarray)
            {
                if (I64)
                    argtypes(t, targ1, targ2);
            }
            else
                assert(0);

            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;
        }
    }

    reg_t* preg = preg1;
    // Remember the counters so a half-done pair allocation can be undone
    int regcntsave = fpr.regcnt;
    int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
        {
            ty = TYnptr;        // treat like a struct
        }
    }
    else if (I64)
    {
        if ((tybasic(ty) == TYcent || tybasic(ty) == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcfloat
            && fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // First pass allocates (t, ty) to *preg1, second pass (t2, ty2) to *preg2
    foreach (j; 0 .. 2)
    {
        if (fpr.regcnt < fpr.numintegerregs)
        {
            if ((I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
                type_jparam2(t, ty))
            {
                *preg = fpr.argregs[fpr.regcnt];
                ++fpr.regcnt;
                if (config.exe == EX_WIN64)
                    ++fpr.xmmcnt;       // Win64 advances both counters together
                goto Lnext;
            }
        }
        if (fpr.xmmcnt < fpr.numfloatregs)
        {
            if (tyxmmreg(ty))
            {
                *preg = fpr.floatregs[fpr.xmmcnt];
                if (config.exe == EX_WIN64)
                    ++fpr.regcnt;       // Win64 advances both counters together
                ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        // Failed to allocate to a register
        if (j == 1)
        {   /* Unwind first preg1 assignment, because it's both or nothing
             */
            *preg1 = NOREG;
            fpr.regcnt = regcntsave;
            fpr.xmmcnt = xmmcntsave;
        }
        return 0;

     Lnext:
        if (tybasic(ty2) == TYMAX)
            break;
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}

/***************************************
 * Finds replacement types for register passing of aggregates.
 * Params:
 *      t = type of the aggregate; nothing is done if null or not an aggregate
 *      arg1type = set to the type for the first register, or null
 *      arg2type = set to the type for the second register, or null
 */
@trusted
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))  // size is not a power of 2
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        if (sz <= 2 * REGSIZE)
        {
            // Pick an integer type per register; the loop runs twice only if sz > REGSIZE
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ? tstypes[TYlong]
                         : tstypes[TYllong];
                argtype = &arg2type;
                argsz = sz - REGSIZE;
            }
        }

        if (I64 && config.exe != EX_WIN64)
        {
            // Find the innermost element type of nested arrays
            // NOTE(review): the first test compares the unmasked tn.Tty with TYarray,
            // later iterations compare tybasic(tn.Tty) - confirm this is intended
            type* tn = t.Tnext;
            tym_t tyn = tn.Tty;
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            if (sz == tysize(tyn))
            {
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            if (sz <= 16)
            {
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}

/*******************************
 * Generate code sequence for function call.
3316 */ 3317 3318 @trusted 3319 void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs) 3320 { 3321 //printf("cdfunc()\n"); elem_print(e); 3322 assert(e); 3323 uint numpara = 0; // bytes of parameters 3324 uint numalign = 0; // bytes to align stack before pushing parameters 3325 uint stackpushsave = stackpush; // so we can compute # of parameters 3326 cgstate.stackclean++; 3327 regm_t keepmsk = 0; 3328 int xmmcnt = 0; 3329 tym_t tyf = tybasic(e.EV.E1.Ety); // the function type 3330 3331 // Easier to deal with parameters as an array: parameters[0..np] 3332 int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0; 3333 Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof); 3334 3335 if (np) 3336 { 3337 int n = 0; 3338 fillParameters(e.EV.E2, parameters, &n); 3339 assert(n == np); 3340 } 3341 3342 Symbol *sf = null; // symbol of the function being called 3343 if (e.EV.E1.Eoper == OPvar) 3344 sf = e.EV.E1.EV.Vsym; 3345 3346 /* Assume called function access statics 3347 */ 3348 if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64) && 3349 config.flags3 & CFG3pic) 3350 cgstate.accessedTLS = true; 3351 3352 /* Special handling for call to __tls_get_addr, we must save registers 3353 * before evaluating the parameter, so that the parameter load and call 3354 * are adjacent. 3355 */ 3356 if (np == 1 && sf) 3357 { 3358 if (sf == tls_get_addr_sym) 3359 getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS)); 3360 } 3361 3362 uint stackalign = REGSIZE; 3363 if (tyf == TYf16func) 3364 stackalign = 2; 3365 // Figure out which parameters go in registers. 
3366 // Compute numpara, the total bytes pushed on the stack 3367 FuncParamRegs fpr = FuncParamRegs_create(tyf); 3368 for (int i = np; --i >= 0;) 3369 { 3370 elem *ep = parameters[i].e; 3371 uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf)); // align on stack boundary 3372 if (config.exe == EX_WIN64) 3373 { 3374 //printf("[%d] size = %u, numpara = %d ep = %p %s\n", i, psize, numpara, ep, tym_str(ep.Ety)); 3375 debug 3376 if (psize > REGSIZE) elem_print(e); 3377 3378 assert(psize <= REGSIZE); 3379 psize = REGSIZE; 3380 } 3381 //printf("[%d] size = %u, numpara = %d %s\n", i, psize, numpara, tym_str(ep.Ety)); 3382 if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, ¶meters[i].reg, ¶meters[i].reg2)) 3383 { 3384 if (config.exe == EX_WIN64) 3385 numpara += REGSIZE; // allocate stack space for it anyway 3386 continue; // goes in register, not stack 3387 } 3388 3389 // Parameter i goes on the stack 3390 parameters[i].reg = NOREG; 3391 uint alignsize = el_alignsize(ep); 3392 parameters[i].numalign = 0; 3393 if (alignsize > stackalign && 3394 (I64 || (alignsize >= 16 && 3395 (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety)))))) 3396 { 3397 if (alignsize > STACKALIGN) 3398 { 3399 STACKALIGN = alignsize; 3400 enforcealign = true; 3401 } 3402 uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1); 3403 parameters[i].numalign = newnumpara - numpara; 3404 numpara = newnumpara; 3405 assert(config.exe != EX_WIN64); 3406 } 3407 numpara += psize; 3408 } 3409 3410 if (config.exe == EX_WIN64) 3411 { 3412 if (numpara < 4 * REGSIZE) 3413 numpara = 4 * REGSIZE; 3414 } 3415 3416 //printf("numpara = %d, stackpush = %d\n", numpara, stackpush); 3417 assert((numpara & (REGSIZE - 1)) == 0); 3418 assert((stackpush & (REGSIZE - 1)) == 0); 3419 3420 /* Should consider reordering the order of evaluation of the parameters 3421 * so that args that go into registers are evaluated after args that get 3422 * pushed. 
We can reorder args that are constants or relconst's. 3423 */ 3424 3425 /* Determine if we should use cgstate.funcarg for the parameters or push them 3426 */ 3427 bool usefuncarg = false; 3428 static if (0) 3429 { 3430 printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size, 3431 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)), 3432 cast(int)numpara, !stackpush, 3433 (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos), 3434 (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16); 3435 } 3436 if (config.flags4 & CFG4speed && 3437 !Alloca.size && 3438 /* The cleanup code calls a local function, leaving the return address on 3439 * the top of the stack. If parameters are placed there, the return address 3440 * is stepped on. 3441 * A better solution is turn this off only inside the cleanup code. 3442 */ 3443 !usednteh && 3444 !calledFinally && 3445 (numpara || config.exe == EX_WIN64) && 3446 stackpush == 0 && // cgstate.funcarg needs to be at top of stack 3447 (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) && 3448 (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) && 3449 !anyiasm && !I16 3450 ) 3451 { 3452 for (int i = 0; i < np; i++) 3453 { 3454 elem* ep = parameters[i].e; 3455 int preg = parameters[i].reg; 3456 //printf("parameter[%d] = %d, np = %d\n", i, preg, np); 3457 if (preg == NOREG) 3458 { 3459 switch (ep.Eoper) 3460 { 3461 case OPstrctor: 3462 case OPstrthis: 3463 case OPstrpar: 3464 case OPnp_fp: 3465 goto Lno; 3466 3467 default: 3468 break; 3469 } 3470 } 3471 } 3472 3473 if (numpara > cgstate.funcarg.size) 3474 { // New high water mark 3475 //printf("increasing size from %d to %d\n", cast(int)cgstate.funcarg.size, cast(int)numpara); 3476 cgstate.funcarg.size = numpara; 3477 } 3478 usefuncarg = true; 3479 } 3480 Lno: 3481 3482 /* Adjust start of the stack so after all args are pushed, 3483 * the stack will be aligned. 
3484 */ 3485 if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1)) 3486 { 3487 numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1)); 3488 cod3_stackadj(cdb, numalign); 3489 cdb.genadjesp(numalign); 3490 stackpush += numalign; 3491 stackpushsave += numalign; 3492 } 3493 assert(stackpush == stackpushsave); 3494 if (config.exe == EX_WIN64) 3495 { 3496 //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush); 3497 assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE)); 3498 3499 // Allocate stack space for four entries anyway 3500 // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29 3501 } 3502 3503 int[XMM7 + 1] regsaved = void; 3504 memset(regsaved.ptr, -1, regsaved.sizeof); 3505 CodeBuilder cdbrestore; 3506 cdbrestore.ctor(); 3507 regm_t saved = 0; 3508 targ_size_t funcargtossave = cgstate.funcargtos; 3509 targ_size_t funcargtos = numpara; 3510 //printf("funcargtos1 = %d\n", cast(int)funcargtos); 3511 3512 /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9 3513 * float and double parameters go into XMM0..XMM7 3514 * For variadic functions, count of XMM registers used goes in AL 3515 */ 3516 for (int i = 0; i < np; i++) 3517 { 3518 elem* ep = parameters[i].e; 3519 int preg = parameters[i].reg; 3520 //printf("parameter[%d] = %d, np = %d\n", i, preg, np); 3521 if (preg == NOREG) 3522 { 3523 /* Push parameter on stack, but keep track of registers used 3524 * in the process. If they interfere with keepmsk, we'll have 3525 * to save/restore them. 
3526 */ 3527 CodeBuilder cdbsave; 3528 cdbsave.ctor(); 3529 regm_t overlap = msavereg & keepmsk; 3530 msavereg |= keepmsk; 3531 CodeBuilder cdbparams; 3532 cdbparams.ctor(); 3533 if (usefuncarg) 3534 movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf); 3535 else 3536 pushParams(cdbparams,ep,stackalign, tyf); 3537 regm_t tosave = keepmsk & ~msavereg; 3538 msavereg &= ~keepmsk | overlap; 3539 3540 // tosave is the mask to save and restore 3541 for (reg_t j = 0; tosave; j++) 3542 { 3543 regm_t mi = mask(j); 3544 assert(j <= XMM7); 3545 if (mi & tosave) 3546 { 3547 uint idx; 3548 regsave.save(cdbsave, j, &idx); 3549 regsave.restore(cdbrestore, j, idx); 3550 saved |= mi; 3551 keepmsk &= ~mi; // don't need to keep these for rest of params 3552 tosave &= ~mi; 3553 } 3554 } 3555 3556 cdb.append(cdbsave); 3557 cdb.append(cdbparams); 3558 3559 // Alignment for parameter comes after it got pushed 3560 const uint numalignx = parameters[i].numalign; 3561 if (usefuncarg) 3562 { 3563 funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx; 3564 cgstate.funcargtos = funcargtos; 3565 } 3566 else if (numalignx) 3567 { 3568 cod3_stackadj(cdb, numalignx); 3569 cdb.genadjesp(numalignx); 3570 stackpush += numalignx; 3571 } 3572 } 3573 else 3574 { 3575 // Goes in register preg, not stack 3576 regm_t retregs = mask(preg); 3577 if (retregs & XMMREGS) 3578 ++xmmcnt; 3579 int preg2 = parameters[i].reg2; 3580 reg_t mreg,lreg; 3581 if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat) 3582 { 3583 assert(ep.Eoper != OPstrthis); 3584 if (mask(preg2) & XMMREGS) 3585 ++xmmcnt; 3586 if (tybasic(ep.Ety) == TYcfloat) 3587 { 3588 lreg = ST01; 3589 mreg = NOREG; 3590 } 3591 else if (tyrelax(ep.Ety) == TYcent) 3592 { 3593 lreg = mask(preg ) & mLSW ? cast(reg_t)preg : AX; 3594 mreg = mask(preg2) & mMSW ? 
cast(reg_t)preg2 : DX; 3595 } 3596 else 3597 { 3598 lreg = XMM0; 3599 mreg = XMM1; 3600 } 3601 retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG); 3602 CodeBuilder cdbsave; 3603 cdbsave.ctor(); 3604 if (keepmsk & retregs) 3605 { 3606 regm_t tosave = keepmsk & retregs; 3607 3608 // tosave is the mask to save and restore 3609 for (reg_t j = 0; tosave; j++) 3610 { 3611 regm_t mi = mask(j); 3612 assert(j <= XMM7); 3613 if (mi & tosave) 3614 { 3615 uint idx; 3616 regsave.save(cdbsave, j, &idx); 3617 regsave.restore(cdbrestore, j, idx); 3618 saved |= mi; 3619 keepmsk &= ~mi; // don't need to keep these for rest of params 3620 tosave &= ~mi; 3621 } 3622 } 3623 } 3624 cdb.append(cdbsave); 3625 3626 scodelem(cdb, ep, &retregs, keepmsk, false); 3627 3628 // Move result [mreg,lreg] into parameter registers from [preg2,preg] 3629 retregs = 0; 3630 if (preg != lreg) 3631 retregs |= mask(preg); 3632 if (preg2 != mreg) 3633 retregs |= mask(preg2); 3634 retregs &= ~mask(NOREG); 3635 getregs(cdb,retregs); 3636 3637 tym_t ty1 = tybasic(ep.Ety); 3638 tym_t ty2 = ty1; 3639 if (ep.Ety & mTYgprxmm) 3640 { 3641 ty1 = TYllong; 3642 ty2 = TYdouble; 3643 } 3644 else if (ep.Ety & mTYxmmgpr) 3645 { 3646 ty1 = TYdouble; 3647 ty2 = TYllong; 3648 } 3649 else if (ty1 == TYstruct) 3650 { 3651 type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type; 3652 type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type; 3653 if (targ1) 3654 ty1 = targ1.Tty; 3655 if (targ2) 3656 ty2 = targ2.Tty; 3657 } 3658 else if (tyrelax(ty1) == TYcent) 3659 ty1 = ty2 = TYllong; 3660 else if (tybasic(ty1) == TYcdouble) 3661 ty1 = ty2 = TYdouble; 3662 3663 if (tybasic(ep.Ety) == TYcfloat) 3664 { 3665 assert(I64); 3666 assert(lreg == ST01 && mreg == NOREG); 3667 // spill 3668 pop87(); 3669 pop87(); 3670 cdb.genfltreg(0xD9, 3, tysize(TYfloat)); 3671 genfwait(cdb); 3672 cdb.genfltreg(0xD9, 3, 0); 3673 genfwait(cdb); 3674 // reload 3675 if (config.exe == EX_WIN64) 3676 { 3677 cdb.genfltreg(LOD, preg, 0); 3678 code_orrex(cdb.last(), REX_W); 3679 } 3680 
else 3681 { 3682 assert(mask(preg) & XMMREGS); 3683 cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble); 3684 } 3685 } 3686 else foreach (v; 0 .. 2) 3687 { 3688 if (v ^ (preg != mreg)) 3689 genmovreg(cdb, preg, lreg, ty1); 3690 else 3691 genmovreg(cdb, preg2, mreg, ty2); 3692 } 3693 3694 retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG); 3695 } 3696 else if (ep.Eoper == OPstrthis) 3697 { 3698 getregs(cdb,retregs); 3699 // LEA preg,np[RSP] 3700 uint delta = stackpush - ep.EV.Vuns; // stack delta to parameter 3701 cdb.genc1(LEA, 3702 (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta); 3703 if (I64) 3704 code_orrex(cdb.last(), REX_W); 3705 } 3706 else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0) 3707 { 3708 retregs = 0; 3709 scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false); 3710 freenode(ep); 3711 } 3712 else 3713 { 3714 scodelem(cdb, ep, &retregs, keepmsk, false); 3715 } 3716 keepmsk |= retregs; // don't change preg when evaluating func address 3717 } 3718 } 3719 3720 if (config.exe == EX_WIN64) 3721 { // Allocate stack space for four entries anyway 3722 // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29 3723 { uint sz = 4 * REGSIZE; 3724 if (usefuncarg) 3725 { 3726 funcargtos -= sz; 3727 cgstate.funcargtos = funcargtos; 3728 } 3729 else 3730 { 3731 cod3_stackadj(cdb, sz); 3732 cdb.genadjesp(sz); 3733 stackpush += sz; 3734 } 3735 } 3736 3737 /* Variadic functions store XMM parameters into their corresponding GP registers 3738 */ 3739 for (int i = 0; i < np; i++) 3740 { 3741 int preg = parameters[i].reg; 3742 regm_t retregs = mask(preg); 3743 if (retregs & XMMREGS) 3744 { 3745 reg_t reg; 3746 switch (preg) 3747 { 3748 case XMM0: reg = CX; break; 3749 case XMM1: reg = DX; break; 3750 case XMM2: reg = R8; break; 3751 case XMM3: reg = R9; break; 3752 3753 default: assert(0); 3754 } 3755 getregs(cdb,mask(reg)); 3756 cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg 3757 } 
3758 } 3759 } 3760 3761 // Restore any register parameters we saved 3762 getregs(cdb,saved); 3763 cdb.append(cdbrestore); 3764 keepmsk |= saved; 3765 3766 // Variadic functions store the number of XMM registers used in AL 3767 if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic) 3768 { 3769 getregs(cdb,mAX); 3770 movregconst(cdb,AX,xmmcnt,1); 3771 keepmsk |= mAX; 3772 } 3773 3774 //printf("funcargtos2 = %d\n", cast(int)funcargtos); 3775 assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0)); 3776 cgstate.stackclean--; 3777 3778 debug 3779 if (!usefuncarg && numpara != stackpush - stackpushsave) 3780 { 3781 printf("function %s\n", funcsym_p.Sident.ptr); 3782 printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave); 3783 elem_print(e); 3784 } 3785 3786 assert(usefuncarg || numpara == stackpush - stackpushsave); 3787 3788 funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg); 3789 cgstate.funcargtos = funcargtossave; 3790 } 3791 3792 /*********************************** 3793 */ 3794 3795 @trusted 3796 void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs) 3797 { 3798 assert(tysize(e.Ety) == REGSIZE); 3799 const reg = findreg(*pretregs & allregs); 3800 getregs(cdb,mask(reg)); 3801 // LEA reg,np[ESP] 3802 uint np = stackpush - e.EV.Vuns; // stack delta to parameter 3803 cdb.genc1(LEA,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np); 3804 if (I64) 3805 code_orrex(cdb.last(), REX_W); 3806 fixresult(cdb, e, mask(reg), pretregs); 3807 } 3808 3809 /****************************** 3810 * Call function. All parameters have already been pushed onto the stack. 
 * Emits the CALL itself, accounts for (or generates) the stack cleanup after
 * the call, and then moves the return value to where `*pretregs` wants it.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = function call
 *      numpara = size in bytes of all the parameters
 *      numalign = amount the stack was aligned by before the parameters were pushed
 *      pretregs = where return value goes
 *      keepmsk = registers to not change when evaluating the function address
 *      usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return
 */

@trusted
private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign,
        regm_t* pretregs,regm_t keepmsk, bool usefuncarg)
{
    //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg);
    //printf("  from %s\n", funcsym_p.Sident.ptr);
    //elem_print(e);
    calledafunc = 1;
    // Determine if we need frame for function prolog/epilog

    if (config.memmodel == Vmodel)
    {
        if (tyfarfunc(funcsym_p.ty()))
            needframe = true;
    }

    code cs;
    regm_t retregs;
    Symbol* s;                  // callee symbol for a direct call; set to null for a call via pointer

    elem* e1 = e.EV.E1;         // the function (or pointer-to-function) operand
    tym_t tym1 = tybasic(e1.Ety);
    char farfunc = tyfarfunc(tym1) || tym1 == TYifunc;

    // Code for the callee address and the CALL instruction is collected in cdbe
    // and appended to cdb once both branches below are done.
    CodeBuilder cdbe;
    cdbe.ctor();

    if (e1.Eoper == OPvar)
    {   // Call function directly

        if (!tyfunc(tym1))
            printf("%s\n", tym_str(tym1));
        assert(tyfunc(tym1));
        s = e1.EV.Vsym;
        if (s.Sflags & SFLexit)
        { }
        else if (s != tls_get_addr_sym)
            save87(cdb);               // assume 8087 regs are all trashed

        // Function calls may throw Errors, unless marked that they don't
        if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow))
            funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;

        if (s.Sflags & SFLexit)
        {
            // Function doesn't return, so don't worry about registers
            // it may use
        }
        else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized))
            // so we can replace func at runtime
            getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
        else
            getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
        // A callee literally named "alloca" is redirected to the runtime
        // library symbol RTLSYM.ALLOCA, with the address of FLallocatmp
        // loaded into CX (DX when targeting Win64).
        if (strcmp(s.Sident.ptr, "alloca") == 0)
        {
            s = getRtlsym(RTLSYM.ALLOCA);
            makeitextern(s);
            int areg = CX;
            if (config.exe == EX_WIN64)
                areg = DX;
            getregs(cdbe, mask(areg));
            cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0);  // LEA areg,&localsize[BP]
            if (I64)
                code_orrex(cdbe.last(), REX_W);
            Alloca.size = REGSIZE;
        }
        if (sytab[s.Sclass] & SCSS)    // if function is on stack (!)
        {
            // Get the address into registers, then call through them
            retregs = allregs & ~keepmsk;
            s.Sflags &= ~GTregcand;
            s.Sflags |= SFLread;
            cdrelconst(cdbe,e1,&retregs);
            if (farfunc)
            {
                // Store the register pair to the FLfltreg temporary
                // and call indirectly through that memory location
                const reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                floatreg = true;         // use float register
                reflocal = true;
                cdbe.genc1(0x89,         // MOV floatreg+2,reg
                        modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
                cdbe.genc1(0x89,         // MOV floatreg,lsreg
                        modregrm(2, lsreg, BPRM), FLfltreg, 0);
                if (tym1 == TYifunc)
                    cdbe.gen1(0x9C);     // PUSHF
                cdbe.genc1(0xFF,         // CALL [floatreg]
                        modregrm(2, 3, BPRM), FLfltreg, 0);
            }
            else
            {
                const reg = findreg(retregs);
                cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
                if (I64)
                    code_orrex(cdbe.last(), REX_W);
            }
        }
        else
        {
            // Ordinary direct call to a symbol
            int fl = FLfunc;
            if (!tyfunc(s.ty()))
                fl = el_fl(e1);
            if (tym1 == TYifunc)
                cdbe.gen1(0x9C);                             // PUSHF
            if (config.exe & (EX_windos | EX_OSX | EX_OSX64))
            {
                cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s);    // CALL extern
            }
            else
            {
                assert(!farfunc);
                if (s != tls_get_addr_sym)
                {
                    //printf("call %s\n", s.Sident.ptr);
                    load_localgot(cdb);
                    cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
                }
                else if (I64)
                {
                    /* Prepend 66 66 48 so GNU linker has patch room
                     */
                    assert(!farfunc);
                    cdbe.gen1(0x66);
                    cdbe.gen1(0x66);
                    cdbe.gencs(0xE8, 0, fl, s);      // CALL extern
                    cdbe.last().Irex = REX | REX_W;
                }
                else
                    cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
            }
            // Tag the CALL just generated: seg:offset for far calls, self-relative offset otherwise
            code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff));
        }
    }
    else
    {   // Call function via pointer

        // Function calls may throw Errors
        funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;

        if (e1.Eoper != OPind) { WRFL(cast(FL)el_fl(e1)); printf("e1.Eoper: %s\n", oper_str(e1.Eoper)); }
        save87(cdb);                   // assume 8087 regs are all trashed
        assert(e1.Eoper == OPind);
        elem *e11 = e1.EV.E1;          // expression yielding the function's address
        tym_t e11ty = tybasic(e11.Ety);
        assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
        load_localgot(cdb);
        if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // 32 bit only
        {
            if (config.flags3 & CFG3pic)
                keepmsk |= mBX;
        }

        /* Mask of registers destroyed by the function call
         */
        regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved;

        // if we can't use loadea()
        if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) &&
            (e11.Eoper != OPind || e11.Ecount))
        {
            // Evaluate the address into registers first
            retregs = allregs & ~keepmsk;
            cgstate.stackclean++;
            scodelem(cdbe,e11,&retregs,keepmsk,true);
            cgstate.stackclean--;
            // Kill registers destroyed by an arbitrary function call
            getregs(cdbe,desmsk);
            if (e11ty == TYfptr)
            {
                // Same store-to-FLfltreg-then-indirect-call sequence as the
                // on-stack far function case above
                const reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                floatreg = true;         // use float register
                reflocal = true;
                cdbe.genc1(0x89,         // MOV floatreg+2,reg
                        modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
                cdbe.genc1(0x89,         // MOV floatreg,lsreg
                        modregrm(2, lsreg, BPRM), FLfltreg, 0);
                if (tym1 == TYifunc)
                    cdbe.gen1(0x9C);     // PUSHF
                cdbe.genc1(0xFF,         // CALL [floatreg]
                        modregrm(2, 3, BPRM), FLfltreg, 0);
            }
            else
            {
                const reg = findreg(retregs);
                cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
                if (I64)
                    code_orrex(cdbe.last(), REX_W);
            }
        }
        else
        {
            // NOTE(review): this PUSHF goes into cdb, not cdbe like the other
            // PUSHF sites. Since cdbe is appended to cdb afterwards, it ends up
            // ahead of the effective address code - confirm this is intended.
            if (tym1 == TYifunc)
                cdb.gen1(0x9C);                 // PUSHF
                                                // CALL [function]
            cs.Iflags = 0;
            cgstate.stackclean++;
            loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk);
            cgstate.stackclean--;
            freenode(e11);
        }
        s = null;       // no callee symbol; the SFLexit test below is skipped
    }
    cdb.append(cdbe);
    freenode(e1);

    /* See if we will need the frame pointer.
       Calculate it here so we can possibly use BP to fix the stack.
     */
    // The following block is disabled at compile time by `static if (0)`
static if (0)
{
    if (!needframe)
    {
        // If there is a register available for this basic block
        if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
        { }
        else
        {
            for (SYMIDX si = 0; si < globsym.length; si++)
            {
                Symbol* s = globsym[si];

                if (s.Sflags & GTregcand && type_size(s.Stype) != 0)
                {
                    if (config.flags4 & CFG4optimized)
                    {   // If symbol is live in this basic block and
                        // isn't already in a register
                        if (s.Srange && vec_testbit(dfoidx, s.Srange) &&
                            s.Sfl != FLreg)
                        {   // Then symbol must be allocated on stack
                            needframe = true;
                            break;
                        }
                    }
                    else
                    {   if (mfuncreg == 0)      // if no registers left
                        {   needframe = true;
                            break;
                        }
                    }
                }
            }
        }
    }
}

    // Work out which register(s) the callee returns its value in
    reg_t reg1, reg2;
    retregs = allocretregs(e.Ety, e.ET, tym1, reg1, reg2);

    assert(retregs || !*pretregs);

    if (!usefuncarg)
    {
        // If stack needs cleanup
        if  (s && s.Sflags & SFLexit)
        {
            if (config.fulltypes && TARGET_WINDOS)
            {
                // the stack walker evaluates the return address, not a byte of the
                // call instruction, so ensure there is an instruction byte after
                // the call that still has the same line number information
                cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
            }
            /* Function never returns, so don't need to generate stack
             * cleanup code. But still need to log the stack cleanup
             * as if it did return.
             */
            cdb.genadjesp(-(numpara + numalign));
            stackpush -= numpara + numalign;
        }
        else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) &&
            (!typfunc(tym1) || config.exe == EX_WIN64))
        {
            // Caller removes the parameters (and the alignment padding)
            if (tym1 == TYhfunc)
            {   // Hidden parameter is popped off by the callee
                cdb.genadjesp(-REGSIZE);
                stackpush -= REGSIZE;
                if (numpara + numalign > REGSIZE)
                    genstackclean(cdb, numpara + numalign - REGSIZE, retregs);
            }
            else
                genstackclean(cdb, numpara + numalign, retregs);
        }
        else
        {
            cdb.genadjesp(-numpara);  // popped off by the callee's 'RET numpara'
            stackpush -= numpara;
            if (numalign)              // callee doesn't know about alignment adjustment
                genstackclean(cdb,numalign,retregs);
        }
    }

    /* Special handling for functions which return a floating point
       value in the top of the 8087 stack.
     */

    if (retregs & mST0)
    {
        cdb.genadjfpu(1);
        if (*pretregs)                  // if we want the result
        {
            //assert(global87.stackused == 0);
            push87(cdb);                // one item on 8087 stack
            fixresult87(cdb,e,retregs,pretregs);
            return;
        }
        else
            // Pop unused result off 8087 stack
            cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
    }
    else if (retregs & mST01)
    {
        cdb.genadjfpu(2);
        if (*pretregs)                  // if we want the result
        {
            assert(global87.stackused == 0);
            push87(cdb);
            push87(cdb);                // two items on 8087 stack
            fixresult_complex87(cdb, e, retregs, pretregs, true);
            return;
        }
        else
        {
            // Pop unused result off 8087 stack
            cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
            cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
        }
    }

    /* Special handling for functions that return one part
       in XMM0 and the other part in AX
     */
    if (*pretregs && retregs)
    {
        if (reg1 == NOREG || reg2 == NOREG)
        {}
        // exactly one of reg1/reg2 is an XMM register
        else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS)))
        {
            // Move both halves into the same register class
            reg_t lreg, mreg;
            if (mask(reg1) & XMMREGS)
            {
                lreg = XMM0;
                mreg = XMM1;
            }
            else
            {
                lreg = mask(reg1) & mLSW ? reg1 : AX;
                mreg = mask(reg2) & mMSW ? reg2 : DX;
            }
            // Two passes, one move each; which move goes first depends on
            // whether reg2 is the same register as lreg
            for (int v = 0; v < 2; v++)
            {
                if (v ^ (reg2 != lreg))
                    genmovreg(cdb,lreg,reg1);
                else
                    genmovreg(cdb,mreg,reg2);
            }
            retregs = mask(lreg) | mask(mreg);
        }
    }

    /* Special handling for functions which return complex float in XMM0 or RAX. */

    if (I64
        && config.exe != EX_WIN64 // broken
        && *pretregs && tybasic(e.Ety) == TYcfloat)
    {
        assert(reg2 == NOREG);
        // spill
        // The EX_WIN64 arm below cannot be reached while the enclosing
        // condition requires config.exe != EX_WIN64
        if (config.exe == EX_WIN64)
        {
            assert(reg1 == AX);
            cdb.genfltreg(STO, reg1, 0);
            code_orrex(cdb.last(), REX_W);
        }
        else
        {
            assert(reg1 == XMM0);
            cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble);
        }
        // reload real
        push87(cdb);
        cdb.genfltreg(0xD9, 0, 0);
        genfwait(cdb);
        // reload imaginary
        push87(cdb);
        cdb.genfltreg(0xD9, 0, tysize(TYfloat));
        genfwait(cdb);

        retregs = mST01;        // result now lives on the 8087 stack
    }

    fixresult(cdb, e, retregs, pretregs);
}

/***************************
 * Determine size of argument e that will be pushed.
 * Params:
 *      e = argument expression; must not be an OPparam node
 *      tyf = type of the function being called, forwarded to
 *            type_parameterSize() for struct and array arguments
 * Returns:
 *      size(tym) for scalar types, type_parameterSize(e.ET, tyf) for
 *      TYstruct/TYarray; any other type prints its name and asserts
 */

@trusted
targ_size_t paramsize(elem* e, tym_t tyf)
{
    assert(e.Eoper != OPparam);
    targ_size_t szb;
    tym_t tym = tybasic(e.Ety);
    if (tyscalar(tym))
        szb = size(tym);
    else if (tym == TYstruct || tym == TYarray)
        szb = type_parameterSize(e.ET, tyf);
    else
    {
        // unexpected argument type: report it, then fail
        printf("%s\n", tym_str(tym));
        assert(0);
    }
    return szb;
}

/***************************
 * Generate code to move argument e on the stack.
4235 */ 4236 4237 @trusted 4238 private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf) 4239 { 4240 //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos); 4241 //printf("movParams()\n"); elem_print(e); 4242 assert(!I16); 4243 assert(e && e.Eoper != OPparam); 4244 4245 tym_t tym = tybasic(e.Ety); 4246 if (tyfloating(tym)) 4247 objmod.fltused(); 4248 4249 int grex = I64 ? REX_W << 16 : 0; 4250 4251 targ_size_t szb = paramsize(e, tyf); // size before alignment 4252 targ_size_t sz = _align(stackalign, szb); // size after alignment 4253 assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked 4254 assert((sz & (REGSIZE - 1)) == 0); 4255 //printf("szb = %d sz = %d\n", cast(int)szb, cast(int)sz); 4256 4257 code cs; 4258 cs.Iflags = 0; 4259 cs.Irex = 0; 4260 switch (e.Eoper) 4261 { 4262 case OPstrctor: 4263 case OPstrthis: 4264 case OPstrpar: 4265 case OPnp_fp: 4266 assert(0); 4267 4268 case OPrelconst: 4269 { 4270 int fl; 4271 if (!evalinregister(e) && 4272 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) && 4273 ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern) 4274 ) 4275 { 4276 // MOV -stackoffset[EBP],&variable 4277 cs.Iop = 0xC7; 4278 cs.Irm = modregrm(2,0,BPRM); 4279 if (I64 && sz == 8) 4280 cs.Irex |= REX_W; 4281 cs.IFL1 = FLfuncarg; 4282 cs.IEV1.Voffset = funcargtos - REGSIZE; 4283 cs.IEV2.Voffset = e.EV.Voffset; 4284 cs.IFL2 = cast(ubyte)fl; 4285 cs.IEV2.Vsym = e.EV.Vsym; 4286 cs.Iflags |= CFoff; 4287 cdb.gen(&cs); 4288 return; 4289 } 4290 break; 4291 } 4292 4293 case OPconst: 4294 if (!evalinregister(e)) 4295 { 4296 cs.Iop = (sz == 1) ? 
0xC6 : 0xC7; 4297 cs.Irm = modregrm(2,0,BPRM); 4298 cs.IFL1 = FLfuncarg; 4299 cs.IEV1.Voffset = funcargtos - sz; 4300 cs.IFL2 = FLconst; 4301 targ_size_t *p = cast(targ_size_t *) &(e.EV); 4302 cs.IEV2.Vsize_t = *p; 4303 if (I64 && tym == TYcldouble) 4304 // The alignment of EV.Vcldouble is not the same on the compiler 4305 // as on the target 4306 goto Lbreak; 4307 if (I64 && sz >= 8) 4308 { 4309 int i = cast(int)sz; 4310 do 4311 { 4312 if (*p >= 0x80000000) 4313 { // Use 64 bit register MOV, as the 32 bit one gets sign extended 4314 // MOV reg,imm64 4315 // MOV EA,reg 4316 goto Lbreak; 4317 } 4318 p = cast(targ_size_t *)(cast(char *) p + REGSIZE); 4319 i -= REGSIZE; 4320 } while (i > 0); 4321 p = cast(targ_size_t *) &(e.EV); 4322 } 4323 4324 int i = cast(int)sz; 4325 do 4326 { int regsize = REGSIZE; 4327 regm_t retregs = (sz == 1) ? BYTEREGS : allregs; 4328 reg_t reg; 4329 if (reghasvalue(retregs,*p,®)) 4330 { 4331 cs.Iop = (cs.Iop & 1) | 0x88; 4332 cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg 4333 if (reg & 8) 4334 cs.Irex |= REX_R; 4335 if (I64 && sz == 1 && reg >= 4) 4336 cs.Irex |= REX; 4337 } 4338 if (I64 && sz >= 8) 4339 cs.Irex |= REX_W; 4340 cdb.gen(&cs); // MOV EA,const 4341 4342 p = cast(targ_size_t *)(cast(char *) p + regsize); 4343 cs.Iop = 0xC7; 4344 cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0); 4345 cs.Irex &= ~REX_R; 4346 cs.IEV1.Voffset += regsize; 4347 cs.IEV2.Vint = cast(targ_int)*p; 4348 i -= regsize; 4349 } while (i > 0); 4350 return; 4351 } 4352 4353 Lbreak: 4354 break; 4355 4356 default: 4357 break; 4358 } 4359 regm_t retregs = tybyte(tym) ? 
BYTEREGS : allregs; 4360 if (tyvector(tym) || 4361 config.fpxmmregs && tyxmmreg(tym) && 4362 // If not already in x87 register from function call return 4363 !((e.Eoper == OPcall || e.Eoper == OPucall) && I32)) 4364 { 4365 retregs = XMMREGS; 4366 codelem(cdb, e, &retregs, false); 4367 const op = xmmstore(tym); 4368 const r = findreg(retregs); 4369 cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - sz); // MOV funcarg[EBP],r 4370 checkSetVex(cdb.last(),tym); 4371 return; 4372 } 4373 else if (tyfloating(tym)) 4374 { 4375 if (config.inline8087) 4376 { 4377 retregs = tycomplex(tym) ? mST01 : mST0; 4378 codelem(cdb, e, &retregs, false); 4379 4380 opcode_t op; 4381 uint r; 4382 switch (tym) 4383 { 4384 case TYfloat: 4385 case TYifloat: 4386 case TYcfloat: 4387 op = 0xD9; 4388 r = 3; 4389 break; 4390 4391 case TYdouble: 4392 case TYidouble: 4393 case TYdouble_alias: 4394 case TYcdouble: 4395 op = 0xDD; 4396 r = 3; 4397 break; 4398 4399 case TYldouble: 4400 case TYildouble: 4401 case TYcldouble: 4402 op = 0xDB; 4403 r = 7; 4404 break; 4405 4406 default: 4407 assert(0); 4408 } 4409 if (tycomplex(tym)) 4410 { 4411 // FSTP sz/2[ESP] 4412 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2); 4413 pop87(); 4414 } 4415 pop87(); 4416 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz); // FSTP -sz[EBP] 4417 return; 4418 } 4419 } 4420 scodelem(cdb, e, &retregs, 0, true); 4421 if (sz <= REGSIZE) 4422 { 4423 uint r = findreg(retregs); 4424 cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r 4425 if (sz == 8) 4426 code_orrex(cdb.last(), REX_W); 4427 } 4428 else if (sz == REGSIZE * 2) 4429 { 4430 uint r = findregmsw(retregs); 4431 cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r 4432 r = findreglsw(retregs); 4433 cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r 4434 } 4435 else 4436 
assert(0); 4437 } 4438 4439 4440 /*************************** 4441 * Generate code to push argument e on the stack. 4442 * stackpush is incremented by stackalign for each PUSH. 4443 */ 4444 4445 @trusted 4446 void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf) 4447 { 4448 //printf("params(e = %p, stackalign = %d)\n", e, stackalign); 4449 //printf("params()\n"); elem_print(e); 4450 stackchanged = 1; 4451 assert(e && e.Eoper != OPparam); 4452 4453 tym_t tym = tybasic(e.Ety); 4454 if (tyfloating(tym)) 4455 objmod.fltused(); 4456 4457 int grex = I64 ? REX_W << 16 : 0; 4458 4459 targ_size_t szb = paramsize(e, tyf); // size before alignment 4460 targ_size_t sz = _align(stackalign,szb); // size after alignment 4461 assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked 4462 assert((sz & (REGSIZE - 1)) == 0); 4463 4464 switch (e.Eoper) 4465 { 4466 version (SCPP) 4467 { 4468 case OPstrctor: 4469 { 4470 elem* e1 = e.EV.E1; 4471 docommas(cdb,&e1); // skip over any comma expressions 4472 4473 cod3_stackadj(cdb, sz); 4474 stackpush += sz; 4475 cdb.genadjesp(sz); 4476 4477 // Find OPstrthis and set it to stackpush 4478 exp2_setstrthis(e1, null, stackpush, null); 4479 4480 regm_t retregs = 0; 4481 codelem(cdb, e1, &retregs, true); 4482 freenode(e); 4483 return; 4484 } 4485 case OPstrthis: 4486 // This is the parameter for the 'this' pointer corresponding to 4487 // OPstrctor. We push a pointer to an object that was already 4488 // allocated on the stack by OPstrctor. 
4489 { 4490 regm_t retregs = allregs; 4491 reg_t reg; 4492 allocreg(cdb, &retregs, ®, TYoffset); 4493 genregs(cdb, 0x89, SP, reg); // MOV reg,SP 4494 if (I64) 4495 code_orrex(cdb.last(), REX_W); 4496 uint np = stackpush - e.EV.Vuns; // stack delta to parameter 4497 cdb.genc2(0x81, grex | modregrmx(3, 0, reg), np); // ADD reg,np 4498 if (sz > REGSIZE) 4499 { 4500 cdb.gen1(0x16); // PUSH SS 4501 stackpush += REGSIZE; 4502 } 4503 cdb.gen1(0x50 + (reg & 7)); // PUSH reg 4504 if (reg & 8) 4505 code_orrex(cdb.last(), REX_B); 4506 stackpush += REGSIZE; 4507 cdb.genadjesp(sz); 4508 freenode(e); 4509 return; 4510 } 4511 } 4512 4513 case OPstrpar: 4514 { 4515 uint rm; 4516 4517 elem* e1 = e.EV.E1; 4518 if (sz == 0) 4519 { 4520 docommas(cdb, &e1); // skip over any commas 4521 4522 const stackpushsave = stackpush; 4523 const stackcleansave = cgstate.stackclean; 4524 cgstate.stackclean = 0; 4525 4526 regm_t retregs = 0; 4527 codelem(cdb,e1,&retregs,true); 4528 4529 assert(cgstate.stackclean == 0); 4530 cgstate.stackclean = stackcleansave; 4531 genstackclean(cdb,stackpush - stackpushsave,0); 4532 4533 freenode(e); 4534 return; 4535 } 4536 if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar) 4537 { 4538 freenode(e); 4539 e = e1; 4540 goto L1; 4541 } 4542 docommas(cdb,&e1); // skip over any commas 4543 code_flags_t seg = 0; // assume no seg override 4544 regm_t retregs = sz ? 
IDXREGS : 0; 4545 bool doneoff = false; 4546 uint pushsize = REGSIZE; 4547 uint op16 = 0; 4548 if (!I16 && sz & 2) // if odd number of words to push 4549 { 4550 pushsize = 2; 4551 op16 = 1; 4552 } 4553 else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0) 4554 { 4555 pushsize = 4; // push DWORDs at a time 4556 op16 = 1; 4557 } 4558 uint npushes = cast(uint)(sz / pushsize); 4559 switch (e1.Eoper) 4560 { 4561 case OPind: 4562 if (sz) 4563 { 4564 switch (tybasic(e1.EV.E1.Ety)) 4565 { 4566 case TYfptr: 4567 case TYhptr: 4568 seg = CFes; 4569 retregs |= mES; 4570 break; 4571 4572 case TYsptr: 4573 if (config.wflags & WFssneds) 4574 seg = CFss; 4575 break; 4576 4577 case TYfgPtr: 4578 if (I32) 4579 seg = CFgs; 4580 else if (I64) 4581 seg = CFfs; 4582 else 4583 assert(0); 4584 break; 4585 4586 case TYcptr: 4587 seg = CFcs; 4588 break; 4589 4590 default: 4591 break; 4592 } 4593 } 4594 codelem(cdb, e1.EV.E1, &retregs, false); 4595 freenode(e1); 4596 break; 4597 4598 case OPvar: 4599 /* Symbol is no longer a candidate for a register */ 4600 e1.EV.Vsym.Sflags &= ~GTregcand; 4601 4602 if (!e1.Ecount && npushes > 4) 4603 { 4604 /* Kludge to point at last word in struct. */ 4605 /* Don't screw up CSEs. 
*/ 4606 e1.EV.Voffset += sz - pushsize; 4607 doneoff = true; 4608 } 4609 //if (LARGEDATA) /* if default isn't DS */ 4610 { 4611 static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ]; 4612 4613 int fl = el_fl(e1); 4614 if (fl == FLfardata) 4615 { 4616 seg = CFes; 4617 retregs |= mES; 4618 } 4619 else 4620 { 4621 uint s = segfl[fl]; 4622 assert(s < 4); 4623 seg = segtocf[s]; 4624 if (seg == CFss && !(config.wflags & WFssneds)) 4625 seg = 0; 4626 } 4627 } 4628 if (e1.Ety & mTYfar) 4629 { 4630 seg = CFes; 4631 retregs |= mES; 4632 } 4633 cdrelconst(cdb, e1, &retregs); 4634 // Reverse the effect of the previous add 4635 if (doneoff) 4636 e1.EV.Voffset -= sz - pushsize; 4637 freenode(e1); 4638 break; 4639 4640 case OPstreq: 4641 //case OPcond: 4642 if (config.exe & EX_segmented) 4643 { 4644 seg = CFes; 4645 retregs |= mES; 4646 } 4647 codelem(cdb, e1, &retregs, false); 4648 break; 4649 4650 case OPpair: 4651 case OPrpair: 4652 pushParams(cdb, e1, stackalign, tyf); 4653 freenode(e); 4654 return; 4655 4656 default: 4657 elem_print(e1); 4658 assert(0); 4659 } 4660 reg_t reg = findreglsw(retregs); 4661 rm = I16 ? regtorm[reg] : regtorm32[reg]; 4662 if (op16) 4663 seg |= CFopsize; // operand size 4664 if (npushes <= 4) 4665 { 4666 assert(!doneoff); 4667 for (; npushes > 1; --npushes) 4668 { 4669 cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1)); // PUSH [reg] 4670 code_orflag(cdb.last(),seg); 4671 cdb.genadjesp(pushsize); 4672 } 4673 cdb.gen2(0xFF,buildModregrm(0, 6, rm)); // PUSH [reg] 4674 cdb.last().Iflags |= seg; 4675 cdb.genadjesp(pushsize); 4676 } 4677 else if (sz) 4678 { 4679 getregs_imm(cdb, mCX | retregs); 4680 // MOV CX,sz/2 4681 movregconst(cdb, CX, npushes, 0); 4682 if (!doneoff) 4683 { // This should be done when 4684 // reg is loaded. 
Fix later 4685 // ADD reg,sz-pushsize 4686 cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize); 4687 } 4688 getregs(cdb,mCX); // the LOOP decrements it 4689 cdb.gen2(0xFF, buildModregrm(0, 6, rm)); // PUSH [reg] 4690 cdb.last().Iflags |= seg | CFtarg2; 4691 code* c3 = cdb.last(); 4692 cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize); // SUB reg,pushsize 4693 if (I16 || config.flags4 & CFG4space) 4694 genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3 4695 else 4696 { 4697 if (I64) 4698 cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX 4699 else 4700 cdb.gen1(0x48 + CX); // DEC CX 4701 genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3 4702 } 4703 regimmed_set(CX,0); 4704 cdb.genadjesp(cast(int)sz); 4705 } 4706 stackpush += sz; 4707 freenode(e); 4708 return; 4709 } 4710 4711 case OPind: 4712 if (!e.Ecount) /* if *e1 */ 4713 { 4714 if (sz < REGSIZE) 4715 { 4716 /* Don't push REGSIZE quantity because it may 4717 * straddle past the end of valid memory 4718 */ 4719 break; 4720 } 4721 if (sz == REGSIZE) 4722 goto case OPvar; // handle it with loadea() 4723 4724 // Avoid PUSH MEM on the Pentium when optimizing for speed 4725 if (config.flags4 & CFG4speed && 4726 (config.target_cpu >= TARGET_80486 && 4727 config.target_cpu <= TARGET_PentiumMMX) && 4728 sz <= 2 * REGSIZE && 4729 !tyfloating(tym)) 4730 break; 4731 4732 if (tym == TYldouble || tym == TYildouble || tycomplex(tym)) 4733 break; 4734 4735 code cs; 4736 cs.Iflags = 0; 4737 cs.Irex = 0; 4738 if (I32) 4739 { 4740 assert(sz >= REGSIZE * 2); 4741 loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4 4742 cdb.genadjesp(REGSIZE); 4743 stackpush += REGSIZE; 4744 sz -= REGSIZE; 4745 4746 if (sz > REGSIZE) 4747 { 4748 while (sz) 4749 { 4750 cs.IEV1.Voffset -= REGSIZE; 4751 cdb.gen(&cs); // PUSH EA+... 
4752 cdb.genadjesp(REGSIZE); 4753 stackpush += REGSIZE; 4754 sz -= REGSIZE; 4755 } 4756 freenode(e); 4757 return; 4758 } 4759 } 4760 else 4761 { 4762 if (sz == DOUBLESIZE) 4763 { 4764 loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6 4765 cs.IEV1.Voffset -= REGSIZE; 4766 cdb.gen(&cs); // PUSH EA+4 4767 cdb.genadjesp(REGSIZE); 4768 getlvalue_lsw(&cs); 4769 cdb.gen(&cs); // PUSH EA+2 4770 } 4771 else /* TYlong */ 4772 loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2 4773 cdb.genadjesp(REGSIZE); 4774 } 4775 stackpush += sz; 4776 getlvalue_lsw(&cs); 4777 cdb.gen(&cs); // PUSH EA 4778 cdb.genadjesp(REGSIZE); 4779 freenode(e); 4780 return; 4781 } 4782 break; 4783 4784 case OPnp_fp: 4785 if (!e.Ecount) /* if (far *)e1 */ 4786 { 4787 elem* e1 = e.EV.E1; 4788 tym_t tym1 = tybasic(e1.Ety); 4789 /* BUG: what about pointers to functions? */ 4790 int segreg; 4791 switch (tym1) 4792 { 4793 case TYnptr: segreg = 3<<3; break; 4794 case TYcptr: segreg = 1<<3; break; 4795 default: segreg = 2<<3; break; 4796 } 4797 if (I32 && stackalign == 2) 4798 cdb.gen1(0x66); // push a word 4799 cdb.gen1(0x06 + segreg); // PUSH SEGREG 4800 if (I32 && stackalign == 2) 4801 code_orflag(cdb.last(), CFopsize); // push a word 4802 cdb.genadjesp(stackalign); 4803 stackpush += stackalign; 4804 pushParams(cdb, e1, stackalign, tyf); 4805 freenode(e); 4806 return; 4807 } 4808 break; 4809 4810 case OPrelconst: 4811 if (config.exe & EX_segmented) 4812 { 4813 /* Determine if we can just push the segment register */ 4814 /* Test size of type rather than TYfptr because of (long)(&v) */ 4815 Symbol* s = e.EV.Vsym; 4816 //if (sytab[s.Sclass] & SCSS && !I32) // if variable is on stack 4817 // needframe = true; // then we need stack frame 4818 int fl; 4819 if (_tysize[tym] == tysize(TYfptr) && 4820 (fl = s.Sfl) != FLfardata && 4821 /* not a function that CS might not be the segment of */ 4822 (!((fl == FLfunc || s.ty() & mTYcs) && 4823 (s.Sclass == SC.comdat || s.Sclass == SC.extern_ 
|| 4824 s.Sclass == SC.inline || config.wflags & WFthunk)) || 4825 (fl == FLfunc && config.exe == EX_DOSX) 4826 ) 4827 ) 4828 { 4829 stackpush += sz; 4830 cdb.gen1(0x06 + // PUSH SEGREG 4831 (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3)); 4832 cdb.genadjesp(REGSIZE); 4833 4834 if (config.target_cpu >= TARGET_80286 && !e.Ecount) 4835 { 4836 getoffset(cdb, e, STACK); 4837 freenode(e); 4838 return; 4839 } 4840 else 4841 { 4842 regm_t retregs; 4843 offsetinreg(cdb, e, &retregs); 4844 const reg = findreg(retregs); 4845 genpush(cdb,reg); // PUSH reg 4846 cdb.genadjesp(REGSIZE); 4847 } 4848 return; 4849 } 4850 if (config.target_cpu >= TARGET_80286 && !e.Ecount) 4851 { 4852 stackpush += sz; 4853 if (_tysize[tym] == tysize(TYfptr)) 4854 { 4855 // PUSH SEG e 4856 cdb.gencs(0x68,0,FLextern,s); 4857 cdb.last().Iflags = CFseg; 4858 cdb.genadjesp(REGSIZE); 4859 } 4860 getoffset(cdb, e, STACK); 4861 freenode(e); 4862 return; 4863 } 4864 } 4865 break; /* else must evaluate expression */ 4866 4867 case OPvar: 4868 L1: 4869 if (config.flags4 & CFG4speed && 4870 (config.target_cpu >= TARGET_80486 && 4871 config.target_cpu <= TARGET_PentiumMMX) && 4872 sz <= 2 * REGSIZE && 4873 !tyfloating(tym)) 4874 { // Avoid PUSH MEM on the Pentium when optimizing for speed 4875 break; 4876 } 4877 else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym)) 4878 break; // no PUSH MEM 4879 else 4880 { 4881 int regsize = REGSIZE; 4882 uint flag = 0; 4883 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 && 4884 !e.Ecount) 4885 { 4886 regsize = 4; 4887 flag |= CFopsize; 4888 } 4889 code cs; 4890 cs.Iflags = 0; 4891 cs.Irex = 0; 4892 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0); // PUSH EA+sz-2 4893 code_orflag(cdb.last(), flag); 4894 cdb.genadjesp(REGSIZE); 4895 stackpush += sz; 4896 while (cast(targ_int)(sz -= regsize) > 0) 4897 { 4898 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0); 4899 code_orflag(cdb.last(), flag); 4900 cdb.genadjesp(REGSIZE); 4901 
} 4902 freenode(e); 4903 return; 4904 } 4905 4906 case OPconst: 4907 { 4908 char pushi = 0; 4909 uint flag = 0; 4910 int regsize = REGSIZE; 4911 4912 if (tycomplex(tym)) 4913 break; 4914 4915 if (I64 && tyfloating(tym) && sz > 4 && boolres(e)) 4916 // Can't push 64 bit non-zero args directly 4917 break; 4918 4919 if (I32 && szb == 10) // special case for long double constants 4920 { 4921 assert(sz == 12); 4922 targ_int value = e.EV.Vushort8[4]; // pick upper 2 bytes of Vldouble 4923 stackpush += sz; 4924 cdb.genadjesp(cast(int)sz); 4925 for (int i = 0; i < 3; ++i) 4926 { 4927 reg_t reg; 4928 if (reghasvalue(allregs, value, ®)) 4929 cdb.gen1(0x50 + reg); // PUSH reg 4930 else 4931 cdb.genc2(0x68,0,value); // PUSH value 4932 value = e.EV.Vulong4[i ^ 1]; // treat Vldouble as 2 element array of 32 bit uint 4933 } 4934 freenode(e); 4935 return; 4936 } 4937 4938 assert(I64 || sz <= tysize(TYldouble)); 4939 int i = cast(int)sz; 4940 if (!I16 && i == 2) 4941 flag = CFopsize; 4942 4943 if (config.target_cpu >= TARGET_80286) 4944 // && (e.Ecount == 0 || e.Ecount != e.Ecomsub)) 4945 { 4946 pushi = 1; 4947 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4) 4948 { 4949 regsize = 4; 4950 flag = CFopsize; 4951 } 4952 } 4953 else if (i == REGSIZE) 4954 break; 4955 4956 stackpush += sz; 4957 cdb.genadjesp(cast(int)sz); 4958 targ_uns* pi = &e.EV.Vuns; // point to start of Vdouble 4959 targ_ushort* ps = cast(targ_ushort *) pi; 4960 targ_ullong* pl = cast(targ_ullong *)pi; 4961 i /= regsize; 4962 do 4963 { 4964 if (i) /* be careful not to go negative */ 4965 i--; 4966 4967 targ_size_t value; 4968 switch (regsize) 4969 { 4970 case 2: 4971 value = ps[i]; 4972 break; 4973 4974 case 4: 4975 if (tym == TYldouble || tym == TYildouble) 4976 /* The size is 10 bytes, and since we have 2 bytes left over, 4977 * just read those 2 bytes, not 4. 4978 * Otherwise we're reading uninitialized data. 4979 * I.e. read 4 bytes, 4 bytes, then 2 bytes 4980 */ 4981 value = i == 2 ? 
ps[4] : pi[i]; // 80 bits 4982 else 4983 value = pi[i]; 4984 break; 4985 4986 case 8: 4987 value = cast(targ_size_t)pl[i]; 4988 break; 4989 4990 default: 4991 assert(0); 4992 } 4993 4994 reg_t reg; 4995 if (pushi) 4996 { 4997 if (I64 && regsize == 8 && value != cast(int)value) 4998 { 4999 regwithvalue(cdb,allregs,value,®,64); 5000 goto Preg; // cannot push imm64 unless it is sign extended 32 bit value 5001 } 5002 if (regsize == REGSIZE && reghasvalue(allregs,value,®)) 5003 goto Preg; 5004 cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value 5005 } 5006 else 5007 { 5008 regwithvalue(cdb, allregs, value, ®, 0); 5009 Preg: 5010 genpush(cdb,reg); // PUSH reg 5011 } 5012 code_orflag(cdb.last(), flag); // operand size 5013 } while (i); 5014 freenode(e); 5015 return; 5016 } 5017 5018 case OPpair: 5019 { 5020 if (e.Ecount) 5021 break; 5022 const op1 = e.EV.E1.Eoper; 5023 const op2 = e.EV.E2.Eoper; 5024 if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) && 5025 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst)) 5026 { 5027 pushParams(cdb, e.EV.E2, stackalign, tyf); 5028 pushParams(cdb, e.EV.E1, stackalign, tyf); 5029 freenode(e); 5030 } 5031 else if (tyfloating(e.EV.E1.Ety) || 5032 tyfloating(e.EV.E2.Ety)) 5033 { 5034 // Need special handling because of order of evaluation of e1 and e2 5035 break; 5036 } 5037 else 5038 { 5039 regm_t regs = allregs; 5040 codelem(cdb, e, ®s, false); 5041 genpush(cdb, findregmsw(regs)); // PUSH msreg 5042 genpush(cdb, findreglsw(regs)); // PUSH lsreg 5043 cdb.genadjesp(cast(int)sz); 5044 stackpush += sz; 5045 } 5046 return; 5047 } 5048 5049 case OPrpair: 5050 { 5051 if (e.Ecount) 5052 break; 5053 const op1 = e.EV.E1.Eoper; 5054 const op2 = e.EV.E2.Eoper; 5055 if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) && 5056 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst)) 5057 { 5058 pushParams(cdb, e.EV.E1, stackalign, tyf); 5059 pushParams(cdb, e.EV.E2, stackalign, tyf); 5060 freenode(e); 5061 } 5062 else if 
(tyfloating(e.EV.E1.Ety) || 5063 tyfloating(e.EV.E2.Ety)) 5064 { 5065 // Need special handling because of order of evaluation of e1 and e2 5066 break; 5067 } 5068 else 5069 { 5070 regm_t regs = allregs; 5071 codelem(cdb, e, ®s, false); 5072 genpush(cdb, findregmsw(regs)); // PUSH msreg 5073 genpush(cdb, findreglsw(regs)); // PUSH lsreg 5074 cdb.genadjesp(cast(int)sz); 5075 stackpush += sz; 5076 } 5077 return; 5078 } 5079 5080 default: 5081 break; 5082 } 5083 5084 regm_t retregs = tybyte(tym) ? BYTEREGS : allregs; 5085 if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs)) 5086 { 5087 regm_t retxmm = XMMREGS; 5088 codelem(cdb, e, &retxmm, false); 5089 stackpush += sz; 5090 cdb.genadjesp(cast(int)sz); 5091 cod3_stackadj(cdb, cast(int)sz); 5092 const op = xmmstore(tym); 5093 const r = findreg(retxmm); 5094 cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP)); // MOV [ESP],r 5095 checkSetVex(cdb.last(),tym); 5096 return; 5097 } 5098 else if (tyfloating(tym)) 5099 { 5100 if (config.inline8087) 5101 { 5102 retregs = tycomplex(tym) ? 
mST01 : mST0; 5103 codelem(cdb, e, &retregs, false); 5104 stackpush += sz; 5105 cdb.genadjesp(cast(int)sz); 5106 cod3_stackadj(cdb, cast(int)sz); 5107 opcode_t op; 5108 uint r; 5109 switch (tym) 5110 { 5111 case TYfloat: 5112 case TYifloat: 5113 case TYcfloat: 5114 op = 0xD9; 5115 r = 3; 5116 break; 5117 5118 case TYdouble: 5119 case TYidouble: 5120 case TYdouble_alias: 5121 case TYcdouble: 5122 op = 0xDD; 5123 r = 3; 5124 break; 5125 5126 case TYldouble: 5127 case TYildouble: 5128 case TYcldouble: 5129 op = 0xDB; 5130 r = 7; 5131 break; 5132 5133 default: 5134 assert(0); 5135 } 5136 if (!I16) 5137 { 5138 if (tycomplex(tym)) 5139 { 5140 // FSTP sz/2[ESP] 5141 cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2); 5142 pop87(); 5143 } 5144 pop87(); 5145 cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP)); // FSTP [ESP] 5146 } 5147 else 5148 { 5149 retregs = IDXREGS; // get an index reg 5150 reg_t reg; 5151 allocreg(cdb, &retregs, ®, TYoffset); 5152 genregs(cdb, 0x89, SP, reg); // MOV reg,SP 5153 pop87(); 5154 cdb.gen2(op, modregrm(0, r, regtorm[reg])); // FSTP [reg] 5155 } 5156 if (LARGEDATA) 5157 cdb.last().Iflags |= CFss; // want to store into stack 5158 genfwait(cdb); // FWAIT 5159 return; 5160 } 5161 else if (I16 && (tym == TYdouble || tym == TYdouble_alias)) 5162 retregs = mSTACK; 5163 } 5164 else if (I16 && sz == 8) // if long long 5165 retregs = mSTACK; 5166 5167 scodelem(cdb,e,&retregs,0,true); 5168 if (retregs != mSTACK) // if stackpush not already inc'd 5169 stackpush += sz; 5170 if (sz <= REGSIZE) 5171 { 5172 genpush(cdb,findreg(retregs)); // PUSH reg 5173 cdb.genadjesp(cast(int)REGSIZE); 5174 } 5175 else if (sz == REGSIZE * 2) 5176 { 5177 genpush(cdb,findregmsw(retregs)); // PUSH msreg 5178 genpush(cdb,findreglsw(retregs)); // PUSH lsreg 5179 cdb.genadjesp(cast(int)sz); 5180 } 5181 } 5182 5183 /******************************* 5184 * Get offset portion of e, and store it in an index 5185 * register. 
Return mask of index register in *pretregs.
 * If `e` is a common subexpression whose value is already recorded in
 * an eligible register, that register is reused instead of reloading.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = elem whose offset portion is wanted; freed with freenode() before returning
 *      pretregs = set to the mask of the register holding the offset
 */

@trusted
void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    regm_t retregs = mLSW;                   // want only offset
    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // Look for a register already recorded as holding this CSE
        regm_t rm = retregs & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm && regcon.cse.value[i] == e)
            {
                *pretregs = mask(i);
                getregs(cdb, *pretregs);
                goto L3;
            }
            rm &= ~mask(i);
        }
    }

    *pretregs = retregs;
    allocreg(cdb, pretregs, &reg, TYoffset);
    getoffset(cdb,e,reg);
L3:
    cssave(e, *pretregs,false);
    freenode(e);
}

/******************************
 * Generate code to load data into registers.
 *
 * Handles three broad cases:
 *  $(UL
 *  $(LI `*pretregs == mPSW`: only the flags are wanted, so the value is
 *       tested against zero without necessarily keeping it in a register)
 *  $(LI `e` is an `OPconst`: the constant is materialized into the
 *       allocated register(s))
 *  $(LI otherwise: the value is loaded from its effective address; this
 *       path reads `e.EV.Vsym`, so `e` is presumably an `OPvar` -
 *       NOTE(review): confirm against callers)
 *  )
 * Struct-typed elems are forwarded to cdrelconst(), and floating point
 * elems may be forwarded to cloadxmm(), load87() or cload87().
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = elem to load, must not be null
 *      pretregs = on entry, mask of acceptable result registers and/or mPSW;
 *                 if 0, nothing is generated. Updated to describe where
 *                 the result ended up.
 */


@trusted
void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
//        if (debugw)
//            printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
//        elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;
    if (*pretregs == mPSW)
    {
        // Only the flags are wanted
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            /* Evaluate using XMM register and XMM instruction.
             * This affects jmpopcode()
             */
            if (s.Sclass == SC.parameter)
                refparam = true;
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));          // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);     // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));       // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);        // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);                // get a register
            if (I32)                                             // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0);   // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)           // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));         // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);                 // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);           // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                            // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1

            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))               // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;
    if (e.Eoper == OPconst)
    {
        if (tyvector(tym) && forregs & XMMREGS)
        {
            assert(!flags);
            reg_t xreg;
            allocreg(cdb, &forregs, &xreg, tym);     // allocate registers
            movxmmconst(cdb, xreg, tym, &e.EV, flags);
            fixresult(cdb, e, forregs, pretregs);
            return;
        }

        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that already holds this value
        if (sz == REGSIZE && reghasvalue(forregs, value, &reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);     // allocate registers
        regcon.immed.mval = save;               // allocreg could unnecessarily clear .mval
        if (sz <= REGSIZE)
        {
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {
                movxmmconst(cdb, reg, tym, &e.EV, 0);
                flags = 0;
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0);  // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);     // MOV ES,reg
                msw = lsw;                      // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);      // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);          // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);          // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);   // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lo, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.hi, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            const reg_t preg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            const regm_t pregm = mask(preg);

            if (!(sz <= 2 && pregm & XMMREGS))   // no SIMD instructions to load 1 or 2 byte quantities
            {
                if (debugr)
                    printf("%s.%d is fastpar and using register %s\n",
                           e.EV.Vsym.Sident.ptr,
                           cast(int)e.EV.Voffset,
                           regm_str(pregm));

                mfuncreg &= ~pregm;
                regcon.used |= pregm;
                fixresult(cdb,e,pregm,pretregs);
                return;
            }
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                                  // byte MOV
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);   // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);              // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                assert(reg == AX);
                // Each load must preserve every register filled by the loads before it
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0,           0); // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX,         0); // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX,     0); // MOV CX,data+2
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}

}