/**
 * Code generation 1
 *
 * Handles function calls: putting arguments in registers / on the stack, and jumping to the function.
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
 */

module dmd.backend.cod1;

import core.bitop;
import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;


import dmd.backend.cg : segfl, stackfl;

nothrow:
@safe:

/* Append a register-to-register instruction with opcode 0x09 to `c` by
 * forwarding to genregs(); note that `f` is passed before `t`.
 * NOTE(review): 0x09 is the x86 `OR r/m,reg` opcode, so this presumably
 * emits `OR t,f` - confirm against genregs().
 */
private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

/* Arrays to convert from index register to r/m field.
 * Both are indexed by register number in the order given in the column
 * header below; regtorm32 is the 32 bit table.
 * NOTE(review): regtorm is presumably the 16 bit table and -1 presumably
 * marks a register with no r/m encoding - confirm against the users.
 */
                                       /* AX CX DX BX SP BP SI DI       */
private __gshared const byte[8] regtorm32 =   [  0, 1, 2, 3,-1, 5, 6, 7 ];
        __gshared const byte[8] regtorm   =   [ -1,-1,-1, 7,-1, 6, 4, 5 ];

//void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
//        regm_t *pretregs,regm_t keepmsk, bool usefuncarg);

/*********************************
 * Determine if we should leave parameter `s` in the register it
 * came in, or allocate a register for it using the register
 * allocator.
60 * Params: 61 * s = parameter Symbol 62 * Returns: 63 * `true` if `s` is a register parameter and leave it in the register it came in 64 */ 65 @trusted 66 bool regParamInPreg(Symbol* s) 67 { 68 //printf("regPAramInPreg %s\n", s.Sident.ptr); 69 return (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg) && 70 (!(config.flags4 & CFG4optimized) || !(s.Sflags & GTregcand)); 71 } 72 73 74 /************************** 75 * Determine if e is a 32 bit scaled index addressing mode. 76 * Returns: 77 * 0 not a scaled index addressing mode 78 * !=0 the value for ss in the SIB byte 79 */ 80 81 @trusted 82 int isscaledindex(elem *e) 83 { 84 targ_uns ss; 85 86 assert(!I16); 87 while (e.Eoper == OPcomma) 88 e = e.EV.E2; 89 if (!(e.Eoper == OPshl && !e.Ecount && 90 e.EV.E2.Eoper == OPconst && 91 (ss = e.EV.E2.EV.Vuns) <= 3 92 ) 93 ) 94 ss = 0; 95 return ss; 96 } 97 98 /********************************************* 99 * Generate code for which isscaledindex(e) returned a non-zero result. 100 */ 101 102 @trusted 103 /*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk) 104 { 105 // Load index register with result of e.EV.E1 106 while (e.Eoper == OPcomma) 107 { 108 regm_t r = 0; 109 scodelem(cdb, e.EV.E1, &r, keepmsk, true); 110 freenode(e); 111 e = e.EV.E2; 112 } 113 assert(e.Eoper == OPshl); 114 scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true); 115 freenode(e.EV.E2); 116 freenode(e); 117 } 118 119 /*********************************** 120 * Determine index if we can do two LEA instructions as a multiply. 
121 * Returns: 122 * 0 can't do it 123 */ 124 125 enum 126 { 127 SSFLnobp = 1, /// can't have EBP in relconst 128 SSFLnobase1 = 2, /// no base register for first LEA 129 SSFLnobase = 4, /// no base register 130 SSFLlea = 8, /// can do it in one LEA 131 } 132 133 struct Ssindex 134 { 135 targ_uns product; 136 ubyte ss1; 137 ubyte ss2; 138 ubyte ssflags; /// SSFLxxxx 139 } 140 141 private __gshared const Ssindex[21] ssindex_array = 142 [ 143 { 0, 0, 0 }, // [0] is a place holder 144 145 { 3, 1, 0, SSFLnobp | SSFLlea }, 146 { 5, 2, 0, SSFLnobp | SSFLlea }, 147 { 9, 3, 0, SSFLnobp | SSFLlea }, 148 149 { 6, 1, 1, SSFLnobase }, 150 { 12, 1, 2, SSFLnobase }, 151 { 24, 1, 3, SSFLnobase }, 152 { 10, 2, 1, SSFLnobase }, 153 { 20, 2, 2, SSFLnobase }, 154 { 40, 2, 3, SSFLnobase }, 155 { 18, 3, 1, SSFLnobase }, 156 { 36, 3, 2, SSFLnobase }, 157 { 72, 3, 3, SSFLnobase }, 158 159 { 15, 2, 1, SSFLnobp }, 160 { 25, 2, 2, SSFLnobp }, 161 { 27, 3, 1, SSFLnobp }, 162 { 45, 3, 2, SSFLnobp }, 163 { 81, 3, 3, SSFLnobp }, 164 165 { 16, 3, 1, SSFLnobase1 | SSFLnobase }, 166 { 32, 3, 2, SSFLnobase1 | SSFLnobase }, 167 { 64, 3, 3, SSFLnobase1 | SSFLnobase }, 168 ]; 169 170 int ssindex(OPER op,targ_uns product) 171 { 172 if (op == OPshl) 173 product = 1 << product; 174 for (size_t i = 1; i < ssindex_array.length; i++) 175 { 176 if (ssindex_array[i].product == product) 177 return cast(int)i; 178 } 179 return 0; 180 } 181 182 /*************************************** 183 * Build an EA of the form disp[base][index*scale]. 
184 * Input: 185 * c struct to fill in 186 * base base register (-1 if none) 187 * index index register (-1 if none) 188 * scale scale factor - 1,2,4,8 189 * disp displacement 190 */ 191 192 void buildEA(code *c,int base,int index,int scale,targ_size_t disp) 193 { 194 ubyte rm; 195 ubyte sib; 196 ubyte rex = 0; 197 198 sib = 0; 199 if (!I16) 200 { uint ss; 201 202 assert(index != SP); 203 204 switch (scale) 205 { case 1: ss = 0; break; 206 case 2: ss = 1; break; 207 case 4: ss = 2; break; 208 case 8: ss = 3; break; 209 default: assert(0); 210 } 211 212 if (base == -1) 213 { 214 if (index == -1) 215 rm = modregrm(0,0,5); 216 else 217 { 218 rm = modregrm(0,0,4); 219 sib = modregrm(ss,index & 7,5); 220 if (index & 8) 221 rex |= REX_X; 222 } 223 } 224 else if (index == -1) 225 { 226 if (base == SP) 227 { 228 rm = modregrm(2, 0, 4); 229 sib = modregrm(0, 4, SP); 230 } 231 else 232 { rm = modregrm(2, 0, base & 7); 233 if (base & 8) 234 { rex |= REX_B; 235 if (base == R12) 236 { 237 rm = modregrm(2, 0, 4); 238 sib = modregrm(0, 4, 4); 239 } 240 } 241 } 242 } 243 else 244 { 245 rm = modregrm(2, 0, 4); 246 sib = modregrm(ss,index & 7,base & 7); 247 if (index & 8) 248 rex |= REX_X; 249 if (base & 8) 250 rex |= REX_B; 251 } 252 } 253 else 254 { 255 // -1 AX CX DX BX SP BP SI DI 256 static immutable ubyte[9][9] EA16rm = 257 [ 258 [ 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, ], // -1 259 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // AX 260 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // CX 261 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // DX 262 [ 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, ], // BX 263 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // SP 264 [ 0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, ], // BP 265 [ 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, ], // SI 266 [ 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, ] // DI 267 ]; 268 269 assert(scale == 1); 270 rm = EA16rm[base + 1][index + 1]; 271 assert(rm != 9); 272 } 273 c.Irm = rm; 
274 c.Isib = sib; 275 c.Irex = rex; 276 c.IFL1 = FLconst; 277 c.IEV1.Vuns = cast(targ_uns)disp; 278 } 279 280 /********************************************* 281 * Build REX, modregrm and sib bytes 282 */ 283 284 uint buildModregrm(int mod, int reg, int rm) 285 { 286 uint m; 287 if (I16) 288 m = modregrm(mod, reg, rm); 289 else 290 { 291 if ((rm & 7) == SP && mod != 3) 292 m = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4); 293 else 294 m = modregrm(mod,reg & 7,rm & 7); 295 if (reg & 8) 296 m |= REX_R << 16; 297 if (rm & 8) 298 m |= REX_B << 16; 299 } 300 return m; 301 } 302 303 /**************************************** 304 * Generate code for eecontext 305 */ 306 307 @trusted 308 void genEEcode() 309 { 310 CodeBuilder cdb; 311 cdb.ctor(); 312 313 eecontext.EEin++; 314 regcon.immed.mval = 0; 315 regm_t retregs = 0; //regmask(eecontext.EEelem.Ety); 316 assert(EEStack.offset >= REGSIZE); 317 cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE)); 318 cdb.gen1(0x50 + SI); // PUSH ESI 319 cdb.genadjesp(cast(int)EEStack.offset); 320 gencodelem(cdb, eecontext.EEelem, &retregs, false); 321 code *c = cdb.finish(); 322 assignaddrc(c); 323 pinholeopt(c,null); 324 jmpaddr(c); 325 eecontext.EEcode = gen1(c, 0xCC); // INT 3 326 eecontext.EEin--; 327 } 328 329 330 /******************************************** 331 * Gen a save/restore sequence for mask of registers. 
332 * Params: 333 * regm = mask of registers to save 334 * cdbsave = save code appended here 335 * cdbrestore = restore code appended here 336 * Returns: 337 * amount of stack consumed 338 */ 339 @trusted 340 uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore) 341 { 342 //printf("gensaverestore2(%s)\n", regm_str(regm)); 343 regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01; 344 if (!regm) 345 return 0; 346 347 uint stackused = 0; 348 349 code *[regm.sizeof * 8] restore; 350 351 reg_t i; 352 for (i = 0; regm; i++) 353 { 354 if (regm & 1) 355 { 356 code *cs2; 357 if (i == ES && I16) 358 { 359 stackused += REGSIZE; 360 cdbsave.gen1(0x06); // PUSH ES 361 cs2 = gen1(null, 0x07); // POP ES 362 } 363 else if (i == ST0 || i == ST01) 364 { 365 CodeBuilder cdb; 366 cdb.ctor(); 367 gensaverestore87(1 << i, cdbsave, cdb); 368 cs2 = cdb.finish(); 369 } 370 else if (i >= XMM0 || I64 || cgstate.funcarg.size) 371 { uint idx; 372 regsave.save(cdbsave, i, &idx); 373 CodeBuilder cdb; 374 cdb.ctor(); 375 regsave.restore(cdb, i, idx); 376 cs2 = cdb.finish(); 377 } 378 else 379 { 380 stackused += REGSIZE; 381 cdbsave.gen1(0x50 + (i & 7)); // PUSH i 382 cs2 = gen1(null, 0x58 + (i & 7)); // POP i 383 if (i & 8) 384 { code_orrex(cdbsave.last(), REX_B); 385 code_orrex(cs2, REX_B); 386 } 387 } 388 restore[i] = cs2; 389 } 390 else 391 restore[i] = null; 392 regm >>= 1; 393 } 394 395 while (i) 396 { 397 code *c = restore[--i]; 398 if (c) 399 { 400 cdbrestore.append(c); 401 } 402 } 403 404 return stackused; 405 } 406 407 408 /**************************************** 409 * Clean parameters off stack. 
410 * Input: 411 * numpara amount to adjust stack pointer 412 * keepmsk mask of registers to not destroy 413 */ 414 415 @trusted 416 void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk) 417 { 418 //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); 419 if (numpara && (cgstate.stackclean || STACKALIGN >= 16)) 420 { 421 /+ 422 if (0 && // won't work if operand of scodelem 423 numpara == stackpush && // if this is all those pushed 424 needframe && // and there will be a BP 425 !config.windows && 426 !(regcon.mvar & fregsaved) // and no registers will be pushed 427 ) 428 genregs(cdb,0x89,BP,SP); // MOV SP,BP 429 else 430 +/ 431 { 432 regm_t scratchm = 0; 433 434 if (numpara == REGSIZE && config.flags4 & CFG4space) 435 { 436 scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; 437 } 438 439 if (scratchm) 440 { 441 reg_t r; 442 allocreg(cdb, &scratchm, &r, TYint); 443 cdb.gen1(0x58 + r); // POP r 444 } 445 else 446 cod3_stackadj(cdb, -numpara); 447 } 448 stackpush -= numpara; 449 cdb.genadjesp(-numpara); 450 } 451 } 452 453 /********************************* 454 * Generate code for a logical expression. 
455 * Input: 456 * e elem 457 * jcond 458 * bit 1 if true then goto jump address if e 459 * if false then goto jump address if !e 460 * 2 don't call save87() 461 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond 462 * targ either code or block pointer to destination 463 */ 464 465 @trusted 466 void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ) 467 { 468 //printf("logexp(e = %p, jcond = %d)\n", e, jcond); elem_print(e); 469 if (tybasic(e.Ety) == TYnoreturn) 470 { 471 con_t regconsave = regcon; 472 regm_t retregs = 0; 473 codelem(cdb,e,&retregs,0); 474 regconsave.used |= regcon.used; 475 regcon = regconsave; 476 return; 477 } 478 479 int no87 = (jcond & 2) == 0; 480 docommas(cdb, e); // scan down commas 481 cgstate.stackclean++; 482 483 code* c, ce; 484 if (!OTleaf(e.Eoper) && !e.Ecount) // if operator and not common sub 485 { 486 switch (e.Eoper) 487 { 488 case OPoror: 489 { 490 con_t regconsave; 491 if (jcond & 1) 492 { 493 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 494 regconsave = regcon; 495 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 496 } 497 else 498 { 499 code *cnop = gennop(null); 500 logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop); 501 regconsave = regcon; 502 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 503 cdb.append(cnop); 504 } 505 andregcon(regconsave); 506 freenode(e); 507 cgstate.stackclean--; 508 return; 509 } 510 511 case OPandand: 512 { 513 con_t regconsave; 514 if (jcond & 1) 515 { 516 code *cnop = gennop(null); // a dummy target address 517 logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop); 518 regconsave = regcon; 519 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 520 cdb.append(cnop); 521 } 522 else 523 { 524 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 525 regconsave = regcon; 526 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 527 } 528 andregcon(regconsave); 529 freenode(e); 530 cgstate.stackclean--; 531 return; 532 } 533 534 case OPnot: 535 jcond ^= 1; 536 goto case OPbool; 537 538 case OPbool: 539 case OPs8_16: 
540 case OPu8_16: 541 case OPs16_32: 542 case OPu16_32: 543 case OPs32_64: 544 case OPu32_64: 545 case OPu32_d: 546 case OPd_ld: 547 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 548 freenode(e); 549 cgstate.stackclean--; 550 return; 551 552 case OPcond: 553 { 554 code *cnop2 = gennop(null); // addresses of start of leaves 555 code *cnop = gennop(null); 556 logexp(cdb, e.EV.E1, false, FLcode, cnop2); // eval condition 557 con_t regconold = regcon; 558 logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ); 559 genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf 560 561 con_t regconsave = regcon; 562 regcon = regconold; 563 564 cdb.append(cnop2); 565 logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ); 566 andregcon(regconold); 567 andregcon(regconsave); 568 freenode(e.EV.E2); 569 freenode(e); 570 cdb.append(cnop); 571 cgstate.stackclean--; 572 return; 573 } 574 575 default: 576 break; 577 } 578 } 579 580 /* Special code for signed long compare. 581 * Not necessary for I64 until we do cents. 582 */ 583 if (OTrel2(e.Eoper) && // if < <= >= > 584 !e.Ecount && 585 ( (I16 && tybasic(e.EV.E1.Ety) == TYlong && tybasic(e.EV.E2.Ety) == TYlong) || 586 (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong)) 587 ) 588 { 589 longcmp(cdb, e, jcond != 0, fltarg, targ); 590 cgstate.stackclean--; 591 return; 592 } 593 594 regm_t retregs = mPSW; // return result in flags 595 opcode_t op = jmpopcode(e); // get jump opcode 596 if (!(jcond & 1)) 597 op ^= 0x101; // toggle jump condition(s) 598 codelem(cdb, e, &retregs, true); // evaluate elem 599 if (no87) 600 cse_flush(cdb,no87); // flush CSE's to memory 601 genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction 602 cgstate.stackclean--; 603 } 604 605 /****************************** 606 * Routine to aid in setting things up for gen(). 607 * Look for common subexpression. 608 * Can handle indirection operators, but not if they're common subs. 
 * Input:
 *      cdb     generated code is appended here
 *      e ->    elem where we get some of the data from
 *      cs ->   partially filled code to add
 *      op =    opcode
 *      reg =   reg field of (mod reg r/m)
 *      offset = data to be added to Voffset field
 *      keepmsk = mask of registers we must not destroy
 *      desmsk  = mask of registers destroyed by executing the instruction
 * Returns:
 *      nothing; the completed instruction `cs` is appended to `cdb`
 */

@trusted
void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
            regm_t keepmsk,regm_t desmsk)
{
    code* c, cg, cd;    // NOTE(review): these three locals are never used in this function

    debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    tym_t tym = e.Ety;
    int sz = tysize(tym);

    /* Determine if location we want to get is in a register. If so,      */
    /* substitute the register for the EA.                                */
    /* Note that operators don't go through this. CSE'd operators are     */
    /* picked up by comsub().                                             */
    if (e.Ecount &&                     /* if cse                       */
        e.Ecount != e.Ecomsub &&        /* and cse was generated        */
        op != LEA && op != 0xC4 &&      /* and not an LEA or LES        */
        (op != 0xFF || reg != 3) &&     /* and not CALLF MEM16          */
        (op & 0xFFF8) != 0xD8)          // and not 8087 opcode
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (op == 0xFF && reg == 6)
            rm &= ~XMMREGS;             // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)     // value is in 4 registers
            {
                // offset selects which of the four registers holds the wanted word
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                rm &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                rm &= mMSW;             /* only high words      */
            else
                rm &= mLSW;             /* only low words       */
        }
        // Walk the candidate registers looking for one that holds e
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm)
            {
                if (regcon.cse.value[i] == e && // if register has elem
                    /* watch out for a CWD destroying DX        */
                    !(i == DX && op == 0xF7 && desmsk & mDX))
                {
                    /* if ES, then it can only be a load    */
                    if (i == ES)
                    {
                        if (op != 0x8B)
                            break;      // not a load
                        cs.Iop = 0x8C;  /* MOV reg,ES   */
                        cs.Irm = modregrm(3, 0, reg & 7);
                        if (reg & 8)
                            code_orrex(cs, REX_B);
                    }
                    else    // XXX reg,i
                    {
                        cs.Irm = modregrm(3, reg & 7, i & 7);
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (i & 8)
                            cs.Irex |= REX_B;
                        if (sz == 1 && I64 && (i >= 4 || reg >= 4))
                            cs.Irex |= REX;
                        if (I64 && (sz == 8 || sz == 16))
                            cs.Irex |= REX_W;
                    }
                    // The register form replaces the memory EA, so the
                    // getlvalue() path below is skipped
                    goto L2;
                }
                rm &= ~mask(i);
            }
        }
    }

    // Not available in a register: compute the memory addressing mode
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (reg == 6 && op == 0xFF ||             /* don't PUSH a word    */
            op == MOVZXw || op == MOVSXw ||           /* MOVZX/MOVSX          */
            (op & 0xFFF8) == 0xD8 ||              /* 8087 instructions    */
            op == LEA)                            /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (reg == 6 && op == 0xFF)         // if PUSH
                cs.Irex &= ~REX_W;              // REX is ignored for PUSH anyway
        }
    }
    else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE! fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        uint r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}


/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask containing the register to index with
 * Returns:
 *      the mod r/m byte in bits 0..7; when not I16 and the register is R12,
 *      also a sib byte in bits 8..15 and REX_B shifted left by 16
 *      (this is the layout setaddrmode() unpacks)
 */

@trusted
uint getaddrmode(regm_t idxregs)
{
    uint mode;

    if (I16)
    {
        // Reached only if idxregs contains none of BX, DI, SI
        static ubyte error() { assert(0); }

        mode =  (idxregs & mBX) ? modregrm(2,0,7) :     /* [BX] */
                (idxregs & mDI) ? modregrm(2,0,5):      /* [DI] */
                (idxregs & mSI) ? modregrm(2,0,4):      /* [SI] */
                                  error();
    }
    else
    {
        const reg = findreg(idxregs & (ALLREGS | mBP));
        if (reg == R12)
            // R12 gets an explicit sib byte plus REX_B
            mode = (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
        else
            mode = modregrmx(2,0,reg);
    }
    return mode;
}

/**************************
 * Set the addressing mode of `c` from getaddrmode(idxregs).
 * Params:
 *      c = instruction whose Irm and Isib are overwritten; REX_B in Irex is
 *          cleared and then bits 16 and up of the mode are OR'ed into Irex
 *      idxregs = mask containing the register to index with
 */
void setaddrmode(code *c, regm_t idxregs)
{
    uint mode = getaddrmode(idxregs);
    c.Irm = mode & 0xFF;
    c.Isib = (mode >> 8) & 0xFF;
    c.Irex &= ~REX_B;
    c.Irex |= mode >> 16;
}

/**********************************************
 * Adjust the EA in `c` to refer to the most significant word.
 * If the EA is a register (FLreg), the r/m field and REX_B are switched to
 * the symbol's Sregmsw register; otherwise REGSIZE is added to the offset.
 */

@trusted
void getlvalue_msw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const regmsw = c.IEV1.Vsym.Sregmsw;
        c.Irm = (c.Irm & ~7) | (regmsw & 7);
        if (regmsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset += REGSIZE;
}

/**********************************************
 * Adjust the EA in `c` to refer to the least significant word; the
 * counterpart of getlvalue_msw().
 * If the EA is a register (FLreg), the r/m field and REX_B are switched to
 * the symbol's Sreglsw register; otherwise REGSIZE is subtracted from the offset.
 */

@trusted
void getlvalue_lsw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const reglsw = c.IEV1.Vsym.Sreglsw;
        c.Irm = (c.Irm & ~7) | (reglsw & 7);
        if (reglsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset -= REGSIZE;
}

/******************
 * Compute addressing mode.
 * Generate & return sequence of code (if any).
 * Return in cs the info on it.
854 * Input: 855 * pcs -> where to store data about addressing mode 856 * e -> the lvalue elem 857 * keepmsk mask of registers we must not destroy or use 858 * if (keepmsk & RMstore), this will be only a store operation 859 * into the lvalue 860 * if (keepmsk & RMload), this will be a read operation only 861 */ 862 863 @trusted 864 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk) 865 { 866 FL fl; 867 uint f, opsave; 868 elem* e1, e11, e12; 869 bool e1isadd, e1free; 870 reg_t reg; 871 tym_t e1ty; 872 Symbol* s; 873 874 //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 875 //elem_print(e); 876 assert(e); 877 elem_debug(e); 878 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 879 { 880 s = e.EV.Vsym; 881 fl = s.Sfl; 882 if (tyfloating(s.ty())) 883 objmod.fltused(); 884 } 885 else 886 fl = FLoper; 887 pcs.IFL1 = cast(ubyte)fl; 888 pcs.Iflags = CFoff; /* only want offsets */ 889 pcs.Irex = 0; 890 pcs.IEV1.Voffset = 0; 891 892 tym_t ty = e.Ety; 893 uint sz = tysize(ty); 894 if (tyfloating(ty)) 895 objmod.fltused(); 896 if (I64 && (sz == 8 || sz == 16) && !tyvector(ty)) 897 pcs.Irex |= REX_W; 898 if (!I16 && sz == SHORTSIZE) 899 pcs.Iflags |= CFopsize; 900 if (ty & mTYvolatile) 901 pcs.Iflags |= CFvolatile; 902 903 switch (fl) 904 { 905 case FLoper: 906 debug 907 if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 908 909 switch (e.Eoper) 910 { 911 case OPadd: // this way when we want to do LEA 912 e1 = e; 913 e1free = false; 914 e1isadd = true; 915 break; 916 917 case OPind: 918 case OPpostinc: // when doing (*p++ = ...) 919 case OPpostdec: // when doing (*p-- = ...) 
920 case OPbt: 921 case OPbtc: 922 case OPbtr: 923 case OPbts: 924 case OPvecfill: 925 e1 = e.EV.E1; 926 e1free = true; 927 e1isadd = e1.Eoper == OPadd; 928 break; 929 930 default: 931 printf("function: %s\n", funcsym_p.Sident.ptr); 932 elem_print(e); 933 assert(0); 934 } 935 e1ty = tybasic(e1.Ety); 936 if (e1isadd) 937 { 938 e12 = e1.EV.E2; 939 e11 = e1.EV.E1; 940 } 941 942 /* First see if we can replace *(e+&v) with 943 * MOV idxreg,e 944 * EA = [ES:] &v+idxreg 945 */ 946 f = FLconst; 947 948 /* Is address of `s` relative to RIP ? 949 */ 950 static bool relativeToRIP(Symbol* s) 951 { 952 if (!I64) 953 return false; 954 if (config.exe == EX_WIN64) 955 return true; 956 if (config.flags3 & CFG3pie) 957 { 958 if (s.Sfl == FLtlsdata || s.ty() & mTYthread) 959 { 960 if (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat) 961 return false; 962 } 963 return true; 964 } 965 else 966 return (config.flags3 & CFG3pic) != 0; 967 } 968 969 if (e1isadd && 970 ((e12.Eoper == OPrelconst && 971 !relativeToRIP(e12.EV.Vsym) && 972 (f = el_fl(e12)) != FLfardata 973 ) || 974 (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) && 975 e1.Ecount == e1.Ecomsub && 976 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 977 tysize(e11.Ety) == REGSIZE 978 ) 979 { 980 uint t; /* component of r/m field */ 981 int ss; 982 int ssi; 983 984 if (e12.Eoper == OPrelconst) 985 f = el_fl(e12); 986 /*assert(datafl[f]);*/ /* what if addr of func? */ 987 if (!I16) 988 { /* Any register can be an index register */ 989 regm_t idxregs = allregs & ~keepmsk; 990 assert(idxregs); 991 992 /* See if e1.EV.E1 can be a scaled index */ 993 ss = isscaledindex(e11); 994 if (ss) 995 { 996 /* Load index register with result of e11.EV.E1 */ 997 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 998 reg = findreg(idxregs); 999 { 1000 t = stackfl[f] ? 
2 : 0; 1001 pcs.Irm = modregrm(t, 0, 4); 1002 pcs.Isib = modregrm(ss, reg & 7, 5); 1003 if (reg & 8) 1004 pcs.Irex |= REX_X; 1005 } 1006 } 1007 else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 1008 !e11.Ecount && 1009 e11.EV.E2.Eoper == OPconst && 1010 (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0 1011 ) 1012 { 1013 regm_t scratchm; 1014 1015 char ssflags = ssindex_array[ssi].ssflags; 1016 if (ssflags & SSFLnobp && stackfl[f]) 1017 goto L6; 1018 1019 // Load index register with result of e11.EV.E1 1020 scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true); 1021 reg = findreg(idxregs); 1022 1023 int ss1 = ssindex_array[ssi].ss1; 1024 if (ssflags & SSFLlea) 1025 { 1026 assert(!stackfl[f]); 1027 pcs.Irm = modregrm(2,0,4); 1028 pcs.Isib = modregrm(ss1, reg & 7, reg & 7); 1029 if (reg & 8) 1030 pcs.Irex |= REX_X | REX_B; 1031 } 1032 else 1033 { 1034 int rbase; 1035 reg_t r; 1036 1037 scratchm = ALLREGS & ~keepmsk; 1038 allocreg(cdb, &scratchm, &r, TYint); 1039 1040 if (ssflags & SSFLnobase1) 1041 { 1042 t = 0; 1043 rbase = 5; 1044 } 1045 else 1046 { 1047 t = 0; 1048 rbase = reg; 1049 if (rbase == BP || rbase == R13) 1050 { 1051 static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 1052 1053 // IMUL r,BP,imm32 1054 cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]); 1055 goto L7; 1056 } 1057 } 1058 1059 cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7)); 1060 if (reg & 8) 1061 code_orrex(cdb.last(), REX_X); 1062 if (rbase & 8) 1063 code_orrex(cdb.last(), REX_B); 1064 if (I64) 1065 code_orrex(cdb.last(), REX_W); 1066 1067 if (ssflags & SSFLnobase1) 1068 { 1069 cdb.last().IFL1 = FLconst; 1070 cdb.last().IEV1.Vuns = 0; 1071 } 1072 L7: 1073 if (ssflags & SSFLnobase) 1074 { 1075 t = stackfl[f] ? 
2 : 0; 1076 rbase = 5; 1077 } 1078 else 1079 { 1080 t = 2; 1081 rbase = r; 1082 assert(rbase != BP); 1083 } 1084 pcs.Irm = modregrm(t, 0, 4); 1085 pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7); 1086 if (r & 8) 1087 pcs.Irex |= REX_X; 1088 if (rbase & 8) 1089 pcs.Irex |= REX_B; 1090 } 1091 freenode(e11.EV.E2); 1092 freenode(e11); 1093 } 1094 else 1095 { 1096 L6: 1097 /* Load index register with result of e11 */ 1098 scodelem(cdb, e11, &idxregs, keepmsk, true); 1099 setaddrmode(pcs, idxregs); 1100 if (stackfl[f]) /* if we need [EBP] too */ 1101 { 1102 uint idx = pcs.Irm & 7; 1103 if (pcs.Irex & REX_B) 1104 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X; 1105 pcs.Isib = modregrm(0, idx, BP); 1106 pcs.Irm = modregrm(2, 0, 4); 1107 } 1108 } 1109 } 1110 else 1111 { 1112 regm_t idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 1113 assert(idxregs); 1114 if (stackfl[f]) /* if stack data type */ 1115 { 1116 idxregs &= mSI | mDI; /* BX can't index off stack */ 1117 if (!idxregs) goto L1; /* index regs aren't avail */ 1118 t = 6; /* [BP+SI+disp] */ 1119 } 1120 else 1121 t = 0; /* [SI + disp] */ 1122 scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg 1123 pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t); 1124 } 1125 if (f == FLpara) 1126 refparam = true; 1127 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast) 1128 reflocal = true; 1129 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr) 1130 pcs.Iflags |= CFcs; 1131 else 1132 assert(f != FLreg); 1133 pcs.IFL1 = cast(ubyte)f; 1134 if (f != FLconst) 1135 pcs.IEV1.Vsym = e12.EV.Vsym; 1136 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? 
*/ 1137 1138 /* If e1 is a CSE, we must generate an addressing mode */ 1139 /* but also leave EA in registers so others can use it */ 1140 if (e1.Ecount) 1141 { 1142 uint flagsave; 1143 1144 regm_t idxregs = IDXREGS & ~keepmsk; 1145 allocreg(cdb, &idxregs, ®, TYoffset); 1146 1147 /* If desired result is a far pointer, we'll have */ 1148 /* to load another register with the segment of v */ 1149 if (e1ty == TYfptr) 1150 { 1151 reg_t msreg; 1152 1153 idxregs |= mMSW & ALLREGS & ~keepmsk; 1154 allocreg(cdb, &idxregs, &msreg, TYfptr); 1155 msreg = findregmsw(idxregs); 1156 /* MOV msreg,segreg */ 1157 genregs(cdb, 0x8C, segfl[f], msreg); 1158 } 1159 opsave = pcs.Iop; 1160 flagsave = pcs.Iflags; 1161 ubyte rexsave = pcs.Irex; 1162 pcs.Iop = LEA; 1163 code_newreg(pcs, reg); 1164 if (!I16) 1165 pcs.Iflags &= ~CFopsize; 1166 if (I64) 1167 pcs.Irex |= REX_W; 1168 cdb.gen(pcs); // LEA idxreg,EA 1169 cssave(e1,idxregs,true); 1170 if (!I16) 1171 { 1172 pcs.Iflags = flagsave; 1173 pcs.Irex = rexsave; 1174 } 1175 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1176 pcs.Iflags |= CFss; // add SS: override 1177 pcs.Iop = opsave; 1178 pcs.IFL1 = FLoffset; 1179 pcs.IEV1.Vuns = 0; 1180 setaddrmode(pcs, idxregs); 1181 } 1182 freenode(e12); 1183 if (e1free) 1184 freenode(e1); 1185 goto Lptr; 1186 } 1187 1188 L1: 1189 1190 /* The rest of the cases could be a far pointer */ 1191 1192 regm_t idxregs; 1193 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1194 assert(idxregs); 1195 if (!I16 && 1196 (sz == REGSIZE || (I64 && sz == 4)) && 1197 keepmsk & RMstore) 1198 idxregs |= regcon.mvar; 1199 1200 switch (e1ty) 1201 { 1202 case TYfptr: /* if far pointer */ 1203 case TYhptr: 1204 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1205 assert(idxregs & mES); 1206 pcs.Iflags |= CFes; /* ES segment override */ 1207 break; 1208 1209 case TYsptr: /* if pointer to stack */ 1210 if (config.wflags & WFssneds) // if SS != DS 1211 pcs.Iflags |= CFss; /* then need SS: override */ 1212 break; 1213 1214 case TYfgPtr: 1215 if (I32) 1216 pcs.Iflags |= CFgs; 1217 else if (I64) 1218 pcs.Iflags |= CFfs; 1219 else 1220 assert(0); 1221 break; 1222 1223 case TYcptr: /* if pointer to code */ 1224 pcs.Iflags |= CFcs; /* then need CS: override */ 1225 break; 1226 1227 default: 1228 break; 1229 } 1230 pcs.IFL1 = FLoffset; 1231 pcs.IEV1.Vuns = 0; 1232 1233 /* see if we can replace *(e+c) with 1234 * MOV idxreg,e 1235 * [MOV ES,segment] 1236 * EA = [ES:]c[idxreg] 1237 */ 1238 if (e1isadd && e12.Eoper == OPconst && 1239 (!I64 || el_signx32(e12)) && 1240 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) && 1241 (!e1.Ecount || !e1free) 1242 ) 1243 { 1244 int ss; 1245 1246 pcs.IEV1.Vuns = e12.EV.Vuns; 1247 freenode(e12); 1248 if (e1free) freenode(e1); 1249 if (!I16 && e11.Eoper == OPadd && !e11.Ecount && 1250 tysize(e11.Ety) == REGSIZE) 1251 { 1252 e12 = e11.EV.E2; 1253 e11 = e11.EV.E1; 1254 e1 = e1.EV.E1; 1255 e1free = true; 1256 goto L4; 1257 } 1258 if (!I16 && (ss = isscaledindex(e11)) != 0) 1259 { // (v * scale) + const 1260 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 1261 reg = findreg(idxregs); 1262 pcs.Irm = modregrm(0, 0, 4); 1263 pcs.Isib = modregrm(ss, reg & 7, 5); 1264 if (reg & 8) 1265 pcs.Irex |= REX_X; 1266 } 1267 else 1268 { 1269 scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg 1270 setaddrmode(pcs, idxregs); 1271 } 1272 goto Lptr; 
1273 } 1274 1275 /* Look for *(v1 + v2) 1276 * EA = [v1][v2] 1277 */ 1278 1279 if (!I16 && e1isadd && (!e1.Ecount || !e1free) && 1280 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4))) 1281 { 1282 L4: 1283 regm_t idxregs2; 1284 uint base, index; 1285 1286 // Look for *(v1 + v2 << scale) 1287 int ss = isscaledindex(e12); 1288 if (ss) 1289 { 1290 scodelem(cdb, e11, &idxregs, keepmsk, true); 1291 idxregs2 = allregs & ~(idxregs | keepmsk); 1292 cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs); 1293 } 1294 1295 // Look for *(v1 << scale + v2) 1296 else if ((ss = isscaledindex(e11)) != 0) 1297 { 1298 idxregs2 = idxregs; 1299 cdisscaledindex(cdb, e11, &idxregs2, keepmsk); 1300 idxregs = allregs & ~(idxregs2 | keepmsk); 1301 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1302 } 1303 // Look for *(((v1 << scale) + c1) + v2) 1304 else if (e11.Eoper == OPadd && !e11.Ecount && 1305 e11.EV.E2.Eoper == OPconst && 1306 (ss = isscaledindex(e11.EV.E1)) != 0 1307 ) 1308 { 1309 pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns; 1310 idxregs2 = idxregs; 1311 cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk); 1312 idxregs = allregs & ~(idxregs2 | keepmsk); 1313 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1314 freenode(e11.EV.E2); 1315 freenode(e11); 1316 } 1317 else 1318 { 1319 scodelem(cdb, e11, &idxregs, keepmsk, true); 1320 idxregs2 = allregs & ~(idxregs | keepmsk); 1321 scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true); 1322 } 1323 base = findreg(idxregs); 1324 index = findreg(idxregs2); 1325 pcs.Irm = modregrm(2, 0, 4); 1326 pcs.Isib = modregrm(ss, index & 7, base & 7); 1327 if (index & 8) 1328 pcs.Irex |= REX_X; 1329 if (base & 8) 1330 pcs.Irex |= REX_B; 1331 if (e1free) 1332 freenode(e1); 1333 1334 goto Lptr; 1335 } 1336 1337 /* give up and replace *e1 with 1338 * MOV idxreg,e 1339 * EA = 0[idxreg] 1340 * pinholeopt() will usually correct the 0, we need it in case 1341 * we have a pointer to a long and need an offset to the second 1342 * word. 
1343 */ 1344 1345 assert(e1free); 1346 scodelem(cdb, e1, &idxregs, keepmsk, true); // load index register 1347 setaddrmode(pcs, idxregs); 1348 Lptr: 1349 if (config.flags3 & CFG3ptrchk) 1350 cod3_ptrchk(cdb, pcs, keepmsk); // validate pointer code 1351 break; 1352 1353 case FLdatseg: 1354 assert(0); 1355 static if (0) 1356 { 1357 pcs.Irm = modregrm(0, 0, BPRM); 1358 pcs.IEVpointer1 = e.EVpointer; 1359 break; 1360 } 1361 1362 case FLfltreg: 1363 reflocal = true; 1364 pcs.Irm = modregrm(2, 0, BPRM); 1365 pcs.IEV1.Vint = 0; 1366 break; 1367 1368 case FLreg: 1369 goto L2; 1370 1371 case FLpara: 1372 if (s.Sclass == SC.shadowreg) 1373 goto case FLfast; 1374 Lpara: 1375 refparam = true; 1376 pcs.Irm = modregrm(2, 0, BPRM); 1377 goto L2; 1378 1379 case FLauto: 1380 case FLfast: 1381 if (regParamInPreg(s)) 1382 { 1383 regm_t pregm = s.Spregm(); 1384 /* See if the parameter is still hanging about in a register, 1385 * and so can we load from that register instead. 1386 */ 1387 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/) 1388 { 1389 if (keepmsk & RMload && !anyiasm) 1390 { 1391 auto voffset = e.EV.Voffset; 1392 if (sz <= REGSIZE) 1393 { 1394 const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg; 1395 if (voffset >= REGSIZE) 1396 voffset -= REGSIZE; 1397 1398 /* preg could be NOREG if it's a variadic function and we're 1399 * in Win64 shadow regs and we're offsetting to get to the start 1400 * of the variadic args. 
1401 */ 1402 if (preg != NOREG && regcon.params & mask(preg)) 1403 { 1404 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset); 1405 if (mask(preg) & XMMREGS) 1406 { 1407 /* The following fails with this from std.math on Linux64: 1408 void main() 1409 { 1410 alias T = float; 1411 T x = T.infinity; 1412 T e = T.infinity; 1413 int eptr; 1414 T v = frexp(x, eptr); 1415 assert(isIdentical(e, v)); 1416 } 1417 */ 1418 } 1419 else if (voffset == 0) 1420 { 1421 pcs.Irm = modregrm(3, 0, preg & 7); 1422 if (preg & 8) 1423 pcs.Irex |= REX_B; 1424 if (I64 && sz == 1 && preg >= 4) 1425 pcs.Irex |= REX; 1426 regcon.used |= mask(preg); 1427 break; 1428 } 1429 else if (voffset == 1 && sz == 1 && preg < 4) 1430 { 1431 pcs.Irm = modregrm(3, 0, 4 | preg); // use H register 1432 regcon.used |= mask(preg); 1433 break; 1434 } 1435 } 1436 } 1437 } 1438 else 1439 regcon.params &= ~pregm; 1440 } 1441 } 1442 if (s.Sclass == SC.shadowreg) 1443 goto Lpara; 1444 goto case FLbprel; 1445 1446 case FLbprel: 1447 reflocal = true; 1448 pcs.Irm = modregrm(2, 0, BPRM); 1449 goto L2; 1450 1451 case FLextern: 1452 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 1453 { 1454 if (config.exe & EX_windos) 1455 { 1456 if (I64) 1457 { // GS:[88] 1458 pcs.Irm = modregrm(0, 0, 4); 1459 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1460 pcs.IFL1 = FLconst; 1461 pcs.IEV1.Vuns = 88; 1462 pcs.Iflags = CFgs; 1463 pcs.Irex |= REX_W; 1464 break; 1465 } 1466 else 1467 { 1468 pcs.Iflags |= CFfs; // add FS: override 1469 } 1470 } 1471 else if (config.exe & (EX_OSX | EX_OSX64)) 1472 { 1473 } 1474 else if (config.exe & EX_posix) 1475 assert(0); 1476 } 1477 if (s.ty() & mTYcs && cast(bool) LARGECODE) 1478 goto Lfardata; 1479 goto L3; 1480 1481 case FLtlsdata: 1482 if (config.exe & EX_posix) 1483 goto L3; 1484 assert(0); 1485 1486 case FLdata: 1487 case FLudata: 1488 case FLcsdata: 1489 case FLgot: 1490 case FLgotoff: 1491 L3: 1492 pcs.Irm = 
modregrm(0, 0, BPRM); 1493 L2: 1494 if (fl == FLreg) 1495 { 1496 //printf("test: FLreg, %s %d regcon.mvar = %s\n", 1497 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar)); 1498 if (!(s.Sregm & regcon.mvar)) 1499 symbol_print(s); 1500 assert(s.Sregm & regcon.mvar); 1501 1502 /* Attempting to paint a float as an integer or an integer as a float 1503 * will cause serious problems since the EA is loaded separatedly from 1504 * the opcode. The only way to deal with this is to prevent enregistering 1505 * such variables. 1506 */ 1507 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) || 1508 !tyxmmreg(ty) && (s.Sregm & XMMREGS)) 1509 cgreg_unregister(s.Sregm); 1510 1511 if ( 1512 s.Sclass == SC.regpar || 1513 s.Sclass == SC.parameter) 1514 { refparam = true; 1515 reflocal = true; // kludge to set up prolog 1516 } 1517 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1518 if (s.Sreglsw & 8) 1519 pcs.Irex |= REX_B; 1520 if (e.EV.Voffset == REGSIZE && sz == REGSIZE) 1521 { 1522 pcs.Irm = modregrm(3, 0, s.Sregmsw & 7); 1523 if (s.Sregmsw & 8) 1524 pcs.Irex |= REX_B; 1525 else 1526 pcs.Irex &= ~REX_B; 1527 } 1528 else if (e.EV.Voffset == 1 && sz == 1) 1529 { 1530 assert(s.Sregm & BYTEREGS); 1531 assert(s.Sreglsw < 4); 1532 pcs.Irm |= 4; // use 2nd byte of register 1533 } 1534 else 1535 { 1536 assert(!e.EV.Voffset); 1537 if (I64 && sz == 1 && s.Sreglsw >= 4) 1538 pcs.Irex |= REX; 1539 } 1540 } 1541 else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1542 { 1543 pcs.Iflags |= CFcs | CFoff; 1544 } 1545 if (config.flags3 & CFG3pic && 1546 (fl == FLtlsdata || s.ty() & mTYthread)) 1547 { 1548 if (I32) 1549 { 1550 if (config.flags3 & CFG3pie) 1551 { 1552 pcs.Iflags |= CFgs; 1553 } 1554 } 1555 else if (I64) 1556 { 1557 if (config.flags3 & CFG3pie && 1558 (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat)) 1559 { 1560 pcs.Iflags |= CFfs; 1561 pcs.Irm = modregrm(0, 0, 4); 1562 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1563 } 1564 else 1565 { 
1566 //pcs.Iflags |= CFopsize; //I don't know what this was for 1567 pcs.Irex = 0x48; 1568 } 1569 } 1570 } 1571 pcs.IEV1.Vsym = s; 1572 pcs.IEV1.Voffset = e.EV.Voffset; 1573 if (sz == 1) 1574 { /* Don't use SI or DI for this variable */ 1575 s.Sflags |= GTbyte; 1576 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1) 1577 { 1578 debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr); 1579 s.Sflags &= ~GTregcand; 1580 } 1581 } 1582 else if (sz == 2 && tyxmmreg(s.ty()) && config.fpxmmregs) 1583 { 1584 debug if (debugr) printf("'%s' not XMM reg cand due to short access\n", s.Sident.ptr); 1585 s.Sflags &= ~GTregcand; 1586 } 1587 else if (e.EV.Voffset || sz > tysize(s.Stype.Tty)) 1588 { 1589 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr); 1590 s.Sflags &= ~GTregcand; 1591 } 1592 else if (tyvector(s.Stype.Tty) && sz < tysize(s.Stype.Tty)) 1593 { 1594 // https://issues.dlang.org/show_bug.cgi?id=21673 1595 // https://issues.dlang.org/show_bug.cgi?id=21676 1596 // https://issues.dlang.org/show_bug.cgi?id=23009 1597 // PR: https://github.com/dlang/dmd/pull/13977 1598 // cannot read or write to partial vector 1599 debug if (debugr) printf("'%s' not reg cand due to vector type\n", s.Sident.ptr); 1600 s.Sflags &= ~GTregcand; 1601 } 1602 1603 if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty)) 1604 { 1605 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr); 1606 // Can't successfully mix XMM register variables accessed as integers 1607 s.Sflags &= ~GTregcand; 1608 } 1609 1610 if (!(keepmsk & RMstore)) // if not store only 1611 s.Sflags |= SFLread; // assume we are doing a read 1612 break; 1613 1614 case FLpseudo: 1615 { 1616 getregs(cdb, mask(s.Sreglsw)); 1617 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1618 if (s.Sreglsw & 8) 1619 pcs.Irex |= REX_B; 1620 if (e.EV.Voffset == 1 && sz == 1) 1621 { assert(s.Sregm & BYTEREGS); 1622 assert(s.Sreglsw < 4); 1623 pcs.Irm |= 4; // use 2nd 
// byte of register
            }
            else
            {   assert(!e.EV.Voffset);
                if (I64 && sz == 1 && s.Sreglsw >= 4)
                    pcs.Irex |= REX;
            }
            break;
        }

        case FLfardata:
        case FLfunc:                            /* reading from code seg */
            if (config.exe & EX_flat)
                goto L3;
        Lfardata:
        {
            regm_t regm = ALLREGS & ~keepmsk;       // need scratch register
            allocreg(cdb, &regm, &reg, TYint);
            getregs(cdb,mES);
            // MOV mreg,seg of symbol
            cdb.gencs(0xB8 + reg, 0, FLextern, s);
            cdb.last().Iflags = CFseg;
            cdb.gen2(0x8E, modregrmx(3, 0, reg));   // MOV ES,reg
            pcs.Iflags |= CFes | CFoff;             /* ES segment override */
            goto L3;
        }

        case FLstack:
            assert(!I16);
            pcs.Irm = modregrm(2, 0, 4);
            pcs.Isib = modregrm(0, 4, SP);
            pcs.IEV1.Vsym = s;
            pcs.IEV1.Voffset = e.EV.Voffset;
            break;

        default:
            WRFL(fl);
            symbol_print(s);
            assert(0);
    }
}

/*****************************
 * Given an opcode and EA in cs, generate code
 * for each floating register in turn.
 *
 * One instruction is emitted per register making up the value; before each
 * one the register field of `pcs.Irm` is rewritten with NEWREG and
 * `pcs.IEV1.Voffset` is adjusted. The adjustments cancel out, so Voffset
 * holds its entry value again on return. Must not be used when generating
 * 64 bit code.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      pcs = instruction template (opcode and EA); its Irm and IEV1.Voffset
 *            are modified while generating
 *      tym = either TYdouble or TYfloat
 */

@trusted
void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    tym = tybasic(tym);
    if (I32)
    {
        getregs(cdb,(tym == TYfloat) ? mAX : mAX | mDX);
        if (tym != TYfloat)
        {
            // second register of the pair: DX at offset + REGSIZE
            pcs.IEV1.Voffset += REGSIZE;
            NEWREG(pcs.Irm,DX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= REGSIZE;
        }
        NEWREG(pcs.Irm,AX);
        cdb.gen(pcs);
    }
    else
    {
        // 16 bit code: start at the highest word and walk down 2 bytes at a time
        getregs(cdb,(tym == TYfloat) ? FLOATREGS_16 : DOUBLEREGS_16);
        pcs.IEV1.Voffset += (tym == TYfloat) ? 2 : 6;
        if (tym == TYfloat)
            NEWREG(pcs.Irm, DX);
        else
            NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
        pcs.IEV1.Voffset -= 2;
        if (tym == TYfloat)
            NEWREG(pcs.Irm, AX);
        else
            NEWREG(pcs.Irm, BX);
        cdb.gen(pcs);
        if (tym != TYfloat)
        {
            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, CX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= 2;     /* note that exit is with Voffset unaltered */
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
        }
    }
}


/*****************************
 * Given a result in registers, test it for true or false.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 * Params:
 *      cdb = code builder the test instructions are appended to
 *      regm = mask of the register(s) holding the value; must contain at
 *             least one of XMMREGS, mBP or ALLREGS
 *      tym = type of the value (only the basic type is used)
 *      saveflag = if non-zero, work on a scratch register instead of
 *                 modifying the register(s) in regm
 */

@trusted
void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                       // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);        // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);
        return;
    }
    if (regm & XMMREGS)
    {
        // compare against a zeroed XMM register that is not part of regm
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        cdb.gen2(op | XORPS, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0));     // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   code *cnop = gennop(null);
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE     L1
            genjmp(cdb, JP,  FLcode, cast(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask(reg));
            cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg
            cdb.append(cnop);
        }
        return;
    }
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {
                if (saveflag)
                {
                    scrregm = allregs & ~regm;              // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);            // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                cdb.gen2(0xD1, modregrmx(3, 4, reg));       // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                 // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;  // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
            gentstreg(cdb, reg);                // TEST reg,reg
        return;
    }

    // Value occupies more than one register: OR the pieces together
    if (saveflag || tyfv(tym))
    {
    L1:
        scrregm = ALLREGS & ~regm;              // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg);      // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);            // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);                // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
            }
            reg = findreglsw(regm);
            genorreg(cdb, scrreg, reg);                     // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            genmovreg(cdb, scrreg, AX);                     // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg));     // SHL scrreg,1
            genorreg(cdb, scrreg, BX);                      // OR scrreg,BX
            genorreg(cdb, scrreg, CX);                      // OR scrreg,CX
            genorreg(cdb, scrreg, DX);                      // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))        // if register variable
                goto L1;                        // don't trash it
            getregs(cdb, mask(reg));            // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));    // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));       // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
        }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                           // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));     // SHL AX,1
            genorreg(cdb, AX, BX);                      // OR AX,BX
            genorreg(cdb, AX, CX);                      // OR AX,CX
            genorreg(cdb, AX, DX);                      // OR AX,DX
        }
        else
            assert(0);
    }
    code_orflag(cdb.last(),CFpsw);      // mark the last instruction with CFpsw
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 * Params:
 *      cdb = code builder the fix-up code is appended to
 *      e = expression that produced the result; its type selects the
 *          size of the value being moved
 *      retregs = mask of the register(s) the result is currently in
 *      pretregs = on entry, the registers and/or mPSW the caller wants the
 *                 result in (0 means the result is not wanted, and nothing
 *                 is generated); updated to where the result ends up
 */

@trusted
void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;           // if don't want result
    assert(e && retregs);                 // need something to work with
    regm_t forccs = *pretregs & mPSW;
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        *pretregs = retregs;
    else if (forregs)             // if return the result in registers
    {
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16;     // for tstresult() below
            }
            else
            {
                // The diagnostic is debug-only, but the assert must be its own
                // statement so that this unsupported combination also halts
                // in non-debug builds.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);  // allocate return regs
            if (retregs & XMMREGS)
            {
                reg = findreg(retregs & XMMREGS);
                if (mask(rreg) & XMMREGS)
                    genmovreg(cdb, rreg, reg, tym);
                else
                {
                    // MOVSD floatreg, XMM?
                    cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg
            }
            else
            {
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);      // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        if (retregs & (mST01 | mST0))
        {
            *pretregs |= forccs;
            fixresult87(cdb, e, retregs, pretregs);
        }
        else
            tstresult(cdb, retregs, tym, forregs);
    }
}

/*******************************
 * Extra information about each CLIB runtime library function.
 */

enum
{
    INF32         = 1,      /// if 32 bit only
    INFfloat      = 2,      /// if this is floating point
    INFwkdone     = 4,      /// if weak extern is already done
    INF64         = 8,      /// if 64 bit only
    INFpushebx    = 0x10,   /// push EBX before load_localgot()
    INFpusheabcdx = 0x20,   /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}

struct ClibInfo
{
    regm_t retregs16;   /// registers that 16 bit result is returned in
    regm_t retregs32;   /// registers that 32 bit result is returned in
    ubyte pop;          /// # of bytes popped off of stack upon return
    ubyte flags;        /// INFxxx
    byte push87;        /// # of pushes onto the 8087 stack
    byte pop87;         /// # of pops off of the 8087 stack
}

__gshared int clib_inited = false;          // true if initialized

/*******************************
 * Allocate the Symbol for a runtime library function.
 *
 * The Symbol is given type `tsclib`, storage class `SC.extern_` and
 * `FLfunc`; its `Sregsaved` is every register in `mBP | mES | ALLREGS`
 * that is not in `desregs`.
 * Params:
 *      name = 0-terminated name of the function
 *      desregs = mask of registers excluded from the Symbol's saved set
 *                (presumably the registers the function destroys)
 * Returns:
 *      the newly allocated Symbol
 */
@trusted
Symbol* symboly(const(char)* name, regm_t desregs)
{
    Symbol *s = symbol_calloc(name[0 .. strlen(name)]);
    s.Stype = tsclib;
    s.Sclass = SC.extern_;
    s.Sfl = FLfunc;
    s.Ssymnum = 0;
    s.Sregsaved = ~desregs & (mBP | mES | ALLREGS);
    return s;
}

@trusted
void getClibInfo(uint clib, Symbol** ps, ClibInfo** pinfo)
{
    __gshared Symbol*[CLIB.MAX] clibsyms;
    __gshared ClibInfo[CLIB.MAX] clibinfo;

    if (!clib_inited)
    {
        for (size_t i = 0; i < CLIB.MAX; ++i)
        {
            Symbol* s = clibsyms[i];
            if (s)
            {
                s.Sxtrnnum = 0;
                s.Stypidx = 0;
                clibinfo[i].flags &= ~INFwkdone;
            }
        }
        clib_inited = true;
    }

    const uint ex_unix = (EX_LINUX   | EX_LINUX64   |
                          EX_OSX     | EX_OSX64     |
                          EX_FREEBSD | EX_FREEBSD64 |
                          EX_OPENBSD | EX_OPENBSD64 |
                          EX_DRAGONFLYBSD64 |
                          EX_SOLARIS | EX_SOLARIS64);

    ClibInfo* cinfo = &clibinfo[clib];
    Symbol* s = clibsyms[clib];
    if (!s)
    {

        switch (clib)
        {
            case CLIB.lcmp:
                {
                    const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@";
                    s = symboly(name, 0);
                }
                break;

            case CLIB.lmul:
                {
                    const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@";
                    s = symboly(name, mAX|mCX|mDX);
                    cinfo.retregs16 = mDX|mAX;
                    cinfo.retregs32 = mDX|mAX;
                }
                break;

            case CLIB.ldiv:
                cinfo.retregs16 = mDX|mAX;
                if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
                {
                    s = symboly("__divdi3", mAX|mBX|mCX|mDX);
                    cinfo.flags = INFpushebx;
                    cinfo.retregs32 = mDX|mAX;
                }
                else if (config.exe & EX_SOLARIS)
                {
                    s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
                    cinfo.flags = INFpushebx;
                    cinfo.retregs32 = mDX|mAX;
                }
                else if (I32 && config.objfmt == OBJ_MSCOFF)
                {
                    s = symboly("_alldiv", mAX|mBX|mCX|mDX);
                    cinfo.flags = INFpusheabcdx;
                    cinfo.retregs32 = mDX|mAX;
                }
                else
                {
                    const(char)* name = (config.exe & ex_unix) ?
"__LDIV__" : "_LDIV@"; 2164 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2165 cinfo.retregs32 = mDX|mAX; 2166 } 2167 break; 2168 2169 case CLIB.lmod: 2170 cinfo.retregs16 = mCX|mBX; 2171 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD)) 2172 { 2173 s = symboly("__moddi3", mAX|mBX|mCX|mDX); 2174 cinfo.flags = INFpushebx; 2175 cinfo.retregs32 = mDX|mAX; 2176 } 2177 else if (config.exe & EX_SOLARIS) 2178 { 2179 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2180 cinfo.flags = INFpushebx; 2181 cinfo.retregs32 = mCX|mBX; 2182 } 2183 else if (I32 && config.objfmt == OBJ_MSCOFF) 2184 { 2185 s = symboly("_allrem", mAX|mBX|mCX|mDX); 2186 cinfo.flags = INFpusheabcdx; 2187 cinfo.retregs32 = mAX|mDX; 2188 } 2189 else 2190 { 2191 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2192 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2193 cinfo.retregs32 = mCX|mBX; 2194 } 2195 break; 2196 2197 case CLIB.uldiv: 2198 cinfo.retregs16 = mDX|mAX; 2199 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD)) 2200 { 2201 s = symboly("__udivdi3", mAX|mBX|mCX|mDX); 2202 cinfo.flags = INFpushebx; 2203 cinfo.retregs32 = mDX|mAX; 2204 } 2205 else if (config.exe & EX_SOLARIS) 2206 { 2207 s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX); 2208 cinfo.flags = INFpushebx; 2209 cinfo.retregs32 = mDX|mAX; 2210 } 2211 else if (I32 && config.objfmt == OBJ_MSCOFF) 2212 { 2213 s = symboly("_aulldiv", mAX|mBX|mCX|mDX); 2214 cinfo.flags = INFpusheabcdx; 2215 cinfo.retregs32 = mDX|mAX; 2216 } 2217 else 2218 { 2219 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2220 s = symboly(name, (config.exe & ex_unix) ? 
mAX|mBX|mCX|mDX : ALLREGS); 2221 cinfo.retregs32 = mDX|mAX; 2222 } 2223 break; 2224 2225 case CLIB.ulmod: 2226 cinfo.retregs16 = mCX|mBX; 2227 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD)) 2228 { 2229 s = symboly("__umoddi3", mAX|mBX|mCX|mDX); 2230 cinfo.flags = INFpushebx; 2231 cinfo.retregs32 = mDX|mAX; 2232 } 2233 else if (config.exe & EX_SOLARIS) 2234 { 2235 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2236 cinfo.flags = INFpushebx; 2237 cinfo.retregs32 = mCX|mBX; 2238 } 2239 else if (I32 && config.objfmt == OBJ_MSCOFF) 2240 { 2241 s = symboly("_aullrem", mAX|mBX|mCX|mDX); 2242 cinfo.flags = INFpusheabcdx; 2243 cinfo.retregs32 = mAX|mDX; 2244 } 2245 else 2246 { 2247 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2248 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2249 cinfo.retregs32 = mCX|mBX; 2250 } 2251 break; 2252 2253 // This section is only for Windows and DOS (i.e. machines without the x87 FPU) 2254 case CLIB.dmul: 2255 s = symboly("_DMUL@",mAX|mBX|mCX|mDX); 2256 cinfo.retregs16 = DOUBLEREGS_16; 2257 cinfo.retregs32 = DOUBLEREGS_32; 2258 cinfo.pop = 8; 2259 cinfo.flags = INFfloat; 2260 cinfo.push87 = 1; 2261 cinfo.pop87 = 1; 2262 break; 2263 2264 case CLIB.ddiv: 2265 s = symboly("_DDIV@",mAX|mBX|mCX|mDX); 2266 cinfo.retregs16 = DOUBLEREGS_16; 2267 cinfo.retregs32 = DOUBLEREGS_32; 2268 cinfo.pop = 8; 2269 cinfo.flags = INFfloat; 2270 cinfo.push87 = 1; 2271 cinfo.pop87 = 1; 2272 break; 2273 2274 case CLIB.dtst0: 2275 s = symboly("_DTST0@",0); 2276 cinfo.flags = INFfloat; 2277 break; 2278 2279 case CLIB.dtst0exc: 2280 s = symboly("_DTST0EXC@",0); 2281 cinfo.flags = INFfloat; 2282 break; 2283 2284 case CLIB.dcmp: 2285 s = symboly("_DCMP@",0); 2286 cinfo.pop = 8; 2287 cinfo.flags = INFfloat; 2288 cinfo.push87 = 1; 2289 cinfo.pop87 = 1; 2290 break; 2291 2292 case CLIB.dcmpexc: 2293 s = symboly("_DCMPEXC@",0); 2294 cinfo.pop = 8; 2295 cinfo.flags = INFfloat; 2296 cinfo.push87 = 1; 2297 cinfo.pop87 = 1; 2298 
break; 2299 2300 case CLIB.dneg: 2301 s = symboly("_DNEG@",I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2302 cinfo.retregs16 = DOUBLEREGS_16; 2303 cinfo.retregs32 = DOUBLEREGS_32; 2304 cinfo.flags = INFfloat; 2305 break; 2306 2307 case CLIB.dadd: 2308 s = symboly("_DADD@",mAX|mBX|mCX|mDX); 2309 cinfo.retregs16 = DOUBLEREGS_16; 2310 cinfo.retregs32 = DOUBLEREGS_32; 2311 cinfo.pop = 8; 2312 cinfo.flags = INFfloat; 2313 cinfo.push87 = 1; 2314 cinfo.pop87 = 1; 2315 break; 2316 2317 case CLIB.dsub: 2318 s = symboly("_DSUB@",mAX|mBX|mCX|mDX); 2319 cinfo.retregs16 = DOUBLEREGS_16; 2320 cinfo.retregs32 = DOUBLEREGS_32; 2321 cinfo.pop = 8; 2322 cinfo.flags = INFfloat; 2323 cinfo.push87 = 1; 2324 cinfo.pop87 = 1; 2325 break; 2326 2327 case CLIB.fmul: 2328 s = symboly("_FMUL@",mAX|mBX|mCX|mDX); 2329 cinfo.retregs16 = FLOATREGS_16; 2330 cinfo.retregs32 = FLOATREGS_32; 2331 cinfo.flags = INFfloat; 2332 cinfo.push87 = 1; 2333 cinfo.pop87 = 1; 2334 break; 2335 2336 case CLIB.fdiv: 2337 s = symboly("_FDIV@",mAX|mBX|mCX|mDX); 2338 cinfo.retregs16 = FLOATREGS_16; 2339 cinfo.retregs32 = FLOATREGS_32; 2340 cinfo.flags = INFfloat; 2341 cinfo.push87 = 1; 2342 cinfo.pop87 = 1; 2343 break; 2344 2345 case CLIB.ftst0: 2346 s = symboly("_FTST0@",0); 2347 cinfo.flags = INFfloat; 2348 break; 2349 2350 case CLIB.ftst0exc: 2351 s = symboly("_FTST0EXC@",0); 2352 cinfo.flags = INFfloat; 2353 break; 2354 2355 case CLIB.fcmp: 2356 s = symboly("_FCMP@",0); 2357 cinfo.flags = INFfloat; 2358 cinfo.push87 = 1; 2359 cinfo.pop87 = 1; 2360 break; 2361 2362 case CLIB.fcmpexc: 2363 s = symboly("_FCMPEXC@",0); 2364 cinfo.flags = INFfloat; 2365 cinfo.push87 = 1; 2366 cinfo.pop87 = 1; 2367 break; 2368 2369 case CLIB.fneg: 2370 s = symboly("_FNEG@",I16 ? 
FLOATREGS_16 : FLOATREGS_32); 2371 cinfo.retregs16 = FLOATREGS_16; 2372 cinfo.retregs32 = FLOATREGS_32; 2373 cinfo.flags = INFfloat; 2374 break; 2375 2376 case CLIB.fadd: 2377 s = symboly("_FADD@",mAX|mBX|mCX|mDX); 2378 cinfo.retregs16 = FLOATREGS_16; 2379 cinfo.retregs32 = FLOATREGS_32; 2380 cinfo.flags = INFfloat; 2381 cinfo.push87 = 1; 2382 cinfo.pop87 = 1; 2383 break; 2384 2385 case CLIB.fsub: 2386 s = symboly("_FSUB@",mAX|mBX|mCX|mDX); 2387 cinfo.retregs16 = FLOATREGS_16; 2388 cinfo.retregs32 = FLOATREGS_32; 2389 cinfo.flags = INFfloat; 2390 cinfo.push87 = 1; 2391 cinfo.pop87 = 1; 2392 break; 2393 2394 case CLIB.dbllng: 2395 { 2396 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@"; 2397 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2398 cinfo.retregs16 = mDX | mAX; 2399 cinfo.retregs32 = mAX; 2400 cinfo.flags = INFfloat; 2401 cinfo.push87 = 1; 2402 cinfo.pop87 = 1; 2403 break; 2404 } 2405 2406 case CLIB.lngdbl: 2407 { 2408 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@"; 2409 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2410 cinfo.retregs16 = DOUBLEREGS_16; 2411 cinfo.retregs32 = DOUBLEREGS_32; 2412 cinfo.flags = INFfloat; 2413 cinfo.push87 = 1; 2414 cinfo.pop87 = 1; 2415 break; 2416 } 2417 2418 case CLIB.dblint: 2419 { 2420 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@"; 2421 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2422 cinfo.retregs16 = mAX; 2423 cinfo.retregs32 = mAX; 2424 cinfo.flags = INFfloat; 2425 cinfo.push87 = 1; 2426 cinfo.pop87 = 1; 2427 break; 2428 } 2429 2430 case CLIB.intdbl: 2431 { 2432 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@"; 2433 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2434 cinfo.retregs16 = DOUBLEREGS_16; 2435 cinfo.retregs32 = DOUBLEREGS_32; 2436 cinfo.flags = INFfloat; 2437 cinfo.push87 = 1; 2438 cinfo.pop87 = 1; 2439 break; 2440 } 2441 2442 case CLIB.dbluns: 2443 { 2444 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@"; 2445 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2446 cinfo.retregs16 = mAX; 2447 cinfo.retregs32 = mAX; 2448 cinfo.flags = INFfloat; 2449 cinfo.push87 = 1; 2450 cinfo.pop87 = 1; 2451 break; 2452 } 2453 2454 case CLIB.unsdbl: 2455 // Y(DOUBLEREGS_32,"__UNSDBL"), // CLIB.unsdbl 2456 // Y(DOUBLEREGS_16,"_UNSDBL@"), 2457 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _UNSDBL@ unsdbl 2458 { 2459 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@"; 2460 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2461 cinfo.retregs16 = DOUBLEREGS_16; 2462 cinfo.retregs32 = DOUBLEREGS_32; 2463 cinfo.flags = INFfloat; 2464 cinfo.push87 = 1; 2465 cinfo.pop87 = 1; 2466 break; 2467 } 2468 2469 case CLIB.dblulng: 2470 { 2471 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@"; 2472 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2473 cinfo.retregs16 = mDX|mAX; 2474 cinfo.retregs32 = mAX; 2475 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat; 2476 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1; 2477 cinfo.pop87 = 1; 2478 break; 2479 } 2480 2481 case CLIB.ulngdbl: 2482 { 2483 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@"; 2484 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2485 cinfo.retregs16 = DOUBLEREGS_16; 2486 cinfo.retregs32 = DOUBLEREGS_32; 2487 cinfo.flags = INFfloat; 2488 cinfo.push87 = 1; 2489 cinfo.pop87 = 1; 2490 break; 2491 } 2492 2493 case CLIB.dblflt: 2494 { 2495 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@"; 2496 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2497 cinfo.retregs16 = FLOATREGS_16; 2498 cinfo.retregs32 = FLOATREGS_32; 2499 cinfo.flags = INFfloat; 2500 cinfo.push87 = 1; 2501 cinfo.pop87 = 1; 2502 break; 2503 } 2504 2505 case CLIB.fltdbl: 2506 { 2507 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@"; 2508 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32); 2509 cinfo.retregs16 = DOUBLEREGS_16; 2510 cinfo.retregs32 = DOUBLEREGS_32; 2511 cinfo.flags = INFfloat; 2512 cinfo.push87 = 1; 2513 cinfo.pop87 = 1; 2514 break; 2515 } 2516 2517 case CLIB.dblllng: 2518 { 2519 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@"; 2520 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2521 cinfo.retregs16 = DOUBLEREGS_16; 2522 cinfo.retregs32 = mDX|mAX; 2523 cinfo.flags = INFfloat; 2524 cinfo.push87 = 1; 2525 cinfo.pop87 = 1; 2526 break; 2527 } 2528 2529 case CLIB.llngdbl: 2530 { 2531 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@"; 2532 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2533 cinfo.retregs16 = DOUBLEREGS_16; 2534 cinfo.retregs32 = DOUBLEREGS_32; 2535 cinfo.flags = INFfloat; 2536 cinfo.push87 = 1; 2537 cinfo.pop87 = 1; 2538 break; 2539 } 2540 2541 case CLIB.dblullng: 2542 { 2543 if (config.exe == EX_WIN64) 2544 { 2545 s = symboly("__DBLULLNG", DOUBLEREGS_32); 2546 cinfo.retregs32 = mAX; 2547 cinfo.flags = INFfloat; 2548 cinfo.push87 = 2; 2549 cinfo.pop87 = 2; 2550 } 2551 else 2552 { 2553 const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@"; 2554 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2555 cinfo.retregs16 = DOUBLEREGS_16; 2556 cinfo.retregs32 = I64 ? mAX : mDX|mAX; 2557 cinfo.flags = INFfloat; 2558 cinfo.push87 = (config.exe & ex_unix) ? 2 : 1; 2559 cinfo.pop87 = (config.exe & ex_unix) ? 
2 : 1; 2560 } 2561 break; 2562 } 2563 2564 case CLIB.ullngdbl: 2565 { 2566 if (config.exe == EX_WIN64) 2567 { 2568 s = symboly("__ULLNGDBL", DOUBLEREGS_32); 2569 cinfo.retregs32 = mAX; 2570 cinfo.flags = INFfloat; 2571 cinfo.push87 = 1; 2572 cinfo.pop87 = 1; 2573 } 2574 else 2575 { 2576 const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@"; 2577 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2578 cinfo.retregs16 = DOUBLEREGS_16; 2579 cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32; 2580 cinfo.flags = INFfloat; 2581 cinfo.push87 = 1; 2582 cinfo.pop87 = 1; 2583 } 2584 break; 2585 } 2586 2587 case CLIB.dtst: 2588 { 2589 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@"; 2590 s = symboly(name, 0); 2591 cinfo.flags = INFfloat; 2592 break; 2593 } 2594 2595 case CLIB.vptrfptr: 2596 { 2597 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@"; 2598 s = symboly(name, mES|mBX); 2599 cinfo.retregs16 = mES|mBX; 2600 cinfo.retregs32 = mES|mBX; 2601 break; 2602 } 2603 2604 case CLIB.cvptrfptr: 2605 { 2606 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@"; 2607 s = symboly(name, mES|mBX); 2608 cinfo.retregs16 = mES|mBX; 2609 cinfo.retregs32 = mES|mBX; 2610 break; 2611 } 2612 2613 case CLIB._87topsw: 2614 { 2615 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@"; 2616 s = symboly(name, 0); 2617 cinfo.flags = INFfloat; 2618 break; 2619 } 2620 2621 case CLIB.fltto87: 2622 { 2623 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@"; 2624 s = symboly(name, mST0); 2625 cinfo.retregs16 = mST0; 2626 cinfo.retregs32 = mST0; 2627 cinfo.flags = INFfloat; 2628 cinfo.push87 = 1; 2629 break; 2630 } 2631 2632 case CLIB.dblto87: 2633 { 2634 const(char)* name = (config.exe & ex_unix) ? 
"__DBLTO87" : "_DBLTO87@"; 2635 s = symboly(name, mST0); 2636 cinfo.retregs16 = mST0; 2637 cinfo.retregs32 = mST0; 2638 cinfo.flags = INFfloat; 2639 cinfo.push87 = 1; 2640 break; 2641 } 2642 2643 case CLIB.dblint87: 2644 { 2645 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@"; 2646 s = symboly(name, mST0|mAX); 2647 cinfo.retregs16 = mAX; 2648 cinfo.retregs32 = mAX; 2649 cinfo.flags = INFfloat; 2650 break; 2651 } 2652 2653 case CLIB.dbllng87: 2654 { 2655 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@"; 2656 s = symboly(name, mST0|mAX|mDX); 2657 cinfo.retregs16 = mDX|mAX; 2658 cinfo.retregs32 = mAX; 2659 cinfo.flags = INFfloat; 2660 break; 2661 } 2662 2663 case CLIB.ftst: 2664 { 2665 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@"; 2666 s = symboly(name, 0); 2667 cinfo.flags = INFfloat; 2668 break; 2669 } 2670 2671 case CLIB.fcompp: 2672 { 2673 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@"; 2674 s = symboly(name, 0); 2675 cinfo.retregs16 = mPSW; 2676 cinfo.retregs32 = mPSW; 2677 cinfo.flags = INFfloat; 2678 cinfo.pop87 = 2; 2679 break; 2680 } 2681 2682 case CLIB.ftest: 2683 { 2684 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@"; 2685 s = symboly(name, 0); 2686 cinfo.retregs16 = mPSW; 2687 cinfo.retregs32 = mPSW; 2688 cinfo.flags = INFfloat; 2689 break; 2690 } 2691 2692 case CLIB.ftest0: 2693 { 2694 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@"; 2695 s = symboly(name, 0); 2696 cinfo.retregs16 = mPSW; 2697 cinfo.retregs32 = mPSW; 2698 cinfo.flags = INFfloat; 2699 break; 2700 } 2701 2702 case CLIB.fdiv87: 2703 { 2704 const(char)* name = (config.exe & ex_unix) ? 
"__FDIVP" : "_FDIVP"; 2705 s = symboly(name, mST0|mAX|mBX|mCX|mDX); 2706 cinfo.retregs16 = mST0; 2707 cinfo.retregs32 = mST0; 2708 cinfo.flags = INFfloat; 2709 cinfo.push87 = 1; 2710 cinfo.pop87 = 1; 2711 break; 2712 } 2713 2714 // Complex numbers 2715 case CLIB.cmul: 2716 { 2717 s = symboly("_Cmul", mST0|mST01); 2718 cinfo.retregs16 = mST01; 2719 cinfo.retregs32 = mST01; 2720 cinfo.flags = INF32|INFfloat; 2721 cinfo.push87 = 3; 2722 cinfo.pop87 = 5; 2723 break; 2724 } 2725 2726 case CLIB.cdiv: 2727 { 2728 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01); 2729 cinfo.retregs16 = mST01; 2730 cinfo.retregs32 = mST01; 2731 cinfo.flags = INF32|INFfloat; 2732 cinfo.push87 = 0; 2733 cinfo.pop87 = 2; 2734 break; 2735 } 2736 2737 case CLIB.ccmp: 2738 { 2739 s = symboly("_Ccmp", mAX|mST0|mST01); 2740 cinfo.retregs16 = mPSW; 2741 cinfo.retregs32 = mPSW; 2742 cinfo.flags = INF32|INFfloat; 2743 cinfo.push87 = 0; 2744 cinfo.pop87 = 4; 2745 break; 2746 } 2747 2748 case CLIB.u64_ldbl: 2749 { 2750 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL"; 2751 s = symboly(name, mST0); 2752 cinfo.retregs16 = mST0; 2753 cinfo.retregs32 = mST0; 2754 cinfo.flags = INF32|INF64|INFfloat; 2755 cinfo.push87 = 2; 2756 cinfo.pop87 = 1; 2757 break; 2758 } 2759 2760 case CLIB.ld_u64: 2761 { 2762 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF || 2763 config.objfmt == OBJ_MACH ? 2764 "__LDBLULLNG" : "___LDBLULLNG") 2765 : "__LDBLULLNG"; 2766 s = symboly(name, mST0|mAX|mDX); 2767 cinfo.retregs16 = 0; 2768 cinfo.retregs32 = mDX|mAX; 2769 cinfo.flags = INF32|INF64|INFfloat; 2770 cinfo.push87 = 1; 2771 cinfo.pop87 = 2; 2772 break; 2773 } 2774 2775 default: 2776 assert(0); 2777 } 2778 clibsyms[clib] = s; 2779 } 2780 2781 *ps = s; 2782 *pinfo = cinfo; 2783 } 2784 2785 /******************************** 2786 * Generate code sequence to call C runtime library support routine. 2787 * clib = CLIB.xxxx 2788 * keepmask = mask of registers not to destroy. 
Currently can
 *              handle only 1. Should use a temporary rather than
 *              push/pop for speed.
 * Params:
 *      cdb = code builder that receives the generated call sequence
 *      e = elem handed on to fixresult() together with the routine's return registers
 *      pretregs = requested result registers, handed on to fixresult()
 */

@trusted
void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    // Order in which the four general registers are pushed: ECX, EBX, EDX, EAX
    static immutable ubyte[4] pushOrder = [ CX, BX, DX, AX ];

    Symbol* libsym;
    ClibInfo* info;
    getClibInfo(clib, &libsym, &info);

    if (I16)
        assert(!(info.flags & (INF32 | INF64)));

    // mask of regs destroyed by the routine, minus the ones the caller wants kept
    getregs(cdb, (~libsym.Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask);

    // Only registers the routine does not preserve itself need save/restore code
    keepmask &= ~libsym.Sregsaved;
    int nsaved = popcnt(keepmask);
    CodeBuilder cdbrestore;
    cdbrestore.ctor();
    gensaverestore(keepmask, cdb, cdbrestore);

    // Account for what the routine pushes onto / pops off the x87 stack
    save87regs(cdb, info.push87);
    foreach (_; 0 .. info.push87)
        push87(cdb);
    foreach (_; 0 .. info.pop87)
        pop87();

    const bool inlineLmul = config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32;
    if (inlineLmul)
    {
        // Emit the long multiply inline instead of calling the library routine
        static immutable ubyte[23] lmul =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(lmul[]);
    }
    else
    {
        makeitextern(libsym);
        const int wantEBX = (info.flags & INFpushebx) != 0;
        const int wantAll = (info.flags & INFpusheabcdx) != 0;
        int nalign = 0;
        if (STACKALIGN >= 16)
        {   // Align the stack (assume no args on stack)
            int npush = (nsaved + wantEBX + 4 * wantAll) * REGSIZE + stackpush;
            if (npush & (STACKALIGN - 1))
            {
                nalign = STACKALIGN - (npush & (STACKALIGN - 1));
                cod3_stackadj(cdb, nalign);
            }
        }
        if (wantEBX)
        {
            if (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64 | EX_DRAGONFLYBSD64))
            {
                foreach (r; pushOrder)
                    cdb.gen1(0x50 + r);         // PUSH ECX / EBX / EDX / EAX
                nalign += 4 * REGSIZE;          // these pushes are removed again after the call
            }
            else
            {
                cdb.gen1(0x50 + BX);            // PUSH EBX
                nalign += REGSIZE;
            }
        }
        if (wantAll)
        {
            foreach (r; pushOrder)
                cdb.gen1(0x50 + r);             // PUSH ECX / EBX / EDX / EAX
        }

        // Note: not for OSX
        /* Pass EBX on the stack instead, this is because EBX is used
         * for shared library function calls
         */
        if ((config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) &&
            (config.flags3 & CFG3pic))
        {
            load_localgot(cdb);                 // EBX gets set to this value
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8, 0, FLfunc, libsym);  // CALL s
        if (nalign)
            cod3_stackadj(cdb, -nalign);
        calledafunc = 1;
    }

    if (I16)
        stackpush -= info.pop;
    regm_t retregs = I16 ? info.retregs16 : info.retregs32;
    cdb.append(cdbrestore);                     // restore the registers saved above
    fixresult(cdb, e, retregs, pretregs);
}


/*************************************************
 * Helper function for converting OPparam's into array of Parameters.
2897 */ 2898 struct Parameter { elem* e; reg_t reg; reg_t reg2; uint numalign; } 2899 2900 @trusted 2901 void fillParameters(elem* e, Parameter* parameters, int* pi) 2902 { 2903 if (e.Eoper == OPparam) 2904 { 2905 fillParameters(e.EV.E1, parameters, pi); 2906 fillParameters(e.EV.E2, parameters, pi); 2907 freenode(e); 2908 } 2909 else 2910 { 2911 parameters[*pi].e = e; 2912 (*pi)++; 2913 } 2914 } 2915 2916 /*********************************** 2917 * tyf: type of the function 2918 */ 2919 @trusted 2920 FuncParamRegs FuncParamRegs_create(tym_t tyf) 2921 { 2922 FuncParamRegs result; 2923 2924 result.tyf = tyf; 2925 2926 if (I16) 2927 { 2928 result.numintegerregs = 0; 2929 result.numfloatregs = 0; 2930 } 2931 else if (I32) 2932 { 2933 if (tyf == TYjfunc) 2934 { 2935 static immutable ubyte[1] reglist1 = [ AX ]; 2936 result.argregs = ®list1[0]; 2937 result.numintegerregs = reglist1.length; 2938 } 2939 else if (tyf == TYmfunc) 2940 { 2941 static immutable ubyte[1] reglist2 = [ CX ]; 2942 result.argregs = ®list2[0]; 2943 result.numintegerregs = reglist2.length; 2944 } 2945 else 2946 result.numintegerregs = 0; 2947 result.numfloatregs = 0; 2948 } 2949 else if (I64 && config.exe == EX_WIN64) 2950 { 2951 static immutable ubyte[4] reglist3 = [ CX,DX,R8,R9 ]; 2952 result.argregs = ®list3[0]; 2953 result.numintegerregs = reglist3.length; 2954 2955 static immutable ubyte[4] freglist3 = [ XMM0, XMM1, XMM2, XMM3 ]; 2956 result.floatregs = &freglist3[0]; 2957 result.numfloatregs = freglist3.length; 2958 } 2959 else if (I64) 2960 { 2961 static immutable ubyte[6] reglist4 = [ DI,SI,DX,CX,R8,R9 ]; 2962 result.argregs = ®list4[0]; 2963 result.numintegerregs = reglist4.length; 2964 2965 static immutable ubyte[8] freglist4 = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ]; 2966 result.floatregs = &freglist4[0]; 2967 result.numfloatregs = freglist4.length; 2968 } 2969 else 2970 assert(0); 2971 return result; 2972 } 2973 2974 /***************************************** 2975 * Allocate 
parameter of type t and ty to registers *preg1 and *preg2.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 * Returns:
 *      false   not allocated to any register
 *      true    *preg1, *preg2 set to allocated register pair
 *
 * NOTE(review): the description above matches FuncParamRegs_alloc() further
 * down. type_jparam2(), which directly follows, has no preg1/preg2; it only
 * answers whether a value of type (t, ty) qualifies for a single integer
 * argument register:
 *  - floating point types never do
 *  - a struct or array does if its size is <= _tysize[TYnptr] and, except on
 *    Win64, is exactly 1, 2, 4 or 8 bytes
 *  - any other type does if its size is <= _tysize[TYnptr]
 */

@trusted
private bool type_jparam2(type* t, tym_t ty)
{
    ty = tybasic(ty);

    if (tyfloating(ty))
        return false;
    else if (ty == TYstruct || ty == TYarray)
    {
        type_debug(t);
        targ_size_t sz = type_size(t);
        return (sz <= _tysize[TYnptr]) &&
               (config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8);
    }
    else if (tysize(ty) <= _tysize[TYnptr])
        return true;
    return false;
}

/*****************************************
 * Try to assign the next parameter, of type (t, ty), to argument register(s).
 * Params:
 *      fpr = allocation state; its parameter counter `i` and the register
 *            counters `regcnt` / `xmmcnt` are advanced here
 *      t = type of the parameter, dereferenced only for aggregates
 *      ty = tym_t of the parameter
 *      preg1 = set to the first register allocated, NOREG if none
 *      preg2 = set to the second register allocated, NOREG if none
 * Returns:
 *      1 if the parameter was allocated to register(s), 0 if not
 *      (in which case *preg1 is NOREG and the register counters are unchanged)
 */
@trusted
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    // (t2, ty2) describe the second half of a two-register parameter;
    // ty2 == TYMAX means there is no second half
    type* t2 = null;
    tym_t ty2 = TYMAX;

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        ty2 = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    if (tybasic(ty) == TYarray && tybasic(t.Tty) == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile)
        && type_size(t.Tnext) > 1)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    // Count this parameter; in 32 bit code only the parameter with fpr.i == 1
    // is eligible for a register (see the test inside the loop below)
    ++fpr.i;

    // If struct or array
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            // Replace the aggregate by the one or two types it is passed as
            type* targ1, targ2;
            if (tybasic(t.Tty) == TYstruct)
            {
                targ1 = t.Ttag.Sstruct.Sarg1type;
                targ2 = t.Ttag.Sstruct.Sarg2type;
            }
            else if (tybasic(t.Tty) == TYarray)
            {
                if (I64)
                    argtypes(t, targ1, targ2);
            }
            else
                assert(0);

            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;       // no replacement types: not passed in registers
        }
    }

    reg_t* preg = preg1;
    // Remember the counters so a half-done pair allocation can be undone
    int regcntsave = fpr.regcnt;
    int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
        {
            ty = TYnptr;    // treat like a struct
        }
    }
    else if (I64)
    {
        if ((tybasic(ty) == TYcent || tybasic(ty) == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcfloat
            && fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // Pass 0 allocates (t, ty) into *preg1; pass 1, reached only when a second
    // half (t2, ty2) exists, allocates it into *preg2
    foreach (j; 0 .. 2)
    {
        if (fpr.regcnt < fpr.numintegerregs)
        {
            if ((I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
                type_jparam2(t, ty))
            {
                *preg = fpr.argregs[fpr.regcnt];
                ++fpr.regcnt;
                // On Win64 both counters advance together, whichever kind of register is used
                if (config.exe == EX_WIN64)
                    ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        if (fpr.xmmcnt < fpr.numfloatregs)
        {
            if (tyxmmreg(ty))
            {
                *preg = fpr.floatregs[fpr.xmmcnt];
                // On Win64 both counters advance together, whichever kind of register is used
                if (config.exe == EX_WIN64)
                    ++fpr.regcnt;
                ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        // Failed to allocate to a register
        if (j == 1)
        {   /* Unwind first preg1 assignment, because it's both or nothing
             */
            *preg1 = NOREG;
            fpr.regcnt = regcntsave;
            fpr.xmmcnt = xmmcntsave;
        }
        return 0;

     Lnext:
        if (tybasic(ty2) == TYMAX)      // no second half, done
            break;
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}

/***************************************
 * Finds replacement types for register passing of aggregates.
 *
 * Only arrays are handled here; the struct branch is still a TODO.
 * Params:
 *      t = type of the aggregate; nothing is done if it is null or not an aggregate
 *      arg1type = set to the type the first part is passed as, or null
 *      arg2type = set to the type the second part is passed as, or null
 */
@trusted
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        // In 32 bit code and on Win64, give up unless the size is a power of 2
        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))  // power of 2
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        if (sz <= 2 * REGSIZE)
        {
            // Pick an integer type per register-sized piece: one pass when the
            // array fits in REGSIZE bytes, two passes otherwise, the second
            // one covering the remaining sz - REGSIZE bytes
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ? tstypes[TYlong]
                         : tstypes[TYllong];
                argtype = &arg2type;
                argsz = sz - REGSIZE;
            }
        }

        if (I64 && config.exe != EX_WIN64)
        {
            // Refine the choice above using the innermost element type
            type* tn = t.Tnext;
            tym_t tyn = tn.Tty;
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    // Same size as the single struct element: use the struct's own argument types
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                // Otherwise continue with the struct's first argument type, if it has one
                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            if (sz == tysize(tyn))
            {
                // The array is exactly one element of this type
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            if (sz <= 16)
            {
                // Floating point elements: float for a piece of up to 4 bytes,
                // double otherwise; the second piece covers the bytes beyond the first 8
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}

/*******************************
 * Generate code sequence for function call.
3275 */ 3276 3277 @trusted 3278 void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs) 3279 { 3280 //printf("cdfunc()\n"); elem_print(e); 3281 assert(e); 3282 uint numpara = 0; // bytes of parameters 3283 uint numalign = 0; // bytes to align stack before pushing parameters 3284 uint stackpushsave = stackpush; // so we can compute # of parameters 3285 cgstate.stackclean++; 3286 regm_t keepmsk = 0; 3287 int xmmcnt = 0; 3288 tym_t tyf = tybasic(e.EV.E1.Ety); // the function type 3289 3290 // Easier to deal with parameters as an array: parameters[0..np] 3291 int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0; 3292 Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof); 3293 3294 if (np) 3295 { 3296 int n = 0; 3297 fillParameters(e.EV.E2, parameters, &n); 3298 assert(n == np); 3299 } 3300 3301 Symbol *sf = null; // symbol of the function being called 3302 if (e.EV.E1.Eoper == OPvar) 3303 sf = e.EV.E1.EV.Vsym; 3304 3305 /* Assume called function access statics 3306 */ 3307 if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64) && 3308 config.flags3 & CFG3pic) 3309 cgstate.accessedTLS = true; 3310 3311 /* Special handling for call to __tls_get_addr, we must save registers 3312 * before evaluating the parameter, so that the parameter load and call 3313 * are adjacent. 3314 */ 3315 if (np == 1 && sf) 3316 { 3317 if (sf == tls_get_addr_sym) 3318 getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS)); 3319 } 3320 3321 uint stackalign = REGSIZE; 3322 if (tyf == TYf16func) 3323 stackalign = 2; 3324 // Figure out which parameters go in registers. 
3325 // Compute numpara, the total bytes pushed on the stack 3326 FuncParamRegs fpr = FuncParamRegs_create(tyf); 3327 for (int i = np; --i >= 0;) 3328 { 3329 elem *ep = parameters[i].e; 3330 uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf)); // align on stack boundary 3331 if (config.exe == EX_WIN64) 3332 { 3333 //printf("[%d] size = %u, numpara = %d ep = %p %s\n", i, psize, numpara, ep, tym_str(ep.Ety)); 3334 debug 3335 if (psize > REGSIZE) elem_print(e); 3336 3337 assert(psize <= REGSIZE); 3338 psize = REGSIZE; 3339 } 3340 //printf("[%d] size = %u, numpara = %d %s\n", i, psize, numpara, tym_str(ep.Ety)); 3341 if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, ¶meters[i].reg, ¶meters[i].reg2)) 3342 { 3343 if (config.exe == EX_WIN64) 3344 numpara += REGSIZE; // allocate stack space for it anyway 3345 continue; // goes in register, not stack 3346 } 3347 3348 // Parameter i goes on the stack 3349 parameters[i].reg = NOREG; 3350 uint alignsize = el_alignsize(ep); 3351 parameters[i].numalign = 0; 3352 if (alignsize > stackalign && 3353 (I64 || (alignsize >= 16 && 3354 (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety)))))) 3355 { 3356 if (alignsize > STACKALIGN) 3357 { 3358 STACKALIGN = alignsize; 3359 enforcealign = true; 3360 } 3361 uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1); 3362 parameters[i].numalign = newnumpara - numpara; 3363 numpara = newnumpara; 3364 assert(config.exe != EX_WIN64); 3365 } 3366 numpara += psize; 3367 } 3368 3369 if (config.exe == EX_WIN64) 3370 { 3371 if (numpara < 4 * REGSIZE) 3372 numpara = 4 * REGSIZE; 3373 } 3374 3375 //printf("numpara = %d, stackpush = %d\n", numpara, stackpush); 3376 assert((numpara & (REGSIZE - 1)) == 0); 3377 assert((stackpush & (REGSIZE - 1)) == 0); 3378 3379 /* Should consider reordering the order of evaluation of the parameters 3380 * so that args that go into registers are evaluated after args that get 3381 * pushed. 
We can reorder args that are constants or relconst's. 3382 */ 3383 3384 /* Determine if we should use cgstate.funcarg for the parameters or push them 3385 */ 3386 bool usefuncarg = false; 3387 static if (0) 3388 { 3389 printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size, 3390 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)), 3391 cast(int)numpara, !stackpush, 3392 (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos), 3393 (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16); 3394 } 3395 if (config.flags4 & CFG4speed && 3396 !Alloca.size && 3397 /* The cleanup code calls a local function, leaving the return address on 3398 * the top of the stack. If parameters are placed there, the return address 3399 * is stepped on. 3400 * A better solution is turn this off only inside the cleanup code. 3401 */ 3402 !usednteh && 3403 !calledFinally && 3404 (numpara || config.exe == EX_WIN64) && 3405 stackpush == 0 && // cgstate.funcarg needs to be at top of stack 3406 (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) && 3407 (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) && 3408 !anyiasm && !I16 3409 ) 3410 { 3411 for (int i = 0; i < np; i++) 3412 { 3413 elem* ep = parameters[i].e; 3414 int preg = parameters[i].reg; 3415 //printf("parameter[%d] = %d, np = %d\n", i, preg, np); 3416 if (preg == NOREG) 3417 { 3418 switch (ep.Eoper) 3419 { 3420 case OPstrctor: 3421 case OPstrthis: 3422 case OPstrpar: 3423 case OPnp_fp: 3424 goto Lno; 3425 3426 default: 3427 break; 3428 } 3429 } 3430 } 3431 3432 if (numpara > cgstate.funcarg.size) 3433 { // New high water mark 3434 //printf("increasing size from %d to %d\n", cast(int)cgstate.funcarg.size, cast(int)numpara); 3435 cgstate.funcarg.size = numpara; 3436 } 3437 usefuncarg = true; 3438 } 3439 Lno: 3440 3441 /* Adjust start of the stack so after all args are pushed, 3442 * the stack will be aligned. 
3443 */ 3444 if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1)) 3445 { 3446 numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1)); 3447 cod3_stackadj(cdb, numalign); 3448 cdb.genadjesp(numalign); 3449 stackpush += numalign; 3450 stackpushsave += numalign; 3451 } 3452 assert(stackpush == stackpushsave); 3453 if (config.exe == EX_WIN64) 3454 { 3455 //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush); 3456 assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE)); 3457 3458 // Allocate stack space for four entries anyway 3459 // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29 3460 } 3461 3462 int[XMM7 + 1] regsaved = void; 3463 memset(regsaved.ptr, -1, regsaved.sizeof); 3464 CodeBuilder cdbrestore; 3465 cdbrestore.ctor(); 3466 regm_t saved = 0; 3467 targ_size_t funcargtossave = cgstate.funcargtos; 3468 targ_size_t funcargtos = numpara; 3469 //printf("funcargtos1 = %d\n", cast(int)funcargtos); 3470 3471 /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9 3472 * float and double parameters go into XMM0..XMM7 3473 * For variadic functions, count of XMM registers used goes in AL 3474 */ 3475 for (int i = 0; i < np; i++) 3476 { 3477 elem* ep = parameters[i].e; 3478 int preg = parameters[i].reg; 3479 //printf("parameter[%d] = %d, np = %d\n", i, preg, np); 3480 if (preg == NOREG) 3481 { 3482 /* Push parameter on stack, but keep track of registers used 3483 * in the process. If they interfere with keepmsk, we'll have 3484 * to save/restore them. 
3485 */ 3486 CodeBuilder cdbsave; 3487 cdbsave.ctor(); 3488 regm_t overlap = msavereg & keepmsk; 3489 msavereg |= keepmsk; 3490 CodeBuilder cdbparams; 3491 cdbparams.ctor(); 3492 if (usefuncarg) 3493 movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf); 3494 else 3495 pushParams(cdbparams,ep,stackalign, tyf); 3496 regm_t tosave = keepmsk & ~msavereg; 3497 msavereg &= ~keepmsk | overlap; 3498 3499 // tosave is the mask to save and restore 3500 for (reg_t j = 0; tosave; j++) 3501 { 3502 regm_t mi = mask(j); 3503 assert(j <= XMM7); 3504 if (mi & tosave) 3505 { 3506 uint idx; 3507 regsave.save(cdbsave, j, &idx); 3508 regsave.restore(cdbrestore, j, idx); 3509 saved |= mi; 3510 keepmsk &= ~mi; // don't need to keep these for rest of params 3511 tosave &= ~mi; 3512 } 3513 } 3514 3515 cdb.append(cdbsave); 3516 cdb.append(cdbparams); 3517 3518 // Alignment for parameter comes after it got pushed 3519 const uint numalignx = parameters[i].numalign; 3520 if (usefuncarg) 3521 { 3522 funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx; 3523 cgstate.funcargtos = funcargtos; 3524 } 3525 else if (numalignx) 3526 { 3527 cod3_stackadj(cdb, numalignx); 3528 cdb.genadjesp(numalignx); 3529 stackpush += numalignx; 3530 } 3531 } 3532 else 3533 { 3534 // Goes in register preg, not stack 3535 regm_t retregs = mask(preg); 3536 if (retregs & XMMREGS) 3537 ++xmmcnt; 3538 int preg2 = parameters[i].reg2; 3539 reg_t mreg,lreg; 3540 if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat) 3541 { 3542 assert(ep.Eoper != OPstrthis); 3543 if (mask(preg2) & XMMREGS) 3544 ++xmmcnt; 3545 if (tybasic(ep.Ety) == TYcfloat) 3546 { 3547 lreg = ST01; 3548 mreg = NOREG; 3549 } 3550 else if (tyrelax(ep.Ety) == TYcent) 3551 { 3552 lreg = mask(preg ) & mLSW ? cast(reg_t)preg : AX; 3553 mreg = mask(preg2) & mMSW ? 
cast(reg_t)preg2 : DX; 3554 } 3555 else 3556 { 3557 lreg = XMM0; 3558 mreg = XMM1; 3559 } 3560 retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG); 3561 CodeBuilder cdbsave; 3562 cdbsave.ctor(); 3563 if (keepmsk & retregs) 3564 { 3565 regm_t tosave = keepmsk & retregs; 3566 3567 // tosave is the mask to save and restore 3568 for (reg_t j = 0; tosave; j++) 3569 { 3570 regm_t mi = mask(j); 3571 assert(j <= XMM7); 3572 if (mi & tosave) 3573 { 3574 uint idx; 3575 regsave.save(cdbsave, j, &idx); 3576 regsave.restore(cdbrestore, j, idx); 3577 saved |= mi; 3578 keepmsk &= ~mi; // don't need to keep these for rest of params 3579 tosave &= ~mi; 3580 } 3581 } 3582 } 3583 cdb.append(cdbsave); 3584 3585 scodelem(cdb, ep, &retregs, keepmsk, false); 3586 3587 // Move result [mreg,lreg] into parameter registers from [preg2,preg] 3588 retregs = 0; 3589 if (preg != lreg) 3590 retregs |= mask(preg); 3591 if (preg2 != mreg) 3592 retregs |= mask(preg2); 3593 retregs &= ~mask(NOREG); 3594 getregs(cdb,retregs); 3595 3596 tym_t ty1 = tybasic(ep.Ety); 3597 tym_t ty2 = ty1; 3598 if (ep.Ety & mTYgprxmm) 3599 { 3600 ty1 = TYllong; 3601 ty2 = TYdouble; 3602 } 3603 else if (ep.Ety & mTYxmmgpr) 3604 { 3605 ty1 = TYdouble; 3606 ty2 = TYllong; 3607 } 3608 else if (ty1 == TYstruct) 3609 { 3610 type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type; 3611 type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type; 3612 if (targ1) 3613 ty1 = targ1.Tty; 3614 if (targ2) 3615 ty2 = targ2.Tty; 3616 } 3617 else if (tyrelax(ty1) == TYcent) 3618 ty1 = ty2 = TYllong; 3619 else if (tybasic(ty1) == TYcdouble) 3620 ty1 = ty2 = TYdouble; 3621 3622 if (tybasic(ep.Ety) == TYcfloat) 3623 { 3624 assert(I64); 3625 assert(lreg == ST01 && mreg == NOREG); 3626 // spill 3627 pop87(); 3628 pop87(); 3629 cdb.genfltreg(0xD9, 3, tysize(TYfloat)); 3630 genfwait(cdb); 3631 cdb.genfltreg(0xD9, 3, 0); 3632 genfwait(cdb); 3633 // reload 3634 if (config.exe == EX_WIN64) 3635 { 3636 cdb.genfltreg(LOD, preg, 0); 3637 code_orrex(cdb.last(), REX_W); 3638 } 3639 
else 3640 { 3641 assert(mask(preg) & XMMREGS); 3642 cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble); 3643 } 3644 } 3645 else foreach (v; 0 .. 2) 3646 { 3647 if (v ^ (preg != mreg)) 3648 genmovreg(cdb, preg, lreg, ty1); 3649 else 3650 genmovreg(cdb, preg2, mreg, ty2); 3651 } 3652 3653 retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG); 3654 } 3655 else if (ep.Eoper == OPstrthis) 3656 { 3657 getregs(cdb,retregs); 3658 // LEA preg,np[RSP] 3659 uint delta = stackpush - ep.EV.Vuns; // stack delta to parameter 3660 cdb.genc1(LEA, 3661 (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta); 3662 if (I64) 3663 code_orrex(cdb.last(), REX_W); 3664 } 3665 else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0) 3666 { 3667 retregs = 0; 3668 scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false); 3669 freenode(ep); 3670 } 3671 else 3672 { 3673 scodelem(cdb, ep, &retregs, keepmsk, false); 3674 } 3675 keepmsk |= retregs; // don't change preg when evaluating func address 3676 } 3677 } 3678 3679 if (config.exe == EX_WIN64) 3680 { // Allocate stack space for four entries anyway 3681 // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29 3682 { uint sz = 4 * REGSIZE; 3683 if (usefuncarg) 3684 { 3685 funcargtos -= sz; 3686 cgstate.funcargtos = funcargtos; 3687 } 3688 else 3689 { 3690 cod3_stackadj(cdb, sz); 3691 cdb.genadjesp(sz); 3692 stackpush += sz; 3693 } 3694 } 3695 3696 /* Variadic functions store XMM parameters into their corresponding GP registers 3697 */ 3698 for (int i = 0; i < np; i++) 3699 { 3700 int preg = parameters[i].reg; 3701 regm_t retregs = mask(preg); 3702 if (retregs & XMMREGS) 3703 { 3704 reg_t reg; 3705 switch (preg) 3706 { 3707 case XMM0: reg = CX; break; 3708 case XMM1: reg = DX; break; 3709 case XMM2: reg = R8; break; 3710 case XMM3: reg = R9; break; 3711 3712 default: assert(0); 3713 } 3714 getregs(cdb,mask(reg)); 3715 cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg 3716 } 
3717 } 3718 } 3719 3720 // Restore any register parameters we saved 3721 getregs(cdb,saved); 3722 cdb.append(cdbrestore); 3723 keepmsk |= saved; 3724 3725 // Variadic functions store the number of XMM registers used in AL 3726 if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic) 3727 { 3728 getregs(cdb,mAX); 3729 movregconst(cdb,AX,xmmcnt,1); 3730 keepmsk |= mAX; 3731 } 3732 3733 //printf("funcargtos2 = %d\n", cast(int)funcargtos); 3734 assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0)); 3735 cgstate.stackclean--; 3736 3737 debug 3738 if (!usefuncarg && numpara != stackpush - stackpushsave) 3739 { 3740 printf("function %s\n", funcsym_p.Sident.ptr); 3741 printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave); 3742 elem_print(e); 3743 } 3744 3745 assert(usefuncarg || numpara == stackpush - stackpushsave); 3746 3747 funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg); 3748 cgstate.funcargtos = funcargtossave; 3749 } 3750 3751 /*********************************** 3752 */ 3753 3754 @trusted 3755 void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs) 3756 { 3757 assert(tysize(e.Ety) == REGSIZE); 3758 const reg = findreg(*pretregs & allregs); 3759 getregs(cdb,mask(reg)); 3760 // LEA reg,np[ESP] 3761 uint np = stackpush - e.EV.Vuns; // stack delta to parameter 3762 cdb.genc1(LEA,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np); 3763 if (I64) 3764 code_orrex(cdb.last(), REX_W); 3765 fixresult(cdb, e, mask(reg), pretregs); 3766 } 3767 3768 /****************************** 3769 * Call function. All parameters have already been pushed onto the stack. 
/******************************
 * Call function. All parameters have already been pushed onto the stack.
 *
 * The code that evaluates the callee address and the CALL instruction itself
 * is built in a separate CodeBuilder (`cdbe`) which is appended to `cdb` only
 * after both call forms (direct / via pointer) have been handled; save87() and
 * load_localgot() are emitted straight into `cdb`, i.e. ahead of that code.
 * After the call, the stack is cleaned up (unless `usefuncarg`) and the return
 * registers are converted to what `*pretregs` asks for.
 *
 * Params:
 *      cdb        = code sink
 *      e          = function call
 *      numpara    = size in bytes of all the parameters
 *      numalign   = amount the stack was aligned by before the parameters were pushed
 *      pretregs   = where return value goes
 *      keepmsk    = registers to not change when evaluating the function address
 *      usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return
 */

@trusted
private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign,
        regm_t* pretregs,regm_t keepmsk, bool usefuncarg)
{
    //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg);
    //printf(" from %s\n", funcsym_p.Sident.ptr);
    //elem_print(e);
    calledafunc = 1;
    // Determine if we need frame for function prolog/epilog

    if (config.memmodel == Vmodel)
    {
        if (tyfarfunc(funcsym_p.ty()))
            needframe = true;
    }

    code cs;
    regm_t retregs;
    Symbol* s;          // callee symbol for a direct call, null for a call via pointer

    elem* e1 = e.EV.E1;                 // the callee expression
    tym_t tym1 = tybasic(e1.Ety);
    char farfunc = tyfarfunc(tym1) || tym1 == TYifunc;

    // Address evaluation + CALL are collected here, appended to cdb below
    CodeBuilder cdbe;
    cdbe.ctor();

    if (e1.Eoper == OPvar)
    {   // Call function directly

        if (!tyfunc(tym1))
            printf("%s\n", tym_str(tym1));
        assert(tyfunc(tym1));
        s = e1.EV.Vsym;
        if (s.Sflags & SFLexit)
        { }
        else if (s != tls_get_addr_sym)
            save87(cdb);                // assume 8087 regs are all trashed

        // Function calls may throw Errors, unless marked that they don't
        if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow))
            funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;

        if (s.Sflags & SFLexit)
        {
            // Function doesn't return, so don't worry about registers
            // it may use
        }
        else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized))
            // so we can replace func at runtime
            getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
        else
            getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
        // A callee named "alloca" is redirected to the runtime library symbol,
        // with one extra register argument (CX, or DX on Win64)
        if (strcmp(s.Sident.ptr, "alloca") == 0)
        {
            s = getRtlsym(RTLSYM.ALLOCA);
            makeitextern(s);
            int areg = CX;
            if (config.exe == EX_WIN64)
                areg = DX;
            getregs(cdbe, mask(areg));
            cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0);  // LEA areg,&localsize[BP]
            if (I64)
                code_orrex(cdbe.last(), REX_W);
            Alloca.size = REGSIZE;
        }
        if (sytab[s.Sclass] & SCSS)     // if function is on stack (!)
        {
            retregs = allregs & ~keepmsk;
            s.Sflags &= ~GTregcand;
            s.Sflags |= SFLread;
            cdrelconst(cdbe,e1,&retregs);
            if (farfunc)
            {
                // Spill segment:offset to the float temporary and call through it
                const reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                floatreg = true;                // use float register
                reflocal = true;
                cdbe.genc1(0x89,                // MOV floatreg+2,reg
                        modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
                cdbe.genc1(0x89,                // MOV floatreg,lsreg
                        modregrm(2, lsreg, BPRM), FLfltreg, 0);
                if (tym1 == TYifunc)
                    cdbe.gen1(0x9C);            // PUSHF
                cdbe.genc1(0xFF,                // CALL [floatreg]
                        modregrm(2, 3, BPRM), FLfltreg, 0);
            }
            else
            {
                const reg = findreg(retregs);
                cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
                if (I64)
                    code_orrex(cdbe.last(), REX_W);
            }
        }
        else
        {
            FL fl = FLfunc;
            if (!tyfunc(s.ty()))
                fl = el_fl(e1);
            if (tym1 == TYifunc)
                cdbe.gen1(0x9C);                            // PUSHF
            if (config.exe & (EX_windos | EX_OSX | EX_OSX64))
            {
                cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s);   // CALL extern
            }
            else
            {
                assert(!farfunc);
                if (s != tls_get_addr_sym)
                {
                    //printf("call %s\n", s.Sident.ptr);
                    load_localgot(cdb);
                    cdbe.gencs(0xE8, 0, fl, s);     // CALL extern
                }
                else if (I64)
                {
                    /* Prepend 66 66 48 so GNU linker has patch room
                     */
                    assert(!farfunc);
                    cdbe.gen1(0x66);
                    cdbe.gen1(0x66);
                    cdbe.gencs(0xE8, 0, fl, s);     // CALL extern
                    cdbe.last().Irex = REX | REX_W;
                }
                else
                    cdbe.gencs(0xE8, 0, fl, s);     // CALL extern
            }
            code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff));
        }
    }
    else
    {   // Call function via pointer

        // Function calls may throw Errors
        funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;

        if (e1.Eoper != OPind) { WRFL(el_fl(e1)); printf("e1.Eoper: %s\n", oper_str(e1.Eoper)); }
        save87(cdb);                    // assume 8087 regs are all trashed
        assert(e1.Eoper == OPind);
        elem *e11 = e1.EV.E1;           // the function pointer expression
        tym_t e11ty = tybasic(e11.Ety);
        assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
        load_localgot(cdb);
        if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // 32 bit only
        {
            if (config.flags3 & CFG3pic)
                keepmsk |= mBX;
        }

        /* Mask of registers destroyed by the function call
         */
        regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved;

        // if we can't use loadea()
        if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) &&
            (e11.Eoper != OPind || e11.Ecount))
        {
            // Evaluate the pointer into registers, then call through them
            retregs = allregs & ~keepmsk;
            cgstate.stackclean++;
            scodelem(cdbe,e11,&retregs,keepmsk,true);
            cgstate.stackclean--;
            // Kill registers destroyed by an arbitrary function call
            getregs(cdbe,desmsk);
            if (e11ty == TYfptr)
            {
                const reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                floatreg = true;                // use float register
                reflocal = true;
                cdbe.genc1(0x89,                // MOV floatreg+2,reg
                        modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
                cdbe.genc1(0x89,                // MOV floatreg,lsreg
                        modregrm(2, lsreg, BPRM), FLfltreg, 0);
                if (tym1 == TYifunc)
                    cdbe.gen1(0x9C);            // PUSHF
                cdbe.genc1(0xFF,                // CALL [floatreg]
                        modregrm(2, 3, BPRM), FLfltreg, 0);
            }
            else
            {
                const reg = findreg(retregs);
                cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
                if (I64)
                    code_orrex(cdbe.last(), REX_W);
            }
        }
        else
        {
            // NOTE(review): this PUSHF goes into cdb, not cdbe as in the other
            // branches, so it lands ahead of the loadea() code - confirm intended
            if (tym1 == TYifunc)
                cdb.gen1(0x9C);                 // PUSHF
                                                // CALL [function]
            cs.Iflags = 0;
            cgstate.stackclean++;
            loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk);
            cgstate.stackclean--;
            freenode(e11);
        }
        s = null;
    }
    cdb.append(cdbe);
    freenode(e1);

    /* See if we will need the frame pointer.
       Calculate it here so we can possibly use BP to fix the stack.
     */
static if (0)
{
    // Disabled code, kept for reference
    if (!needframe)
    {
        // If there is a register available for this basic block
        if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
        { }
        else
        {
            for (SYMIDX si = 0; si < globsym.length; si++)
            {
                Symbol* s = globsym[si];

                if (s.Sflags & GTregcand && type_size(s.Stype) != 0)
                {
                    if (config.flags4 & CFG4optimized)
                    {   // If symbol is live in this basic block and
                        // isn't already in a register
                        if (s.Srange && vec_testbit(dfoidx, s.Srange) &&
                            s.Sfl != FLreg)
                        {   // Then symbol must be allocated on stack
                            needframe = true;
                            break;
                        }
                    }
                    else
                    {   if (mfuncreg == 0)      // if no registers left
                        {   needframe = true;
                            break;
                        }
                    }
                }
            }
        }
    }
}

    // Registers the callee returns its value in (reg1/reg2 are set by allocretregs)
    reg_t reg1, reg2;
    retregs = allocretregs(e.Ety, e.ET, tym1, reg1, reg2);

    assert(retregs || !*pretregs);

    if (!usefuncarg)
    {
        // If stack needs cleanup
        if  (s && s.Sflags & SFLexit)
        {
            if (config.fulltypes && TARGET_WINDOS)
            {
                // the stack walker evaluates the return address, not a byte of the
                // call instruction, so ensure there is an instruction byte after
                // the call that still has the same line number information
                cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
            }
            /* Function never returns, so don't need to generate stack
             * cleanup code. But still need to log the stack cleanup
             * as if it did return.
             */
            cdb.genadjesp(-(numpara + numalign));
            stackpush -= numpara + numalign;
        }
        else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) &&
                 (!typfunc(tym1) || config.exe == EX_WIN64))
        {
            // Caller cleans the stack
            if (tym1 == TYhfunc)
            {   // Hidden parameter is popped off by the callee
                cdb.genadjesp(-REGSIZE);
                stackpush -= REGSIZE;
                if (numpara + numalign > REGSIZE)
                    genstackclean(cdb, numpara + numalign - REGSIZE, retregs);
            }
            else
                genstackclean(cdb, numpara + numalign, retregs);
        }
        else
        {
            cdb.genadjesp(-numpara);    // popped off by the callee's 'RET numpara'
            stackpush -= numpara;
            if (numalign)               // callee doesn't know about alignment adjustment
                genstackclean(cdb,numalign,retregs);
        }
    }

    /* Special handling for functions which return a floating point
       value in the top of the 8087 stack.
     */

    if (retregs & mST0)
    {
        cdb.genadjfpu(1);
        if (*pretregs)                  // if we want the result
        {
            //assert(global87.stackused == 0);
            push87(cdb);                // one item on 8087 stack
            fixresult87(cdb,e,retregs,pretregs);
            return;
        }
        else
            // Pop unused result off 8087 stack
            cdb.gen2(0xDD, modregrm(3, 3, 0));  // FPOP
    }
    else if (retregs & mST01)
    {
        cdb.genadjfpu(2);
        if (*pretregs)                  // if we want the result
        {
            assert(global87.stackused == 0);
            push87(cdb);
            push87(cdb);                // two items on 8087 stack
            fixresult_complex87(cdb, e, retregs, pretregs, true);
            return;
        }
        else
        {
            // Pop unused result off 8087 stack
            cdb.gen2(0xDD, modregrm(3, 3, 0));  // FPOP
            cdb.gen2(0xDD, modregrm(3, 3, 0));  // FPOP
        }
    }

    /* Special handling for functions that return one part
       in XMM0 and the other part in AX
     */
    if (*pretregs && retregs)
    {
        if (reg1 == NOREG || reg2 == NOREG)
        {}
        else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS)))
        {
            // Exactly one of reg1/reg2 is an XMM register: move both halves
            // into a same-class pair (lreg,mreg)
            reg_t lreg, mreg;
            if (mask(reg1) & XMMREGS)
            {
                lreg = XMM0;
                mreg = XMM1;
            }
            else
            {
                lreg = mask(reg1) & mLSW ? reg1 : AX;
                mreg = mask(reg2) & mMSW ? reg2 : DX;
            }
            // Two moves; which one is issued first depends on (reg2 != lreg)
            for (int v = 0; v < 2; v++)
            {
                if (v ^ (reg2 != lreg))
                    genmovreg(cdb,lreg,reg1);
                else
                    genmovreg(cdb,mreg,reg2);
            }
            retregs = mask(lreg) | mask(mreg);
        }
    }

    /* Special handling for functions which return complex float in XMM0 or RAX. */

    if (I64
        && config.exe != EX_WIN64 // broken
        && *pretregs && tybasic(e.Ety) == TYcfloat)
    {
        assert(reg2 == NOREG);
        // spill
        if (config.exe == EX_WIN64)     // not reachable while the outer condition excludes EX_WIN64
        {
            assert(reg1 == AX);
            cdb.genfltreg(STO, reg1, 0);
            code_orrex(cdb.last(), REX_W);
        }
        else
        {
            assert(reg1 == XMM0);
            cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble);
        }
        // reload real
        push87(cdb);
        cdb.genfltreg(0xD9, 0, 0);
        genfwait(cdb);
        // reload imaginary
        push87(cdb);
        cdb.genfltreg(0xD9, 0, tysize(TYfloat));
        genfwait(cdb);

        retregs = mST01;
    }

    fixresult(cdb, e, retregs, pretregs);
}
/***************************
 * Determine size of argument e that will be pushed.
 * Params:
 *      e = argument expression, must not be an OPparam node
 *      tyf = function type, forwarded to type_parameterSize() for
 *            struct and array arguments
 * Returns:
 *      size(type) for scalars, type_parameterSize(e.ET, tyf) for
 *      structs and arrays; any other type prints its name and asserts
 */

@trusted
targ_size_t paramsize(elem* e, tym_t tyf)
{
    assert(e.Eoper != OPparam);

    const tym_t ty = tybasic(e.Ety);
    if (tyscalar(ty))
        return size(ty);
    if (ty == TYstruct || ty == TYarray)
        return type_parameterSize(e.ET, tyf);

    // Unsupported argument type
    printf("%s\n", tym_str(ty));
    assert(0);
}
/***************************
 * Generate code to move argument e on the stack.
 *
 * The argument is stored into the FLfuncarg area at offset
 * (funcargtos - size); no PUSH instructions are generated.
 * Not used for 16 bit code (asserted).
 *
 * Params:
 *      cdb = code sink
 *      e = argument expression; must not be OPparam, OPstrctor, OPstrthis,
 *          OPstrpar or OPnp_fp
 *      stackalign = alignment the argument size is rounded up to
 *      funcargtos = offset of the top of the slot; the value is stored below it
 *      tyf = function type, forwarded to paramsize()
 */

@trusted
private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf)
{
    //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos);
    //printf("movParams()\n"); elem_print(e);
    assert(!I16);
    assert(e && e.Eoper != OPparam);

    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
        objmod.fltused();

    int grex = I64 ? REX_W << 16 : 0;           // REX.W prefix bits for 64 bit operands

    targ_size_t szb = paramsize(e, tyf);          // size before alignment
    targ_size_t sz = _align(stackalign, szb);     // size after alignment
    assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
    assert((sz & (REGSIZE - 1)) == 0);
    //printf("szb = %d sz = %d\n", cast(int)szb, cast(int)sz);

    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;
    // Fast paths that store an immediate directly; anything that `break`s
    // out of the switch is evaluated into registers below
    switch (e.Eoper)
    {
        case OPstrctor:
        case OPstrthis:
        case OPstrpar:
        case OPnp_fp:
            assert(0);

        case OPrelconst:
        {
            int fl;
            if (!evalinregister(e) &&
                !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
                ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern)
               )
            {
                // MOV -stackoffset[EBP],&variable
                cs.Iop = 0xC7;
                cs.Irm = modregrm(2,0,BPRM);
                if (I64 && sz == 8)
                    cs.Irex |= REX_W;
                cs.IFL1 = FLfuncarg;
                cs.IEV1.Voffset = funcargtos - REGSIZE;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);
                return;
            }
            break;
        }

        case OPconst:
            if (!evalinregister(e))
            {
                cs.Iop = (sz == 1) ? 0xC6 : 0xC7;
                cs.Irm = modregrm(2,0,BPRM);
                cs.IFL1 = FLfuncarg;
                cs.IEV1.Voffset = funcargtos - sz;
                cs.IFL2 = FLconst;
                // Walk the constant's storage one register-sized piece at a time
                targ_size_t *p = cast(targ_size_t *) &(e.EV);
                cs.IEV2.Vsize_t = *p;
                if (I64 && tym == TYcldouble)
                    // The alignment of EV.Vcldouble is not the same on the compiler
                    // as on the target
                    goto Lbreak;
                if (I64 && sz >= 8)
                {
                    // Pre-scan: give up on the immediate form if any piece
                    // would not survive sign extension
                    int i = cast(int)sz;
                    do
                    {
                        if (*p >= 0x80000000)
                        {   // Use 64 bit register MOV, as the 32 bit one gets sign extended
                            // MOV reg,imm64
                            // MOV EA,reg
                            goto Lbreak;
                        }
                        p = cast(targ_size_t *)(cast(char *) p + REGSIZE);
                        i -= REGSIZE;
                    } while (i > 0);
                    p = cast(targ_size_t *) &(e.EV);
                }

                int i = cast(int)sz;
                do
                {   int regsize = REGSIZE;
                    regm_t retregs = (sz == 1) ? BYTEREGS : allregs;
                    reg_t reg;
                    // Prefer storing from a register that already holds the value
                    if (reghasvalue(retregs,*p,reg))
                    {
                        cs.Iop = (cs.Iop & 1) | 0x88;
                        cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (I64 && sz == 1 && reg >= 4)
                            cs.Irex |= REX;
                    }
                    if (I64 && sz >= 8)
                        cs.Irex |= REX_W;
                    cdb.gen(&cs);           // MOV EA,const

                    // Reset cs to the immediate form and advance to the next piece
                    p = cast(targ_size_t *)(cast(char *) p + regsize);
                    cs.Iop = 0xC7;
                    cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0);
                    cs.Irex &= ~REX_R;
                    cs.IEV1.Voffset += regsize;
                    cs.IEV2.Vint = cast(targ_int)*p;
                    i -= regsize;
                } while (i > 0);
                return;
            }

        Lbreak:
            break;

        default:
            break;
    }
    // General case: evaluate e into registers, then store them
    regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
    if (tyvector(tym) ||
        config.fpxmmregs && tyxmmreg(tym) &&
        // If not already in x87 register from function call return
        !((e.Eoper == OPcall || e.Eoper == OPucall) && I32))
    {
        retregs = XMMREGS;
        codelem(cdb, e, &retregs, false);
        const op = xmmstore(tym);
        const r = findreg(retregs);
        cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - sz);   // MOV funcarg[EBP],r
        checkSetVex(cdb.last(),tym);
        return;
    }
    else if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            retregs = tycomplex(tym) ? mST01 : mST0;
            codelem(cdb, e, &retregs, false);

            // Pick the x87 store opcode / reg field by operand width
            opcode_t op;
            uint r;
            switch (tym)
            {
                case TYfloat:
                case TYifloat:
                case TYcfloat:
                    op = 0xD9;
                    r = 3;
                    break;

                case TYdouble:
                case TYidouble:
                case TYdouble_alias:
                case TYcdouble:
                    op = 0xDD;
                    r = 3;
                    break;

                case TYldouble:
                case TYildouble:
                case TYcldouble:
                    op = 0xDB;
                    r = 7;
                    break;

                default:
                    assert(0);
            }
            if (tycomplex(tym))
            {
                // FSTP into the upper half of the slot (funcargtos - sz/2)
                cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2);
                pop87();
            }
            pop87();
            cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz);    // FSTP -sz[EBP]
            return;
        }
    }
    scodelem(cdb, e, &retregs, 0, true);
    if (sz <= REGSIZE)
    {
        uint r = findreg(retregs);
        cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);     // MOV -REGSIZE[EBP],r
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);
    }
    else if (sz == REGSIZE * 2)
    {
        uint r = findregmsw(retregs);
        cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);    // MOV -REGSIZE[EBP],r
        r = findreglsw(retregs);
        cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r
    }
    else
        assert(0);
}
/***************************
 * Generate code to push argument e on the stack.
 * stackpush is incremented by stackalign for each PUSH.
 *
 * The switch on e.Eoper contains shortcuts that push straight from memory
 * or as an immediate; any case that `break`s out falls through to the
 * general path at the bottom, which evaluates e into registers (or the
 * x87 / XMM unit) and stores or pushes the result.
 *
 * Params:
 *      cdb = code sink
 *      e = argument expression, must not be an OPparam node
 *      stackalign = alignment the argument size is rounded up to
 *      tyf = function type, forwarded to paramsize()
 */

@trusted
void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf)
{
    //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
    //printf("params()\n"); elem_print(e);
    stackchanged = 1;
    assert(e && e.Eoper != OPparam);

    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
        objmod.fltused();

    int grex = I64 ? REX_W << 16 : 0;           // REX.W prefix bits for 64 bit operands

    targ_size_t szb = paramsize(e, tyf);          // size before alignment
    targ_size_t sz = _align(stackalign,szb);      // size after alignment
    assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
    assert((sz & (REGSIZE - 1)) == 0);

    switch (e.Eoper)
    {
    case OPstrpar:
    {
        uint rm;

        elem* e1 = e.EV.E1;
        if (sz == 0)
        {
            // Zero sized struct: evaluate for side effects only, push nothing
            docommas(cdb, e1); // skip over any commas

            const stackpushsave = stackpush;
            const stackcleansave = cgstate.stackclean;
            cgstate.stackclean = 0;

            regm_t retregs = 0;
            codelem(cdb,e1,&retregs,true);

            assert(cgstate.stackclean == 0);
            cgstate.stackclean = stackcleansave;
            genstackclean(cdb,stackpush - stackpushsave,0);

            freenode(e);
            return;
        }
        if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar)
        {
            // Small struct variable: handle it like a plain OPvar
            freenode(e);
            e = e1;
            goto L1;
        }
        docommas(cdb, e1);             // skip over any commas
        code_flags_t seg = 0;          // assume no seg override
        regm_t retregs = sz ? IDXREGS : 0;
        bool doneoff = false;          // true if e1.EV.Voffset was temporarily bumped
        uint pushsize = REGSIZE;
        uint op16 = 0;
        if (!I16 && sz & 2)     // if odd number of words to push
        {
            pushsize = 2;
            op16 = 1;
        }
        else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0)
        {
            pushsize = 4;       // push DWORDs at a time
            op16 = 1;
        }
        uint npushes = cast(uint)(sz / pushsize);
        // Get the address of the struct into an index register (retregs)
        switch (e1.Eoper)
        {
            case OPind:
                if (sz)
                {
                    switch (tybasic(e1.EV.E1.Ety))
                    {
                        case TYfptr:
                        case TYhptr:
                            seg = CFes;
                            retregs |= mES;
                            break;

                        case TYsptr:
                            if (config.wflags & WFssneds)
                                seg = CFss;
                            break;

                        case TYfgPtr:
                            if (I32)
                                 seg = CFgs;
                            else if (I64)
                                 seg = CFfs;
                            else
                                 assert(0);
                            break;

                        case TYcptr:
                            seg = CFcs;
                            break;

                        default:
                            break;
                    }
                }
                codelem(cdb, e1.EV.E1, &retregs, false);
                freenode(e1);
                break;

            case OPvar:
                /* Symbol is no longer a candidate for a register */
                e1.EV.Vsym.Sflags &= ~GTregcand;

                if (!e1.Ecount && npushes > 4)
                {
                    /* Kludge to point at last word in struct. */
                    /* Don't screw up CSEs.                 */
                    e1.EV.Voffset += sz - pushsize;
                    doneoff = true;
                }
                //if (LARGEDATA) /* if default isn't DS */
                {
                    static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ];

                    int fl = el_fl(e1);
                    if (fl == FLfardata)
                    {
                        seg = CFes;
                        retregs |= mES;
                    }
                    else
                    {
                        uint s = segfl[fl];
                        assert(s < 4);
                        seg = segtocf[s];
                        if (seg == CFss && !(config.wflags & WFssneds))
                            seg = 0;
                    }
                }
                if (e1.Ety & mTYfar)
                {
                    seg = CFes;
                    retregs |= mES;
                }
                cdrelconst(cdb, e1, &retregs);
                // Reverse the effect of the previous add
                if (doneoff)
                    e1.EV.Voffset -= sz - pushsize;
                freenode(e1);
                break;

            case OPstreq:
            //case OPcond:
                if (config.exe & EX_segmented)
                {
                    seg = CFes;
                    retregs |= mES;
                }
                codelem(cdb, e1, &retregs, false);
                break;

            case OPpair:
            case OPrpair:
                pushParams(cdb, e1, stackalign, tyf);
                freenode(e);
                return;

            default:
                elem_print(e1);
                assert(0);
        }
        reg_t reg = findreglsw(retregs);
        rm = I16 ? regtorm[reg] : regtorm32[reg];
        if (op16)
            seg |= CFopsize;            // operand size
        if (npushes <= 4)
        {
            // Few pushes: unroll them, highest address first
            assert(!doneoff);
            for (; npushes > 1; --npushes)
            {
                cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1));  // PUSH [reg]
                code_orflag(cdb.last(),seg);
                cdb.genadjesp(pushsize);
            }
            cdb.gen2(0xFF,buildModregrm(0, 6, rm));     // PUSH [reg]
            cdb.last().Iflags |= seg;
            cdb.genadjesp(pushsize);
        }
        else if (sz)
        {
            // Many pushes: loop with CX as the counter, walking reg downwards
            getregs_imm(cdb, mCX | retregs);
                                                        // MOV CX,sz/2
            movregconst(cdb, CX, npushes, 0);
            if (!doneoff)
            {   // This should be done when
                // reg is loaded. Fix later
                                                        // ADD reg,sz-pushsize
                cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize);
            }
            getregs(cdb,mCX);                           // the LOOP decrements it
            cdb.gen2(0xFF, buildModregrm(0, 6, rm));    // PUSH [reg]
            cdb.last().Iflags |= seg | CFtarg2;
            code* c3 = cdb.last();                      // loop target
            cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize);  // SUB reg,pushsize
            if (I16 || config.flags4 & CFG4space)
                genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3
            else
            {
                if (I64)
                    cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX
                else
                    cdb.gen1(0x48 + CX);                // DEC CX
                genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3
            }
            regimmed_set(CX,0);
            cdb.genadjesp(cast(int)sz);
        }
        stackpush += sz;
        freenode(e);
        return;
    }

    case OPind:
        if (!e.Ecount)                          /* if *e1       */
        {
            if (sz < REGSIZE)
            {
                /* Don't push REGSIZE quantity because it may
                 * straddle past the end of valid memory
                 */
                break;
            }
            if (sz == REGSIZE)
                goto case OPvar;                // handle it with loadea()

            // Avoid PUSH MEM on the Pentium when optimizing for speed
            if (config.flags4 & CFG4speed &&
                (config.target_cpu >= TARGET_80486 &&
                 config.target_cpu <= TARGET_PentiumMMX) &&
                sz <= 2 * REGSIZE &&
                !tyfloating(tym))
                break;

            if (tym == TYldouble || tym == TYildouble || tycomplex(tym))
                break;

            code cs;
            cs.Iflags = 0;
            cs.Irex = 0;
            if (I32)
            {
                assert(sz >= REGSIZE * 2);
                loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4
                cdb.genadjesp(REGSIZE);
                stackpush += REGSIZE;
                sz -= REGSIZE;

                if (sz > REGSIZE)
                {
                    while (sz)
                    {
                        cs.IEV1.Voffset -= REGSIZE;
                        cdb.gen(&cs);                    // PUSH EA+...
                        cdb.genadjesp(REGSIZE);
                        stackpush += REGSIZE;
                        sz -= REGSIZE;
                    }
                    freenode(e);
                    return;
                }
            }
            else
            {
                if (sz == DOUBLESIZE)
                {
                    loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);                    // PUSH EA+4
                    cdb.genadjesp(REGSIZE);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);                    // PUSH EA+2
                }
                else /* TYlong */
                    loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2
                cdb.genadjesp(REGSIZE);
            }
            stackpush += sz;
            getlvalue_lsw(&cs);
            cdb.gen(&cs);                            // PUSH EA
            cdb.genadjesp(REGSIZE);
            freenode(e);
            return;
        }
        break;

    case OPnp_fp:
        if (!e.Ecount)                          /* if (far *)e1 */
        {
            elem* e1 = e.EV.E1;
            tym_t tym1 = tybasic(e1.Ety);
            /* BUG: what about pointers to functions?   */
            int segreg;
            switch (tym1)
            {
                case TYnptr: segreg = 3<<3; break;
                case TYcptr: segreg = 1<<3; break;
                default:     segreg = 2<<3; break;
            }
            if (I32 && stackalign == 2)
                cdb.gen1(0x66);                 // push a word
            cdb.gen1(0x06 + segreg);            // PUSH SEGREG
            if (I32 && stackalign == 2)
                code_orflag(cdb.last(), CFopsize);        // push a word
            cdb.genadjesp(stackalign);
            stackpush += stackalign;
            pushParams(cdb, e1, stackalign, tyf);   // then push the offset part
            freenode(e);
            return;
        }
        break;

    case OPrelconst:
        if (config.exe & EX_segmented)
        {
            /* Determine if we can just push the segment register           */
            /* Test size of type rather than TYfptr because of (long)(&v)   */
            Symbol* s = e.EV.Vsym;
            //if (sytab[s.Sclass] & SCSS && !I32)  // if variable is on stack
            //    needframe = true;                 // then we need stack frame
            int fl;
            if (_tysize[tym] == tysize(TYfptr) &&
                (fl = s.Sfl) != FLfardata &&
                /* not a function that CS might not be the segment of       */
                (!((fl == FLfunc || s.ty() & mTYcs) &&
                  (s.Sclass == SC.comdat || s.Sclass == SC.extern_ ||
                   s.Sclass == SC.inline || config.wflags & WFthunk)) ||
                 (fl == FLfunc && config.exe == EX_DOSX)
                )
               )
            {
                stackpush += sz;
                cdb.gen1(0x06 +           // PUSH SEGREG
                        (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3));
                cdb.genadjesp(REGSIZE);

                if (config.target_cpu >= TARGET_80286 && !e.Ecount)
                {
                    getoffset(cdb, e, STACK);
                    freenode(e);
                    return;
                }
                else
                {
                    regm_t retregs;
                    offsetinreg(cdb, e, &retregs);  // offsetinreg() frees e
                    const reg = findreg(retregs);
                    genpush(cdb,reg);               // PUSH reg
                    cdb.genadjesp(REGSIZE);
                }
                return;
            }
            if (config.target_cpu >= TARGET_80286 && !e.Ecount)
            {
                stackpush += sz;
                if (_tysize[tym] == tysize(TYfptr))
                {
                    // PUSH SEG e
                    cdb.gencs(0x68,0,FLextern,s);
                    cdb.last().Iflags = CFseg;
                    cdb.genadjesp(REGSIZE);
                }
                getoffset(cdb, e, STACK);
                freenode(e);
                return;
            }
        }
        break;                          /* else must evaluate expression */

    case OPvar:
    L1:
        if (config.flags4 & CFG4speed &&
            (config.target_cpu >= TARGET_80486 &&
             config.target_cpu <= TARGET_PentiumMMX) &&
            sz <= 2 * REGSIZE &&
            !tyfloating(tym))
        {   // Avoid PUSH MEM on the Pentium when optimizing for speed
            break;
        }
        else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym))
            break;                      // no PUSH MEM
        else
        {
            int regsize = REGSIZE;
            uint flag = 0;
            if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 &&
                !e.Ecount)
            {
                regsize = 4;
                flag |= CFopsize;
            }
            code cs;
            cs.Iflags = 0;
            cs.Irex = 0;
            // Push regsize pieces from the highest offset down to offset 0
            loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);    // PUSH EA+sz-2
            code_orflag(cdb.last(), flag);
            cdb.genadjesp(REGSIZE);
            stackpush += sz;
            while (cast(targ_int)(sz -= regsize) > 0)
            {
                loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);
                code_orflag(cdb.last(), flag);
                cdb.genadjesp(REGSIZE);
            }
            freenode(e);
            return;
        }

    case OPconst:
    {
        char pushi = 0;                 // non-zero if PUSH immediate may be used
        uint flag = 0;
        int regsize = REGSIZE;

        if (tycomplex(tym))
            break;

        if (I64 && tyfloating(tym) && sz > 4 && boolres(e))
            // Can't push 64 bit non-zero args directly
            break;

        if (I32 && szb == 10)           // special case for long double constants
        {
            assert(sz == 12);
            targ_int value = e.EV.Vushort8[4];  // pick upper 2 bytes of Vldouble
            stackpush += sz;
            cdb.genadjesp(cast(int)sz);
            for (int i = 0; i < 3; ++i)
            {
                reg_t reg;
                if (reghasvalue(allregs, value, reg))
                    cdb.gen1(0x50 + reg);           // PUSH reg
                else
                    cdb.genc2(0x68,0,value);        // PUSH value
                value = e.EV.Vulong4[i ^ 1];        // treat Vldouble as 2 element array of 32 bit uint
            }
            freenode(e);
            return;
        }

        assert(I64 || sz <= tysize(TYldouble));
        int i = cast(int)sz;
        if (!I16 && i == 2)
            flag = CFopsize;

        if (config.target_cpu >= TARGET_80286)
//       && (e.Ecount == 0 || e.Ecount != e.Ecomsub))
        {
            pushi = 1;
            if (I16 && config.target_cpu >= TARGET_80386 && i >= 4)
            {
                regsize = 4;
                flag = CFopsize;
            }
        }
        else if (i == REGSIZE)
            break;

        stackpush += sz;
        cdb.genadjesp(cast(int)sz);
        // Three views of the constant's storage, indexed per regsize below
        targ_uns* pi = &e.EV.Vuns;              // point to start of Vdouble
        targ_ushort* ps = cast(targ_ushort *) pi;
        targ_ullong* pl = cast(targ_ullong *)pi;
        i /= regsize;
        do
        {
            if (i)                      /* be careful not to go negative */
                i--;

            targ_size_t value;
            switch (regsize)
            {
                case 2:
                    value = ps[i];
                    break;

                case 4:
                    if (tym == TYldouble || tym == TYildouble)
                        /* The size is 10 bytes, and since we have 2 bytes left over,
                         * just read those 2 bytes, not 4.
                         * Otherwise we're reading uninitialized data.
                         * I.e. read 4 bytes, 4 bytes, then 2 bytes
                         */
                        value = i == 2 ? ps[4] : pi[i]; // 80 bits
                    else
                        value = pi[i];
                    break;

                case 8:
                    value = cast(targ_size_t)pl[i];
                    break;

                default:
                    assert(0);
            }

            reg_t reg;
            if (pushi)
            {
                if (I64 && regsize == 8 && value != cast(int)value)
                {
                    regwithvalue(cdb,allregs,value,reg,64);
                    goto Preg;          // cannot push imm64 unless it is sign extended 32 bit value
                }
                if (regsize == REGSIZE && reghasvalue(allregs,value,reg))
                    goto Preg;
                cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value
            }
            else
            {
                regwithvalue(cdb, allregs, value, reg, 0);
            Preg:
                genpush(cdb,reg);         // PUSH reg
            }
            code_orflag(cdb.last(), flag);              // operand size
        } while (i);
        freenode(e);
        return;
    }

    case OPpair:
    {
        if (e.Ecount)
            break;
        const op1 = e.EV.E1.Eoper;
        const op2 = e.EV.E2.Eoper;
        if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
            (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
        {
            // Both halves are leaves: push them individually, E2 first
            pushParams(cdb, e.EV.E2, stackalign, tyf);
            pushParams(cdb, e.EV.E1, stackalign, tyf);
            freenode(e);
        }
        else if (tyfloating(e.EV.E1.Ety) ||
                 tyfloating(e.EV.E2.Ety))
        {
            // Need special handling because of order of evaluation of e1 and e2
            break;
        }
        else
        {
            regm_t regs = allregs;
            codelem(cdb, e, &regs, false);
            genpush(cdb, findregmsw(regs)); // PUSH msreg
            genpush(cdb, findreglsw(regs)); // PUSH lsreg
            cdb.genadjesp(cast(int)sz);
            stackpush += sz;
        }
        return;
    }

    case OPrpair:
    {
        if (e.Ecount)
            break;
        const op1 = e.EV.E1.Eoper;
        const op2 = e.EV.E2.Eoper;
        if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
            (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
        {
            // Both halves are leaves: push them individually, E1 first
            pushParams(cdb, e.EV.E1, stackalign, tyf);
            pushParams(cdb, e.EV.E2, stackalign, tyf);
            freenode(e);
        }
        else if (tyfloating(e.EV.E1.Ety) ||
                 tyfloating(e.EV.E2.Ety))
        {
            // Need special handling because of order of evaluation of e1 and e2
            break;
        }
        else
        {
            regm_t regs = allregs;
            codelem(cdb, e, &regs, false);
            genpush(cdb, findregmsw(regs)); // PUSH msreg
            genpush(cdb, findreglsw(regs)); // PUSH lsreg
            cdb.genadjesp(cast(int)sz);
            stackpush += sz;
        }
        return;
    }

    default:
        break;
    }

    // General case: evaluate e, then store or push the result
    regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
    if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs))
    {
        // XMM value: make room on the stack, then store to [ESP]
        regm_t retxmm = XMMREGS;
        codelem(cdb, e, &retxmm, false);
        stackpush += sz;
        cdb.genadjesp(cast(int)sz);
        cod3_stackadj(cdb, cast(int)sz);
        const op = xmmstore(tym);
        const r = findreg(retxmm);
        cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP));   // MOV [ESP],r
        checkSetVex(cdb.last(),tym);
        return;
    }
    else if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            // x87 value: make room on the stack, then FSTP into it
            retregs = tycomplex(tym) ? mST01 : mST0;
            codelem(cdb, e, &retregs, false);
            stackpush += sz;
            cdb.genadjesp(cast(int)sz);
            cod3_stackadj(cdb, cast(int)sz);
            // Pick the x87 store opcode / reg field by operand width
            opcode_t op;
            uint r;
            switch (tym)
            {
                case TYfloat:
                case TYifloat:
                case TYcfloat:
                    op = 0xD9;
                    r = 3;
                    break;

                case TYdouble:
                case TYidouble:
                case TYdouble_alias:
                case TYcdouble:
                    op = 0xDD;
                    r = 3;
                    break;

                case TYldouble:
                case TYildouble:
                case TYcldouble:
                    op = 0xDB;
                    r = 7;
                    break;

                default:
                    assert(0);
            }
            if (!I16)
            {
                if (tycomplex(tym))
                {
                    // FSTP sz/2[ESP]
                    cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2);
                    pop87();
                }
                pop87();
                cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP));   // FSTP [ESP]
            }
            else
            {
                // 16 bit code: copy SP into an index register and store through it
                retregs = IDXREGS;                             // get an index reg
                reg_t reg;
                allocreg(cdb, &retregs, &reg, TYoffset);
                genregs(cdb, 0x89, SP, reg);         // MOV reg,SP
                pop87();
                cdb.gen2(op, modregrm(0, r, regtorm[reg]));       // FSTP [reg]
            }
            if (LARGEDATA)
                cdb.last().Iflags |= CFss;      // want to store into stack
            genfwait(cdb);         // FWAIT
            return;
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
            retregs = mSTACK;
    }
    else if (I16 && sz == 8)             // if long long
        retregs = mSTACK;

    scodelem(cdb,e,&retregs,0,true);
    if (retregs != mSTACK)                // if stackpush not already inc'd
        stackpush += sz;
    if (sz <= REGSIZE)
    {
        genpush(cdb,findreg(retregs));        // PUSH reg
        cdb.genadjesp(cast(int)REGSIZE);
    }
    else if (sz == REGSIZE * 2)
    {
        genpush(cdb,findregmsw(retregs));     // PUSH msreg
        genpush(cdb,findreglsw(retregs));     // PUSH lsreg
        cdb.genadjesp(cast(int)sz);
    }
}
Return mask of index register in *pretregs.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem whose offset portion is wanted; it is freed before returning
 *      pretregs = set to the mask of the register holding the offset
 */

@trusted
void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    regm_t retregs = mLSW;                     // want only offset
    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // e is a common subexpression: look for a register that is already
        // recorded as holding its value before computing it again.
        regm_t rm = retregs & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm && regcon.cse.value[i] == e)
            {
                *pretregs = mask(i);
                getregs(cdb, *pretregs);
                goto L3;
            }
            rm &= ~mask(i);                    // candidate i examined, drop it
        }
    }

    // No register already holds it: allocate one and load the offset into it
    *pretregs = retregs;
    allocreg(cdb, pretregs, &reg, TYoffset);
    getoffset(cdb,e,reg);
L3:
    cssave(e, *pretregs,false);
    freenode(e);
}

/******************************
 * Generate code to load data into registers.
 *
 * Dispatches on the basic type of `e` and on what the caller asked for:
 * $(UL
 * $(LI nothing requested (`*pretregs == 0`): returns without generating code)
 * $(LI `TYstruct`: delegated to `cdrelconst`)
 * $(LI floating point: may be delegated to `cloadxmm`, `load87` or `cload87`)
 * $(LI flags only (`*pretregs == mPSW`): generates a test of the value)
 * $(LI constant (`OPconst`): materializes the value with `movregconst` / `movxmmconst`)
 * $(LI anything else: reuses the register a parameter arrived in when possible,
 *      otherwise allocates registers and loads through `loadea`)
 * )
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem to load, must not be null
 *      pretregs = mask of requested result registers / flags; passed on to
 *                 `fixresult` after the load
 */


@trusted
void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
//        if (debugw)
//            printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
//        elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;
    if (*pretregs == mPSW)
    {
        // Only the flags are wanted
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            /* Evaluate using XMM register and XMM instruction.
             * This affects jmpopcode()
             */
            if (s.Sclass == SC.parameter)
                refparam = true;
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));          // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);     // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));       // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);        // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);               // get a register
            if (I32)                                            // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0);  // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)          // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);                // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);           // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                            // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1

            // OR in the remaining register-sized pieces, highest offset first
            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))               // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;
    if (e.Eoper == OPconst)
    {
        if (tyvector(tym) && forregs & XMMREGS)
        {
            assert(!flags);
            reg_t xreg;
            allocreg(cdb, &forregs, &xreg, tym);     // allocate registers
            movxmmconst(cdb, xreg, tym, &e.EV, flags);
            fixresult(cdb, e, forregs, pretregs);
            return;
        }

        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that reghasvalue() reports as already holding the value
        if (sz == REGSIZE && reghasvalue(forregs, value, reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);     // allocate registers
        regcon.immed.mval = save;               // allocreg could unnecessarily clear .mval
        if (sz <= REGSIZE)
        {
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {
                movxmmconst(cdb, reg, tym, &e.EV, 0);
                flags = 0;
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0);  // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);     // MOV ES,reg
                msw = lsw;                      // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);          // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);              // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);              // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);       // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lo, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.hi, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            const reg_t preg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            const regm_t pregm = mask(preg);

            if (!(sz <= 2 && pregm & XMMREGS))   // no SIMD instructions to load 1 or 2 byte quantities
            {
                if (debugr)
                    printf("%s.%d is fastpar and using register %s\n",
                           e.EV.Vsym.Sident.ptr,
                           cast(int)e.EV.Voffset,
                           regm_str(pregm));

                mfuncreg &= ~pregm;
                regcon.used |= pregm;
                fixresult(cdb,e,pregm,pretregs);
                return;
            }
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                                  // byte MOV
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);   // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);              // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            // Load the most significant half first, then the least significant,
            // keeping all of forregs intact while forming each address.
            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                // Each load passes, as its keep mask, every register already
                // loaded so that forming the next address cannot clobber it.
                assert(reg == AX);
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0, 0);           // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX, 0);         // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX, 0);     // MOV CX,data+2
                // AX, BX and CX all hold loaded words by now; BX must be kept too
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}