/**
 * Code generation 2
 *
 * Includes:
 * - math operators (+ - * / %) and functions (abs, cos, sqrt)
 * - 'string' functions (strlen, memcpy, memset)
 * - pointers (address of / dereference)
 * - struct assign, constructor, destructor
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
 */

module dmd.backend.cod2;

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;


nothrow:
@safe:

import dmd.backend.cg : segfl, stackfl;

__gshared int cdcmp_flag;

import dmd.backend.divcoeff : choose_multiplier, udiv_coefficients;

/*******************************
 * Exchange the register numbers stored at `a` and `b`.
 * Params:
 *      a = first register slot
 *      b = second register slot
 */

private void swap(reg_t *a,reg_t *b)
{
    const saved = *b;
    *b = *a;
    *a = saved;
}


/*******************************************
 * Tell whether the effective address of `e` is restricted to MOV instructions.
 * Params:
 *      e = elem to examine
 * Returns:
 *      true if `e` is an OPvar whose symbol has storage class global, extern_,
 *      comdat or comdef while generating PIC code for OSX64; false otherwise.
 */

@trusted
bool movOnly(const elem *e)
{
    // The restriction only exists for PIC code on OSX64, and only for variables
    if (!(config.exe & EX_OSX64))
        return false;
    if (!(config.flags3 & CFG3pic))
        return false;
    if (e.Eoper != OPvar)
        return false;

    // Fixups for these storage classes can only be done with a MOV
    const sc = e.EV.Vsym.Sclass;
    return sc == SC.global ||
           sc == SC.extern_ ||
           sc == SC.comdat ||
           sc == SC.comdef;
}

/********************************
 * Compute which registers take part in the addressing mode of an instruction.
 * The addressing mode is read from the mod/rm byte (and the sib byte and
 * REX prefix where applicable).
 * Params:
 *      c = instruction to inspect
 * Returns:
 *      mask of the registers used by the addressing mode, 0 if the
 *      mod field selects a register operand
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;

    // mod == 3: operand is a register, not memory, so nothing is indexed
    if ((modrm & 0xC0) == 0xC0)
        return 0;

    const lowrm = modrm & 7;

    if (I16)
    {
        // 16 bit addressing modes, indexed by the r/m field
        static immutable ubyte[8] idxrm = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return idxrm[lowrm];
    }

    // No sib byte: the r/m field (extended by REX_B) names the register
    if (lowrm != 4)
        return mask(lowrm | ((c.Irex & REX_B) ? 8 : 0));

    // sib byte present
    const sib = c.Isib;
    const reg_t scaled = (sib >> 3) & 7;

    // scaled index register, extended by REX_X
    regm_t result = mask(scaled | ((c.Irex & REX_X) ? 8 : 0));

    // base field of 5 combined with mod == 0 contributes no register
    const baseAbsent = (sib & 7) == 5 && (modrm & 0xC0) == 0;
    if (!baseAbsent)
        result |= mask((sib & 7) | ((c.Irex & REX_B) ? 8 : 0));

    return result;
}


/***************************
 * Gen code for call to floating point routine.
123 */ 124 125 @trusted 126 void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib) 127 { 128 if (config.inline8087) 129 { 130 orth87(cdb,e,pretregs); 131 return; 132 } 133 134 regm_t retregs1,retregs2; 135 if (tybasic(e.EV.E1.Ety) == TYfloat) 136 { 137 clib += CLIB.fadd - CLIB.dadd; /* convert to float operation */ 138 retregs1 = FLOATREGS; 139 retregs2 = FLOATREGS2; 140 } 141 else 142 { 143 if (I32) 144 { retregs1 = DOUBLEREGS_32; 145 retregs2 = DOUBLEREGS2_32; 146 } 147 else 148 { retregs1 = mSTACK; 149 retregs2 = DOUBLEREGS_16; 150 } 151 } 152 153 codelem(cdb,e.EV.E1, &retregs1,false); 154 if (retregs1 & mSTACK) 155 cgstate.stackclean++; 156 scodelem(cdb,e.EV.E2, &retregs2, retregs1 & ~mSTACK, false); 157 if (retregs1 & mSTACK) 158 cgstate.stackclean--; 159 callclib(cdb, e, clib, pretregs, 0); 160 } 161 162 /***************************** 163 * Handle operators which are more or less orthogonal 164 * ( + - & | ^ ) 165 */ 166 167 @trusted 168 void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 169 { 170 //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 171 elem *e1 = e.EV.E1; 172 elem *e2 = e.EV.E2; 173 if (*pretregs == 0) // if don't want result 174 { 175 codelem(cdb,e1,pretregs,false); // eval left leaf 176 *pretregs = 0; // in case they got set 177 codelem(cdb,e2,pretregs,false); 178 return; 179 } 180 181 const ty = tybasic(e.Ety); 182 const ty1 = tybasic(e1.Ety); 183 184 if (tyfloating(ty1)) 185 { 186 if (tyvector(ty1) || 187 config.fpxmmregs && tyxmmreg(ty1) && 188 !(*pretregs & mST0) && 189 !(*pretregs & mST01) && 190 !(ty == TYldouble || ty == TYildouble) // watch out for shrinkLongDoubleConstantIfPossible() 191 ) 192 { 193 orthxmm(cdb,e,pretregs); 194 return; 195 } 196 if (config.inline8087) 197 { 198 orth87(cdb,e,pretregs); 199 return; 200 } 201 if (config.exe & EX_windos) 202 { 203 opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? 
CLIB.dadd 204 : CLIB.dsub); 205 return; 206 } 207 else 208 { 209 assert(0); 210 } 211 } 212 if (tyxmmreg(ty1)) 213 { 214 orthxmm(cdb,e,pretregs); 215 return; 216 } 217 218 opcode_t op1, op2; 219 uint mode; 220 __gshared int nest; 221 222 const ty2 = tybasic(e2.Ety); 223 const e2oper = e2.Eoper; 224 const sz = _tysize[ty]; 225 const isbyte = (sz == 1); 226 code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 227 bool test = false; // assume we destroyed lvalue 228 229 switch (e.Eoper) 230 { 231 case OPadd: mode = 0; 232 op1 = 0x03; op2 = 0x13; break; /* ADD, ADC */ 233 case OPmin: mode = 5; 234 op1 = 0x2B; op2 = 0x1B; break; /* SUB, SBB */ 235 case OPor: mode = 1; 236 op1 = 0x0B; op2 = 0x0B; break; /* OR , OR */ 237 case OPxor: mode = 6; 238 op1 = 0x33; op2 = 0x33; break; /* XOR, XOR */ 239 case OPand: mode = 4; 240 op1 = 0x23; op2 = 0x23; /* AND, AND */ 241 if (tyreg(ty1) && 242 *pretregs == mPSW) /* if flags only */ 243 { 244 test = true; 245 op1 = 0x85; /* TEST */ 246 mode = 0; 247 } 248 break; 249 250 default: 251 assert(0); 252 } 253 op1 ^= isbyte; /* if byte operation */ 254 255 // Compute numwords, the number of words to operate on. 256 int numwords = 1; 257 if (!I16) 258 { 259 /* Cannot operate on longs and then do a 'paint' to a far */ 260 /* pointer, because far pointers are 48 bits and longs are 32. */ 261 /* Therefore, numwords can never be 2. */ 262 assert(!(tyfv(ty1) && tyfv(ty2))); 263 if (sz == 2 * REGSIZE) 264 { 265 numwords++; 266 } 267 } 268 else 269 { 270 /* If ty is a TYfptr, but both operands are long, treat the */ 271 /* operation as a long. 
*/ 272 if ((tylong(ty1) || ty1 == TYhptr) && 273 (tylong(ty2) || ty2 == TYhptr)) 274 numwords++; 275 } 276 277 // Special cases where only flags are set 278 if (test && _tysize[ty1] <= REGSIZE && 279 (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount)) 280 && !movOnly(e1) 281 ) 282 { 283 // Handle the case of (var & const) 284 if (e2.Eoper == OPconst && el_signx32(e2)) 285 { 286 code cs = void; 287 cs.Iflags = 0; 288 cs.Irex = 0; 289 getlvalue(cdb,&cs,e1,0); 290 targ_size_t value = e2.EV.Vpointer; 291 if (sz == 2) 292 value &= 0xFFFF; 293 else if (sz == 4) 294 value &= 0xFFFFFFFF; 295 reg_t reg; 296 if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,reg)) 297 { 298 code_newreg(&cs, reg); 299 if (I64 && isbyte && reg >= 4) 300 cs.Irex |= REX; 301 } 302 else 303 { 304 if (sz == 8 && !I64) 305 { 306 assert(value == cast(int)value); // sign extend imm32 307 } 308 op1 = 0xF7; 309 cs.IEV2.Vint = cast(targ_int)value; 310 cs.IFL2 = FLconst; 311 } 312 cs.Iop = op1 ^ isbyte; 313 cs.Iflags |= word | CFpsw; 314 freenode(e1); 315 freenode(e2); 316 cdb.gen(&cs); 317 return; 318 } 319 320 // Handle (exp & reg) 321 reg_t reg; 322 regm_t retregs; 323 if (isregvar(e2,retregs,reg)) 324 { 325 code cs = void; 326 cs.Iflags = 0; 327 cs.Irex = 0; 328 getlvalue(cdb,&cs,e1,0); 329 code_newreg(&cs, reg); 330 if (I64 && isbyte && reg >= 4) 331 cs.Irex |= REX; 332 cs.Iop = op1 ^ isbyte; 333 cs.Iflags |= word | CFpsw; 334 freenode(e1); 335 freenode(e2); 336 cdb.gen(&cs); 337 return; 338 } 339 } 340 341 code cs = void; 342 cs.Iflags = 0; 343 cs.Irex = 0; 344 345 // Look for possible uses of LEA 346 if (e.Eoper == OPadd && 347 !(*pretregs & mPSW) && // flags aren't set by LEA 348 !nest && // could cause infinite recursion if e.Ecount 349 (sz == REGSIZE || (I64 && sz == 4))) // far pointers aren't handled 350 { 351 const rex = (sz == 8) ? 
REX_W : 0; 352 353 // Handle the case of (e + &var) 354 int e1oper = e1.Eoper; 355 if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)]))) 356 || // LEA costs too much for simple EAs on older CPUs 357 (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) || 358 (!I16 && (isscaledindex(e1) || isscaledindex(e2))) || 359 (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) || 360 (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) || 361 (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount && 362 (e1oper == OPmul || e1oper == OPshl) && 363 e1.EV.E2.Eoper == OPconst && 364 ssindex(e1oper,e1.EV.E2.EV.Vuns) 365 ) || 366 (!I16 && e1.Ecount) 367 ) 368 { 369 const inc = e.Ecount != 0; 370 nest += inc; 371 code csx = void; 372 getlvalue(cdb,&csx,e,0); 373 nest -= inc; 374 reg_t regx; 375 allocreg(cdb,pretregs,®x,ty); 376 csx.Iop = LEA; 377 code_newreg(&csx, regx); 378 cdb.gen(&csx); // LEA regx,EA 379 if (rex) 380 code_orrex(cdb.last(), rex); 381 return; 382 } 383 384 // Handle the case of ((e + c) + e2) 385 if (!I16 && 386 e1oper == OPadd && 387 (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) || 388 e2oper == OPconst && el_signx32(e2)) && 389 !e1.Ecount 390 ) 391 { 392 elem *ebase; 393 elem *edisp; 394 if (e2oper == OPconst && el_signx32(e2)) 395 { edisp = e2; 396 ebase = e1.EV.E2; 397 } 398 else 399 { edisp = e1.EV.E2; 400 ebase = e2; 401 } 402 403 auto e11 = e1.EV.E1; 404 regm_t retregs = *pretregs & ALLREGS; 405 if (!retregs) 406 retregs = ALLREGS; 407 int ss = 0; 408 int ss2 = 0; 409 410 // Handle the case of (((e * c1) + c2) + e2) 411 // Handle the case of (((e << c1) + c2) + e2) 412 if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 413 e11.EV.E2.Eoper == OPconst && 414 !e11.Ecount 415 ) 416 { 417 const co1 = cast(targ_size_t)el_tolong(e11.EV.E2); 418 if (e11.Eoper == OPshl) 419 { 420 
if (co1 > 3) 421 goto L13; 422 ss = cast(int)co1; 423 } 424 else 425 { 426 ss2 = 1; 427 switch (co1) 428 { 429 case 6: ss = 1; break; 430 case 12: ss = 1; ss2 = 2; break; 431 case 24: ss = 1; ss2 = 3; break; 432 case 10: ss = 2; break; 433 case 20: ss = 2; ss2 = 2; break; 434 case 40: ss = 2; ss2 = 3; break; 435 case 18: ss = 3; break; 436 case 36: ss = 3; ss2 = 2; break; 437 case 72: ss = 3; ss2 = 3; break; 438 default: 439 ss2 = 0; 440 goto L13; 441 } 442 } 443 freenode(e11.EV.E2); 444 freenode(e11); 445 e11 = e11.EV.E1; 446 L13: 447 { } 448 } 449 450 reg_t reg11; 451 regm_t regm; 452 if (e11.Eoper == OPvar && isregvar(e11,regm,reg11)) 453 { 454 if (tysize(e11.Ety) <= REGSIZE) 455 retregs = mask(reg11); // only want the LSW 456 else 457 retregs = regm; 458 freenode(e11); 459 } 460 else 461 codelem(cdb,e11,&retregs,false); 462 463 regm_t rretregs = ALLREGS & ~retregs & ~mBP; 464 scodelem(cdb,ebase,&rretregs,retregs,true); 465 reg_t reg; 466 { 467 regm_t sregs = *pretregs & ~rretregs; 468 if (!sregs) 469 sregs = ALLREGS & ~rretregs; 470 allocreg(cdb,&sregs,®,ty); 471 } 472 473 assert((retregs & (retregs - 1)) == 0); // must be only one register 474 assert((rretregs & (rretregs - 1)) == 0); // must be only one register 475 476 auto reg1 = findreg(retregs); 477 const reg2 = findreg(rretregs); 478 479 if (ss2) 480 { 481 assert(reg != reg2); 482 if ((reg1 & 7) == BP) 483 { static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 484 485 // IMUL reg,imm32 486 cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]); 487 } 488 else 489 { // LEA reg,[reg1*ss][reg1] 490 cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7)); 491 if (reg1 & 8) 492 code_orrex(cdb.last(), REX_X | REX_B); 493 } 494 if (rex) 495 code_orrex(cdb.last(), rex); 496 reg1 = reg; 497 ss = ss2; // use *2 for scale 498 } 499 500 cs.Iop = LEA; // LEA reg,c[reg1*ss][reg2] 501 cs.Irm = modregrm(2,reg & 7,4); 502 cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7); 503 assert(reg2 != BP); 504 cs.Iflags = CFoff; 505 
cs.Irex = cast(ubyte)rex; 506 if (reg & 8) 507 cs.Irex |= REX_R; 508 if (reg1 & 8) 509 cs.Irex |= REX_X; 510 if (reg2 & 8) 511 cs.Irex |= REX_B; 512 cs.IFL1 = FLconst; 513 cs.IEV1.Vsize_t = edisp.EV.Vuns; 514 515 freenode(edisp); 516 freenode(e1); 517 cdb.gen(&cs); 518 fixresult(cdb,e,mask(reg),pretregs); 519 return; 520 } 521 } 522 523 regm_t posregs = (isbyte) ? BYTEREGS : (mES | allregs); 524 regm_t retregs = *pretregs & posregs; 525 if (retregs == 0) /* if no return regs speced */ 526 /* (like if wanted flags only) */ 527 retregs = ALLREGS & posregs; // give us some 528 529 if (ty1 == TYhptr || ty2 == TYhptr) 530 { /* Generate code for add/subtract of huge pointers. 531 No attempt is made to generate very good code. 532 */ 533 retregs = (retregs & mLSW) | mDX; 534 regm_t rretregs; 535 if (ty1 == TYhptr) 536 { // hptr +- long 537 rretregs = mLSW & ~(retregs | regcon.mvar); 538 if (!rretregs) 539 rretregs = mLSW; 540 rretregs |= mCX; 541 codelem(cdb,e1,&rretregs,0); 542 retregs &= ~rretregs; 543 if (!(retregs & mLSW)) 544 retregs |= mLSW & ~rretregs; 545 546 scodelem(cdb,e2,&retregs,rretregs,true); 547 } 548 else 549 { // long + hptr 550 codelem(cdb,e1,&retregs,0); 551 rretregs = (mLSW | mCX) & ~retregs; 552 if (!(rretregs & mLSW)) 553 rretregs |= mLSW; 554 scodelem(cdb,e2,&rretregs,retregs,true); 555 } 556 getregs(cdb,rretregs | retregs); 557 const mreg = DX; 558 const lreg = findreglsw(retregs); 559 if (e.Eoper == OPmin) 560 { // negate retregs 561 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 562 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 563 code_orflag(cdb.last(),CFpsw); 564 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 565 } 566 const lrreg = findreglsw(rretregs); 567 genregs(cdb,0x03,lreg,lrreg); // ADD lreg,lrreg 568 code_orflag(cdb.last(),CFpsw); 569 genmovreg(cdb,lrreg,CX); // MOV lrreg,CX 570 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 571 genshift(cdb); // MOV CX,offset __AHSHIFT 572 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 
573 genregs(cdb,0x03,mreg,lrreg); // ADD mreg,MSREG(h) 574 fixresult(cdb,e,retregs,pretregs); 575 return; 576 } 577 578 regm_t rretregs; 579 reg_t reg; 580 if (_tysize[ty1] > REGSIZE && numwords == 1) 581 { /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */ 582 583 debug 584 if (_tysize[ty2] != REGSIZE) 585 { 586 printf("e = %p, e.Eoper = %s e1.Ety = %s e2.Ety = %s\n", e, oper_str(e.Eoper), tym_str(ty1), tym_str(ty2)); 587 elem_print(e); 588 } 589 590 assert(_tysize[ty2] == REGSIZE); 591 592 /* Watch out for the case here where you are going to OP reg,EA */ 593 /* and both the reg and EA use ES! Prevent this by forcing */ 594 /* reg into the regular registers. */ 595 if ((e2oper == OPind || 596 (e2oper == OPvar && el_fl(e2) == FLfardata)) && 597 !e2.Ecount) 598 { 599 retregs = ALLREGS; 600 } 601 602 codelem(cdb,e1,&retregs,test != 0); 603 reg = findreglsw(retregs); /* reg is the register with the offset*/ 604 } 605 else 606 { 607 regm_t regm; 608 reg_t regx; 609 610 /* if (tyword + TYfptr) */ 611 if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE) 612 { retregs = ~*pretregs & ALLREGS; 613 614 /* if retregs doesn't have any regs in it that aren't reg vars */ 615 if ((retregs & ~regcon.mvar) == 0) 616 retregs |= mAX; 617 } 618 else if (numwords == 2 && retregs & mES) 619 retregs = (retregs | mMSW) & ALLREGS; 620 621 // Determine if we should swap operands, because 622 // mov EAX,x 623 // add EAX,reg 624 // is faster than: 625 // mov EAX,reg 626 // add EAX,x 627 else if (e2oper == OPvar && 628 e1.Eoper == OPvar && 629 e.Eoper != OPmin && 630 isregvar(e1,regm,regx) && 631 regm != retregs && 632 _tysize[ty1] == _tysize[ty2]) 633 { 634 elem *es = e1; 635 e1 = e2; 636 e2 = es; 637 } 638 codelem(cdb,e1,&retregs,test != 0); // eval left leaf 639 reg = findreg(retregs); 640 } 641 reg_t rreg; 642 int rval; 643 targ_size_t i; 644 switch (e2oper) 645 { 646 case OPind: /* if addressing mode */ 647 if (!e2.Ecount) /* if not CSE */ 648 goto L1; /* try OP 
reg,EA */ 649 goto default; 650 651 default: /* operator node */ 652 L2: 653 rretregs = ALLREGS & ~retregs; 654 /* Be careful not to do arithmetic on ES */ 655 if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW) 656 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs; 657 else if (isbyte) 658 rretregs &= BYTEREGS; 659 660 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 661 rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs); 662 if (!test) 663 getregs(cdb,retregs); // we will trash these regs 664 if (numwords == 1) /* ADD reg,rreg */ 665 { 666 /* reverse operands to avoid moving around the segment value */ 667 if (_tysize[ty2] > REGSIZE) 668 { 669 getregs(cdb,rretregs); 670 genregs(cdb,op1,rreg,reg); 671 retregs = rretregs; // reverse operands 672 } 673 else 674 { 675 genregs(cdb,op1,reg,rreg); 676 if (!I16 && *pretregs & mPSW) 677 cdb.last().Iflags |= word; 678 } 679 if (I64 && sz == 8) 680 code_orrex(cdb.last(), REX_W); 681 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 682 code_orrex(cdb.last(), REX); 683 } 684 else /* numwords == 2 */ /* ADD lsreg,lsrreg */ 685 { 686 reg = findreglsw(retregs); 687 rreg = findreglsw(rretregs); 688 genregs(cdb,op1,reg,rreg); 689 if (e.Eoper == OPadd || e.Eoper == OPmin) 690 code_orflag(cdb.last(),CFpsw); 691 reg = findregmsw(retregs); 692 rreg = findregmsw(rretregs); 693 if (!(e2oper == OPu16_32 && // if second operand is 0 694 (op2 == 0x0B || op2 == 0x33)) // and OR or XOR 695 ) 696 genregs(cdb,op2,reg,rreg); // ADC msreg,msrreg 697 } 698 break; 699 700 case OPrelconst: 701 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 702 goto default; 703 if (sz != REGSIZE) 704 goto L2; 705 if (segfl[el_fl(e2)] != 3) /* if not in data segment */ 706 goto L2; 707 if (evalinregister(e2)) 708 goto L2; 709 cs.IEV2.Voffset = e2.EV.Voffset; 710 cs.IEV2.Vsym = e2.EV.Vsym; 711 cs.Iflags |= CFoff; 712 i = 0; /* no INC or DEC opcode */ 713 rval = 0; 714 goto L3; 715 716 case 
OPconst: 717 if (tyfv(ty2)) 718 goto L2; 719 if (numwords == 1) 720 { 721 if (!el_signx32(e2)) 722 goto L2; 723 i = e2.EV.Vpointer; 724 if (word) 725 { 726 if (!(*pretregs & mPSW) && 727 config.flags4 & CFG4speed && 728 (e.Eoper == OPor || e.Eoper == OPxor || test || 729 (e1.Eoper != OPvar && e1.Eoper != OPind))) 730 { word = 0; 731 i &= 0xFFFF; 732 } 733 } 734 rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,rreg); 735 cs.IEV2.Vsize_t = i; 736 L3: 737 if (!test) 738 getregs(cdb,retregs); // we will trash these regs 739 op1 ^= isbyte; 740 cs.Iflags |= word; 741 if (rval) 742 { cs.Iop = op1 ^ 2; 743 mode = rreg; 744 } 745 else 746 cs.Iop = 0x81; 747 cs.Irm = modregrm(3,mode&7,reg&7); 748 if (mode & 8) 749 cs.Irex |= REX_R; 750 if (reg & 8) 751 cs.Irex |= REX_B; 752 if (I64 && sz == 8) 753 cs.Irex |= REX_W; 754 if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4))) 755 cs.Irex |= REX; 756 cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2)); 757 /* Modify instruction for special cases */ 758 switch (e.Eoper) 759 { 760 case OPadd: 761 { 762 int iop; 763 764 if (i == 1) 765 iop = 0; /* INC reg */ 766 else if (i == -1) 767 iop = 8; /* DEC reg */ 768 else 769 break; 770 cs.Iop = (0x40 | iop | reg) ^ isbyte; 771 if ((isbyte && *pretregs & mPSW) || I64) 772 { 773 cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop); 774 cs.Iop = 0xFF; 775 } 776 break; 777 } 778 779 case OPand: 780 if (test) 781 cs.Iop = rval ? 
op1 : 0xF7; // TEST 782 break; 783 784 default: 785 break; 786 } 787 if (*pretregs & mPSW) 788 cs.Iflags |= CFpsw; 789 cs.Iop ^= isbyte; 790 cdb.gen(&cs); 791 cs.Iflags &= ~CFpsw; 792 } 793 else if (numwords == 2) 794 { 795 getregs(cdb,retregs); 796 reg = findregmsw(retregs); 797 const lsreg = findreglsw(retregs); 798 cs.Iop = 0x81; 799 cs.Irm = modregrm(3,mode,lsreg); 800 cs.IFL2 = FLconst; 801 const msw = cast(targ_int)MSREG(e2.EV.Vllong); 802 cs.IEV2.Vint = e2.EV.Vlong; 803 switch (e.Eoper) 804 { 805 case OPadd: 806 case OPmin: 807 cs.Iflags |= CFpsw; 808 break; 809 810 default: 811 break; 812 } 813 cdb.gen(&cs); 814 cs.Iflags &= ~CFpsw; 815 816 cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg); 817 cs.IEV2.Vint = msw; 818 if (e.Eoper == OPadd) 819 cs.Irm |= modregrm(0,2,0); /* ADC */ 820 cdb.gen(&cs); 821 } 822 else 823 assert(0); 824 freenode(e2); 825 break; 826 827 case OPvar: 828 if (movOnly(e2)) 829 goto L2; 830 L1: 831 if (tyfv(ty2)) 832 goto L2; 833 if (!test) 834 getregs(cdb,retregs); // we will trash these regs 835 loadea(cdb,e2,&cs,op1, 836 ((numwords == 2) ? findreglsw(retregs) : reg), 837 0,retregs,retregs); 838 if (!I16 && word) 839 { if (*pretregs & mPSW) 840 code_orflag(cdb.last(),word); 841 else 842 cdb.last().Iflags &= ~cast(int)word; 843 } 844 else if (numwords == 2) 845 { 846 if (e.Eoper == OPadd || e.Eoper == OPmin) 847 code_orflag(cdb.last(),CFpsw); 848 reg = findregmsw(retregs); 849 if (!OTleaf(e2.Eoper)) 850 { getlvalue_msw(&cs); 851 cs.Iop = op2; 852 NEWREG(cs.Irm,reg); 853 cdb.gen(&cs); // ADC reg,data+2 854 } 855 else 856 loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0); 857 } 858 else if (I64 && sz == 8) 859 code_orrex(cdb.last(), REX_W); 860 freenode(e2); 861 break; 862 } 863 864 if (sz <= REGSIZE && *pretregs & mPSW) 865 { 866 /* If the expression is (_tls_array + ...), then the flags are not set 867 * since the linker may rewrite these instructions into something else. 
868 */ 869 if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar) 870 { 871 const s = e1.EV.Vsym; 872 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 873 { 874 goto L7; // don't assume flags are set 875 } 876 } 877 code_orflag(cdb.last(),CFpsw); 878 *pretregs &= ~mPSW; // flags already set 879 L7: { } 880 } 881 fixresult(cdb,e,retregs,pretregs); 882 } 883 884 885 /***************************** 886 * Handle multiply. 887 */ 888 889 @trusted 890 void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 891 { 892 //printf("cdmul()\n"); 893 elem *e1 = e.EV.E1; 894 elem *e2 = e.EV.E2; 895 if (*pretregs == 0) // if don't want result 896 { 897 codelem(cdb,e1,pretregs,false); // eval left leaf 898 *pretregs = 0; // in case they got set 899 codelem(cdb,e2,pretregs,false); 900 return; 901 } 902 903 //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 904 const tyml = tybasic(e1.Ety); 905 const ty = tybasic(e.Ety); 906 const oper = e.Eoper; 907 908 if (tyfloating(tyml)) 909 { 910 if (tyvector(tyml) || 911 config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) && 912 !(*pretregs & mST0) && 913 !(ty == TYldouble || ty == TYildouble) && // watch out for shrinkLongDoubleConstantIfPossible() 914 !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div 915 !(ty == TYllong) // or passing to function through integer register 916 ) 917 { 918 orthxmm(cdb,e,pretregs); 919 return; 920 } 921 if (config.exe & EX_posix) 922 orth87(cdb,e,pretregs); 923 else 924 opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv); 925 926 return; 927 } 928 929 if (tyxmmreg(tyml)) 930 { 931 orthxmm(cdb,e,pretregs); 932 return; 933 } 934 935 const uns = tyuns(tyml) || tyuns(e2.Ety); // 1 if signed operation, 0 if unsigned 936 const isbyte = tybyte(e.Ety) != 0; 937 const sz = _tysize[tyml]; 938 const ubyte rex = (I64 && sz == 8) ? REX_W : 0; 939 const uint grex = rex << 16; 940 const OPER opunslng = I16 ? 
OPu16_32 : OPu32_64; 941 942 code cs = void; 943 cs.Iflags = 0; 944 cs.Irex = 0; 945 946 switch (e2.Eoper) 947 { 948 case OPu16_32: 949 case OPs16_32: 950 case OPu32_64: 951 case OPs32_64: 952 { 953 if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper || 954 e1.Ecount || e2.Ecount) 955 goto default; 956 const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5; 957 regm_t retregsx = mAX; 958 codelem(cdb,e1.EV.E1,&retregsx,false); // eval left leaf 959 if (e2.EV.E1.Eoper == OPvar || 960 (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount) 961 ) 962 { 963 loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX); 964 } 965 else 966 { 967 regm_t rretregsx = ALLREGS & ~mAX; 968 scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue 969 getregs(cdb,mAX | mDX); 970 const rregx = findreg(rretregsx); 971 cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx 972 } 973 freenode(e.EV.E1); 974 freenode(e2); 975 fixresult(cdb,e,mAX | mDX,pretregs); 976 return; 977 } 978 979 case OPconst: 980 const e2factor = cast(targ_size_t)el_tolong(e2); 981 982 // Multiply by a constant 983 if (I32 && sz == REGSIZE * 2) 984 { 985 /* if (msw) 986 IMUL EDX,EDX,lsw 987 IMUL reg,EAX,msw 988 ADD reg,EDX 989 else 990 IMUL reg,EDX,lsw 991 MOV EDX,lsw 992 MUL EDX 993 ADD EDX,reg 994 */ 995 regm_t retregs = mAX | mDX; 996 codelem(cdb,e1,&retregs,false); // eval left leaf 997 reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX)); 998 getregs(cdb,mDX | mAX); 999 1000 const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1)); 1001 const msw = cast(targ_int)(e2factor >> (REGSIZE * 8)); 1002 1003 if (msw) 1004 { 1005 genmulimm(cdb,DX,DX,lsw); // IMUL EDX,EDX,lsw 1006 genmulimm(cdb,reg,AX,msw); // IMUL reg,EAX,msw 1007 cdb.gen2(0x03,modregrm(3,reg,DX)); // ADD reg,EAX 1008 } 1009 else 1010 genmulimm(cdb,reg,DX,lsw); // IMUL reg,EDX,lsw 1011 1012 movregconst(cdb,DX,lsw,0); // MOV EDX,lsw 1013 getregs(cdb,mDX); 1014 cdb.gen2(0xF7,modregrm(3,4,DX)); // MUL EDX 1015 cdb.gen2(0x03,modregrm(3,DX,reg)); // ADD 
EDX,reg 1016 1017 const resregx = mDX | mAX; 1018 freenode(e2); 1019 fixresult(cdb,e,resregx,pretregs); 1020 return; 1021 } 1022 1023 1024 const int pow2 = ispow2(e2factor); 1025 1026 if (sz > REGSIZE || !el_signx32(e2)) 1027 goto default; 1028 1029 if (config.target_cpu >= TARGET_80286) 1030 { 1031 if (I32 || I64) 1032 { 1033 // See if we can use an LEA instruction 1034 int ss; 1035 int ss2 = 0; 1036 int shift; 1037 1038 switch (e2factor) 1039 { 1040 case 12: ss = 1; ss2 = 2; goto L4; 1041 case 24: ss = 1; ss2 = 3; goto L4; 1042 1043 case 6: 1044 case 3: ss = 1; goto L4; 1045 1046 case 20: ss = 2; ss2 = 2; goto L4; 1047 case 40: ss = 2; ss2 = 3; goto L4; 1048 1049 case 10: 1050 case 5: ss = 2; goto L4; 1051 1052 case 36: ss = 3; ss2 = 2; goto L4; 1053 case 72: ss = 3; ss2 = 3; goto L4; 1054 1055 case 18: 1056 case 9: ss = 3; goto L4; 1057 1058 L4: 1059 { 1060 regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13); 1061 if (!resreg) 1062 resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13); 1063 1064 codelem(cdb,e.EV.E1,&resreg,false); 1065 getregs(cdb,resreg); 1066 reg_t reg = findreg(resreg); 1067 1068 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1069 modregxrmx(ss,reg,reg)); // LEA reg,[ss*reg][reg] 1070 assert((reg & 7) != BP); 1071 if (ss2) 1072 { 1073 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1074 modregxrm(ss2,reg,5)); 1075 cdb.last().IFL1 = FLconst; 1076 cdb.last().IEV1.Vint = 0; // LEA reg,0[ss2*reg] 1077 } 1078 else if (!(e2factor & 1)) // if even factor 1079 { 1080 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1081 code_orrex(cdb.last(),rex); 1082 } 1083 freenode(e2); 1084 fixresult(cdb,e,resreg,pretregs); 1085 return; 1086 } 1087 case 37: 1088 case 74: shift = 2; 1089 goto L5; 1090 case 13: 1091 case 26: shift = 0; 1092 goto L5; 1093 L5: 1094 { 1095 regm_t retregs = isbyte ? 
BYTEREGS : ALLREGS; 1096 regm_t resreg = *pretregs & (ALLREGS | mBP); 1097 if (!resreg) 1098 resreg = retregs; 1099 1100 // Don't use EBP 1101 resreg &= ~(mBP | mR13); 1102 if (!resreg) 1103 resreg = retregs; 1104 reg_t reg; 1105 allocreg(cdb,&resreg,®,TYint); 1106 1107 regm_t sregm = (ALLREGS & ~mR13) & ~resreg; 1108 codelem(cdb,e.EV.E1,&sregm,false); 1109 uint sreg = findreg(sregm); 1110 getregs(cdb,resreg | sregm); 1111 assert((sreg & 7) != BP); 1112 assert((reg & 7) != BP); 1113 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1114 modregxrmx(2,sreg,sreg)); // LEA reg,[sreg*4][sreg] 1115 if (shift) 1116 cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift 1117 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1118 modregxrmx(3,sreg,reg)); // LEA reg,[sreg*8][reg] 1119 if (!(e2factor & 1)) // if even factor 1120 { 1121 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1122 code_orrex(cdb.last(),rex); 1123 } 1124 freenode(e2); 1125 fixresult(cdb,e,resreg,pretregs); 1126 return; 1127 } 1128 1129 default: 1130 break; 1131 } 1132 } 1133 1134 regm_t retregs = isbyte ? BYTEREGS : ALLREGS; 1135 regm_t resreg = *pretregs & (ALLREGS | mBP); 1136 if (!resreg) 1137 resreg = retregs; 1138 1139 scodelem(cdb,e.EV.E1,&retregs,0,true); // eval left leaf 1140 const regx = findreg(retregs); 1141 reg_t rreg; 1142 allocreg(cdb,&resreg,&rreg,e.Ety); 1143 1144 // IMUL regx,imm16 1145 cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor); 1146 freenode(e2); 1147 fixresult(cdb,e,resreg,pretregs); 1148 return; 1149 } 1150 goto default; 1151 1152 case OPind: 1153 if (!e2.Ecount) // if not CSE 1154 goto case OPvar; // try OP reg,EA 1155 goto default; 1156 1157 default: // OPconst and operators 1158 //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg)); 1159 if (sz <= REGSIZE) 1160 { 1161 regm_t retregs = mAX; 1162 codelem(cdb,e1,&retregs,false); // eval left leaf 1163 regm_t rretregs = isbyte ? 
BYTEREGS & ~mAX 1164 : ALLREGS & ~(mAX|mDX); 1165 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1166 getregs(cdb,mAX | mDX); // trash these regs 1167 reg_t rreg = findreg(rretregs); 1168 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg 1169 if (I64 && isbyte && rreg >= 4) 1170 code_orrex(cdb.last(), REX); 1171 fixresult(cdb,e,mAX,pretregs); 1172 return; 1173 } 1174 else if (sz == 2 * REGSIZE) 1175 { 1176 regm_t retregs = mDX | mAX; 1177 codelem(cdb,e1,&retregs,false); // eval left leaf 1178 if (config.target_cpu >= TARGET_PentiumPro) 1179 { 1180 regm_t rretregs = allregs & ~retregs; // second arg 1181 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1182 regm_t rlo = findreglsw(rretregs); 1183 regm_t rhi = findregmsw(rretregs); 1184 /* IMUL rhi,EAX 1185 IMUL EDX,rlo 1186 ADD rhi,EDX 1187 MUL rlo 1188 ADD EDX,rhi 1189 */ 1190 getregs(cdb,mAX|mDX|mask(rhi)); 1191 cdb.gen2(0x0FAF,modregrm(3,rhi,AX)); 1192 cdb.gen2(0x0FAF,modregrm(3,DX,rlo)); 1193 cdb.gen2(0x03,modregrm(3,rhi,DX)); 1194 cdb.gen2(0xF7,modregrm(3,4,rlo)); 1195 cdb.gen2(0x03,modregrm(3,DX,rhi)); 1196 fixresult(cdb,e,mDX|mAX,pretregs); 1197 return; 1198 } 1199 else 1200 { 1201 regm_t rretregs = mCX | mBX; // second arg 1202 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1203 callclib(cdb,e,CLIB.lmul,pretregs,0); 1204 return; 1205 } 1206 } 1207 assert(0); 1208 1209 case OPvar: 1210 if (!I16 && sz <= REGSIZE) 1211 { 1212 if (sz > 1) // no byte version 1213 { 1214 // Generate IMUL r32,r/m32 1215 regm_t retregs = *pretregs & (ALLREGS | mBP); 1216 if (!retregs) 1217 retregs = ALLREGS; 1218 codelem(cdb,e1,&retregs,false); // eval left leaf 1219 regm_t resreg = retregs; 1220 loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs); 1221 freenode(e2); 1222 fixresult(cdb,e,resreg,pretregs); 1223 return; 1224 } 1225 } 1226 else 1227 { 1228 if (sz == 2 * REGSIZE) 1229 { 1230 if (e.EV.E1.Eoper != opunslng || 1231 e1.Ecount) 1232 goto default; // have to handle it with 
codelem() 1233 1234 regm_t retregs = ALLREGS & ~(mAX | mDX); 1235 codelem(cdb,e1.EV.E1,&retregs,false); // eval left leaf 1236 const reg = findreg(retregs); 1237 getregs(cdb,mAX); 1238 genmovreg(cdb,AX,reg); // MOV AX,reg 1239 loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX); // MUL EA+2 1240 getregs(cdb,retregs); 1241 cdb.gen1(0x90 + reg); // XCHG AX,reg 1242 getregs(cdb,mAX | mDX); 1243 if ((cs.Irm & 0xC0) == 0xC0) // if EA is a register 1244 loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA 1245 else 1246 { getlvalue_lsw(&cs); 1247 cdb.gen(&cs); // MUL EA 1248 } 1249 cdb.gen2(0x03,modregrm(3,DX,reg)); // ADD DX,reg 1250 1251 freenode(e1); 1252 fixresult(cdb,e,mAX | mDX,pretregs); 1253 return; 1254 } 1255 assert(sz <= REGSIZE); 1256 } 1257 1258 // loadea() handles CWD or CLR DX for divides 1259 regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX; 1260 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1261 loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0, 1262 mAX, 1263 mAX | mDX); 1264 freenode(e2); 1265 fixresult(cdb,e,mAX,pretregs); 1266 return; 1267 } 1268 assert(0); 1269 } 1270 1271 1272 /***************************** 1273 * Handle divide, modulo and remquo. 1274 * Note that modulo isn't defined for doubles. 
*/

@trusted
void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Review notes.
     * Params:
     *  cdb      = receives the generated instructions
     *  e        = the divide/modulo/remquo elem; e.EV.E1 is the dividend, e.EV.E2 the divisor
     *  pretregs = mask of acceptable result registers; 0 means the result is not wanted.
     *             Passed on to fixresult()/callclib(), which place the result.
     *
     * Strategies, in the order they are tried below:
     *  - result not wanted: just evaluate both operands
     *  - floating point or XMM operand types: delegated to orthxmm(), orth87() or opdouble()
     *  - constant divisor with CFG4speed set: multiply sequences driven by
     *    choose_multiplier() (signed) or udiv_coefficients() (unsigned)
     *  - constant power-of-2 divisor: shift/mask sequences (register pair and single register)
     *  - otherwise: DIV/IDIV with the dividend in AX (DX:AX), or a callclib() call
     *    when the operand is 2*REGSIZE wide
     */
    //printf("cddiv()\n");
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e1,pretregs,false);         // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    const tyml = tybasic(e1.Ety);
    const ty = tybasic(e.Ety);
    const oper = e.Eoper;

    if (tyfloating(tyml))
    {
        if (tyvector(tyml) ||
            config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
            !(*pretregs & mST0) &&
            !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
            !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
            !(ty == TYllong)  //   or passing to function through integer register
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        if (config.exe & EX_posix)
            orth87(cdb,e,pretregs);
        else
            opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);

        return;
    }

    if (tyxmmreg(tyml))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if uint operation, 0 if not
    const isbyte = tybyte(e.Ety) != 0;
    const sz = _tysize[tyml];
    const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
    const uint grex = rex << 16;

    code cs = void;
    cs.Iflags = 0;
    cs.IFL2 = 0;
    cs.Irex = 0;

    switch (e2.Eoper)
    {
        case OPconst:
            auto d = cast(targ_size_t)el_tolong(e2);
            bool neg = false;
            const e2factor = d;                 // divisor as written; d becomes its magnitude
            if (!uns && cast(targ_llong)e2factor < 0)
            {   neg = true;
                d = -d;
            }

            // Signed divide by a constant
            if ((d & (d - 1)) &&
                ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
                config.flags4 & CFG4speed && !uns)
            {
                /* R1 / 10
                 *
                 *  MOV     EAX,m
                 *  IMUL    R1
                 *  MOV     EAX,R1
                 *  SAR     EAX,31
                 *  SAR     EDX,shpost
                 *  SUB     EDX,EAX
                 *  IMUL    EAX,EDX,d
                 *  SUB     R1,EAX
                 *
                 * EDX = quotient
                 * R1 = remainder
                 */
                assert(sz == 4 || sz == 8);

                ulong m;
                int shpost;
                const int N = sz * 8;
                const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);

                regm_t regm = allregs & ~(mAX | mDX);
                codelem(cdb,e1,&regm,false);       // eval left leaf
                const reg_t reg = findreg(regm);
                getregs(cdb,regm | mDX | mAX);

                /* Algorithm 5.2
                 * if m>=2**(N-1)
                 *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
                 * else
                 *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
                 * if (neg)
                 *    q = -q
                 */
                const bool mgt = mhighbit || m >= (1UL << (N - 1));
                movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
                cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL R1
                if (mgt)
                    cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,R1
                getregsNoSave(mAX);                                     // EAX no longer contains 'm'
                genmovreg(cdb, AX, reg);                                // MOV EAX,R1
                cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
                if (shpost)
                    cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
                reg_t r3;
                if (neg && oper == OPdiv)
                {
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
                    r3 = AX;
                }
                else
                {
                    cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
                    r3 = DX;
                }

                // r3 is quotient
                regm_t resregx;
                switch (oper)
                {   case OPdiv:
                        resregx = mask(r3);
                        break;

                    case OPmod:
                        assert(reg != AX && r3 == DX);
                        if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
                        {
                            cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
                        }
                        else
                        {
                            movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
                            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
                            getregsNoSave(mAX);                             // EAX no longer contains 'd'
                        }
                        cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
                        resregx = regm;
                        break;

                    case OPremquo:
                        assert(reg != AX && r3 == DX);
                        if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
                        {
                            cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);     // IMUL EAX,EDX,d
                        }
                        else
                        {
                            movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
                            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
                        }
                        cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
                        genmovreg(cdb, AX, r3);                             // MOV EAX,r3
                        if (neg)
                            cdb.gen2(0xF7,grex | modregrm(3,3,AX));         // NEG EAX
                        genmovreg(cdb, DX, reg);                            // MOV EDX,R1
                        resregx = mDX | mAX;
                        break;

                    default:
                        assert(0);
                }
                freenode(e2);
                fixresult(cdb,e,resregx,pretregs);
                return;
            }

            // Unsigned divide by a constant
            if (e2factor > 2 && (e2factor & (e2factor - 1)) &&
                ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) &&
                config.flags4 & CFG4speed && uns)
            {
                assert(sz == 4 || sz == 8);

                reg_t r3;
                regm_t regm;
                reg_t reg;
                ulong m;
                int shpre;
                int shpost;
                if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
                {
                    /* t1 = MULUH(m, n)
                     * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
                     *   MOV   EAX,reg
                     *   MOV   EDX,m
                     *   MUL   EDX
                     *   MOV   EAX,reg
                     *   SUB   EAX,EDX
                     *   SHR   EAX,1
                     *   LEA   R3,[EAX][EDX]
                     *   SHR   R3,shpost-1
                     */
                    assert(shpre == 0);

                    regm = allregs & ~(mAX | mDX);
                    codelem(cdb,e1,&regm,false);       // eval left leaf
                    reg = findreg(regm);
                    getregs(cdb,mAX | mDX);
                    genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                    movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                    getregs(cdb,regm | mDX | mAX);
                    cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
                    genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
                    cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                    regm_t regm3 = allregs;
                    if (oper == OPmod || oper == OPremquo)
                    {
                        // the quotient must not land on the original value,
                        // nor on AX when AX is needed for the multiply below
                        regm3 &= ~regm;
                        if (oper == OPremquo || !el_signx32(e2))
                            regm3 &= ~mAX;
                    }
                    allocreg(cdb,&regm3,&r3,TYint);
                    cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
                    if (shpost != 1)
                        cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
                }
                else
                {
                    /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
                     *   SHR   EAX,shpre
                     *   MOV   reg,m
                     *   MUL   reg
                     *   SHR   EDX,shpost
                     */
                    regm = mAX;
                    if (oper == OPmod || oper == OPremquo)
                        regm = allregs & ~(mAX|mDX);
                    codelem(cdb,e1,&regm,false);       // eval left leaf
                    reg = findreg(regm);

                    if (reg != AX)
                    {
                        getregs(cdb,mAX);
                        genmovreg(cdb,AX,reg);                              // MOV EAX,reg
                    }
                    if (shpre)
                    {
                        getregs(cdb,mAX);
                        cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
                    }
                    getregs(cdb,mDX);
                    movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                    getregs(cdb,mDX | mAX);
                    cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
                    if (shpost)
                        cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
                    r3 = DX;
                }

                regm_t resreg;
                switch (oper)
                {   case OPdiv:
                        // r3 = quotient
                        resreg = mask(r3);
                        break;

                    case OPmod:
                        /* reg = original value
                         * r3  = quotient
                         */
                        assert(!(regm & mAX));
                        if (el_signx32(e2))
                        {
                            cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
                        }
                        else
                        {
                            assert(!(mask(r3) & mAX));
                            movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
                            getregs(cdb,mAX);
                            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
                        }
                        getregs(cdb,regm);
                        cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
                        resreg = regm;
                        break;

                    case OPremquo:
                        /* reg = original value
                         * r3  = quotient
                         */
                        assert(!(mask(r3) & (mAX|regm)));
                        assert(!(regm & mAX));
                        if (el_signx32(e2))
                        {
                            cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
                        }
                        else
                        {
                            movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor
                            getregs(cdb,mAX);
                            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
                        }
                        getregs(cdb,regm);
                        cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
                        genmovreg(cdb, AX, r3);                           // MOV EAX,r3
                        genmovreg(cdb, DX, reg);                          // MOV EDX,reg
                        resreg = mDX | mAX;
                        break;

                    default:
                        assert(0);
                }
                freenode(e2);
                fixresult(cdb,e,resreg,pretregs);
                return;
            }

            const int pow2 = ispow2(e2factor);

            // Register pair signed divide by power of 2
            if (sz == REGSIZE * 2 &&
                (oper == OPdiv) && !uns &&
                pow2 != -1 &&
                I32 // not set up for I64 cent yet
               )
            {
                regm_t retregs = mDX | mAX;
                if (pow2 == 63 && !(retregs & BYTEREGS & mLSW))
                    retregs = (retregs & mMSW) | (BYTEREGS & mLSW);  // because of SETZ

                codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
                const rhi = findregmsw(retregs);
                const rlo = findreglsw(retregs);
                freenode(e2);
                getregs(cdb,retregs);

                if (pow2 < 32)
                {
                    reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);

                    genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
                    if (pow2 == 1)
                        cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
                    else
                    {
                        cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
                        cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
                    }
                    cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
                    cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
                    cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
                    cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
                }
                else if (pow2 == 32)
                {
                    reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);

                    genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
                    cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
                    cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
                    cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
                }
                else if (pow2 < 63)
                {
                    reg_t r1 = allocScratchReg(cdb, allregs & ~retregs);
                    reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));

                    genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
                    cdb.genmovreg(r2,r1);                                         // MOV r2,r1

                    if (pow2 == 33)
                    {
                        cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
                        cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
                        cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
                    }
                    else
                    {
                        cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
                        cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
                        cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
                    }

                    cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
                    cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
                }
                else
                {
                    // This may be better done by cgelem.d
                    assert(pow2 == 63);
                    /* The divisor is the most negative value, so the quotient is 1 when
                     * rhi:rlo equals it and 0 otherwise. Adding 0x8000_0000 to rhi yields 0
                     * exactly when rhi == 0x8000_0000, which is what the OR/SETZ below test.
                     * Opcode 0x81 needs extension /0 for ADD; /4 would encode AND.
                     */
                    cdb.genc2(0x81,grex | modregrmx(3,0,rhi),0x8000_0000); // ADD rhi,0x8000_0000
                    cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
                    cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
                    cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
                    movregconst(cdb,rhi,0,0);                              // MOV rhi,0
                }

                fixresult(cdb,e,retregs,pretregs);
                return;
            }

            // Register pair signed modulo by power of 2
            if (sz == REGSIZE * 2 &&
                (oper == OPmod) && !uns &&
                pow2 != -1 &&
                I32 // not set up for I64 cent yet
               )
            {
                regm_t retregs = mDX | mAX;
                codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
                const rhi = findregmsw(retregs);
                const rlo = findreglsw(retregs);
                freenode(e2);
                getregs(cdb,retregs);

                regm_t scratchm = allregs & ~retregs;
                if (pow2 == 63)
                    scratchm &= BYTEREGS;               // because of SETZ
                reg_t r1 = allocScratchReg(cdb, scratchm);

                if (pow2 < 32)
                {
                    cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
                    cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
                    cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
                    cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
                    cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
                    cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
                    cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
                }
                else if (pow2 == 32)
                {
                    cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
                    cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
                    cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
                    cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
                }
                else if (pow2 < 63)
                {
                    reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1)));

                    cdb.genmovreg(r1,rhi);                                      // MOV  r1,rhi
                    cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR  r1,31
                    cdb.genmovreg(r2,r1);                                       // MOV  r2,r1
                    cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
                    cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR  r2,64-pow2
                    cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD  rlo,r1
                    cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC  rhi,r2
                    cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND  rhi,(1<<(pow2-32))-1
                    cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB  rlo,r1
                    cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB  rhi,r2
                }
                else
                {
                    // This may be better done by cgelem.d
                    assert(pow2 == 63);

                    cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
                    cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
                    cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
                    cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
                    cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
                }

                fixresult(cdb,e,retregs,pretregs);
                return;
            }

            if (sz > REGSIZE || !el_signx32(e2))
                goto default;

            // Special code for signed divide or modulo by power of 2
            if ((sz == REGSIZE || (I64 && sz == 4)) &&
                (oper == OPdiv || oper == OPmod) && !uns &&
                pow2 != -1 &&
                !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv)
               )
            {
                if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386)
                {
                    /* MOV r,reg
                       SHR r,31
                       ADD reg,r
                       SAR reg,1
                     */
                    regm_t retregs = allregs;
                    codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
                    const reg = findreg(retregs);
                    freenode(e2);
                    getregs(cdb,retregs);

                    reg_t r = allocScratchReg(cdb, allregs & ~retregs);
                    genmovreg(cdb,r,reg);                        // MOV r,reg
                    cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
                    cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
                    cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1
                    regm_t resreg = retregs;
                    fixresult(cdb,e,resreg,pretregs);
                    return;
                }

                regm_t resreg;
                switch (oper)
                {
                    case OPdiv:
                        resreg = mAX;
                        break;

                    case OPmod:
                        resreg = mDX;
                        break;

                    case OPremquo:
                        resreg = mDX | mAX;
                        break;

                    default:
                        assert(0);
                }

                regm_t retregs = mAX;
                codelem(cdb,e.EV.E1,&retregs,false);  // eval left leaf
                freenode(e2);
                getregs(cdb,mAX | mDX);             // modify these regs
                cdb.gen1(0x99);                     // CWD
                code_orrex(cdb.last(), rex);
                if (pow2 == 1)
                {
                    if (oper == OPdiv)
                    {
                        cdb.gen2(0x2B,grex | modregrm(3,AX,DX));  // SUB AX,DX
                        cdb.gen2(0xD1,grex | modregrm(3,7,AX));   // SAR AX,1
                    }
                    else // OPmod
                    {
                        cdb.gen2(0x33,grex | modregrm(3,AX,DX));  // XOR AX,DX
                        cdb.genc2(0x81,grex | modregrm(3,4,AX),1);    // AND AX,1
                        cdb.gen2(0x03,grex | modregrm(3,DX,AX));  // ADD DX,AX
                    }
                }
                else
                {   targ_ulong m;

                    m = (1 << pow2) - 1;
                    if (oper == OPdiv)
                    {
                        cdb.genc2(0x81,grex | modregrm(3,4,DX),m);  // AND DX,m
                        cdb.gen2(0x03,grex | modregrm(3,AX,DX));    // ADD AX,DX
                        // Be careful not to generate this for 8088
                        assert(config.target_cpu >= TARGET_80286);
                        cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
                    }
                    else // OPmod
                    {
                        cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
                        cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
                        cdb.genc2(0x81,grex | modregrm(3,4,AX),m);  // AND AX,mask
                        cdb.gen2(0x33,grex | modregrm(3,AX,DX));    // XOR AX,DX
                        cdb.gen2(0x2B,grex | modregrm(3,AX,DX));    // SUB AX,DX
                        resreg = mAX;                               // this sequence leaves the remainder in AX
                    }
                }
                fixresult(cdb,e,resreg,pretregs);
                return;
            }
            goto default;

        case OPind:
            if (!e2.Ecount)                        // if not CSE
                    goto case OPvar;                        // try OP reg,EA
            goto default;

        default:                                    // OPconst and operators
            //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
            regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX;
            codelem(cdb,e1,&retregs,false);           // eval left leaf
            regm_t rretregs;
            if (sz <= REGSIZE)                  // dedicated regs for div
            {
                // pick some other regs
                rretregs = isbyte ? BYTEREGS & ~mAX
                                : ALLREGS & ~(mAX|mDX);
            }
            else
            {
                assert(sz <= 2 * REGSIZE);
                rretregs = mCX | mBX;           // second arg
            }
            scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
            if (sz <= REGSIZE)
            {
                getregs(cdb,mAX | mDX);     // trash these regs
                if (uns)                    // unsigned divide
                {
                    movregconst(cdb,DX,0,(sz == 8) ? 64 : 0);  // MOV DX,0
                    getregs(cdb,mDX);
                }
                else
                {
                    cdb.gen1(0x99);                 // CWD
                    code_orrex(cdb.last(),rex);
                }
                reg_t rreg = findreg(rretregs);
                cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg
                if (I64 && isbyte && rreg >= 4)
                    code_orrex(cdb.last(), REX);
                regm_t resreg;
                switch (oper)
                {
                    case OPdiv:
                        resreg = mAX;
                        break;

                    case OPmod:
                        resreg = mDX;
                        break;

                    case OPremquo:
                        resreg = mDX | mAX;
                        break;

                    default:
                        assert(0);
                }
                fixresult(cdb,e,resreg,pretregs);
            }
            else if (sz == 2 * REGSIZE)
            {
                uint lib;
                switch (oper)
                {
                    case OPdiv:
                    case OPremquo:
                        lib = uns ? CLIB.uldiv : CLIB.ldiv;
                        break;

                    case OPmod:
                        lib = uns ? CLIB.ulmod : CLIB.lmod;
                        break;

                    default:
                        assert(0);
                }

                regm_t keepregs = I32 ? mSI | mDI : 0;
                callclib(cdb,e,lib,pretregs,keepregs);
            }
            else
                    assert(0);
            return;

        case OPvar:
            if (I16 || sz == 2 * REGSIZE)
                goto default;            // have to handle it with codelem()

            // loadea() handles CWD or CLR DX for divides
            regm_t retregs = mAX;
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
            loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0,
                   mAX | mDX,
                   mAX | mDX);
            freenode(e2);
            regm_t resreg;
            switch (oper)
            {
                case OPdiv:
                    resreg = mAX;
                    break;

                case OPmod:
                    resreg = mDX;
                    break;

                case OPremquo:
                    resreg = mDX | mAX;
                    break;

                default:
                    assert(0);
            }
            fixresult(cdb,e,resreg,pretregs);
            return;
    }
    assert(0);
}


/***************************
 * Handle OPnot and OPbool.
 * Generate:
 *      c:      [evaluate e1]
 *      cfalse: [save reg code]
 *              clr     reg
 *              jmp     cnop
 *      ctrue:  [save reg code]
 *              clr     reg
 *              inc     reg
 *      cnop:   nop
 *
 * The jump sequence above is the general fallback at the end of the function;
 * integer operands are first tried with shorter CMP/SETcc or NEG/SBB sequences.
 * Params:
 *      cdb = receives the generated instructions
 *      e = the OPnot or OPbool elem; e.EV.E1 is the operand
 *      pretregs = mask of acceptable result registers; when it is 0 or exactly mPSW
 *                 the operand is simply evaluated with that same mask
 */

@trusted
void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdnot()\n");
    reg_t reg;
    tym_t forflags;
    regm_t retregs;
    elem *e1 = e.EV.E1;

    if (*pretregs == 0)
        goto L1;
    if (*pretregs == mPSW)
    {   //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
    L1:
        codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
        return;
    }

    OPER op = e.Eoper;
    uint sz = tysize(e1.Ety);
    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;

    if (!tyfloating(e1.Ety))
    {
        if (sz <= REGSIZE && e1.Eoper == OPvar)
        {   code cs;

            getlvalue(cdb,&cs,e1,0);
            freenode(e1);
            if (!I16 && sz == 2)
                cs.Iflags |= CFopsize;

            retregs = *pretregs & (ALLREGS | mBP);
            if (config.target_cpu >= TARGET_80486 &&
                tysize(e.Ety) == 1)
            {
                // compare the variable against 0, using a register already holding 0 if there is one
                if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,reg))
                {
                    cs.Iop = 0x39;
                    if (I64 && (sz == 1) && reg >= 4)
                        cs.Irex |= REX;
                }
                else
                {   cs.Iop = 0x81;
                    reg = 7;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = 0;
                }
                cs.Iop ^= (sz == 1);
                code_newreg(&cs,reg);
                cdb.gen(&cs);                             // CMP e1,0

                retregs &= BYTEREGS;
                if (!retregs)
                    retregs = BYTEREGS;
                allocreg(cdb,&retregs,&reg,TYint);

                const opcode_t iop = (op == OPbool)
                    ? 0x0F95    // SETNZ rm8
                    : 0x0F94;   // SETZ rm8
                cdb.gen2(iop, modregrmx(3,0,reg));
                if (reg >= 4)
                    code_orrex(cdb.last(), REX);
                if (op == OPbool)
                    *pretregs &= ~mPSW;
                goto L4;
            }

            if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,reg))
                cs.Iop = 0x39;
            else
            {   cs.Iop = 0x81;
                reg = 7;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 1;
            }
            if (I64 && (sz == 1) && reg >= 4)
                cs.Irex |= REX;
            cs.Iop ^= (sz == 1);
            code_newreg(&cs,reg);
            cdb.gen(&cs);                         // CMP e1,1

            allocreg(cdb,&retregs,&reg,TYint);
            op ^= (OPbool ^ OPnot);                 // switch operators
            goto L2;
        }
        else if (config.target_cpu >= TARGET_80486 &&
            tysize(e.Ety) == 1)
        {
            int jop = jmpopcode(e.EV.E1);
            retregs = mPSW;
            codelem(cdb,e.EV.E1,&retregs,false);
            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);

            int iop = 0x0F90 | (jop & 0x0F);        // SETcc rm8
            if (op == OPnot)
                iop ^= 1;
            cdb.gen2(iop,grex | modregrmx(3,0,reg));
            if (reg >= 4)
                code_orrex(cdb.last(), REX);
            if (op == OPbool)
                *pretregs &= ~mPSW;
            goto L4;
        }
        else if (sz <= REGSIZE &&
            // NEG bytereg is too expensive
            (sz != 1 || config.target_cpu < TARGET_PentiumPro))
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (sz == 1 && !(retregs &= BYTEREGS))
                retregs = BYTEREGS;
            codelem(cdb,e.EV.E1,&retregs,false);
            reg = findreg(retregs);
            getregs(cdb,retregs);
            cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
            code_orflag(cdb.last(),CFpsw);
            if (!I16 && sz == SHORTSIZE)
                code_orflag(cdb.last(),CFopsize);
        L2:
            genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
            code_orrex(cdb.last(), rex);
            // At this point, reg==0 if e1==0, reg==-1 if e1!=0
            if (op == OPnot)
            {
                if (I64)
                    cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
                else
                    cdb.gen1(0x40 + reg);                        // INC reg
            }
            else
                cdb.gen2(0xF7,grex | modregrmx(3,3,reg));    // NEG reg
            if (*pretregs & mPSW)
            {   code_orflag(cdb.last(),CFpsw);
                *pretregs &= ~mPSW;         // flags are always set anyway
            }
        L4:
            fixresult(cdb,e,retregs,pretregs);
            return;
        }
    }
    code *cnop = gennop(null);
    code *ctrue = gennop(null);
    logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue);
    forflags = *pretregs & mPSW;
    if (I64 && sz == 8)
        forflags |= 64;
    assert(tysize(e.Ety) <= REGSIZE);              // result better be int
    CodeBuilder cdbfalse;
    cdbfalse.ctor();
    allocreg(cdbfalse,pretregs,&reg,e.Ety);        // allocate reg for result
    code *cfalse = cdbfalse.finish();
    CodeBuilder cdbtrue;
    cdbtrue.ctor();
    cdbtrue.append(ctrue);
    for (code *c1 = cfalse; c1; c1 = code_next(c1))
        cdbtrue.gen(c1);                                      // duplicate reg save code
    CodeBuilder cdbfalse2;
    cdbfalse2.ctor();
    movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
    regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
    movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
    regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
    genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue
    cdb.append(cfalse);
    cdb.append(cdbfalse2);
    cdb.append(cdbtrue);
    cdb.append(cnop);
}


/************************
 * Complement operator
 *
 * Evaluates e.EV.E1 into a register (pair) and emits NOT on it.
 * Params:
 *      cdb = receives the generated instructions
 *      e = the complement elem; e.EV.E1 is the operand
 *      pretregs = mask of acceptable result registers; 0 means only evaluate the operand
 */

@trusted
void cdcom(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    tym_t tym = tybasic(e.Ety);
    int sz = _tysize[tym];
    uint rex = (I64 && sz == 8) ? REX_W : 0;
    regm_t possregs = (sz == 1) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed

    if (0 && sz == 4 * REGSIZE)          // disabled by the constant 0
    {
        cdb.gen2(0xF7,modregrm(3,2,AX));   // NOT AX
        cdb.gen2(0xF7,modregrm(3,2,BX));   // NOT BX
        cdb.gen2(0xF7,modregrm(3,2,CX));   // NOT CX
        cdb.gen2(0xF7,modregrm(3,2,DX));   // NOT DX
    }
    else
    {
        const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs);
        const op = (sz == 1) ? 0xF6 : 0xF7;
        genregs(cdb,op,2,reg);     // NOT reg
        code_orrex(cdb.last(), rex);
        if (I64 && sz == 1 && reg >= 4)
            code_orrex(cdb.last(), REX);
        if (sz == 2 * REGSIZE)
        {
            const reg2 = findreglsw(retregs);
            genregs(cdb,op,2,reg2);  // NOT reg+1
        }
    }
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Bswap operator
 *
 * Emits XCHG regL,regH for 2 byte operands, BSWAP for 4 and 8 byte operands,
 * and BSWAP of both halves followed by XCHG for a 2*REGSIZE register pair.
 * Params:
 *      cdb = receives the generated instructions
 *      e = the bswap elem; e.EV.E1 is the operand
 *      pretregs = mask of acceptable result registers; 0 means only evaluate the operand
 */

@trusted
void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tym = tybasic(e.Ety);
    const sz = _tysize[tym];
    const posregs = (sz == 2) ? mAX|mBX|mCX|mDX : allregs;   // 2 byte swap is restricted to AX..DX
    regm_t retregs = *pretregs & posregs;
    if (retregs == 0)
        retregs = posregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);        // retregs will be destroyed
    if (sz == 2 * REGSIZE)
    {
        assert(sz != 16);                       // no cent support yet
        const msreg = findregmsw(retregs);
        cdb.gen1(0x0FC8 + (msreg & 7));         // BSWAP msreg
        const lsreg = findreglsw(retregs);
        cdb.gen1(0x0FC8 + (lsreg & 7));         // BSWAP lsreg
        cdb.gen2(0x87,modregrm(3,msreg,lsreg)); // XCHG msreg,lsreg
    }
    else
    {
        const reg = findreg(retregs);
        if (sz == 2)
        {
            genregs(cdb,0x86,reg+4,reg);    // XCHG regL,regH
        }
        else
        {
            assert(sz == 4 || sz == 8);
            cdb.gen1(0x0FC8 + (reg & 7));      // BSWAP reg
            ubyte rex = 0;
            if (sz == 8)
                rex |= REX_W;
            if (reg & 8)
                rex |= REX_B;
            if (rex)
                code_orrex(cdb.last(), rex);
        }
    }
    fixresult(cdb,e,retregs,pretregs);
}

/*************************
 * ?: operator
 *
 * e.EV.E1 is the condition; e.EV.E2 holds the two alternatives as its E1 and E2.
 * Three special cases are tried before the general two-branch code:
 *      (e ? e : f), a relational condition selecting between two constants
 *      (done without a branch), and (e ? c : f) with constant c.
 * Params:
 *      cdb = receives the generated instructions
 *      e = the conditional elem
 *      pretregs = mask of acceptable result registers; updated to where the result was put
 */

@trusted
void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    con_t regconold,regconsave;
    uint stackpushold,stackpushsave;
    int ehindexold,ehindexsave;
    uint sz2;

    /* vars to save state of 8087 */
    int stackusedold,stackusedsave;
    NDP[global87.stack.length] _8087old;
    NDP[global87.stack.length] _8087save;

    //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    elem *e21 = e2.EV.E1;
    elem *e22 = e2.EV.E2;
    regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
    const op1 = e1.Eoper;
    uint sz1 = tysize(e1.Ety);
    uint jop = jmpopcode(e1);

    uint jop1 = jmpopcode(e21);
    uint jop2 = jmpopcode(e22);

    docommas(cdb,e1);
    cgstate.stackclean++;

    if (!OTrel(op1) && e1 == e21 &&
        sz1 <= REGSIZE && !tyfloating(e1.Ety))
    {   // Recognize (e ? e : f)

        code *cnop1 = gennop(null);
        regm_t retregs = *pretregs | mPSW;
        codelem(cdb,e1,&retregs,false);

        cse_flush(cdb,1);                // flush CSEs to memory
        genjmp(cdb,jop,FLcode,cast(block *)cnop1);
        freenode(e21);

        regconsave = regcon;
        stackpushsave = stackpush;

        retregs |= psw;
        if (retregs & (mBP | ALLREGS))
            regimmed_set(findreg(retregs),0);
        codelem(cdb,e22,&retregs,false);

        andregcon(regconsave);
        assert(stackpushsave == stackpush);

        *pretregs = retregs;
        freenode(e2);
        cdb.append(cnop1);
        cgstate.stackclean--;
        return;
    }

    if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
        !e1.Ecount &&
        (jop == JC || jop == JNC) &&
        (sz2 = tysize(e2.Ety)) <= REGSIZE &&
        e21.Eoper == OPconst &&
        e22.Eoper == OPconst
       )
    {
        uint sz = tysize(e.Ety);
        uint rex = (I64 && sz == 8) ? REX_W : 0;
        uint grex = rex << 16;

        regm_t retregs;
        targ_size_t v1,v2;

        if (sz2 != 1 || I64)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
        }
        else
        {
            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
        }

        cdcmp_flag = 1 | rex;
        v1 = cast(targ_size_t)e21.EV.Vllong;
        v2 = cast(targ_size_t)e22.EV.Vllong;
        if (jop == JNC)
        {   v1 = v2;
            v2 = cast(targ_size_t)e21.EV.Vllong;
        }

        opcode_t opcode = 0x81;
        switch (sz2)
        {   case 1:     opcode--;
                        v1 = cast(byte) v1;
                        v2 = cast(byte) v2;
                        break;

            case 2:     v1 = cast(short) v1;
                        v2 = cast(short) v2;
                        break;

            case 4:     v1 = cast(int) v1;
                        v2 = cast(int) v2;
                        break;
            default:
                        break;
        }

        if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
        {
            // only zero-extension from 32-bits is available for 'or'
        }
        else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
        {
            // only sign-extension from 32-bits is available for 'and'
        }
        else
        {
            codelem(cdb,e1,&retregs,false);
            const reg = findreg(retregs);

            if (v1 == 0 && v2 == ~cast(targ_size_t)0)
            {
                cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
                if (I64 && sz2 == REGSIZE)
                    code_orrex(cdb.last(), REX_W);
                if (I64 && sz2 == 1 && reg >= 4)
                    code_orrex(cdb.last(), REX);
            }
            else
            {
                v1 -= v2;
                cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
                if (I64 && sz2 == 1 && reg >= 4)
                    code_orrex(cdb.last(), REX);
                if (v2 == 1 && !I64)
                    cdb.gen1(0x40 + reg);                     // INC reg
                else if (v2 == -1L && !I64)
                    cdb.gen1(0x48 + reg);                     // DEC reg
                else
                {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
                    if (I64 && sz2 == 1 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }

            freenode(e21);
            freenode(e22);
            freenode(e2);

            fixresult(cdb,e,retregs,pretregs);
            cgstate.stackclean--;
            return;
        }
    }

    if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
        op1 != OPnot && op1 != OPbool &&
        e21.Eoper == OPconst &&
        sz1 <= REGSIZE &&
        *pretregs & (mBP | ALLREGS) &&
        tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
    {   // Recognize (e ? c : f)

        code *cnop1 = gennop(null);
        regm_t retregs = mPSW;
        jop = jmpopcode(e1);            // get jmp condition
        codelem(cdb,e1,&retregs,false);

        // Set the register with e21 without affecting the flags
        retregs = *pretregs & (ALLREGS | mBP);
        if (retregs & ~regcon.mvar)
            retregs &= ~regcon.mvar;    // don't disturb register variables
        // NOTE: see my email (sign extension bug? possible fix, some questions
        reg_t reg;
        regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
        retregs = mask(reg);

        cse_flush(cdb,1);                // flush CSE's to memory
        genjmp(cdb,jop,FLcode,cast(block *)cnop1);
        freenode(e21);

        regconsave = regcon;
        stackpushsave = stackpush;

        codelem(cdb,e22,&retregs,false);

        andregcon(regconsave);
        assert(stackpushsave == stackpush);

        freenode(e2);
        cdb.append(cnop1);
        fixresult(cdb,e,retregs,pretregs);
        cgstate.stackclean--;
        return;
    }

    /* General case: generate both branches separately. The register, stack push
     * and 8087 stack state seen after the condition is saved, restored before the
     * second branch, and the two resulting states are checked/merged afterwards.
     */
    code *cnop1 = gennop(null);
    code *cnop2 = gennop(null);         // dummy target addresses
    logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition
    regconold = regcon;
    stackusedold = global87.stackused;
    stackpushold = stackpush;
    memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
    regm_t retregs = *pretregs;
    CodeBuilder cdb1;
    cdb1.ctor();
    if (psw && jop1 != JNE)
    {
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb1,e21,&retregs,false);
        fixresult(cdb1,e21,retregs,pretregs);
    }
    else
        codelem(cdb1,e21,&retregs,false);

    if (CPP && e2.Eoper == OPcolon2)
    {
        code cs;

        // This is necessary so that any cleanup code on one branch
        // is redone on the other branch.
        cs.Iop = ESCAPE | ESCmark2;
        cs.Iflags = 0;
        cs.Irex = 0;
        cdb.gen(&cs);
        cdb.append(cdb1);
        cs.Iop = ESCAPE | ESCrelease2;
        cdb.gen(&cs);
    }
    else
        cdb.append(cdb1);

    regconsave = regcon;
    regcon = regconold;

    stackpushsave = stackpush;
    stackpush = stackpushold;

    stackusedsave = global87.stackused;
    global87.stackused = stackusedold;

    memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
    memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);

    retregs |= psw;                     // PSW bit may have been trashed
    *pretregs |= psw;
    CodeBuilder cdb2;
    cdb2.ctor();
    if (psw && jop2 != JNE)
    {
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb2,e22,&retregs,false);
        fixresult(cdb2,e22,retregs,pretregs);
    }
    else
        codelem(cdb2,e22,&retregs,false);   // use same regs as E1
    *pretregs = retregs | psw;
    andregcon(regconold);
    andregcon(regconsave);
    assert(global87.stackused == stackusedsave);
    assert(stackpush == stackpushsave);
    memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
    freenode(e2);
    genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
    cdb.append(cnop1);
    cdb.append(cdb2);
    cdb.append(cnop2);
    if (*pretregs & mST0)
        note87(e,0,0);

    cgstate.stackclean--;
}

/*********************
 * Comma operator OPcomma
 *
 * The left operand is evaluated with an empty register mask, so its value is
 * discarded; the right operand supplies the result.
 * Params:
 *      cdb = receives the generated instructions
 *      e = the comma elem
 *      pretregs = register mask passed through to the right operand
 */

@trusted
void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs = 0;
    codelem(cdb,e.EV.E1,&retregs,false);   // ignore value from left leaf
    codelem(cdb,e.EV.E2,pretregs,false);   // do right leaf
}


/*********************************
 * Do && and || operators.
2563 * Generate: 2564 * (evaluate e1 and e2, if true goto cnop1) 2565 * cnop3: NOP 2566 * cg: [save reg code] ;if we must preserve reg 2567 * CLR reg ;false result (set Z also) 2568 * JMP cnop2 2569 * 2570 * cnop1: NOP ;if e1 evaluates to true 2571 * [save reg code] ;preserve reg 2572 * 2573 * MOV reg,1 ;true result 2574 * or 2575 * CLR reg ;if return result in flags 2576 * INC reg 2577 * 2578 * cnop2: NOP ;mark end of code 2579 */ 2580 2581 @trusted 2582 void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2583 { 2584 /* We can trip the assert with the following: 2585 * if ( (b<=a) ? (c<b || a<=c) : c>=a ) 2586 * We'll generate ugly code for it, but it's too obscure a case 2587 * to expend much effort on it. 2588 * assert(*pretregs != mPSW); 2589 */ 2590 2591 //printf("cdloglog() *pretregs: %s\n", regm_str(*pretregs)); 2592 cgstate.stackclean++; 2593 code *cnop1 = gennop(null); 2594 CodeBuilder cdb1; 2595 cdb1.ctor(); 2596 cdb1.append(cnop1); 2597 code *cnop3 = gennop(null); 2598 elem *e2 = e.EV.E2; 2599 (e.Eoper == OPoror) 2600 ? 
logexp(cdb,e.EV.E1,1,FLcode,cnop1) 2601 : logexp(cdb,e.EV.E1,0,FLcode,cnop3); 2602 con_t regconsave = regcon; 2603 uint stackpushsave = stackpush; 2604 if (*pretregs == 0) // if don't want result 2605 { 2606 int noreturn = !el_returns(e2); 2607 codelem(cdb,e2,pretregs,false); 2608 if (noreturn) 2609 { 2610 regconsave.used |= regcon.used; 2611 regcon = regconsave; 2612 } 2613 else 2614 andregcon(regconsave); 2615 assert(stackpush == stackpushsave); 2616 cdb.append(cnop3); 2617 cdb.append(cdb1); // eval code, throw away result 2618 cgstate.stackclean--; 2619 return; 2620 } 2621 2622 if (tybasic(e2.Ety) == TYnoreturn) 2623 { 2624 regm_t retregs2 = 0; 2625 codelem(cdb, e2, &retregs2, false); 2626 regconsave.used |= regcon.used; 2627 regcon = regconsave; 2628 assert(stackpush == stackpushsave); 2629 2630 regm_t retregs = *pretregs & (ALLREGS | mBP); 2631 if (!retregs) 2632 retregs = ALLREGS; // if mPSW only 2633 2634 reg_t reg; 2635 allocreg(cdb1,&retregs,®,TYint); // allocate reg for result 2636 movregconst(cdb1,reg,e.Eoper == OPoror,*pretregs & mPSW); 2637 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2638 *pretregs = retregs; 2639 2640 cdb.append(cnop3); 2641 cdb.append(cdb1); // eval code, throw away result 2642 cgstate.stackclean--; 2643 return; 2644 } 2645 2646 code *cnop2 = gennop(null); 2647 uint sz = tysize(e.Ety); 2648 if (tybasic(e2.Ety) == TYbool && 2649 sz == tysize(e2.Ety) && 2650 !(*pretregs & mPSW) && 2651 e2.Eoper == OPcall) 2652 { 2653 codelem(cdb,e2,pretregs,false); 2654 2655 andregcon(regconsave); 2656 2657 // stack depth should not change when evaluating E2 2658 assert(stackpush == stackpushsave); 2659 2660 assert(sz <= 4); // result better be int 2661 regm_t retregs = *pretregs & allregs; 2662 reg_t reg; 2663 allocreg(cdb1,&retregs,®,TYint); // allocate reg for result 2664 movregconst(cdb1,reg,e.Eoper == OPoror,0); // reg = 1 2665 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2666 *pretregs = retregs; 2667 if (e.Eoper == OPoror) 
2668 { 2669 cdb.append(cnop3); 2670 genjmp(cdb,JMP,FLcode,cast(block *) cnop2); // JMP cnop2 2671 cdb.append(cdb1); 2672 cdb.append(cnop2); 2673 } 2674 else 2675 { 2676 genjmp(cdb,JMP,FLcode,cast(block *) cnop2); // JMP cnop2 2677 cdb.append(cnop3); 2678 cdb.append(cdb1); 2679 cdb.append(cnop2); 2680 } 2681 cgstate.stackclean--; 2682 return; 2683 } 2684 2685 logexp(cdb,e2,1,FLcode,cnop1); 2686 andregcon(regconsave); 2687 2688 // stack depth should not change when evaluating E2 2689 assert(stackpush == stackpushsave); 2690 2691 assert(sz <= 4); // result better be int 2692 regm_t retregs = *pretregs & (ALLREGS | mBP); 2693 if (!retregs) 2694 retregs = ALLREGS; // if mPSW only 2695 CodeBuilder cdbcg; 2696 cdbcg.ctor(); 2697 reg_t reg; 2698 allocreg(cdbcg,&retregs,®,TYint); // allocate reg for result 2699 code *cg = cdbcg.finish(); 2700 for (code *c1 = cg; c1; c1 = code_next(c1)) // for each instruction 2701 cdb1.gen(c1); // duplicate it 2702 CodeBuilder cdbcg2; 2703 cdbcg2.ctor(); 2704 movregconst(cdbcg2,reg,0,*pretregs & mPSW); // MOV reg,0 2705 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2706 genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2); // JMP cnop2 2707 movregconst(cdb1,reg,1,*pretregs & mPSW); // reg = 1 2708 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2709 *pretregs = retregs; 2710 cdb.append(cnop3); 2711 cdb.append(cg); 2712 cdb.append(cdbcg2); 2713 cdb.append(cdb1); 2714 cdb.append(cnop2); 2715 cgstate.stackclean--; 2716 return; 2717 } 2718 2719 2720 /********************* 2721 * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror). 
 */

// Params:
//      cdb = code sink the generated instructions are appended to
//      e = shift/rotate node; e.EV.E1 is the value, e.EV.E2 the count
//      pretregs = requested result registers; 0 means both operands are
//                 only evaluated and no shift is generated
@trusted
void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t resreg;
    uint shiftcnt;
    regm_t retregs,rretregs;

    //printf("cdshift()\n");
    elem *e1 = e.EV.E1;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                  // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    tym_t tyml = tybasic(e1.Ety);
    int sz = _tysize[tyml];
    assert(!tyfloating(tyml));
    OPER oper = e.Eoper;
    uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;

    // s1: value placed in the reg field of the shift instruction's modregrm
    // s2: same, for the instruction applied to the second register of a
    //     register pair (not assigned for the rotates)
    uint s1,s2;
    switch (oper)
    {
        case OPshl:
            s1 = 4;                     // SHL
            s2 = 2;                     // RCL
            break;
        case OPshr:
            s1 = 5;                     // SHR
            s2 = 3;                     // RCR
            break;
        case OPashr:
            s1 = 7;                     // SAR
            s2 = 3;                     // RCR
            break;
        case OProl:
            s1 = 0;                     // ROL
            break;
        case OPror:
            s1 = 1;                     // ROR
            break;
        default:
            assert(0);
    }

    reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
    elem *e2 = e.EV.E2;
    regm_t forccs = *pretregs & mPSW;            // if return result in CCs
    regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
    bool e2isconst = false;                    // assume for the moment
    uint isbyte = (sz == 1);
    switch (e2.Eoper)
    {
        case OPconst:
            e2isconst = true;               // e2 is a constant
            shiftcnt = e2.EV.Vint;         // get shift count
            if ((!I16 && sz <= REGSIZE) ||
                shiftcnt <= 4 ||            // if sequence of shifts
                (sz == 2 &&
                    (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
                (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
               )
            {
                retregs = (forregs) ? forregs
                                    : ALLREGS;
                if (isbyte)
                {   retregs &= BYTEREGS;
                    if (!retregs)
                        retregs = BYTEREGS;
                }
                else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
                         !(retregs & mMSW))
                    retregs |= mMSW & ALLREGS;
                if (s1 == 7)    // if arithmetic right shift
                {
                    // CBW / CWD are used further down for these cases,
                    // so the value is forced into AX or DX:AX
                    if (shiftcnt == 8)
                        retregs = mAX;
                    else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
                        retregs = mDX|mAX;
                }

                if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
                    oper == OPshl &&
                    !e1.Ecount &&
                    (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
                     e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
                   )
                {   // Handle (shtlng)s << 16
                    regm_t r = retregs & mMSW;
                    codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
                    regwithvalue(cdb,retregs & mLSW,0,resreg,0);
                    getregs(cdb,r);
                    retregs = r | mask(resreg);
                    if (forccs)
                    {   sreg = findreg(r);
                        gentstreg(cdb,sreg);
                        *pretregs &= ~mPSW;             // already set
                    }
                    freenode(e1);
                    freenode(e2);
                    break;
                }

                // See if we should use LEA reg,xxx instead of shift
                if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
                    (sz == REGSIZE || (I64 && sz == 4)) &&
                    oper == OPshl &&
                    e1.Eoper == OPvar &&
                    !(*pretregs & mPSW) &&
                    config.flags4 & CFG4speed
                   )
                {
                    reg_t reg;
                    regm_t regm;

                    if (isregvar(e1,regm,reg) && !(regm & retregs))
                    {   code cs;
                        allocreg(cdb,&retregs,&resreg,e.Ety);
                        buildEA(&cs,-1,reg,1 << shiftcnt,0);
                        cs.Iop = LEA;
                        code_newreg(&cs,resreg);
                        cs.Iflags = 0;
                        if (I64 && sz == 8)
                            cs.Irex |= REX_W;
                        cdb.gen(&cs);                   // LEA resreg,[reg * ss]
                        freenode(e1);
                        freenode(e2);
                        break;
                    }
                }

                codelem(cdb,e1,&retregs,false); // eval left leaf
                //assert((retregs & regcon.mvar) == 0);
                getregs(cdb,retregs);           // modify these regs

                {
                    if (sz == 2 * REGSIZE)
                    {   resreg = findregmsw(retregs);
                        sreg = findreglsw(retregs);
                    }
                    else
                    {   resreg = findreg(retregs);
                        sreg = NOREG;              // an invalid value
                    }
                    if (config.target_cpu >= TARGET_80286 &&
                        sz <= REGSIZE)
                    {
                        // SHL resreg,shiftcnt
                        assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
                        cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
                        if (shiftcnt == 1)
                            cdb.last().Iop += 0x10;     // short form of shift
                        if (I64 && sz == 1 && resreg >= 4)
                            cdb.last().Irex |= REX;
                        // See if we need operand size prefix
                        if (!I16 && oper != OPshl && sz == 2)
                            cdb.last().Iflags |= CFopsize;
                        if (forccs)
                            cdb.last().Iflags |= CFpsw;         // need flags result
                    }
                    else if (shiftcnt == 8)
                    {   if (!(retregs & BYTEREGS) || resreg >= 4)
                        {
                            // no high/low byte halves to move: shift by CL instead
                            goto L1;
                        }

                        if (pass != BackendPass.final_ && (!forregs || forregs & (mSI | mDI)))
                        {
                            // e1 might get into SI or DI in a later pass,
                            // so don't put CX into a register
                            getregs(cdb,mCX);
                        }

                        assert(sz == 2);
                        switch (oper)
                        {
                            case OPshl:
                                // MOV regH,regL        XOR regL,regL
                                assert(resreg < 4 && !grex);
                                genregs(cdb,0x8A,resreg+4,resreg);
                                genregs(cdb,0x32,resreg,resreg);
                                break;

                            case OPshr:
                            case OPashr:
                                // MOV regL,regH
                                genregs(cdb,0x8A,resreg,resreg+4);
                                if (oper == OPashr)
                                    cdb.gen1(0x98);           // CBW
                                else
                                    genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
                                break;

                            case OPror:
                            case OProl:
                                // XCHG regL,regH
                                genregs(cdb,0x86,resreg+4,resreg);
                                break;

                            default:
                                assert(0);
                        }
                        if (forccs)
                            gentstreg(cdb,resreg);
                    }
                    else if (shiftcnt == REGSIZE * 8)   // it's an lword
                    {
                        if (oper == OPshl)
                            swap(&resreg, &sreg);
                        genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
                        if (oper == OPashr)
                            cdb.gen1(0x99);                       // CWD
                        else
                            movregconst(cdb,resreg,0,0);  // MOV resreg,0
                        if (forccs)
                        {
                            gentstreg(cdb,sreg);
                            *pretregs &= mBP | ALLREGS | mES;
                        }
                    }
                    else
                    {
                        // Emit shiftcnt single-bit shifts; for a register pair
                        // each one is followed by a rotate-through-carry (s2)
                        // on the other half
                        if (oper == OPshl && sz == 2 * REGSIZE)
                            swap(&resreg, &sreg);
                        while (shiftcnt--)
                        {
                            cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
                            if (sz == 2 * REGSIZE)
                            {
                                code_orflag(cdb.last(),CFpsw);
                                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                            }
                        }
                        if (forccs)
                            code_orflag(cdb.last(),CFpsw);
                    }
                    if (sz <= REGSIZE)
                        *pretregs &= mBP | ALLREGS;     // flags already set
                }
                freenode(e2);
                break;
            }
            goto default;

        default:
            retregs = forregs & ~mCX;               // CX will be shift count
            if (sz <= REGSIZE)
            {
                if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
                    retregs = ALLREGS & ~mCX;       // need something
                else if (!retregs)
                    retregs = ALLREGS & ~mCX;       // need something
                if (sz == 1)
                {   retregs &= mAX|mBX|mDX;
                    if (!retregs)
                        retregs = mAX|mBX|mDX;
                }
            }
            else
            {
                if (!(retregs & mMSW))
                    retregs = ALLREGS & ~mCX;
            }
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf

            if (sz <= REGSIZE)
                resreg = findreg(retregs);
            else
            {
                resreg = findregmsw(retregs);
                sreg = findreglsw(retregs);
            }
        L1:
            rretregs = mCX;                 // CX is shift count
            if (sz <= REGSIZE)
            {
                scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
                getregs(cdb,retregs);      // trash these regs
                cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX

                if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
                    cdb.last().Iflags |= CFopsize;

                // Note that a shift by CL does not set the flags if
                // CL == 0. If e2 is a constant, we know it isn't 0
                // (it would have been optimized out).
                if (e2isconst)
                    *pretregs &= mBP | ALLREGS; // flags already set with result
            }
            else if (sz == 2 * REGSIZE &&
                     config.target_cpu >= TARGET_80386)
            {
                reg_t hreg = resreg;
                reg_t lreg = sreg;
                uint rex = I64 ? (REX_W << 16) : 0;
                if (e2isconst)
                {
                    getregs(cdb,retregs);
                    if (shiftcnt & (REGSIZE * 8))
                    {
                        // count has the register-width bit set: one half of
                        // the result comes entirely from the other half
                        if (oper == OPshr)
                        {   //      SHR hreg,shiftcnt
                            //      MOV lreg,hreg
                            //      XOR hreg,hreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,lreg,hreg);
                            movregconst(cdb,hreg,0,0);
                        }
                        else if (oper == OPashr)
                        {   //      MOV     lreg,hreg
                            //      SAR     hreg,31
                            //      SHRD    lreg,hreg,shiftcnt
                            genmovreg(cdb,lreg,hreg);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
                        }
                        else
                        {   //      SHL lreg,shiftcnt
                            //      MOV hreg,lreg
                            //      XOR lreg,lreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,hreg,lreg);
                            movregconst(cdb,lreg,0,0);
                        }
                    }
                    else
                    {
                        if (oper == OPshr || oper == OPashr)
                        {   //      SHRD    lreg,hreg,shiftcnt
                            //      SHR/SAR hreg,shiftcnt
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
                        }
                        else
                        {   //      SHLD hreg,lreg,shiftcnt
                            //      SHL  lreg,shiftcnt
                            cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
                        }
                    }
                    freenode(e2);
                }
                else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
                {
                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs);          // modify these regs
                    if (oper == OPshl)
                    {
                        /*
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                         */

                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {
                        /*
                            SHRD    lreg,hreg,CL
                            SAR             hreg,CL

                            -- or --

                            SHRD    lreg,hreg,CL
                            SHR             hreg,CL
                         */
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                }
                else
                {   code* cl1,cl2;

                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs | mCX);     // modify these regs
                                                            // TEST CL,0x20
                    cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
                    cl1 = gennop(null);
                    // cdb1 collects the code at label cl1, taken when the
                    // tested count bit is set; it is appended after the
                    // fall-through sequence built directly in cdb
                    CodeBuilder cdb1;
                    cdb1.ctor();
                    cdb1.append(cl1);
                    if (oper == OPshl)
                    {
                        /*  TEST    CL,20H
                            JNE     L1
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                            JMP     L2
                        L1: AND     CL,20H-1
                            SHL     lreg,CL
                            MOV     hreg,lreg
                            XOR     lreg,lreg
                        L2: NOP
                         */

                        if (REGSIZE == 2)
                            cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                        cdb1.gen2(0xD3,modregrm(3,4,lreg));
                        genmovreg(cdb1,hreg,lreg);
                        genregs(cdb1,0x31,lreg,lreg);

                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {   if (oper == OPashr)
                        {
                            /*  TEST        CL,20H
                                JNE         L1
                                SHRD        lreg,hreg,CL
                                SAR         hreg,CL
                                JMP         L2
                            L1: AND         CL,15
                                MOV         lreg,hreg
                                SAR         hreg,31
                                SHRD        lreg,hreg,CL
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            genmovreg(cdb1,lreg,hreg);
                            cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
                            cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        }
                        else
                        {
                            /*  TEST        CL,20H
                                JNE         L1
                                SHRD        lreg,hreg,CL
                                SHR         hreg,CL
                                JMP         L2
                            L1: AND         CL,15
                                SHR         hreg,CL
                                MOV         lreg,hreg
                                XOR         hreg,hreg
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            cdb1.gen2(0xD3,modregrm(3,5,hreg));
                            genmovreg(cdb1,lreg,hreg);
                            genregs(cdb1,0x31,hreg,hreg);
                        }
                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                    cl2 = gennop(null);
                    genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
                    cdb.append(cdb1);
                    cdb.append(cl2);
                }
                break;
            }
            else if (sz == 2 * REGSIZE)
            {
                // Register pair on a pre-80386 target: shift one bit per
                // iteration of a LOOP counted by CX
                scodelem(cdb,e2,&rretregs,retregs,false);
                getregs(cdb,retregs | mCX);
                if (oper == OPshl)
                    swap(&resreg, &sreg);
                if (!e2isconst)                   // if not sure shift count != 0
                    cdb.genc2(0xE3,0,6);          // JCXZ .+6
                cdb.gen2(0xD1,modregrm(3,s1,resreg));
                code_orflag(cdb.last(),CFtarg2);
                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
                regimmed_set(CX,0);         // note that now CX == 0
            }
            else
                assert(0);
            break;
    }
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Perform a 'star' reference (indirection).
 */

@trusted
void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;
    uint nreg;

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
            if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
            { }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }

            if (*pretregs & mPSW)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
        }
    }

    elem *e1 = e.EV.E1;
    assert(e1);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
            // This case should never happen, why is it here?
3252 tym = TYnptr; // don't confuse allocreg() 3253 if (*pretregs & (mES | mCX) || e.Ety & mTYfar) 3254 tym = TYfptr; 3255 break; 3256 3257 default: 3258 break; 3259 } 3260 uint sz = _tysize[tym]; 3261 uint isbyte = tybyte(tym) != 0; 3262 3263 code cs; 3264 3265 getlvalue(cdb,&cs,e,RMload); // get addressing mode 3266 //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib); 3267 //fprintf(stderr,"cd2 :\n"); WRcodlst(c); 3268 if (*pretregs == 0) 3269 { 3270 if (e.Ety & mTYvolatile) // do the load anyway 3271 *pretregs = regmask(e.Ety, 0); // load into registers 3272 else 3273 return; 3274 } 3275 3276 regm_t idxregs = idxregm(&cs); // mask of index regs used 3277 3278 if (*pretregs == mPSW) 3279 { 3280 if (!I16 && tym == TYfloat) 3281 { 3282 retregs = ALLREGS & ~idxregs; 3283 allocreg(cdb,&retregs,®,TYfloat); 3284 cs.Iop = 0x8B; 3285 code_newreg(&cs,reg); 3286 cdb.gen(&cs); // MOV reg,lsw 3287 cdb.gen2(0xD1,modregrmx(3,4,reg)); // SHL reg,1 3288 code_orflag(cdb.last(), CFpsw); 3289 } 3290 else if (sz <= REGSIZE) 3291 { 3292 cs.Iop = 0x81 ^ isbyte; 3293 cs.Irm |= modregrm(0,7,0); 3294 cs.IFL2 = FLconst; 3295 cs.IEV2.Vsize_t = 0; 3296 cdb.gen(&cs); // CMP [idx],0 3297 } 3298 else if (!I16 && sz == REGSIZE + 2) // if far pointer 3299 { 3300 retregs = ALLREGS & ~idxregs; 3301 allocreg(cdb,&retregs,®,TYint); 3302 cs.Iop = MOVZXw; 3303 cs.Irm |= modregrm(0,reg,0); 3304 getlvalue_msw(&cs); 3305 cdb.gen(&cs); // MOVZX reg,msw 3306 goto L4; 3307 } 3308 else if (sz <= 2 * REGSIZE) 3309 { 3310 retregs = ALLREGS & ~idxregs; 3311 allocreg(cdb,&retregs,®,TYint); 3312 cs.Iop = 0x8B; 3313 code_newreg(&cs,reg); 3314 getlvalue_msw(&cs); 3315 cdb.gen(&cs); // MOV reg,msw 3316 if (I32) 3317 { if (tym == TYdouble || tym == TYdouble_alias) 3318 cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1 3319 } 3320 else if (tym == TYfloat) 3321 cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1 3322 L4: 3323 cs.Iop = 0x0B; 3324 getlvalue_lsw(&cs); 3325 cs.Iflags |= CFpsw; 3326 
cdb.gen(&cs); // OR reg,lsw 3327 } 3328 else if (!I32 && sz == 8) 3329 { 3330 *pretregs |= DOUBLEREGS_16; // fake it for now 3331 goto L1; 3332 } 3333 else 3334 { 3335 debug printf("%s\n", tym_str(tym)); 3336 assert(0); 3337 } 3338 } 3339 else // else return result in reg 3340 { 3341 L1: 3342 retregs = *pretregs; 3343 if (sz == 8 && 3344 (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK) 3345 { int i; 3346 3347 // Optimizer should not CSE these, as the result is worse code! 3348 assert(!e.Ecount); 3349 3350 cs.Iop = 0xFF; 3351 cs.Irm |= modregrm(0,6,0); 3352 cs.IEV1.Voffset += 8 - REGSIZE; 3353 stackchanged = 1; 3354 i = 8 - REGSIZE; 3355 do 3356 { 3357 cdb.gen(&cs); // PUSH EA+i 3358 cdb.genadjesp(REGSIZE); 3359 cs.IEV1.Voffset -= REGSIZE; 3360 stackpush += REGSIZE; 3361 i -= REGSIZE; 3362 } 3363 while (i >= 0); 3364 goto L3; 3365 } 3366 if (I16 && sz == 8) 3367 retregs = DOUBLEREGS_16; 3368 3369 // Watch out for loading an lptr from an lptr! We must have 3370 // the offset loaded into a different register. 3371 /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes) 3372 retregs = ALLREGS;*/ 3373 3374 { 3375 assert(!isbyte || retregs & BYTEREGS); 3376 allocreg(cdb,&retregs,®,tym); // alloc registers 3377 } 3378 if (retregs & XMMREGS) 3379 { 3380 assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector 3381 cs.Iop = xmmload(tym); 3382 cs.Irex &= ~REX_W; 3383 code_newreg(&cs,reg - XMM0); 3384 checkSetVex(&cs,tym); 3385 cdb.gen(&cs); // MOV reg,[idx] 3386 } 3387 else if (sz <= REGSIZE) 3388 { 3389 cs.Iop = 0x8B; // MOV 3390 if (sz <= 2 && !I16 && 3391 config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed) 3392 { 3393 cs.Iop = tyuns(tym) ? 
MOVZXw : MOVSXw; // MOVZX/MOVSX 3394 cs.Iflags &= ~CFopsize; 3395 } 3396 cs.Iop ^= isbyte; 3397 L2: 3398 code_newreg(&cs,reg); 3399 cdb.gen(&cs); // MOV reg,[idx] 3400 if (isbyte && reg >= 4) 3401 code_orrex(cdb.last(), REX); 3402 } 3403 else if ((tym == TYfptr || tym == TYhptr) && retregs & mES) 3404 { 3405 cs.Iop = 0xC4; // LES reg,[idx] 3406 goto L2; 3407 } 3408 else if (sz <= 2 * REGSIZE) 3409 { uint lsreg; 3410 3411 cs.Iop = 0x8B; 3412 // Be careful not to interfere with index registers 3413 if (!I16) 3414 { 3415 // Can't handle if both result registers are used in 3416 // the addressing mode. 3417 if ((retregs & idxregs) == retregs) 3418 { 3419 retregs = mMSW & allregs & ~idxregs; 3420 if (!retregs) 3421 retregs |= mCX; 3422 retregs |= mLSW & ~idxregs; 3423 3424 // We can run out of registers, so if that's possible, 3425 // give us *one* of the idxregs 3426 if ((retregs & ~regcon.mvar & mLSW) == 0) 3427 { 3428 regm_t x = idxregs & mLSW; 3429 if (x) 3430 retregs |= mask(findreg(x)); // give us one idxreg 3431 } 3432 else if ((retregs & ~regcon.mvar & mMSW) == 0) 3433 { 3434 regm_t x = idxregs & mMSW; 3435 if (x) 3436 retregs |= mask(findreg(x)); // give us one idxreg 3437 } 3438 3439 allocreg(cdb,&retregs,®,tym); // alloc registers 3440 assert((retregs & idxregs) != retregs); 3441 } 3442 3443 lsreg = findreglsw(retregs); 3444 if (mask(reg) & idxregs) // reg is in addr mode 3445 { 3446 code_newreg(&cs,lsreg); 3447 cdb.gen(&cs); // MOV lsreg,lsw 3448 if (sz == REGSIZE + 2) 3449 cs.Iflags |= CFopsize; 3450 lsreg = reg; 3451 getlvalue_msw(&cs); // MOV reg,msw 3452 } 3453 else 3454 { 3455 code_newreg(&cs,reg); 3456 getlvalue_msw(&cs); 3457 cdb.gen(&cs); // MOV reg,msw 3458 if (sz == REGSIZE + 2) 3459 cdb.last().Iflags |= CFopsize; 3460 getlvalue_lsw(&cs); // MOV lsreg,lsw 3461 } 3462 NEWREG(cs.Irm,lsreg); 3463 cdb.gen(&cs); 3464 } 3465 else 3466 { 3467 // Index registers are always the lsw! 
3468 cs.Irm |= modregrm(0,reg,0); 3469 getlvalue_msw(&cs); 3470 cdb.gen(&cs); // MOV reg,msw 3471 lsreg = findreglsw(retregs); 3472 NEWREG(cs.Irm,lsreg); 3473 getlvalue_lsw(&cs); // MOV lsreg,lsw 3474 cdb.gen(&cs); 3475 } 3476 } 3477 else if (I16 && sz == 8) 3478 { 3479 assert(reg == AX); 3480 cs.Iop = 0x8B; 3481 cs.IEV1.Voffset += 6; 3482 cdb.gen(&cs); // MOV AX,EA+6 3483 cs.Irm |= modregrm(0,CX,0); 3484 cs.IEV1.Voffset -= 4; 3485 cdb.gen(&cs); // MOV CX,EA+2 3486 NEWREG(cs.Irm,DX); 3487 cs.IEV1.Voffset -= 2; 3488 cdb.gen(&cs); // MOV DX,EA 3489 cs.IEV1.Voffset += 4; 3490 NEWREG(cs.Irm,BX); 3491 cdb.gen(&cs); // MOV BX,EA+4 3492 } 3493 else 3494 assert(0); 3495 L3: 3496 fixresult(cdb,e,retregs,pretregs); 3497 } 3498 //fprintf(stderr,"cdafter :\n"); WRcodlst(c); 3499 } 3500 3501 3502 3503 /******************************** 3504 * Generate code to load ES with the right segment value, 3505 * do nothing if e is a far pointer. 3506 */ 3507 3508 @trusted 3509 private code *cod2_setES(tym_t ty) 3510 { 3511 if (config.exe & EX_flat) 3512 return null; 3513 3514 int push; 3515 3516 CodeBuilder cdb; 3517 cdb.ctor(); 3518 switch (tybasic(ty)) 3519 { 3520 case TYnptr: 3521 if (!(config.flags3 & CFG3eseqds)) 3522 { push = 0x1E; // PUSH DS 3523 goto L1; 3524 } 3525 break; 3526 case TYcptr: 3527 push = 0x0E; // PUSH CS 3528 goto L1; 3529 case TYsptr: 3530 if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds)) 3531 { push = 0x16; // PUSH SS 3532 L1: 3533 // Must load ES 3534 getregs(cdb,mES); 3535 cdb.gen1(push); 3536 cdb.gen1(0x07); // POP ES 3537 } 3538 break; 3539 3540 default: 3541 break; 3542 } 3543 return cdb.finish(); 3544 } 3545 3546 /******************************** 3547 * Generate code for intrinsic strlen(). 
3548 */ 3549 3550 @trusted 3551 void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 3552 { 3553 /* Generate strlen in CX: 3554 LES DI,e1 3555 CLR AX ;scan for 0 3556 MOV CX,-1 ;largest possible string 3557 REPNE SCASB 3558 NOT CX 3559 DEC CX 3560 */ 3561 3562 regm_t retregs = mDI; 3563 tym_t ty1 = e.EV.E1.Ety; 3564 if (!tyreg(ty1)) 3565 retregs |= mES; 3566 codelem(cdb,e.EV.E1,&retregs,false); 3567 3568 // Make sure ES contains proper segment value 3569 cdb.append(cod2_setES(ty1)); 3570 3571 ubyte rex = I64 ? REX_W : 0; 3572 3573 getregs_imm(cdb,mAX | mCX); 3574 movregconst(cdb,AX,0,1); // MOV AL,0 3575 movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0); // MOV CX,-1 3576 getregs(cdb,mDI|mCX); 3577 cdb.gen1(0xF2); // REPNE 3578 cdb.gen1(0xAE); // SCASB 3579 genregs(cdb,0xF7,2,CX); // NOT CX 3580 code_orrex(cdb.last(), rex); 3581 if (I64) 3582 cdb.gen2(0xFF,(rex << 16) | modregrm(3,1,CX)); // DEC reg 3583 else 3584 cdb.gen1(0x48 + CX); // DEC CX 3585 3586 if (*pretregs & mPSW) 3587 { 3588 cdb.last().Iflags |= CFpsw; 3589 *pretregs &= ~mPSW; 3590 } 3591 fixresult(cdb,e,mCX,pretregs); 3592 } 3593 3594 3595 /********************************* 3596 * Generate code for strcmp(s1,s2) intrinsic. 
 */

// Params:
//      cdb = code sink the generated instructions are appended to
//      e = strcmp node; e.EV.E1 is s1, e.EV.E2 is s2
//      pretregs = requested result; the value is produced in AX, and
//                 when only mPSW is requested the flags left by the
//                 compare are the result
@trusted
void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    char need_DS;
    int segreg;

    /*
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    // s1 goes to SI (with its segment in CX when the type is not a register type)
    regm_t retregs1 = mSI;
    tym_t ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mCX;
    codelem(cdb,e.EV.E1,&retregs1,false);

    // s2 goes to DI (with ES), keeping s1's registers intact
    regm_t retregs = mDI;
    tym_t ty2 = e.EV.E2.Ety;
    if (!tyreg(ty2))
        retregs |= mES;
    scodelem(cdb,e.EV.E2,&retregs,retregs1,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);

    ubyte rex = I64 ? REX_W : 0;

    // Load DS with right value
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;
        case TYcptr:
            segreg = SEG_CS;
        L1:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
            need_DS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
            need_DS = true;
            break;
        default:
            assert(0);
    }

    movregconst(cdb,AX,0,0);                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                              // REPNE
    cdb.gen1(0xAE);                              // SCASB
    genregs(cdb,0xF7,2,CX);         // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);        // SUB DI,CX
    code_orrex(cdb.last(),rex);
    cdb.gen1(0xF3);                              // REPE
    cdb.gen1(0xA6);                              // CMPSB
    if (need_DS)
        cdb.gen1(0x1F);                          // POP DS
    code *c4 = gennop(null);
    if (*pretregs != mPSW)                       // if not flags only
    {
        genjmp(cdb,JE,FLcode,cast(block *) c4);      // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                 // SBB AX,AX
        code_orrex(cdb.last(),rex);
        cdb.genc2(0x81,(rex << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(c4);
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 */

@trusted
void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS;
    int segreg;

    /*
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    // e.EV.E1 is an OPparam pairing s1 and s2; e.EV.E2 is the byte count
    elem *e1 = e.EV.E1;
    assert(e1.Eoper == OPparam);

    // Get s1 into DX:SI
    regm_t retregs1 = mSI;
    tym_t ty1 = e1.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mDX;
    codelem(cdb,e1.EV.E1,&retregs1,false);

    // Get s2 into ES:DI
    regm_t retregs = mDI;
    tym_t ty2 = e1.EV.E2.Ety;
    if (!tyreg(ty2))
        retregs |= mES;
    scodelem(cdb,e1.EV.E2,&retregs,retregs1,false);
    freenode(e1);

    // Get nbytes into CX
    regm_t retregs3 = mCX;
    scodelem(cdb,e.EV.E2,&retregs3,retregs | retregs1,false);

    // Make sure ES contains proper segment value
cdb.append(cod2_setES(ty2)); 3753 3754 // Load DS with right value 3755 switch (tybasic(ty1)) 3756 { 3757 case TYnptr: 3758 case TYimmutPtr: 3759 need_DS = false; 3760 break; 3761 3762 case TYsptr: 3763 if (config.wflags & WFssneds) // if sptr can't use DS segment 3764 segreg = SEG_SS; 3765 else 3766 segreg = SEG_DS; 3767 goto L1; 3768 case TYcptr: 3769 segreg = SEG_CS; 3770 L1: 3771 cdb.gen1(0x1E); // PUSH DS 3772 cdb.gen1(0x06 + (segreg << 3)); // PUSH segreg 3773 cdb.gen1(0x1F); // POP DS 3774 need_DS = true; 3775 break; 3776 case TYfptr: 3777 case TYvptr: 3778 case TYhptr: 3779 cdb.gen1(0x1E); // PUSH DS 3780 cdb.gen2(0x8E,modregrm(3,SEG_DS,DX)); // MOV DS,DX 3781 need_DS = true; 3782 break; 3783 default: 3784 assert(0); 3785 } 3786 3787 static if (1) 3788 { 3789 getregs(cdb,mAX); 3790 cdb.gen2(0x33,modregrm(3,AX,AX)); // XOR AX,AX 3791 code_orflag(cdb.last(), CFpsw); // keep flags 3792 } 3793 else 3794 { 3795 if (*pretregs != mPSW) // if not flags only 3796 { 3797 reg_t r; 3798 regwithvalue(cdb,mAX,0,r,0); // put 0 in AX 3799 } 3800 } 3801 3802 getregs(cdb,mCX | mSI | mDI); 3803 cdb.gen1(0xF3); // REPE 3804 cdb.gen1(0xA6); // CMPSB 3805 if (need_DS) 3806 cdb.gen1(0x1F); // POP DS 3807 if (*pretregs != mPSW) // if not flags only 3808 { 3809 code *c4 = gennop(null); 3810 genjmp(cdb,JE,FLcode,cast(block *) c4); // JE L1 3811 getregs(cdb,mAX); 3812 genregs(cdb,0x1B,AX,AX); // SBB AX,AX 3813 cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1); // SBB AX,-1 3814 cdb.append(c4); 3815 } 3816 3817 *pretregs &= ~mPSW; 3818 fixresult(cdb,e,mAX,pretregs); 3819 } 3820 3821 /********************************* 3822 * Generate code for strcpy(s1,s2) intrinsic. 
*/

@trusted
void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Operands: e.E1 is s1 (the destination), e.E2 is s2 (the source).
     * When *pretregs is non-zero the value of DI (s1) is copied to AX before
     * the copy and the result is handed to fixresult() as mAX | mES.
     */
    char need_DS;               // true if DS was pushed and must be popped afterwards
    int segreg;

    /*
        LES     DI,s2           ;ES:DI = s2
        CLR     AX              ;scan for 0
        MOV     CX,-1           ;largest possible string
        REPNE   SCASB           ;find end of s2
        NOT     CX              ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI           ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;
    regm_t retregs = mDI;
    tym_t ty2 = tybasic(e.EV.E2.Ety);
    if (!tyreg(ty2))
        retregs |= mES;
    ubyte rex = I64 ? REX_W : 0;
    codelem(cdb,e.EV.E2,&retregs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                        // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);                // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);
    cdb.gen1(0xF2);                                 // REPNE
    cdb.gen1(0xAE);                                 // SCASB
    genregs(cdb,0xF7,2,CX);                         // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);                        // SUB DI,CX
    code_orrex(cdb.last(),rex);
    genmovreg(cdb,SI,DI);                           // MOV SI,DI

    // Load DS with right value
    switch (ty2)
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;
        case TYcptr:
            segreg = SEG_CS;
        L1:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));         // PUSH segreg
            cdb.genadjesp(REGSIZE * 2);
            need_DS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            segreg = SEG_ES;
            goto L1;

        default:
            assert(0);
    }

    retregs = mDI;
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    if (!tyreg(ty1))
        retregs |= mES;
    scodelem(cdb,e.EV.E1,&retregs,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // Make sure ES contains proper segment value
    if (ty2 != TYnptr || ty1 != ty2)
        cdb.append(cod2_setES(ty1));
    else
    {}                              // ES is already same as DS

    if (need_DS)
        cdb.gen1(0x1F);                             // POP DS
    if (*pretregs)
        genmovreg(cdb,AX,DI);                       // MOV AX,DI
    cdb.gen1(0xF3);                                 // REP
    cdb.gen1(0xA4);                                 // MOVSB

    if (need_DS)
    {   cdb.gen1(0x1F);                             // POP DS
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}

/*********************************
 * Generate code for memcpy(s1,s2,n) intrinsic.
 *  OPmemcpy
 *   /   \
 * s1   OPparam
 *       /   \
 *      s2    n
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPmemcpy elem laid out as in the diagram above
 *      pretregs = if non-zero, the value of DI (s1) is copied to AX before
 *          the copy and the result is handed to fixresult() as mES | mAX
 */

@trusted
void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS;               // true if DS was pushed and must be popped afterwards
    int segreg;

    /*
        MOV     SI,s2
        MOV     DX,s2+2
        MOV     CX,n
        LES     DI,s1
        PUSH    DS
        MOV     DS,DX
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    // Get s2 into DX:SI
    regm_t retregs2 = mSI;
    tym_t ty2 = e2.EV.E1.Ety;
    if (!tyreg(ty2))
        retregs2 |= mDX;
    codelem(cdb,e2.EV.E1,&retregs2,false);

    // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
    const zeroCheck = e2.EV.E2.Eoper != OPconst;

    // Get nbytes into CX
    regm_t retregs3 = mCX;
    scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
    freenode(e2);

    // Get s1 into ES:DI
    regm_t retregs1 = mDI;
    tym_t ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);

    ubyte rex = I64 ? REX_W : 0;

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    // Load DS with right value
    switch (tybasic(ty2))
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;

        case TYcptr:
            segreg = SEG_CS;
        L1:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
            need_DS = true;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
            need_DS = true;
            break;

        default:
            assert(0);
    }

    if (*pretregs)                              // if need return value
    {   getregs(cdb,mAX);
        genmovreg(cdb,AX,DI);
    }

    if (0 && I32 && config.flags4 & CFG4speed)  // disabled: condition is constant false
    {
        /* This is only faster if the memory is dword aligned, if not
         * it is significantly slower than just a rep movsb.
         */
        /*      mov     EDX,ECX
         *      shr     ECX,2
         *      jz      L1
         *      repe    movsd
         * L1:  nop
         *      and     EDX,3
         *      jz      L2
         *      mov     ECX,EDX
         *      repe    movsb
         * L2:  nop
         */
        getregs(cdb,mSI | mDI | mCX | mDX);
        genmovreg(cdb,DX,CX);                       // MOV EDX,ECX
        cdb.genc2(0xC1,modregrm(3,5,CX),2);         // SHR ECX,2
        code *cx = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)cx);   // JZ L1
        cdb.gen1(0xF3);                             // REPE
        cdb.gen1(0xA5);                             // MOVSW
        cdb.append(cx);
        cdb.genc2(0x81, modregrm(3,4,DX),3);        // AND EDX,3

        code *cnop = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)cnop); // JZ L2
        genmovreg(cdb,CX,DX);                       // MOV ECX,EDX
        cdb.gen1(0xF3);                             // REPE
        cdb.gen1(0xA4);                             // MOVSB
        cdb.append(cnop);
    }
    else
    {
        getregs(cdb,mSI | mDI | mCX);
        code* cnop;
        if (zeroCheck)
        {
            cnop = gennop(null);
            gentstreg(cdb,CX);                      // TEST ECX,ECX
            if (I64)
                code_orrex(cdb.last, REX_W);
            genjmp(cdb, JE, FLcode, cast(block *)cnop); // JZ cnop
        }

        if (I16 && config.flags4 & CFG4speed)       // if speed optimization
        {
            // Note this doesn't work if CX is 0
            cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));  // SHR CX,1
            cdb.gen1(0xF3);                                 // REPE
            cdb.gen1(0xA5);                                 // MOVSW
            cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX)); // ADC CX,CX
        }
        cdb.gen1(0xF3);                             // REPE
        cdb.gen1(0xA4);                             // MOVSB
        if (zeroCheck)
            cdb.append(cnop);
        if (need_DS)
            cdb.gen1(0x1F);                         // POP DS
    }
    fixresult(cdb,e,mES|mAX,pretregs);
}


/*********************************
 * Generate code for memset(s,value,numbytes) intrinsic.
*      (s OPmemset (numbytes OPparam value))
 * Values wider than one byte are handed off to cdmemsetn().
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPmemset elem; e.E1 is s, e.E2 must be an OPparam whose
 *          E1 is numbytes and whose E2 is value
 *      pretregs = if non-zero, the original value of DI (s) is saved in BX
 *          and the result is handed to fixresult() as mES | mBX
 */

@trusted
void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs1;
    regm_t retregs3;
    reg_t reg;
    reg_t vreg;
    tym_t ty1;
    int segreg;
    targ_uns numbytes;
    uint m;

    //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    elem* evalue = e2.EV.E2;
    elem* enumbytes = e2.EV.E1;

    const sz = tysize(evalue.Ety);
    if (sz > 1)
    {
        cdmemsetn(cdb, e, pretregs);
        return;
    }

    const grex = I64 ? (REX_W << 16) : 0;

    bool valueIsConst = false;
    targ_size_t value;
    if (evalue.Eoper == OPconst)
    {
        // Replicate the low byte of the constant across the whole register width
        value = el_tolong(evalue) & 0xFF;
        value |= value << 8;
        if (I32 || I64)
        {
            value |= value << 16;
            static if (value.sizeof == 8)
            if (I64)
                value |= value << 32;
        }
        valueIsConst = true;
    }
    else if (evalue.Eoper == OPstrpar)  // happens if evalue is a struct of 0 size
    {
        value = 0;
        valueIsConst = true;
    }
    else
        value = 0xDEADBEEF;     // stop annoying false positives that value is not inited

    if (enumbytes.Eoper == OPconst)
    {
        numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes);
    }

    // Get nbytes into CX
    regm_t retregs2 = 0;
    if (enumbytes.Eoper != OPconst)
    {
        retregs2 = mCX;
        codelem(cdb,enumbytes,&retregs2,false);
    }

    // Get value into AX
    retregs3 = mAX;
    if (valueIsConst)
    {
        reg_t r;
        regwithvalue(cdb, mAX, value, r, I64?64:0);
        freenode(evalue);
    }
    else
    {
        scodelem(cdb,evalue,&retregs3,retregs2,false);

        getregs(cdb,mAX);
        if (I16)
        {
            cdb.gen2(0x8A,modregrm(3,AH,AL));       // MOV AH,AL
        }
        else if (I32)
        {
            genregs(cdb,MOVZXb,AX,AX);                      // MOVZX EAX,AL
            cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101);   // IMUL EAX,EAX,0x01010101
        }
        else
        {
            genregs(cdb,MOVZXb,AX,AX);                      // MOVZX EAX,AL
            regm_t regm = allregs & ~(mAX | retregs2);
            reg_t r;
            regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,r,64); // MOV reg,0x01010101_01010101
            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r));      // IMUL RAX,reg
        }
    }
    freenode(e2);

    // Get s into ES:DI
    retregs1 = mDI;
    ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
    reg = DI; //findreg(retregs1);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    if (*pretregs)                              // if need return value
    {
        getregs(cdb,mBX);
        genmovreg(cdb,BX,DI);                   // MOV EBX,EDI
    }

    if (enumbytes.Eoper == OPconst)
    {
        // Byte count known at compile time: REP STOS for the whole words,
        // then individual stores for the remainder
        getregs(cdb,mDI);
        if (const numwords = numbytes / REGSIZE)
        {
            reg_t r;
            regwithvalue(cdb,mCX,numwords,r, I64 ? 64 : 0);
            getregs(cdb,mCX);
            cdb.gen1(0xF3);                     // REP
            cdb.gen1(STOS);                     // STOSW/D/Q
            if (I64)
                code_orrex(cdb.last(), REX_W);
            regimmed_set(CX, 0);                // CX is now 0
        }

        auto remainder = numbytes & (REGSIZE - 1);
        if (I64 && remainder >= 4)
        {
            cdb.gen1(STOS);                     // STOSD
            remainder -= 4;
        }
        for (; remainder; --remainder)
            cdb.gen1(STOSB);                    // STOSB
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    getregs(cdb,mDI | mCX);
    if (I16)
    {
        if (config.flags4 & CFG4speed)          // if speed optimization
        {
            cdb.gen2(0xD1,modregrm(3,5,CX));    // SHR CX,1
            cdb.gen1(0xF3);                     // REP
            cdb.gen1(STOS);                     // STOSW
            cdb.gen2(0x11,modregrm(3,CX,CX));   // ADC CX,CX
        }
        cdb.gen1(0xF3);                         // REP
        cdb.gen1(STOSB);                        // STOSB
        regimmed_set(CX, 0);                    // CX is now 0
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    /*  MOV   sreg,ECX
        SHR   ECX,n
        REP
        STOSD/Q

        ADC   ECX,ECX
        REP
        STOSD

        MOV   ECX,sreg
        AND   ECX,3
        REP
        STOSB
     */
    regm_t regs = allregs & (*pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
    reg_t sreg;
    allocreg(cdb,&regs,&sreg,TYint);
    genregs(cdb,0x89,CX,sreg);                      // MOV sreg,ECX (32 bits only)

    const n = I64 ? 3 : 2;
    cdb.genc2(0xC1, grex | modregrm(3,5,CX), n);    // SHR ECX,n

    cdb.gen1(0xF3);                                 // REP
    cdb.gen1(STOS);                                 // STOSD/Q
    if (I64)
        code_orrex(cdb.last(), REX_W);

    if (I64)
    {
        cdb.gen2(0x11,modregrm(3,CX,CX));           // ADC ECX,ECX
        cdb.gen1(0xF3);                             // REP
        cdb.gen1(STOS);                             // STOSD
    }

    genregs(cdb,0x89,sreg,CX);                      // MOV ECX,sreg (32 bits only)
    cdb.genc2(0x81, modregrm(3,4,CX), 3);           // AND ECX,3
    cdb.gen1(0xF3);                                 // REP
    cdb.gen1(STOSB);                                // STOSB

    regimmed_set(CX, 0);                            // CX is now 0
    fixresult(cdb,e,mES|mBX,pretregs);
}

/***********************************************
 * Do memset for values larger than a byte.
 * Has many similarities to cod4.cdeq().
 * Doesn't work for 16 bit code.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPmemset elem; e.E1 is the destination, e.E2 must be an
 *          OPparam whose E1 is the element count and whose E2 is the value
 *      pretregs = if non-zero, a copy of the destination pointer taken before
 *          the store loop is returned through fixresult()
 */
@trusted
private void cdmemsetn(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdmemsetn(*pretregs = %s)\n", regm_str(*pretregs));
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    elem* evalue = e2.EV.E2;
    elem* enelems = e2.EV.E1;

    tym_t tymv = tybasic(evalue.Ety);
    const sz = tysize(evalue.Ety);
    assert(cast(int)sz > 1);

    if (tyxmmreg(tymv) && config.fpxmmregs)
        assert(0);      // fix later
    if (tyfloating(tymv) && config.inline8087)
        assert(0);      // fix later

    const grex = I64 ? (REX_W << 16) : 0;

    // get the count of elems into CX
    regm_t mregcx = mCX;
    codelem(cdb,enelems,&mregcx,false);

    // Get value into AX
    regm_t retregs3 = allregs & ~mregcx;
    if (sz == 2 * REGSIZE)
        retregs3 &= ~(mBP | IDXREGS);  // BP cannot be used for register pair,
                                       // IDXREGS could deplete index regs - see sdtor.d test14815()
    scodelem(cdb,evalue,&retregs3,mregcx,false);

    /* Necessary because if evalue calls a function, and that function never returns,
     * it doesn't affect registers. Which means those registers can be used for enregistering
     * variables, and next pass fails because it can't use those registers, and so cannot
     * allocate registers for retregs3. See ice11596.d
     */
    useregs(retregs3);

    reg_t valreg = findreg(retregs3);
    reg_t valreghi;
    if (sz == 2 * REGSIZE)
    {
        valreg = findreglsw(retregs3);
        valreghi = findregmsw(retregs3);
    }

    freenode(e2);

    // Get s into ES:DI
    regm_t mregidx = IDXREGS & ~(mregcx | retregs3);
    assert(mregidx);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    if (!tyreg(ty1))
        mregidx |= mES;
    scodelem(cdb,e.EV.E1,&mregidx,mregcx | retregs3,false);
    reg_t idxreg = findreg(mregidx);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    regm_t mregbx = 0;
    if (*pretregs)                              // if need return value
    {
        // Prefer a register the caller asked for, as long as it is not already in use here
        mregbx = *pretregs & ~(mregidx | mregcx | retregs3);
        if (!mregbx)
            mregbx = allregs & ~(mregidx | mregcx | retregs3);
        reg_t regbx;
        allocreg(cdb, &mregbx, &regbx, TYnptr);
        getregs(cdb, mregbx);
        genmovreg(cdb,regbx,idxreg);            // MOV BX,DI
    }

    getregs(cdb,mask(idxreg) | mCX);            // modify DI and CX

    /* Generate:
     *  JCXZ L1
     * L2:
     *  MOV [idxreg],AX
     *  ADD idxreg,sz
     *  LOOP L2
     * L1:
     *  NOP
     */
    code* c1 = gennop(null);
    genjmp(cdb, JCXZ, FLcode, cast(block *)c1);
    code cs;
    buildEA(&cs,idxreg,-1,1,0);
    cs.Iop = 0x89;
    if (!I16 && sz == 2)
        cs.Iflags |= CFopsize;
    if (I64 && sz == 8)
        cs.Irex |= REX_W;
    code_newreg(&cs, valreg);
    cdb.gen(&cs);                               // MOV [idxreg],AX
    code* c2 = cdb.last();                      // loop target (label L2)
    if (sz == REGSIZE * 2)
    {
        cs.IEV1.Vuns = REGSIZE;
        code_newreg(&cs, valreghi);
        cdb.gen(&cs);                           // MOV REGSIZE[idxreg],DX
    }
    cdb.genc2(0x81, grex | modregrmx(3,0,idxreg), sz); // ADD idxreg,sz
    genjmp(cdb, LOOP, FLcode, cast(block *)c2); // LOOP L2
    cdb.append(c1);

    regimmed_set(CX, 0);                        // CX is now 0

    fixresult(cdb,e,mregbx,pretregs);
}

/**********************
 * Do structure assignments.
 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
 * Mebbe call cdstreq() for double assignments???
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the assignment elem; e.E1 is the destination, e.E2 is the source,
 *          and type_size(e.ET) is the number of bytes copied
 *      pretregs = registers the result is wanted in; must not include mPSW
 */

@trusted
void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS = false;
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int segreg;
    uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
    ubyte rex = I64 ? REX_W : 0;

    //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    // First, load pointer to rvalue into SI
    regm_t srcregs = mSI;                       // source is DS:SI
    docommas(cdb,e2);
    if (e2.Eoper == OPind)                      // if (.. = *p)
    {   elem *e21 = e2.EV.E1;

        segreg = SEG_DS;
        switch (tybasic(e21.Ety))
        {
            case TYsptr:
                if (config.wflags & WFssneds)   // if sptr can't use DS segment
                    segreg = SEG_SS;
                break;
            case TYcptr:
                if (!(config.exe & EX_flat))
                    segreg = SEG_CS;
                break;
            case TYfptr:
            case TYvptr:
            case TYhptr:
                srcregs |= mCX;                 // get segment also
                need_DS = true;
                break;

            default:
                break;
        }
        codelem(cdb,e21,&srcregs,false);
        freenode(e2);
        if (segreg != SEG_DS)                   // if not DS
        {
            getregs(cdb,mCX);
            cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
            need_DS = true;
        }
    }
    else if (e2.Eoper == OPvar)
    {
        if (e2.EV.Vsym.ty() & mTYfar)           // if e2 is in a far segment
        {   srcregs |= mCX;                     // get segment also
            need_DS = true;
            cdrelconst(cdb,e2,&srcregs);
        }
        else
        {
            segreg = segfl[el_fl(e2)];
            if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack
                segreg == SEG_CS)               // if source is in CS
            {
                need_DS = true;                 // we need to reload DS
                // Load CX with segment
                srcregs |= mCX;
                getregs(cdb,mCX);
                cdb.gen2(0x8C,                  // MOV CX,[SS|CS]
                    modregrm(3,segreg,CX));
            }
            cdrelconst(cdb,e2,&srcregs);
        }
        freenode(e2);
    }
    else
    {
        if (!(config.exe & EX_flat))
        {   need_DS = true;
            srcregs |= mCX;
        }
        codelem(cdb,e2,&srcregs,false);
    }

    // now get pointer to lvalue (destination) in ES:DI
    regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
    if (e1.Eoper == OPind)                      // if (*p = ..)
    {
        if (tyreg(e1.EV.E1.Ety))
            dstregs = mDI;
        cdb.append(cod2_setES(e1.EV.E1.Ety));
        scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false);
    }
    else
        cdrelconst(cdb,e1,&dstregs);
    freenode(e1);

    getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
    if (need_DS)
    {   assert(!(config.exe & EX_flat));
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
    }
    if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
    {
        // Small enough: emit the individual string moves without a REP prefix
        while (numbytes >= REGSIZE)
        {
            cdb.gen1(0xA5);                     // MOVSW
            code_orrex(cdb.last(), rex);
            numbytes -= REGSIZE;
        }
        //if (numbytes)
        //    printf("cdstreq numbytes %d\n",numbytes);
        if (I64 && numbytes >= 4)
        {
            cdb.gen1(0xA5);                     // MOVSD
            numbytes -= 4;
        }
        while (numbytes--)
            cdb.gen1(0xA4);                     // MOVSB
    }
    else
    {
static if (1)
{
        uint remainder = numbytes & (REGSIZE - 1);
        numbytes /= REGSIZE;                    // number of words
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);         // # of bytes/words
        cdb.gen1(0xF3);                         // REP
        if (REGSIZE == 8)
            cdb.gen1(REX | REX_W);
        cdb.gen1(0xA5);                         // REP MOVSD
        regimmed_set(CX,0);                     // note that CX == 0
        if (I64 && remainder >= 4)
        {
            cdb.gen1(0xA5);                     // MOVSD
            remainder -= 4;
        }
        for (; remainder; remainder--)
        {
            cdb.gen1(0xA4);                     // MOVSB
        }
}
else
{
        uint movs;
        if (numbytes & (REGSIZE - 1))           // if odd
            movs = 0xA4;                        // MOVSB
        else
        {
            movs = 0xA5;                        // MOVSW
            numbytes /= REGSIZE;                // # of words
        }
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);         // # of bytes/words
        cdb.gen1(0xF3);                         // REP
        cdb.gen1(movs);
        regimmed_set(CX,0);                     // note that CX == 0
}
    }
    if (need_DS)
        cdb.gen1(0x1F);                         // POP DS
    assert(!(*pretregs & mPSW));
    if (*pretregs)
    {   // ES:DI points past what we want

        cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes

        const tym = tybasic(e.Ety);
        if (tym == TYucent && I64)
        {
            /* https://issues.dlang.org/show_bug.cgi?id=22175
             * The trouble happens when the struct size does not fit exactly into
             * 2 registers. Then the type of e becomes a TYucent, not a TYstruct,
             * and we need to dereference DI to get the ucent
             */

            // dereference DI
            code cs;
            cs.Iop = 0x8B;
            regm_t retregs = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregs,&reg,tym);

            reg_t msreg = findregmsw(retregs);
            buildEA(&cs,DI,-1,1,REGSIZE);
            code_newreg(&cs,msreg);
            cs.Irex |= REX_W;
            cdb.gen(&cs);       // MOV msreg,REGSIZE[DI]        // msreg is never DI

            reg_t lsreg = findreglsw(retregs);
            buildEA(&cs,DI,-1,1,0);
            code_newreg(&cs,lsreg);
            cs.Irex |= REX_W;
            cdb.gen(&cs);       // MOV lsreg,[DI];
            fixresult(cdb,e,retregs,pretregs);
            return;
        }

        regm_t retregs = mDI;
        if (*pretregs & mMSW && !(config.exe & EX_flat))
            retregs |= mES;
        fixresult(cdb,e,retregs,pretregs);
    }
}


/**********************
 * Get the address of.
 * Is also called by cdstreq() to set up pointer to a structure.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = elem whose address is wanted; the offset part is loaded by getoffset()
 *      pretregs = registers the address is wanted in; a request for mPSW is
 *          satisfied with a TEST of SP and then cleared
 */

@trusted
void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    /* The following should not happen, but cgelem.c is a little stupid.
     * Assertion can be tripped by func("string" == 0); and similar
     * things. Need to add goals to optelem() to fix this completely.
     */
    //assert((*pretregs & mPSW) == 0);
    if (*pretregs & mPSW)
    {
        *pretregs &= ~mPSW;
        gentstreg(cdb,SP);                      // SP is never 0
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    if (!*pretregs)
        return;

    assert(e);
    tym_t tym = tybasic(e.Ety);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
        case TYldouble:
        case TYildouble:
        case TYcldouble:
            tym = TYnptr;                       // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
            {
                tym = TYfptr;
            }
            break;

        case TYifunc:
            tym = TYfptr;
            break;

        default:
            if (tyfunc(tym))
                tym =
                    tyfarfunc(tym) ? TYfptr :
                    TYnptr;
            break;
    }
    //assert(tym & typtr);                      // don't fail on (int)&a

    SC sclass;
    reg_t mreg,                                 // segment of the address (TYfptrs only)
          lreg;                                 // offset of the address

    allocreg(cdb,pretregs,&lreg,tym);
    if (_tysize[tym] > REGSIZE)                 // fptr could've been cast to long
    {
        if (*pretregs & mES)
        {
            /* Do not allocate CX or SI here, as cdstreq() needs
             * them preserved. cdstreq() should use scodelem()
             */
            mreg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(lreg));
        }
        else
        {
            mreg = lreg;
            lreg = findreglsw(*pretregs);
        }

        /* if (get segment of function that isn't necessarily in the
         * current segment (i.e. CS doesn't have the right value in it)
         */
        Symbol *s = e.EV.Vsym;
        if (s.Sfl == FLdatseg)
        {   assert(0);
        }
        sclass = s.Sclass;
        const ety = tybasic(s.ty());
        if ((tyfarfunc(ety) || ety == TYifunc) &&
            (sclass == SC.extern_ || ClassInline(sclass) || config.wflags & WFthunk)
            || s.Sfl == FLfardata
            || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SC.comdat))
           )
        {   // MOV mreg,seg of symbol
            cdb.gencs(0xB8 + mreg,0,FLextern,s);
            cdb.last().Iflags = CFseg;
        }
        else
        {
            const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
            cdb.gen2(0x8C,                      // MOV mreg,SEG REGISTER
                modregrm(3,segfl[fl],mreg));
        }
        if (*pretregs & mES)
            cdb.gen2(0x8E,modregrm(3,0,mreg));  // MOV ES,mreg
    }
    getoffset(cdb,e,lreg);
}

/*********************************
 * Load the offset portion of the address represented by e into
 * reg.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = an OPvar or OPrelconst elem (asserted)
 *      reg = destination register, or STACK to push the offset instead
 */

@trusted
void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
{
    //printf("getoffset(e = %p, reg = %d)\n", e, reg);
    code cs = void;
    cs.Iflags = 0;
    ubyte rex = 0;
    cs.Irex = rex;
    assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
    auto fl = el_fl(e);
    switch (fl)
    {
        case FLdatseg:
            cs.IEV2.Vpointer = e.EV.Vpointer;
            goto L3;

        case FLfardata:
            goto L4;

        case FLtlsdata:
        if (config.exe & EX_posix)
        {
          Lposix:
            if (config.flags3 & CFG3pic)
            {
                if (I64)
                {
                    /* Generate:
                     *   LEA DI,s@TLSGD[RIP]
                     */
                    //assert(reg == DI);
                    code css = void;
                    css.Irex = REX | REX_W;
                    css.Iop = LEA;
                    css.Irm = modregrm(0,reg,5);
                    if (reg & 8)
                        css.Irex |= REX_R;
                    css.Iflags = CFopsize;
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                else
                {
                    /* Generate:
                     *   LEA EAX,s@TLSGD[1*EBX+0]
                     */
                    assert(reg == AX);
                    load_localgot(cdb);
                    code css = void;
                    css.Iflags = 0;
                    css.Iop = LEA;              // LEA
                    css.Irex = 0;
                    css.Irm = modregrm(0,AX,4);
                    css.Isib = modregrm(0,BX,5);
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                return;
            }
            /* Generate:
             *      MOV reg,GS:[00000000]
             *      ADD reg, offset s@TLS_LE
             * for locals, and for globals:
             *      MOV reg,GS:[00000000]
             *      ADD reg, s@TLS_IE
             * note different fixup
             */
            int stack = 0;
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                stack = 1;
            }

            code css = void;
            css.Irex = rex;
            css.Iop = 0x8B;
            css.Irm = modregrm(0, 0, BPRM);
            code_newreg(&css, reg);
            css.Iflags = CFgs;
            css.IFL1 = FLconst;
            css.IEV1.Vuns = 0;
            cdb.gen(&css);                      // MOV reg,GS:[00000000]

            if (e.EV.Vsym.Sclass == SC.static_ || e.EV.Vsym.Sclass == SC.locstat)
            {   // ADD reg, offset s
                cs.Irex = rex;
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,0,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
            }
            else
            {   // ADD reg, s
                cs.Irex = rex;
                cs.Iop = 0x03;
                cs.Irm = modregrm(0,0,BPRM);
                code_newreg(&cs, reg);
                cs.Iflags = CFoff;
                cs.IFL1 = cast(ubyte)fl;
                cs.IEV1.Vsym = e.EV.Vsym;
                cs.IEV1.Voffset = e.EV.Voffset;
            }
            cdb.gen(&cs);                       // ADD reg, xxxx

            if (stack)
            {
                cdb.gen1(0x50 + (reg & 7));     // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            break;
        }
        else if (config.exe & EX_windos)
        {
            if (I64)
            {
            Lwin64:
                assert(reg != STACK);
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;              // want offset only
                cs.IFL2 = cast(ubyte)fl;
                cdb.gen(&cs);
                break;
            }
            goto L4;
        }
        else
        {
            goto L4;
        }

        case FLfunc:
            fl = FLextern;                      /* don't want PC relative addresses */
            goto L4;

        case FLextern:
            if (config.exe & EX_posix && e.EV.Vsym.ty() & mTYthread)
                goto Lposix;
            if (config.exe & EX_WIN64 && e.EV.Vsym.ty() & mTYthread)
                goto Lwin64;
            goto L4;

        case FLdata:
        case FLudata:
        case FLgot:
        case FLgotoff:
        case FLcsdata:
        L4:
            cs.IEV2.Vsym = e.EV.Vsym;
            cs.IEV2.Voffset = e.EV.Voffset;
        L3:
            if (reg == STACK)
            {   stackchanged = 1;
                cs.Iop = 0x68;                  /* PUSH immed16                 */
                cdb.genadjesp(REGSIZE);
            }
            else
            {   cs.Iop = 0xB8 + (reg & 7);      // MOV reg,immed16
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64)
                {   cs.Irex |= REX_W;
                    if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
                    {   // LEA reg,immed32[RIP]
                        cs.Iop = LEA;
                        cs.Irm = modregrm(0,reg & 7,5);
                        if (reg & 8)
                            cs.Irex = (cs.Irex & ~REX_B) | REX_R;
                        cs.IFL1 = cast(ubyte)fl;
                        cs.IEV1.Vsym = cs.IEV2.Vsym;
                        cs.IEV1.Voffset = cs.IEV2.Voffset;
                    }
                }
            }
            cs.Iflags = CFoff;                  /* want offset only             */
            cs.IFL2 = cast(ubyte)fl;
            cdb.gen(&cs);
            break;

        case FLreg:
            /* Allow this since the tree optimizer puts & in front of   */
            /* register doubles.                                        */
            goto L2;
        case FLauto:
        case FLfast:
        case FLbprel:
        case FLfltreg:
            reflocal = true;
            goto L2;
        case FLpara:
            refparam = true;
        L2:
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                cdb.gen1(0x50 + (reg & 7));         // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            else
            {
                loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            break;

        default:
            debug
            {
                elem_print(e);
                WRFL(fl);
            }
            assert(0);
    }
}


/******************
 * OPneg, OPsqrt, OPsin, OPcos, OPrint
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the unary elem; e.E1 is the operand
 *      pretregs = registers/flags the result is wanted in; if 0 the operand
 *          is only evaluated
 */

@trusted
void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdneg()\n");
    //elem_print(e);
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    if (tyfloating(tyml))
    {
        if (tycomplex(tyml))
        {
            neg_complex87(cdb, e, pretregs);
            return;
        }
        if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
        {
            xmmneg(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 &&
            ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        // Floating value held in integer registers: flip the sign bit with XOR
        regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x8000);     // XOR AX,0x8000
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    const possregs = (isbyte) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                       // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        const reg = findreg(retregs);
        uint rex = (I64 && sz == 8) ? REX_W : 0;
        if (I64 && sz == 1 && reg >= 4)
            rex |= REX;
        cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
        if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        const msreg = findregmsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,msreg));     // NEG msreg
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,lsreg));     // NEG lsreg
        code_orflag(cdb.last(), CFpsw);         // need flag result of previous NEG
        cdb.genc2(0x81,modregrm(3,3,msreg),0);  // SBB msreg,0
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}


/******************
 * Absolute value operator
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the abs elem; e.E1 is the operand (byte-sized integers are not
 *          supported, see the assert on isbyte)
 *      pretregs = registers/flags the result is wanted in; if 0 the operand
 *          is only evaluated
 */


@trusted
void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    const rex = (I64 && sz == 8) ? REX_W : 0;
    if (tyfloating(tyml))
    {
        if (tyxmmreg(tyml) && *pretregs & XMMREGS)
        {
            xmmabs(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        // Floating value held in integer registers: clear the sign bit with AND
        regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF); // AND EDX,~sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);     // AND AX,0x7FFF
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    assert(isbyte == 0);
    regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
    if (!I16 && sz == REGSIZE)
        possregs = allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                       // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        /*      CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     r,reg
                SAR     r,63
                XOR     reg,r
                SUB     reg,r
         */
        reg_t reg;
        reg_t r;

        if (!I16 && sz == REGSIZE)
        {
            reg = findreg(retregs);
            r = allocScratchReg(cdb, allregs & ~retregs);
            getregs(cdb,retregs);
            genmovreg(cdb,r,reg);                               // MOV r,reg
            cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);   // SAR r,31/63
            code_orrex(cdb.last(), rex);
        }
        else
        {
            reg = AX;
            r = DX;
            getregs(cdb,mDX);
            if (!I16 && sz == SHORTSIZE)
                cdb.gen1(0x98);                                 // CWDE
            cdb.gen1(0x99);                                     // CWD
            code_orrex(cdb.last(), rex);
        }
        cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
        cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
        if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        if (*pretregs & mPSW)
            cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;                     // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *cnop = gennop(null);
        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        genregs(cdb,0x09,msreg,msreg);              // OR msreg,msreg
        genjmp(cdb,JNS,FLcode,cast(block *)cnop);
        cdb.gen2(0xF7,modregrm(3,3,msreg));         // NEG msreg
        cdb.gen2(0xF7,modregrm(3,3,lsreg));         // NEG lsreg+1
        cdb.genc2(0x81,modregrm(3,3,msreg),0);      // SBB msreg,0
        cdb.append(cnop);
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}

/**************************
 * Post increment and post decrement.
 */

@trusted
void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                      // if nothing to return
    {
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ?
REX_W : 0; 5203 5204 if (tyfloating(tyml)) 5205 { 5206 if (config.fpxmmregs && tyxmmreg(tyml) && 5207 !tycomplex(tyml) // SIMD code is not set up to deal with complex 5208 ) 5209 { 5210 xmmpost(cdb,e,pretregs); 5211 return; 5212 } 5213 5214 if (config.inline8087) 5215 { 5216 post87(cdb,e,pretregs); 5217 return; 5218 } 5219 if (config.exe & EX_windos) 5220 { 5221 assert(sz <= 8); 5222 getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS); 5223 freenode(e.EV.E1); 5224 regm_t idxregs = idxregm(&cs); // mask of index regs used 5225 cs.Iop = 0x8B; /* MOV DOUBLEREGS,EA */ 5226 fltregs(cdb,&cs,tyml); 5227 stackchanged = 1; 5228 int stackpushsave = stackpush; 5229 regm_t retregs; 5230 if (sz == 8) 5231 { 5232 if (I32) 5233 { 5234 cdb.gen1(0x50 + DX); // PUSH DOUBLEREGS 5235 cdb.gen1(0x50 + AX); 5236 stackpush += DOUBLESIZE; 5237 retregs = DOUBLEREGS2_32; 5238 } 5239 else 5240 { 5241 cdb.gen1(0x50 + AX); 5242 cdb.gen1(0x50 + BX); 5243 cdb.gen1(0x50 + CX); 5244 cdb.gen1(0x50 + DX); /* PUSH DOUBLEREGS */ 5245 stackpush += DOUBLESIZE + DOUBLESIZE; 5246 5247 cdb.gen1(0x50 + AX); 5248 cdb.gen1(0x50 + BX); 5249 cdb.gen1(0x50 + CX); 5250 cdb.gen1(0x50 + DX); /* PUSH DOUBLEREGS */ 5251 retregs = DOUBLEREGS_16; 5252 } 5253 } 5254 else 5255 { 5256 stackpush += FLOATSIZE; /* so we know something is on */ 5257 if (!I32) 5258 cdb.gen1(0x50 + DX); 5259 cdb.gen1(0x50 + AX); 5260 retregs = FLOATREGS2; 5261 } 5262 cdb.genadjesp(stackpush - stackpushsave); 5263 5264 cgstate.stackclean++; 5265 scodelem(cdb,e2,&retregs,idxregs,false); 5266 cgstate.stackclean--; 5267 5268 if (tyml == TYdouble || tyml == TYdouble_alias) 5269 { 5270 retregs = DOUBLEREGS; 5271 callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub, 5272 &retregs,idxregs); 5273 } 5274 else /* tyml == TYfloat */ 5275 { 5276 retregs = FLOATREGS; 5277 callclib(cdb,e,(op == OPpostinc) ? 
CLIB.fadd : CLIB.fsub, 5278 &retregs,idxregs); 5279 } 5280 cs.Iop = 0x89; /* MOV EA,DOUBLEREGS */ 5281 fltregs(cdb,&cs,tyml); 5282 stackpushsave = stackpush; 5283 if (tyml == TYdouble || tyml == TYdouble_alias) 5284 { if (*pretregs == mSTACK) 5285 retregs = mSTACK; /* leave result on stack */ 5286 else 5287 { 5288 if (I32) 5289 { 5290 cdb.gen1(0x58 + AX); 5291 cdb.gen1(0x58 + DX); 5292 } 5293 else 5294 { 5295 cdb.gen1(0x58 + DX); 5296 cdb.gen1(0x58 + CX); 5297 cdb.gen1(0x58 + BX); 5298 cdb.gen1(0x58 + AX); 5299 } 5300 stackpush -= DOUBLESIZE; 5301 retregs = DOUBLEREGS; 5302 } 5303 } 5304 else 5305 { 5306 cdb.gen1(0x58 + AX); 5307 if (!I32) 5308 cdb.gen1(0x58 + DX); 5309 stackpush -= FLOATSIZE; 5310 retregs = FLOATREGS; 5311 } 5312 cdb.genadjesp(stackpush - stackpushsave); 5313 fixresult(cdb,e,retregs,pretregs); 5314 return; 5315 } 5316 } 5317 if (tyxmmreg(tyml)) 5318 { 5319 xmmpost(cdb,e,pretregs); 5320 return; 5321 } 5322 5323 assert(e2.Eoper == OPconst); 5324 uint isbyte = (sz == 1); 5325 regm_t possregs = isbyte ? 
BYTEREGS : allregs; 5326 getlvalue(cdb,&cs,e.EV.E1,0); 5327 freenode(e.EV.E1); 5328 regm_t idxregs = idxregm(&cs); // mask of index regs used 5329 if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 && 5330 (!I16 || (idxregs & (mBX | mSI | mDI | mBP)))) 5331 { 5332 // Generate: 5333 // TEST reg,reg 5334 // LEA reg,n[reg] // don't affect flags 5335 reg_t reg = cs.Irm & 7; 5336 if (cs.Irex & REX_B) 5337 reg |= 8; 5338 cs.Iop = 0x85 ^ isbyte; 5339 code_newreg(&cs, reg); 5340 cs.Iflags |= CFpsw; 5341 cdb.gen(&cs); // TEST reg,reg 5342 5343 // If lvalue is a register variable, we must mark it as modified 5344 modEA(cdb,&cs); 5345 5346 auto n = e2.EV.Vint; 5347 if (op == OPpostdec) 5348 n = -n; 5349 int rm = reg; 5350 if (I16) 5351 { 5352 static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c 5353 rm = regtorm[reg]; 5354 } 5355 cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg] 5356 return; 5357 } 5358 else if (sz <= REGSIZE || tyfv(tyml)) 5359 { 5360 code cs2 = void; 5361 5362 cs.Iop = 0x8B ^ isbyte; 5363 regm_t retregs = possregs & ~idxregs & *pretregs; 5364 if (!tyfv(tyml)) 5365 { 5366 if (retregs == 0) 5367 retregs = possregs & ~idxregs; 5368 } 5369 else /* tyfv(tyml) */ 5370 { 5371 if ((retregs &= mLSW) == 0) 5372 retregs = mLSW & ~idxregs; 5373 /* Can't use LES if the EA uses ES as a seg override */ 5374 if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes) 5375 { cs.Iop = 0xC4; /* LES */ 5376 getregs(cdb,mES); // allocate ES 5377 } 5378 } 5379 reg_t reg; 5380 allocreg(cdb,&retregs,®,TYint); 5381 code_newreg(&cs, reg); 5382 if (sz == 1 && I64 && reg >= 4) 5383 cs.Irex |= REX; 5384 cdb.gen(&cs); // MOV reg,EA 5385 cs2 = cs; 5386 5387 /* If lvalue is a register variable, we must mark it as modified */ 5388 modEA(cdb,&cs); 5389 5390 cs.Iop = 0x81 ^ isbyte; 5391 cs.Irm &= ~cast(int)modregrm(0,7,0); // reg field = 0 5392 cs.Irex &= ~REX_R; 5393 if (op == OPpostdec) 5394 cs.Irm |= modregrm(0,5,0); 
/* SUB */ 5395 cs.IFL2 = FLconst; 5396 targ_int n = e2.EV.Vint; 5397 cs.IEV2.Vint = n; 5398 if (n == 1) /* can use INC or DEC */ 5399 { 5400 cs.Iop |= 0xFE; /* xFE is dec byte, xFF is word */ 5401 if (op == OPpostdec) 5402 NEWREG(cs.Irm,1); // DEC EA 5403 else 5404 NEWREG(cs.Irm,0); // INC EA 5405 } 5406 else if (n == -1) // can use INC or DEC 5407 { 5408 cs.Iop |= 0xFE; // xFE is dec byte, xFF is word 5409 if (op == OPpostinc) 5410 NEWREG(cs.Irm,1); // DEC EA 5411 else 5412 NEWREG(cs.Irm,0); // INC EA 5413 } 5414 5415 // For scheduling purposes, we wish to replace: 5416 // MOV reg,EA 5417 // OP EA 5418 // with: 5419 // MOV reg,EA 5420 // OP reg 5421 // MOV EA,reg 5422 // ~OP reg 5423 if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 5424 config.target_cpu >= TARGET_Pentium && 5425 config.flags4 & CFG4speed) 5426 { 5427 // Replace EA in cs with reg 5428 cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7); 5429 if (reg & 8) 5430 { cs.Irex &= ~REX_R; 5431 cs.Irex |= REX_B; 5432 } 5433 else 5434 cs.Irex &= ~REX_B; 5435 if (I64 && sz == 1 && reg >= 4) 5436 cs.Irex |= REX; 5437 cdb.gen(&cs); // ADD/SUB reg,const 5438 5439 // Reverse MOV direction 5440 cs2.Iop ^= 2; 5441 cdb.gen(&cs2); // MOV EA,reg 5442 5443 // Toggle INC <. DEC, ADD <. SUB 5444 cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0); 5445 cdb.gen(&cs); 5446 5447 if (*pretregs & mPSW) 5448 { *pretregs &= ~mPSW; // flags already set 5449 code_orflag(cdb.last(),CFpsw); 5450 } 5451 } 5452 else 5453 cdb.gen(&cs); // ADD/SUB EA,const 5454 5455 freenode(e2); 5456 if (tyfv(tyml)) 5457 { 5458 reg_t preg; 5459 5460 getlvalue_msw(&cs); 5461 if (*pretregs & mES) 5462 { 5463 preg = ES; 5464 /* ES is already loaded if CFes is 0 */ 5465 cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 
0x8E : NOP; 5466 NEWREG(cs.Irm,0); /* MOV ES,EA+2 */ 5467 } 5468 else 5469 { 5470 regm_t retregsx = *pretregs & mMSW; 5471 if (!retregsx) 5472 retregsx = mMSW; 5473 allocreg(cdb,&retregsx,&preg,TYint); 5474 cs.Iop = 0x8B; 5475 if (I32) 5476 cs.Iflags |= CFopsize; 5477 NEWREG(cs.Irm,preg); /* MOV preg,EA+2 */ 5478 } 5479 getregs(cdb,mask(preg)); 5480 cdb.gen(&cs); 5481 retregs = mask(reg) | mask(preg); 5482 } 5483 fixresult(cdb,e,retregs,pretregs); 5484 return; 5485 } 5486 else if (tyml == TYhptr) 5487 { 5488 uint rvalue; 5489 reg_t lreg; 5490 reg_t rtmp; 5491 regm_t mtmp; 5492 5493 rvalue = e2.EV.Vlong; 5494 freenode(e2); 5495 5496 // If h--, convert to h++ 5497 if (e.Eoper == OPpostdec) 5498 rvalue = -rvalue; 5499 5500 regm_t retregs = mLSW & ~idxregs & *pretregs; 5501 if (!retregs) 5502 retregs = mLSW & ~idxregs; 5503 allocreg(cdb,&retregs,&lreg,TYint); 5504 5505 // Can't use LES if the EA uses ES as a seg override 5506 if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes) 5507 { cs.Iop = 0xC4; 5508 retregs |= mES; 5509 getregs(cdb,mES|mCX); // allocate ES 5510 cs.Irm |= modregrm(0,lreg,0); 5511 cdb.gen(&cs); // LES lreg,EA 5512 } 5513 else 5514 { cs.Iop = 0x8B; 5515 retregs |= mDX; 5516 getregs(cdb,mDX|mCX); 5517 cs.Irm |= modregrm(0,lreg,0); 5518 cdb.gen(&cs); // MOV lreg,EA 5519 NEWREG(cs.Irm,DX); 5520 getlvalue_msw(&cs); 5521 cdb.gen(&cs); // MOV DX,EA+2 5522 getlvalue_lsw(&cs); 5523 } 5524 5525 // Allocate temporary register, rtmp 5526 mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs; 5527 allocreg(cdb,&mtmp,&rtmp,TYint); 5528 5529 movregconst(cdb,rtmp,rvalue >> 16,0); // MOV rtmp,e2+2 5530 getregs(cdb,mtmp); 5531 cs.Iop = 0x81; 5532 NEWREG(cs.Irm,0); 5533 cs.IFL2 = FLconst; 5534 cs.IEV2.Vint = rvalue; 5535 cdb.gen(&cs); // ADD EA,e2 5536 code_orflag(cdb.last(),CFpsw); 5537 cdb.genc2(0x81,modregrm(3,2,rtmp),0); // ADC rtmp,0 5538 genshift(cdb); // MOV CX,offset __AHSHIFT 5539 cdb.gen2(0xD3,modregrm(3,4,rtmp)); // SHL rtmp,CL 5540 cs.Iop = 0x01; 5541 
NEWREG(cs.Irm,rtmp); // ADD EA+2,rtmp 5542 getlvalue_msw(&cs); 5543 cdb.gen(&cs); 5544 fixresult(cdb,e,retregs,pretregs); 5545 return; 5546 } 5547 else if (sz == 2 * REGSIZE) 5548 { 5549 regm_t retregs = allregs & ~idxregs & *pretregs; 5550 if ((retregs & mLSW) == 0) 5551 retregs |= mLSW & ~idxregs; 5552 if ((retregs & mMSW) == 0) 5553 retregs |= ALLREGS & mMSW; 5554 assert(retregs & mMSW && retregs & mLSW); 5555 reg_t reg; 5556 allocreg(cdb,&retregs,®,tyml); 5557 uint sreg = findreglsw(retregs); 5558 cs.Iop = 0x8B; 5559 cs.Irm |= modregrm(0,sreg,0); 5560 cdb.gen(&cs); // MOV sreg,EA 5561 NEWREG(cs.Irm,reg); 5562 getlvalue_msw(&cs); 5563 cdb.gen(&cs); // MOV reg,EA+2 5564 cs.Iop = 0x81; 5565 cs.Irm &= ~cast(int)modregrm(0,7,0); /* reg field = 0 for ADD */ 5566 if (op == OPpostdec) 5567 cs.Irm |= modregrm(0,5,0); /* SUB */ 5568 getlvalue_lsw(&cs); 5569 cs.IFL2 = FLconst; 5570 cs.IEV2.Vlong = e2.EV.Vlong; 5571 cdb.gen(&cs); // ADD/SUB EA,const 5572 code_orflag(cdb.last(),CFpsw); 5573 getlvalue_msw(&cs); 5574 cs.IEV2.Vlong = 0; 5575 if (op == OPpostinc) 5576 cs.Irm ^= modregrm(0,2,0); /* ADC */ 5577 else 5578 cs.Irm ^= modregrm(0,6,0); /* SBB */ 5579 cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8)); 5580 cdb.gen(&cs); // ADC/SBB EA,0 5581 freenode(e2); 5582 fixresult(cdb,e,retregs,pretregs); 5583 return; 5584 } 5585 else 5586 { 5587 assert(0); 5588 } 5589 } 5590 5591 5592 void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5593 { 5594 debug 5595 elem_print(e); 5596 5597 //printf("op = %d, %d\n", e.Eoper, OPstring); 5598 //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen); 5599 //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring); 5600 assert(0); 5601 } 5602 5603 @trusted 5604 void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5605 { 5606 switch (e.EV.E1.Eoper) 5607 { 5608 case OPdctor: 5609 codelem(cdb,e.EV.E2,pretregs,false); 5610 regm_t retregs = 0; 5611 codelem(cdb,e.EV.E1,&retregs,false); 5612 break; 
5613 default: 5614 assert(0); 5615 } 5616 } 5617 5618 /******************************************* 5619 * D constructor. 5620 * OPdctor 5621 */ 5622 5623 @trusted 5624 void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5625 { 5626 /* Generate: 5627 ESCAPE | ESCdctor 5628 MOV sindex[BP],index 5629 */ 5630 usednteh |= EHcleanup; 5631 if (config.ehmethod == EHmethod.EH_WIN32) 5632 { usednteh |= NTEHcleanup | NTEH_try; 5633 nteh_usevars(); 5634 } 5635 assert(*pretregs == 0); 5636 code cs; 5637 cs.Iop = ESCAPE | ESCdctor; // mark start of EH range 5638 cs.Iflags = 0; 5639 cs.Irex = 0; 5640 cs.IFL1 = FLctor; 5641 cs.IEV1.Vtor = e; 5642 cdb.gen(&cs); 5643 nteh_gensindex(cdb,0); // the actual index will be patched in later 5644 // by except_fillInEHTable() 5645 } 5646 5647 /******************************************* 5648 * D destructor. 5649 * OPddtor 5650 */ 5651 5652 @trusted 5653 void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5654 { 5655 if (config.ehmethod == EHmethod.EH_DWARF) 5656 { 5657 usednteh |= EHcleanup; 5658 5659 code cs; 5660 cs.Iop = ESCAPE | ESCddtor; // mark end of EH range and where landing pad is 5661 cs.Iflags = 0; 5662 cs.Irex = 0; 5663 cs.IFL1 = FLdtor; 5664 cs.IEV1.Vtor = e; 5665 cdb.gen(&cs); 5666 5667 // Mark all registers as destroyed 5668 getregsNoSave(allregs); 5669 5670 assert(*pretregs == 0); 5671 codelem(cdb,e.EV.E1,pretregs,false); 5672 return; 5673 } 5674 else 5675 { 5676 /* Generate: 5677 ESCAPE | ESCddtor 5678 MOV sindex[BP],index 5679 CALL dtor 5680 JMP L1 5681 Ldtor: 5682 ... e.EV.E1 ... 
5683 RET 5684 L1: NOP 5685 */ 5686 usednteh |= EHcleanup; 5687 if (config.ehmethod == EHmethod.EH_WIN32) 5688 { usednteh |= NTEHcleanup | NTEH_try; 5689 nteh_usevars(); 5690 } 5691 5692 code cs; 5693 cs.Iop = ESCAPE | ESCddtor; 5694 cs.Iflags = 0; 5695 cs.Irex = 0; 5696 cs.IFL1 = FLdtor; 5697 cs.IEV1.Vtor = e; 5698 cdb.gen(&cs); 5699 5700 nteh_gensindex(cdb,0); // the actual index will be patched in later 5701 // by except_fillInEHTable() 5702 5703 // Mark all registers as destroyed 5704 getregsNoSave(allregs); 5705 5706 assert(*pretregs == 0); 5707 CodeBuilder cdbx; 5708 cdbx.ctor(); 5709 codelem(cdbx,e.EV.E1,pretregs,false); 5710 cdbx.gen1(0xC3); // RET 5711 code *c = cdbx.finish(); 5712 5713 int nalign = 0; 5714 if (STACKALIGN >= 16) 5715 { 5716 nalign = STACKALIGN - REGSIZE; 5717 cod3_stackadj(cdb, nalign); 5718 } 5719 calledafunc = 1; 5720 genjmp(cdb,0xE8,FLcode,cast(block *)c); // CALL Ldtor 5721 if (nalign) 5722 cod3_stackadj(cdb, -nalign); 5723 5724 code *cnop = gennop(null); 5725 5726 genjmp(cdb,JMP,FLcode,cast(block *)cnop); 5727 cdb.append(cdbx); 5728 cdb.append(cnop); 5729 return; 5730 } 5731 } 5732 5733 5734 /******************************************* 5735 * C++ constructor. 
5736 */ 5737 5738 @trusted 5739 void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5740 { 5741 } 5742 5743 /****** 5744 * OPdtor 5745 */ 5746 void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5747 { 5748 } 5749 5750 void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5751 { 5752 } 5753 5754 static if (!NTEXCEPTIONS) 5755 { 5756 void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5757 { 5758 assert(0); 5759 } 5760 } 5761 5762 /***************************************** 5763 */ 5764 5765 @trusted 5766 void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5767 { 5768 assert(*pretregs == 0); 5769 codelem(cdb,e.EV.E1,pretregs,false); 5770 } 5771 5772 /***************************************** 5773 */ 5774 5775 @trusted 5776 void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5777 { 5778 assert(*pretregs == 0); 5779 cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3); 5780 }