/**
 * Code generation 4
 *
 * Includes:
 * - assignment variations of operators (+= -= *= /= %= <<= >>=)
 * - integer comparison (< > <= >=)
 * - converting integers to a different size (e.g. short to int)
 * - bit instructions (bit scan, population count)
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Mostly code generation for assignment operators.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
 */

module dmd.backend.cod4;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;
import dmd.backend.xmm;

extern (C++):

nothrow:
@safe:

extern __gshared CGstate cgstate;
extern __gshared bool[FLMAX] datafl;

/// Returns a value with only bit number `m` set (1 << m).
private extern (D) uint mask(uint m) { return 1 << m; }

// Table indexed by a register number; the labels below name the index
// positions.  cdeq's 16 bit store loop steps through it with
// `reg = dblreg[reg]` when storing a double.
// NOTE(review): presumably this yields the register holding the next
// lower word of a double - confirm against the register numbering.
                   /*   AX,CX,DX,BX                */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];

// from divcoeff.c
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre,
ulong *pm, int *pshpost);
}

/*******************************
 * Count the references to a symbol inside an expression tree.
 * Params:
 *      s = symbol to look for
 *      e = root of the tree to search
 * Returns:
 *      number of OPvar leaves in e whose symbol is s
 */

@trusted
private int intree(Symbol *s,elem *e)
{
    const OPER oper = e.Eoper;

    // A leaf counts as 1 only when it is a variable reference to s
    if (OTleaf(oper))
        return oper == OPvar && e.EV.Vsym == s;

    // Interior node: always descend left, descend right only for binary operators
    int count = intree(s,e.EV.E1);
    if (OTbinary(oper))
        count += intree(s,e.EV.E2);
    return count;
}

/***********************************
 * Decide whether expression e may be computed directly in the register
 * that holds variable s.
 * Expressions such as x=x+x+x and x=a+x must be rejected, because the
 * target register would be overwritten while x is still needed.
 * Params:
 *      s = the register variable being assigned to
 *      e = the expression whose value is to be assigned
 * Returns:
 *      1       it can
 *      0       it cannot
 */

@trusted
int doinreg(Symbol *s, elem *e)
{
    while (true)
    {
        const OPER op = e.Eoper;

        // Indirections, calls and leaves are always acceptable
        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        // So is a tree that never mentions s, or a unary operator on a leaf
        const int uses = intree(s,e);
        if (uses == 0 ||
            (OTunary(op) && OTleaf(e.EV.E1.Eoper)))
            return 1;

        // More than one reference to s can never be done in place
        if (uses != 1)
            return 0;

        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                // The single reference must be in the left operand
                if (intree(s,e.EV.E2))
                    return 0;
                e = e.EV.E1;            // re-examine the left subtree
                break;

            default:
                return 0;
        }
    }
}

/****************************
 * Called just before an EA is modified.
 * When the addressing mode in c designates a register, call getregs()
 * on that register so common subexpressions held in it get saved.
 * Params:
 *      cdb = code builder handed to getregs()
 *      c   = instruction whose Irm/Irex describe the EA
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // Nothing to do unless the mod field selects a register operand
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t reg = c.Irm & 7;
    if (c.Irex & REX_B)                 // REX.B supplies bit 3 of the register number
    {
        reg |= 8;
        assert(I64);
    }
    getregs(cdb,mask(reg));
}


/****************************
 * Gen code for op= for doubles.
*/
/* Used for floating point op= when the operation is done through a runtime
 * library routine rather than inline 8087 code.
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = the op= elem; E1 is the lvalue, E2 the right operand
 *      pretregs = requested result registers, passed on to fixresult()
 *      op       = operator, used to select the library routine from clibtab
 */
@trusted
private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    // Library routine for each operator, indexed by (op - OPpostinc).
    // The OPeq slot holds cast(uint)-1.
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    // With inline 8087 code enabled, hand the whole job to opass87()
    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = LOD;
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            // second load, from the msw of the EA, into DX
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);         // point cs back at the lsw

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = LOD;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            // (four pushes, starting at the highest word of the double)
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;    // account for the bytes just pushed

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // Keep ES as well if the EA uses an ES segment override
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    cgstate.stackclean++;
    // Evaluate the right operand into retregs2, preserving idxregs
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);    // call the library routine
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = STO;                              // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}

/****************************
 * Gen code for OPnegass for doubles.
 */

@trusted
private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(config.exe & EX_windos);  // for targets that may not have an 8087

    // With inline 8087 code enabled, hand the whole job to cdnegass87()
    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    // Negate in place by flipping the top bit of the last byte of the operand
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tym))
    {
        // same again sz/2 bytes lower, for the other half of a complex value
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        // The result is wanted (or e1 is a CSE): load it into registers
        cs.IEV1.Voffset -= sz - 1;

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = LOD;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = LOD;
                cs.Irm &= ~cast(uint)modregrm(0,7,0);   // clear the reg field
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
                static if (1)
                {
                    cs.Iop = LOD;
                    fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
                }
                else
                {
                    //
// Push EA onto stack
                    cs.Iop = 0xFF;
                    cs.Irm |= modregrm(0,6,0);
                    cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    cs.IEV1.Voffset -= REGSIZE;
                    cdb.gen(&cs);
                    stackpush += DOUBLESIZE;
                }
            }
            retregs = DOUBLEREGS;
        }
        if (e1.Ecount)
            cssave(e1,retregs,!OTleaf(e1.Eoper));         /* if lvalue is a CSE  */
    }
    else
    {
        retregs = 0;
        assert(e1.Ecount == 0);
    }

    freenode(e1);
    fixresult(cdb,e,retregs,pretregs);
}



/************************
 * Generate code for an assignment.
 *
 * XMM and x87 operands are dispatched to xmmeq(), complex_eq87() and eq87().
 * Otherwise, when no result is wanted and the right side is a constant or
 * a suitable address constant, the value is stored straight into the lvalue.
 * In the general case the right side is evaluated into registers (directly
 * into the target register variable when that is safe) and then stored.
 * The special form (*p++ = ...) with p a register variable has its
 * increment applied at label Lp after the store.
 *
 * Params:
 *      cdb      = code builder receiving the generated instructions
 *      e        = the assignment elem; E1 is the lvalue, E2 the rvalue
 *      pretregs = mask of registers/flags the result is wanted in;
 *                 0 means the value of the assignment is not used
 */

@trusted
void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tymll;
    reg_t reg;
    code cs;
    elem *e11;
    bool regvar;                  // true means evaluate into register variable
    regm_t varregm;
    reg_t varreg;
    targ_int postinc;

    //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int e2oper = e2.Eoper;
    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    regm_t retregs = *pretregs;

    if (tyxmmreg(tyml) && config.fpxmmregs)
    {
        xmmeq(cdb, e, CMP, e1, e2, pretregs);
        return;
    }

    if (tyfloating(tyml) && config.inline8087)
    {
        if (tycomplex(tyml))
        {
            complex_eq87(cdb, e, pretregs);
            return;
        }

        // Use the x87 unless this is a plain copy whose value is unused
        if (!(retregs == 0 &&
              (e2oper == OPconst || e2oper == OPvar || e2oper == OPind))
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (config.target_cpu >= TARGET_PentiumPro &&
            (e2oper == OPvar || e2oper == OPind)
           )
        {
            eq87(cdb,e,pretregs);
            return;
        }
        if (tyml == TYldouble || tyml == TYildouble)
        {
            eq87(cdb,e,pretregs);
            return;
        }
    }

    uint sz = _tysize[tyml];           // # of bytes to transfer
    assert(cast(int)sz > 0);

    if (retregs == 0)                   // if no return value
    {
        int fl;

        /* If registers are tight, and we might need them for the lvalue,
         * prefer to not use them for the rvalue
         */
        bool plenty = true;
        if (e1.Eoper == OPind)
        {
            /* Will need 1 register for evaluation, +2 registers for
             * e1's addressing mode
             */
            regm_t m = allregs & ~regcon.mvar;  // mask of non-register variables
            m &= m - 1;         // clear least significant bit
            m &= m - 1;         // clear least significant bit
            plenty = m != 0;    // at least 3 registers
        }

        if ((e2oper == OPconst ||       // if rvalue is a constant
             e2oper == OPrelconst &&
             !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
             ((fl = el_fl(e2)) == FLdata ||
              fl==FLudata || fl == FLextern)
             && !(e2.EV.Vsym.ty() & mTYcs)
            ) &&
            !(evalinregister(e2) && plenty) &&
            !e1.Ecount)        // and no CSE headaches
        {
            // Look for special case of (*p++ = ...), where p is a register variable
            if (e1.Eoper == OPind &&
                ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
                e11.EV.E1.Eoper == OPvar &&
                e11.EV.E1.EV.Vsym.Sfl == FLreg &&
                (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
               )
            {
                Symbol *s = e11.EV.E1.EV.Vsym;
                if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
                {
                    regcon.params &= ~s.Spregm();
                }
                postinc = e11.EV.E2.EV.Vint;
                if (e11.Eoper == OPpostdec)
                    postinc = -postinc;
                getlvalue(cdb,&cs,e1,RMstore);
                freenode(e11.EV.E2);
            }
            else
            {
                postinc = 0;
                getlvalue(cdb,&cs,e1,RMstore);

                // When optimizing for speed on Pentium / Pentium MMX, and the EA
                // has mod field 2, go through a register instead of storing
                // an immediate directly.
                if (e2oper == OPconst &&
                    config.flags4 & CFG4speed &&
                    (config.target_cpu == TARGET_Pentium ||
                     config.target_cpu == TARGET_PentiumMMX) &&
                    (cs.Irm & 0xC0) == 0x80
                   )
                {
                    if (I64 && sz == 8 && e2.EV.Vpointer)
                    {
                        // MOV reg,imm64
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vpointer,&regx,64);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        regm_t rregm = allregs & ~idxregm(&cs);
                        reg_t regx;
                        regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                        cs.Iop = STO;
                        cs.Irm |= modregrm(0,regx & 7,0);
                        if (regx & 8)
                            cs.Irex |= REX_R;
                        cdb.gen(&cs);
                        freenode(e2);
                        goto Lp;
                    }
                    if (sz == 2 * REGSIZE && e2.EV.Vllong == 0)
                    {
                        // MOV reg,imm
                        // MOV EA,reg
                        // MOV EA+2,reg
                        regm_t rregm = getscratch() & ~idxregm(&cs);
                        if (rregm)
                        {
                            reg_t regx;
                            regwithvalue(cdb,rregm,e2.EV.Vint,&regx,0);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx,0);
                            cdb.gen(&cs);
                            getlvalue_msw(&cs);
                            cdb.gen(&cs);
                            freenode(e2);
                            goto Lp;
                        }
                    }
                }
            }

            // If loading result into a register
            if ((cs.Irm & 0xC0) == 0xC0)
            {
                modEA(cdb,&cs);
                if (sz == 2 * REGSIZE && cs.IFL1 == FLreg)
                    getregs(cdb,cs.IEV1.Vsym.Sregm);
            }
            cs.Iop = (sz == 1) ? 0xC6 : 0xC7;

            if (e2oper == OPrelconst)
            {
                cs.IEV2.Voffset = e2.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e2.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);       // MOV EA,&variable
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (sz > REGSIZE)
                {
                    cs.Iop = 0x8C;
                    getlvalue_msw(&cs);
                    cs.Irm |= modregrm(0,3,0);
                    cdb.gen(&cs);   // MOV EA+2,DS
                }
            }
            else
            {
                assert(e2oper == OPconst);
                cs.IFL2 = FLconst;
                // p walks over the bytes of the constant stored in e2.EV
                targ_size_t *p = cast(targ_size_t *) &(e2.EV);
                cs.IEV2.Vsize_t = *p;
                // Look for loading a register variable
                if ((cs.Irm & 0xC0) == 0xC0)
                {
                    reg_t regx = cs.Irm & 7;

                    if (cs.Irex & REX_B)
                        regx |= 8;
                    if (I64 && sz == 8)
                        movregconst(cdb,regx,*p,64);
                    else
                        movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1));
                    if (sz == 2 * REGSIZE)
                    {   getlvalue_msw(&cs);
                        if (REGSIZE == 2)
                            movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0);
                        else if (REGSIZE == 4)
                            movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0);
                        else if (REGSIZE == 8)
                            movregconst(cdb,cs.Irm & 7,p[1],0);
                        else
                            assert(0);
                    }
                }
                else if (I64 && sz == 8 && *p >= 0x80000000)
                {   // Use 64 bit MOV, as the 32 bit one gets sign extended
                    // MOV reg,imm64
                    // MOV EA,reg
                    regm_t rregm = allregs & ~idxregm(&cs);
                    reg_t regx;
                    regwithvalue(cdb,rregm,*p,&regx,64);
                    cs.Iop = STO;
                    cs.Irm |= modregrm(0,regx & 7,0);
                    if (regx & 8)
                        cs.Irex |= REX_R;
                    cdb.gen(&cs);
                }
                else
                {
                    // Store the constant one chunk of regsize bytes at a time
                    int off = sz;
                    do
                    {   int regsize = REGSIZE;
                        if (off >= 4 && I16 && config.target_cpu >= TARGET_80386)
                        {
                            regsize = 4;
                            cs.Iflags |= CFopsize;      // use opsize to do 32 bit operation
                        }
                        else if (I64 && sz == 16 && *p >= 0x80000000)
                        {
                            regm_t rregm = allregs & ~idxregm(&cs);
                            reg_t regx;
                            regwithvalue(cdb,rregm,*p,&regx,64);
                            cs.Iop = STO;
                            cs.Irm |= modregrm(0,regx & 7,0);
                            if (regx & 8)
                                cs.Irex |= REX_R;
                        }
                        else
                        {
                            regm_t retregsx = (sz == 1) ? BYTEREGS : allregs;
                            reg_t regx;
                            if (reghasvalue(retregsx,*p,&regx))
                            {
                                cs.Iop = (cs.Iop & 1) | 0x88;
                                cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx
                                if (regx & 8)
                                    cs.Irex |= REX_R;
                                if (I64 && sz == 1 && regx >= 4)
                                    cs.Irex |= REX;
                            }
                            if (!I16 && off == 2)      // if 16 bit operand
                                cs.Iflags |= CFopsize;
                            if (I64 && sz == 8)
                                cs.Irex |= REX_W;
                        }
                        cdb.gen(&cs);                   // MOV EA,const

                        // Advance to the next chunk and restore the
                        // store-immediate form of the instruction
                        p = cast(targ_size_t *)(cast(char *) p + regsize);
                        cs.Iop = (cs.Iop & 1) | 0xC6;
                        cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0);
                        cs.Irex &= ~REX_R;
                        cs.IEV1.Voffset += regsize;
                        cs.IEV2.Vint = cast(int)*p;
                        off -= regsize;
                    } while (off > 0);
                }
            }
            freenode(e2);
            goto Lp;
        }
        retregs = allregs;        // pick a reg, any reg
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    if (retregs == mPSW)
    {
        retregs = allregs;
        if (sz == 2 * REGSIZE)
            retregs &= ~mBP;      // BP cannot be used for register pair
    }
    cs.Iop = STO;
    if (sz == 1)                  // must have byte regs
    {
        cs.Iop = 0x88;
        retregs &= BYTEREGS;
        if (!retregs)
            retregs = BYTEREGS;
    }
    else if (retregs & mES &&
             (
                (e1.Eoper == OPind &&
                 ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) ||
                (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata)
             )
            )
        // getlvalue() needs ES, so we can't return it
        retregs = allregs;              // no conflicts with ES
    else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0)
        retregs = DOUBLEREGS;

    regvar = false;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1.EV.Vsym,e2) &&           // and we can compute directly into it
            !(sz == 1 && e1.EV.Voffset == 1)
           )
        {
            if (varregm & XMMREGS)
            {
                // Could be an integer vector in the XMMREGS
                xmmeq(cdb, e, CMP, e1, e2, pretregs);
                return;
            }
            regvar = true;
            retregs = varregm;
            reg = varreg;       // evaluate directly in target register
            if (tysize(e1.Ety) == REGSIZE &&
                tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE)
            {
                // Assigning one half of a register pair variable
                if (e1.EV.Voffset)
                    retregs &= mMSW;
                else
                    retregs &= mLSW;
                reg = findreg(retregs);
            }
        }
    }
    if (*pretregs & mPSW && OTleaf(e1.Eoper))     // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    scodelem(cdb,e2,&retregs,0,true);    // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1.Eoper == OPind &&
        ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) &&
        e11.EV.E1.Eoper == OPvar &&
        e11.EV.E1.EV.Vsym.Sfl == FLreg &&
        (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS)
       )
    {
        Symbol *s = e11.EV.E1.EV.Vsym;
        if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
        {
            regcon.params &= ~s.Spregm();
        }

        postinc = e11.EV.E2.EV.Vint;
        if (e11.Eoper == OPpostdec)
            postinc = -postinc;
        getlvalue(cdb,&cs,e1,RMstore | retregs);
        freenode(e11.EV.E2);
    }
    else
    {
        postinc = 0;
        getlvalue(cdb,&cs,e1,RMstore | retregs);     // get lvalue (cl == null if regvar)
    }

    getregs(cdb,varregm);

    assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes));
    if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES)
    {
        reg = findreglsw(retregs);
        cs.Irm |= modregrm(0,reg,0);
        cdb.gen(&cs);                   // MOV EA,reg
        getlvalue_msw(&cs);             // point to where segment goes
        cs.Iop = 0x8C;
        NEWREG(cs.Irm,0);
        cdb.gen(&cs);                   // MOV EA+2,ES
    }
    else
    {
        if (!I16)
        {
            reg = findreg(retregs &
                    ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS));
            cs.Irm |= modregrm(0,reg & 7,0);
            if (reg & 8)
                cs.Irex |= REX_R;
            for (; true; sz -= REGSIZE)
            {
                // Do not generate mov from register onto itself
                if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)))
                    break;
                if (sz == 2)            // if 16 bit operand
                    cs.Iflags |= CFopsize;
                else if (sz == 1 && reg >= 4)
                    cs.Irex |= REX;
                cdb.gen(&cs);           // MOV EA+offset,reg
                if (sz <= REGSIZE)
                    break;
                getlvalue_msw(&cs);
                reg = findregmsw(retregs);
                code_newreg(&cs, reg);
            }
        }
        else
        {
            if (sz > REGSIZE)
                cs.IEV1.Voffset += sz - REGSIZE;  // 0,2,6
            reg = findreg(retregs &
                    (sz > REGSIZE ? mMSW : ALLREGS));
            if (tyml == TYdouble || tyml == TYdouble_alias)
                reg = AX;
            cs.Irm |= modregrm(0,reg,0);
            // Do not generate mov from register onto itself
            if (!regvar || reg != (cs.Irm & 7))
                for (; true; sz -= REGSIZE)             // 1,2,4
                {
                    cdb.gen(&cs);             // MOV EA+offset,reg
                    if (sz <= REGSIZE)
                        break;
                    cs.IEV1.Voffset -= REGSIZE;
                    if (tyml == TYdouble || tyml == TYdouble_alias)
                            reg = dblreg[reg];
                    else
                            reg = findreglsw(retregs);
                    NEWREG(cs.Irm,reg);
                }
        }
    }
    if (e1.Ecount ||                    // if lvalue is a CSE or
        regvar)                         // rvalue can't be a CSE
    {
        getregs_imm(cdb,retregs);        // necessary if both lvalue and
                                        //  rvalue are CSEs (since a reg
                                        //  can hold only one e at a time)
        cssave(e1,retregs,!OTleaf(e1.Eoper));     // if lvalue is a CSE
    }

    fixresult(cdb,e,retregs,pretregs);
Lp:
    // Apply the deferred increment/decrement of p for (*p++ = ...)
    if (postinc)
    {
        reg_t ireg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            uint rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc);
            if (tysize(e11.EV.E1.Ety) == 8)
code_orrex(cdb.last(), REX_W); 837 } 838 else if (I64) 839 { 840 cdb.genc2(0x81,modregrmx(3,0,ireg),postinc); 841 if (tysize(e11.EV.E1.Ety) == 8) 842 code_orrex(cdb.last(), REX_W); 843 } 844 else 845 { 846 if (postinc == 1) 847 cdb.gen1(0x40 + ireg); // INC ireg 848 else if (postinc == -cast(targ_int)1) 849 cdb.gen1(0x48 + ireg); // DEC ireg 850 else 851 { 852 cdb.genc2(0x81,modregrm(3,0,ireg),postinc); 853 } 854 } 855 } 856 freenode(e1); 857 } 858 859 860 /************************ 861 * Generate code for += -= &= |= ^= negass 862 */ 863 864 @trusted 865 void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 866 { 867 //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs)); 868 OPER op = e.Eoper; 869 regm_t retregs = 0; 870 uint reverse = 0; 871 elem *e1 = e.EV.E1; 872 tym_t tyml = tybasic(e1.Ety); // type of lvalue 873 int sz = _tysize[tyml]; 874 int isbyte = (sz == 1); // 1 for byte operation, else 0 875 876 // See if evaluate in XMM registers 877 if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0)) 878 { 879 xmmopass(cdb,e,pretregs); 880 return; 881 } 882 883 if (tyfloating(tyml)) 884 { 885 if (config.exe & EX_posix) 886 { 887 if (op == OPnegass) 888 cdnegass87(cdb,e,pretregs); 889 else 890 opass87(cdb,e,pretregs); 891 } 892 else 893 { 894 if (op == OPnegass) 895 opnegassdbl(cdb,e,pretregs); 896 else 897 opassdbl(cdb,e,pretregs,op); 898 } 899 return; 900 } 901 uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386) 902 ? 
CFopsize : 0; 903 uint cflags = 0; 904 regm_t forccs = *pretregs & mPSW; // return result in flags 905 regm_t forregs = *pretregs & ~mPSW; // return result in regs 906 // true if we want the result in a register 907 uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper)); 908 909 reg_t reg; 910 uint op1,op2,mode; 911 code cs; 912 elem *e2; 913 regm_t varregm; 914 reg_t varreg; 915 uint jop; 916 917 918 switch (op) // select instruction opcodes 919 { 920 case OPpostinc: op = OPaddass; // i++ => += 921 goto case OPaddass; 922 923 case OPaddass: op1 = 0x01; op2 = 0x11; 924 cflags = CFpsw; 925 mode = 0; break; // ADD, ADC 926 927 case OPpostdec: op = OPminass; // i-- => -= 928 goto case OPminass; 929 930 case OPminass: op1 = 0x29; op2 = 0x19; 931 cflags = CFpsw; 932 mode = 5; break; // SUB, SBC 933 934 case OPandass: op1 = op2 = 0x21; 935 mode = 4; break; // AND, AND 936 937 case OPorass: op1 = op2 = 0x09; 938 mode = 1; break; // OR , OR 939 940 case OPxorass: op1 = op2 = 0x31; 941 mode = 6; break; // XOR, XOR 942 943 case OPnegass: op1 = 0xF7; // NEG 944 break; 945 946 default: 947 assert(0); 948 } 949 op1 ^= isbyte; // bit 0 is 0 for byte operation 950 951 if (op == OPnegass) 952 { 953 getlvalue(cdb,&cs,e1,0); 954 modEA(cdb,&cs); 955 cs.Irm |= modregrm(0,3,0); 956 cs.Iop = op1; 957 switch (_tysize[tyml]) 958 { 959 case CHARSIZE: 960 cdb.gen(&cs); 961 break; 962 963 case SHORTSIZE: 964 cdb.gen(&cs); 965 if (!I16 && *pretregs & mPSW) 966 cdb.last().Iflags |= CFopsize | CFpsw; 967 break; 968 969 case LONGSIZE: 970 if (!I16 || opsize) 971 { cdb.gen(&cs); 972 cdb.last().Iflags |= opsize; 973 break; 974 } 975 neg_2reg: 976 getlvalue_msw(&cs); 977 cdb.gen(&cs); // NEG EA+2 978 getlvalue_lsw(&cs); 979 cdb.gen(&cs); // NEG EA 980 code_orflag(cdb.last(),CFpsw); 981 cs.Iop = 0x81; 982 getlvalue_msw(&cs); 983 cs.IFL2 = FLconst; 984 cs.IEV2.Vuns = 0; 985 cdb.gen(&cs); // SBB EA+2,0 986 break; 987 988 case LLONGSIZE: 989 if (I16) 990 assert(0); // not implemented yet 991 if 
(I32) 992 goto neg_2reg; 993 cdb.gen(&cs); 994 break; 995 996 default: 997 assert(0); 998 } 999 forccs = 0; // flags already set by NEG 1000 *pretregs &= ~mPSW; 1001 } 1002 else if ((e2 = e.EV.E2).Eoper == OPconst && // if rvalue is a const 1003 el_signx32(e2) && 1004 // Don't evaluate e2 in register if we can use an INC or DEC 1005 (((sz <= REGSIZE || tyfv(tyml)) && 1006 (op == OPaddass || op == OPminass) && 1007 (el_allbits(e2, 1) || el_allbits(e2, -1)) 1008 ) || 1009 (!evalinregister(e2) 1010 && tyml != TYhptr 1011 ) 1012 ) 1013 ) 1014 { 1015 getlvalue(cdb,&cs,e1,0); 1016 modEA(cdb,&cs); 1017 cs.IFL2 = FLconst; 1018 cs.IEV2.Vsize_t = e2.EV.Vint; 1019 if (sz <= REGSIZE || tyfv(tyml) || opsize) 1020 { 1021 targ_int i = cs.IEV2.Vint; 1022 1023 // Handle shortcuts. Watch out for if result has 1024 // to be in flags. 1025 1026 if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,®) && i != 1 && i != -1 && 1027 !opsize) 1028 { 1029 cs.Iop = op1; 1030 cs.Irm |= modregrm(0,reg & 7,0); 1031 if (I64) 1032 { if (isbyte && reg >= 4) 1033 cs.Irex |= REX; 1034 if (reg & 8) 1035 cs.Irex |= REX_R; 1036 } 1037 } 1038 else 1039 { 1040 cs.Iop = 0x81; 1041 cs.Irm |= modregrm(0,mode,0); 1042 switch (op) 1043 { 1044 case OPminass: // convert to += 1045 cs.Irm ^= modregrm(0,5,0); 1046 i = -i; 1047 cs.IEV2.Vsize_t = i; 1048 goto case OPaddass; 1049 1050 case OPaddass: 1051 if (i == 1) // INC EA 1052 goto L1; 1053 else if (i == -1) // DEC EA 1054 { cs.Irm |= modregrm(0,1,0); 1055 L1: cs.Iop = 0xFF; 1056 } 1057 break; 1058 1059 default: 1060 break; 1061 } 1062 cs.Iop ^= isbyte; // for byte operations 1063 } 1064 cs.Iflags |= opsize; 1065 if (forccs) 1066 cs.Iflags |= CFpsw; 1067 else if (!I16 && cs.Iflags & CFopsize) 1068 { 1069 switch (op) 1070 { case OPorass: 1071 case OPxorass: 1072 cs.IEV2.Vsize_t &= 0xFFFF; 1073 cs.Iflags &= ~CFopsize; // don't worry about MSW 1074 break; 1075 1076 case OPandass: 1077 cs.IEV2.Vsize_t |= ~0xFFFFL; 1078 cs.Iflags &= ~CFopsize; // don't worry about MSW 1079 
break; 1080 1081 case OPminass: 1082 case OPaddass: 1083 static if (1) 1084 { 1085 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 1086 cs.Iflags &= ~CFopsize; 1087 } 1088 else 1089 { 1090 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 1091 e1.Eoper == OPind) // not a register var 1092 cs.Iflags &= ~CFopsize; 1093 } 1094 break; 1095 1096 default: 1097 assert(0); 1098 } 1099 } 1100 1101 // For scheduling purposes, we wish to replace: 1102 // OP EA 1103 // with: 1104 // MOV reg,EA 1105 // OP reg 1106 // MOV EA,reg 1107 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 1108 (config.target_cpu == TARGET_Pentium || 1109 config.target_cpu == TARGET_PentiumMMX) && 1110 config.flags4 & CFG4speed) 1111 { 1112 regm_t sregm; 1113 code cs2; 1114 1115 // Determine which registers to use 1116 sregm = allregs & ~idxregm(&cs); 1117 if (isbyte) 1118 sregm &= BYTEREGS; 1119 if (sregm & forregs) 1120 sregm &= forregs; 1121 1122 allocreg(cdb,&sregm,®,tyml); // allocate register 1123 1124 cs2 = cs; 1125 cs2.Iflags &= ~CFpsw; 1126 cs2.Iop = LOD ^ isbyte; 1127 code_newreg(&cs2, reg); 1128 cdb.gen(&cs2); // MOV reg,EA 1129 1130 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 1131 if (reg & 8) 1132 cs.Irex |= REX_B; 1133 cdb.gen(&cs); // OP reg 1134 1135 cs2.Iop ^= 2; 1136 cdb.gen(&cs2); // MOV EA,reg 1137 1138 retregs = sregm; 1139 wantres = 0; 1140 if (e1.Ecount) 1141 cssave(e1,retregs,!OTleaf(e1.Eoper)); 1142 } 1143 else 1144 { 1145 cdb.gen(&cs); 1146 cs.Iflags &= ~opsize; 1147 cs.Iflags &= ~CFpsw; 1148 if (I16 && opsize) // if DWORD operand 1149 cs.IEV1.Voffset += 2; // compensate for wantres code 1150 } 1151 } 1152 else if (sz == 2 * REGSIZE) 1153 { 1154 targ_uns msw; 1155 1156 cs.Iop = 0x81; 1157 cs.Irm |= modregrm(0,mode,0); 1158 cs.Iflags |= cflags; 1159 cdb.gen(&cs); 1160 cs.Iflags &= ~CFpsw; 1161 1162 getlvalue_msw(&cs); // point to msw 1163 msw = cast(uint)MSREG(e.EV.E2.EV.Vllong); 1164 cs.IEV2.Vuns = msw; // msw of constant 1165 switch (op) 1166 { 
1167 case OPminass: 1168 cs.Irm ^= modregrm(0,6,0); // SUB => SBB 1169 break; 1170 1171 case OPaddass: 1172 cs.Irm |= modregrm(0,2,0); // ADD => ADC 1173 break; 1174 1175 default: 1176 break; 1177 } 1178 cdb.gen(&cs); 1179 } 1180 else 1181 assert(0); 1182 freenode(e.EV.E2); // don't need it anymore 1183 } 1184 else if (isregvar(e1,&varregm,&varreg) && 1185 (e2.Eoper == OPvar || e2.Eoper == OPind) && 1186 !evalinregister(e2) && 1187 sz <= REGSIZE) // deal with later 1188 { 1189 getlvalue(cdb,&cs,e2,0); 1190 freenode(e2); 1191 getregs(cdb,varregm); 1192 code_newreg(&cs, varreg); 1193 if (I64 && sz == 1 && varreg >= 4) 1194 cs.Irex |= REX; 1195 cs.Iop = op1 ^ 2; // toggle direction bit 1196 if (forccs) 1197 cs.Iflags |= CFpsw; 1198 reverse = 2; // remember we toggled it 1199 cdb.gen(&cs); 1200 retregs = 0; // to trigger a bug if we attempt to use it 1201 } 1202 else if ((op == OPaddass || op == OPminass) && 1203 sz <= REGSIZE && 1204 !e2.Ecount && 1205 ((jop = jmpopcode(e2)) == JC || jop == JNC || 1206 (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC))) 1207 ) 1208 { 1209 /* e1 += (x < y) ADC EA,0 1210 * e1 -= (x < y) SBB EA,0 1211 * e1 += (x >= y) SBB EA,-1 1212 * e1 -= (x >= y) ADC EA,-1 1213 */ 1214 getlvalue(cdb,&cs,e1,0); // get lvalue 1215 modEA(cdb,&cs); 1216 regm_t keepmsk = idxregm(&cs); 1217 retregs = mPSW; 1218 if (OTconv(e2.Eoper)) 1219 { 1220 scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true); 1221 freenode(e2); 1222 } 1223 else 1224 scodelem(cdb,e2,&retregs,keepmsk,true); 1225 cs.Iop = 0x81 ^ isbyte; // ADC EA,imm16/32 1226 uint regop = 2; // ADC 1227 if ((op == OPaddass) ^ (jop == JC)) 1228 regop = 3; // SBB 1229 code_newreg(&cs,regop); 1230 cs.Iflags |= opsize; 1231 if (forccs) 1232 cs.Iflags |= CFpsw; 1233 cs.IFL2 = FLconst; 1234 cs.IEV2.Vsize_t = (jop == JC) ? 
0 : ~cast(targ_size_t)0; 1235 cdb.gen(&cs); 1236 retregs = 0; // to trigger a bug if we attempt to use it 1237 } 1238 else // evaluate e2 into register 1239 { 1240 retregs = (isbyte) ? BYTEREGS : ALLREGS; // pick working reg 1241 if (tyml == TYhptr) 1242 retregs &= ~mCX; // need CX for shift count 1243 scodelem(cdb,e.EV.E2,&retregs,0,true); // get rvalue 1244 getlvalue(cdb,&cs,e1,retregs); // get lvalue 1245 modEA(cdb,&cs); 1246 cs.Iop = op1; 1247 if (sz <= REGSIZE || tyfv(tyml)) 1248 { 1249 reg = findreg(retregs); 1250 code_newreg(&cs, reg); // OP1 EA,reg 1251 if (sz == 1 && reg >= 4 && I64) 1252 cs.Irex |= REX; 1253 if (forccs) 1254 cs.Iflags |= CFpsw; 1255 } 1256 else if (tyml == TYhptr) 1257 { 1258 uint mreg = findregmsw(retregs); 1259 uint lreg = findreglsw(retregs); 1260 getregs(cdb,retregs | mCX); 1261 1262 // If h -= l, convert to h += -l 1263 if (e.Eoper == OPminass) 1264 { 1265 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 1266 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 1267 code_orflag(cdb.last(),CFpsw); 1268 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 1269 } 1270 cs.Iop = 0x01; 1271 cs.Irm |= modregrm(0,lreg,0); 1272 cdb.gen(&cs); // ADD EA,lreg 1273 code_orflag(cdb.last(),CFpsw); 1274 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 1275 genshift(cdb); // MOV CX,offset __AHSHIFT 1276 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 1277 NEWREG(cs.Irm,mreg); // ADD EA+2,mreg 1278 getlvalue_msw(&cs); 1279 } 1280 else if (sz == 2 * REGSIZE) 1281 { 1282 cs.Irm |= modregrm(0,findreglsw(retregs),0); 1283 cdb.gen(&cs); // OP1 EA,reg+1 1284 code_orflag(cdb.last(),cflags); 1285 cs.Iop = op2; 1286 NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg 1287 getlvalue_msw(&cs); 1288 } 1289 else 1290 assert(0); 1291 cdb.gen(&cs); 1292 retregs = 0; // to trigger a bug if we attempt to use it 1293 } 1294 1295 // See if we need to reload result into a register. 
1296 // Need result in registers in case we have a 32 bit 1297 // result and we want the flags as a result. 1298 if (wantres || (sz > REGSIZE && forccs)) 1299 { 1300 if (sz <= REGSIZE) 1301 { 1302 regm_t possregs; 1303 1304 possregs = ALLREGS; 1305 if (isbyte) 1306 possregs = BYTEREGS; 1307 retregs = forregs & possregs; 1308 if (!retregs) 1309 retregs = possregs; 1310 1311 // If reg field is destination 1312 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1313 { 1314 reg = (cs.Irm >> 3) & 7; 1315 if (cs.Irex & REX_R) 1316 reg |= 8; 1317 retregs = mask(reg); 1318 allocreg(cdb,&retregs,®,tyml); 1319 } 1320 // If lvalue is a register, just use that register 1321 else if ((cs.Irm & 0xC0) == 0xC0) 1322 { 1323 reg = cs.Irm & 7; 1324 if (cs.Irex & REX_B) 1325 reg |= 8; 1326 retregs = mask(reg); 1327 allocreg(cdb,&retregs,®,tyml); 1328 } 1329 else 1330 { 1331 allocreg(cdb,&retregs,®,tyml); 1332 cs.Iop = LOD ^ isbyte ^ reverse; 1333 code_newreg(&cs, reg); 1334 if (I64 && isbyte && reg >= 4) 1335 cs.Irex |= REX_W; 1336 cdb.gen(&cs); // MOV reg,EA 1337 } 1338 } 1339 else if (tyfv(tyml) || tyml == TYhptr) 1340 { 1341 regm_t idxregs; 1342 1343 if (tyml == TYhptr) 1344 getlvalue_lsw(&cs); 1345 idxregs = idxregm(&cs); 1346 retregs = forregs & ~idxregs; 1347 if (!(retregs & IDXREGS)) 1348 retregs |= IDXREGS & ~idxregs; 1349 if (!(retregs & mMSW)) 1350 retregs |= mMSW & ALLREGS; 1351 allocreg(cdb,&retregs,®,tyml); 1352 NEWREG(cs.Irm,findreglsw(retregs)); 1353 if (retregs & mES) // if want ES loaded 1354 { 1355 cs.Iop = 0xC4; 1356 cdb.gen(&cs); // LES lreg,EA 1357 } 1358 else 1359 { 1360 cs.Iop = LOD; 1361 cdb.gen(&cs); // MOV lreg,EA 1362 getlvalue_msw(&cs); 1363 if (I32) 1364 cs.Iflags |= CFopsize; 1365 NEWREG(cs.Irm,reg); 1366 cdb.gen(&cs); // MOV mreg,EA+2 1367 } 1368 } 1369 else if (sz == 2 * REGSIZE) 1370 { 1371 regm_t idx = idxregm(&cs); 1372 retregs = forregs; 1373 if (!retregs) 1374 retregs = ALLREGS; 1375 allocreg(cdb,&retregs,®,tyml); 1376 cs.Iop = LOD; 1377 
NEWREG(cs.Irm,reg);

            code csl = cs;
            NEWREG(csl.Irm,findreglsw(retregs));
            getlvalue_lsw(&csl);

            if (mask(reg) & idx)
            {
                cdb.gen(&csl);             // MOV reg+1,EA
                cdb.gen(&cs);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);              // MOV reg,EA+2
                cdb.gen(&csl);             // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}

/********************************
 * Generate code for *=
 *
 * Dispatches on the lvalue type: XMM and x87/soft-float operands are handed
 * to xmmopass / opass87 / opassdbl; integers up to REGSIZE use LEA sequences
 * for selected constant factors, otherwise IMUL/MUL; operands of
 * 2 * REGSIZE are multiplied as a register pair (inline, or via CLIB.lmul).
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the *= operator node; e.EV.E1 is the lvalue, e.EV.E2 the factor
 *      pretregs = mask of where the result is wanted; it is tested here and
 *                 forwarded to the helper that stores the result
 */

@trusted
void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    reg_t resreg;
    uint opr,isbyte;

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                     // OPxxxx

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    if (sz <= REGSIZE)                  // if word or byte
    {
        if (e2.Eoper == OPconst &&
            (I32 || I64) &&
            el_signx32(e2) &&
            sz >= 4)
        {
            // See if we can use an LEA instruction

            int ss;             // scale of the first LEA: reg + (reg << ss)
            int ss2 = 0;        // if non-zero, scale of a second LEA: reg << ss2
            int shift;          // L5 only: extra left shift applied to sreg

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            switch (e2factor)
            {
                case 12:    ss = 1; ss2 = 2; goto L4;
                case 24:    ss = 1; ss2 = 3; goto L4;

                case 6:
                case 3:     ss = 1; goto L4;

                case 20:    ss = 2; ss2 = 2; goto L4;
                case 40:    ss = 2; ss2 = 3; goto L4;

                case 10:
                case 5:     ss = 2; goto L4;

                case 36:    ss = 3; ss2 = 2; goto L4;
                case 72:    ss = 3; ss2 = 3; goto L4;

                case 18:
                case 9:     ss = 3; goto L4;
                L4:
                {
                    // factor = (1 + 2^ss) * (2^ss2, or 2 if the factor is even, or 1)
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);    // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;
                    allocreg(cdb,&regm,&reg,tyml);
                    cs.Iop = LOD;
                    code_newreg(&cs,reg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                      // MOV reg,EA

                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
                    if (ss2)
                    {
                        cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrm(ss2,reg,5));
                        cdb.last().IFL1 = FLconst;
                        cdb.last().IEV1.Vint = 0;      // LEA reg,0[ss2*reg]
                    }
                    else if (!(e2factor & 1))          // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);     // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                case 37:
                case 74:    shift = 2;
                            goto L5;
                case 13:
                case 26:    shift = 0;
                            goto L5;
                L5:
                {
                    // reg = 5*sreg + 8*(sreg << shift), doubled for even factors:
                    // shift 0 gives 13 (26), shift 2 gives 37 (74)
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);    // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;                         // return register
                    allocreg(cdb,&regm,&reg,tyml);

                    reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));

                    cs.Iop = LOD;
                    code_newreg(&cs,sreg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                      // MOV sreg,EA

                    assert((sreg & 7) != BP);
                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                    if (shift)
                        cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                    if (!(e2factor & 1))               // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);     // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                default:
                    break;
            }
        }

        isbyte = (sz == 1);             // 1 for byte operation

        if (config.target_cpu >= TARGET_80286 &&
            e2.Eoper == OPconst && !isbyte)
        {
            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            // a 64 bit factor that does not fit the 32 bit immediate goes through a register
            if (I64 && sz == 8 && e2factor != cast(int)e2factor)
                goto L1;
            freenode(e2);
            getlvalue(cdb,&cs,e1,0);         // get EA
            regm_t idxregs = idxregm(&cs);
            retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
            if (!retregs)
                retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&resreg,tyml);
            cs.Iop = 0x69;                  // IMUL reg,EA,e2value
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = cast(int)e2factor;
            opr = resreg;
        }
        else if (!I16 && !isbyte)
        {
         L1:
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
            codelem(cdb,e2,&retregs,false); // load rvalue in reg
            getlvalue(cdb,&cs,e1,retregs);  // get EA
            getregs(cdb,retregs);           // destroy these regs
            cs.Iop = 0x0FAF;                // IMUL resreg,EA
            resreg = findreg(retregs);
            opr = resreg;
        }
        else
        {
            retregs = mAX;
            codelem(cdb,e2,&retregs,false);      // load rvalue in AX
            getlvalue(cdb,&cs,e1,mAX);           // get EA
            getregs(cdb,isbyte ? mAX : mAX | mDX); // destroy these regs
            cs.Iop = 0xF7 ^ isbyte;              // [I]MUL EA
            opr = uns ? 4 : 5;              // MUL/IMUL
            resreg = AX;                    // result register for *
        }
        code_newreg(&cs,opr);
        cdb.gen(&cs);

        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        if (e2.Eoper == OPconst && I32)
        {
            /*  if (msw)
                  IMUL    EDX,EDX,lsw
                  IMUL    reg,EAX,msw
                  ADD     reg,EDX
                else
                  IMUL    reg,EDX,lsw
                MOV       EDX,lsw
                MUL       EDX
                ADD       EDX,reg
             */
            // Read the constant before the node is released, as the other
            // constant paths in this function do.
            const targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            freenode(e2);
            retregs = mDX|mAX;
            reg_t rhi, rlo;
            opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
            const regm_t keepmsk = idxregm(&cs);

            reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
            const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

            if (msw)
            {
                genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
            }
            else
                genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

            movregconst(cdb,DX,lsw,0);              // MOV  EDX,lsw
            getregs(cdb,mDX);
            cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL  EDX
            cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD  EDX,reg
        }
        else
        {
            retregs = mDX | mAX;
            regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
            codelem(cdb,e2,&rretregs,false);
            getlvalue(cdb,&cs,e1,retregs | rretregs);
            getregs(cdb,retregs);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            getlvalue_msw(&cs);
            cs.Irm |= modregrm(0,DX,0);
            cdb.gen(&cs);                   // MOV DX,EA+2
            getlvalue_lsw(&cs);
            if (config.target_cpu >= TARGET_PentiumPro)
            {
                regm_t rlo = findreglsw(rretregs);
                regm_t rhi = findregmsw(rretregs);
                /*  IMUL    rhi,EAX
                    IMUL    EDX,rlo
                    ADD     rhi,EDX
                    MUL     rlo
                    ADD     EDX,Erhi
                 */
                getregs(cdb,mAX|mDX|mask(rhi));
                cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                cdb.gen2(0x03,modregrm(3,rhi,DX));
                cdb.gen2(0xF7,modregrm(3,4,rlo));
                cdb.gen2(0x03,modregrm(3,DX,rhi));
            }
            else
            {
                callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
            }
        }

        opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/********************************
 * Generate code for /= %=
 *
 * XMM and x87/soft-float operands are handed to xmmopass / opass87 /
 * opassdbl. Integers up to REGSIZE divided by a constant are strength-reduced
 * when optimizing for speed (multiply by a precomputed coefficient, or
 * shift/mask for powers of 2); everything else uses DIV/IDIV. Operands of
 * 2 * REGSIZE fall through to the CLIB.ldiv / CLIB.uldiv family.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the /= or %= operator node; e.EV.E1 is the lvalue, e.EV.E2 the divisor
 *      pretregs = mask of where the result is wanted; it is tested here and
 *                 forwarded to the helper that stores the result
 */

@trusted
void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    OPER op = e.Eoper;                     // OPxxxx

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    code cs = void;

    //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    if (sz <= REGSIZE)                  // if word or byte
    {
        uint isbyte = (sz == 1);        // 1 for byte operation
        reg_t resreg;
        targ_size_t e2factor;           // value of a constant divisor
        targ_size_t d;                  // magnitude of a constant divisor
        bool neg;                       // true if a signed constant divisor is negative
        int pow2;                       // ispow2(e2factor); compared against -1 below

        assert(!isbyte);                // should never happen
        assert(I16 || sz != SHORTSIZE);

        if (e2.Eoper == OPconst)
        {
            e2factor = cast(targ_size_t)el_tolong(e2);
            pow2 = ispow2(e2factor);
            d = e2factor;
            if (!uns && cast(targ_llong)e2factor < 0)
            {
                neg = true;
                d = -d;
            }
        }

        // Signed divide by a constant
        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            !uns &&
            (d & (d - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            /* R1 / 10
             *
             *  MOV     EAX,m
             *  IMUL    R1
             *  MOV     EAX,R1
             *  SAR     EAX,31
             *  SAR     EDX,shpost
             *  SUB     EDX,EAX
             *  IMUL    EAX,EDX,d
             *  SUB     R1,EAX
             *
             * EDX = quotient
             * R1 = remainder
             */
            assert(sz == 4 || sz == 8);

            ulong m;
            int shpost;
            const int N = sz * 8;
            const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);

            freenode(e2);

            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
            getregs(cdb, mAX|mDX);

            /* Algorithm 5.2
             * if m>=2**(N-1)
             *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
             * else
             *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
             * if (neg)
             *    q = -q
             */
            const bool mgt = mhighbit || m >= (1UL << (N - 1));
            movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
            cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
            if (mgt)
                cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
            getregsNoSave(mAX);                                     // EAX no longer contains 'm'
            genmovreg(cdb, AX, reg);                                // MOV EAX,reg
            cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
            if (shpost)
                cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
            reg_t r3;
            if (neg && op == OPdivass)
            {
                // reversed subtraction yields the negated quotient
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
                r3 = AX;
            }
            else
            {
                cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
                r3 = DX;
            }

            // r3 is quotient
            reg_t resregx;
            switch (op)
            {   case OPdivass:
                    resregx = r3;
                    break;

                case OPmodass:
                    assert(reg != AX && r3 == DX);
                    if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
                    {
                        cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
                    }
                    else
                    {
                        movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
                        getregsNoSave(mAX);                             // EAX no longer contains 'd'
                    }
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        // Unsigned divide by a constant
        void unsignedDivideByConstant(ref CodeBuilder cdb)
        {
            assert(sz == 4 || sz == 8);

            reg_t r3;
            reg_t reg;
            ulong m;
            int shpre;
            int shpost;
            code cs = void;

            // Query e2 once, before either branch below releases it.
            const bool e2fits32 = el_signx32(e2);

            if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
            {
                /* t1 = MULUH(m, n)
                 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
                 *   MOV   EAX,reg
                 *   MOV   EDX,m
                 *   MUL   EDX
                 *   MOV   EAX,reg
                 *   SUB   EAX,EDX
                 *   SHR   EAX,1
                 *   LEA   R3,[EAX][EDX]
                 *   SHR   R3,shpost-1
                 */
                assert(shpre == 0);

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
                getregs(cdb, mAX|mDX);

                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mask(reg) | mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
                cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                regm_t regm3 = allregs & ~idxregs;
                if (op == OPmodass)
                {
                    // the remainder code below still needs reg, and needs EAX
                    // free when the divisor has to be loaded into it
                    regm3 &= ~mask(reg);
                    if (!e2fits32)
                        regm3 &= ~mAX;
                }
                allocreg(cdb,&regm3,&r3,TYint);
                cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
                if (shpost != 1)
                    cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
            }
            else
            {
                /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
                 *   SHR   EAX,shpre
                 *   MOV   reg,m
                 *   MUL   reg
                 *   SHR   EDX,shpost
                 */

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs)); // MOV reg,EA
                getregs(cdb, mAX|mDX);

                if (reg != AX)
                {
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);                              // MOV EAX,reg
                }
                if (shpre)
                {
                    getregs(cdb,mAX);
                    cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
                }
                getregs(cdb,mDX);
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
                if (shpost)
                    cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
                r3 = DX;
            }

            reg_t resregx;
            switch (op)
            {
                case OPdivass:
                    // r3 = quotient
                    resregx = r3;
                    break;

                case OPmodass:
                    /* reg = original value
                     * r3  = quotient
                     */
                    assert(reg != AX);
                    if (e2fits32)
                    {
                        cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
                    }
                    else
                    {
                        assert(!(mask(r3) & mAX));
                        movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
                        getregs(cdb,mAX);
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
                    }
                    getregs(cdb,mask(reg));
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            uns &&
            e2factor > 2 && (e2factor & (e2factor - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            unsignedDivideByConstant(cdb);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst && !uns &&
            (sz == REGSIZE || (I64 && sz == 4)) &&
            pow2 != -1 &&
            e2factor == cast(int)e2factor &&
            !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
           )
        {
            freenode(e2);
            if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
            {
                /* This is better than the code further down because it is
                 * not constrained to using AX and DX.
                 */
                getlvalue(cdb,&cs,e1,0);
                regm_t idxregs = idxregm(&cs);
                reg_t reg;
                opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA

                reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
                genmovreg(cdb,r,reg);                        // MOV r,reg
                cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
                cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
                cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1

                opAssStoreReg(cdb, cs, e, reg, pretregs);
                return;
            }

            // Signed divide or modulo by power of 2
            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb,cs,e,reg,mAX);

            getregs(cdb,mDX);                   // DX is scratch register
            cdb.gen1(0x99);                     // CWD
            code_orrex(cdb.last(), rex);
            if (pow2 == 1)
            {
                if (op == OPdivass)
                {
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
                    cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
                    resreg = AX;
                }
                else // OPmod
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
                    cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
                    resreg = DX;
                }
            }
            else
            {
                assert(pow2 < 32);
                targ_ulong m = (1 << pow2) - 1;
                if (op == OPdivass)
                {
                    cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
                    cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
                    // Be careful not to generate this for 8088
                    assert(config.target_cpu >= TARGET_80286);
                    cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
                    resreg = AX;
                }
                else // OPmodass
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
                    resreg = AX;
                }
            }
        }
        else
        {
            regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
            codelem(cdb,e2,&retregs,false); // load rvalue in retregs
            reg_t reg = findreg(retregs);
            getlvalue(cdb,&cs,e1,mAX | mDX | retregs);     // get EA
            getregs(cdb,mAX | mDX);         // destroy these regs
            cs.Irm |= modregrm(0,AX,0);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            if (uns)                        // if uint
                movregconst(cdb,DX,0,0);    // CLR DX
            else                            // else signed
            {
                cdb.gen1(0x99);             // CWD
                code_orrex(cdb.last(),rex);
            }
            getregs(cdb,mDX | mAX); // DX and AX will be destroyed
            const uint opr = uns ? 6 : 7;     // DIV/IDIV
            genregs(cdb,0xF7,opr,reg);   // OPR reg
            code_orrex(cdb.last(),rex);
            resreg = (op == OPmodass) ? DX : AX;        // result register
        }
        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }

    assert(sz == 2 * REGSIZE);

    targ_size_t e2factor;
    int pow2;
    if (e2.Eoper == OPconst)
    {
        e2factor = cast(targ_size_t)el_tolong(e2);
        pow2 = ispow2(e2factor);
    }

    /* NOTE(review): `op` is `e.Eoper` (see the top of this function), so
     * `op == OPdivass && e.Eoper == OPconst` cannot hold unless OPdivass and
     * OPconst are the same value; this branch and the modulo branch after it
     * look unreachable. `e2.Eoper == OPconst` was probably intended. Before
     * enabling them, also check that
     *  - they pass (rlo, rhi) to opAssStorePair while every other call in this
     *    file passes the msw register first, and
     *  - the pow2 == 63 divide case encodes 0x81 with reg field 4, which is AND
     *    everywhere else in this function, although its comment says ADD.
     * Left as is so that generated code does not change.
     */

    // Register pair signed divide by power of 2
    if (op == OPdivass &&
        !uns &&
        e.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I16 or I64 cent
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);
        retregs = mask(rhi) | mask(rlo);

        if (pow2 < 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
            if (pow2 == 1)
                cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
            else
            {
                cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
                cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
            }
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
            cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
        }
        else if (pow2 == 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
        }
        else if (pow2 < 63)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
            reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.genmovreg(r2,r1);                                         // MOV r2,r1

            if (pow2 == 33)
            {
                cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
            }
            else
            {
                cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1);  // AND r2,mask
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                     // ADD rlo,r1
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                     // ADC rhi,r2
            }

            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);
            assert(mask(rlo) & BYTEREGS);                          // for SETZ
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_0000 (see NOTE(review) above)
            cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
            cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
            cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
            movregconst(cdb,rhi,0,0);                              // MOV rhi,0
        }

        opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
        return;
    }

    // Register pair signed modulo by power of 2
    if (op == OPmodass &&
        !uns &&
        e.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I64 cent yet
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX;
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);

        regm_t scratchm = allregs & ~(retregs | keepmsk);
        if (pow2 == 63)
            scratchm &= BYTEREGS;               // because of SETZ
        reg_t r1 = allocScratchReg(cdb, scratchm);

        if (pow2 < 32)
        {
            cdb.genmovreg(r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));                   // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB rlo,r1
            cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);        // AND rlo,(1<<pow2)-1
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));                   // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                  // SBB rhi,rhi
        }
        else if (pow2 == 32)
        {
            cdb.genmovreg(r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                  // SBB rhi,rhi
        }
        else if (pow2 < 63)
        {
            scratchm = allregs & ~(retregs | scratchm);
            reg_t r2;
            allocreg(cdb,&scratchm,&r2,TYint);

            cdb.genmovreg(r1,rhi);                                        // MOV  r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR  r1,31
            cdb.genmovreg(r2,r1);                                         // MOV  r2,r1
            cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);           // SHRD r1,r2,64-pow2
            cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);             // SHR  r2,64-pow2
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
            cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                   // ADC  rhi,r2
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1);   // AND  rhi,(1<<(pow2-32))-1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB  rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                   // SBB  rhi,r2
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);

            cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
            cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));                   // OR   r1,rlo
            cdb.gen2(0x0F94,modregrmx(3,0,r1));                           // SETZ r1
            cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1);     // SHL  r1,31
            cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));                   // SUB  rhi,r1
        }

        opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
        return;
    }

    regm_t rretregs = mCX|mBX;
    codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX

    reg_t rlo;
    reg_t rhi;
    opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);

    // the modulo variants take their result from CX|BX and are selected as
    // the CLIB entry following the corresponding divide
    regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
    uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
    if (op == OPmodass)
        ++lib;
    callclib(cdb,e,lib,&retregs,idxregm(&cs));

    opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
}


/********************************
 * Generate code for <<= and >>=
 *
 * The shift is done directly on the lvalue in memory when no result is
 * wanted (or only flags for a constant count), and always for operands wider
 * than REGSIZE, which are shifted one bit at a time with a shift/rotate-
 * through-carry pair. Otherwise the lvalue is loaded into a register,
 * shifted there and stored back.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the shift-assign operator node; e.EV.E1 is the lvalue,
 *          e.EV.E2 the shift count
 *      pretregs = mask of where the result is wanted; on the in-memory path
 *                 it is overwritten with the registers that actually hold
 *                 the result (0 if none)
 */

@trusted
void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    uint op1,op2;
    reg_t reg;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    uint sz = _tysize[tyml];
    uint isbyte = tybyte(e.Ety) != 0;          // 1 for byte operations
    tym_t tym = tybasic(e.Ety);                // type of result
    OPER oper = e.Eoper;
    assert(tysize(e2.Ety) <= REGSIZE);

    uint rex = (I64 && sz == 8) ? REX_W : 0;

    // if our lvalue is a cse, make sure we evaluate for result in register
    if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
        *pretregs |= ALLREGS;

    version (SCPP)
    {
        // Do this until the rest of the compiler does OPshr/OPashr correctly
        if (oper == OPshrass)
            oper = tyuns(tyml) ? OPshrass : OPashrass;
    }

    // Select opcodes. op2 is used for msw for long shifts.

    switch (oper)
    {
        case OPshlass:
            op1 = 4;                    // SHL
            op2 = 2;                    // RCL
            break;

        case OPshrass:
            op1 = 5;                    // SHR
            op2 = 3;                    // RCR
            break;

        case OPashrass:
            op1 = 7;                    // SAR
            op2 = 3;                    // RCR
            break;

        default:
            assert(0);
    }


    uint v = 0xD3;                      // for SHIFT xx,CL cases
    uint loopcnt = 1;
    uint conste2 = false;
    uint shiftcnt = 0;                  // avoid "use before initialized" warnings
    if (e2.Eoper == OPconst)
    {
        conste2 = true;                 // e2 is a constant
        shiftcnt = e2.EV.Vint;          // byte ordering of host
        if (config.target_cpu >= TARGET_80286 &&
            sz <= REGSIZE &&
            shiftcnt != 1)
            v = 0xC1;                   // SHIFT xx,shiftcnt
        else if (shiftcnt <= 3)
        {
            loopcnt = shiftcnt;
            v = 0xD1;                   // SHIFT xx,1
        }
    }

    if (v == 0xD3)                      // if COUNT == CL
    {
        retregs = mCX;
        codelem(cdb,e2,&retregs,false);
    }
    else
        freenode(e2);
    getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
    modEA(cdb,&cs);                     // check for modifying register

    if (*pretregs == 0 ||               // if don't return result
        (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
        sz > REGSIZE
       )
    {
        retregs = 0;            // value not returned in a register
        cs.Iop = v ^ isbyte;
        while (loopcnt--)
        {
            NEWREG(cs.Irm,op1);           // make sure op1 is first
            if (sz <= REGSIZE)
            {
                if (conste2)
                {
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = shiftcnt;
                }
                cdb.gen(&cs);             // SHIFT EA,[CL|1]
                if (*pretregs & mPSW && !loopcnt && conste2)
                    code_orflag(cdb.last(),CFpsw);
            }
            else // TYlong
            {
                cs.Iop = 0xD1;            // plain shift
                code *ce = gennop(null);                  // ce: NOP
                if (v == 0xD3)
                {
                    getregs(cdb,mCX);
                    if (!conste2)
                    {
                        assert(loopcnt == 0);
                        genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
                    }
                }
                code *cg;
                if (oper == OPshlass)
                {
                    // left shift: lsw first, carry rotates into the msw
                    cdb.gen(&cs);               // cg: SHIFT EA
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    getlvalue_msw(&cs);
                    NEWREG(cs.Irm,op2);
                    cdb.gen(&cs);               // SHIFT EA
                    getlvalue_lsw(&cs);
                }
                else
                {
                    // right shift: msw first, carry rotates into the lsw
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    NEWREG(cs.Irm,op2);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);
                }
                if (v == 0xD3)                    // if building a loop
                {
                    genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
                    regimmed_set(CX,0);           // note that now CX == 0
                }
                cdb.append(ce);
            }
        }

        // If we want the result, we must load it from the EA
        // into a register.

        if (sz == 2 * REGSIZE && *pretregs)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (retregs)
            {
                retregs &= ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,tym);
                cs.Iop = LOD;

                // be careful not to trash any index regs
                // do MSW first (which can't be an index reg)
                getlvalue_msw(&cs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
                reg = findreglsw(retregs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                if (*pretregs & mPSW)
                    tstresult(cdb,retregs,tyml,true);
            }
            else        // flags only
            {
                retregs = ALLREGS & ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,TYint);
                cs.Iop = LOD;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);           // MOV reg,EA
                cs.Iop = 0x0B;          // OR reg,EA+2
                cs.Iflags |= CFpsw;
                getlvalue_msw(&cs);
                cdb.gen(&cs);
            }
        }
        if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
        freenode(e1);
        *pretregs = retregs;
        return;
    }
    else                                // else must evaluate in register
    {
        if (sz <= REGSIZE)
        {
            regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
            if (isbyte)
                possregs &= BYTEREGS;
            retregs = *pretregs & possregs;
            if (retregs == 0)
                retregs = possregs;
            allocreg(cdb,&retregs,&reg,tym);
            cs.Iop = LOD ^ isbyte;
            code_newreg(&cs, reg);
            if (isbyte && I64 && (reg >= 4))
                cs.Irex |= REX;
            cdb.gen(&cs);                     // MOV reg,EA
            if (!I16)
            {
                assert(!isbyte || (mask(reg) & BYTEREGS));
                cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
                if (isbyte && I64 && (reg >= 4))
                    cdb.last().Irex |= REX;
                code_orrex(cdb.last(), rex);
                // We can do a 32 bit shift on a 16 bit operand if
                // it's a left shift and we're not concerned about
                // the flags. Remember that flags are not set if
                // a shift of 0 occurs.
                if (_tysize[tym] == SHORTSIZE &&
                    (oper == OPshrass || oper == OPashrass ||
                     (*pretregs & mPSW && conste2)))
                     cdb.last().Iflags |= CFopsize;            // 16 bit operand
            }
            else
            {
                while (loopcnt--)
                {   // Generate shift instructions.
                    cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
                }
            }
            if (*pretregs & mPSW && conste2)
            {
                assert(shiftcnt);
                *pretregs &= ~mPSW;     // result is already in flags
                code_orflag(cdb.last(),CFpsw);
            }

            opAssStoreReg(cdb,cs,e,reg,pretregs);
            return;
        }
        assert(0);
    }
}


/**********************************
 * Generate code for compares.
 * Handles lt,gt,le,ge,eqeq,ne for all data types.
 */

@trusted
void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs,rretregs;
    reg_t reg,rreg;
    int fl;

    //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
    // Collect extra parameter. This is pretty ugly...
2544 int flag = cdcmp_flag; 2545 cdcmp_flag = 0; 2546 2547 elem *e1 = e.EV.E1; 2548 elem *e2 = e.EV.E2; 2549 if (*pretregs == 0) // if don't want result 2550 { 2551 codelem(cdb,e1,pretregs,false); 2552 *pretregs = 0; // in case e1 changed it 2553 codelem(cdb,e2,pretregs,false); 2554 return; 2555 } 2556 2557 if (tyvector(tybasic(e1.Ety))) 2558 return orthxmm(cdb,e,pretregs); 2559 2560 uint jop = jmpopcode(e); // must be computed before 2561 // leaves are free'd 2562 uint reverse = 0; 2563 2564 OPER op = e.Eoper; 2565 assert(OTrel(op)); 2566 bool eqorne = (op == OPeqeq) || (op == OPne); 2567 2568 tym_t tym = tybasic(e1.Ety); 2569 uint sz = _tysize[tym]; 2570 uint isbyte = sz == 1; 2571 2572 uint rex = (I64 && sz == 8) ? REX_W : 0; 2573 uint grex = rex << 16; // 64 bit operands 2574 2575 code cs; 2576 code *ce; 2577 if (tyfloating(tym)) // if floating operation 2578 { 2579 if (config.fpxmmregs) 2580 { 2581 retregs = mPSW; 2582 if (tyxmmreg(tym)) 2583 orthxmm(cdb,e,&retregs); 2584 else 2585 orth87(cdb,e,&retregs); 2586 } 2587 else if (config.inline8087) 2588 { retregs = mPSW; 2589 orth87(cdb,e,&retregs); 2590 } 2591 else 2592 { 2593 if (config.exe & EX_windos) 2594 { 2595 int clib; 2596 2597 retregs = 0; /* skip result for now */ 2598 if (iffalse(e2)) /* second operand is constant 0 */ 2599 { 2600 assert(!eqorne); /* should be OPbool or OPnot */ 2601 if (tym == TYfloat) 2602 { 2603 retregs = FLOATREGS; 2604 clib = CLIB.ftst0; 2605 } 2606 else 2607 { 2608 retregs = DOUBLEREGS; 2609 clib = CLIB.dtst0; 2610 } 2611 if (rel_exception(op)) 2612 clib += CLIB.dtst0exc - CLIB.dtst0; 2613 codelem(cdb,e1,&retregs,false); 2614 retregs = 0; 2615 callclib(cdb,e,clib,&retregs,0); 2616 freenode(e2); 2617 } 2618 else 2619 { 2620 clib = CLIB.dcmp; 2621 if (rel_exception(op)) 2622 clib += CLIB.dcmpexc - CLIB.dcmp; 2623 opdouble(cdb,e,&retregs,clib); 2624 } 2625 } 2626 else 2627 { 2628 assert(0); 2629 } 2630 } 2631 goto L3; 2632 } 2633 2634 /* If it's a signed comparison of longs, we have 
to call a library */ 2635 /* routine, because we don't know the target of the signed branch */ 2636 /* (have to set up flags so that jmpopcode() will do it right) */ 2637 if (!eqorne && 2638 (I16 && tym == TYlong && tybasic(e2.Ety) == TYlong || 2639 I32 && tym == TYllong && tybasic(e2.Ety) == TYllong) 2640 ) 2641 { 2642 assert(jop != JC && jop != JNC); 2643 retregs = mDX | mAX; 2644 codelem(cdb,e1,&retregs,false); 2645 retregs = mCX | mBX; 2646 scodelem(cdb,e2,&retregs,mDX | mAX,false); 2647 2648 if (I16) 2649 { 2650 retregs = 0; 2651 callclib(cdb,e,CLIB.lcmp,&retregs,0); // gross, but it works 2652 } 2653 else 2654 { 2655 /* Generate: 2656 * CMP EDX,ECX 2657 * JNE C1 2658 * XOR EDX,EDX 2659 * CMP EAX,EBX 2660 * JZ C1 2661 * JA C3 2662 * DEC EDX 2663 * JMP C1 2664 * C3: INC EDX 2665 * C1: 2666 */ 2667 getregs(cdb,mDX); 2668 genregs(cdb,0x39,CX,DX); // CMP EDX,ECX 2669 code *c1 = gennop(null); 2670 genjmp(cdb,JNE,FLcode,cast(block *)c1); // JNE C1 2671 movregconst(cdb,DX,0,0); // XOR EDX,EDX 2672 genregs(cdb,0x39,BX,AX); // CMP EAX,EBX 2673 genjmp(cdb,JE,FLcode,cast(block *)c1); // JZ C1 2674 code *c3 = gen1(null,0x40 + DX); // INC EDX 2675 genjmp(cdb,JA,FLcode,cast(block *)c3); // JA C3 2676 cdb.gen1(0x48 + DX); // DEC EDX 2677 genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1 2678 cdb.append(c3); 2679 cdb.append(c1); 2680 getregs(cdb,mDX); 2681 retregs = mPSW; 2682 } 2683 goto L3; 2684 } 2685 2686 /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC 2687 * (This is already reflected in the jop) 2688 */ 2689 if ((jop == JC || jop == JNC) && 2690 (op == OPgt || op == OPle) && 2691 (tyuns(tym) || tyuns(e2.Ety)) 2692 ) 2693 { // jmpopcode() sez comparison should be reversed 2694 assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst); 2695 reverse ^= 2; 2696 } 2697 2698 /* See if we should swap operands */ 2699 if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2)) 2700 { 2701 e1 = e.EV.E2; 2702 e2 = e.EV.E1; 2703 reverse ^= 2; 2704 } 
2705 2706 retregs = allregs; 2707 if (isbyte) 2708 retregs = BYTEREGS; 2709 2710 ce = null; 2711 cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 2712 cs.Irex = cast(ubyte)rex; 2713 if (sz > REGSIZE) 2714 ce = gennop(ce); 2715 2716 switch (e2.Eoper) 2717 { 2718 default: 2719 L2: 2720 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2721 rretregs = allregs & ~retregs; 2722 if (isbyte) 2723 rretregs &= BYTEREGS; 2724 scodelem(cdb,e2,&rretregs,retregs,true); // get right leaf 2725 if (sz <= REGSIZE) // CMP reg,rreg 2726 { 2727 reg = findreg(retregs); // get reg that e1 is in 2728 rreg = findreg(rretregs); 2729 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg); 2730 code_orrex(cdb.last(), rex); 2731 if (!I16 && sz == SHORTSIZE) 2732 cdb.last().Iflags |= CFopsize; // compare only 16 bits 2733 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 2734 cdb.last().Irex |= REX; // address byte registers 2735 } 2736 else 2737 { 2738 assert(sz <= 2 * REGSIZE); 2739 2740 // Compare MSW, if they're equal then compare the LSW 2741 reg = findregmsw(retregs); 2742 rreg = findregmsw(rretregs); 2743 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2744 if (I32 && sz == 6) 2745 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2746 else if (I64) 2747 code_orrex(cdb.last(), REX_W); 2748 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2749 2750 reg = findreglsw(retregs); 2751 rreg = findreglsw(rretregs); 2752 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2753 if (I64) 2754 code_orrex(cdb.last(), REX_W); 2755 } 2756 break; 2757 2758 case OPrelconst: 2759 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 2760 goto L2; 2761 fl = el_fl(e2); 2762 switch (fl) 2763 { 2764 case FLfunc: 2765 fl = FLextern; // so it won't be self-relative 2766 break; 2767 2768 case FLdata: 2769 case FLudata: 2770 case FLextern: 2771 if (sz > REGSIZE) // compare against DS, not DGROUP 2772 goto L2; 2773 break; 2774 2775 case FLfardata: 2776 break; 2777 2778 default: 2779 goto L2; 
2780 } 2781 cs.IFL2 = cast(ubyte)fl; 2782 cs.IEV2.Vsym = e2.EV.Vsym; 2783 if (sz > REGSIZE) 2784 { 2785 cs.Iflags |= CFseg; 2786 cs.IEV2.Voffset = 0; 2787 } 2788 else 2789 { 2790 cs.Iflags |= CFoff; 2791 cs.IEV2.Voffset = e2.EV.Voffset; 2792 } 2793 goto L4; 2794 2795 case OPconst: 2796 // If compare against 0 2797 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) && 2798 isregvar(e1,&retregs,®) 2799 ) 2800 { // Just do a TEST instruction 2801 genregs(cdb,0x85 ^ isbyte,reg,reg); // TEST reg,reg 2802 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw; 2803 code_orrex(cdb.last(), rex); 2804 if (I64 && isbyte && reg >= 4) 2805 cdb.last().Irex |= REX; // address byte registers 2806 retregs = mPSW; 2807 break; 2808 } 2809 2810 if (!tyuns(tym) && !tyuns(e2.Ety) && 2811 !boolres(e2) && !(*pretregs & mPSW) && 2812 (sz == REGSIZE || (I64 && sz == 4)) && 2813 (!I16 || op == OPlt || op == OPge)) 2814 { 2815 assert(*pretregs & (allregs)); 2816 codelem(cdb,e1,pretregs,false); 2817 reg = findreg(*pretregs); 2818 getregs(cdb,mask(reg)); 2819 switch (op) 2820 { 2821 case OPle: 2822 cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1); // ADD reg,-1 2823 code_orflag(cdb.last(), CFpsw); 2824 cdb.genc2(0x81,grex | modregrmx(3,2,reg),0); // ADC reg,0 2825 goto oplt; 2826 2827 case OPgt: 2828 cdb.gen2(0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2829 /* Flips the sign bit unless the value is 0 or int.min. 2830 Also sets the carry bit when the value is not 0. */ 2831 code_orflag(cdb.last(), CFpsw); 2832 cdb.genc2(0x81,grex | modregrmx(3,3,reg),0); // SBB reg,0 2833 /* Subtracts the carry bit. This turns int.min into 2834 int.max, flipping the sign bit. 2835 For other negative and positive values, subtracting 1 2836 doesn't affect the sign bit. 2837 For 0, the carry bit is not set, so this does nothing 2838 and the sign bit is not affected. */ 2839 goto oplt; 2840 2841 case OPlt: 2842 oplt: 2843 // Get the sign bit, i.e. 1 if the value is negative. 
2844 if (!I16) 2845 cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31 2846 else 2847 { /* 8088-286 do not have a barrel shifter, so use this 2848 faster sequence 2849 */ 2850 genregs(cdb,0xD1,0,reg); // ROL reg,1 2851 reg_t regi; 2852 if (reghasvalue(allregs,1,®i)) 2853 genregs(cdb,0x23,reg,regi); // AND reg,regi 2854 else 2855 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1 2856 } 2857 break; 2858 2859 case OPge: 2860 genregs(cdb,0xD1,4,reg); // SHL reg,1 2861 code_orrex(cdb.last(),rex); 2862 code_orflag(cdb.last(), CFpsw); 2863 genregs(cdb,0x19,reg,reg); // SBB reg,reg 2864 code_orrex(cdb.last(),rex); 2865 if (I64) 2866 { 2867 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 2868 code_orrex(cdb.last(), rex); 2869 } 2870 else 2871 cdb.gen1(0x40 + reg); // INC reg 2872 break; 2873 2874 default: 2875 assert(0); 2876 } 2877 freenode(e2); 2878 goto ret; 2879 } 2880 2881 cs.IFL2 = FLconst; 2882 if (sz == 16) 2883 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.hi; 2884 else if (sz > REGSIZE) 2885 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong); 2886 else 2887 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong; 2888 2889 // The cmp immediate relies on sign extension of the 32 bit immediate value 2890 if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint) 2891 goto L2; 2892 L4: 2893 cs.Iop = 0x81 ^ isbyte; 2894 2895 /* if ((e1 is data or a '*' reference) and it's not a 2896 * common subexpression 2897 */ 2898 2899 if ((e1.Eoper == OPvar && datafl[el_fl(e1)] || 2900 e1.Eoper == OPind) && 2901 !evalinregister(e1)) 2902 { 2903 getlvalue(cdb,&cs,e1,RMload); 2904 freenode(e1); 2905 if (evalinregister(e2)) 2906 { 2907 retregs = idxregm(&cs); 2908 if ((cs.Iflags & CFSEG) == CFes) 2909 retregs |= mES; // take no chances 2910 rretregs = allregs & ~retregs; 2911 if (isbyte) 2912 rretregs &= BYTEREGS; 2913 scodelem(cdb,e2,&rretregs,retregs,true); 2914 cs.Iop = 0x39 ^ isbyte ^ reverse; 2915 if (sz > REGSIZE) 2916 { 2917 rreg = findregmsw(rretregs); 2918 
cs.Irm |= modregrm(0,rreg,0); 2919 getlvalue_msw(&cs); 2920 cdb.gen(&cs); // CMP EA+2,rreg 2921 if (I32 && sz == 6) 2922 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2923 if (I64 && isbyte && rreg >= 4) 2924 cdb.last().Irex |= REX; 2925 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2926 rreg = findreglsw(rretregs); 2927 NEWREG(cs.Irm,rreg); 2928 getlvalue_lsw(&cs); 2929 } 2930 else 2931 { 2932 rreg = findreg(rretregs); 2933 code_newreg(&cs, rreg); 2934 if (I64 && isbyte && rreg >= 4) 2935 cs.Irex |= REX; 2936 } 2937 } 2938 else 2939 { 2940 cs.Irm |= modregrm(0,7,0); 2941 if (sz > REGSIZE) 2942 { 2943 if (sz == 6) 2944 assert(0); 2945 if (e2.Eoper == OPrelconst) 2946 { cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg; 2947 cs.IEV2.Voffset = 0; 2948 } 2949 getlvalue_msw(&cs); 2950 cdb.gen(&cs); // CMP EA+2,const 2951 if (!I16 && sz == 6) 2952 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2953 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop 2954 if (e2.Eoper == OPconst) 2955 cs.IEV2.Vint = cast(int)e2.EV.Vllong; 2956 else if (e2.Eoper == OPrelconst) 2957 { // Turn off CFseg, on CFoff 2958 cs.Iflags ^= CFseg | CFoff; 2959 cs.IEV2.Voffset = e2.EV.Voffset; 2960 } 2961 else 2962 assert(0); 2963 getlvalue_lsw(&cs); 2964 } 2965 freenode(e2); 2966 } 2967 cdb.gen(&cs); 2968 break; 2969 } 2970 2971 if (evalinregister(e2) && !OTassign(e1.Eoper) && 2972 !isregvar(e1,null,null)) 2973 { 2974 regm_t m; 2975 2976 m = allregs & ~regcon.mvar; 2977 if (isbyte) 2978 m &= BYTEREGS; 2979 if (m & (m - 1)) // if more than one free register 2980 goto L2; 2981 } 2982 if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) && 2983 !boolres(e2) && !evalinregister(e1)) 2984 { 2985 retregs = mPSW; 2986 scodelem(cdb,e1,&retregs,0,false); 2987 freenode(e2); 2988 break; 2989 } 2990 if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW) 2991 { 2992 retregs |= mPSW; 2993 scodelem(cdb,e1,&retregs,0,false); 2994 freenode(e2); 2995 break; 
2996 } 2997 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2998 if (sz == 1) 2999 { 3000 reg = findreg(retregs & allregs); // get reg that e1 is in 3001 cs.Irm = modregrm(3,7,reg & 7); 3002 if (reg & 8) 3003 cs.Irex |= REX_B; 3004 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg) 3005 { assert(reg < 4); 3006 cs.Irm |= 4; // use upper register half 3007 } 3008 if (I64 && reg >= 4) 3009 cs.Irex |= REX; // address byte registers 3010 } 3011 else if (sz <= REGSIZE) 3012 { // CMP reg,const 3013 reg = findreg(retregs & allregs); // get reg that e1 is in 3014 rretregs = allregs & ~retregs; 3015 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg)) 3016 { 3017 genregs(cdb,0x3B,reg,rreg); 3018 code_orrex(cdb.last(), rex); 3019 if (!I16) 3020 cdb.last().Iflags |= cs.Iflags & CFopsize; 3021 freenode(e2); 3022 break; 3023 } 3024 cs.Irm = modregrm(3,7,reg & 7); 3025 if (reg & 8) 3026 cs.Irex |= REX_B; 3027 } 3028 else if (sz <= 2 * REGSIZE) 3029 { 3030 reg = findregmsw(retregs); // get reg that e1 is in 3031 cs.Irm = modregrm(3,7,reg); 3032 cdb.gen(&cs); // CMP reg,MSW 3033 if (I32 && sz == 6) 3034 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3035 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3036 3037 reg = findreglsw(retregs); 3038 cs.Irm = modregrm(3,7,reg); 3039 if (e2.Eoper == OPconst) 3040 cs.IEV2.Vint = e2.EV.Vlong; 3041 else if (e2.Eoper == OPrelconst) 3042 { // Turn off CFseg, on CFoff 3043 cs.Iflags ^= CFseg | CFoff; 3044 cs.IEV2.Voffset = e2.EV.Voffset; 3045 } 3046 else 3047 assert(0); 3048 } 3049 else 3050 assert(0); 3051 cdb.gen(&cs); // CMP sucreg,LSW 3052 freenode(e2); 3053 break; 3054 3055 case OPind: 3056 if (e2.Ecount) 3057 goto L2; 3058 goto L5; 3059 3060 case OPvar: 3061 if (config.exe & (EX_OSX | EX_OSX64)) 3062 { 3063 if (movOnly(e2)) 3064 goto L2; 3065 } 3066 if ((e1.Eoper == OPvar && 3067 isregvar(e2,&rretregs,®) && 3068 sz <= REGSIZE 3069 ) || 3070 (e1.Eoper == OPind && 3071 
isregvar(e2,&rretregs,®) && 3072 !evalinregister(e1) && 3073 sz <= REGSIZE 3074 ) 3075 ) 3076 { 3077 // CMP EA,e2 3078 getlvalue(cdb,&cs,e1,RMload); 3079 freenode(e1); 3080 cs.Iop = 0x39 ^ isbyte ^ reverse; 3081 code_newreg(&cs,reg); 3082 if (I64 && isbyte && reg >= 4) 3083 cs.Irex |= REX; // address byte registers 3084 cdb.gen(&cs); 3085 freenode(e2); 3086 break; 3087 } 3088 L5: 3089 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3090 if (sz <= REGSIZE) // CMP reg,EA 3091 { 3092 reg = findreg(retregs & allregs); // get reg that e1 is in 3093 uint opsize = cs.Iflags & CFopsize; 3094 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0); 3095 code_orflag(cdb.last(),opsize); 3096 } 3097 else if (sz <= 2 * REGSIZE) 3098 { 3099 reg = findregmsw(retregs); // get reg that e1 is in 3100 // CMP reg,EA 3101 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0); 3102 if (I32 && sz == 6) 3103 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3104 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3105 reg = findreglsw(retregs); 3106 if (e2.Eoper == OPind) 3107 { 3108 NEWREG(cs.Irm,reg); 3109 getlvalue_lsw(&cs); 3110 cdb.gen(&cs); 3111 } 3112 else 3113 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0); 3114 } 3115 else 3116 assert(0); 3117 freenode(e2); 3118 break; 3119 } 3120 cdb.append(ce); 3121 3122 L3: 3123 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3124 { 3125 if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00)) 3126 { 3127 regm_t resregs = retregs; 3128 if (!I64) 3129 { 3130 resregs &= BYTEREGS; 3131 if (!resregs) 3132 resregs = BYTEREGS; 3133 } 3134 allocreg(cdb,&resregs,®,TYint); 3135 cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg 3136 if (I64 && reg >= 4) 3137 code_orrex(cdb.last(),REX); 3138 if (tysize(e.Ety) > 1) 3139 { 3140 genregs(cdb,MOVZXb,reg,reg); // MOVZX reg,reg 3141 if (I64 && sz == 8) 3142 code_orrex(cdb.last(),REX_W); 3143 if (I64 && reg 
>= 4) 3144 code_orrex(cdb.last(),REX); 3145 } 3146 *pretregs &= ~mPSW; 3147 fixresult(cdb,e,resregs,pretregs); 3148 } 3149 else 3150 { 3151 code *nop = null; 3152 regm_t save = regcon.immed.mval; 3153 allocreg(cdb,&retregs,®,TYint); 3154 regcon.immed.mval = save; 3155 if ((*pretregs & mPSW) == 0 && 3156 (jop == JC || jop == JNC)) 3157 { 3158 getregs(cdb,retregs); 3159 genregs(cdb,0x19,reg,reg); // SBB reg,reg 3160 if (rex || flag & REX_W) 3161 code_orrex(cdb.last(), REX_W); 3162 if (flag) 3163 { } // cdcond() will handle it 3164 else if (jop == JNC) 3165 { 3166 if (I64) 3167 { 3168 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 3169 code_orrex(cdb.last(), rex); 3170 } 3171 else 3172 cdb.gen1(0x40 + reg); // INC reg 3173 } 3174 else 3175 { 3176 cdb.gen2(0xF7,modregrmx(3,3,reg)); // NEG reg 3177 code_orrex(cdb.last(), rex); 3178 } 3179 } 3180 else if (I64 && sz == 8) 3181 { 3182 assert(!flag); 3183 movregconst(cdb,reg,1,64|8); // MOV reg,1 3184 nop = gennop(nop); 3185 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3186 // MOV reg,0 3187 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64); 3188 regcon.immed.mval &= ~mask(reg); 3189 } 3190 else 3191 { 3192 assert(!flag); 3193 movregconst(cdb,reg,1,8); // MOV reg,1 3194 nop = gennop(nop); 3195 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3196 // MOV reg,0 3197 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0); 3198 regcon.immed.mval &= ~mask(reg); 3199 } 3200 *pretregs = retregs; 3201 cdb.append(nop); 3202 } 3203 } 3204 ret: 3205 { } 3206 } 3207 3208 3209 /********************************** 3210 * Generate code for signed compare of longs. 
* Input:
 *      targ    block* or code*
 *
 * Emits a compare of the most significant words followed by two jumps
 * (to `targ` and to a local join point), then a compare of the least
 * significant words followed by an unsigned conditional jump to `targ`.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = relational expression; `e.Eoper - OPle` indexes 4-entry jump
 *          tables laid out as <=, >, <, >=. `e` is freed before returning.
 *      jcond = if bit 0 is clear, the jump conditions are toggled
 *      fltarg = FL kind passed to genjmp() for `targ`
 *      targ = jump target
 */

@trusted
void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
{
                                         // <=  >   <   >=
    static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
    static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];

    //printf("longcmp(e = %p)\n", e);
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;

    // See if we should swap operands
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        op = swaprel(op);
    }

    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;

    code *ce = gennop(null);            // join point placed after the LSW jump
    regm_t retregs = ALLREGS;
    regm_t rretregs;
    reg_t reg,rreg;

    uint jop = jopmsw[op - OPle];
    if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
    // The two MSW jumps are built once and appended right after the MSW compare
    CodeBuilder cdbjmp;
    cdbjmp.ctor();
    genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
    genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop

    switch (e2.Eoper)
    {
        default:
        L2:
            scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
            rretregs = ALLREGS & ~retregs;
            scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
            cse_flush(cdb,1);
            // Compare MSW, if they're equal then compare the LSW
            reg = findregmsw(retregs);
            rreg = findregmsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);            // CMP reg,rreg
            cdb.append(cdbjmp);

            reg = findreglsw(retregs);
            rreg = findreglsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);            // CMP reg,rreg
            break;

        case OPconst:
            cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
            cs.IFL2 = FLconst;
            cs.Iop = 0x81;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                getlvalue(cdb,&cs,e1,0);
                freenode(e1);
                if (evalinregister(e2))
                {
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                        retregs |= mES;             // take no chances
                    rretregs = ALLREGS & ~retregs;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cse_flush(cdb,1);
                    rreg = findregmsw(rretregs);
                    cs.Iop = 0x39;
                    cs.Irm |= modregrm(0,rreg,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // CMP EA+2,rreg
                    cdb.append(cdbjmp);
                    rreg = findreglsw(rretregs);
                    NEWREG(cs.Irm,rreg);
                }
                else
                {
                    cse_flush(cdb,1);
                    cs.Irm |= modregrm(0,7,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // CMP EA+2,const
                    cdb.append(cdbjmp);
                    cs.IEV2.Vint = e2.EV.Vlong;
                    freenode(e2);
                }
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                     // CMP EA,rreg/const
                break;
            }
            if (evalinregister(e2))
                goto L2;

            scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
            cse_flush(cdb,1);
            reg = findregmsw(retregs);              // get reg that e1 is in
            cs.Irm = modregrm(3,7,reg);

            cdb.gen(&cs);                         // CMP reg,MSW
            cdb.append(cdbjmp);
            reg = findreglsw(retregs);
            cs.Irm = modregrm(3,7,reg);
            cs.IEV2.Vint = e2.EV.Vlong;
            cdb.gen(&cs);                         // CMP sucreg,LSW
            freenode(e2);
            break;

        case OPvar:
            if (!e1.Ecount && e1.Eoper == OPs32_64)
            {
                reg_t msreg;

                retregs = allregs;
                scodelem(cdb,e1.EV.E1,&retregs,0,true);
                freenode(e1);
                reg = findreg(retregs);
                retregs = allregs & ~retregs;
                allocreg(cdb,&retregs,&msreg,TYint);
                genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
                cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
                cse_flush(cdb,1);
                loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);
                cdb.append(cdbjmp);
                loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);
                freenode(e2);
            }
            else
            {
                scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
                cse_flush(cdb,1);
                reg = findregmsw(retregs);   // get reg that e1 is in
                loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
                cdb.append(cdbjmp);
                reg = findreglsw(retregs);
                loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
                freenode(e2);
            }
            break;
    }

    jop = joplsw[op - OPle];
    if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
    genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ

    cdb.append(ce);
    freenode(e);
}

/*****************************
 * Do conversions.
 * Depends on OPd_s32 and CLIB.dbllng being in sequence.
 *
 * When `config.inline8087` is set, most operators are handled inline and
 * return early. Otherwise (or when a case breaks out of the switch) the
 * operand is evaluated and the runtime library routine found in the local
 * `clib` table is called.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = conversion expression; its operand is `e.EV.E1`
 *      pretregs = requested result registers; if `*pretregs` is 0 only the
 *                 operand is evaluated
 */

@trusted
void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
    //elem_print(e);

    // Pairs of [ operator, library routine ] searched linearly below
    static immutable ubyte[2][16] clib =
    [
        [ OPd_s32,        CLIB.dbllng   ],
        [ OPs32_d,        CLIB.lngdbl   ],
        [ OPd_s16,        CLIB.dblint   ],
        [ OPs16_d,        CLIB.intdbl   ],
        [ OPd_u16,        CLIB.dbluns   ],
        [ OPu16_d,        CLIB.unsdbl   ],
        [ OPd_u32,        CLIB.dblulng  ],
        [ OPu32_d,        CLIB.ulngdbl  ],
        [ OPd_s64,        CLIB.dblllng  ],
        [ OPs64_d,        CLIB.llngdbl  ],
        [ OPd_u64,        CLIB.dblullng ],
        [ OPu64_d,        CLIB.ullngdbl ],
        [ OPd_f,          CLIB.dblflt   ],
        [ OPf_d,          CLIB.fltdbl   ],
        [ OPvp_fp,        CLIB.vptrfptr ],
        [ OPcvp_fp,       CLIB.cvptrfptr]
    ];

    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t retregs;
    if (config.inline8087)
    {
        switch (e.Eoper)
        {
            case OPld_d:
            case OPd_ld:
            {
                if (tycomplex(e.EV.E1.Ety))
                {
            Lcomplex:
                    regm_t retregsx = mST01 | (*pretregs & mPSW);
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    fixresult_complex87(cdb, e, retregsx, pretregs);
                    return;
                }
                regm_t retregsx = mST0 | (*pretregs & mPSW);
                codelem(cdb,e.EV.E1, &retregsx, false);
                fixresult87(cdb, e, retregsx, pretregs);
                return;
            }

            case OPf_d:
            case OPd_f:
                if (tycomplex(e.EV.E1.Ety))
                    goto Lcomplex;
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }

                /* it won't do us much good to transfer back and      */
                /* forth between 8088 registers and 8087 registers    */
                if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
                {
                    retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
                    if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
                    {
                        codelem(cdb,e.EV.E1,pretregs,false);
                        if (*pretregs & mST0)
                            note87(e, 0, 0);
                        return;
                    }
                    else
                        break;
                }
                goto Lload87;

            case OPs64_d:
                if (!I64)
                    goto Lload87;
                goto case OPs32_d;

            case OPs32_d:
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }
                goto Lload87;

            case OPs16_d:
            case OPu16_d:
            Lload87:
                load87(cdb,e,0,pretregs,null,-1);
                return;

            case OPu32_d:
                if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                else if (!I16)
                {
                    // Store the value with a zero upper half, then load it as a 64 bit integer
                    regm_t retregsx = ALLREGS;
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    reg_t reg = findreg(retregsx);
                    cdb.genfltreg(STO, reg, 0);
                    regwithvalue(cdb,ALLREGS,0,&reg,0);
                    cdb.genfltreg(STO, reg, 4);

                    push87(cdb);
                    cdb.genfltreg(0xDF,5,0);     // FILD m64int

                    regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
                    fixresult87(cdb, e, retregsy, pretregs);
                    return;
                }
                break;

            case OPd_s64:
                if (!I64)
                    goto Lcnvt87;
                goto case OPd_s32;

            case OPd_s16:
            case OPd_s32:
                if (config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                goto Lcnvt87;

            case OPd_u16:
            Lcnvt87:
                cnvt87(cdb,e,pretregs);
                return;

            case OPd_u32:               // use subroutine, not 8087
                if (I64 && config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                if (I32 || I64)
                {
                    cdd_u32(cdb,e,pretregs);
                    return;
                }
                if (config.exe & EX_posix)
                {
                    retregs = mST0;
                }
                else
                {
                    retregs = DOUBLEREGS;
                }
                goto L1;

            case OPd_u64:
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                retregs = DOUBLEREGS;
                goto L1;

            case OPu64_d:
                if (*pretregs & mST0)
                {
                    regm_t retregsx = I64 ? mAX : mAX|mDX;
                    codelem(cdb,e.EV.E1,&retregsx,false);
                    callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
                    return;
                }
                break;

            case OPld_u64:
            {
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                regm_t retregsx = mST0;
                codelem(cdb,e.EV.E1,&retregsx,false);
                callclib(cdb,e,CLIB.ld_u64,pretregs,0);
                return;
            }

            default:
                break;
        }
    }
    retregs = regmask(e.EV.E1.Ety, TYnfunc);
L1:
    codelem(cdb,e.EV.E1,&retregs,false);
    // Linear search of the table; the assert fires if the operator has no entry
    for (int i = 0; 1; i++)
    {
        assert(i < clib.length);
        if (clib[i][0] == e.Eoper)
        {
            callclib(cdb,e,clib[i][1],pretregs,0);
            break;
        }
    }
}


/***************************
 * Convert short to long.
 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
 * OPu64_128, OPs64_128
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = conversion expression; its operand is `e.EV.E1`
 *      pretregs = requested result registers/flags; if no general register
 *                 (ALLREGS | mBP) is requested only the operand is evaluated
 */

@trusted
void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t reg;
    regm_t retregs;

    //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    int e1comsub = e.EV.E1.Ecount;
    ubyte op = e.Eoper;
    if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
        return;
    }
    else if (
             op == OPnp_fp ||
             (I16 && op == OPu16_32) ||
             (I32 && op == OPu32_64) ||
             (I64 && op == OPu64_128)
            )
    {
        /* Result goes into a register pair.
         * Zero extend by putting a zero into most significant reg.
         */

        regm_t retregsx = *pretregs & mLSW;
        assert(retregsx);
        tym_t tym1 = tybasic(e.EV.E1.Ety);
        codelem(cdb,e.EV.E1,&retregsx,false);

        regm_t regm = *pretregs & (mMSW & ALLREGS);
        if (regm == 0)                  // *pretregs could be mES
            regm = mMSW & ALLREGS;
        allocreg(cdb,&regm,&reg,TYint);
        if (e1comsub)
            getregs(cdb,retregsx);
        if (op == OPnp_fp)
        {
            int segreg;

            // BUG: what about pointers to functions?
            switch (tym1)
            {
                case TYimmutPtr:
                case TYnptr:    segreg = SEG_DS;        break;
                case TYcptr:    segreg = SEG_CS;        break;
                case TYsptr:    segreg = SEG_SS;        break;
                default:        assert(0);
            }
            cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
        }
        else
            movregconst(cdb,reg,0,0);  // 0 extend

        fixresult(cdb,e,retregsx | regm,pretregs);
        return;
    }
    else if (I64 && op == OPu32_64)
    {
        elem *e1 = e.EV.E1;
        retregs = *pretregs;
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs;

            allocreg(cdb,&retregs,&reg,TYint);
            loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
            freenode(e1);
        }
        else
        {
            *pretregs &= ~mPSW;                 // flags are set by eval of e1
            codelem(cdb,e1,&retregs,false);
            /* Determine if high 32 bits are already 0
             */
            if (e1.Eoper == OPu16_32 && !e1.Ecount)
            {
            }
            else
            {
                // Zero high 32 bits
                getregs(cdb,retregs);
                reg = findreg(retregs);
                // Don't use x89 because that will get optimized away
                genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
            }
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
    {
        /* Due to how e1 is calculated, the high 32 bits of the register
         * are already 0.
         */
        retregs = *pretregs;
        codelem(cdb,e.EV.E1,&retregs,false);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
              I64 && op == OPs32_64)
    {
        elem *e11;
        elem *e1 = e.EV.E1;

        if (e1.Eoper == OPu8_16 && !e1.Ecount &&
            ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
           )
        {
            code cs;

            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);
            movregconst(cdb,reg,0,0);                   //  XOR reg,reg
            loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
            freenode(e11);
            freenode(e1);
        }
        else if (e1.Eoper == OPvar ||
            (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs = void;

            if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
                goto L2;
            retregs = *pretregs;
            allocreg(cdb,&retregs,&reg,TYint);
            const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
            if (op == OPs32_64)
            {
                assert(I64);
                // MOVSXD reg,e1
                loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
                code_orrex(cdb.last(), REX_W);
            }
            else
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
            freenode(e1);
        }
        else
        {
        L2:
            retregs = *pretregs;
            if (op == OPs32_64)
                retregs = mAX | (*pretregs & mPSW);
            *pretregs &= ~mPSW;             // flags are already set
            CodeBuilder cdbx;
            cdbx.ctor();
            codelem(cdbx,e1,&retregs,false);
            code *cx = cdbx.finish();
            cdb.append(cdbx);
            getregs(cdb,retregs);
            if (op == OPu16_32 && cx)
            {
                cx = code_last(cx);
                if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                    mask(cx.Irm & 7) == retregs)
                {
                    // Convert AND of a word to AND of a dword, zeroing upper word
                    if (cx.Irex & REX_B)
                        retregs = mask(8 | (cx.Irm & 7));
                    cx.Iflags &= ~CFopsize;
                    cx.IEV2.Vint &= 0xFFFF;
                    goto L1;
                }
            }
            if (op == OPs16_32 && retregs == mAX)
                cdb.gen1(0x98);         // CWDE
            else if (op == OPs32_64 && retregs == mAX)
            {
                cdb.gen1(0x98);         // CDQE
                code_orrex(cdb.last(), REX_W);
            }
            else
            {
                reg = findreg(retregs);
                if (config.flags4 & CFG4speed && op == OPu16_32)
                {   // AND reg,0xFFFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
                }
                else
                {
                    opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                }
            }
         L1:
            if (e1comsub)
                getregs(cdb,retregs);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
    {
        // OPs16_32, OPs32_64
        // CWD doesn't affect flags, so we can depend on the integer
        // math to provide the flags.
        retregs = mAX | mPSW;               // want integer result in AX
        *pretregs &= ~mPSW;                 // flags are already set
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,mDX);                   // sign extend into DX
        cdb.gen1(0x99);                     // CWD/CDQ
        if (e1comsub)
            getregs(cdb,retregs);
        fixresult(cdb,e,mDX | retregs,pretregs);
        return;
    }
    else
    {
        // OPs16_32, OPs32_64, OPs64_128
        uint msreg,lsreg;

        retregs = *pretregs & mLSW;
        assert(retregs);
        codelem(cdb,e.EV.E1,&retregs,false);
        retregs |= *pretregs & mMSW;
        allocreg(cdb,&retregs,&reg,e.Ety);
        msreg = findregmsw(retregs);
        lsreg = findreglsw(retregs);
        genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
        assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
        cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
}


/***************************
 * Convert byte to int.
 * For OPu8_16 and OPs8_16.
* Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPu8_16 / OPs8_16 elem; its operand is e.EV.E1
 *      pretregs = on entry the registers/flags the caller wants the result in;
 *                 mPSW is cleared from it once the flags are known to be set
 */

@trusted
void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;          // only assigned and read on the !I16 path

    if ((*pretregs & (ALLREGS | mBP | XMMREGS)) == 0)   // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,&e1);
    if (!I16)
    {
        // Operand is directly addressable: widen while loading it from its EA
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                movregconst(cdb,reg,0,0);                         // XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx);  // MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        size = tysize(e.Ety);
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        retregs |= *pretregs & mPSW;
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // Evaluate the operand into a separate builder so we can tell
    // whether it generated any code at all (c1 != null)
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;

    // If previous instruction is an AND bytereg,value
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        // Widen the AND itself instead of emitting a separate extension
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                     // convert to word operation
        c.IEV2.Vuns &= 0xFF;            // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                 // CBW
                cdb.last().Iflags |= CFopsize;  // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                 // CBW
                *pretregs &= ~mPSW;             // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Convert long to short (OP32_16).
 * Get offset of far pointer (OPoffset).
 * Convert int to byte (OP16_8).
 * Convert long long to long (OP64_32).
* OP128_64
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the narrowing elem; its operand is e.EV.E1
 *      pretregs = registers the caller wants the result in (0 for no result)
 */

@trusted
void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        switch (e.Eoper)
        {
            case OP32_16:
            case OPoffset:
            case OP16_8:
            case OP64_32:
            case OP128_64:
                break;

            default:
                assert(0);
        }
    }

    regm_t retregs;
    if (e.Eoper == OP16_8)
    {
        retregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb,e.EV.E1,&retregs,false);
    }
    else
    {
        if (e.EV.E1.Eoper == OPrelconst)
            offsetinreg(cdb,e.EV.E1,&retregs);
        else
        {
            retregs = *pretregs ? ALLREGS : 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            bool isOff = e.Eoper == OPoffset;
            if (I16 ||
                I32 && (isOff || e.Eoper == OP64_32) ||
                I64 && (isOff || e.Eoper == OP128_64))
                retregs &= mLSW;                // want LSW only
        }
    }

    /* We "destroy" a reg by assigning it the result of a new e, even
     * though the values are the same. Weakness of our CSE strategy that
     * a register can only hold the contents of one elem at a time.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    debug
    if (!(!*pretregs || retregs))
    {
        printf("%s *pretregs = %s, retregs = %s, e = %p\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(retregs),e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // lsw only
}

/**********************************************
 * Get top 32 bits of 64 bit value (I32)
 * or top 16 bits of 32 bit value (I16)
 * or top 64 bits of 128 bit value (I64).
 * OPmsw
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPmsw elem; its operand is e.EV.E1
 *      pretregs = registers the caller wants the result in (0 for no result)
 */

@trusted
void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    regm_t retregs = *pretregs ? ALLREGS : 0;
    codelem(cdb,e.EV.E1,&retregs,false);
    retregs &= mMSW;                    // want MSW only

    /* We "destroy" a reg by assigning it the result of a new e, even
     * though the values are the same. Weakness of our CSE strategy that
     * a register can only hold the contents of one elem at a time.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    debug
    if (!(!*pretregs || retregs))
    {
        printf("%s *pretregs = %s, retregs = %s\n",oper_str(e.Eoper),regm_str(*pretregs),regm_str(retregs));
        elem_print(e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // msw only
}



/******************************
 * Handle operators OPinp and OPoutp.
 *
 * The opcode is built up from 0xE4 by or-ing in bits:
 * 0x08 selects the form taking the port in DX, 0x02 selects OUT,
 * 0x01 selects the word-sized form.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPinp/OPoutp elem; E1 is the port number, and for OPoutp
 *          E2 is the value to write
 *      pretregs = registers the caller wants the result in
 */

@trusted
void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    ubyte op = 0xE4;            // root of all IN/OUT opcodes
    elem *e1 = e.EV.E1;

    // See if we can use immediate mode of IN/OUT opcodes
    ubyte port;
    if (e1.Eoper == OPconst && e1.EV.Vuns <= 255 &&
        (!evalinregister(e1) || regcon.mvar & mDX))
    {
        port = cast(ubyte)e1.EV.Vuns;
        freenode(e1);
    }
    else
    {
        regm_t retregs = mDX;           // port number is always DX
        codelem(cdb,e1,&retregs,false);
        op |= 0x08;                     // DX version of opcode
        port = 0;                       // not logically needed, but
                                        // quiets "uninitialized var" complaints
    }

    uint sz;
    if (e.Eoper == OPoutp)
    {
        sz = tysize(e.EV.E2.Ety);
        regm_t retregs = mAX;           // byte/word to output is in AL/AX
        // keep DX intact while evaluating the value if it holds the port
        scodelem(cdb,e.EV.E2,&retregs,((op & 0x08) ? mDX : 0),true);
        op |= 0x02;                     // OUT opcode
    }
    else // OPinp
    {
        getregs(cdb,mAX);
        sz = tysize(e.Ety);
    }

    if (sz != 1)
        op |= 1;                        // word operation
    cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
    if (op & 1 && sz != REGSIZE)        // if need size override
        cdb.last().Iflags |= CFopsize;
    regm_t retregs = mAX;
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Generate code for an asm elem.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = asm elem; the bytes to emit are e.EV.Vstring[0 .. e.EV.Vstrlen]
 *      pretregs = registers the caller wants the result in
 */

@trusted
void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume only regs normally destroyed by a function are destroyed
    getregs(cdb,(ALLREGS | mES) & ~fregsaved);
    cdb.genasm(cast(char *)e.EV.Vstring, cast(uint) e.EV.Vstrlen);
    fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs);
}

/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 *
 * The generated sequence rotates the register by 16, shifts its low
 * word by 3, and rotates it back; see the listings in the body.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPnp_f16p or OPf16p_np elem; its operand is e.EV.E1
 *      pretregs = registers the operand is evaluated into and converted in place
 */

@trusted
void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    code *cnop;
    code cs;

    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);      // we will destroy the regs

    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;
    code csrol = cs;                    // ROL ereg,16 - emitted after the shift

    cs.Irm |= modregrm(0,1,0);
    code csror = cs;                    // ROR ereg,16 - emitted before the shift

    // cs now becomes the template for the 16 bit shift by 3
    cs.IEV2.Vuns = 3;
    cs.Iflags |= CFopsize;

    if (e.Eoper == OPnp_f16p)
    {
        /*      OR  ereg,ereg
                JE  L1
                ROR ereg,16
                SHL reg,3
                MOV rx,SS
                AND rx,3                ;mask off CPL bits
                OR  rl,4                ;turn on LDT bit
                OR  regl,rl
                ROL ereg,16
            L1: NOP
         */
        reg_t rx;

        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;                 // CX can be tested by the jump itself
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);      // Jop L1
        cdb.gen(&csror);                                // ROR ereg,16
        NEWREG(cs.Irm,4);
        cdb.gen(&cs);                                   // SHL reg,3
        genregs(cdb,0x8C,2,rx);                         // MOV rx,SS
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);    // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);              // OR  regl,rl
    }
    else        // OPf16p_np
    {
        /*      ROR ereg,16
                SHR reg,3
                ROL ereg,16
         */

        cdb.gen(&csror);                // ROR ereg,16
        cs.Irm |= modregrm(0,5,0);
        cdb.gen(&cs);                   // SHR reg,3
        cnop = null;
    }
    cdb.gen(&csrol);                    // ROL ereg,16
    cdb.append(cnop);                   // L1: NOP (target of the jump, if any)
}

/*************************
 * Generate code for OPbtst
 *
 * Emits a BT of E1 (memory operand or register value) with the bit
 * index E2, then materializes the carry flag into a register if the
 * caller asked for a register result.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPbtst elem; E1 is the value tested, E2 the bit index
 *      pretregs = registers/flags the caller wants the result in;
 *                 set to the register actually used when one is allocated
 */

@trusted
void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                 // BT EA,value
    int mode = 4;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false);   // eval left leaf
        *pretregs = 0;                    // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        // value is computed into a register; BT addresses it via mod=3
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
        idxregs = retregs;
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // reduce the bit index modulo the operand width
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            if (isregvar(e2, &rregm, null))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg's effect on regcon.immed.mval is undone here
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);                   // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * Emits BT/BTC/BTR/BTS on the addressing mode obtained from e, with
 * the bit index E2, then materializes the carry flag into a register
 * if the caller asked for a register result.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPbt/OPbtc/OPbtr/OPbts elem; E2 is the bit index
 *      pretregs = registers/flags the caller wants the result in;
 *                 set to the register actually used when one is allocated
 */

@trusted
void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;        // second opcode byte of the "rm,reg" form
    int mode;           // /digit selecting the operation in the "rm,imm8" form

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // a plain test whose result is unused: only evaluate the index
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Split the constant index: whole operand-sized units are folded
        // into the EA offset (in bytes), the remainder goes in the imm8
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // tysize() masks the type down to its basic type before the
        // table lookup, matching what cdbtst does
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg's effect on regcon.immed.mval is undone here
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);                   // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************************
 * Generate code for OPbsf and
OPbsr.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPbsf/OPbsr elem; its operand is e.EV.E1 (2, 4 or 8 bytes)
 *      pretregs = registers the caller wants the result in (0 for no result)
 */

@trusted
void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || sz == 8);
    code cs = void;     // every field used is set by one of the branches below

    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // operand is computed into a register; address it via mod=3
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;        // BSF/BSR reg,EA
    code_newreg(&cs, reg);
    if (!I16 && sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * OPpopcnt operator
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPpopcnt elem; its operand is e.EV.E1 (2, 4, or on I64 8 bytes)
 *      pretregs = registers/flags the caller wants the result in;
 *                 everything but mBP | ALLREGS is cleared from it
 */

@trusted
void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);

    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op

    code cs = void;     // every field used is set by one of the branches below
    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // operand is computed into a register; address it via mod=3
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = POPCNT;            // POPCNT reg,EA
    code_newreg(&cs, reg);
    if (sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);
    *pretregs &= mBP | ALLREGS;             // flags already set

    fixresult(cdb,e,retregs,pretregs);
}


/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * E1 and E2 are evaluated into the two halves of the result register
 * set; for OPrpair the halves are exchanged.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPpair/OPrpair elem with operands E1 and E2
 *      pretregs = registers the caller wants the result in (0 for no result)
 */

@trusted
void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //WRTYxx(e.Ety);printf("\n");
    //printf("Ecount = %d\n", e.Ecount);

    regm_t retregs = *pretregs;
    if (retregs == mPSW && tycomplex(e.Ety) && config.inline8087)
    {
        // only flags were requested for a complex value: pick FP registers
        if (config.fpxmmregs)
            retregs |= mXMM0 | mXMM1;
        else
            retregs |= mST01;
    }

    if (retregs & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    regm_t regs1;
    regm_t regs2;
    if (retregs & XMMREGS)
    {
        // use two distinct XMM registers from the requested set
        retregs &= XMMREGS;
        const reg = findreg(retregs);
        regs1 = mask(reg);
        regs2 = mask(findreg(retregs & ~regs1));
    }
    else
    {
        retregs &= allregs;
        if (!retregs)
            retregs = allregs;
        regs1 = retregs & mLSW;
        regs2 = retregs & mMSW;
    }
    if (e.Eoper == OPrpair)
    {
        // swap
        regs1 ^= regs2;
        regs2 ^= regs1;
        regs1 ^= regs2;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    codelem(cdb,e.EV.E1, &regs1, false);
    scodelem(cdb,e.EV.E2, &regs2, regs1, false);

    if (e.EV.E1.Ecount)
        getregs(cdb,regs1);
    if (e.EV.E2.Ecount)
        getregs(cdb,regs2);

    fixresult(cdb,e,regs1 | regs2,pretregs);
}

/*************************
 * Generate code for OPcmpxchg
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPcmpxchg elem, shaped as shown in the body
 *      pretregs = registers the caller wants the (byte sized) result in;
 *                 set to the register actually used when one is allocated
 */

@trusted
void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);                   // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);          // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                   // LOCK prefix
        cs.Iop = 0x0FC7;                      // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);                   // reg field is the /1 opcode extension
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);            // 1 for byte operation
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);       // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);   // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs);         // get EA

        getregs(cdb,mAX);                  // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;          // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}

/*************************
 * Generate code for OPprefetch
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = OPprefetch elem; E1 is the address operand, E2 a constant
 *          selecting the prefetch variant
 *      pretregs = must be 0, no result is produced
 */

@trusted
void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetcht0
     *    1: prefetcht1
     *    2: prefetcht2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    elem *e1 = e.EV.E1;

    assert(*pretregs == 0);
    assert(e.EV.E2.Eoper == OPconst);
    opcode_t op;
    reg_t reg;          // value for the reg field of the modregrm byte
    switch (e.EV.E2.EV.Vuns)
    {
        case 0: op = PREFETCH; reg = 1; break;  // PREFETCHT0
        case 1: op = PREFETCH; reg = 2; break;  // PREFETCHT1
        case 2: op = PREFETCH; reg = 3; break;  // PREFETCHT2
        case 3: op = PREFETCH; reg = 0; break;  // PREFETCHNTA
        case 4: op = 0x0F0D;   reg = 1; break;  // PREFETCHW
        case 5: op = 0x0F0D;   reg = 2; break;  // PREFETCHWT1
        default: assert(0);
    }

    freenode(e.EV.E2);

    code cs = void;
    getlvalue(cdb,&cs,e1,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,reg,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}


/*********************
 * Load register from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that will be evaluated
 *          (not referenced by the body)
 *      reg = set to register loaded from EA
 *      retregs = register candidates for reg
 */
@trusted
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);
    allocreg(cdb,&retregs,&reg,TYoffset);

    cs.Iop = LOD;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                        // MOV reg,EA
}

/*********************
 * Load register pair from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = set to the instruction addressing the EA of e.EV.E1
 *      e = assignment expression that will be evaluated
 *      rhi = set to most significant register of the pair
 *      rlo = set to least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
@trusted
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    getlvalue(cdb,&cs,e.EV.E1,retregs | keepmsk);
    const tym_t tyml = tybasic(e.EV.E1.Ety);              // type of lvalue
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tyml);

    rhi = findregmsw(retregs);
    rlo = findreglsw(retregs);

    cs.Iop = LOD;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV rlo,EA
    getlvalue_msw(&cs);
    code_newreg(&cs,rhi);
    cdb.gen(&cs);                   // MOV rhi,EA+2
    getlvalue_lsw(&cs);             // leave cs addressing the low half again
}


/*********************************************************
 * Store register
result of assignment operation to the EA.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      reg = register of result
 *      pretregs = registers to store result in
 */
@trusted
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t resultMask = mask(reg);

    // A one byte lvalue flips the low opcode bit to get the byte form
    const bool byteStore = _tysize[tybasic(lvalue.Ety)] == 1;
    cs.Iop = STO ^ (byteStore ? 1 : 0);
    code_newreg(&cs,reg);
    cdb.gen(&cs);                           // MOV EA,resreg

    if (lvalue.Ecount)                      // if we gen a CSE
        cssave(lvalue,resultMask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,resultMask,pretregs);
}

/*********************************************************
 * Store register pair result of assignment operation to the EA.
 * The least significant register is stored first, then the most
 * significant one.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      rhi = most significant register of the pair
 *      rlo = least significant register of the pair
 *      pretregs = registers to store result in
 */
@trusted
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t pairMask = mask(rhi) | mask(rlo);

    cs.Iop = STO;

    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV EA,lsreg

    code_newreg(&cs,rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);                   // MOV EA+REGSIZE,msreg

    if (lvalue.Ecount)              // if we gen a CSE
        cssave(lvalue,pairMask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,pairMask,pretregs);
}


}