/**
 * x87 FPU code generation
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1987-1995 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cg87.d, backend/cg87.d)
 */

module dmd.backend.cg87;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.barray;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;

extern (C++):

nothrow:
@safe:

// NOTE: this could be a TLS global which would allow this variable to be used in
// a multi-threaded version of the backend
__gshared Globals87 global87;

/// Returns: a word with only bit number `m` set
private extern (D) uint mask(uint m)
{
    return 1 << m;
}


// Constants that the 8087 supports directly
// BUG: rewrite for 80 bit long doubles
enum PI          = 3.14159265358979323846;
enum LOG2        = 0.30102999566398119521;
enum LN2         = 0.6931471805599453094172321;
enum LOG2T       = 3.32192809488736234787;
enum LOG2E       = 1.4426950408889634074;   // 1/LN2

enum FWAIT = 0x9B;            // FWAIT opcode

/**
 * Mark the variable referenced by e as not a register candidate.
 * Returns: the symbol's flags after GTregcand has been cleared
 */
@trusted
uint notreg(elem* e)
{
    e.EV.Vsym.Sflags &= ~GTregcand;
    return e.EV.Vsym.Sflags;
}

/**
 * Generate the appropriate ESC instruction.
 * Params:
 *      MF = one of the MFxxxx values below
 *      b = value added into the low bits of the opcode
 * Returns: 0xD8 + (MF << 1) + b, truncated to a byte
 */
ubyte ESC(uint MF, uint b)
{
    const uint opcode = 0xD8 + (MF << 1) + b;
    return cast(ubyte) opcode;
}

enum
{   // Values for MF
    MFfloat         = 0,
    MFlong          = 1,
    MFdouble        = 2,
    MFword          = 3
}

/*********************************
 */

struct Dconst
{
    int round;
    Symbol *roundto0;
    Symbol *roundtonearest;
}

private __gshared Dconst oldd;

enum NDPP = 0;       // print out debugging info

/// Returns: true if the SAHF instruction can't be used
@trusted
bool NOSAHF() { return I64 || config.fpxmmregs; }

enum CW_roundto0 = 0xFBF;
enum CW_roundtonearest = 0x3BF;


/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save into an array of NDP structs.
 */

/**********************************
 * Wrapper around getlvalue() for operands of x87 instructions.
 * Params:
 *      cdb = code sink
 *      pcs = instruction being built; its Iflags/Irex are adjusted here
 *      e = operand
 *      keepmsk = passed through to getlvalue()
 */
@trusted
private void getlvalue87(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
{
    // the x87 instructions cannot read XMM registers, so the symbol
    // loses its register candidacy
    const bool refersToSymbol = e.Eoper == OPvar || e.Eoper == OPrelconst;
    if (refersToSymbol)
        e.EV.Vsym.Sflags &= ~GTregcand;

    getlvalue(cdb, pcs, e, keepmsk);

    if (ADDFWAIT())
        pcs.Iflags |= CFwait;

    // drop the operand size override (32 bit) or REX.W (64 bit)
    if (I32)
        pcs.Iflags &= ~CFopsize;
    else if (I64)
        pcs.Irex &= ~REX_W;
}

/****************************************
 * Store/load to ndp save location i
 */

/// Emit FSTP of ST0 into save location i, sized according to ty
@trusted
private void ndp_fstp(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint opcode;        // ESC opcode byte
    uint regField;      // reg field of the mod/rm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            opcode = 0xD9;              // FSTP m32real i[BP]
            regField = 3;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            opcode = 0xDD;              // FSTP m64real i[BP]
            regField = 3;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            opcode = 0xDB;              // FSTP m80real i[BP]
            regField = 7;
            break;

        default:
            assert(0);
    }

    cdb.genc1(opcode,modregrm(2,regField,BPRM),FLndp,i);
}

/// Emit FLD from save location i, sized according to ty
@trusted
private void ndp_fld(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint opcode;        // ESC opcode byte
    uint regField;      // reg field of the mod/rm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            opcode = 0xD9;              // FLD m32real i[BP]
            regField = 0;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            opcode = 0xDD;              // FLD m64real i[BP]
            regField = 0;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            opcode = 0xDB;              // FLD m80real i[BP]
            regField = 5;
            break;

        default:
            assert(0);
    }

    cdb.genc1(opcode,modregrm(2,regField,BPRM),FLndp,i);
}

/**************************
 * Return index of empty slot in global87.save[].
 * A new slot is appended if every existing one is in use.
 */

@trusted
private int getemptyslot()
{
    int slot = 0;
    while (slot < global87.save.length)
    {
        if (global87.save[slot].e is null)
            return slot;
        ++slot;
    }

    // nothing free: grow the table, the new entry is at index `slot`
    global87.save.push(NDP());
    return slot;
}

/*********************************
 * Pop 8087 stack.
 */

void pop87() { pop87(__LINE__, __FILE__); }

@trusted
void pop87(int line, const(char)* file)
{
    if (NDPP)
        printf("pop87(%s(%d): stackused=%d)\n", file, line, global87.stackused);

    global87.stackused -= 1;
    assert(global87.stackused >= 0);

    // every entry moves one position towards the top
    foreach (k; 1 .. global87.stack.length)
        global87.stack[k - 1] = global87.stack[k];

    // end of stack is nothing
    global87.stack[$ - 1] = NDP();
}


/*******************************
 * Push 8087 stack. Generate and return any code
 * necessary to preserve anything that might run off the end of the stack.
*/

void push87(ref CodeBuilder cdb) { push87(cdb,__LINE__,__FILE__); }

@trusted
void push87(ref CodeBuilder cdb, int line, const(char)* file)
{
    if (global87.stack[7].e is null)
    {
        // the bottom entry is free, nothing gets lost
        if (NDPP) printf("push87(%s(%d): %d)\n", file, line, global87.stackused);
        global87.stackused++;
        assert(global87.stackused <= 8);
    }
    else
    {
        // we would lose the top register off of the stack, so spill it
        const int slot = getemptyslot();
        global87.save[slot] = global87.stack[7];
        cdb.genf2(0xD9,0xF6);                           // FDECSTP
        genfwait(cdb);
        ndp_fstp(cdb, slot, global87.stack[7].e.Ety);   // FSTP slot[BP]
        assert(global87.stackused == 8);
        if (NDPP) printf("push87() : overflow\n");
    }

    // Shift the stack up, leaving an empty entry at the top
    foreach_reverse (k; 1 .. 8)
        global87.stack[k] = global87.stack[k - 1];
    global87.stack[0] = NDP();
}

/*****************************
 * Note elem e as being in ST(i) as being a value we want to keep.
 * Params:
 *      e = elem to record; OPcomma nodes are skipped down to their right operand
 *      offset = offset recorded alongside e
 *      i = 8087 stack position, must be less than global87.stackused
 */

void note87(elem *e, uint offset, int i)
{
    note87(e, offset, i, __LINE__);
}

@trusted
void note87(elem *e, uint offset, int i, int linnum)
{
    if (NDPP)
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);

    static if (0)
    {
        if (global87.stack[i].e)
            printf("global87.stack[%d].e = %p\n",i,global87.stack[i].e);
    }

    // report the offending request before the assert below fires
    debug if (i >= global87.stackused)
    {
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);
        elem_print(e);
    }
    assert(i < global87.stackused);

    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    global87.stack[i].e = e;
    global87.stack[i].offset = offset;
}

/****************************************************
 * Exchange two entries in 8087 stack.
*/

@trusted
void xchg87(int i, int j)
{
    NDP held = global87.stack[j];
    global87.stack[j] = global87.stack[i];
    global87.stack[i] = held;
}

/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 * Input:
 *      i       0..3    8087 register number
 *      flag    1       don't bother with FXCH
 */

private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag)
{
    makesure87(cdb,e,offset,i,flag,__LINE__);
}

@trusted
private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag,int linnum)
{
    debug if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);

    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }
    assert(e && i < 4);

Lretry:
    // nothing to do when ST(i) already holds the requested value
    if (global87.stack[i].e == e && global87.stack[i].offset == offset)
        return;

    debug if (global87.stack[i].e)
        printf("global87.stack[%d].e = %p, .offset = %d\n",i,global87.stack[i].e,global87.stack[i].offset);

    assert(global87.stack[i].e == null);

    // look for the value among the saved temporaries
    int j = 0;
    for (;; ++j)
    {
        if (j >= global87.save.length && e.Eoper == OPcomma)
        {
            e = e.EV.E2;              // try right side
            goto Lretry;
        }

        debug if (j >= global87.save.length)
            printf("e = %p, global87.save.length = %llu\n",e, cast(ulong) global87.save.length);

        assert(j < global87.save.length);
        //printf("\tglobal87.save[%d] = %p, .offset = %d\n", j, global87.save[j].e, global87.save[j].offset);
        if (e == global87.save[j].e && offset == global87.save[j].offset)
            break;
    }

    push87(cdb);
    genfwait(cdb);
    ndp_fld(cdb, j, e.Ety);                     // FLD j[BP]

    const bool skipFxch = (flag & 1) != 0;
    if (!skipFxch)
    {
        for (; i != 0; --i)
            cdb.genf2(0xD9,0xC8 + i);           // FXCH ST(i)
    }

    global87.save[j] = NDP();                   // back in 8087
    //global87.stack[i].e = null;
}

/****************************
 * Save in memory any values in the
8087 that we want to keep.
 */

@trusted
void save87(ref CodeBuilder cdb)
{
    bool stored = false;

    for (;;)
    {
        if (!(global87.stack[0].e && global87.stackused))
            break;

        // Save it
        const int slot = getemptyslot();
        if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e,slot);
        global87.save[slot] = global87.stack[0];

        genfwait(cdb);
        ndp_fstp(cdb,slot,global87.stack[0].e.Ety); // FSTP slot[BP]
        pop87();
        stored = true;
    }

    if (stored)                         // if any stores
        genfwait(cdb);                  // wait for last one to finish
}

/******************************************
 * Save any noted values that would be destroyed by n pushes
 */

@trusted
void save87regs(ref CodeBuilder cdb, uint n)
{
    assert(n <= 7);
    const uint keep = 8 - n;            // number of entries allowed to stay
    if (global87.stackused <= keep)
        return;

    // walk from the bottom of the stack, spilling every occupied entry
    foreach_reverse (uint k; keep + 1 .. 9)
    {
        cdb.genf2(0xD9,0xF6);           // FDECSTP
        genfwait(cdb);
        if (k <= global87.stackused)
        {
            const int slot = getemptyslot();
            ndp_fstp(cdb, slot, global87.stack[k - 1].e.Ety);   // FSTP slot[BP]
            global87.save[slot] = global87.stack[k - 1];
            global87.stack[k - 1] = NDP();
        }
    }

    // one FINCSTP for each position that was stepped over without a store
    foreach_reverse (uint k; keep + 1 .. 9)
    {
        if (k > global87.stackused)
        {
            cdb.genf2(0xD9,0xF7);       // FINCSTP
            genfwait(cdb);
        }
    }

    global87.stackused = keep;
}

/*****************************************************
 * Save/restore ST0 or ST01
 * Params:
 *      regm = mST0 or mST01
 *      cdbsave = receives the store instructions
 *      cdbrestore = receives the matching load instructions
 */

@trusted
void gensaverestore87(regm_t regm, ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore87(%s)\n", regm_str(regm));
    assert(regm == mST0 || regm == mST01);

    const int firstSlot = getemptyslot();
    global87.save[firstSlot].e = el_calloc();   // this blocks slot [firstSlot] for the life of this function
    ndp_fstp(cdbsave, firstSlot, TYldouble);

    // the reload of the first store is appended last
    CodeBuilder reloadFirst;
    reloadFirst.ctor();
    ndp_fld(reloadFirst, firstSlot, TYldouble);

    if (regm == mST01)
    {
        const int secondSlot = getemptyslot();
        global87.save[secondSlot].e = el_calloc();
        ndp_fstp(cdbsave, secondSlot, TYldouble);
        ndp_fld(cdbrestore, secondSlot, TYldouble);
    }

    cdbrestore.append(reloadFirst);
}

/*************************************
 * Find which, if any, slot on stack holds elem e.
 * Returns: the stack index, or -1 if no entry matches e and offset
 */

@trusted
private int cse_get(elem *e, uint offset)
{
    for (int i = 0; i != global87.stackused; i++)
    {
        if (global87.stack[i].e == e &&
            global87.stack[i].offset == offset)
        {
            //printf("cse found %d\n",i);
            //elem_print(e);
            return i;
        }
    }

    //printf("cse not found\n");
    //elem_print(e);
    return -1;
}

/*************************************
 * Reload common subexpression.
 */

void comsub87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Look on 8087 stack
    const int i = cse_get(e, 0);

    if (tycomplex(e.Ety))
    {
        const uint sz = tysize(e.Ety);
        const int j = cse_get(e, sz / 2);
        if (i < 0 || j < 0)
        {
            // Reload
            loaddata(cdb,e,pretregs);
            return;
        }

        push87(cdb);
        push87(cdb);
        cdb.genf2(0xD9,0xC0 + i);         // FLD ST(i)
        cdb.genf2(0xD9,0xC0 + j + 1);     // FLD ST(j + 1)
        fixresult_complex87(cdb,e,mST01,pretregs);
        return;
    }

    if (i < 0)
    {
        // Reload
        loaddata(cdb,e,pretregs);
        return;
    }

    push87(cdb);
    cdb.genf2(0xD9,0xC0 + i); // FLD ST(i)
    if (*pretregs & XMMREGS)
        fixresult87(cdb,e,mST0,pretregs);
    else
        fixresult(cdb,e,mST0,pretregs);
}


/*******************************
 * Decide if we need to gen an FWAIT.
 */

public void genfwait(ref CodeBuilder cdb)
{
    if (!ADDFWAIT())
        return;
    cdb.gen1(FWAIT);
}


/***************************
 * Put the 8087 flags into the CPU flags.
*/

@trusted
private void cg87_87topsw(ref CodeBuilder cdb)
{
    /* Note that SAHF is not available on some early I64 processors
     * and will cause a seg fault
     */
    assert(!NOSAHF);
    getregs(cdb,mAX);
    if (config.target_cpu >= TARGET_80286)
        cdb.genf2(0xDF,0xE0);             // FSTSW AX
    else
    {
        // go through memory when FSTSW AX is not used
        cdb.genfltreg(0xD8+5,7,0);        // FSTSW floatreg[BP]
        genfwait(cdb);                    // FWAIT
        cdb.genfltreg(0x8A,4,1);          // MOV AH,floatreg+1[BP]
    }
    cdb.gen1(0x9E);                       // SAHF
    code_orflag(cdb.last(),CFpsw);
}

/*****************************************
 * Jump to ctarget if condition code C2 is set.
 * Params:
 *      cdb = code sink
 *      ctarget = jump destination
 */

@trusted
private void genjmpifC2(ref CodeBuilder cdb, code *ctarget)
{
    if (NOSAHF)
    {
        // SAHF can't be used: test the status word bit in AH directly
        getregs(cdb,mAX);
        cdb.genf2(0xDF,0xE0);                                    // FSTSW AX
        cdb.genc2(0xF6,modregrm(3,0,4),4);                       // TEST AH,4
        genjmp(cdb, JNE, FLcode, cast(block *)ctarget); // JNE ctarget
    }
    else
    {
        cg87_87topsw(cdb);
        genjmp(cdb, JP, FLcode, cast(block *)ctarget);  // JP ctarget
    }
}

/***************************
 * Set the PSW based on the state of ST0.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = elem handed to callclib() when the library fallback is used
 *      pop = if stack should be popped after test
 */

@trusted
private void genftst(ref CodeBuilder cdb,elem *e,int pop)
{
    if (NOSAHF)
    {
        push87(cdb);
        cdb.gen2(0xD9,0xEE);          // FLDZ
        cdb.gen2(0xDF,0xE9);          // FUCOMIP ST1
        pop87();
        if (pop)
        {
            cdb.genf2(0xDD,modregrm(3,3,0));  // FPOP
            pop87();
        }
    }
    else if (config.flags4 & CFG4fastfloat)  // if fast floating point
    {
        cdb.genf2(0xD9,0xE4);                // FTST
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
        if (pop)
        {
            cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
            pop87();
        }
    }
    else if (config.target_cpu >= TARGET_80386)
    {
        // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(pop ? 0xDA : 0xDD,0xE9);    // FUCOMPP / FUCOMP
        pop87();
        if (pop)
            pop87();
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
    }
    else
    {
        // Call library function which does not raise exceptions
        regm_t regm = 0;

        // `&regm` had been corrupted into the (R) sign by HTML entity decoding
        callclib(cdb,e,CLIB.ftest,&regm,0);
        if (pop)
        {
            cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
            pop87();
        }
    }
}

/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 * Input:
 *      im      0       load real part
 *              1       load imaginary part
 * Returns:
 *      opcode if found
 *      0 if not
 */

@trusted
ubyte loadconst(elem *e, int im)
{
    elem_debug(e);
    assert(im == 0 || im == 1);

    // The three tables below are parallel to opcode[]
    immutable float[7] fval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];
    immutable double[7] dval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];

    static if (real.sizeof < 10)
    {
        import dmd.root.longdouble;
        immutable targ_ldouble[7] ldval =
        [ld_zero,ld_one,ld_pi,ld_log2t,ld_log2e,ld_log2,ld_ln2];
    }
    else
    {
        enum M_PI_L        = 0x1.921fb54442d1846ap+1L;       // 3.14159 fldpi
        enum M_LOG2T_L     = 0x1.a934f0979a3715fcp+1L;       // 3.32193 fldl2t
        enum M_LOG2E_L     = 0x1.71547652b82fe178p+0L;       // 1.4427 fldl2e
        enum M_LOG2_L      = 0x1.34413509f79fef32p-2L;       // 0.30103 fldlg2
        enum M_LN2_L       = 0x1.62e42fefa39ef358p-1L;       // 0.693147 fldln2
        immutable targ_ldouble[7] ldval =
        [0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L];
    }

    immutable ubyte[7 + 1] opcode =
        /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
        [0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0];

    int i;
    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;
    int zero;
    void *p;
    immutable ubyte[16] zeros;

    // Copy the requested part of the constant into f, d or ld;
    // p/sz describe the bytes that were filled in
    if (im == 0)
    {
        switch (tybasic(e.Ety))
        {
            case TYfloat:
            case TYifloat:
            case TYcfloat:
                f = e.EV.Vfloat;
                sz = 4;
                p = &f;
                break;

            case TYdouble:
            case TYdouble_alias:
            case TYidouble:
            case TYcdouble:
                d = e.EV.Vdouble;
                sz = 8;
                p = &d;
                break;

            case TYldouble:
            case TYildouble:
            case TYcldouble:
                ld = e.EV.Vldouble;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }
    else
    {
        switch (tybasic(e.Ety))
        {
            case TYcfloat:
                f = e.EV.Vcfloat.im;
                sz = 4;
                p = &f;
                break;

            case TYcdouble:
                d = e.EV.Vcdouble.im;
                sz = 8;
                p = &d;
                break;

            case TYcldouble:
                ld = e.EV.Vcldouble.im;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    assert(sz <= zeros.length);
    zero = (memcmp(p, zeros.ptr, sz) == 0);
    if (zero && config.target_cpu >= TARGET_PentiumPro)
        return 0xEE;            // FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
        return 0;

    if (zero)
        return 0xEE;

    // Search entries 1..6; when nothing matches i ends up as 7,
    // which selects the trailing 0 in opcode[]
    for (i = 1; i < fval.length; i++)
    {
        switch (sz)
        {
            case 4:
                if (fval[i] != f)
                    continue;
                break;
            case 8:
                if (dval[i] != d)
                    continue;
                break;
            case 10:
                if (ldval[i] != ld)
                    continue;
                break;
            default:
                assert(0);
        }
        break;
    }
    return opcode[i];
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
789 */ 790 791 @trusted 792 void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false) 793 { 794 //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs); 795 //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs)); 796 assert(!*pretregs || retregs); 797 798 if ((*pretregs | retregs) & mST01) 799 { 800 fixresult_complex87(cdb, e, retregs, pretregs, isReturnValue); 801 return; 802 } 803 804 tym_t tym = tybasic(e.Ety); 805 uint sz = _tysize[tym]; 806 //printf("tym = x%x, sz = %d\n", tym, sz); 807 808 /* if retregs needs to be transferred into the 8087 */ 809 if (*pretregs & mST0 && retregs & (mBP | ALLREGS)) 810 { 811 debug if (sz > DOUBLESIZE) 812 { 813 elem_print(e); 814 printf("retregs = %s\n", regm_str(retregs)); 815 } 816 assert(sz <= DOUBLESIZE); 817 if (!I16) 818 { 819 820 if (*pretregs & mPSW) 821 { // Set flags 822 regm_t r = retregs | mPSW; 823 fixresult(cdb,e,retregs,&r); 824 } 825 push87(cdb); 826 if (sz == REGSIZE || (I64 && sz == 4)) 827 { 828 const reg = findreg(retregs); 829 cdb.genfltreg(STO,reg,0); // MOV fltreg,reg 830 cdb.genfltreg(0xD9,0,0); // FLD float ptr fltreg 831 } 832 else 833 { 834 const msreg = findregmsw(retregs); 835 const lsreg = findreglsw(retregs); 836 cdb.genfltreg(STO,lsreg,0); // MOV fltreg,lsreg 837 cdb.genfltreg(STO,msreg,4); // MOV fltreg+4,msreg 838 cdb.genfltreg(0xDD,0,0); // FLD double ptr fltreg 839 } 840 } 841 else 842 { 843 regm_t regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS; 844 regm |= *pretregs & mPSW; 845 fixresult(cdb,e,retregs,®m); 846 regm = 0; // don't worry about result from CLIB.xxx 847 callclib(cdb,e, 848 ((sz == FLOATSIZE) ? CLIB.fltto87 : CLIB.dblto87), 849 ®m,0); 850 } 851 } 852 else if (*pretregs & (mBP | ALLREGS) && retregs & mST0) 853 { 854 assert(sz <= DOUBLESIZE); 855 uint mf = (sz == FLOATSIZE) ? 
MFfloat : MFdouble; 856 if (*pretregs & mPSW && !(retregs & mPSW)) 857 genftst(cdb,e,0); 858 // FSTP floatreg 859 pop87(); 860 cdb.genfltreg(ESC(mf,1),3,0); 861 genfwait(cdb); 862 reg_t reg; 863 allocreg(cdb,pretregs,®,(sz == FLOATSIZE) ? TYfloat : TYdouble); 864 if (sz == FLOATSIZE) 865 { 866 if (!I16) 867 cdb.genfltreg(LOD,reg,0); 868 else 869 { 870 cdb.genfltreg(LOD,reg,REGSIZE); 871 cdb.genfltreg(LOD,findreglsw(*pretregs),0); 872 } 873 } 874 else 875 { assert(sz == DOUBLESIZE); 876 if (I16) 877 { 878 cdb.genfltreg(LOD,AX,6); 879 cdb.genfltreg(LOD,BX,4); 880 cdb.genfltreg(LOD,CX,2); 881 cdb.genfltreg(LOD,DX,0); 882 } 883 else if (I32) 884 { 885 cdb.genfltreg(LOD,reg,REGSIZE); 886 cdb.genfltreg(LOD,findreglsw(*pretregs),0); 887 } 888 else // I64 889 { 890 cdb.genfltreg(LOD,reg,0); 891 code_orrex(cdb.last(), REX_W); 892 } 893 } 894 } 895 else if (*pretregs == 0 && retregs == mST0) 896 { 897 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 898 pop87(); 899 } 900 else 901 { 902 if (*pretregs & mPSW) 903 { 904 if (!(retregs & mPSW)) 905 { 906 genftst(cdb,e,!(*pretregs & (mST0 | XMMREGS))); // FTST 907 } 908 } 909 if (*pretregs & mST0 && retregs & XMMREGS) 910 { 911 assert(sz <= DOUBLESIZE); 912 uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble; 913 // MOVD floatreg,XMM? 914 const reg = findreg(retregs); 915 cdb.genxmmreg(xmmstore(tym),reg,0,tym); 916 push87(cdb); 917 cdb.genfltreg(ESC(mf,1),0,0); // FLD float/double ptr fltreg 918 } 919 else if (retregs & mST0 && *pretregs & XMMREGS) 920 { 921 assert(sz <= DOUBLESIZE); 922 uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble; 923 // FSTP floatreg 924 pop87(); 925 cdb.genfltreg(ESC(mf,1),3,0); 926 genfwait(cdb); 927 // MOVD XMM?,floatreg 928 reg_t reg; 929 allocreg(cdb,pretregs,®,(sz == FLOATSIZE) ? 
TYfloat : TYdouble); 930 cdb.genxmmreg(xmmload(tym),reg,0,tym); 931 } 932 else 933 assert(!(*pretregs & mST0) || (retregs & mST0)); 934 } 935 if (*pretregs & mST0) 936 note87(e,0,0); 937 } 938 939 /******************************** 940 * Generate in-line 8087 code for the following operators: 941 * add 942 * min 943 * mul 944 * div 945 * cmp 946 */ 947 948 // Reverse the order that the op is done in 949 __gshared const ubyte[9] oprev = [ cast(ubyte)-1,0,1,2,3,5,4,7,6 ]; 950 951 @trusted 952 void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 953 { 954 //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 955 // we could be evaluating / for side effects only 956 assert(*pretregs != 0); 957 958 elem *e1 = e.EV.E1; 959 elem *e2 = e.EV.E2; 960 uint sz2 = tysize(e1.Ety); 961 if (tycomplex(e1.Ety)) 962 sz2 /= 2; 963 964 OPER eoper = e.Eoper; 965 if (eoper == OPmul && e2.Eoper == OPconst && el_toldoubled(e.EV.E2) == 2.0L) 966 { 967 // Perform "mul 2.0" as fadd ST(0), ST 968 regm_t retregs = mST0; 969 codelem(cdb,e1,&retregs,false); 970 cdb.genf2(0xDC, 0xC0); // fadd ST(0), ST; 971 fixresult87(cdb,e,mST0,pretregs); // result is in ST(0). 
972 freenode(e2); 973 return; 974 } 975 976 uint op; 977 if (OTrel(eoper)) 978 eoper = OPeqeq; 979 bool imaginary; 980 static uint X(OPER op, uint ty1, uint ty2) { return (op << 16) + ty1 * 256 + ty2; } 981 switch (X(eoper, tybasic(e1.Ety), tybasic(e2.Ety))) 982 { 983 case X(OPadd, TYfloat, TYfloat): 984 case X(OPadd, TYdouble, TYdouble): 985 case X(OPadd, TYdouble_alias, TYdouble_alias): 986 case X(OPadd, TYldouble, TYldouble): 987 case X(OPadd, TYldouble, TYdouble): 988 case X(OPadd, TYdouble, TYldouble): 989 case X(OPadd, TYifloat, TYifloat): 990 case X(OPadd, TYidouble, TYidouble): 991 case X(OPadd, TYildouble, TYildouble): 992 op = 0; // FADDP 993 break; 994 995 case X(OPmin, TYfloat, TYfloat): 996 case X(OPmin, TYdouble, TYdouble): 997 case X(OPmin, TYdouble_alias, TYdouble_alias): 998 case X(OPmin, TYldouble, TYldouble): 999 case X(OPmin, TYldouble, TYdouble): 1000 case X(OPmin, TYdouble, TYldouble): 1001 case X(OPmin, TYifloat, TYifloat): 1002 case X(OPmin, TYidouble, TYidouble): 1003 case X(OPmin, TYildouble, TYildouble): 1004 op = 4; // FSUBP 1005 break; 1006 1007 case X(OPmul, TYfloat, TYfloat): 1008 case X(OPmul, TYdouble, TYdouble): 1009 case X(OPmul, TYdouble_alias, TYdouble_alias): 1010 case X(OPmul, TYldouble, TYldouble): 1011 case X(OPmul, TYldouble, TYdouble): 1012 case X(OPmul, TYdouble, TYldouble): 1013 case X(OPmul, TYifloat, TYifloat): 1014 case X(OPmul, TYidouble, TYidouble): 1015 case X(OPmul, TYildouble, TYildouble): 1016 case X(OPmul, TYfloat, TYifloat): 1017 case X(OPmul, TYdouble, TYidouble): 1018 case X(OPmul, TYldouble, TYildouble): 1019 case X(OPmul, TYifloat, TYfloat): 1020 case X(OPmul, TYidouble, TYdouble): 1021 case X(OPmul, TYildouble, TYldouble): 1022 op = 1; // FMULP 1023 break; 1024 1025 case X(OPdiv, TYfloat, TYfloat): 1026 case X(OPdiv, TYdouble, TYdouble): 1027 case X(OPdiv, TYdouble_alias, TYdouble_alias): 1028 case X(OPdiv, TYldouble, TYldouble): 1029 case X(OPdiv, TYldouble, TYdouble): 1030 case X(OPdiv, TYdouble, 
TYldouble): 1031 case X(OPdiv, TYifloat, TYifloat): 1032 case X(OPdiv, TYidouble, TYidouble): 1033 case X(OPdiv, TYildouble, TYildouble): 1034 op = 6; // FDIVP 1035 break; 1036 1037 case X(OPmod, TYfloat, TYfloat): 1038 case X(OPmod, TYdouble, TYdouble): 1039 case X(OPmod, TYdouble_alias, TYdouble_alias): 1040 case X(OPmod, TYldouble, TYldouble): 1041 case X(OPmod, TYfloat, TYifloat): 1042 case X(OPmod, TYdouble, TYidouble): 1043 case X(OPmod, TYldouble, TYildouble): 1044 case X(OPmod, TYifloat, TYifloat): 1045 case X(OPmod, TYidouble, TYidouble): 1046 case X(OPmod, TYildouble, TYildouble): 1047 case X(OPmod, TYifloat, TYfloat): 1048 case X(OPmod, TYidouble, TYdouble): 1049 case X(OPmod, TYildouble, TYldouble): 1050 op = cast(uint) -1; 1051 break; 1052 1053 case X(OPeqeq, TYfloat, TYfloat): 1054 case X(OPeqeq, TYdouble, TYdouble): 1055 case X(OPeqeq, TYdouble_alias, TYdouble_alias): 1056 case X(OPeqeq, TYldouble, TYldouble): 1057 case X(OPeqeq, TYifloat, TYifloat): 1058 case X(OPeqeq, TYidouble, TYidouble): 1059 case X(OPeqeq, TYildouble, TYildouble): 1060 { 1061 assert(OTrel(e.Eoper)); 1062 assert((*pretregs & mST0) == 0); 1063 regm_t retregs = mST0; 1064 codelem(cdb,e1,&retregs,false); 1065 note87(e1,0,0); 1066 regm_t resregm = mPSW; 1067 1068 if (rel_exception(e.Eoper) || config.flags4 & CFG4fastfloat) 1069 { 1070 if (e2.Eoper == OPconst && !boolres(e2)) 1071 { 1072 if (NOSAHF) 1073 { 1074 push87(cdb); 1075 cdb.gen2(0xD9,0xEE); // FLDZ 1076 cdb.gen2(0xDF,0xF1); // FCOMIP ST1 1077 pop87(); 1078 } 1079 else 1080 { 1081 cdb.genf2(0xD9,0xE4); // FTST 1082 cg87_87topsw(cdb); 1083 } 1084 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1085 pop87(); 1086 } 1087 else if (NOSAHF) 1088 { 1089 note87(e1,0,0); 1090 load87(cdb,e2,0,&retregs,e1,-1); 1091 makesure87(cdb,e1,0,1,0); 1092 resregm = 0; 1093 //cdb.genf2(0xD9,0xC8 + 1); // FXCH ST1 1094 cdb.gen2(0xDF,0xF1); // FCOMIP ST1 1095 pop87(); 1096 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1097 pop87(); 1098 } 1099 else 1100 { 1101 
load87(cdb,e2, 0, pretregs, e1, 3); // FCOMPP 1102 } 1103 } 1104 else 1105 { 1106 if (e2.Eoper == OPconst && !boolres(e2) && 1107 config.target_cpu < TARGET_80386) 1108 { 1109 regm_t regm = 0; 1110 1111 callclib(cdb,e,CLIB.ftest0,®m,0); 1112 pop87(); 1113 } 1114 else 1115 { 1116 note87(e1,0,0); 1117 load87(cdb,e2,0,&retregs,e1,-1); 1118 makesure87(cdb,e1,0,1,0); 1119 resregm = 0; 1120 if (NOSAHF) 1121 { 1122 cdb.gen2(0xDF,0xE9); // FUCOMIP ST1 1123 pop87(); 1124 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1125 pop87(); 1126 } 1127 else if (config.target_cpu >= TARGET_80386) 1128 { 1129 cdb.gen2(0xDA,0xE9); // FUCOMPP 1130 cg87_87topsw(cdb); 1131 pop87(); 1132 pop87(); 1133 } 1134 else 1135 // Call a function instead so that exceptions 1136 // are not generated. 1137 callclib(cdb,e,CLIB.fcompp,&resregm,0); 1138 } 1139 } 1140 1141 freenode(e2); 1142 return; 1143 } 1144 1145 case X(OPadd, TYcfloat, TYcfloat): 1146 case X(OPadd, TYcdouble, TYcdouble): 1147 case X(OPadd, TYcldouble, TYcldouble): 1148 case X(OPadd, TYcfloat, TYfloat): 1149 case X(OPadd, TYcdouble, TYdouble): 1150 case X(OPadd, TYcldouble, TYldouble): 1151 case X(OPadd, TYfloat, TYcfloat): 1152 case X(OPadd, TYdouble, TYcdouble): 1153 case X(OPadd, TYldouble, TYcldouble): 1154 goto Lcomplex; 1155 1156 case X(OPadd, TYifloat, TYcfloat): 1157 case X(OPadd, TYidouble, TYcdouble): 1158 case X(OPadd, TYildouble, TYcldouble): 1159 goto Lcomplex2; 1160 1161 case X(OPmin, TYcfloat, TYcfloat): 1162 case X(OPmin, TYcdouble, TYcdouble): 1163 case X(OPmin, TYcldouble, TYcldouble): 1164 case X(OPmin, TYcfloat, TYfloat): 1165 case X(OPmin, TYcdouble, TYdouble): 1166 case X(OPmin, TYcldouble, TYldouble): 1167 case X(OPmin, TYfloat, TYcfloat): 1168 case X(OPmin, TYdouble, TYcdouble): 1169 case X(OPmin, TYldouble, TYcldouble): 1170 goto Lcomplex; 1171 1172 case X(OPmin, TYifloat, TYcfloat): 1173 case X(OPmin, TYidouble, TYcdouble): 1174 case X(OPmin, TYildouble, TYcldouble): 1175 goto Lcomplex2; 1176 1177 case X(OPmul, 
TYcfloat, TYcfloat): 1178 case X(OPmul, TYcdouble, TYcdouble): 1179 case X(OPmul, TYcldouble, TYcldouble): 1180 goto Lcomplex; 1181 1182 case X(OPdiv, TYcfloat, TYcfloat): 1183 case X(OPdiv, TYcdouble, TYcdouble): 1184 case X(OPdiv, TYcldouble, TYcldouble): 1185 case X(OPdiv, TYfloat, TYcfloat): 1186 case X(OPdiv, TYdouble, TYcdouble): 1187 case X(OPdiv, TYldouble, TYcldouble): 1188 case X(OPdiv, TYifloat, TYcfloat): 1189 case X(OPdiv, TYidouble, TYcdouble): 1190 case X(OPdiv, TYildouble, TYcldouble): 1191 goto Lcomplex; 1192 1193 case X(OPdiv, TYifloat, TYfloat): 1194 case X(OPdiv, TYidouble, TYdouble): 1195 case X(OPdiv, TYildouble, TYldouble): 1196 op = 6; // FDIVP 1197 break; 1198 1199 Lcomplex: 1200 { 1201 loadComplex(cdb,e1); 1202 loadComplex(cdb,e2); 1203 makesure87(cdb, e1, sz2, 2, 0); 1204 makesure87(cdb, e1, 0, 3, 0); 1205 regm_t retregs = mST01; 1206 if (eoper == OPadd) 1207 { 1208 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1209 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1210 pop87(); 1211 pop87(); 1212 } 1213 else if (eoper == OPmin) 1214 { 1215 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1216 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1217 pop87(); 1218 pop87(); 1219 } 1220 else 1221 { 1222 int clib = eoper == OPmul ? 
CLIB.cmul : CLIB.cdiv; 1223 callclib(cdb, e, clib, &retregs, 0); 1224 } 1225 fixresult_complex87(cdb, e, retregs, pretregs); 1226 return; 1227 } 1228 1229 Lcomplex2: 1230 { 1231 regm_t retregs = mST0; 1232 codelem(cdb,e1, &retregs, false); 1233 note87(e1, 0, 0); 1234 loadComplex(cdb,e2); 1235 makesure87(cdb, e1, 0, 2, 0); 1236 retregs = mST01; 1237 if (eoper == OPadd) 1238 { 1239 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1240 } 1241 else if (eoper == OPmin) 1242 { 1243 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1244 cdb.genf2(0xD9, 0xE0); // FCHS 1245 } 1246 else 1247 assert(0); 1248 pop87(); 1249 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1250 fixresult_complex87(cdb, e, retregs, pretregs); 1251 return; 1252 } 1253 1254 case X(OPeqeq, TYcfloat, TYcfloat): 1255 case X(OPeqeq, TYcdouble, TYcdouble): 1256 case X(OPeqeq, TYcldouble, TYcldouble): 1257 case X(OPeqeq, TYcfloat, TYifloat): 1258 case X(OPeqeq, TYcdouble, TYidouble): 1259 case X(OPeqeq, TYcldouble, TYildouble): 1260 case X(OPeqeq, TYcfloat, TYfloat): 1261 case X(OPeqeq, TYcdouble, TYdouble): 1262 case X(OPeqeq, TYcldouble, TYldouble): 1263 case X(OPeqeq, TYifloat, TYcfloat): 1264 case X(OPeqeq, TYidouble, TYcdouble): 1265 case X(OPeqeq, TYildouble, TYcldouble): 1266 case X(OPeqeq, TYfloat, TYcfloat): 1267 case X(OPeqeq, TYdouble, TYcdouble): 1268 case X(OPeqeq, TYldouble, TYcldouble): 1269 case X(OPeqeq, TYfloat, TYifloat): 1270 case X(OPeqeq, TYdouble, TYidouble): 1271 case X(OPeqeq, TYldouble, TYildouble): 1272 case X(OPeqeq, TYifloat, TYfloat): 1273 case X(OPeqeq, TYidouble, TYdouble): 1274 case X(OPeqeq, TYildouble, TYldouble): 1275 { 1276 loadComplex(cdb,e1); 1277 loadComplex(cdb,e2); 1278 makesure87(cdb, e1, sz2, 2, 0); 1279 makesure87(cdb, e1, 0, 3, 0); 1280 regm_t retregs = 0; 1281 callclib(cdb, e, CLIB.ccmp, &retregs, 0); 1282 return; 1283 } 1284 1285 case X(OPadd, TYfloat, TYifloat): 1286 case X(OPadd, TYdouble, TYidouble): 1287 case X(OPadd, TYldouble, TYildouble): 1288 case X(OPadd, TYifloat, 
TYfloat): 1289 case X(OPadd, TYidouble, TYdouble): 1290 case X(OPadd, TYildouble, TYldouble): 1291 1292 case X(OPmin, TYfloat, TYifloat): 1293 case X(OPmin, TYdouble, TYidouble): 1294 case X(OPmin, TYldouble, TYildouble): 1295 case X(OPmin, TYifloat, TYfloat): 1296 case X(OPmin, TYidouble, TYdouble): 1297 case X(OPmin, TYildouble, TYldouble): 1298 { 1299 regm_t retregs = mST0; 1300 codelem(cdb,e1, &retregs, false); 1301 note87(e1, 0, 0); 1302 codelem(cdb,e2, &retregs, false); 1303 makesure87(cdb, e1, 0, 1, 0); 1304 if (eoper == OPmin) 1305 cdb.genf2(0xD9, 0xE0); // FCHS 1306 if (tyimaginary(e1.Ety)) 1307 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1308 retregs = mST01; 1309 fixresult_complex87(cdb, e, retregs, pretregs); 1310 return; 1311 } 1312 1313 case X(OPadd, TYcfloat, TYifloat): 1314 case X(OPadd, TYcdouble, TYidouble): 1315 case X(OPadd, TYcldouble, TYildouble): 1316 op = 0; 1317 goto Lci; 1318 1319 case X(OPmin, TYcfloat, TYifloat): 1320 case X(OPmin, TYcdouble, TYidouble): 1321 case X(OPmin, TYcldouble, TYildouble): 1322 op = 4; 1323 goto Lci; 1324 1325 Lci: 1326 { 1327 loadComplex(cdb,e1); 1328 regm_t retregs = mST0; 1329 load87(cdb,e2,sz2,&retregs,e1,op); 1330 freenode(e2); 1331 retregs = mST01; 1332 makesure87(cdb, e1,0,1,0); 1333 fixresult_complex87(cdb,e, retregs, pretregs); 1334 return; 1335 } 1336 1337 case X(OPmul, TYcfloat, TYfloat): 1338 case X(OPmul, TYcdouble, TYdouble): 1339 case X(OPmul, TYcldouble, TYldouble): 1340 imaginary = false; 1341 goto Lcmul; 1342 1343 case X(OPmul, TYcfloat, TYifloat): 1344 case X(OPmul, TYcdouble, TYidouble): 1345 case X(OPmul, TYcldouble, TYildouble): 1346 imaginary = true; 1347 Lcmul: 1348 { 1349 loadComplex(cdb,e1); 1350 if (imaginary) 1351 { 1352 cdb.genf2(0xD9, 0xE0); // FCHS 1353 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1354 if (elemisone(e2)) 1355 { 1356 freenode(e2); 1357 fixresult_complex87(cdb, e, mST01, pretregs); 1358 return; 1359 } 1360 } 1361 regm_t retregs = mST0; 1362 codelem(cdb,e2, &retregs, false); 
1363 makesure87(cdb, e1, sz2, 1, 0); 1364 makesure87(cdb, e1, 0, 2, 0); 1365 cdb.genf2(0xDC,0xC8 + 2); // FMUL ST(2), ST 1366 cdb.genf2(0xDE,0xC8 + 1); // FMULP ST(1), ST 1367 pop87(); 1368 fixresult_complex87(cdb, e, mST01, pretregs); 1369 return; 1370 } 1371 1372 case X(OPmul, TYfloat, TYcfloat): 1373 case X(OPmul, TYdouble, TYcdouble): 1374 case X(OPmul, TYldouble, TYcldouble): 1375 imaginary = false; 1376 goto Lcmul2; 1377 1378 case X(OPmul, TYifloat, TYcfloat): 1379 case X(OPmul, TYidouble, TYcdouble): 1380 case X(OPmul, TYildouble, TYcldouble): 1381 imaginary = true; 1382 Lcmul2: 1383 { 1384 regm_t retregs = mST0; 1385 codelem(cdb,e1, &retregs, false); 1386 note87(e1, 0, 0); 1387 loadComplex(cdb,e2); 1388 makesure87(cdb, e1, 0, 2, 0); 1389 cdb.genf2(0xD9, imaginary ? 0xE0 : 0xC8 + 1); // FCHS / FXCH ST(1) 1390 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 1391 cdb.genf2(0xDC,0xC8 + 2); // FMUL ST(2), ST 1392 cdb.genf2(0xDE,0xC8 + 1); // FMULP ST(1), ST 1393 pop87(); 1394 fixresult_complex87(cdb, e, mST01, pretregs); 1395 return; 1396 } 1397 1398 case X(OPdiv, TYcfloat, TYfloat): 1399 case X(OPdiv, TYcdouble, TYdouble): 1400 case X(OPdiv, TYcldouble, TYldouble): 1401 { 1402 loadComplex(cdb,e1); 1403 regm_t retregs = mST0; 1404 codelem(cdb,e2, &retregs, false); 1405 makesure87(cdb, e1, sz2, 1, 0); 1406 makesure87(cdb, e1, 0, 2, 0); 1407 cdb.genf2(0xDC,0xF8 + 2); // FDIV ST(2), ST 1408 cdb.genf2(0xDE,0xF8 + 1); // FDIVP ST(1), ST 1409 pop87(); 1410 fixresult_complex87(cdb, e, mST01, pretregs); 1411 return; 1412 } 1413 1414 case X(OPdiv, TYcfloat, TYifloat): 1415 case X(OPdiv, TYcdouble, TYidouble): 1416 case X(OPdiv, TYcldouble, TYildouble): 1417 { 1418 loadComplex(cdb,e1); 1419 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1420 xchg87(0, 1); 1421 cdb.genf2(0xD9, 0xE0); // FCHS 1422 regm_t retregs = mST0; 1423 codelem(cdb,e2, &retregs, false); 1424 makesure87(cdb, e1, 0, 1, 0); 1425 makesure87(cdb, e1, sz2, 2, 0); 1426 cdb.genf2(0xDC,0xF8 + 2); // FDIV ST(2), ST 1427 
cdb.genf2(0xDE,0xF8 + 1); // FDIVP ST(1), ST 1428 pop87(); 1429 fixresult_complex87(cdb, e, mST01, pretregs); 1430 return; 1431 } 1432 1433 case X(OPmod, TYcfloat, TYfloat): 1434 case X(OPmod, TYcdouble, TYdouble): 1435 case X(OPmod, TYcldouble, TYldouble): 1436 case X(OPmod, TYcfloat, TYifloat): 1437 case X(OPmod, TYcdouble, TYidouble): 1438 case X(OPmod, TYcldouble, TYildouble): 1439 { 1440 /* 1441 fld E1.re 1442 fld E1.im 1443 fld E2 1444 fxch ST(1) 1445 FM1: fprem 1446 fstsw word ptr sw 1447 fwait 1448 mov AH, byte ptr sw+1 1449 jp FM1 1450 fxch ST(2) 1451 FM2: fprem 1452 fstsw word ptr sw 1453 fwait 1454 mov AH, byte ptr sw+1 1455 jp FM2 1456 fstp ST(1) 1457 fxch ST(1) 1458 */ 1459 loadComplex(cdb,e1); 1460 regm_t retregs = mST0; 1461 codelem(cdb,e2, &retregs, false); 1462 makesure87(cdb, e1, sz2, 1, 0); 1463 makesure87(cdb, e1, 0, 2, 0); 1464 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1465 1466 cdb.gen2(0xD9, 0xF8); // FPREM 1467 code *cfm1 = cdb.last(); 1468 genjmpifC2(cdb, cfm1); // JC2 FM1 1469 cdb.genf2(0xD9, 0xC8 + 2); // FXCH ST(2) 1470 1471 cdb.gen2(0xD9, 0xF8); // FPREM 1472 code *cfm2 = cdb.last(); 1473 1474 genjmpifC2(cdb, cfm2); // JC2 FM2 1475 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 1476 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1477 1478 pop87(); 1479 fixresult_complex87(cdb, e, mST01, pretregs); 1480 return; 1481 } 1482 1483 default: 1484 1485 debug 1486 elem_print(e); 1487 1488 assert(0); 1489 } 1490 1491 int reverse = 0; 1492 int e2oper = e2.Eoper; 1493 1494 /* Move double-sized operand into the second position if there's a chance 1495 * it will allow combining a load with an operation (DMD Bugzilla 2905) 1496 */ 1497 if ( ((tybasic(e1.Ety) == TYdouble) 1498 && ((e1.Eoper == OPvar) || (e1.Eoper == OPconst)) 1499 && (tybasic(e2.Ety) != TYdouble)) || 1500 (e1.Eoper == OPconst) || 1501 (e1.Eoper == OPvar && 1502 ((e1.Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) || 1503 (e2oper == OPd_f && 1504 (e2.EV.E1.Eoper == OPs32_d || e2.EV.E1.Eoper == 
OPs64_d || e2.EV.E1.Eoper == OPs16_d) && 1505 e2.EV.E1.EV.E1.Eoper == OPvar 1506 ) || 1507 ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) && 1508 e2.EV.E1.Eoper == OPvar 1509 ) 1510 ) 1511 ) 1512 ) 1513 { // Reverse order of evaluation 1514 e1 = e.EV.E2; 1515 e2 = e.EV.E1; 1516 op = oprev[op + 1]; 1517 reverse ^= 1; 1518 } 1519 1520 regm_t retregs1 = mST0; 1521 codelem(cdb,e1,&retregs1,false); 1522 note87(e1,0,0); 1523 1524 if (config.flags4 & CFG4fdivcall && e.Eoper == OPdiv) 1525 { 1526 regm_t retregs = mST0; 1527 load87(cdb,e2,0,&retregs,e1,-1); 1528 makesure87(cdb, e1,0,1,0); 1529 if (op == 7) // if reverse divide 1530 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1531 callclib(cdb,e,CLIB.fdiv87,&retregs,0); 1532 pop87(); 1533 regm_t resregm = mST0; 1534 freenode(e2); 1535 fixresult87(cdb,e,resregm,pretregs); 1536 } 1537 else if (e.Eoper == OPmod) 1538 { 1539 /* 1540 * fld tbyte ptr y 1541 * fld tbyte ptr x // ST = x, ST1 = y 1542 * FM1: // We don't use fprem1 because for some inexplicable 1543 * // reason we get -5 when we do _modulo(15, 10) 1544 * fprem // ST = ST % ST1 1545 * fstsw word ptr sw 1546 * fwait 1547 * mov AH,byte ptr sw+1 // get msb of status word in AH 1548 * sahf // transfer to flags 1549 * jp FM1 // continue till ST < ST1 1550 * fstp ST(1) // leave remainder on stack 1551 */ 1552 regm_t retregs = mST0; 1553 load87(cdb,e2,0,&retregs,e1,-1); 1554 makesure87(cdb,e1,0,1,0); // now have x,y on stack; need y,x 1555 if (!reverse) // if not reverse modulo 1556 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1557 1558 cdb.gen2(0xD9, 0xF8); // FM1: FPREM 1559 code *cfm1 = cdb.last(); 1560 genjmpifC2(cdb, cfm1); // JC2 FM1 1561 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 1562 1563 pop87(); 1564 freenode(e2); 1565 fixresult87(cdb,e,mST0,pretregs); 1566 } 1567 else 1568 { 1569 load87(cdb,e2,0,pretregs,e1,op); 1570 freenode(e2); 1571 } 1572 if (*pretregs & mST0) 1573 note87(e,0,0); 1574 //printf("orth87(-e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 
}

/*****************************
 * Load e into ST01.
 *
 * A real operand is evaluated into ST0 and FLDZ is then pushed on top of it
 * (zero imaginary part). An imaginary operand has FLDZ pushed first and is
 * then evaluated on top of it (zero real part). A complex operand is
 * evaluated straight into ST01.
 * Afterwards the two halves are tagged with note87(): offset 0 with index 1
 * and offset sz with index 0, where sz is half the type size for complex
 * operands and the full type size otherwise.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = real, imaginary or complex floating point expression
 */

@trusted
private void loadComplex(ref CodeBuilder cdb,elem *e)
{
    regm_t retregs;

    int sz = tysize(e.Ety);
    switch (tybasic(e.Ety))
    {
        case TYfloat:
        case TYdouble:
        case TYldouble:
            retregs = mST0;
            codelem(cdb,e,&retregs,false);
            // Convert to complex with a 0 for the imaginary part
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            break;

        case TYifloat:
        case TYidouble:
        case TYildouble:
            // Convert to complex with a 0 for the real part
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            retregs = mST0;
            codelem(cdb,e,&retregs,false);
            break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            sz /= 2;                          // offset of the second half
            retregs = mST01;
            codelem(cdb,e,&retregs,false);
            break;

        default:
            assert(0);
    }
    note87(e, 0, 1);
    note87(e, sz, 0);
}

/*************************
 * If op == -1, load expression e into ST0.
 * else compute (eleft op e), eleft is in ST0.
 * Must follow same logic as cmporder87();
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = expression to load, or the right operand when op != -1
 *      eoffset = offset that accompanies eleft in the note87()/makesure87() calls
 *      pretregs = where the result is wanted; handed to fixresult87()
 *      eleft = left operand, already in ST0 (only used when op != -1)
 *      op = -1 for a plain load, otherwise the x87 operation selector that is
 *           placed in the reg field of the generated instruction;
 *           3 is FCOMP (result ends up in the flags), 4 and 6 are sub and div
 */

@trusted
void load87(ref CodeBuilder cdb,elem *e,uint eoffset,regm_t *pretregs,elem *eleft,OPER op)
{
    code cs;
    regm_t retregs;
    reg_t reg;
    uint mf1;
    ubyte ldop;
    int i;

    if (NDPP)
        printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);

    assert(!(NOSAHF && op == 3));
    elem_debug(e);
    if (ADDFWAIT())
        cs.Iflags = CFwait;
    else
        cs.Iflags = 0;
    cs.Irex = 0;
    OPER opr = oprev[op + 1];               // operation to use when the operands end up swapped
    tym_t ty = tybasic(e.Ety);
    uint mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
    bool noted = false;                     // true once eleft has been tagged with note87()
    // long double operands are never combined with the operation from memory
    // here; they take the generic path (load to ST0, then FopRP)
    if ((ty == TYldouble || ty == TYildouble) &&
        op != -1 && e.Eoper != OPd_ld)
        goto Ldefault;
L5:
    switch (e.Eoper)
    {
        case OPcomma:
            if (op != -1)
            {
                note87(eleft,eoffset,0);
                noted = true;
            }
            docommas(cdb,&e);
            goto L5;

        case OPvar:
            notreg(e);
            goto L2;

        case OPind:
        L2:
            if (op != -1)
            {
                if (e.Ecount && e.Ecount != e.Ecomsub &&
                    (i = cse_get(e, 0)) >= 0)
                {
                    // Operand is a common subexpression already on the x87 stack
                    immutable ubyte[8] b2 = [0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8];

                    cdb.genf2(0xD8,b2[op] + i);        // Fop ST(i)
                }
                else
                {
                    getlvalue87(cdb,&cs,e,0);
                    makesure87(cdb,eleft,eoffset,0,0);
                    cs.Iop = ESC(mf,0);
                    cs.Irm |= modregrm(0,op,0);
                    cdb.gen(&cs);
                }
            }
            else
            {
                push87(cdb);
                switch (ty)
                {
                    case TYfloat:
                    case TYdouble:
                    case TYifloat:
                    case TYidouble:
                    case TYcfloat:
                    case TYcdouble:
                    case TYdouble_alias:
                        loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0);   // FLD var
                        break;
                    case TYldouble:
                    case TYildouble:
                    case TYcldouble:
                        loadea(cdb,e,&cs,0xDB,5,0,0,0);        // FLD var
                        break;
                    default:
                        printf("ty = x%x\n", ty);
                        assert(0);
                }
                note87(e,0,0);
            }
            break;

        case OPd_f:
        case OPf_d:
        case OPd_ld:
            mf1 = (tybasic(e.EV.E1.Ety) == TYfloat || tybasic(e.EV.E1.Ety) == TYifloat)
                    ? MFfloat : MFdouble;
            if (op != -1 && global87.stackused && !noted)
                note87(eleft,eoffset,0);    // don't trash this value
            if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
            {
                static if (1)
                {
                L4:
                    getlvalue87(cdb,&cs,e.EV.E1,0);
                    cs.Iop = ESC(mf1,0);
                    if (op != -1)
                    {
                        cs.Irm |= modregrm(0,op,0);
                        makesure87(cdb,eleft,eoffset,0,0);
                    }
                    else
                    {
                        cs.Iop |= 1;
                        push87(cdb);
                    }
                    cdb.gen(&cs);                     // FLD / Fop
                }
                else
                {
                    loadea(cdb,e.EV.E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e.EV.E1 */
                }

                // Variable cannot be put into a register anymore
                if (e.EV.E1.Eoper == OPvar)
                    notreg(e.EV.E1);
                freenode(e.EV.E1);
            }
            else
            {
                retregs = mST0;
                codelem(cdb,e.EV.E1,&retregs,false);
                if (op != -1)
                {
                    makesure87(cdb,eleft,eoffset,1,0);
                    cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
                    pop87();
                }
            }
            break;

        case OPs64_d:
            if (e.EV.E1.Eoper == OPvar ||
                (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
            {
                getlvalue87(cdb,&cs,e.EV.E1,0);
                cs.Iop = 0xDF;
                push87(cdb);
                cs.Irm |= modregrm(0,5,0);
                cdb.gen(&cs);                     // FILD m64
                // Variable cannot be put into a register anymore
                if (e.EV.E1.Eoper == OPvar)
                    notreg(e.EV.E1);
                freenode(e.EV.E1);
            }
            else if (I64)
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
                reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
                code_orrex(cdb.last(), REX_W);
                push87(cdb);
                cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
            }
            else
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
                reg = findreglsw(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reglsw
                reg = findregmsw(retregs);
                cdb.genfltreg(STO,reg,4);         // MOV floatreg+4,regmsw
                push87(cdb);
                cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
            }
            if (op != -1)
            {
                makesure87(cdb,eleft,eoffset,1,0);
                cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
                pop87();
            }
            break;

        case OPconst:
            ldop = loadconst(e, 0);
            if (ldop)
            {
                push87(cdb);
                cdb.genf2(0xD9,ldop);          // FLDx
                if (op != -1)
                {
                    cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
                    pop87();
                }
            }
            else
            {
                assert(0);
            }
            break;

        case OPu16_d:
        {
            /* This opcode should never be generated        */
            /* (probably shouldn't be for 16 bit code too)  */
            assert(!I32);

            if (op != -1 && !noted)
                note87(eleft,eoffset,0);    // don't trash this value
            retregs = ALLREGS & mLSW;
            codelem(cdb,e.EV.E1,&retregs,false);
            regwithvalue(cdb,ALLREGS & mMSW,0,&reg,0);  // 0-extend
            retregs |= mask(reg);
            mf1 = MFlong;
            goto L3;
        }

        case OPs16_d:       mf1 = MFword;   goto L6;
        case OPs32_d:       mf1 = MFlong;   goto L6;
        L6:
            if (e.Ecount)
                goto Ldefault;
            if (op != -1 && !noted)
                note87(eleft,eoffset,0);    // don't trash this value
            if (e.EV.E1.Eoper == OPvar ||
                (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
            {
                goto L4;
            }
            else
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
            L3:
                if (I16 && e.Eoper != OPs16_d)
                {
                    /* MOV floatreg+2,reg   */
                    reg = findregmsw(retregs);
                    cdb.genfltreg(STO,reg,REGSIZE);
                    retregs &= mLSW;
                }
                reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
                if (op != -1)
                {
                    makesure87(cdb,eleft,eoffset,0,0);
                    cdb.genfltreg(ESC(mf1,0),op,0);   // Fop floatreg
                }
                else
                {
                    /* FLD long ptr floatreg        */
                    push87(cdb);
                    cdb.genfltreg(ESC(mf1,1),0,0);
                }
            }
            break;
        default:
        Ldefault:
            retregs = mST0;
            codelem(cdb,e,&retregs,2);

            if (op != -1)
            {
                makesure87(cdb,eleft,eoffset,1,(op == 0 || op == 1));
                pop87();
                if (op == 4 || op == 6)             // sub or div
                {
                    // If makesure87() just emitted an FXCH ST(1), drop it and
                    // use the non-reversed operation instead
                    code *cl = cdb.last();
                    if (cl && cl.Iop == 0xD9 && cl.Irm == 0xC9)   // FXCH ST(1)
                    {   cl.Iop = NOP;
                        opr = op;                   // reverse operands
                    }
                }
                cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
            }
            break;
    }
    if (op == 3)                            // FCOMP
    {   pop87();                            // extra pop was done
        cg87_87topsw(cdb);
    }
    fixresult87(cdb,e,((op == 3) ? mPSW : mST0),pretregs);
    if (NDPP)
        printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
}

/********************************
 * Determine if a compare is to be done forwards (return 0)
 * or backwards (return 1).
 * Must follow same logic as load87().
 * Params:
 *      e = the operand that load87() would be given as its e argument
 * Returns:
 *      0 for the operand forms load87() can combine directly with the
 *      operation, 1 for the forms that are loaded first
 */

@trusted
int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);
  L1:
    switch (e.Eoper)
    {
        case OPcomma:
            // Only the value-producing right side matters
            e = e.EV.E2;
            goto L1;

        case OPd_f:
        case OPf_d:
        case OPd_ld:
            if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
                goto ret0;
            else
                goto ret1;

        case OPconst:
            if (loadconst(e, 0) || tybasic(e.Ety) == TYldouble
                                || tybasic(e.Ety) == TYildouble)
            {
                //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
                goto ret1;
            }
            goto ret0;

        case OPvar:
        case OPind:
            if (tybasic(e.Ety) == TYldouble ||
                tybasic(e.Ety) == TYildouble)
                goto ret1;
            goto ret0;

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            goto ret0;

        case OPs64_d:
            goto ret1;

        default:
            goto ret1;
    }

ret1:
    return 1;

ret0:
    return 0;
}

/*******************************
 * Perform an assignment to a long double/double/float.
*/

/* eq87 notes:
 *      e must be an OPeq node; e.EV.E2 is evaluated into ST0 and then stored
 *      to the lvalue e.EV.E1.
 *      *pretregs selects whether the value must also remain available after
 *      the store (FST, or FLD ST(0) + FSTP for long double) or not (FSTP).
 */

@trusted
void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;   // opcode byte of the store instruction
    uint op2;       // reg field of the store instruction

    //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    regm_t retregs = mST0 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;

        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;

        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        default:
            assert(0);
    }
    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            // The long double store used here always pops, so duplicate ST0 first
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP e.EV.E1
        pop87();
    }

    static if (0)
    {
        // Doesn't work if ST(0) gets saved to the stack by getlvalue()
        loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);
    }
    else
    {
        cs.Irex = 0;
        cs.Iflags = 0;
        cs.Iop = op1;
        if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
        {   // Make sure it's still there
            elem *e2 = e.EV.E2;
            while (e2.Eoper == OPcomma)
                e2 = e2.EV.E2;
            note87(e2,0,0);
            getlvalue87(cdb, &cs, e.EV.E1, 0);
            makesure87(cdb,e2,0,0,1);
        }
        else
        {
            getlvalue87(cdb, &cs, e.EV.E1, 0);
        }
        cs.Irm |= modregrm(0,op2,0);            // OR in reg field
        cdb.gen(&cs);
        if (tysize(TYldouble) == 12)
        {
            /* This deals with the fact that 10 byte reals really
             * occupy 12 bytes by zeroing the extra 2 bytes.
             */
            if (op1 == 0xDB)
            {
                cs.Iop = 0xC7;                      // MOV EA+10,0
                NEWREG(cs.Irm, 0);
                cs.IEV1.Voffset += 10;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 0;
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);
            }
        }
        else if (tysize(TYldouble) == 16)
        {
            /* This deals with the fact that 10 byte reals really
             * occupy 16 bytes by zeroing the extra 6 bytes.
             */
            if (op1 == 0xDB)
            {
                cs.Irex &= ~REX_W;
                cs.Iop = 0xC7;                      // MOV EA+10,0
                NEWREG(cs.Irm, 0);
                cs.IEV1.Voffset += 10;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 0;
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);

                // second store at EA+12, this time without CFopsize
                cs.IEV1.Voffset += 2;
                cs.Iflags &= ~CFopsize;
                cdb.gen(&cs);
            }
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}

/*******************************
 * Perform an assignment to a complex long double/double/float
 * (cldouble/cdouble/cfloat).
 *
 * e.EV.E2 is evaluated into ST01; the half at offset sz (half the type size)
 * is stored first, then the half at offset 0.
 */

@trusted
void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;   // opcode byte of the store instruction
    uint op2;       // reg field of the store instruction
    uint sz;        // size of one half of the complex value
    int fxch = 0;   // set when the non-popping FST form is used

    //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    cs.Irex = 0;
    regm_t retregs = mST01 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYcdouble:     op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYcfloat:      op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYcldouble:    op1 = 0xDB;             op2 = 7; break;
        default:
            assert(0);
    }
    if (*pretregs & (mST01 | mXMM0 | mXMM1))  // if want result on stack too
    {
        if (ty1 == TYcldouble)
        {
            // Duplicate both halves, since the cldouble store always pops
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            pop87();
            pop87();
        }
        else
        {   op2 = 2;                        // FST e.EV.E1
            fxch = 1;
        }
    }
    else
    {   // FSTP e.EV.E1
        pop87();
        pop87();
    }
    sz = tysize(ty1) / 2;
    if (*pretregs & (mST01 | mXMM0 | mXMM1))
    {
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.Iop = op1;
        getlvalue87(cdb, &cs, e.EV.E1, 0);
        cs.IEV1.Voffset += sz;
        cs.Irm |= modregrm(0, op2, 0);
        makesure87(cdb,e.EV.E2, sz, 0, 0);
        cdb.gen(&cs);                       // store the half at offset sz
        genfwait(cdb);
        makesure87(cdb,e.EV.E2,  0, 1, 0);
    }
    else
    {
        loadea(cdb,e.EV.E1,&cs,op1,op2,sz,0,0);
        genfwait(cdb);
    }
    // With FST nothing was popped, so swap to bring the other half to ST0,
    // store it, and swap back
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    cs.IEV1.Voffset -= sz;
    cdb.gen(&cs);                       // store the half at offset 0
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    if (tysize(TYldouble) == 12)
    {
        // Zero the padding after each 10 byte half (at +10 and +22)
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);
            cs.IEV1.Voffset += 12;
            cdb.gen(&cs);               // MOV EA+22,0
        }
    }
    if (tysize(TYldouble) == 16)
    {
        // Zero the padding after each 10 byte half: stores at +10 and +12
        // for the first half, +26 and +28 for the second
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 14;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult_complex87(cdb, e,mST01 | mPSW,pretregs);
}

/*******************************
 * Perform an assignment while converting to integral type,
 * i.e. handle (e1 = (int) e2)
 *
 * e.EV.E2 must be one of OPd_s16, OPd_s32, OPd_u16 or OPd_s64, and no
 * result may be requested (*pretregs must be 0).
 */

@trusted
private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;   // opcode byte of the integer store
    uint op2;       // reg field of the integer store

    assert(e.Eoper == OPeq);
    assert(!*pretregs);
    regm_t retregs = mST0;
    elem_debug(e.EV.E2);
    // Evaluate the operand of the conversion, not the conversion itself
    codelem(cdb,e.EV.E2.EV.E1,&retregs,false);

    switch (e.EV.E2.Eoper)
    {   case OPd_s16:
            op1 = ESC(MFword,1);
            op2 = 3;
            break;
        case OPd_s32:
        case OPd_u16:
            op1 = ESC(MFlong,1);
            op2 = 3;
            break;
        case OPd_s64:
            op1 = 0xDF;
            op2 = 7;
            break;
        default:
            assert(0);
    }
    freenode(e.EV.E2);

    // The store is bracketed by control word changes: round toward zero
    // for the conversion, then back to round-to-nearest
    genfwait(cdb);
    genrnd(cdb, CW_roundto0);               // FLDCW roundto0

    pop87();
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    if (e.EV.E1.Eoper == OPvar)
        notreg(e.EV.E1);                    // cannot be put in register anymore
    loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);

    genfwait(cdb);
    genrnd(cdb, CW_roundtonearest);         // FLDCW roundtonearest

    freenode(e.EV.E1);
}

/**********************************
 * Perform +=, -=, *= and /= for doubles.
*/

/* opass87 notes:
 *      Besides the operators named above, this also handles %= (OPmodass)
 *      and OPpostinc/OPpostdec, for float, double and long double lvalues
 *      and their imaginary counterparts.
 *      Complex lvalues are forwarded to opmod_complex87() (for %=) or
 *      opass_complex87() (everything else).
 *      The rvalue e.EV.E2 is evaluated into ST0 first; the result is stored
 *      back to e.EV.E1 and, if *pretregs asks for it, also left in ST0.
 */

@trusted
public void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    uint op;        // reg field (already shifted) of the memory-operand Fop
    opcode_t opld;  // second byte of the FopP ST(1) form used for long double
    opcode_t op1;   // opcode byte of the load/store instruction
    uint op2;       // reg field of the final store
    tym_t ty1 = tybasic(e.EV.E1.Ety);

    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            if (e.Eoper == OPmodass)
                opmod_complex87(cdb, e, pretregs);
            else
                opass_complex87(cdb, e, pretregs);
            return;

        default:
            assert(0);
    }
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:      op = 0 << 3;    opld = 0xC1;    break;  // FADD
        case OPpostdec:
        case OPminass:      op = 5 << 3;    opld = 0xE1; /*0xE9;*/ break;  // FSUBR
        case OPmulass:      op = 1 << 3;    opld = 0xC9;    break;  // FMUL
        case OPdivass:      op = 7 << 3;    opld = 0xF1;    break;  // FDIVR
        case OPmodass:      break;          // op/opld are not used on the %= path
        default:            assert(0);
    }
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);     // evaluate rvalue
    note87(e.EV.E2,0,0);
    // For %= AX is passed as the register mask to getlvalue87(); the FPREM
    // loop below goes through AH (see the instruction listing)
    getlvalue87(cdb,&cs,e.EV.E1,e.Eoper==OPmodass?mAX:0);
    makesure87(cdb,e.EV.E2,0,0,0);
    if (config.flags4 & CFG4fdivcall && e.Eoper == OPdivass)
    {
        push87(cdb);
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
        callclib(cdb,e,CLIB.fdiv87,&retregs,0);
        pop87();
    }
    else if (e.Eoper == OPmodass)
    {
        /*
         *          fld     tbyte ptr y
         *          fld     tbyte ptr x             // ST = x, ST1 = y
         *  FM1:    // We don't use fprem1 because for some inexplicable
         *          // reason we get -5 when we do _modulo(15, 10)
         *          fprem                           // ST = ST % ST1
         *          fstsw   word ptr sw
         *          fwait
         *          mov     AH,byte ptr sw+1        // get msb of status word in AH
         *          sahf                            // transfer to flags
         *          jp      FM1                     // continue till ST < ST1
         *          fstp    ST(1)                   // leave remainder on stack
         */
        push87(cdb);
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1

        cdb.gen2(0xD9, 0xF8);               // FPREM
        code *cfm1 = cdb.last();
        genjmpifC2(cdb, cfm1);              // JC2 FM1
        cdb.genf2(0xDD,0xD8 + 1);           // FSTP ST(1)

        pop87();
    }
    else if (ty1 == TYldouble || ty1 == TYildouble)
    {
        // Load the lvalue onto the stack, then combine register-to-register
        push87(cdb);
        cs.Iop = op1;
        cs.Irm |= modregrm(0, 5, 0);        // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1
        cdb.genf2(0xDE,opld);               // FopP  ST(1)
        pop87();
    }
    else
    {
        cs.Iop = op1 & ~1;
        cs.Irm |= op;
        cdb.gen(&cs);                       // Fop e.EV.E1
    }
    if (*pretregs & mPSW)
        genftst(cdb,e,0);                   // FTST ST0
    // if want result in registers
    if (*pretregs & (mST0 | ALLREGS | mBP))
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP
        pop87();
    }
    cs.Iop = op1;
    NEWREG(cs.Irm,op2);                     // FSTx e.EV.E1
    freenode(e.EV.E1);
    cdb.gen(&cs);
    genfwait(cdb);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}

/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 *
 * Each half of E1 is reduced with its own FPREM loop against E2, and both
 * halves are then written back to E1.
 */

@trusted
private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs)
{

    /*          fld     E2
                fld     E1.re
        FM1:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                jp      FM1
                fxch    ST(1)
                fld     E1.im
        FM2:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                jp      FM2
                fstp    ST(1)
     */

    code cs;

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;                // size of one half of E1

    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);         // FLD E2
    note87(e.EV.E2,0,0);
    getlvalue87(cdb,&cs,e.EV.E1,0);
    makesure87(cdb,e.EV.E2,0,0,0);

    push87(cdb);
    switch (ty1)
    {
        case TYcdouble:  cs.Iop = ESC(MFdouble,1);      break;
        case TYcfloat:   cs.Iop = ESC(MFfloat,1);       break;
        case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
        default:
            assert(0);
    }
    cdb.gen(&cs);                               // FLD E1.re

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm1 = cdb.last();
    genjmpifC2(cdb, cfm1);                      // JC2 FM1
    cdb.genf2(0xD9, 0xC8 + 1);                  // FXCH ST(1)

    push87(cdb);
    cs.IEV1.Voffset += sz2;
    cdb.gen(&cs);                               // FLD E1.im

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm2 = cdb.last();
    genjmpifC2(cdb, cfm2);                      // JC2 FM2
    cdb.genf2(0xDD,0xD8 + 1);                   // FSTP ST(1)

    pop87();

    if (*pretregs & (mST01 | mPSW))
    {
        cs.Irm |= modregrm(0, 2, 0);
        cdb.gen(&cs);                           // FST mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);                           // FST mreal.re
        retregs = mST01;
    }
    else
    {
        cs.Irm |= modregrm(0, 3, 0);
        cdb.gen(&cs);                           // FSTP mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);                           // FSTP mreal.re
        pop87();
        pop87();
        retregs = 0;
    }
    freenode(e.EV.E1);
    genfwait(cdb);
    fixresult_complex87(cdb,e,retregs,pretregs);
}

/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 */

@trusted
private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code cs;
    uint op;
    opcode_t op2;

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:  op = 0 << 3;            // FADD
                        op2 = 0xC0;             // FADDP ST(i),ST
                        break;

        case OPpostdec:
        case OPminass:  op = 5 << 3;            // FSUBR
                        op2 = 0xE0;             // FSUBRP ST(i),ST
                        break;

        case OPmulass:  op = 1 << 3;            // FMUL
                        op2 = 0xC8;             // FMULP ST(i),ST
                        break;

        case OPdivass:  op = 7 << 3;            // FDIVR
                        op2 = 0xF0;             // FDIVRP ST(i),ST
                        break;

        default:        assert(0);
    }

    if (!tycomplex(e.EV.E2.Ety) &&
        (e.Eoper == OPmulass || e.Eoper == OPdivass))
    {
        retregs = mST0;
        codelem(cdb,e.EV.E2, &retregs, false);
        note87(e.EV.E2, 0, 0);
        getlvalue87(cdb,&cs, e.EV.E1, 0);
        makesure87(cdb,e.EV.E2,0,0,0);
        push87(cdb);
        cdb.genf2(0xD9,0xC0);                   // FLD ST(0)
        goto L1;
    }
    else
    {
2533 loadComplex(cdb,e.EV.E2); 2534 getlvalue87(cdb,&cs,e.EV.E1,0); 2535 makesure87(cdb,e.EV.E2,sz2,0,0); 2536 makesure87(cdb,e.EV.E2,0,1,0); 2537 } 2538 2539 switch (e.Eoper) 2540 { 2541 case OPpostinc: 2542 case OPaddass: 2543 case OPpostdec: 2544 case OPminass: 2545 L1: 2546 if (ty1 == TYcldouble) 2547 { 2548 push87(cdb); 2549 push87(cdb); 2550 cs.Iop = 0xDB; 2551 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2552 cdb.gen(&cs); // FLD e.EV.E1.re 2553 cs.IEV1.Voffset += sz2; 2554 cdb.gen(&cs); // FLD e.EV.E1.im 2555 cdb.genf2(0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST 2556 cdb.genf2(0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST 2557 pop87(); 2558 pop87(); 2559 if (tyimaginary(e.EV.E2.Ety)) 2560 { 2561 if (e.Eoper == OPmulass) 2562 { 2563 cdb.genf2(0xD9, 0xE0); // FCHS 2564 cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1) 2565 } 2566 else if (e.Eoper == OPdivass) 2567 { 2568 cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1) 2569 cdb.genf2(0xD9, 0xE0); // FCHS 2570 } 2571 } 2572 L2: 2573 if (*pretregs & (mST01 | mPSW)) 2574 { 2575 push87(cdb); 2576 push87(cdb); 2577 cdb.genf2(0xD9,0xC1); // FLD ST(1) 2578 cdb.genf2(0xD9,0xC1); // FLD ST(1) 2579 retregs = mST01; 2580 } 2581 else 2582 retregs = 0; 2583 cs.Iop = 0xDB; 2584 cs.Irm |= modregrm(0,7,0); 2585 cdb.gen(&cs); // FSTP e.EV.E1.im 2586 cs.IEV1.Voffset -= sz2; 2587 cdb.gen(&cs); // FSTP e.EV.E1.re 2588 pop87(); 2589 pop87(); 2590 2591 } 2592 else 2593 { 2594 ubyte rmop = cast(ubyte)(cs.Irm | op); 2595 ubyte rmfst = cs.Irm | modregrm(0,2,0); 2596 ubyte rmfstp = cs.Irm | modregrm(0,3,0); 2597 ubyte iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD; 2598 opcode_t iop = (ty1 == TYcfloat) ? 
0xD8 : 0xDC; 2599 2600 cs.Iop = iop; 2601 cs.Irm = rmop; 2602 cs.IEV1.Voffset += sz2; 2603 cdb.gen(&cs); // FSUBR mreal.im 2604 if (tyimaginary(e.EV.E2.Ety) && (e.Eoper == OPmulass || e.Eoper == OPdivass)) 2605 { 2606 if (e.Eoper == OPmulass) 2607 cdb.genf2(0xD9, 0xE0); // FCHS 2608 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2609 cs.IEV1.Voffset -= sz2; 2610 cdb.gen(&cs); // FMUL mreal.re 2611 if (e.Eoper == OPdivass) 2612 cdb.genf2(0xD9, 0xE0); // FCHS 2613 if (*pretregs & (mST01 | mPSW)) 2614 { 2615 cs.Iop = iopfst; 2616 cs.Irm = rmfst; 2617 cs.IEV1.Voffset += sz2; 2618 cdb.gen(&cs); // FST mreal.im 2619 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2620 cs.IEV1.Voffset -= sz2; 2621 cdb.gen(&cs); // FST mreal.re 2622 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2623 retregs = mST01; 2624 } 2625 else 2626 { 2627 cs.Iop = iopfst; 2628 cs.Irm = rmfstp; 2629 cs.IEV1.Voffset += sz2; 2630 cdb.gen(&cs); // FSTP mreal.im 2631 pop87(); 2632 cs.IEV1.Voffset -= sz2; 2633 cdb.gen(&cs); // FSTP mreal.re 2634 pop87(); 2635 retregs = 0; 2636 } 2637 goto L3; 2638 } 2639 2640 if (*pretregs & (mST01 | mPSW)) 2641 { 2642 cs.Iop = iopfst; 2643 cs.Irm = rmfst; 2644 cdb.gen(&cs); // FST mreal.im 2645 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2646 cs.Iop = iop; 2647 cs.Irm = rmop; 2648 cs.IEV1.Voffset -= sz2; 2649 cdb.gen(&cs); // FSUBR mreal.re 2650 cs.Iop = iopfst; 2651 cs.Irm = rmfst; 2652 cdb.gen(&cs); // FST mreal.re 2653 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2654 retregs = mST01; 2655 } 2656 else 2657 { 2658 cs.Iop = iopfst; 2659 cs.Irm = rmfstp; 2660 cdb.gen(&cs); // FSTP mreal.im 2661 pop87(); 2662 cs.Iop = iop; 2663 cs.Irm = rmop; 2664 cs.IEV1.Voffset -= sz2; 2665 cdb.gen(&cs); // FSUBR mreal.re 2666 cs.Iop = iopfst; 2667 cs.Irm = rmfstp; 2668 cdb.gen(&cs); // FSTP mreal.re 2669 pop87(); 2670 retregs = 0; 2671 } 2672 } 2673 L3: 2674 freenode(e.EV.E1); 2675 genfwait(cdb); 2676 fixresult_complex87(cdb,e,retregs,pretregs); 2677 return; 2678 2679 case OPmulass: 2680 push87(cdb); 2681 
push87(cdb); 2682 if (ty1 == TYcldouble) 2683 { 2684 cs.Iop = 0xDB; 2685 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2686 cdb.gen(&cs); // FLD e.EV.E1.re 2687 cs.IEV1.Voffset += sz2; 2688 cdb.gen(&cs); // FLD e.EV.E1.im 2689 retregs = mST01; 2690 callclib(cdb, e, CLIB.cmul, &retregs, 0); 2691 goto L2; 2692 } 2693 else 2694 { 2695 cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD; 2696 cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ... 2697 cdb.gen(&cs); // FLD e.EV.E1.re 2698 cs.IEV1.Voffset += sz2; 2699 cdb.gen(&cs); // FLD e.EV.E1.im 2700 retregs = mST01; 2701 callclib(cdb, e, CLIB.cmul, &retregs, 0); 2702 if (*pretregs & (mST01 | mPSW)) 2703 { 2704 cs.Irm |= modregrm(0, 2, 0); 2705 cdb.gen(&cs); // FST mreal.im 2706 cs.IEV1.Voffset -= sz2; 2707 cdb.gen(&cs); // FST mreal.re 2708 retregs = mST01; 2709 } 2710 else 2711 { 2712 cs.Irm |= modregrm(0, 3, 0); 2713 cdb.gen(&cs); // FSTP mreal.im 2714 cs.IEV1.Voffset -= sz2; 2715 cdb.gen(&cs); // FSTP mreal.re 2716 pop87(); 2717 pop87(); 2718 retregs = 0; 2719 } 2720 goto L3; 2721 } 2722 2723 case OPdivass: 2724 push87(cdb); 2725 push87(cdb); 2726 idxregs = idxregm(&cs); // mask of index regs used 2727 if (ty1 == TYcldouble) 2728 { 2729 cs.Iop = 0xDB; 2730 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2731 cdb.gen(&cs); // FLD e.EV.E1.re 2732 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2733 cs.IEV1.Voffset += sz2; 2734 cdb.gen(&cs); // FLD e.EV.E1.im 2735 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2736 retregs = mST01; 2737 callclib(cdb, e, CLIB.cdiv, &retregs, idxregs); 2738 goto L2; 2739 } 2740 else 2741 { 2742 cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD; 2743 cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ... 
2744 cdb.gen(&cs); // FLD e.EV.E1.re 2745 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2746 cs.IEV1.Voffset += sz2; 2747 cdb.gen(&cs); // FLD e.EV.E1.im 2748 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2749 retregs = mST01; 2750 callclib(cdb, e, CLIB.cdiv, &retregs, idxregs); 2751 if (*pretregs & (mST01 | mPSW)) 2752 { 2753 cs.Irm |= modregrm(0, 2, 0); 2754 cdb.gen(&cs); // FST mreal.im 2755 cs.IEV1.Voffset -= sz2; 2756 cdb.gen(&cs); // FST mreal.re 2757 retregs = mST01; 2758 } 2759 else 2760 { 2761 cs.Irm |= modregrm(0, 3, 0); 2762 cdb.gen(&cs); // FSTP mreal.im 2763 cs.IEV1.Voffset -= sz2; 2764 cdb.gen(&cs); // FSTP mreal.re 2765 pop87(); 2766 pop87(); 2767 retregs = 0; 2768 } 2769 goto L3; 2770 } 2771 2772 default: 2773 assert(0); 2774 } 2775 } 2776 2777 /************************** 2778 * OPnegass 2779 */ 2780 2781 @trusted 2782 void cdnegass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2783 { 2784 regm_t retregs; 2785 uint op; 2786 2787 //printf("cdnegass87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 2788 elem *e1 = e.EV.E1; 2789 tym_t tyml = tybasic(e1.Ety); // type of lvalue 2790 int sz = _tysize[tyml]; 2791 2792 code cs; 2793 getlvalue87(cdb,&cs,e1,0); 2794 2795 /* If the EA is really an XMM register, modEA() will fail. 2796 * So disallow putting e1 into a register. 2797 * A better way would be to negate the XMM register in place. 
2798 */ 2799 if (e1.Eoper == OPvar) 2800 e1.EV.Vsym.Sflags &= ~GTregcand; 2801 2802 modEA(cdb,&cs); 2803 cs.Irm |= modregrm(0,6,0); 2804 cs.Iop = 0x80; 2805 if (tysize(TYldouble) > 10) 2806 { 2807 if (tyml == TYldouble || tyml == TYildouble) 2808 cs.IEV1.Voffset += 10 - 1; 2809 else if (tyml == TYcldouble) 2810 cs.IEV1.Voffset += tysize(TYldouble) + 10 - 1; 2811 else 2812 cs.IEV1.Voffset += sz - 1; 2813 } 2814 else 2815 cs.IEV1.Voffset += sz - 1; 2816 cs.IFL2 = FLconst; 2817 cs.IEV2.Vuns = 0x80; 2818 cdb.gen(&cs); // XOR 7[EA],0x80 2819 if (tycomplex(tyml)) 2820 { 2821 cs.IEV1.Voffset -= sz / 2; 2822 cdb.gen(&cs); // XOR 7[EA],0x80 2823 } 2824 2825 if (*pretregs) 2826 { 2827 switch (tyml) 2828 { 2829 case TYifloat: 2830 case TYfloat: cs.Iop = 0xD9; op = 0; break; 2831 case TYidouble: 2832 case TYdouble: 2833 case TYdouble_alias: cs.Iop = 0xDD; op = 0; break; 2834 case TYildouble: 2835 case TYldouble: cs.Iop = 0xDB; op = 5; break; 2836 default: 2837 assert(0); 2838 } 2839 NEWREG(cs.Irm,op); 2840 cs.IEV1.Voffset -= sz - 1; 2841 push87(cdb); 2842 cdb.gen(&cs); // FLD EA 2843 retregs = mST0; 2844 } 2845 else 2846 retregs = 0; 2847 2848 freenode(e1); 2849 fixresult87(cdb,e,retregs,pretregs); 2850 } 2851 2852 /************************ 2853 * Take care of OPpostinc and OPpostdec. 
*/

@trusted
void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    uint op;
    opcode_t op1;
    reg_t reg;

    //printf("post87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    code cs;
    assert(*pretregs);
    getlvalue87(cdb,&cs,e.EV.E1,0);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
        case TYcdouble:     op1 = ESC(MFdouble,1);  reg = 0;        break;
        case TYifloat:
        case TYfloat:
        case TYcfloat:      op1 = ESC(MFfloat,1);   reg = 0;        break;
        case TYildouble:
        case TYldouble:
        case TYcldouble:    op1 = 0xDB;             reg = 5;        break;
        default:
            assert(0);
    }
    NEWREG(cs.Irm, reg);
    // Switch `reg` from the FLD sub-opcode to the matching FSTP sub-opcode
    if (reg == 5)
        reg = 7;
    else
        reg = 3;
    cs.Iop = op1;
    push87(cdb);
    cdb.gen(&cs);                   // FLD e.EV.E1
    if (tycomplex(ty1))
    {
        uint sz = _tysize[ty1] / 2;

        push87(cdb);
        cs.IEV1.Voffset += sz;
        cdb.gen(&cs);               // FLD e.EV.E1
        regm_t retregs = mST0;      // note kludge to only load real part
        codelem(cdb,e.EV.E2,&retregs,false); // load rvalue
        cdb.genf2(0xD8,             // FADD/FSUBR ST,ST2
                (e.Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
        NEWREG(cs.Irm,reg);
        pop87();
        cs.IEV1.Voffset -= sz;
        cdb.gen(&cs);               // FSTP e.EV.E1
        genfwait(cdb);
        freenode(e.EV.E1);
        fixresult_complex87(cdb, e, mST01, pretregs);
        return;
    }

    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS))
    {   // Want the result in a register
        push87(cdb);
        cdb.genf2(0xD9,0xC0);       // FLD ST0
    }
    if (*pretregs & mPSW)           // if result in flags
        genftst(cdb,e,0);           // FTST ST0
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);    // load rvalue
    pop87();
    op = (e.Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
    cdb.genf2(0xDE,op);             // FADDP/FSUBRP ST1
    NEWREG(cs.Irm,reg);
    pop87();
    cdb.gen(&cs);                   // FSTP e.EV.E1
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mPSW | mST0,pretregs);
}

/************************
 * Do the following opcodes:
 *      OPd_u64
 *      OPld_u64
 * Dispatches to the 32 bit or 64 bit implementation.
 */
void cdd_u64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);
    assert(*pretregs);
    if (I32)
        cdd_u64_I32(cdb, e, pretregs);
    else
        cdd_u64_I64(cdb, e, pretregs);
}

@trusted
private void cdd_u64_I32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e   // (roundTo0<<16) | adjust
            fld         real ptr floatreg       // adjust (= 1/real.epsilon)
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10             // roundTo0
            test        AH,1
            jz          L1                      // jae L1

            fld         real ptr floatreg       // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         EAX,floatreg
            add         EDX,floatreg+4
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         EAX,floatreg
            mov         EDX,floatreg+4
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg, reg2;
    allocreg(cdb,&retregs,&reg,tym);
    reg  = findreglsw(retregs);
    reg2 = findregmsw(retregs);
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);

    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                             // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                     // MOV floatreg+4,EDX
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                     // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                       // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                           // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                           // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                      // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                      // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);             // TEST AH,1
    code *cnop1 = gennop(null);
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);      // JZ L1

    cdb.genfltreg(0xDB,5,0);                       // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                        // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                       // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                      // MOV reg,floatreg
    cdb.genfltreg(0x03,reg2,4);                    // ADD reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                      // FLDCW floatreg+12
    code *cnop2 = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);     // JMP L2

    cdb.append(cnop1);
    cdb.genfltreg(0xDF,7,0);                       // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                      // MOV reg,floatreg
    cdb.genfltreg(LOD,reg2,4);                     // MOV reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                      // FLDCW floatreg+12
    cdb.append(cnop2);

    pop87();
    fixresult(cdb,e,retregs,pretregs);
}

@trusted
private void cdd_u64_I64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e   // (roundTo0<<16) | adjust
            fld         real ptr floatreg       // adjust
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10             // roundTo0
            test        AH,1
            jz          L1                      // jae L1

            fld         real ptr floatreg       // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         RAX,floatreg
            shl         RDX,32
            add         RAX,RDX
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         RAX,floatreg
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);
    // Scratch register for the 0x8000_0000 constant; must not be the result
    // register nor AX (clobbered by FSTSW AX)
    regm_t regm2 = ALLREGS & ~retregs & ~mAX;
    reg_t reg2;
    allocreg(cdb,&regm2,&reg2,tym);
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);

    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                             // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                     // MOV floatreg+4,EDX
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                     // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                       // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                           // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                           // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                      // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                      // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);             // TEST AH,1
    code *cnop1 = gennop(null);
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);      // JZ L1

    cdb.genfltreg(0xDB,5,0);                       // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                        // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                       // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                      // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);
    cdb.genc2(0xC1,(REX_W << 16) | modregrmx(3,4,reg2),32); // SHL reg2,32
    cdb.gen2(0x03,(REX_W << 16) | modregxrmx(3,reg,reg2));  // ADD reg,reg2
    cdb.genfltreg(0xD9,5,12);                      // FLDCW floatreg+12
    code *cnop2 = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);     // JMP L2

    cdb.append(cnop1);
    cdb.genfltreg(0xDF,7,0);                       // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                      // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);
    cdb.genfltreg(0xD9,5,12);                      // FLDCW floatreg+12
    cdb.append(cnop2);

    pop87();
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Do the following opcodes:
 *      OPd_u32
 */
@trusted
void cdd_u32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);

    /* Generate:
            mov         floatreg+8,0x0FBF0000   // (roundTo0<<16)
            fstcw       floatreg+12
            fldcw       floatreg+10             // roundTo0
            fistp       floatreg
            fldcw       floatreg+12
            mov         EAX,floatreg
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    tym_t tym = e.Ety;
    retregs = *pretregs & ALLREGS;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);

    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0x0FBF0000;                 // MOV floatreg+8,(roundTo0<<16)

    cdb.genfltreg(0xD9,7,12);                    // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                    // FLDCW floatreg+10

    cdb.genfltreg(0xDF,7,0);                     // FISTP qword ptr floatreg
    cdb.genfltreg(0xD9,5,12);                    // FLDCW floatreg+12
    cdb.genfltreg(LOD,reg,0);                    // MOV reg,floatreg

    pop87();
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Do the following opcodes:
 *      OPd_s16
 *      OPd_s32
 *      OPd_u16
 *      OPd_s64
 */

@trusted
void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint mf,rf;
    reg_t reg;
    int clib;

    //printf("cnvt87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(*pretregs);
    tym_t tym = e.Ety;
    int sz = tysize(tym);
    int szoff = sz;

    switch (e.Eoper)
    {
        case OPd_s16:
            clib = CLIB.dblint87;
            mf = ESC(MFword,1);
            rf = 3;
            break;

        case OPd_u16:
            szoff = 4;
            goto case OPd_s32;

        case OPd_s32:
            clib = CLIB.dbllng87;
            mf = ESC(MFlong,1);
            rf = 3;
            break;

        case OPd_s64:
            clib = CLIB.dblllng;
            mf = 0xDF;
            rf = 7;
            break;

        default:
            assert(0);
    }

    if (I16)                       // C may change the default control word
    {
        if (clib == CLIB.dblllng)
        {   retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
        }
        else
        {   retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
            pop87();
        }
    }
    else if (1)
    {   //  Generate:
        //  sub     ESP,12
        //  fstcw   8[ESP]
        //  fldcw   roundto0
        //  fistp   long64 ptr [ESP]
        //  fldcw   8[ESP]
        //  pop     lsw
        //  pop     msw
        //  add     ESP,4

        uint szpush = szoff + 2;
        if (config.flags3 & CFG3pic)
            szpush += 2;
        szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        if (szpush == REGSIZE)
            cdb.gen1(0x50 + AX);                // PUSH EAX
        else
            cod3_stackadj(cdb, szpush);
        genfwait(cdb);
        cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

        genfwait(cdb);

        if (config.flags3 & CFG3pic)
        {
            cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
            code_orflag(cdb.last(), CFopsize);
            cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
        }
        else
            genrnd(cdb, CW_roundto0);   // FLDCW roundto0

        pop87();

        genfwait(cdb);
        cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP));                   // FISTP [ESP]

        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
            retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);                                           // FWAIT
        cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]

        if (szoff > REGSIZE)
        {   szpush -= REGSIZE;
            genpop(cdb,findreglsw(retregs));       // POP lsw
        }
        szpush -= REGSIZE;
        genpop(cdb,reg);                           // POP reg

        if (szpush)
            cod3_stackadj(cdb, -szpush);
        fixresult(cdb,e,retregs,pretregs);
    }
    else
    {
        // This is incorrect. For -inf and nan, the 8087 returns the largest
        // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
        // and for nan, 0 should be returned.
        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        genfwait(cdb);
        genrnd(cdb, CW_roundto0);                  // FLDCW roundto0

        pop87();
        cdb.genfltreg(mf,rf,0);                    // FISTP floatreg
        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
            retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);

        if (sz > REGSIZE)
        {
            cdb.genfltreg(LOD,reg,REGSIZE);          // MOV reg,floatreg + REGSIZE
                                                     // MOV lsreg,floatreg
            cdb.genfltreg(LOD,findreglsw(retregs),0);
        }
        else
            cdb.genfltreg(LOD,reg,0);                // MOV reg,floatreg
        genrnd(cdb, CW_roundtonearest);              // FLDCW roundtonearest
        fixresult(cdb,e,retregs,pretregs);
    }
}

/************************
 * Do OPrndtol.
*/

@trusted
void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Result unused: just evaluate the operand for its side effects
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1,&retregs,false);

    // Pick the FISTP encoding matching the size of the integer result
    ubyte op1,op2;
    tym_t tym = e.Ety;
    uint sz = tysize(tym);
    switch (sz)
    {   case 2:
            op1 = 0xDF;
            op2 = 3;
            break;
        case 4:
            op1 = 0xDB;
            op2 = 3;
            break;
        case 8:
            op1 = 0xDF;
            op2 = 7;
            break;
        default:
            assert(0);
    }

    pop87();
    cdb.genfltreg(op1,op2,0);           // FISTP floatreg
    retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);
    genfwait(cdb);                      // FWAIT
    if (tysize(tym) > REGSIZE)
    {
        cdb.genfltreg(LOD,reg,REGSIZE);             // MOV reg,floatreg + REGSIZE
                                                    // MOV lsreg,floatreg
        cdb.genfltreg(LOD,findreglsw(retregs),0);
    }
    else
    {
        cdb.genfltreg(LOD,reg,0);       // MOV reg,floatreg
        if (tysize(tym) == 8 && I64)
            code_orrex(cdb.last(), REX_W);
    }
    fixresult(cdb,e,retregs,pretregs);
}

/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
*/

@trusted
void cdscale(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    // Evaluate both operands onto the x87 stack, E1 first
    regm_t want = mST0;
    codelem(cdb,e.EV.E1,&want,false);
    note87(e.EV.E1,0,0);
    codelem(cdb,e.EV.E2,&want,false);
    makesure87(cdb,e.EV.E1,0,1,0);       // now have x,y on stack; need y,x

    const oper = e.Eoper;
    if (oper == OPscale)
    {
        cdb.genf2(0xD9,0xFD);            // FSCALE
        cdb.genf2(0xDD,0xD8 + 1);        // FSTP ST(1)
    }
    else if (oper == OPyl2x)
        cdb.genf2(0xD9,0xF1);            // FYL2X
    else if (oper == OPyl2xp1)
        cdb.genf2(0xD9,0xF9);            // FYL2XP1
    else
        assert(0);

    // Two operands were consumed and one result left behind
    pop87();
    fixresult87(cdb,e,mST0,pretregs);
}


/**********************************
 * Unary -, absolute value, square root, sine, cosine
 * (and round-to-integer, OPrint).
 */

@trusted
void neg87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("neg87()\n");

    assert(*pretregs);

    // Second byte of the D9-escaped instruction implementing the operator
    opcode_t second;
    const oper = e.Eoper;
    if      (oper == OPneg)  second = 0xE0;
    else if (oper == OPabs)  second = 0xE1;
    else if (oper == OPsqrt) second = 0xFA;
    else if (oper == OPsin)  second = 0xFE;
    else if (oper == OPcos)  second = 0xFF;
    else if (oper == OPrint) second = 0xFC;   // FRNDINT
    else
        assert(0);

    regm_t operand = mST0;
    codelem(cdb,e.EV.E1,&operand,false);
    cdb.genf2(0xD9,second);              // FCHS/FABS/FSQRT/FSIN/FCOS/FRNDINT
    fixresult87(cdb,e,mST0,pretregs);
}

/**********************************
 * Unary - for complex operands
 */

@trusted
void neg_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPneg);
    regm_t operand = mST01;
    codelem(cdb,e.EV.E1,&operand,false);

    // Negate ST(0), swap, negate the other component, swap back
    foreach (_; 0 .. 2)
    {
        cdb.genf2(0xD9,0xE0);            // FCHS
        cdb.genf2(0xD9,0xC8 + 1);        // FXCH ST(1)
    }
    fixresult_complex87(cdb,e,mST01,pretregs);
}

/*********************************
 * Load the floating point value addressed by e onto the x87 stack.
 * Nothing is loaded when *pretregs is 0.
 */

@trusted
void cdind87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdind87(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    code cs;

    getlvalue87(cdb,&cs,e,0);           // get addressing mode
    if (*pretregs)
    {
        switch (tybasic(e.Ety))
        {   case TYfloat:
            case TYifloat:
                cs.Iop = 0xD9;
                break;

            case TYidouble:
            case TYdouble:
            case TYdouble_alias:
                cs.Iop = 0xDD;
                break;

            case TYildouble:
            case TYldouble:
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,5,0);
                break;

            default:
                assert(0);
        }
        push87(cdb);
        cdb.gen(&cs);                 // FLD EA
        fixresult87(cdb,e,mST0,pretregs);
    }
}

/************************************
 * Reset statics for another .obj file.
 */

@trusted
void cg87_reset()
{
    memset(&oldd,0,oldd.sizeof);
}


/*****************************************
 * Generate an FLDCW that loads control word cw.
 * For PIC code the value is stored into floatreg and loaded from there;
 * otherwise it is loaded from a read-only symbol that is created on
 * first use.
 * Params:
 *      cdb = code sink
 *      cw = control word; the non-PIC path distinguishes only CW_roundto0
 *           from everything else (treated as round-to-nearest)
 */

@trusted
private void genrnd(ref CodeBuilder cdb, short cw)
{
    if (config.flags3 & CFG3pic)
    {
        cdb.genfltreg(0xC7, 0, 0);       // MOV floatreg, cw
        code *c1 = cdb.last();
        c1.IFL2 = FLconst;
        c1.IEV2.Vuns = cw;

        cdb.genfltreg(0xD9, 5, 0);         // FLDCW floatreg
    }
    else
    {
        if (!oldd.round)                // if not initialized
        {
            short cwi;

            oldd.round = 1;

            cwi = CW_roundto0;          // round to 0
            oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
            cwi = CW_roundtonearest;            // round to nearest
            oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
        }
        Symbol *rnddir = (cw == CW_roundto0) ? oldd.roundto0 : oldd.roundtonearest;
        code cs;
        cs.Iop = 0xD9;
        cs.Iflags = CFoff;
        cs.Irex = 0;
        cs.IEV1.Vsym = rnddir;
        cs.IFL1 = rnddir.Sfl;
        cs.IEV1.Voffset = 0;
        cs.Irm = modregrm(0,5,BPRM);
        cdb.gen(&cs);
    }
}

/************************* Complex Numbers *********************/

/***************************
 * Set the PSW based on the state of ST01.
 * Input:
 *      pop     if stack should be popped after test
 */

@trusted
private void genctst(ref CodeBuilder cdb,elem *e,int pop)
{
    assert(pop == 0 || pop == 1);

    // Generate:
    //  if (NOSAHF && pop)
    //          FLDZ
    //          FUCOMIP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FLDZ
    //          FUCOMIP ST(2)
    //      L1:
    //        if (pop)
    //          FPOP
    //          FPOP
    //  if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    // cdbnop holds the L1 label; it is appended to cdb at the very end
    CodeBuilder cdbnop;
    cdbnop.ctor();
    cdbnop.gennop();
    code *cnop = cdbnop.peek();
    push87(cdb);
    cdb.gen2(0xD9,0xEE);                       // FLDZ
    if (NOSAHF)
    {
        cdb.gen2(0xDF,0xE9);                   // FUCOMIP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDF,0xEA);                   // FUCOMIP ST(2)
        if (pop)
        {
            // The pops are emitted after L1 so both paths execute them
            cdbnop.genf2(0xDD,modregrm(3,3,0)); // FPOP
            cdbnop.genf2(0xDD,modregrm(3,3,0)); // FPOP
            pop87();
            pop87();
        }
    }
    else if (pop)
    {
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        pop87();
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    else
    {
        cdb.gen2(0xDD,0xE1);                   // FUCOM
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xDD,0xEA);                   // FUCOMP ST(2)
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    cdb.append(cdbnop);
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 * Params:
 *      cdb = code sink
 *      e = expression whose (complex) result is being moved
 *      retregs = where the result currently is
 *      pretregs = where the result is wanted
 *      isReturnValue = true if this is a function return value, in which
 *                      case the components may have to be swapped (see below)
 */

@trusted
void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    static if (0)
    {
        printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
            e,regm_str(retregs),regm_str(*pretregs));
    }

    assert(!*pretregs || retregs);
    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];

    if (isReturnValue)
    {
        // In loadComplex and complex_eq87, complex numbers have the real part
        // pushed to the FPU stack first (ST1), then the imaginary part (ST0).
        // However, the Intel 64 bit ABI scheme requires that types classified
        // as complex x87 instead have the real part returned in ST0, and the
        // imaginary part in ST1.
        if (retregs == mST01 && I64 && (config.exe & EX_posix))
            cdb.genf2(0xD9, 0xC8 + 1);          // FXCH ST(1)
    }

    if (*pretregs == 0 && retregs == mST01)
    {
        // Result not wanted: discard both components
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
    }
    else if (tym == TYllong)
    {
        // passing cfloat through register for I64
        assert(retregs & mST01, "this float expression is not implemented");
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,4);     // FSTP floatreg+4
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,0);     // FSTP floatreg
        genfwait(cdb);
        const reg = findreg(*pretregs);
        getregs(cdb,reg);
        cdb.genfltreg(LOD, reg, 0);             // MOV ECX,floatreg
        code_orrex(cdb.last(), REX_W);          // extend to RCX
    }
    else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
    {
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mDX|mAX);
        cdb.genfltreg(LOD, DX, 0);              // MOV EDX,floatreg

        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        cdb.genfltreg(LOD, AX, 0);              // MOV EAX,floatreg
    }
    else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
    {
        push87(cdb);
        cdb.genfltreg(STO, AX, 0);              // MOV floatreg, EAX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        push87(cdb);
        cdb.genfltreg(STO, DX, 0);              // MOV floatreg, EDX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             *pretregs & (mXMM0|mXMM1) && retregs & mST01)
    {
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmload(tyf);
        uint mf = tyf == TYfloat ? MFfloat : MFdouble;
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mXMM0|mXMM1);
        cdb.genxmmreg(xop,XMM1,0,tyf);          // load XMM1 from floatreg

        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        cdb.genxmmreg(xop, XMM0, 0, tyf);       // MOVD XMM0,floatreg
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             retregs & (mXMM0|mXMM1) && *pretregs & mST01)
    {
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmstore(tyf);
        uint fop = tym == TYcfloat ? 0xD9 : 0xDD;
        push87(cdb);
        cdb.genfltreg(xop, XMM0-XMM0, 0);       // STOS(SD) floatreg, XMM0
        checkSetVex(cdb.last(),tyf);
        cdb.genfltreg(fop, 0, 0);               // FLD double ptr floatreg

        push87(cdb);
        cdb.genxmmreg(xop, XMM1, 0, tyf);       // MOV floatreg, XMM1
        cdb.genfltreg(fop, 0, 0);               // FLD double ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else
    {   if (*pretregs & mPSW)
        {   if (!(retregs & mPSW))
            {   assert(retregs & mST01);
                // pop the operands if they are not also wanted in ST01
                genctst(cdb,e,!(*pretregs & mST01));        // FTST
            }
        }
        assert(!(*pretregs & mST01) || (retregs & mST01));
    }
    if (*pretregs & mST01)
    {   note87(e,0,1);
        note87(e,sz/2,0);
    }
}

/*****************************************
 * Operators OPc_r and OPc_i
 * Reduce a complex value in ST01 to a single component left in ST0.
 */

@trusted
void cdconvt87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs = mST01;
    codelem(cdb,e.EV.E1, &retregs, false);
    switch (e.Eoper)
    {
        case OPc_r:
            cdb.genf2(0xDD,0xD8 + 0); // FPOP
            pop87();
            break;

        case OPc_i:
            cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1)
            pop87();
            break;

        default:
            assert(0);
    }
    retregs = mST0;
    fixresult87(cdb, e, retregs, pretregs);
}

/**************************************
 *
* Load complex operand into ST01 or flags or both.
 * The first component loaded ends up in ST1 and the second (at offset
 * sz, or constant index 1) in ST0.
 */

@trusted
void cload87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //elem_print(e);
    assert(!I16);
    debug
    if (I32)
    {
        assert(config.inline8087);
        elem_debug(e);
        assert(*pretregs & (mST01 | mPSW));
        assert(!(*pretregs & ~(mST01 | mPSW)));
    }

    tym_t ty = tybasic(e.Ety);
    code cs = void;
    uint mf;
    uint sz;
    ubyte ldop;
    regm_t retregs;
    int i;

    //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    sz = _tysize[ty] / 2;               // size of one component
    memset(&cs, 0, cs.sizeof);
    if (ADDFWAIT())
        cs.Iflags = CFwait;
    switch (ty)
    {
        case TYcfloat:      mf = MFfloat;           break;
        case TYcdouble:     mf = MFdouble;          break;
        case TYcldouble:    break;                  // mf is not used for 80 bit loads
        default:            assert(0);
    }
    switch (e.Eoper)
    {
        case OPvar:
            notreg(e);                  // never enregister this variable
            goto case OPind;

        case OPind:
            push87(cdb);
            push87(cdb);
            switch (ty)
            {
                case TYcfloat:
                case TYcdouble:
                    loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0);        // FLD var
                    cs.IEV1.Voffset += sz;
                    cdb.gen(&cs);               // FLD var+sz
                    break;

                case TYcldouble:
                    loadea(cdb,e,&cs,0xDB,5,0,0,0);             // FLD var
                    cs.IEV1.Voffset += sz;
                    cdb.gen(&cs);               // FLD var+sz
                    break;

                default:
                    assert(0);
            }
            retregs = mST01;
            break;

        case OPd_ld:
        case OPld_d:
        case OPf_d:
        case OPd_f:
            // Precision conversions: load the operand directly instead
            cload87(cdb,e.EV.E1, pretregs);
            freenode(e.EV.E1);
            return;

        case OPconst:
            push87(cdb);
            push87(cdb);
            for (i = 0; i < 2; i++)
            {
                ldop = loadconst(e, i);
                if (ldop)
                {
                    cdb.genf2(0xD9,ldop);         // FLDx
                }
                else
                {
                    // only constants with a dedicated FLDx opcode are expected here
                    assert(0);
                }
            }
            retregs = mST01;
            break;

        default:
            debug elem_print(e);
            assert(0);
    }
    fixresult_complex87(cdb, e, retregs, pretregs);
}

/**********************************************
 * Load OPpair or OPrpair into mST01
 */
@trusted
void loadPair87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(e.Eoper == OPpair || e.Eoper == OPrpair);
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    note87(e.EV.E1, 0, 0);
    codelem(cdb,e.EV.E2, &retregs, false);
    makesure87(cdb,e.EV.E1, 0, 1, 0);
    // OPrpair has its operands in the reverse order, so swap them
    if (e.Eoper == OPrpair)
        cdb.genf2(0xD9, 0xC8 + 1);   // FXCH ST(1)
    retregs = mST01;
    fixresult_complex87(cdb, e, retregs, pretregs);
}

/**********************************************
 * Round 80 bit precision to 32 or 64 bits.
 * OPtoprec
 * The rounding is done by storing ST0 to floatreg at the target
 * precision and loading it back.
 */
@trusted
void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdtoprec: *pretregs = %s\n", regm_str(*pretregs));
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    assert(config.inline8087);
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    if (*pretregs & mST0)
    {
        const tym = tybasic(e.Ety);
        const sz = _tysize[tym];
        uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
        cdb.genfltreg(ESC(mf,1),3,0);   // FSTP float/double ptr fltreg
        genfwait(cdb);
        cdb.genfltreg(ESC(mf,1),0,0);   // FLD float/double ptr fltreg
    }
    fixresult87(cdb, e, retregs, pretregs);
}

}