/**
 * x87 FPU code generation
 *
 * Compiler implementation of the
 * $(LINK2 https://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1987-1995 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cg87.d, backend/cg87.d)
 */

module dmd.backend.cg87;

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.barray;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;


nothrow:
@safe:

// NOTE: this could be a TLS global which would allow this variable to be used in
// a multi-threaded version of the backend
__gshared Globals87 global87;

// Constants that the 8087 supports directly
// BUG: rewrite for 80 bit long doubles
enum PI      = 3.14159265358979323846;
enum LOG2    = 0.30102999566398119521;
enum LN2     = 0.6931471805599453094172321;
enum LOG2T   = 3.32192809488736234787;
enum LOG2E   = 1.4426950408889634074;   // 1/LN2

enum FWAIT = 0x9B;            // FWAIT opcode

/***********************************
 * Mark the variable referenced by e as not a register candidate.
 * Params:
 *      e = elem whose symbol (e.EV.Vsym) gets GTregcand cleared
 * Returns:
 *      the symbol's flags after the bit has been cleared
 */
@trusted
uint notreg(elem* e)
{
    auto sym = e.EV.Vsym;
    sym.Sflags &= ~GTregcand;
    return sym.Sflags;
}

/***********************************
 * Generate the appropriate ESC instruction.
 * Params:
 *      MF = one of the MFxxxx values below
 *      b = value added into the low bit of the opcode
 * Returns:
 *      0xD8 + (MF << 1) + b, truncated to a byte
 */
ubyte ESC(uint MF, uint b)
{
    const opcode = 0xD8 + (MF << 1) + b;
    return cast(ubyte) opcode;
}

enum
{   // Values for MF
    MFfloat         = 0,
    MFlong          = 1,
    MFdouble        = 2,
    MFword          = 3
}

/*********************************
 */

struct Dconst
{
    int round;
    Symbol *roundto0;
    Symbol *roundtonearest;
}

private __gshared Dconst oldd;

enum NDPP = 0;       // print out debugging info

/// True when the SAHF instruction can't be used
@trusted
bool NOSAHF()
{
    return I64 || config.fpxmmregs;
}

/** 87 Control Word rounding modes */
enum CW : ushort
{
    roundto0        = 0xFBF,
    roundtonearest  = 0x3BF,
}

/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save into an array of NDP structs.
 */

/**********************************
 * Compute the addressing mode for e into pcs with getlvalue(), then
 * adjust the result for use by an x87 instruction.
 * Params:
 *      cdb = code sink
 *      pcs = instruction that receives the addressing mode
 *      e = elem to address
 *      keepmsk = forwarded unchanged to getlvalue()
 */
@trusted
private void getlvalue87(ref CodeBuilder cdb, ref code pcs,elem *e,regm_t keepmsk)
{
    // the x87 instructions cannot read XMM registers
    const oper = e.Eoper;
    if (oper == OPvar || oper == OPrelconst)
        e.EV.Vsym.Sflags &= ~GTregcand;

    getlvalue(cdb, &pcs, e, keepmsk);

    if (ADDFWAIT())
        pcs.Iflags |= CFwait;

    // Strip the operand-size marker that does not apply here
    if (I32)
    {
        pcs.Iflags &= ~CFopsize;
    }
    else if (I64)
    {
        pcs.Irex &= ~REX_W;
    }
}

/****************************************
 * Store ST0 to ndp save location i, popping it.
 * Params:
 *      cdb = code sink
 *      i = save location, emitted as an FLndp operand
 *      ty = type of the value; selects the 32, 64 or 80 bit form
 */

@trusted
private void ndp_fstp(ref CodeBuilder cdb, size_t i, tym_t ty)
{
    uint op;        // opcode byte
    uint digit;     // reg field of the mod/rm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            op = 0xD9;              // FSTP m32real i[BP]
            digit = 3;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            op = 0xDD;              // FSTP m64real i[BP]
            digit = 3;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            op = 0xDB;              // FSTP m80real i[BP]
            digit = 7;
            break;

        default:
            assert(0);
    }
    cdb.genc1(op,modregrm(2,digit,BPRM),FLndp,i);
}

/****************************************
 * Load ndp save location i onto the 8087 stack.
 * Params:
 *      cdb = code sink
 *      i = save location, emitted as an FLndp operand
 *      ty = type of the value; selects the 32, 64 or 80 bit form
 */
@trusted
private void ndp_fld(ref CodeBuilder cdb, size_t i, tym_t ty)
{
    uint op;        // opcode byte
    uint digit;     // reg field of the mod/rm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            op = 0xD9;
            digit = 0;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            op = 0xDD;
            digit = 0;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            op = 0xDB;              // FLD m80real i[BP]
            digit = 5;
            break;

        default:
            assert(0);
    }
    cdb.genc1(op,modregrm(2,digit,BPRM),FLndp,i);
}

/**************************
 * Insert ndp into next available slot in save[].
 * A slot counts as available when its `e` field is null; if there is
 * none, ndp is appended.
 * Params:
 *      save = array of NDP
 *      ndp = NDP to insert into save[]
 * Returns:
 *      index of slot in save[] where ndp was inserted
 */

@safe
private size_t getemptyslot(T)(ref T save, ref NDP ndp)
{
    auto slots = save[];
    for (size_t k = 0; k < slots.length; ++k)
    {
        if (slots[k].e == null)
        {
            slots[k] = ndp;
            return k;
        }
    }

    save.push(ndp);
    return save.length - 1;
}

/*********************************
 * Pop 8087 stack.
 */

void pop87() { pop87(__LINE__, __FILE__); }

/*********************************
 * Pop 8087 stack: decrement global87.stackused and move every
 * global87.stack[] entry one position down, clearing the last one.
 * Params:
 *      line = line number, only used for NDPP debug output
 *      file = file name, only used for NDPP debug output
 */
@trusted
void pop87(int line, const(char)* file)
{
    if (NDPP)
        printf("pop87(%s(%d): stackused=%d)\n", file, line, global87.stackused);

    global87.stackused -= 1;
    assert(global87.stackused >= 0);

    foreach (k; 1 .. global87.stack.length)
        global87.stack[k - 1] = global87.stack[k];
    // end of stack is nothing
    global87.stack[$ - 1] = NDP();
}


/*******************************
 * Push 8087 stack. Generate and return any code
 * necessary to preserve anything that might run off the end of the stack.
*/

void push87(ref CodeBuilder cdb) { push87(cdb,__LINE__,__FILE__); }

/*******************************
 * Push 8087 stack, first spilling global87.stack[7] to a save slot
 * when that entry is in use.
 * Params:
 *      cdb = code sink for the spill code
 *      line = line number, only used for NDPP debug output
 *      file = file name, only used for NDPP debug output
 */
@trusted
void push87(ref CodeBuilder cdb, int line, const(char)* file)
{
    enum last = 7;      // index of the deepest tracked entry

    if (global87.stack[last].e == null)
    {
        // Nothing would be lost, one more register is simply in use
        if (NDPP) printf("push87(%s(%d): %d)\n", file, line, global87.stackused);
        ++global87.stackused;
        assert(global87.stackused <= 8);
    }
    else
    {
        // We would lose the top register off of the stack: save it
        const slot = getemptyslot(global87.save, global87.stack[last]);
        cdb.genf2(0xD9,0xF6);                               // FDECSTP
        genfwait(cdb);
        ndp_fstp(cdb, slot, global87.stack[last].e.Ety);    // FSTP slot[BP]
        assert(global87.stackused == 8);
        if (NDPP) printf("push87() : overflow\n");
    }

    // Shift the stack up
    foreach_reverse (k; 1 .. last + 1)
        global87.stack[k] = global87.stack[k - 1];
    global87.stack[0] = NDP();
}

/*****************************
 * Note elem e as being in ST(i) as being a value we want to keep.
 */

void note87(elem *e, uint offset, int i)
{
    note87(e, offset, i, __LINE__);
}

/*****************************
 * Record in global87.stack[i] that it holds elem e at the given offset.
 * If e is a chain of OPcomma nodes, the rightmost operand is recorded.
 * Params:
 *      e = elem to record
 *      offset = offset stored alongside e
 *      i = stack index, must be less than global87.stackused
 *      linnum = line number, only used for debug output
 */
@trusted
void note87(elem *e, uint offset, int i, int linnum)
{
    if (NDPP)
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);

    static if (0)
    {
        if (global87.stack[i].e)
            printf("global87.stack[%d].e = %p\n",i,global87.stack[i].e);
    }

    debug if (i >= global87.stackused)
    {
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);
        elem_print(e);
    }
    assert(i < global87.stackused);

    // Skip down to the value of a comma expression
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    auto entry = &global87.stack[i];
    entry.e = e;
    entry.offset = offset;
}

/****************************************************
 * Exchange two entries in 8087 stack.
*/

@trusted
void xchg87(int i, int j)
{
    auto held = global87.stack[j];
    global87.stack[j] = global87.stack[i];
    global87.stack[i] = held;
}

/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 * Input:
 *      i       0..3    8087 register number
 *      flag    1       don't bother with FXCH
 */

private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag)
{
    makesure87(cdb,e,offset,i,flag,__LINE__);
}

/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 *
 * If global87.stack[i] does not already describe (e, offset), the value
 * must be in global87.save[]; it is loaded back with FLD and, unless
 * bit 0 of flag is set, moved into position with a series of FXCHs.
 * Params:
 *      cdb = code sink
 *      e = elem to look for; OPcomma nodes are skipped to their right operand
 *      offset = offset that must match the recorded one
 *      i = 8087 register number, 0..3
 *      flag = 1: don't bother with FXCH
 *      linnum = line number, only used for debug output
 */
@trusted
private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag,int linnum)
{
    debug if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);

    // Check e before it is dereferenced, not after
    assert(e);
    while (e && e.Eoper == OPcomma)
        e = e.EV.E2;
    assert(e && i >= 0 && i < 4);

    if (global87.stack[i].e == e && global87.stack[i].offset == offset)
        return;                         // already where it should be

    debug if (global87.stack[i].e)
        printf("global87.stack[%d].e = %p, .offset = %d\n",i,global87.stack[i].e,global87.stack[i].offset);

    assert(global87.stack[i].e == null);

    /* Find the save slot holding (e, offset). There is no retry on the
     * right operand of a comma here: e can no longer be an OPcomma at
     * this point, so failing to find it is an internal error.
     */
    size_t j = 0;
    for (; ; j++)
    {
        debug if (j >= global87.save.length)
            printf("e = %p, global87.save.length = %llu\n",e, cast(ulong) global87.save.length);

        assert(j < global87.save.length);
        //printf("\tglobal87.save[%d] = %p, .offset = %d\n", j, global87.save[j].e, global87.save[j].offset);
        if (e == global87.save[j].e && offset == global87.save[j].offset)
            break;
    }

    push87(cdb);
    genfwait(cdb);
    ndp_fld(cdb, j, e.Ety);             // FLD j[BP]
    if (!(flag & 1))
    {
        while (i != 0)
        {
            cdb.genf2(0xD9,0xC8 + i);   // FXCH ST(i)
            i--;
        }
    }
    global87.save[j] = NDP();           // back in 8087
}

/****************************
 * Save in memory any values in the
8087 that we want to keep.
 */

@trusted
void save87(ref CodeBuilder cdb)
{
    uint stores = 0;            // number of FSTPs generated
    for (; global87.stack[0].e && global87.stackused; ++stores)
    {
        // Save it
        const slot = getemptyslot(global87.save, global87.stack[0]);
        if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e, cast(int)slot);

        genfwait(cdb);
        ndp_fstp(cdb,slot,global87.stack[0].e.Ety);     // FSTP slot[BP]
        pop87();
    }
    if (stores != 0)            // if any stores
        genfwait(cdb);          // wait for last one to finish
}

/******************************************
 * Save any noted values that would be destroyed by n pushes
 * Params:
 *      cdb = code sink
 *      n = number of pushes, at most 7
 */

@trusted
void save87regs(ref CodeBuilder cdb, uint n)
{
    assert(n <= 7);
    uint keep = 8 - n;          // number of entries that may stay
    if (!(global87.stackused > keep))
        return;

    for (uint pos = 8; pos > keep; pos--)
    {
        cdb.genf2(0xD9,0xF6);                           // FDECSTP
        genfwait(cdb);
        if (pos > global87.stackused)
            continue;           // nothing recorded at this position

        const slot = getemptyslot(global87.save, global87.stack[pos - 1]);
        ndp_fstp(cdb, slot, global87.stack[pos - 1].e.Ety);   // FSTP slot[BP]
        global87.stack[pos - 1] = NDP();
    }

    for (uint pos = 8; pos > keep; pos--)
    {
        if (pos <= global87.stackused)
            continue;
        cdb.genf2(0xD9,0xF7);                           // FINCSTP
        genfwait(cdb);
    }
    global87.stackused = keep;
}

/*****************************************************
 * Save/restore ST0 or ST01
 * Params:
 *      regm = mST0 or mST01
 *      cdbsave = receives the code storing the register(s)
 *      cdbrestore = receives the code loading them back
 */

@trusted
void gensaverestore87(regm_t regm, ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore87(%s)\n", regm_str(regm));
    assert(regm == mST0 || regm == mST01);
    const bool both = (regm == mST01);

    auto first = NDP(el_calloc());
    const slot0 = getemptyslot(global87.save, first);   // this blocks slot [slot0] for the life of this function
    ndp_fstp(cdbsave, slot0, TYldouble);

    // The reload of the first value goes last in the restore sequence
    CodeBuilder reload0;
    reload0.ctor();
    ndp_fld(reload0, slot0, TYldouble);

    if (both)
    {
        auto second = NDP(el_calloc());
        const slot1 = getemptyslot(global87.save, second);
        ndp_fstp(cdbsave, slot1, TYldouble);
        ndp_fld(cdbrestore, slot1, TYldouble);
    }

    cdbrestore.append(reload0);
}

/*************************************
 * Find which, if any, slot on stack holds elem e.
 * Params:
 *      e = elem to look for
 *      offset = offset that must match as well
 * Returns:
 *      index into global87.stack[], or -1 if not found
 */

@trusted
private int cse_get(elem *e, uint offset)
{
    for (int k = 0; k != global87.stackused; ++k)
    {
        if (global87.stack[k].e == e &&
            global87.stack[k].offset == offset)
            return k;
    }
    return -1;
}

/*************************************
 * Reload common subexpression.
 * If e is found on the 8087 stack it is duplicated with FLD ST(i),
 * otherwise it is reloaded with loaddata().
 */

void comsub87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Look on 8087 stack
    const int re = cse_get(e, 0);

    if (!tycomplex(e.Ety))
    {
        if (re < 0)
        {
            // Reload
            loaddata(cdb,e,pretregs);
            return;
        }
        push87(cdb);
        cdb.genf2(0xD9,0xC0 + re);          // FLD ST(re)
        if (*pretregs & XMMREGS)
            fixresult87(cdb,e,mST0,pretregs);
        else
            fixresult(cdb,e,mST0,pretregs);
        return;
    }

    // Complex: the second half is recorded at offset size/2
    uint sz = tysize(e.Ety);
    const int im = cse_get(e, sz / 2);
    if (re < 0 || im < 0)
    {
        // Reload
        loaddata(cdb,e,pretregs);
        return;
    }
    push87(cdb);
    push87(cdb);
    cdb.genf2(0xD9,0xC0 + re);              // FLD ST(re)
    cdb.genf2(0xD9,0xC0 + im + 1);          // FLD ST(im + 1)
    fixresult_complex87(cdb,e,mST01,pretregs);
}


/*******************************
 * Decide if we need to gen an FWAIT.
 */

public void genfwait(ref CodeBuilder cdb)
{
    if (!ADDFWAIT())
        return;
    cdb.gen1(FWAIT);
}


/***************************
 * Put the 8087 flags into the CPU flags.
*/

@trusted
private void cg87_87topsw(ref CodeBuilder cdb)
{
    /* Note that SAHF is not available on some early I64 processors
     * and will cause a seg fault
     */
    assert(!NOSAHF);
    getregs(cdb,mAX);
    if (config.target_cpu >= TARGET_80286)
        cdb.genf2(0xDF,0xE0);             // FSTSW AX
    else
    {
        cdb.genfltreg(0xD8+5,7,0);        // FSTSW floatreg[BP]
        genfwait(cdb);                    // FWAIT
        cdb.genfltreg(0x8A,4,1);          // MOV AH,floatreg+1[BP]
    }
    cdb.gen1(0x9E);                       // SAHF
    code_orflag(cdb.last(),CFpsw);
}

/*****************************************
 * Jump to ctarget if condition code C2 is set.
 * Params:
 *      cdb = code sink
 *      ctarget = jump destination
 */

@trusted
private void genjmpifC2(ref CodeBuilder cdb, code *ctarget)
{
    if (NOSAHF)
    {
        // No SAHF: test the C2 bit directly in the status word
        getregs(cdb,mAX);
        cdb.genf2(0xDF,0xE0);                                    // FSTSW AX
        cdb.genc2(0xF6,modregrm(3,0,4),4);                       // TEST AH,4
        genjmp(cdb, JNE, FLcode, cast(block *)ctarget);          // JNE ctarget
    }
    else
    {
        cg87_87topsw(cdb);
        genjmp(cdb, JP, FLcode, cast(block *)ctarget);           // JP  ctarget
    }
}

/***************************
 * Set the PSW based on the state of ST0.
 * Params:
 *      cdb = code sink
 *      e = elem passed to callclib() when the library routine is used
 *      pop = non-zero if the 8087 stack should be popped after the test
 */

@trusted
private void genftst(ref CodeBuilder cdb,elem *e,int pop)
{
    if (NOSAHF)
    {
        push87(cdb);
        cdb.gen2(0xD9,0xEE);            // FLDZ
        cdb.gen2(0xDF,0xE9);            // FUCOMIP ST1
        pop87();
        if (pop)
        {
            cdb.genf2(0xDD,modregrm(3,3,0));    // FPOP
            pop87();
        }
    }
    else if (config.flags4 & CFG4fastfloat)  // if fast floating point
    {
        cdb.genf2(0xD9,0xE4);                // FTST
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
        if (pop)
        {
            cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
            pop87();
        }
    }
    else if (config.target_cpu >= TARGET_80386)
    {
        // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(pop ? 0xDA : 0xDD,0xE9);    // FUCOMPP / FUCOMP
        pop87();
        if (pop)
            pop87();
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
    }
    else
    {
        // Call library function which does not raise exceptions
        regm_t regm = 0;

        callclib(cdb,e,CLIB.ftest,&regm,0);
        if (pop)
        {
            cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP
            pop87();
        }
    }
}

/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 * Input:
 *      im      0       load real part
 *              1       load imaginary part
 * Returns:
 *      opcode if found (an entry of the opcode[] table below,
 *      e.g. 0xEE for FLDZ)
 *      0 if not
 */

@trusted
ubyte loadconst(elem *e, int im)
{
    elem_debug(e);
    assert(im == 0 || im == 1);

    // Entry k of each table is the value loaded by opcode[k]
    immutable float[7] fval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];
    immutable double[7] dval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];

    static if (real.sizeof < 10)
    {
        import dmd.root.longdouble;
        immutable targ_ldouble[7] ldval =
            [ld_zero,ld_one,ld_pi,ld_log2t,ld_log2e,ld_log2,ld_ln2];
    }
    else
    {
        enum M_PI_L        = 0x1.921fb54442d1846ap+1L;       // 3.14159 fldpi
        enum M_LOG2T_L     = 0x1.a934f0979a3715fcp+1L;       // 3.32193 fldl2t
        enum M_LOG2E_L     = 0x1.71547652b82fe178p+0L;       // 1.4427 fldl2e
        enum M_LOG2_L      = 0x1.34413509f79fef32p-2L;       // 0.30103 fldlg2
        enum M_LN2_L       = 0x1.62e42fefa39ef358p-1L;       // 0.693147 fldln2
        immutable targ_ldouble[7] ldval =
            [0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L];
    }

    immutable ubyte[7 + 1] opcode =
        /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
        [0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0];

    int i;
    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;             // number of significant bytes at p
    int zero;
    void *p;            // points at whichever of f, d, ld holds the value
    immutable ubyte[16] zeros;

    if (im == 0)
    {
        switch (tybasic(e.Ety))
        {
            case TYfloat:
            case TYifloat:
            case TYcfloat:
                f = e.EV.Vfloat;
                sz = 4;
                p = &f;
                break;

            case TYdouble:
            case TYdouble_alias:
            case TYidouble:
            case TYcdouble:
                d = e.EV.Vdouble;
                sz = 8;
                p = &d;
                break;

            case TYldouble:
            case TYildouble:
            case TYcldouble:
                ld = e.EV.Vldouble;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }
    else
    {
        switch (tybasic(e.Ety))
        {
            case TYcfloat:
                f = e.EV.Vcfloat.im;
                sz = 4;
                p = &f;
                break;

            case TYcdouble:
                d = e.EV.Vcdouble.im;
                sz = 8;
                p = &d;
                break;

            case TYcldouble:
                ld = e.EV.Vcldouble.im;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    assert(sz <= zeros.length);
    zero = (memcmp(p, zeros.ptr, sz) == 0);
    if (zero && config.target_cpu >= TARGET_PentiumPro)
        return 0xEE;            // FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
        return 0;

    if (zero)
        return 0xEE;

    // Entry 0 (zero) was handled above. If nothing matches, i ends up
    // as 7 and the trailing 0 of opcode[] is returned.
    for (i = 1; i < fval.length; i++)
    {
        switch (sz)
        {
            case 4:
                if (fval[i] != f)
                    continue;
                break;
            case 8:
                if (dval[i] != d)
                    continue;
                break;
            case 10:
                if (ldval[i] != ld)
                    continue;
                break;
            default:
                assert(0);
        }
        break;
    }
    return opcode[i];
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
*/

/******************************
 * Move a floating point result from retregs to *pretregs, going through
 * the floatreg memory temporary (or a library routine for 16 bit code)
 * when the value has to cross between the 8087, the CPU registers and
 * the XMM registers.
 * Params:
 *      cdb = code sink
 *      e = elem whose type selects the operand size
 *      retregs = where the result currently is
 *      pretregs = where the result is wanted; updated by allocreg()
 *      isReturnValue = only forwarded to fixresult_complex87()
 */
@trusted
void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
    //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
    assert(!*pretregs || retregs);

    // Anything involving the ST0/ST1 pair is handled by the complex version
    if ((*pretregs | retregs) & mST01)
    {
        fixresult_complex87(cdb, e, retregs, pretregs, isReturnValue);
        return;
    }

    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];
    //printf("tym = x%x, sz = %d\n", tym, sz);

    /* if retregs needs to be transferred into the 8087 */
    if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
    {
        debug if (sz > DOUBLESIZE)
        {
            elem_print(e);
            printf("retregs = %s\n", regm_str(retregs));
        }
        assert(sz <= DOUBLESIZE);
        if (!I16)
        {

            if (*pretregs & mPSW)
            {   // Set flags
                regm_t r = retregs | mPSW;
                fixresult(cdb,e,retregs,&r);
            }
            push87(cdb);
            if (sz == REGSIZE || (I64 && sz == 4))
            {
                const reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);           // MOV fltreg,reg
                cdb.genfltreg(0xD9,0,0);            // FLD float ptr fltreg
            }
            else
            {
                const msreg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cdb.genfltreg(STO,lsreg,0);         // MOV fltreg,lsreg
                cdb.genfltreg(STO,msreg,4);         // MOV fltreg+4,msreg
                cdb.genfltreg(0xDD,0,0);            // FLD double ptr fltreg
            }
        }
        else
        {
            regm_t regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
            regm |= *pretregs & mPSW;
            fixresult(cdb,e,retregs,&regm);
            regm = 0;       // don't worry about result from CLIB.xxx
            callclib(cdb,e,
                    ((sz == FLOATSIZE) ? CLIB.fltto87 : CLIB.dblto87),
                    &regm,0);
        }
    }
    /* if the 8087 result needs to be transferred into CPU registers */
    else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
    {
        assert(sz <= DOUBLESIZE);
        uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
        if (*pretregs & mPSW && !(retregs & mPSW))
            genftst(cdb,e,0);
        // FSTP floatreg
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);
        genfwait(cdb);
        reg_t reg;
        allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
        if (sz == FLOATSIZE)
        {
            if (!I16)
                cdb.genfltreg(LOD,reg,0);
            else
            {
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
        }
        else
        {   assert(sz == DOUBLESIZE);
            if (I16)
            {
                cdb.genfltreg(LOD,AX,6);
                cdb.genfltreg(LOD,BX,4);
                cdb.genfltreg(LOD,CX,2);
                cdb.genfltreg(LOD,DX,0);
            }
            else if (I32)
            {
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
            else // I64
            {
                cdb.genfltreg(LOD,reg,0);
                code_orrex(cdb.last(), REX_W);
            }
        }
    }
    /* result in ST0 is not wanted at all */
    else if (*pretregs == 0 && retregs == mST0)
    {
        cdb.genf2(0xDD,modregrm(3,3,0));    // FPOP
        pop87();
    }
    else
    {
        if (*pretregs & mPSW)
        {
            if (!(retregs & mPSW))
            {
                genftst(cdb,e,!(*pretregs & (mST0 | XMMREGS))); // FTST
            }
        }
        if (*pretregs & mST0 && retregs & XMMREGS)
        {
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // MOVD floatreg,XMM?
            const reg = findreg(retregs);
            cdb.genxmmreg(xmmstore(tym),reg,0,tym);
            push87(cdb);
            cdb.genfltreg(ESC(mf,1),0,0);                 // FLD float/double ptr fltreg
        }
        else if (retregs & mST0 && *pretregs & XMMREGS)
        {
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // FSTP floatreg
            pop87();
            cdb.genfltreg(ESC(mf,1),3,0);
            genfwait(cdb);
            // MOVD XMM?,floatreg
            reg_t reg;
            allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
            cdb.genxmmreg(xmmload(tym),reg,0,tym);
        }
        else
            assert(!(*pretregs & mST0) || (retregs & mST0));
    }
    if (*pretregs & mST0)
        note87(e,0,0);
}

/********************************
 * Generate in-line 8087 code for the following operators:
 *      add
 *      min
 *      mul
 *      div
 *      cmp
 */

// Reverse the order that the op is done in
__gshared const ubyte[9] oprev = [ cast(ubyte)-1,0,1,2,3,5,4,7,6 ];

@trusted
void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // we could be evaluating / for side effects only
    assert(*pretregs != 0);

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    uint sz2 = tysize(e1.Ety);
    if (tycomplex(e1.Ety))
        sz2 /= 2;

    OPER eoper = e.Eoper;
    if (eoper == OPmul && e2.Eoper == OPconst && el_toldoubled(e.EV.E2) == 2.0L)
    {
        // Perform "mul 2.0" as fadd ST(0), ST
        regm_t retregs = mST0;
        codelem(cdb,e1,&retregs,false);
        cdb.genf2(0xDC, 0xC0);                    // fadd ST(0), ST;
        fixresult87(cdb,e,mST0,pretregs);         // result is in ST(0).
966 freenode(e2); 967 return; 968 } 969 970 uint op; 971 if (OTrel(eoper)) 972 eoper = OPeqeq; 973 bool imaginary; 974 static uint X(OPER op, uint ty1, uint ty2) { return (op << 16) + ty1 * 256 + ty2; } 975 switch (X(eoper, tybasic(e1.Ety), tybasic(e2.Ety))) 976 { 977 case X(OPadd, TYfloat, TYfloat): 978 case X(OPadd, TYdouble, TYdouble): 979 case X(OPadd, TYdouble_alias, TYdouble_alias): 980 case X(OPadd, TYldouble, TYldouble): 981 case X(OPadd, TYldouble, TYdouble): 982 case X(OPadd, TYdouble, TYldouble): 983 case X(OPadd, TYifloat, TYifloat): 984 case X(OPadd, TYidouble, TYidouble): 985 case X(OPadd, TYildouble, TYildouble): 986 op = 0; // FADDP 987 break; 988 989 case X(OPmin, TYfloat, TYfloat): 990 case X(OPmin, TYdouble, TYdouble): 991 case X(OPmin, TYdouble_alias, TYdouble_alias): 992 case X(OPmin, TYldouble, TYldouble): 993 case X(OPmin, TYldouble, TYdouble): 994 case X(OPmin, TYdouble, TYldouble): 995 case X(OPmin, TYifloat, TYifloat): 996 case X(OPmin, TYidouble, TYidouble): 997 case X(OPmin, TYildouble, TYildouble): 998 op = 4; // FSUBP 999 break; 1000 1001 case X(OPmul, TYfloat, TYfloat): 1002 case X(OPmul, TYdouble, TYdouble): 1003 case X(OPmul, TYdouble_alias, TYdouble_alias): 1004 case X(OPmul, TYldouble, TYldouble): 1005 case X(OPmul, TYldouble, TYdouble): 1006 case X(OPmul, TYdouble, TYldouble): 1007 case X(OPmul, TYifloat, TYifloat): 1008 case X(OPmul, TYidouble, TYidouble): 1009 case X(OPmul, TYildouble, TYildouble): 1010 case X(OPmul, TYfloat, TYifloat): 1011 case X(OPmul, TYdouble, TYidouble): 1012 case X(OPmul, TYldouble, TYildouble): 1013 case X(OPmul, TYifloat, TYfloat): 1014 case X(OPmul, TYidouble, TYdouble): 1015 case X(OPmul, TYildouble, TYldouble): 1016 op = 1; // FMULP 1017 break; 1018 1019 case X(OPdiv, TYfloat, TYfloat): 1020 case X(OPdiv, TYdouble, TYdouble): 1021 case X(OPdiv, TYdouble_alias, TYdouble_alias): 1022 case X(OPdiv, TYldouble, TYldouble): 1023 case X(OPdiv, TYldouble, TYdouble): 1024 case X(OPdiv, TYdouble, TYldouble): 
1025 case X(OPdiv, TYifloat, TYifloat): 1026 case X(OPdiv, TYidouble, TYidouble): 1027 case X(OPdiv, TYildouble, TYildouble): 1028 op = 6; // FDIVP 1029 break; 1030 1031 case X(OPmod, TYfloat, TYfloat): 1032 case X(OPmod, TYdouble, TYdouble): 1033 case X(OPmod, TYdouble_alias, TYdouble_alias): 1034 case X(OPmod, TYldouble, TYldouble): 1035 case X(OPmod, TYfloat, TYifloat): 1036 case X(OPmod, TYdouble, TYidouble): 1037 case X(OPmod, TYldouble, TYildouble): 1038 case X(OPmod, TYifloat, TYifloat): 1039 case X(OPmod, TYidouble, TYidouble): 1040 case X(OPmod, TYildouble, TYildouble): 1041 case X(OPmod, TYifloat, TYfloat): 1042 case X(OPmod, TYidouble, TYdouble): 1043 case X(OPmod, TYildouble, TYldouble): 1044 op = cast(uint) -1; 1045 break; 1046 1047 case X(OPeqeq, TYfloat, TYfloat): 1048 case X(OPeqeq, TYdouble, TYdouble): 1049 case X(OPeqeq, TYdouble_alias, TYdouble_alias): 1050 case X(OPeqeq, TYldouble, TYldouble): 1051 case X(OPeqeq, TYifloat, TYifloat): 1052 case X(OPeqeq, TYidouble, TYidouble): 1053 case X(OPeqeq, TYildouble, TYildouble): 1054 { 1055 assert(OTrel(e.Eoper)); 1056 assert((*pretregs & mST0) == 0); 1057 regm_t retregs = mST0; 1058 codelem(cdb,e1,&retregs,false); 1059 note87(e1,0,0); 1060 regm_t resregm = mPSW; 1061 1062 if (rel_exception(e.Eoper) || config.flags4 & CFG4fastfloat) 1063 { 1064 if (e2.Eoper == OPconst && !boolres(e2)) 1065 { 1066 if (NOSAHF) 1067 { 1068 push87(cdb); 1069 cdb.gen2(0xD9,0xEE); // FLDZ 1070 cdb.gen2(0xDF,0xF1); // FCOMIP ST1 1071 pop87(); 1072 } 1073 else 1074 { 1075 cdb.genf2(0xD9,0xE4); // FTST 1076 cg87_87topsw(cdb); 1077 } 1078 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1079 pop87(); 1080 } 1081 else if (NOSAHF) 1082 { 1083 note87(e1,0,0); 1084 load87(cdb,e2,0,&retregs,e1,-1); 1085 makesure87(cdb,e1,0,1,0); 1086 resregm = 0; 1087 //cdb.genf2(0xD9,0xC8 + 1); // FXCH ST1 1088 cdb.gen2(0xDF,0xF1); // FCOMIP ST1 1089 pop87(); 1090 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1091 pop87(); 1092 } 1093 else 1094 { 1095 
load87(cdb,e2, 0, pretregs, e1, 3); // FCOMPP 1096 } 1097 } 1098 else 1099 { 1100 if (e2.Eoper == OPconst && !boolres(e2) && 1101 config.target_cpu < TARGET_80386) 1102 { 1103 regm_t regm = 0; 1104 1105 callclib(cdb,e,CLIB.ftest0,®m,0); 1106 pop87(); 1107 } 1108 else 1109 { 1110 note87(e1,0,0); 1111 load87(cdb,e2,0,&retregs,e1,-1); 1112 makesure87(cdb,e1,0,1,0); 1113 resregm = 0; 1114 if (NOSAHF) 1115 { 1116 cdb.gen2(0xDF,0xE9); // FUCOMIP ST1 1117 pop87(); 1118 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1119 pop87(); 1120 } 1121 else if (config.target_cpu >= TARGET_80386) 1122 { 1123 cdb.gen2(0xDA,0xE9); // FUCOMPP 1124 cg87_87topsw(cdb); 1125 pop87(); 1126 pop87(); 1127 } 1128 else 1129 // Call a function instead so that exceptions 1130 // are not generated. 1131 callclib(cdb,e,CLIB.fcompp,&resregm,0); 1132 } 1133 } 1134 1135 freenode(e2); 1136 return; 1137 } 1138 1139 case X(OPadd, TYcfloat, TYcfloat): 1140 case X(OPadd, TYcdouble, TYcdouble): 1141 case X(OPadd, TYcldouble, TYcldouble): 1142 case X(OPadd, TYcfloat, TYfloat): 1143 case X(OPadd, TYcdouble, TYdouble): 1144 case X(OPadd, TYcldouble, TYldouble): 1145 case X(OPadd, TYfloat, TYcfloat): 1146 case X(OPadd, TYdouble, TYcdouble): 1147 case X(OPadd, TYldouble, TYcldouble): 1148 goto Lcomplex; 1149 1150 case X(OPadd, TYifloat, TYcfloat): 1151 case X(OPadd, TYidouble, TYcdouble): 1152 case X(OPadd, TYildouble, TYcldouble): 1153 goto Lcomplex2; 1154 1155 case X(OPmin, TYcfloat, TYcfloat): 1156 case X(OPmin, TYcdouble, TYcdouble): 1157 case X(OPmin, TYcldouble, TYcldouble): 1158 case X(OPmin, TYcfloat, TYfloat): 1159 case X(OPmin, TYcdouble, TYdouble): 1160 case X(OPmin, TYcldouble, TYldouble): 1161 case X(OPmin, TYfloat, TYcfloat): 1162 case X(OPmin, TYdouble, TYcdouble): 1163 case X(OPmin, TYldouble, TYcldouble): 1164 goto Lcomplex; 1165 1166 case X(OPmin, TYifloat, TYcfloat): 1167 case X(OPmin, TYidouble, TYcdouble): 1168 case X(OPmin, TYildouble, TYcldouble): 1169 goto Lcomplex2; 1170 1171 case X(OPmul, 
TYcfloat, TYcfloat): 1172 case X(OPmul, TYcdouble, TYcdouble): 1173 case X(OPmul, TYcldouble, TYcldouble): 1174 goto Lcomplex; 1175 1176 case X(OPdiv, TYcfloat, TYcfloat): 1177 case X(OPdiv, TYcdouble, TYcdouble): 1178 case X(OPdiv, TYcldouble, TYcldouble): 1179 case X(OPdiv, TYfloat, TYcfloat): 1180 case X(OPdiv, TYdouble, TYcdouble): 1181 case X(OPdiv, TYldouble, TYcldouble): 1182 case X(OPdiv, TYifloat, TYcfloat): 1183 case X(OPdiv, TYidouble, TYcdouble): 1184 case X(OPdiv, TYildouble, TYcldouble): 1185 goto Lcomplex; 1186 1187 case X(OPdiv, TYifloat, TYfloat): 1188 case X(OPdiv, TYidouble, TYdouble): 1189 case X(OPdiv, TYildouble, TYldouble): 1190 op = 6; // FDIVP 1191 break; 1192 1193 Lcomplex: 1194 { 1195 loadComplex(cdb,e1); 1196 loadComplex(cdb,e2); 1197 makesure87(cdb, e1, sz2, 2, 0); 1198 makesure87(cdb, e1, 0, 3, 0); 1199 regm_t retregs = mST01; 1200 if (eoper == OPadd) 1201 { 1202 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1203 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1204 pop87(); 1205 pop87(); 1206 } 1207 else if (eoper == OPmin) 1208 { 1209 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1210 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1211 pop87(); 1212 pop87(); 1213 } 1214 else 1215 { 1216 int clib = eoper == OPmul ? 
CLIB.cmul : CLIB.cdiv; 1217 callclib(cdb, e, clib, &retregs, 0); 1218 } 1219 fixresult_complex87(cdb, e, retregs, pretregs); 1220 return; 1221 } 1222 1223 Lcomplex2: 1224 { 1225 regm_t retregs = mST0; 1226 codelem(cdb,e1, &retregs, false); 1227 note87(e1, 0, 0); 1228 loadComplex(cdb,e2); 1229 makesure87(cdb, e1, 0, 2, 0); 1230 retregs = mST01; 1231 if (eoper == OPadd) 1232 { 1233 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1234 } 1235 else if (eoper == OPmin) 1236 { 1237 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1238 cdb.genf2(0xD9, 0xE0); // FCHS 1239 } 1240 else 1241 assert(0); 1242 pop87(); 1243 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1244 fixresult_complex87(cdb, e, retregs, pretregs); 1245 return; 1246 } 1247 1248 case X(OPeqeq, TYcfloat, TYcfloat): 1249 case X(OPeqeq, TYcdouble, TYcdouble): 1250 case X(OPeqeq, TYcldouble, TYcldouble): 1251 case X(OPeqeq, TYcfloat, TYifloat): 1252 case X(OPeqeq, TYcdouble, TYidouble): 1253 case X(OPeqeq, TYcldouble, TYildouble): 1254 case X(OPeqeq, TYcfloat, TYfloat): 1255 case X(OPeqeq, TYcdouble, TYdouble): 1256 case X(OPeqeq, TYcldouble, TYldouble): 1257 case X(OPeqeq, TYifloat, TYcfloat): 1258 case X(OPeqeq, TYidouble, TYcdouble): 1259 case X(OPeqeq, TYildouble, TYcldouble): 1260 case X(OPeqeq, TYfloat, TYcfloat): 1261 case X(OPeqeq, TYdouble, TYcdouble): 1262 case X(OPeqeq, TYldouble, TYcldouble): 1263 case X(OPeqeq, TYfloat, TYifloat): 1264 case X(OPeqeq, TYdouble, TYidouble): 1265 case X(OPeqeq, TYldouble, TYildouble): 1266 case X(OPeqeq, TYifloat, TYfloat): 1267 case X(OPeqeq, TYidouble, TYdouble): 1268 case X(OPeqeq, TYildouble, TYldouble): 1269 { 1270 loadComplex(cdb,e1); 1271 loadComplex(cdb,e2); 1272 makesure87(cdb, e1, sz2, 2, 0); 1273 makesure87(cdb, e1, 0, 3, 0); 1274 regm_t retregs = 0; 1275 callclib(cdb, e, CLIB.ccmp, &retregs, 0); 1276 return; 1277 } 1278 1279 case X(OPadd, TYfloat, TYifloat): 1280 case X(OPadd, TYdouble, TYidouble): 1281 case X(OPadd, TYldouble, TYildouble): 1282 case X(OPadd, TYifloat, 
TYfloat): 1283 case X(OPadd, TYidouble, TYdouble): 1284 case X(OPadd, TYildouble, TYldouble): 1285 1286 case X(OPmin, TYfloat, TYifloat): 1287 case X(OPmin, TYdouble, TYidouble): 1288 case X(OPmin, TYldouble, TYildouble): 1289 case X(OPmin, TYifloat, TYfloat): 1290 case X(OPmin, TYidouble, TYdouble): 1291 case X(OPmin, TYildouble, TYldouble): 1292 { 1293 regm_t retregs = mST0; 1294 codelem(cdb,e1, &retregs, false); 1295 note87(e1, 0, 0); 1296 codelem(cdb,e2, &retregs, false); 1297 makesure87(cdb, e1, 0, 1, 0); 1298 if (eoper == OPmin) 1299 cdb.genf2(0xD9, 0xE0); // FCHS 1300 if (tyimaginary(e1.Ety)) 1301 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1302 retregs = mST01; 1303 fixresult_complex87(cdb, e, retregs, pretregs); 1304 return; 1305 } 1306 1307 case X(OPadd, TYcfloat, TYifloat): 1308 case X(OPadd, TYcdouble, TYidouble): 1309 case X(OPadd, TYcldouble, TYildouble): 1310 op = 0; 1311 goto Lci; 1312 1313 case X(OPmin, TYcfloat, TYifloat): 1314 case X(OPmin, TYcdouble, TYidouble): 1315 case X(OPmin, TYcldouble, TYildouble): 1316 op = 4; 1317 goto Lci; 1318 1319 Lci: 1320 { 1321 loadComplex(cdb,e1); 1322 regm_t retregs = mST0; 1323 load87(cdb,e2,sz2,&retregs,e1,op); 1324 freenode(e2); 1325 retregs = mST01; 1326 makesure87(cdb, e1,0,1,0); 1327 fixresult_complex87(cdb,e, retregs, pretregs); 1328 return; 1329 } 1330 1331 case X(OPmul, TYcfloat, TYfloat): 1332 case X(OPmul, TYcdouble, TYdouble): 1333 case X(OPmul, TYcldouble, TYldouble): 1334 imaginary = false; 1335 goto Lcmul; 1336 1337 case X(OPmul, TYcfloat, TYifloat): 1338 case X(OPmul, TYcdouble, TYidouble): 1339 case X(OPmul, TYcldouble, TYildouble): 1340 imaginary = true; 1341 Lcmul: 1342 { 1343 loadComplex(cdb,e1); 1344 if (imaginary) 1345 { 1346 cdb.genf2(0xD9, 0xE0); // FCHS 1347 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1348 if (elemisone(e2)) 1349 { 1350 freenode(e2); 1351 fixresult_complex87(cdb, e, mST01, pretregs); 1352 return; 1353 } 1354 } 1355 regm_t retregs = mST0; 1356 codelem(cdb,e2, &retregs, false); 
1357 makesure87(cdb, e1, sz2, 1, 0); 1358 makesure87(cdb, e1, 0, 2, 0); 1359 cdb.genf2(0xDC,0xC8 + 2); // FMUL ST(2), ST 1360 cdb.genf2(0xDE,0xC8 + 1); // FMULP ST(1), ST 1361 pop87(); 1362 fixresult_complex87(cdb, e, mST01, pretregs); 1363 return; 1364 } 1365 1366 case X(OPmul, TYfloat, TYcfloat): 1367 case X(OPmul, TYdouble, TYcdouble): 1368 case X(OPmul, TYldouble, TYcldouble): 1369 imaginary = false; 1370 goto Lcmul2; 1371 1372 case X(OPmul, TYifloat, TYcfloat): 1373 case X(OPmul, TYidouble, TYcdouble): 1374 case X(OPmul, TYildouble, TYcldouble): 1375 imaginary = true; 1376 Lcmul2: 1377 { 1378 regm_t retregs = mST0; 1379 codelem(cdb,e1, &retregs, false); 1380 note87(e1, 0, 0); 1381 loadComplex(cdb,e2); 1382 makesure87(cdb, e1, 0, 2, 0); 1383 cdb.genf2(0xD9, imaginary ? 0xE0 : 0xC8 + 1); // FCHS / FXCH ST(1) 1384 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 1385 cdb.genf2(0xDC,0xC8 + 2); // FMUL ST(2), ST 1386 cdb.genf2(0xDE,0xC8 + 1); // FMULP ST(1), ST 1387 pop87(); 1388 fixresult_complex87(cdb, e, mST01, pretregs); 1389 return; 1390 } 1391 1392 case X(OPdiv, TYcfloat, TYfloat): 1393 case X(OPdiv, TYcdouble, TYdouble): 1394 case X(OPdiv, TYcldouble, TYldouble): 1395 { 1396 loadComplex(cdb,e1); 1397 regm_t retregs = mST0; 1398 codelem(cdb,e2, &retregs, false); 1399 makesure87(cdb, e1, sz2, 1, 0); 1400 makesure87(cdb, e1, 0, 2, 0); 1401 cdb.genf2(0xDC,0xF8 + 2); // FDIV ST(2), ST 1402 cdb.genf2(0xDE,0xF8 + 1); // FDIVP ST(1), ST 1403 pop87(); 1404 fixresult_complex87(cdb, e, mST01, pretregs); 1405 return; 1406 } 1407 1408 case X(OPdiv, TYcfloat, TYifloat): 1409 case X(OPdiv, TYcdouble, TYidouble): 1410 case X(OPdiv, TYcldouble, TYildouble): 1411 { 1412 loadComplex(cdb,e1); 1413 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1414 xchg87(0, 1); 1415 cdb.genf2(0xD9, 0xE0); // FCHS 1416 regm_t retregs = mST0; 1417 codelem(cdb,e2, &retregs, false); 1418 makesure87(cdb, e1, 0, 1, 0); 1419 makesure87(cdb, e1, sz2, 2, 0); 1420 cdb.genf2(0xDC,0xF8 + 2); // FDIV ST(2), ST 1421 
cdb.genf2(0xDE,0xF8 + 1); // FDIVP ST(1), ST 1422 pop87(); 1423 fixresult_complex87(cdb, e, mST01, pretregs); 1424 return; 1425 } 1426 1427 case X(OPmod, TYcfloat, TYfloat): 1428 case X(OPmod, TYcdouble, TYdouble): 1429 case X(OPmod, TYcldouble, TYldouble): 1430 case X(OPmod, TYcfloat, TYifloat): 1431 case X(OPmod, TYcdouble, TYidouble): 1432 case X(OPmod, TYcldouble, TYildouble): 1433 { 1434 /* 1435 fld E1.re 1436 fld E1.im 1437 fld E2 1438 fxch ST(1) 1439 FM1: fprem 1440 fstsw word ptr sw 1441 fwait 1442 mov AH, byte ptr sw+1 1443 jp FM1 1444 fxch ST(2) 1445 FM2: fprem 1446 fstsw word ptr sw 1447 fwait 1448 mov AH, byte ptr sw+1 1449 jp FM2 1450 fstp ST(1) 1451 fxch ST(1) 1452 */ 1453 loadComplex(cdb,e1); 1454 regm_t retregs = mST0; 1455 codelem(cdb,e2, &retregs, false); 1456 makesure87(cdb, e1, sz2, 1, 0); 1457 makesure87(cdb, e1, 0, 2, 0); 1458 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1459 1460 cdb.gen2(0xD9, 0xF8); // FPREM 1461 code *cfm1 = cdb.last(); 1462 genjmpifC2(cdb, cfm1); // JC2 FM1 1463 cdb.genf2(0xD9, 0xC8 + 2); // FXCH ST(2) 1464 1465 cdb.gen2(0xD9, 0xF8); // FPREM 1466 code *cfm2 = cdb.last(); 1467 1468 genjmpifC2(cdb, cfm2); // JC2 FM2 1469 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 1470 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1471 1472 pop87(); 1473 fixresult_complex87(cdb, e, mST01, pretregs); 1474 return; 1475 } 1476 1477 default: 1478 1479 debug 1480 elem_print(e); 1481 1482 assert(0); 1483 } 1484 1485 int reverse = 0; 1486 int e2oper = e2.Eoper; 1487 1488 /* Move double-sized operand into the second position if there's a chance 1489 * it will allow combining a load with an operation (DMD Bugzilla 2905) 1490 */ 1491 if ( ((tybasic(e1.Ety) == TYdouble) 1492 && ((e1.Eoper == OPvar) || (e1.Eoper == OPconst)) 1493 && (tybasic(e2.Ety) != TYdouble)) || 1494 (e1.Eoper == OPconst) || 1495 (e1.Eoper == OPvar && 1496 ((e1.Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) || 1497 (e2oper == OPd_f && 1498 (e2.EV.E1.Eoper == OPs32_d || e2.EV.E1.Eoper == 
OPs64_d || e2.EV.E1.Eoper == OPs16_d) && 1499 e2.EV.E1.EV.E1.Eoper == OPvar 1500 ) || 1501 ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) && 1502 e2.EV.E1.Eoper == OPvar 1503 ) 1504 ) 1505 ) 1506 ) 1507 { // Reverse order of evaluation 1508 e1 = e.EV.E2; 1509 e2 = e.EV.E1; 1510 op = oprev[op + 1]; 1511 reverse ^= 1; 1512 } 1513 1514 regm_t retregs1 = mST0; 1515 codelem(cdb,e1,&retregs1,false); 1516 note87(e1,0,0); 1517 1518 if (config.flags4 & CFG4fdivcall && e.Eoper == OPdiv) 1519 { 1520 regm_t retregs = mST0; 1521 load87(cdb,e2,0,&retregs,e1,-1); 1522 makesure87(cdb, e1,0,1,0); 1523 if (op == 7) // if reverse divide 1524 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1525 callclib(cdb,e,CLIB.fdiv87,&retregs,0); 1526 pop87(); 1527 regm_t resregm = mST0; 1528 freenode(e2); 1529 fixresult87(cdb,e,resregm,pretregs); 1530 } 1531 else if (e.Eoper == OPmod) 1532 { 1533 /* 1534 * fld tbyte ptr y 1535 * fld tbyte ptr x // ST = x, ST1 = y 1536 * FM1: // We don't use fprem1 because for some inexplicable 1537 * // reason we get -5 when we do _modulo(15, 10) 1538 * fprem // ST = ST % ST1 1539 * fstsw word ptr sw 1540 * fwait 1541 * mov AH,byte ptr sw+1 // get msb of status word in AH 1542 * sahf // transfer to flags 1543 * jp FM1 // continue till ST < ST1 1544 * fstp ST(1) // leave remainder on stack 1545 */ 1546 regm_t retregs = mST0; 1547 load87(cdb,e2,0,&retregs,e1,-1); 1548 makesure87(cdb,e1,0,1,0); // now have x,y on stack; need y,x 1549 if (!reverse) // if not reverse modulo 1550 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1551 1552 cdb.gen2(0xD9, 0xF8); // FM1: FPREM 1553 code *cfm1 = cdb.last(); 1554 genjmpifC2(cdb, cfm1); // JC2 FM1 1555 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 1556 1557 pop87(); 1558 freenode(e2); 1559 fixresult87(cdb,e,mST0,pretregs); 1560 } 1561 else 1562 { 1563 load87(cdb,e2,0,pretregs,e1,op); 1564 freenode(e2); 1565 } 1566 if (*pretregs & mST0) 1567 note87(e,0,0); 1568 //printf("orth87(-e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 
}

/*****************************
 * Load e into ST01.
 *
 * A real operand is evaluated into ST0 and an FLDZ is pushed after it (zero
 * imaginary part); an imaginary operand has the FLDZ pushed before it (zero
 * real part); a complex operand is evaluated directly with mST01 requested.
 * Params:
 *      cdb = code sink
 *      e = operand; its basic type must be a real, imaginary or complex
 *          floating point type, anything else hits assert(0)
 */

@trusted
private void loadComplex(ref CodeBuilder cdb,elem *e)
{
    regm_t retregs;

    int sz = tysize(e.Ety);
    switch (tybasic(e.Ety))
    {
        case TYfloat:
        case TYdouble:
        case TYldouble:
            retregs = mST0;
            codelem(cdb,e,&retregs,false);
            // Convert to complex with a 0 for the imaginary part
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            break;

        case TYifloat:
        case TYidouble:
        case TYildouble:
            // Convert to complex with a 0 for the real part
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            retregs = mST0;
            codelem(cdb,e,&retregs,false);
            break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            sz /= 2;                          // offset of the second half within the complex value
            retregs = mST01;
            codelem(cdb,e,&retregs,false);
            break;

        default:
            assert(0);
    }
    // Record both halves: offset 0 under index 1 and offset sz under index 0
    // (presumably ST(1) and ST(0) -- callers query them with the same
    // offsets through makesure87()).
    note87(e, 0, 1);
    note87(e, sz, 0);
}

/*************************
 * If op == -1, load expression e into ST0.
 * else compute (eleft op e), eleft is in ST0.
 * Must follow same logic as cmporder87();
 *
 * Params:
 *      cdb = code sink
 *      e = operand to load or to combine with eleft
 *      eoffset = offset passed along with eleft to note87()/makesure87()
 *      pretregs = where the caller wants the result; handed to fixresult87()
 *      eleft = left operand, already on the x87 stack (only used when op != -1)
 *      op = -1 for a plain load, otherwise the operation selector that is
 *           placed in the reg field of the generated instruction
 *           (3 is FCOMP, 4 and 6 are sub and div per the comments below)
 */

@trusted
void load87(ref CodeBuilder cdb,elem *e,uint eoffset,regm_t *pretregs,elem *eleft,OPER op)
{
    code cs;
    regm_t retregs;
    reg_t reg;
    uint mf1;
    ubyte ldop;
    int i;

    if (NDPP)
        printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);

    assert(!(NOSAHF && op == 3));
    elem_debug(e);
    if (ADDFWAIT())
        cs.Iflags = CFwait;
    else
        cs.Iflags = 0;
    cs.Irex = 0;
    // opr is looked up in oprev[]; it is the selector used by the
    // register-register "FopRP" forms below (presumably the reversed operation).
    OPER opr = oprev[op + 1];
    tym_t ty = tybasic(e.Ety);
    // Memory format for the ESC opcode: only float-sized types use MFfloat
    uint mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
    bool noted = false;     // true once eleft has been recorded with note87()
    // 80 bit operands are not combined from memory here: evaluate them
    // onto the stack via the default path instead.
    if ((ty == TYldouble || ty == TYildouble) &&
        op != -1 && e.Eoper != OPd_ld)
        goto Ldefault;
L5:
    switch (e.Eoper)
    {
        case OPcomma:
            if (op != -1)
            {
                note87(eleft,eoffset,0);
                noted = true;
            }
            docommas(cdb,e);
            goto L5;                // re-dispatch after docommas()

        case OPvar:
            notreg(e);
            goto L2;

        case OPind:
        L2:
            if (op != -1)
            {
                // Operand already available as a common subexpression on the
                // x87 stack: operate on ST(i) directly
                if (e.Ecount && e.Ecount != e.Ecomsub &&
                    (i = cse_get(e, 0)) >= 0)
                {
                    immutable ubyte[8] b2 = [0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8];

                    cdb.genf2(0xD8,b2[op] + i);        // Fop ST(i)
                }
                else
                {
                    // Operate on the memory operand
                    getlvalue87(cdb,cs,e,0);
                    makesure87(cdb,eleft,eoffset,0,0);
                    cs.Iop = ESC(mf,0);
                    cs.Irm |= modregrm(0,op,0);
                    cdb.gen(&cs);
                }
            }
            else
            {
                push87(cdb);
                switch (ty)
                {
                    case TYfloat:
                    case TYdouble:
                    case TYifloat:
                    case TYidouble:
                    case TYcfloat:
                    case TYcdouble:
                    case TYdouble_alias:
                        loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0); // FLD var
                        break;
                    case TYldouble:
                    case TYildouble:
                    case TYcldouble:
                        loadea(cdb,e,&cs,0xDB,5,0,0,0);      // FLD var
                        break;
                    default:
                        printf("ty = x%x\n", ty);
                        assert(0);
                }
                note87(e,0,0);
            }
            break;

        case OPd_f:
        case OPf_d:
        case OPd_ld:
            // Memory format is chosen from the type of the conversion's operand
            mf1 = (tybasic(e.EV.E1.Ety) == TYfloat || tybasic(e.EV.E1.Ety) == TYifloat)
                    ? MFfloat : MFdouble;
            if (op != -1 && global87.stackused && !noted)
                note87(eleft,eoffset,0);    // don't trash this value
            if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
            {
                static if (1)
                {
                L4:                                   // also entered from the OPs16_d/OPs32_d case
                    getlvalue87(cdb,cs,e.EV.E1,0);
                    cs.Iop = ESC(mf1,0);
                    if (op != -1)
                    {
                        cs.Irm |= modregrm(0,op,0);
                        makesure87(cdb,eleft,eoffset,0,0);
                    }
                    else
                    {
                        cs.Iop |= 1;                  // turn the ESC opcode into its load form
                        push87(cdb);
                    }
                    cdb.gen(&cs);                     // FLD / Fop
                }
                else
                {
                    loadea(cdb,e.EV.E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e.EV.E1 */
                }

                // Variable cannot be put into a register anymore
                if (e.EV.E1.Eoper == OPvar)
                    notreg(e.EV.E1);
                freenode(e.EV.E1);
            }
            else
            {
                retregs = mST0;
                codelem(cdb,e.EV.E1,&retregs,false);
                if (op != -1)
                {
                    makesure87(cdb,eleft,eoffset,1,0);
                    cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
                    pop87();
                }
            }
            break;

        case OPs64_d:
            if (e.EV.E1.Eoper == OPvar ||
                (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
            {
                // Load the 64 bit integer straight from memory
                getlvalue87(cdb,cs,e.EV.E1,0);
                cs.Iop = 0xDF;
                push87(cdb);
                cs.Irm |= modregrm(0,5,0);
                cdb.gen(&cs);                     // FILD m64
                // Variable cannot be put into a register anymore
                if (e.EV.E1.Eoper == OPvar)
                    notreg(e.EV.E1);
                freenode(e.EV.E1);
            }
            else if (I64)
            {
                // Value is in one 64 bit register: go through floatreg
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
                reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
                code_orrex(cdb.last(), REX_W);
                push87(cdb);
                cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
            }
            else
            {
                // Value is in a register pair: store both halves to floatreg
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
                reg = findreglsw(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reglsw
                reg = findregmsw(retregs);
                cdb.genfltreg(STO,reg,4);         // MOV floatreg+4,regmsw
                push87(cdb);
                cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
            }
            if (op != -1)
            {
                makesure87(cdb,eleft,eoffset,1,0);
                cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
                pop87();
            }
            break;

        case OPconst:
            // Only constants for which loadconst() yields a load opcode are
            // expected here
            ldop = loadconst(e, 0);
            if (ldop)
            {
                push87(cdb);
                cdb.genf2(0xD9,ldop);          // FLDx
                if (op != -1)
                {
                    cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
                    pop87();
                }
            }
            else
            {
                assert(0);
            }
            break;

        case OPu16_d:
        {
            /* This opcode should never be generated        */
            /* (probably shouldn't be for 16 bit code too)  */
            assert(!I32);

            if (op != -1 && !noted)
                note87(eleft,eoffset,0);    // don't trash this value
            retregs = ALLREGS & mLSW;
            codelem(cdb,e.EV.E1,&retregs,false);
            regwithvalue(cdb,ALLREGS & mMSW,0,reg,0);  // 0-extend
            retregs |= mask(reg);
            mf1 = MFlong;
            goto L3;
        }

        case OPs16_d:       mf1 = MFword;   goto L6;
        case OPs32_d:       mf1 = MFlong;   goto L6;
        L6:
            if (e.Ecount)
                goto Ldefault;
            if (op != -1 && !noted)
                note87(eleft,eoffset,0);    // don't trash this value
            if (e.EV.E1.Eoper == OPvar ||
                (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
            {
                goto L4;                    // integer operand can be used from memory
            }
            else
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
            L3:
                if (I16 && e.Eoper != OPs16_d)
                {
                    /* MOV floatreg+2,reg   */
                    reg = findregmsw(retregs);
                    cdb.genfltreg(STO,reg,REGSIZE);
                    retregs &= mLSW;
                }
                reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
                if (op != -1)
                {
                    makesure87(cdb,eleft,eoffset,0,0);
                    cdb.genfltreg(ESC(mf1,0),op,0);   // Fop floatreg
                }
                else
                {
                    /* FLD long ptr floatreg        */
                    push87(cdb);
                    cdb.genfltreg(ESC(mf1,1),0,0);
                }
            }
            break;
        default:
        Ldefault:
            // General case: evaluate e onto the stack, then combine ST(1) and ST
            retregs = mST0;
            codelem(cdb,e,&retregs,2);

            if (op != -1)
            {
                makesure87(cdb,eleft,eoffset,1,(op == 0 || op == 1));
                pop87();
                if (op == 4 || op == 6)             // sub or div
                {
                    // If makesure87() just emitted an FXCH ST(1), drop it and
                    // use the non-reversed operation instead
                    code *cl = cdb.last();
                    if (cl && cl.Iop == 0xD9 && cl.Irm == 0xC9) // FXCH ST(1)
                    {   cl.Iop = NOP;
                        opr = op;               // reverse operands
                    }
                }
                cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
            }
            break;
    }
    if (op == 3)                        // FCOMP
    {   pop87();                        // extra pop was done
        cg87_87topsw(cdb);
    }
    fixresult87(cdb,e,((op == 3) ? mPSW : mST0),pretregs);
    if (NDPP)
        printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
}

/********************************
 * Determine if a compare is to be done forwards (return 0)
 * or backwards (return 1).
 * Must follow same logic as load87().
 *
 * Params:
 *      e = operand of the compare; comma expressions are followed to their
 *          right-most operand before the decision is made
 * Returns:
 *      0 for forwards, 1 for backwards
 */

@trusted
int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);
  L1:
    switch (e.Eoper)
    {
        case OPcomma:
            e = e.EV.E2;
            goto L1;

        case OPd_f:
        case OPf_d:
        case OPd_ld:
            // forwards only when the converted operand is a variable or indirection
            if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
                goto ret0;
            else
                goto ret1;

        case OPconst:
            if (loadconst(e, 0) || tybasic(e.Ety) == TYldouble
                                || tybasic(e.Ety) == TYildouble)
            {
                //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
                goto ret1;
            }
            goto ret0;

        case OPvar:
        case OPind:
            // 80 bit operands are compared backwards
            if (tybasic(e.Ety) == TYldouble ||
                tybasic(e.Ety) == TYildouble)
                goto ret1;
            goto ret0;

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            goto ret0;

        case OPs64_d:
            goto ret1;

        default:
            goto ret1;
    }

ret1:
    return 1;

ret0:
    return 0;
}

/*******************************
 * Perform an assignment to a long double/double/float.
 *
 * The right operand e.EV.E2 is evaluated onto the x87 stack and stored to
 * the lvalue e.EV.E1. When the caller also wants the value, a non-popping
 * store is used, or for 80 bit types (which are stored with a popping
 * opcode) ST(0) is duplicated first.
 * Params:
 *      cdb = code sink
 *      e = the OPeq elem
 *      pretregs = where the caller wants the result; handed to fixresult87()
 */

@trusted
void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;       // store opcode
    uint op2;           // reg field selecting the store variant

    //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    regm_t retregs = mST0 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;

        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;

        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        default:
            assert(0);
    }
    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            // keep the popping store (op2 stays 7) and store a copy instead
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP e.EV.E1
        pop87();
    }

    static if (0)
    {
        // Doesn't work if ST(0) gets saved to the stack by getlvalue()
        loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);
    }
    else
    {
        cs.Irex = 0;
        cs.Iflags = 0;
        cs.Iop = op1;
        if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
        {   // Make sure it's still there
            elem *e2 = e.EV.E2;
            while (e2.Eoper == OPcomma)
                e2 = e2.EV.E2;
            note87(e2,0,0);
            getlvalue87(cdb, cs, e.EV.E1, 0);
            makesure87(cdb,e2,0,0,1);
        }
        else
        {
            getlvalue87(cdb, cs, e.EV.E1, 0);
        }
        cs.Irm |= modregrm(0,op2,0);             // OR in reg field
        cdb.gen(&cs);
        if (tysize(TYldouble) == 12)
        {
            /* This deals with the fact that 10 byte reals really
             * occupy 12 bytes by zeroing the extra 2 bytes.
             */
            if (op1 == 0xDB)
            {
                cs.Iop = 0xC7;                      // MOV EA+10,0
                NEWREG(cs.Irm, 0);
                cs.IEV1.Voffset += 10;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 0;
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);
            }
        }
        else if (tysize(TYldouble) == 16)
        {
            /* This deals with the fact that 10 byte reals really
             * occupy 16 bytes by zeroing the extra 6 bytes.
             */
            if (op1 == 0xDB)
            {
                cs.Irex &= ~REX_W;
                cs.Iop = 0xC7;                      // MOV EA+10,0
                NEWREG(cs.Irm, 0);
                cs.IEV1.Voffset += 10;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 0;
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);

                // second MOV at EA+12 with CFopsize cleared covers the rest of the padding
                cs.IEV1.Voffset += 2;
                cs.Iflags &= ~CFopsize;
                cdb.gen(&cs);
            }
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}

/*******************************
 * Perform an assignment to a complex long double/double/float.
 *
 * The right operand e.EV.E2 is evaluated into ST01 and stored to the lvalue
 * e.EV.E1 in two halves: first the half at offset sz (half the size of the
 * complex type), then the half at offset 0.
 * Params:
 *      cdb = code sink
 *      e = the OPeq elem
 *      pretregs = where the caller wants the result; handed to
 *                 fixresult_complex87()
 */

@trusted
void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;       // store opcode
    uint op2;           // reg field selecting the store variant
    uint sz;            // size of one half of the complex value
    int fxch = 0;       // set when the halves must be swapped around the second store

    //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    cs.Irex = 0;
    regm_t retregs = mST01 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYcdouble:     op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYcfloat:      op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYcldouble:    op1 = 0xDB;             op2 = 7; break;
        default:
            assert(0);
    }
    if (*pretregs & (mST01 | mXMM0 | mXMM1))  // if want result on stack too
    {
        if (ty1 == TYcldouble)
        {
            // keep the popping stores (op2 stays 7) and store copies of both halves
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            pop87();
            pop87();
        }
        else
        {   op2 = 2;                        // FST e.EV.E1
            fxch = 1;
        }
    }
    else
    {   // FSTP e.EV.E1
        pop87();
        pop87();
    }
    sz = tysize(ty1) / 2;
    if (*pretregs & (mST01 | mXMM0 | mXMM1))
    {
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.Iop = op1;
        getlvalue87(cdb, cs, e.EV.E1, 0);
        cs.IEV1.Voffset += sz;
        cs.Irm |= modregrm(0, op2, 0);
        makesure87(cdb,e.EV.E2, sz, 0, 0);
        cdb.gen(&cs);                   // store the half at offset sz
        genfwait(cdb);
        makesure87(cdb,e.EV.E2, 0, 1, 0);
    }
    else
    {
        loadea(cdb,e.EV.E1,&cs,op1,op2,sz,0,0);
        genfwait(cdb);
    }
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    cs.IEV1.Voffset -= sz;
    cdb.gen(&cs);                       // store the half at offset 0
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    // Zero the padding that follows each 10 byte half (same idea as in eq87)
    if (tysize(TYldouble) == 12)
    {
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);
            cs.IEV1.Voffset += 12;
            cdb.gen(&cs);               // MOV EA+22,0
        }
    }
    if (tysize(TYldouble) == 16)
    {
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;       // EA+12
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 14;      // EA+26, padding of the second half
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;       // EA+28
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult_complex87(cdb, e,mST01 | mPSW,pretregs);
}

/*******************************
 * Perform an assignment while converting to integral type,
 * i.e. handle (e1 = (int) e2)
 *
 * The floating point operand of the conversion is evaluated into ST0 and
 * stored to e1 with a popping integer store, with the rounding mode set to
 * CW.roundto0 around the store and set to CW.roundtonearest afterwards.
 * Params:
 *      cdb = code sink
 *      e = the OPeq elem; e.EV.E2 must be OPd_s16, OPd_s32, OPd_u16 or OPd_s64
 *      pretregs = must request no result (asserted)
 */

@trusted
private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;       // integer store opcode
    uint op2;           // reg field selecting the store variant

    assert(e.Eoper == OPeq);
    assert(!*pretregs);
    regm_t retregs = mST0;
    elem_debug(e.EV.E2);
    codelem(cdb,e.EV.E2.EV.E1,&retregs,false);

    switch (e.EV.E2.Eoper)
    {   case OPd_s16:
            op1 = ESC(MFword,1);
            op2 = 3;
            break;
        case OPd_s32:
        case OPd_u16:
            op1 = ESC(MFlong,1);
            op2 = 3;
            break;
        case OPd_s64:
            op1 = 0xDF;
            op2 = 7;
            break;
        default:
            assert(0);
    }
    freenode(e.EV.E2);

    genfwait(cdb);
    genSetRoundingMode(cdb, CW.roundto0);  // FLDCW roundto0

    pop87();                                // the stores selected above pop ST(0)
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    if (e.EV.E1.Eoper == OPvar)
        notreg(e.EV.E1);                    // cannot be put in register anymore
    loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);

    genfwait(cdb);
    genSetRoundingMode(cdb, CW.roundtonearest);  // FLDCW roundtonearest

    freenode(e.EV.E1);
}

/**********************************
 * Perform +=, -=, *= and /= for doubles.
2257 */ 2258 2259 @trusted 2260 public void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2261 { 2262 code cs; 2263 uint op; 2264 opcode_t opld; 2265 opcode_t op1; 2266 uint op2; 2267 tym_t ty1 = tybasic(e.EV.E1.Ety); 2268 2269 switch (ty1) 2270 { 2271 case TYdouble_alias: 2272 case TYidouble: 2273 case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break; 2274 case TYifloat: 2275 case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break; 2276 case TYildouble: 2277 case TYldouble: op1 = 0xDB; op2 = 7; break; 2278 2279 case TYcfloat: 2280 case TYcdouble: 2281 case TYcldouble: 2282 if (e.Eoper == OPmodass) 2283 opmod_complex87(cdb, e, pretregs); 2284 else 2285 opass_complex87(cdb, e, pretregs); 2286 return; 2287 2288 default: 2289 assert(0); 2290 } 2291 switch (e.Eoper) 2292 { 2293 case OPpostinc: 2294 case OPaddass: op = 0 << 3; opld = 0xC1; break; // FADD 2295 case OPpostdec: 2296 case OPminass: op = 5 << 3; opld = 0xE1; /*0xE9;*/ break; // FSUBR 2297 case OPmulass: op = 1 << 3; opld = 0xC9; break; // FMUL 2298 case OPdivass: op = 7 << 3; opld = 0xF1; break; // FDIVR 2299 case OPmodass: break; 2300 default: assert(0); 2301 } 2302 regm_t retregs = mST0; 2303 codelem(cdb,e.EV.E2,&retregs,false); // evaluate rvalue 2304 note87(e.EV.E2,0,0); 2305 getlvalue87(cdb,cs,e.EV.E1,e.Eoper==OPmodass?mAX:0); 2306 makesure87(cdb,e.EV.E2,0,0,0); 2307 if (config.flags4 & CFG4fdivcall && e.Eoper == OPdivass) 2308 { 2309 push87(cdb); 2310 cs.Iop = op1; 2311 if (ty1 == TYldouble || ty1 == TYildouble) 2312 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 
2313 cdb.gen(&cs); 2314 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2315 callclib(cdb,e,CLIB.fdiv87,&retregs,0); 2316 pop87(); 2317 } 2318 else if (e.Eoper == OPmodass) 2319 { 2320 /* 2321 * fld tbyte ptr y 2322 * fld tbyte ptr x // ST = x, ST1 = y 2323 * FM1: // We don't use fprem1 because for some inexplicable 2324 * // reason we get -5 when we do _modulo(15, 10) 2325 * fprem // ST = ST % ST1 2326 * fstsw word ptr sw 2327 * fwait 2328 * mov AH,byte ptr sw+1 // get msb of status word in AH 2329 * sahf // transfer to flags 2330 * jp FM1 // continue till ST < ST1 2331 * fstp ST(1) // leave remainder on stack 2332 */ 2333 code *c1; 2334 2335 push87(cdb); 2336 cs.Iop = op1; 2337 if (ty1 == TYldouble || ty1 == TYildouble) 2338 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2339 cdb.gen(&cs); // FLD e.EV.E1 2340 2341 cdb.gen2(0xD9, 0xF8); // FPREM 2342 code *cfm1 = cdb.last(); 2343 genjmpifC2(cdb, cfm1); // JC2 FM1 2344 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 2345 2346 pop87(); 2347 } 2348 else if (ty1 == TYldouble || ty1 == TYildouble) 2349 { 2350 push87(cdb); 2351 cs.Iop = op1; 2352 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 
2353 cdb.gen(&cs); // FLD e.EV.E1 2354 cdb.genf2(0xDE,opld); // FopP ST(1) 2355 pop87(); 2356 } 2357 else 2358 { 2359 cs.Iop = op1 & ~1; 2360 cs.Irm |= op; 2361 cdb.gen(&cs); // Fop e.EV.E1 2362 } 2363 if (*pretregs & mPSW) 2364 genftst(cdb,e,0); // FTST ST0 2365 // if want result in registers 2366 if (*pretregs & (mST0 | ALLREGS | mBP)) 2367 { 2368 if (ty1 == TYldouble || ty1 == TYildouble) 2369 { 2370 push87(cdb); 2371 cdb.genf2(0xD9,0xC0); // FLD ST(0) 2372 pop87(); 2373 } 2374 else 2375 op2 = 2; // FST e.EV.E1 2376 } 2377 else 2378 { // FSTP 2379 pop87(); 2380 } 2381 cs.Iop = op1; 2382 NEWREG(cs.Irm,op2); // FSTx e.EV.E1 2383 freenode(e.EV.E1); 2384 cdb.gen(&cs); 2385 genfwait(cdb); 2386 fixresult87(cdb,e,mST0 | mPSW,pretregs); 2387 } 2388 2389 /*********************************** 2390 * Perform %= where E1 is complex and E2 is real or imaginary. 2391 */ 2392 2393 @trusted 2394 private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs) 2395 { 2396 2397 /* fld E2 2398 fld E1.re 2399 FM1: fprem 2400 fstsw word ptr sw 2401 fwait 2402 mov AH, byte ptr sw+1 2403 jp FM1 2404 fxch ST(1) 2405 fld E1.im 2406 FM2: fprem 2407 fstsw word ptr sw 2408 fwait 2409 mov AH, byte ptr sw+1 2410 jp FM2 2411 fstp ST(1) 2412 */ 2413 2414 code cs; 2415 2416 tym_t ty1 = tybasic(e.EV.E1.Ety); 2417 uint sz2 = _tysize[ty1] / 2; 2418 2419 regm_t retregs = mST0; 2420 codelem(cdb,e.EV.E2,&retregs,false); // FLD E2 2421 note87(e.EV.E2,0,0); 2422 getlvalue87(cdb,cs,e.EV.E1,0); 2423 makesure87(cdb,e.EV.E2,0,0,0); 2424 2425 push87(cdb); 2426 switch (ty1) 2427 { 2428 case TYcdouble: cs.Iop = ESC(MFdouble,1); break; 2429 case TYcfloat: cs.Iop = ESC(MFfloat,1); break; 2430 case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break; 2431 default: 2432 assert(0); 2433 } 2434 cdb.gen(&cs); // FLD E1.re 2435 2436 cdb.gen2(0xD9, 0xF8); // FPREM 2437 code *cfm1 = cdb.last(); 2438 genjmpifC2(cdb, cfm1); // JC2 FM1 2439 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 2440 2441 
push87(cdb);
    cs.IEV1.Voffset += sz2;
    cdb.gen(&cs);                       // FLD E1.im

    cdb.gen2(0xD9, 0xF8);               // FPREM
    code *cfm2 = cdb.last();
    genjmpifC2(cdb, cfm2);              // JC2 FM2
    cdb.genf2(0xDD,0xD8 + 1);           // FSTP ST(1)

    pop87();

    if (*pretregs & (mST01 | mPSW))
    {
        cs.Irm |= modregrm(0, 2, 0);
        cdb.gen(&cs);                   // FST mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);                   // FST mreal.re
        retregs = mST01;
    }
    else
    {
        cs.Irm |= modregrm(0, 3, 0);
        cdb.gen(&cs);                   // FSTP mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);                   // FSTP mreal.re
        pop87();
        pop87();
        retregs = 0;
    }
    freenode(e.EV.E1);
    genfwait(cdb);
    fixresult_complex87(cdb,e,retregs,pretregs);
}

/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 * OPpostinc and OPpostdec share the += and -= paths.
 *
 * The rvalue (E2) is evaluated first, then the address of the lvalue (E1)
 * is formed. The result is stored back into E1 and, if the caller asked
 * for mST01 or mPSW, is also left on the x87 stack.
 * Params:
 *      cdb = code sink
 *      e = the op= elem; e.EV.E1 is the complex lvalue, e.EV.E2 the rvalue
 *      pretregs = where the caller wants the result
 */

@trusted
private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code cs;
    uint op;            // reg field of the memory-operand form of the operation
    opcode_t op2;       // base of the register "pop" form, used with ESC 0xDE

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;        // size of one component (re or im)
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:  op = 0 << 3;            // FADD
                        op2 = 0xC0;             // FADDP ST(i),ST
                        break;

        case OPpostdec:
        case OPminass:  op = 5 << 3;            // FSUBR
                        op2 = 0xE0;             // FSUBRP ST(i),ST
                        break;

        case OPmulass:  op = 1 << 3;            // FMUL
                        op2 = 0xC8;             // FMULP ST(i),ST
                        break;

        case OPdivass:  op = 7 << 3;            // FDIVR
                        op2 = 0xF0;             // FDIVRP ST(i),ST
                        break;

        default:        assert(0);
    }

    if (!tycomplex(e.EV.E2.Ety) &&
        (e.Eoper == OPmulass || e.Eoper == OPdivass))
    {
        // Non-complex rvalue: load it once, duplicate it, then apply the
        // same component-wise code that += and -= use (label L1)
        retregs = mST0;
        codelem(cdb,e.EV.E2, &retregs, false);
        note87(e.EV.E2, 0, 0);
        getlvalue87(cdb,cs, e.EV.E1, 0);
        makesure87(cdb,e.EV.E2,0,0,0);
        push87(cdb);
        cdb.genf2(0xD9,0xC0);                   // FLD ST(0)
        goto L1;
    }
    else
    {
        loadComplex(cdb,e.EV.E2);
        getlvalue87(cdb,cs,e.EV.E1,0);
        makesure87(cdb,e.EV.E2,sz2,0,0);
        makesure87(cdb,e.EV.E2,0,1,0);
    }

    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:
        case OPpostdec:
        case OPminass:
        L1:
            if (ty1 == TYcldouble)
            {
                // 80 bit operands have no memory-operand arithmetic forms,
                // so load both components of E1 and use the register forms
                push87(cdb);
                push87(cdb);
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xDE, op2 + 2);       // FADDP/FSUBRP ST(2),ST
                cdb.genf2(0xDE, op2 + 2);       // FADDP/FSUBRP ST(2),ST
                pop87();
                pop87();
                if (tyimaginary(e.EV.E2.Ety))
                {
                    if (e.Eoper == OPmulass)
                    {
                        cdb.genf2(0xD9, 0xE0);  // FCHS
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                    }
                    else if (e.Eoper == OPdivass)
                    {
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                        cdb.genf2(0xD9, 0xE0);  // FCHS
                    }
                }
            L2:
                // Store the 80 bit result; keep a copy on the stack if wanted
                if (*pretregs & (mST01 | mPSW))
                {
                    push87(cdb);
                    push87(cdb);
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    retregs = mST01;
                }
                else
                    retregs = 0;
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,7,0);
                cdb.gen(&cs);                   // FSTP e.EV.E1.im
                cs.IEV1.Voffset -= sz2;
                cdb.gen(&cs);                   // FSTP e.EV.E1.re
                pop87();
                pop87();

            }
            else
            {
                // float/double components: operate directly against memory
                ubyte rmop = cast(ubyte)(cs.Irm | op);          // arithmetic op
                ubyte rmfst = cs.Irm | modregrm(0,2,0);         // FST
                ubyte rmfstp = cs.Irm | modregrm(0,3,0);        // FSTP
                ubyte iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                opcode_t iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;

                cs.Iop = iop;
                cs.Irm = rmop;
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                           // FSUBR mreal.im
                if (tyimaginary(e.EV.E2.Ety) && (e.Eoper == OPmulass || e.Eoper == OPdivass))
                {
                    if (e.Eoper == OPmulass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);                       // FMUL mreal.re
                    if (e.Eoper == OPdivass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    if (*pretregs & (mST01 | mPSW))
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfst;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FST mreal.im
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FST mreal.re
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        retregs = mST01;
                    }
                    else
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfstp;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FSTP mreal.im
                        pop87();
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FSTP mreal.re
                        pop87();
                        retregs = 0;
                    }
                    goto L3;
                }

                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSUBR mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.im
                    pop87();
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSUBR mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    retregs = 0;
                }
            }
        L3:
            freenode(e.EV.E1);
            genfwait(cdb);
            fixresult_complex87(cdb,e,retregs,pretregs);
            return;

        case OPmulass:
            // complex *= complex: load E1 and call the CLIB.cmul helper
            push87(cdb);
            push87(cdb);
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD float/double ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        case OPdivass:
            // complex /= complex: load E1 beneath E2 and call CLIB.cdiv
            push87(cdb);
            push87(cdb);
            idxregs = idxregm(&cs);             // mask of index regs used
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD float/double ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        default:
            assert(0);
    }
}

/**************************
 * OPnegass
 *
 * Negates a floating point lvalue in place by flipping the sign bit of
 * its most significant byte (and of both components when complex), then
 * optionally reloads the value into ST0.
 * Params:
 *      cdb = code sink
 *      e = the OPnegass elem; e.EV.E1 is the lvalue
 *      pretregs = where the caller wants the result, 0 for no result
 */

@trusted
void cdnegass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint op;

    //printf("cdnegass87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];

    code cs;
    getlvalue87(cdb,cs,e1,0);

    /* If the EA is really an XMM register, modEA() will fail.
     * So disallow putting e1 into a register.
     * A better way would be to negate the XMM register in place.
     */
    if (e1.Eoper == OPvar)
        e1.EV.Vsym.Sflags &= ~GTregcand;

    modEA(cdb,&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;

    /* Remember where the lvalue starts. The sign byte is not always at
     * offset sz - 1 (padded long doubles keep it at byte 9), so undoing the
     * adjustment below with a fixed `sz - 1` would address memory before
     * the variable when the value is reloaded.
     */
    const eaOffset = cs.IEV1.Voffset;

    if (tysize(TYldouble) > 10)
    {
        if (tyml == TYldouble || tyml == TYildouble)
            cs.IEV1.Voffset += 10 - 1;
        else if (tyml == TYcldouble)
            cs.IEV1.Voffset += tysize(TYldouble) + 10 - 1;
        else
            cs.IEV1.Voffset += sz - 1;
    }
    else
        cs.IEV1.Voffset += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tyml))
    {
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    if (*pretregs)
    {
        switch (tyml)
        {
            case TYifloat:
            case TYfloat:               cs.Iop = 0xD9;  op = 0; break;
            case TYidouble:
            case TYdouble:
            case TYdouble_alias:        cs.Iop = 0xDD;  op = 0; break;
            case TYildouble:
            case TYldouble:             cs.Iop = 0xDB;  op = 5; break;
            default:
                assert(0);
        }
        NEWREG(cs.Irm,op);
        cs.IEV1.Voffset = eaOffset;     // back to the start of the lvalue
        push87(cdb);
        cdb.gen(&cs);                   // FLD EA
        retregs = mST0;
    }
    else
        retregs = 0;

    freenode(e1);
    fixresult87(cdb,e,retregs,pretregs);
}

/************************
 * Take care of OPpostinc and OPpostdec.
*/

@trusted
void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    uint op;
    opcode_t op1;
    reg_t reg;

    //printf("post87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    code cs;
    assert(*pretregs);
    getlvalue87(cdb,cs,e.EV.E1,0);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
        case TYcdouble:     op1 = ESC(MFdouble,1);  reg = 0;        break;
        case TYifloat:
        case TYfloat:
        case TYcfloat:      op1 = ESC(MFfloat,1);   reg = 0;        break;
        case TYildouble:
        case TYldouble:
        case TYcldouble:    op1 = 0xDB;             reg = 5;        break;
        default:
            assert(0);
    }
    NEWREG(cs.Irm, reg);
    // From here on `reg` is the reg field of the matching FSTP
    if (reg == 5)
        reg = 7;
    else
        reg = 3;
    cs.Iop = op1;
    push87(cdb);
    cdb.gen(&cs);                   // FLD e.EV.E1
    if (tycomplex(ty1))
    {
        uint sz = _tysize[ty1] / 2;

        push87(cdb);
        cs.IEV1.Voffset += sz;
        cdb.gen(&cs);               // FLD e.EV.E1
        regm_t retregs = mST0;      // note kludge to only load real part
        codelem(cdb,e.EV.E2,&retregs,false); // load rvalue
        cdb.genf2(0xD8,             // FADD/FSUBR ST,ST2
                (e.Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
        NEWREG(cs.Irm,reg);
        pop87();
        cs.IEV1.Voffset -= sz;
        cdb.gen(&cs);               // FSTP e.EV.E1
        genfwait(cdb);
        freenode(e.EV.E1);
        fixresult_complex87(cdb, e, mST01, pretregs);
        return;
    }

    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS))
    {   // Want the result in a register
        push87(cdb);
        cdb.genf2(0xD9,0xC0);       // FLD ST0
    }
    if (*pretregs & mPSW)           // if result in flags
        genftst(cdb,e,0);           // FTST ST0
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);    // load rvalue
    pop87();
    op = (e.Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
    cdb.genf2(0xDE,op);             // FADDP/FSUBRP ST1
    NEWREG(cs.Irm,reg);
    pop87();
    cdb.gen(&cs);                   // FSTP e.EV.E1
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mPSW | mST0,pretregs);
}

/************************
 * Do the following opcodes:
 *      OPd_u64
 *      OPld_u64
 * Dispatches to the 32 bit or 64 bit implementation.
 */
void cdd_u64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);
    assert(*pretregs);
    if (I32)
        cdd_u64_I32(cdb, e, pretregs);
    else
        cdd_u64_I64(cdb, e, pretregs);
}

/************************
 * 32 bit code generation for OPd_u64 and OPld_u64.
 * The result is returned in a register pair.
 */
@trusted
private void cdd_u64_I32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e // (roundTo0<<16) | adjust
            fld         real ptr floatreg     // adjust (= 1/real.epsilon)
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10           // roundTo0
            test        AH,1
            jz          L1                    // jae L1

            fld         real ptr floatreg     // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         EAX,floatreg
            add         EDX,floatreg+4
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         EAX,floatreg
            mov         EDX,floatreg+4
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg, reg2;
    allocreg(cdb,&retregs,&reg,tym);
    reg  = findreglsw(retregs);
    reg2 = findregmsw(retregs);
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);

    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                              // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                      // MOV floatreg+4,EDX
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                      // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *cnop1 = gennop(null);
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);       // JZ L1

    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    cdb.genfltreg(0x03,reg2,4);                     // ADD reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *cnop2 = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);      // JMP L2

    cdb.append(cnop1);
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    cdb.genfltreg(LOD,reg2,4);                      // MOV reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(cnop2);

    pop87();
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * 64 bit code generation for OPd_u64 and OPld_u64.
 * The result is returned in a single 64 bit register; a second register
 * (never AX, which FSTSW clobbers) holds the 0x8000_0000 adjustment.
 */
@trusted
private void cdd_u64_I64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e // (roundTo0<<16) | adjust
            fld         real ptr floatreg     // adjust
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10           // roundTo0
            test        AH,1
            jz          L1                    // jae L1

            fld         real ptr floatreg     // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         RAX,floatreg
            shl         RDX,32
            add         RAX,RDX
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         RAX,floatreg
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);
    regm_t regm2 = ALLREGS & ~retregs & ~mAX;
    reg_t reg2;
    allocreg(cdb,&regm2,&reg2,tym);
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);

    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                              // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                      // MOV floatreg+4,EDX
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                      // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *cnop1 = gennop(null);
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);       // JZ L1

    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);
    cdb.genc2(0xC1,(REX_W << 16) | modregrmx(3,4,reg2),32); // SHL reg2,32
    cdb.gen2(0x03,(REX_W << 16) | modregxrmx(3,reg,reg2));  // ADD reg,reg2
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *cnop2 = gennop(null);
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);      // JMP L2

    cdb.append(cnop1);
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(cnop2);

    pop87();
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Do the following opcodes:
 *      OPd_u32
 */
@trusted
void cdd_u32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);

    /* Generate:
            mov         floatreg+8,0x0FBF0000   // (roundTo0<<16)
            fstcw       floatreg+12
            fldcw       floatreg+10             // roundTo0
            fistp       floatreg
            fldcw       floatreg+12
            mov         EAX,floatreg
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    tym_t tym = e.Ety;
    retregs = *pretregs & ALLREGS;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);

    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0x0FBF0000;                 // MOV floatreg+8,(roundTo0<<16)

    cdb.genfltreg(0xD9,7,12);                   // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                   // FLDCW floatreg+10

    // Convert as a signed 64 bit value; the low 32 bits are the result
    cdb.genfltreg(0xDF,7,0);                    // FISTP qword ptr floatreg
    cdb.genfltreg(0xD9,5,12);                   // FLDCW floatreg+12
    cdb.genfltreg(LOD,reg,0);                   // MOV reg,floatreg

    pop87();
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Do the following opcodes:
 *      OPd_s16
 *      OPd_s32
 *      OPd_u16
 *      OPd_s64
 */

@trusted
void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint mf,rf;         // opcode and reg field of the FISTP to use
    reg_t reg;
    int clib;           // runtime library routine used by the 16 bit path

    //printf("cnvt87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(*pretregs);
    tym_t tym = e.Ety;
    int sz = tysize(tym);
    int szoff = sz;     // stack offset at which the control word is saved

    switch (e.Eoper)
    {
        case OPd_s16:
            clib = CLIB.dblint87;
            mf = ESC(MFword,1);
            rf = 3;
            break;

        case OPd_u16:
            szoff = 4;
            goto case OPd_s32;

        case OPd_s32:
            clib = CLIB.dbllng87;
            mf = ESC(MFlong,1);
            rf = 3;
            break;

        case OPd_s64:
            clib = CLIB.dblllng;
            mf = 0xDF;
            rf = 7;
            break;

        default:
            assert(0);
    }

    if (I16)                       // C may change the default control word
    {
        if (clib == CLIB.dblllng)
        {   retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
        }
        else
        {   retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
            pop87();
        }
    }
    else if (1)
    {   //  Generate:
        //  sub     ESP,12
        //  fstcw   8[ESP]
        //  fldcw   roundto0
        //  fistp   long64 ptr [ESP]
        //  fldcw   8[ESP]
        //  pop     lsw
        //  pop     msw
        //  add     ESP,4

        uint szpush = szoff + 2;
        if (config.flags3 & CFG3pic)
            szpush += 2;                // room for the round-to-0 control word
        szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        if (szpush == REGSIZE)
            cdb.gen1(0x50 + AX);                // PUSH EAX
        else
            cod3_stackadj(cdb, szpush);
        genfwait(cdb);
        cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

        genfwait(cdb);

        if (config.flags3 & CFG3pic)
        {
            cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW.roundto0); // MOV szoff+2[ESP], CW.roundto0
            code_orflag(cdb.last(), CFopsize);
            cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
        }
        else
            genSetRoundingMode(cdb, CW.roundto0);   // FLDCW roundto0

        pop87();

        genfwait(cdb);
        cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP));                   // FISTP [ESP]

        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);                                           // FWAIT
        cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]

        if (szoff > REGSIZE)
        {   szpush -= REGSIZE;
            genpop(cdb,findreglsw(retregs));       // POP lsw
        }
        szpush -= REGSIZE;
        genpop(cdb,reg);                           // POP reg

        if (szpush)
            cod3_stackadj(cdb, -szpush);
        fixresult(cdb,e,retregs,pretregs);
    }
    else
    {
        // This is incorrect. For -inf and nan, the 8087 returns the largest
        // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
        // and for nan, 0 should be returned.
        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        genfwait(cdb);
        genSetRoundingMode(cdb, CW.roundto0);  // FLDCW roundto0

        pop87();
        cdb.genfltreg(mf,rf,0);                // FISTP floatreg
        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);

        if (sz > REGSIZE)
        {
            cdb.genfltreg(LOD,reg,REGSIZE);          // MOV reg,floatreg + REGSIZE
                                                     // MOV lsreg,floatreg
            cdb.genfltreg(LOD,findreglsw(retregs),0);
        }
        else
            cdb.genfltreg(LOD,reg,0);                // MOV reg,floatreg
        genSetRoundingMode(cdb, CW.roundtonearest);  // FLDCW roundtonearest
        fixresult(cdb,e,retregs,pretregs);
    }
}

/************************
 * Do OPrndtol.
*/

@trusted
void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // No result wanted: only evaluate the operand
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1,&retregs,false);

    // Select the FISTP form that matches the size of the integer result
    ubyte op1,op2;
    tym_t tym = e.Ety;
    uint sz = tysize(tym);
    switch (sz)
    {   case 2:
            op1 = 0xDF;
            op2 = 3;
            break;
        case 4:
            op1 = 0xDB;
            op2 = 3;
            break;
        case 8:
            op1 = 0xDF;
            op2 = 7;
            break;
        default:
            assert(0);
    }

    pop87();
    cdb.genfltreg(op1,op2,0);           // FISTP floatreg
    retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);
    genfwait(cdb);                      // FWAIT
    if (sz > REGSIZE)
    {
        // Result needs a register pair
        cdb.genfltreg(LOD,reg,REGSIZE);             // MOV reg,floatreg + REGSIZE
                                                    // MOV lsreg,floatreg
        cdb.genfltreg(LOD,findreglsw(retregs),0);
    }
    else
    {
        cdb.genfltreg(LOD,reg,0);       // MOV reg,floatreg
        if (sz == 8 && I64)
            code_orrex(cdb.last(), REX_W);
    }
    fixresult(cdb,e,retregs,pretregs);
}

/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
*/

@trusted
void cdscale(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    // Evaluate both operands onto the x87 stack, E1 first
    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1,&st0,false);
    note87(e.EV.E1,0,0);
    codelem(cdb,e.EV.E2,&st0,false);
    makesure87(cdb,e.EV.E1,0,1,0);       // now have x,y on stack; need y,x

    const oper = e.Eoper;
    if (oper == OPscale)
    {
        cdb.genf2(0xD9,0xFD);           // FSCALE
        cdb.genf2(0xDD,0xD8 + 1);       // FSTP ST(1)
    }
    else if (oper == OPyl2x)
    {
        cdb.genf2(0xD9,0xF1);           // FYL2X
    }
    else if (oper == OPyl2xp1)
    {
        cdb.genf2(0xD9,0xF9);           // FYL2XP1
    }
    else
        assert(0);

    // Two operands went in, one result remains
    pop87();
    fixresult87(cdb,e,mST0,pretregs);
}


/**********************************
 * Unary -, absolute value, square root, sine, cosine, round to integer.
 * Each is a single D9 xx instruction operating on ST0.
 */

@trusted
void neg87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("neg87()\n");

    assert(*pretregs);

    // Second opcode byte of the instruction for this operator
    const oper = e.Eoper;
    opcode_t op;
    if (oper == OPneg)
        op = 0xE0;                      // FCHS
    else if (oper == OPabs)
        op = 0xE1;                      // FABS
    else if (oper == OPsqrt)
        op = 0xFA;                      // FSQRT
    else if (oper == OPsin)
        op = 0xFE;                      // FSIN
    else if (oper == OPcos)
        op = 0xFF;                      // FCOS
    else if (oper == OPrint)
        op = 0xFC;                      // FRNDINT
    else
        assert(0);

    regm_t operandRegs = mST0;
    codelem(cdb,e.EV.E1,&operandRegs,false);
    cdb.genf2(0xD9,op);
    fixresult87(cdb,e,mST0,pretregs);
}

/**********************************
 * Unary - for complex operands.
 * Negates ST0, swaps, negates the other component, and swaps back.
 */

@trusted
void neg_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPneg);
    regm_t operandRegs = mST01;
    codelem(cdb,e.EV.E1,&operandRegs,false);
    foreach (_; 0 .. 2)
    {
        cdb.genf2(0xD9,0xE0);           // FCHS
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    }
    fixresult_complex87(cdb,e,mST01,pretregs);
}

/*********************************
 * Load the floating point value addressed by e onto the x87 stack.
 * Nothing is loaded when no result is requested.
 */

@trusted
void cdind87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdind87(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    code cs;

    getlvalue87(cdb,cs,e,0);            // get addressing mode
    if (!*pretregs)
        return;

    // Pick the FLD form for the operand type
    switch (tybasic(e.Ety))
    {
        case TYfloat:
        case TYifloat:
            cs.Iop = 0xD9;
            break;

        case TYidouble:
        case TYdouble:
        case TYdouble_alias:
            cs.Iop = 0xDD;
            break;

        case TYildouble:
        case TYldouble:
            cs.Iop = 0xDB;
            cs.Irm |= modregrm(0,5,0);
            break;

        default:
            assert(0);
    }
    push87(cdb);
    cdb.gen(&cs);                       // FLD EA
    fixresult87(cdb,e,mST0,pretregs);
}

/************************************
 * Reset statics for another .obj file.
 */

@trusted
void cg87_reset()
{
    memset(&oldd,0,oldd.sizeof);
}


/*****************************************
 * Set rounding mode.
 * Params:
 *      cdb = code sink
 *      cw = control word specifying rounding mode
 */

@trusted
private void genSetRoundingMode(ref CodeBuilder cdb, CW cw)
{
    if (config.flags3 & CFG3pic)
    {
        // Position independent code: build the control word in floatreg
        cdb.genfltreg(0xC7, 0, 0);       // MOV floatreg, cw
        code *movcw = cdb.last();
        movcw.IFL2 = FLconst;
        movcw.IEV2.Vuns = cw;

        cdb.genfltreg(0xD9, 5, 0);         // FLDCW floatreg
        return;
    }

    // Otherwise load the control word from a read-only symbol,
    // emitting both symbols the first time one is needed
    if (!oldd.round)            // if not initialized
    {
        oldd.round = 1;

        auto cwi = CW.roundto0;              // round to 0
        oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
        cwi = CW.roundtonearest;             // round to nearest
        oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
    }
    Symbol *rnddir = (cw == CW.roundto0) ? oldd.roundto0 : oldd.roundtonearest;

    code cs;
    cs.Iop = 0xD9;
    cs.Irm = modregrm(0,5,BPRM);
    cs.Iflags = CFoff;
    cs.Irex = 0;
    cs.IFL1 = rnddir.Sfl;
    cs.IEV1.Vsym = rnddir;
    cs.IEV1.Voffset = 0;
    cdb.gen(&cs);                       // FLDCW rnddir
}

/************************* Complex Numbers *********************/

/***************************
 * Set the PSW based on the state of ST01.
 * Input:
 *      pop     if stack should be popped after test
 */

@trusted
private void genctst(ref CodeBuilder cdb,elem *e,int pop)
{
    assert(pop == 0 || pop == 1);

    // Generate:
    //  if (NOSAHF)
    //          FLDZ
    //          FUCOMIP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FLDZ
    //          FUCOMIP ST(2)
    //      L1:
    //        if (pop)
    //          FPOP
    //          FPOP
    //  else if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    // cdbnop holds the L1 label (a NOP) and whatever must follow it
    CodeBuilder cdbnop;
    cdbnop.ctor();
    cdbnop.gennop();
    code *cnop = cdbnop.peek();
    push87(cdb);
    cdb.gen2(0xD9,0xEE);                       // FLDZ
    if (NOSAHF)
    {
        cdb.gen2(0xDF,0xE9);                   // FUCOMIP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDF,0xEA);                   // FUCOMIP ST(2)
        if (pop)
        {
            cdbnop.genf2(0xDD,modregrm(3,3,0));  // FPOP
            cdbnop.genf2(0xDD,modregrm(3,3,0));  // FPOP
            pop87();
            pop87();
        }
    }
    else if (pop)
    {
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        pop87();
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    else
    {
        cdb.gen2(0xDD,0xE1);                   // FUCOM
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xDD,0xEA);                   // FUCOMP ST(2)
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    cdb.append(cdbnop);
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 */

@trusted
void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    static if (0)
    {
        printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
            e,regm_str(retregs),regm_str(*pretregs));
    }

    assert(!*pretregs || retregs);
    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];

    if (isReturnValue)
    {
        // In loadComplex and complex_eq87, complex numbers have the real part
        // pushed to the FPU stack first (ST1), then the imaginary part (ST0).
        // However, the Intel 64 bit ABI scheme requires that types classified
        // as complex x87 instead have the real part returned in ST0, and the
        // imaginary part in ST1.
        if (retregs == mST01 && I64 && (config.exe & EX_posix))
            cdb.genf2(0xD9, 0xC8 + 1);          // FXCH ST(1)
    }

    if (*pretregs == 0 && retregs == mST01)
    {
        // Result not wanted: discard both components
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
    }
    else if (tym == TYllong)
    {
        // passing cfloat through register for I64
        assert(retregs & mST01, "this float expression is not implemented");
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,4);     // FSTP floatreg+4
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,0);     // FSTP floatreg
        genfwait(cdb);
        const reg = findreg(*pretregs);
        getregs(cdb,reg);
        cdb.genfltreg(LOD, reg, 0);             // MOV reg,floatreg
        code_orrex(cdb.last(), REX_W);          // extend to 64 bits
    }
    else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
    {
        // ST01 => DX:AX
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mDX|mAX);
        cdb.genfltreg(LOD, DX, 0);              // MOV EDX,floatreg

        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        cdb.genfltreg(LOD, AX, 0);              // MOV EAX,floatreg
    }
    else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
    {
        // DX:AX => ST01
        push87(cdb);
        cdb.genfltreg(STO, AX, 0);              // MOV floatreg, EAX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        push87(cdb);
        cdb.genfltreg(STO, DX, 0);              // MOV floatreg, EDX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             *pretregs & (mXMM0|mXMM1) && retregs & mST01)
    {
        // ST01 => XMM1:XMM0
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmload(tyf);
        uint mf = tyf == TYfloat ? MFfloat : MFdouble;
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mXMM0|mXMM1);
        cdb.genxmmreg(xop,XMM1,0,tyf);          // load XMM1 from floatreg

        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        cdb.genxmmreg(xop, XMM0, 0, tyf);       // MOVD XMM0,floatreg
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             retregs & (mXMM0|mXMM1) && *pretregs & mST01)
    {
        // XMM1:XMM0 => ST01
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmstore(tyf);
        uint fop = tym == TYcfloat ? 0xD9 : 0xDD;
        push87(cdb);
        cdb.genfltreg(xop, XMM0-XMM0, 0);       // STOS(SD) floatreg, XMM0
        checkSetVex(cdb.last(),tyf);
        cdb.genfltreg(fop, 0, 0);               // FLD double ptr floatreg

        push87(cdb);
        cdb.genxmmreg(xop, XMM1, 0, tyf);       // MOV floatreg, XMM1
        cdb.genfltreg(fop, 0, 0);               // FLD double ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else
    {   if (*pretregs & mPSW)
        {   if (!(retregs & mPSW))
            {   assert(retregs & mST01);
                genctst(cdb,e,!(*pretregs & mST01));        // FTST
            }
        }
        assert(!(*pretregs & mST01) || (retregs & mST01));
    }
    if (*pretregs & mST01)
    {   note87(e,0,1);
        note87(e,sz/2,0);
    }
}

/*****************************************
 * Operators OPc_r and OPc_i
 * Reduce a complex value in ST01 to one of its components in ST0.
 */

@trusted
void cdconvt87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs = mST01;
    codelem(cdb,e.EV.E1, &retregs, false);

    // Stack register that the FSTP targets:
    //  OPc_r: FSTP ST(0), i.e. FPOP
    //  OPc_i: FSTP ST(1)
    uint sti;
    if (e.Eoper == OPc_r)
        sti = 0;
    else if (e.Eoper == OPc_i)
        sti = 1;
    else
        assert(0);

    cdb.genf2(0xDD,0xD8 + sti);
    pop87();

    retregs = mST0;
    fixresult87(cdb, e, retregs, pretregs);
}

/**************************************
 *
Load complex operand into ST01 or flags or both.
 */

@trusted
void cload87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //elem_print(e);
    assert(!I16);
    debug
    if (I32)
    {
        assert(config.inline8087);
        elem_debug(e);
        assert(*pretregs & (mST01 | mPSW));
        assert(!(*pretregs & ~(mST01 | mPSW)));
    }

    //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    const tym_t ty = tybasic(e.Ety);
    const uint sz = _tysize[ty] / 2;    // size of one component

    code cs = void;
    memset(&cs, 0, cs.sizeof);
    if (ADDFWAIT())
        cs.Iflags = CFwait;

    // Memory format for float/double; not used for 80 bit components
    uint mf;
    if (ty == TYcfloat)
        mf = MFfloat;
    else if (ty == TYcdouble)
        mf = MFdouble;
    else if (ty != TYcldouble)
        assert(0);

    regm_t retregs;
    switch (e.Eoper)
    {
        case OPvar:
            notreg(e);                  // never enregister this variable
            goto case OPind;

        case OPind:
            push87(cdb);
            push87(cdb);
            // Load the first component, then the one sz bytes after it
            if (ty == TYcldouble)
                loadea(cdb,e,&cs,0xDB,5,0,0,0);         // FLD var
            else
                loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0);    // FLD var
            cs.IEV1.Voffset += sz;
            cdb.gen(&cs);
            retregs = mST01;
            break;

        case OPd_ld:
        case OPld_d:
        case OPf_d:
        case OPd_f:
            // Conversions are no-ops here: load the operand directly
            cload87(cdb,e.EV.E1, pretregs);
            freenode(e.EV.E1);
            return;

        case OPconst:
            push87(cdb);
            push87(cdb);
            foreach (part; 0 .. 2)
            {
                const ubyte ldop = loadconst(e, part);
                if (!ldop)
                    assert(0);
                cdb.genf2(0xD9,ldop);   // FLDx
            }
            retregs = mST01;
            break;

        default:
            debug elem_print(e);
            assert(0);
    }
    fixresult_complex87(cdb, e, retregs, pretregs);
}

/**********************************************
 * Load OPpair or OPrpair into mST01
 */
@trusted
void loadPair87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    const isReversed = e.Eoper == OPrpair;
    assert(e.Eoper == OPpair || isReversed);

    // Evaluate both halves onto the x87 stack, E1 first
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);
    note87(e.EV.E1, 0, 0);
    codelem(cdb,e.EV.E2, &retregs, false);
    makesure87(cdb,e.EV.E1, 0, 1, 0);

    if (isReversed)
        cdb.genf2(0xD9, 0xC8 + 1);      // FXCH ST(1)

    retregs = mST01;
    fixresult_complex87(cdb, e, retregs, pretregs);
}

/**********************************************
 * Round 80 bit precision to 32 or 64 bits.
 * OPtoprec
 */
@trusted
void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdtoprec: *pretregs = %s\n", regm_str(*pretregs));
    if (!*pretregs)
    {
        // No result wanted: only evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    assert(config.inline8087);
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);

    if (*pretregs & mST0)
    {
        // Result stays on the x87 stack: round it by a store and reload
        const uint mf = (_tysize[tybasic(e.Ety)] == FLOATSIZE) ? MFfloat : MFdouble;
        const ubyte esc = ESC(mf,1);
        cdb.genfltreg(esc,3,0);         // FSTP float/double ptr fltreg
        genfwait(cdb);
        cdb.genfltreg(esc,0,0);         // FLD float/double ptr fltreg
    }
    fixresult87(cdb, e, retregs, pretregs);
}