1 /**
2  * x87 FPU code generation
3  *
4  * Compiler implementation of the
5  * $(LINK2 https://www.dlang.org, D programming language).
6  *
7  * Copyright:   Copyright (C) 1987-1995 by Symantec
8  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
9  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
10  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
11  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cg87.d, backend/cg87.d)
12  */
13 
14 module dmd.backend.cg87;
15 
16 version (SCPP)
17     version = COMPILE;
18 version (MARS)
19     version = COMPILE;
20 
21 version (COMPILE)
22 {
23 
24 import core.stdc.stdio;
25 import core.stdc.stdlib;
26 import core.stdc.string;
27 
28 import dmd.backend.barray;
29 import dmd.backend.cc;
30 import dmd.backend.cdef;
31 import dmd.backend.code;
32 import dmd.backend.code_x86;
33 import dmd.backend.codebuilder;
34 import dmd.backend.mem;
35 import dmd.backend.el;
36 import dmd.backend.global;
37 import dmd.backend.oper;
38 import dmd.backend.ty;
39 import dmd.backend.evalu8 : el_toldoubled;
40 
41 extern (C++):
42 
43 nothrow:
44 @safe:
45 
// State shared by the routines in this module. They read and update its
// `stack` (one NDP entry per x87 register ST(0)..ST(7)), `stackused`
// (how many of those registers are in use) and `save` (the spill slots
// indexed by getemptyslot()).
// NOTE: this could be a TLS global which would allow this variable to be used in
//       a multi-threaded version of the backend
__gshared Globals87 global87;
49 
/// Returns: a word in which only bit number `m` is set.
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
51 
52 
// Constants that the 8087 supports directly
// (loadconst() pairs each of them with its dedicated load instruction)
// BUG: rewrite for 80 bit long doubles
enum PI            = 3.14159265358979323846;        // pi       (FLDPI)
enum LOG2          = 0.30102999566398119521;        // log10(2) (FLDLG2)
enum LN2           = 0.6931471805599453094172321;   // ln(2)    (FLDLN2)
enum LOG2T         = 3.32192809488736234787;        // log2(10) (FLDL2T)
enum LOG2E         = 1.4426950408889634074;   // log2(e) = 1/LN2 (FLDL2E)

enum FWAIT = 0x9B;            // FWAIT opcode, emitted by genfwait()
62 
/***************************
 * Mark the variable referenced by e as not a register candidate.
 * Params:
 *      e = elem whose EV.Vsym is the symbol to update
 * Returns:
 *      the symbol's Sflags after GTregcand has been cleared
 */
@trusted
uint notreg(elem* e)
{
    auto sym = e.EV.Vsym;
    sym.Sflags &= ~GTregcand;
    return sym.Sflags;
}
66 
/***************************
 * Generate the appropriate ESC instruction.
 * Params:
 *      MF = one of the MFxxx memory-format values
 *      b  = low bit of the opcode byte
 * Returns:
 *      0xD8 + 2 * MF + b, truncated to a byte
 */
ubyte ESC(uint MF, uint b)
{
    const uint opcode = 0xD8 + (MF << 1) + b;
    return cast(ubyte) opcode;
}
// Memory-format selector passed to ESC(); it occupies bits 1..2 of the
// ESC opcode byte (0xD8 + (MF << 1) + b).
enum
{   // Values for MF
    MFfloat         = 0,    // 32 bit real     (opcodes D8/D9)
    MFlong          = 1,    // 32 bit integer  (opcodes DA/DB)
    MFdouble        = 2,    // 64 bit real     (opcodes DC/DD)
    MFword          = 3     // 16 bit integer  (opcodes DE/DF)
}
76 
/*********************************
 * Record of an int and two Symbol pointers.
 * NOTE(review): nothing in this part of the file reads or writes these
 * fields. Presumably `round` tracks the rounding mode currently in effect
 * and the two symbols refer to stored control words for the
 * CW_roundto0 / CW_roundtonearest settings - confirm against the code
 * that uses `oldd`.
 */

struct Dconst
{
    int round;
    Symbol *roundto0;
    Symbol *roundtonearest;
}

// Module-private instance, shared across threads (__gshared).
private __gshared Dconst oldd;
88 
// Set to 1 to make pop87/push87/note87/makesure87/save87 printf a trace
// of what they do to the x87 register stack model.
enum NDPP = 0;       // print out debugging info
90 
/***************************
 * Returns:
 *      true if the SAHF instruction can't be used, which is the case
 *      for 64 bit code and whenever config.fpxmmregs is set
 */
@trusted
bool NOSAHF()
{
    if (I64)
        return true;
    return config.fpxmmregs != 0;
}
93 
// x87 control word values. The two differ only in bits 10..11 (0xC00),
// the rounding-control field of the control word.
enum CW_roundto0 = 0xFBF;           // RC = 11b: round toward zero (truncate)
enum CW_roundtonearest = 0x3BF;     // RC = 00b: round to nearest
96 
97 
/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save into an array of NDP structs.
 */
102 
/****************************************
 * getlvalue() for an operand that is going to be used by an x87
 * instruction.
 * Params:
 *      cdb = code sink, passed on to getlvalue()
 *      pcs = instruction being set up; its flags / REX byte are adjusted
 *      e = the operand
 *      keepmsk = passed on to getlvalue()
 */
@trusted
private void getlvalue87(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
{
    // the x87 instructions cannot read XMM registers,
    // so a directly named symbol must not be a register candidate
    const bool namesSymbol = e.Eoper == OPvar || e.Eoper == OPrelconst;
    if (namesSymbol)
        e.EV.Vsym.Sflags &= ~GTregcand;

    getlvalue(cdb, pcs, e, keepmsk);

    if (ADDFWAIT())
        pcs.Iflags |= CFwait;

    // strip whatever operand-size marking getlvalue() left behind
    if (I32)
        pcs.Iflags &= ~CFopsize;
    else if (I64)
        pcs.Irex &= ~REX_W;
}
118 
/****************************************
 * Store (and pop) ST0 to ndp save location i.
 * Params:
 *      cdb = code sink
 *      i = index of the save slot, addressed as FLndp
 *      ty = type of the value; selects the 32, 64 or 80 bit form of FSTP
 */

@trusted
private void ndp_fstp(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint op;            // ESC opcode byte
    uint reg;           // reg field of the modregrm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            op = 0xD9;                  // FSTP m32real
            reg = 3;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            op = 0xDD;                  // FSTP m64real
            reg = 3;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            op = 0xDB;                  // FSTP m80real
            reg = 7;
            break;

        default:
            assert(0);
    }

    cdb.genc1(op,modregrm(2,reg,BPRM),FLndp,i);     // FSTP i[BP]
}
151 
/****************************************
 * Load ndp save location i onto the x87 stack.
 * Params:
 *      cdb = code sink
 *      i = index of the save slot, addressed as FLndp
 *      ty = type of the value; selects the 32, 64 or 80 bit form of FLD
 */
@trusted
private void ndp_fld(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint op;            // ESC opcode byte
    uint reg;           // reg field of the modregrm byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            op = 0xD9;                  // FLD m32real
            reg = 0;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            op = 0xDD;                  // FLD m64real
            reg = 0;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            op = 0xDB;                  // FLD m80real
            reg = 5;
            break;

        default:
            assert(0);
    }

    cdb.genc1(op,modregrm(2,reg,BPRM),FLndp,i);     // FLD i[BP]
}
180 
/**************************
 * Find an unused entry in global87.save[], i.e. one whose .e is null.
 * If every entry is taken, a fresh one is appended.
 * Returns:
 *      index of the entry
 */

@trusted
private int getemptyslot()
{
    int slot = 0;
    while (slot < global87.save.length)
    {
        if (global87.save[slot].e == null)
            return slot;
        ++slot;
    }

    // all in use: grow the array, slot now indexes the new last entry
    global87.save.push(NDP());
    return slot;
}
197 
/*********************************
 * Pop the model of the 8087 stack: every entry moves up one place
 * and the bottom entry becomes empty. No code is generated.
 *
 * The overload without arguments forwards its own line and file, so
 * that is the location the NDPP trace reports for such calls.
 */

void pop87() { pop87(__LINE__, __FILE__); }

@trusted
void pop87(int line, const(char)* file)
{
    if (NDPP)
        printf("pop87(%s(%d): stackused=%d)\n", file, line, global87.stackused);

    global87.stackused -= 1;
    assert(global87.stackused >= 0);

    const last = global87.stack.length - 1;
    foreach (k; 0 .. last)
        global87.stack[k] = global87.stack[k + 1];

    // end of stack is nothing
    global87.stack[last] = NDP();
}
219 
220 
/*******************************
 * Push the model of the 8087 stack, leaving an empty entry at ST(0).
 * If ST(7) is occupied, code is appended to cdb that spills it to a
 * save slot first, so nothing runs off the end of the stack.
 *
 * The two-argument overload forwards its own line and file, so that is
 * the location the NDPP trace reports for such calls.
 */

void push87(ref CodeBuilder cdb) { push87(cdb,__LINE__,__FILE__); }

@trusted
void push87(ref CodeBuilder cdb, int line, const(char)* file)
{
    enum bottom = 7;                    // index of the deepest register

    if (global87.stack[bottom].e == null)
    {
        // there is room, just account for one more register
        if (NDPP) printf("push87(%s(%d): %d)\n", file, line, global87.stackused);
        ++global87.stackused;
        assert(global87.stackused <= 8);
    }
    else
    {
        // we would lose the bottom register off of the stack
        const slot = getemptyslot();
        global87.save[slot] = global87.stack[bottom];
        cdb.genf2(0xD9,0xF6);                               // FDECSTP
        genfwait(cdb);
        ndp_fstp(cdb, slot, global87.stack[bottom].e.Ety);  // FSTP slot[BP]
        assert(global87.stackused == 8);
        if (NDPP) printf("push87() : overflow\n");
    }

    // Shift the stack up
    for (int k = bottom; k > 0; --k)
        global87.stack[k] = global87.stack[k - 1];
    global87.stack[0] = NDP();
}
253 
/*****************************
 * Record that ST(i) holds elem e, a value we want to keep.
 * If e is a comma expression, its rightmost operand is what gets recorded.
 * Params:
 *      e = elem whose value is in ST(i)
 *      offset = stored alongside e in the stack entry
 *      i = register number, must be less than global87.stackused
 *      linnum = line number shown in the debug trace
 */

void note87(elem *e, uint offset, int i)
{
    note87(e, offset, i, __LINE__);
}

@trusted
void note87(elem *e, uint offset, int i, int linnum)
{
    if (NDPP)
        printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);

    static if (0)
    {
        if (global87.stack[i].e)
            printf("global87.stack[%d].e = %p\n",i,global87.stack[i].e);
    }

    debug
    {
        // show what is about to trip the assert below
        if (i >= global87.stackused)
        {
            printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum);
            elem_print(e);
        }
    }
    assert(i < global87.stackused);

    elem* noted = e;
    while (noted.Eoper == OPcomma)
        noted = noted.EV.E2;

    global87.stack[i].offset = offset;
    global87.stack[i].e = noted;
}
287 
/****************************************************
 * Swap entries i and j of the 8087 stack model.
 * No code is generated.
 */

@trusted
void xchg87(int i, int j)
{
    NDP held = global87.stack[j];
    global87.stack[j] = global87.stack[i];
    global87.stack[i] = held;
}
301 
/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 *
 * If ST(i) does not already hold (e, offset), that entry must be empty
 * and the value must be present in global87.save[]; it is then pushed
 * back onto the x87 stack from its save slot and the slot is released.
 * Params:
 *      cdb = code sink
 *      e = elem wanted; for a comma expression its rightmost operand is used
 *      offset = offset recorded with e
 *      i = 0..3, 8087 register number
 *      flag = if bit 0 is set, don't bother with FXCH
 *      linnum = line number shown in the debug trace
 */

private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag)
{
    makesure87(cdb,e,offset,i,flag,__LINE__);
}

@trusted
private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag,int linnum)
{
    debug if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);

    // Check the arguments before e is dereferenced, not after
    assert(e && i < 4);
    while (e.Eoper == OPcomma)
    {
        e = e.EV.E2;
        assert(e);
    }

    if (global87.stack[i].e == e && global87.stack[i].offset == offset)
        return;                             // already in ST(i)

    debug if (global87.stack[i].e)
        printf("global87.stack[%d].e = %p, .offset = %d\n",i,global87.stack[i].e,global87.stack[i].offset);

    assert(global87.stack[i].e == null);

    // Find the save slot holding (e, offset). There is no retry with the
    // right operand of a comma expression here: e is never an OPcomma
    // once the loop above has finished.
    int j;
    for (j = 0; 1; j++)
    {
        debug if (j >= global87.save.length)
            printf("e = %p, global87.save.length = %llu\n",e, cast(ulong) global87.save.length);

        assert(j < global87.save.length);
        //printf("\tglobal87.save[%d] = %p, .offset = %d\n", j, global87.save[j].e, global87.save[j].offset);
        if (e == global87.save[j].e && offset == global87.save[j].offset)
            break;
    }

    push87(cdb);
    genfwait(cdb);
    ndp_fld(cdb, j, e.Ety);                 // FLD j[BP]
    if (!(flag & 1))
    {
        // emits FXCH ST(i), FXCH ST(i-1), ... FXCH ST(1)
        while (i != 0)
        {
            cdb.genf2(0xD9,0xC8 + i);       // FXCH ST(i)
            i--;
        }
    }
    global87.save[j] = NDP();               // back in 8087
}
361 
/****************************
 * Save in memory any values in the 8087 that we want to keep.
 * As long as ST(0) holds a noted value and the stack is in use, that
 * value is stored to a fresh save slot and popped.
 */

@trusted
void save87(ref CodeBuilder cdb)
{
    uint nstores = 0;

    while (global87.stack[0].e && global87.stackused)
    {
        // Save it
        const slot = getemptyslot();
        if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e,slot);
        global87.save[slot] = global87.stack[0];

        genfwait(cdb);
        ndp_fstp(cdb,slot,global87.stack[0].e.Ety);    // FSTP slot[BP]
        pop87();
        ++nstores;
    }

    if (nstores)            // if any stores
        genfwait(cdb);      // wait for last one to finish
}
385 
/******************************************
 * Save any noted values that would be destroyed by n pushes.
 * Afterwards at most 8 - n registers are counted as in use.
 * Params:
 *      cdb = code sink
 *      n = number of pushes to make room for, at most 7
 *
 * NOTE(review): an FDECSTP followed by FSTP leaves the x87 TOP pointer
 * where it was, so when more than one occupied register is spilled the
 * second pair looks like it addresses the same physical register as the
 * first - verify against the instruction definitions.
 */

@trusted
void save87regs(ref CodeBuilder cdb, uint n)
{
    assert(n <= 7);
    const uint keep = 8 - n;            // registers allowed to stay in use
    if (global87.stackused <= keep)
        return;                         // n pushes fit as things are

    // From the deepest register upwards
    for (uint depth = 8; depth > keep; --depth)
    {
        cdb.genf2(0xD9,0xF6);           // FDECSTP
        genfwait(cdb);
        if (depth > global87.stackused)
            continue;                   // no value recorded this deep

        const slot = getemptyslot();
        ndp_fstp(cdb, slot, global87.stack[depth - 1].e.Ety);   // FSTP slot[BP]
        global87.save[slot] = global87.stack[depth - 1];
        global87.stack[depth - 1] = NDP();
    }

    // One FINCSTP for every FDECSTP above that was not followed by a store
    for (uint depth = 8; depth > keep; --depth)
    {
        if (depth > global87.stackused)
        {
            cdb.genf2(0xD9,0xF7);       // FINCSTP
            genfwait(cdb);
        }
    }
    global87.stackused = keep;
}
420 
/*****************************************************
 * Save/restore ST0 or ST01
 *
 * Appends to cdbsave the stores that spill the register(s) to save
 * slots as 80 bit values, and to cdbrestore the matching loads in the
 * opposite order.
 * Params:
 *      regm = mST0 or mST01
 *      cdbsave = receives the FSTP instruction(s)
 *      cdbrestore = receives the FLD instruction(s)
 */

@trusted
void gensaverestore87(regm_t regm, ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore87(%s)\n", regm_str(regm));
    assert(regm == mST0 || regm == mST01);

    // The placeholder elem makes .e non-null, so the getemptyslot() call
    // further down cannot hand out the same slot again.
    int i = getemptyslot();
    global87.save[i].e = el_calloc();       // this blocks slot [i] for the life of this function
    ndp_fstp(cdbsave, i, TYldouble);

    // The reload of slot i is built separately because it has to come
    // last in cdbrestore.
    CodeBuilder cdb2a;
    cdb2a.ctor();
    ndp_fld(cdb2a, i, TYldouble);

    if (regm == mST01)
    {
        // second store goes to slot j; its reload goes into cdbrestore first
        int j = getemptyslot();
        global87.save[j].e = el_calloc();
        ndp_fstp(cdbsave, j, TYldouble);
        ndp_fld(cdbrestore, j, TYldouble);
    }

    cdbrestore.append(cdb2a);
}
449 
/*************************************
 * Find which, if any, in-use entry of the 8087 stack model holds
 * elem e at the given offset.
 * Returns:
 *      index of the entry, or -1 if there is none
 */

@trusted
private int cse_get(elem *e, uint offset)
{
    for (int i = 0; i != global87.stackused; ++i)
    {
        if (global87.stack[i].e == e &&
            global87.stack[i].offset == offset)
        {
            //printf("cse found %d\n",i);
            //elem_print(e);
            return i;
        }
    }

    //printf("cse not found\n");
    //elem_print(e);
    return -1;
}
477 
/*************************************
 * Reload common subexpression.
 * If the value (both halves of it, for a complex type) is still noted
 * on the 8087 stack, a copy is pushed with FLD ST(n); otherwise it is
 * reloaded through loaddata().
 * Params:
 *      cdb = code sink
 *      e = the common subexpression
 *      pretregs = where the result is wanted
 */

void comsub87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    // Look on 8087 stack
    const re = cse_get(e, 0);

    if (!tycomplex(e.Ety))
    {
        if (re < 0)
        {
            // Reload
            loaddata(cdb,e,pretregs);
            return;
        }

        push87(cdb);
        cdb.genf2(0xD9,0xC0 + re);          // FLD ST(re)
        if (*pretregs & XMMREGS)
            fixresult87(cdb,e,mST0,pretregs);
        else
            fixresult(cdb,e,mST0,pretregs);
        return;
    }

    // The second half of a complex value is noted at offset size / 2
    const uint sz = tysize(e.Ety);
    const im = cse_get(e, sz / 2);
    if (re < 0 || im < 0)
    {
        // Reload
        loaddata(cdb,e,pretregs);
        return;
    }

    push87(cdb);
    push87(cdb);
    cdb.genf2(0xD9,0xC0 + re);              // FLD ST(re)
    cdb.genf2(0xD9,0xC0 + im + 1);          // FLD ST(im + 1), the first FLD moved it down one
    fixresult_complex87(cdb,e,mST01,pretregs);
}
520 
521 
/*******************************
 * Append an FWAIT to cdb, but only if ADDFWAIT() says one is needed.
 */

public void genfwait(ref CodeBuilder cdb)
{
    if (!ADDFWAIT())
        return;
    cdb.gen1(FWAIT);
}
531 
532 
/***************************
 * Put the 8087 flags into the CPU flags.
 * The status word is fetched into AX (AH is all that is needed) and
 * SAHF moves AH into the flags. Must not be used when NOSAHF is true.
 */

@trusted
private void cg87_87topsw(ref CodeBuilder cdb)
{
    /* Note that SAHF is not available on some early I64 processors
     * and will cause a seg fault
     */
    assert(!NOSAHF);
    getregs(cdb,mAX);

    if (config.target_cpu < TARGET_80286)
    {
        // no FSTSW AX yet, go through memory
        cdb.genfltreg(0xD8+5,7,0);        // FSTSW floatreg[BP]
        genfwait(cdb);                    // FWAIT
        cdb.genfltreg(0x8A,4,1);          // MOV AH,floatreg+1[BP]
    }
    else
        cdb.genf2(0xDF,0xE0);             // FSTSW AX

    cdb.gen1(0x9E);                       // SAHF
    code_orflag(cdb.last(),CFpsw);
}
556 
/*****************************************
 * Jump to ctarget if condition code C2 is set.
 * C2 is bit 10 of the 8087 status word, which is bit 2 of AH after
 * FSTSW AX and ends up in the parity flag after SAHF.
 */

@trusted
private void genjmpifC2(ref CodeBuilder cdb, code *ctarget)
{
    if (!NOSAHF)
    {
        cg87_87topsw(cdb);
        genjmp(cdb, JP, FLcode, cast(block *)ctarget);  // JP ctarget
        return;
    }

    // no SAHF: test the bit in AH directly
    getregs(cdb,mAX);
    cdb.genf2(0xDF,0xE0);                                    // FSTSW AX
    cdb.genc2(0xF6,modregrm(3,0,4),4);                       // TEST AH,4
    genjmp(cdb, JNE, FLcode, cast(block *)ctarget); // JNE ctarget
}
577 
/***************************
 * Set the PSW based on the state of ST0.
 * The code is appended to cdb; nothing is returned.
 * Params:
 *      cdb = code sink
 *      e = elem handed to callclib() when the library routine is used
 *      pop = if stack should be popped after test
 */

@trusted
private void genftst(ref CodeBuilder cdb,elem *e,int pop)
{
    const bool sahfOk = !NOSAHF;
    const bool fastfloat = (config.flags4 & CFG4fastfloat) != 0;

    if (sahfOk && !fastfloat && config.target_cpu >= TARGET_80386)
    {
        // FUCOMP doesn't raise exceptions on QNANs, unlike FTST
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(pop ? 0xDA : 0xDD,0xE9);    // FUCOMPP / FUCOMP
        pop87();                             // the zero is gone either way
        if (pop)
            pop87();                         // FUCOMPP took ST0's value too
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
        return;
    }

    if (!sahfOk)
    {
        push87(cdb);
        cdb.gen2(0xD9,0xEE);                 // FLDZ
        cdb.gen2(0xDF,0xE9);                 // FUCOMIP ST1
        pop87();
    }
    else if (fastfloat)                      // if fast floating point
    {
        cdb.genf2(0xD9,0xE4);                // FTST
        cg87_87topsw(cdb);                   // put 8087 flags in CPU flags
    }
    else
    {
        // Call library function which does not raise exceptions
        regm_t regm = 0;

        callclib(cdb,e,CLIB.ftest,&regm,0);
    }

    // None of the three sequences above removed the tested value
    if (pop)
    {
        cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
        pop87();
    }
}
635 
/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 *
 * The value returned is the second byte of the two byte instruction,
 * e.g. 0xEE for FLDZ, which is encoded as D9 EE.
 * Input:
 *      e       constant of a real, imaginary or complex floating type
 *      im      0       load real part
 *              1       load imaginary part (complex types only)
 * Returns:
 *      opcode if found
 *      0 if not
 */

@trusted
ubyte loadconst(elem *e, int im)
{
    elem_debug(e);
    assert(im == 0 || im == 1);

    // The three value tables below and opcode[] are parallel: entry i of
    // each value table is the constant loaded by opcode[i].
    immutable float[7] fval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];
    immutable double[7] dval =
        [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2];

    // Host `real` narrower than 80 bits: take the constants from
    // dmd.root.longdouble instead of using real literals.
    static if (real.sizeof < 10)
    {
        import dmd.root.longdouble;
        immutable targ_ldouble[7] ldval =
        [ld_zero,ld_one,ld_pi,ld_log2t,ld_log2e,ld_log2,ld_ln2];
    }
    else
    {
        enum M_PI_L        = 0x1.921fb54442d1846ap+1L;       // 3.14159 fldpi
        enum M_LOG2T_L     = 0x1.a934f0979a3715fcp+1L;       // 3.32193 fldl2t
        enum M_LOG2E_L     = 0x1.71547652b82fe178p+0L;       // 1.4427 fldl2e
        enum M_LOG2_L      = 0x1.34413509f79fef32p-2L;       // 0.30103 fldlg2
        enum M_LN2_L       = 0x1.62e42fefa39ef358p-1L;       // 0.693147 fldln2
        immutable targ_ldouble[7] ldval =
        [0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L];
    }

    // One more entry than the value tables: the trailing 0 is what gets
    // returned when the search loop at the end finds no match.
    immutable ubyte[7 + 1] opcode =
        /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
        [0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0];

    int i;
    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;                 // number of significant bytes at *p: 4, 8 or 10
    int zero;               // non-zero if those bytes are all 0
    void *p;                // points at whichever of f, d, ld was filled in
    immutable ubyte[16] zeros;

    // Fetch the requested part of the constant into f, d or ld
    if (im == 0)
    {
        switch (tybasic(e.Ety))
        {
            case TYfloat:
            case TYifloat:
            case TYcfloat:
                f = e.EV.Vfloat;
                sz = 4;
                p = &f;
                break;

            case TYdouble:
            case TYdouble_alias:
            case TYidouble:
            case TYcdouble:
                d = e.EV.Vdouble;
                sz = 8;
                p = &d;
                break;

            case TYldouble:
            case TYildouble:
            case TYcldouble:
                ld = e.EV.Vldouble;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }
    else
    {
        // only complex types have a separate imaginary part
        switch (tybasic(e.Ety))
        {
            case TYcfloat:
                f = e.EV.Vcfloat.im;
                sz = 4;
                p = &f;
                break;

            case TYcdouble:
                d = e.EV.Vcdouble.im;
                sz = 8;
                p = &d;
                break;

            case TYcldouble:
                ld = e.EV.Vcldouble.im;
                sz = 10;
                p = &ld;
                break;

            default:
                assert(0);
        }
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    // (hence the byte-wise comparison rather than `== 0`)
    assert(sz <= zeros.length);
    zero = (memcmp(p, zeros.ptr, sz) == 0);
    if (zero && config.target_cpu >= TARGET_PentiumPro)
        return 0xEE;            // FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
        return 0;

    if (zero)
        return 0xEE;

    // Search entries 1.. of the table matching the operand size (zero was
    // dealt with above). On a match the loop is left with i at that entry;
    // otherwise it runs to i == 7 and opcode[7], which is 0, is returned.
    for (i = 1; i < fval.length; i++)
    {
        switch (sz)
        {
            case 4:
                if (fval[i] != f)
                    continue;
                break;
            case 8:
                if (dval[i] != d)
                    continue;
                break;
            case 10:
                if (ldval[i] != ld)
                    continue;
                break;
            default:
                assert(0);
        }
        break;
    }
    return opcode[i];
}
785 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Handles moving a value between general registers, XMM registers and
 * ST0 (through the fltreg memory temporary), discarding an unwanted ST0,
 * and setting the flags when *pretregs asks for mPSW. Anything involving
 * mST01 is passed on to fixresult_complex87().
 * Params:
 *      cdb = code sink
 *      e = the expression the result belongs to
 *      retregs = where the result currently is
 *      pretregs = where the result is wanted; may be updated by allocreg()
 *      isReturnValue = only passed through to fixresult_complex87()
 */

@trusted
void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
    //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
    // if a result is wanted, there has to be one
    assert(!*pretregs || retregs);

    if ((*pretregs | retregs) & mST01)
    {
        fixresult_complex87(cdb, e, retregs, pretregs, isReturnValue);
        return;
    }

    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];
    //printf("tym = x%x, sz = %d\n", tym, sz);

    /* if retregs needs to be transferred into the 8087 */
    if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
    {
        debug if (sz > DOUBLESIZE)
        {
            elem_print(e);
            printf("retregs = %s\n", regm_str(retregs));
        }
        assert(sz <= DOUBLESIZE);
        if (!I16)
        {

            if (*pretregs & mPSW)
            {   // Set flags
                regm_t r = retregs | mPSW;
                fixresult(cdb,e,retregs,&r);
            }
            push87(cdb);
            // store the register(s) to fltreg, then load that onto the 8087
            if (sz == REGSIZE || (I64 && sz == 4))
            {
                // value is in a single register
                const reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);           // MOV fltreg,reg
                cdb.genfltreg(0xD9,0,0);            // FLD float ptr fltreg
            }
            else
            {
                // value is in a register pair
                const msreg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cdb.genfltreg(STO,lsreg,0);         // MOV fltreg,lsreg
                cdb.genfltreg(STO,msreg,4);         // MOV fltreg+4,msreg
                cdb.genfltreg(0xDD,0,0);            // FLD double ptr fltreg
            }
        }
        else
        {
            // 16 bit code: get the value into FLOATREGS / DOUBLEREGS and
            // have a library routine do the transfer
            regm_t regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
            regm |= *pretregs & mPSW;
            fixresult(cdb,e,retregs,&regm);
            regm = 0;           // don't worry about result from CLIB.xxx
            callclib(cdb,e,
                    ((sz == FLOATSIZE) ? CLIB.fltto87 : CLIB.dblto87),
                    &regm,0);
        }
    }
    /* if ST0 needs to be transferred into general registers */
    else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
    {
        assert(sz <= DOUBLESIZE);
        uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
        // flags have to be set while the value is still in ST0
        if (*pretregs & mPSW && !(retregs & mPSW))
            genftst(cdb,e,0);
        // FSTP floatreg
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);
        genfwait(cdb);
        reg_t reg;
        allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
        // load the register(s) back from fltreg
        if (sz == FLOATSIZE)
        {
            if (!I16)
                cdb.genfltreg(LOD,reg,0);
            else
            {
                // register pair: reg from fltreg+REGSIZE, lsw register from fltreg
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
        }
        else
        {   assert(sz == DOUBLESIZE);
            if (I16)
            {
                // four words, most significant one in AX
                cdb.genfltreg(LOD,AX,6);
                cdb.genfltreg(LOD,BX,4);
                cdb.genfltreg(LOD,CX,2);
                cdb.genfltreg(LOD,DX,0);
            }
            else if (I32)
            {
                cdb.genfltreg(LOD,reg,REGSIZE);
                cdb.genfltreg(LOD,findreglsw(*pretregs),0);
            }
            else // I64
            {
                // one load, with REX_W set on it
                cdb.genfltreg(LOD,reg,0);
                code_orrex(cdb.last(), REX_W);
            }
        }
    }
    /* result is in ST0 but is not wanted at all: discard it */
    else if (*pretregs == 0 && retregs == mST0)
    {
        cdb.genf2(0xDD,modregrm(3,3,0));    // FPOP
        pop87();
    }
    else
    {
        if (*pretregs & mPSW)
        {
            if (!(retregs & mPSW))
            {
                // pop ST0 after the test unless the value is wanted in ST0 or an XMM register
                genftst(cdb,e,!(*pretregs & (mST0 | XMMREGS))); // FTST
            }
        }
        /* XMM register to ST0, by way of fltreg */
        if (*pretregs & mST0 && retregs & XMMREGS)
        {
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // MOVD floatreg,XMM?
            const reg = findreg(retregs);
            cdb.genxmmreg(xmmstore(tym),reg,0,tym);
            push87(cdb);
            cdb.genfltreg(ESC(mf,1),0,0);                 // FLD float/double ptr fltreg
        }
        /* ST0 to XMM register, by way of fltreg */
        else if (retregs & mST0 && *pretregs & XMMREGS)
        {
            assert(sz <= DOUBLESIZE);
            uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
            // FSTP floatreg
            pop87();
            cdb.genfltreg(ESC(mf,1),3,0);
            genfwait(cdb);
            // MOVD XMM?,floatreg
            reg_t reg;
            allocreg(cdb,pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
            cdb.genxmmreg(xmmload(tym),reg,0,tym);
        }
        else
            // nothing to move: if ST0 is wanted, it must be where the result already is
            assert(!(*pretregs & mST0) || (retregs & mST0));
    }
    // whenever the result ends up in ST0, record e as being there
    if (*pretregs & mST0)
        note87(e,0,0);
}
938 
939 /********************************
940  * Generate in-line 8087 code for the following operators:
941  *      add
942  *      min
943  *      mul
944  *      div
945  *      cmp
946  */
947 
// Reverse the order that the op is done in
// Maps an operation code to the one with its operands swapped: 4 <-> 5
// and 6 <-> 7 trade places, 0..3 map to themselves. The leading
// cast(ubyte)-1 entry suggests the table is indexed with op + 1, so that
// op == -1 maps to itself - NOTE(review): confirm at the point of use.
__gshared const ubyte[9] oprev = [ cast(ubyte)-1,0,1,2,3,5,4,7,6 ];
950 
951 @trusted
952 void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
953 {
954     //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
955     // we could be evaluating / for side effects only
956     assert(*pretregs != 0);
957 
958     elem *e1 = e.EV.E1;
959     elem *e2 = e.EV.E2;
960     uint sz2 = tysize(e1.Ety);
961     if (tycomplex(e1.Ety))
962         sz2 /= 2;
963 
964     OPER eoper = e.Eoper;
965     if (eoper == OPmul && e2.Eoper == OPconst && el_toldoubled(e.EV.E2) == 2.0L)
966     {
967         // Perform "mul 2.0" as fadd ST(0), ST
968         regm_t retregs = mST0;
969         codelem(cdb,e1,&retregs,false);
970         cdb.genf2(0xDC, 0xC0);                    // fadd ST(0), ST;
971         fixresult87(cdb,e,mST0,pretregs);         // result is in ST(0).
972         freenode(e2);
973         return;
974     }
975 
976     uint op;
977     if (OTrel(eoper))
978         eoper = OPeqeq;
979     bool imaginary;
980     static uint X(OPER op, uint ty1, uint ty2) { return (op << 16) + ty1 * 256 + ty2; }
981     switch (X(eoper, tybasic(e1.Ety), tybasic(e2.Ety)))
982     {
983         case X(OPadd, TYfloat, TYfloat):
984         case X(OPadd, TYdouble, TYdouble):
985         case X(OPadd, TYdouble_alias, TYdouble_alias):
986         case X(OPadd, TYldouble, TYldouble):
987         case X(OPadd, TYldouble, TYdouble):
988         case X(OPadd, TYdouble, TYldouble):
989         case X(OPadd, TYifloat, TYifloat):
990         case X(OPadd, TYidouble, TYidouble):
991         case X(OPadd, TYildouble, TYildouble):
992             op = 0;                             // FADDP
993             break;
994 
995         case X(OPmin, TYfloat, TYfloat):
996         case X(OPmin, TYdouble, TYdouble):
997         case X(OPmin, TYdouble_alias, TYdouble_alias):
998         case X(OPmin, TYldouble, TYldouble):
999         case X(OPmin, TYldouble, TYdouble):
1000         case X(OPmin, TYdouble, TYldouble):
1001         case X(OPmin, TYifloat, TYifloat):
1002         case X(OPmin, TYidouble, TYidouble):
1003         case X(OPmin, TYildouble, TYildouble):
1004             op = 4;                             // FSUBP
1005             break;
1006 
1007         case X(OPmul, TYfloat, TYfloat):
1008         case X(OPmul, TYdouble, TYdouble):
1009         case X(OPmul, TYdouble_alias, TYdouble_alias):
1010         case X(OPmul, TYldouble, TYldouble):
1011         case X(OPmul, TYldouble, TYdouble):
1012         case X(OPmul, TYdouble, TYldouble):
1013         case X(OPmul, TYifloat, TYifloat):
1014         case X(OPmul, TYidouble, TYidouble):
1015         case X(OPmul, TYildouble, TYildouble):
1016         case X(OPmul, TYfloat, TYifloat):
1017         case X(OPmul, TYdouble, TYidouble):
1018         case X(OPmul, TYldouble, TYildouble):
1019         case X(OPmul, TYifloat, TYfloat):
1020         case X(OPmul, TYidouble, TYdouble):
1021         case X(OPmul, TYildouble, TYldouble):
1022             op = 1;                             // FMULP
1023             break;
1024 
1025         case X(OPdiv, TYfloat, TYfloat):
1026         case X(OPdiv, TYdouble, TYdouble):
1027         case X(OPdiv, TYdouble_alias, TYdouble_alias):
1028         case X(OPdiv, TYldouble, TYldouble):
1029         case X(OPdiv, TYldouble, TYdouble):
1030         case X(OPdiv, TYdouble, TYldouble):
1031         case X(OPdiv, TYifloat, TYifloat):
1032         case X(OPdiv, TYidouble, TYidouble):
1033         case X(OPdiv, TYildouble, TYildouble):
1034             op = 6;                             // FDIVP
1035             break;
1036 
1037         case X(OPmod, TYfloat, TYfloat):
1038         case X(OPmod, TYdouble, TYdouble):
1039         case X(OPmod, TYdouble_alias, TYdouble_alias):
1040         case X(OPmod, TYldouble, TYldouble):
1041         case X(OPmod, TYfloat, TYifloat):
1042         case X(OPmod, TYdouble, TYidouble):
1043         case X(OPmod, TYldouble, TYildouble):
1044         case X(OPmod, TYifloat, TYifloat):
1045         case X(OPmod, TYidouble, TYidouble):
1046         case X(OPmod, TYildouble, TYildouble):
1047         case X(OPmod, TYifloat, TYfloat):
1048         case X(OPmod, TYidouble, TYdouble):
1049         case X(OPmod, TYildouble, TYldouble):
1050             op = cast(uint) -1;
1051             break;
1052 
1053         case X(OPeqeq, TYfloat, TYfloat):
1054         case X(OPeqeq, TYdouble, TYdouble):
1055         case X(OPeqeq, TYdouble_alias, TYdouble_alias):
1056         case X(OPeqeq, TYldouble, TYldouble):
1057         case X(OPeqeq, TYifloat, TYifloat):
1058         case X(OPeqeq, TYidouble, TYidouble):
1059         case X(OPeqeq, TYildouble, TYildouble):
1060         {
1061             assert(OTrel(e.Eoper));
1062             assert((*pretregs & mST0) == 0);
1063             regm_t retregs = mST0;
1064             codelem(cdb,e1,&retregs,false);
1065             note87(e1,0,0);
1066             regm_t resregm = mPSW;
1067 
1068             if (rel_exception(e.Eoper) || config.flags4 & CFG4fastfloat)
1069             {
1070                 if (e2.Eoper == OPconst && !boolres(e2))
1071                 {
1072                     if (NOSAHF)
1073                     {
1074                         push87(cdb);
1075                         cdb.gen2(0xD9,0xEE);             // FLDZ
1076                         cdb.gen2(0xDF,0xF1);             // FCOMIP ST1
1077                         pop87();
1078                     }
1079                     else
1080                     {
1081                         cdb.genf2(0xD9,0xE4);            // FTST
1082                         cg87_87topsw(cdb);
1083                     }
1084                     cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
1085                     pop87();
1086                 }
1087                 else if (NOSAHF)
1088                 {
1089                     note87(e1,0,0);
1090                     load87(cdb,e2,0,&retregs,e1,-1);
1091                     makesure87(cdb,e1,0,1,0);
1092                     resregm = 0;
1093                     //cdb.genf2(0xD9,0xC8 + 1);          // FXCH ST1
1094                     cdb.gen2(0xDF,0xF1);                 // FCOMIP ST1
1095                     pop87();
1096                     cdb.genf2(0xDD,modregrm(3,3,0));     // FPOP
1097                     pop87();
1098                 }
1099                 else
1100                 {
1101                     load87(cdb,e2, 0, pretregs, e1, 3);  // FCOMPP
1102                 }
1103             }
1104             else
1105             {
1106                 if (e2.Eoper == OPconst && !boolres(e2) &&
1107                     config.target_cpu < TARGET_80386)
1108                 {
1109                     regm_t regm = 0;
1110 
1111                     callclib(cdb,e,CLIB.ftest0,&regm,0);
1112                     pop87();
1113                 }
1114                 else
1115                 {
1116                     note87(e1,0,0);
1117                     load87(cdb,e2,0,&retregs,e1,-1);
1118                     makesure87(cdb,e1,0,1,0);
1119                     resregm = 0;
1120                     if (NOSAHF)
1121                     {
1122                         cdb.gen2(0xDF,0xE9);              // FUCOMIP ST1
1123                         pop87();
1124                         cdb.genf2(0xDD,modregrm(3,3,0));  // FPOP
1125                         pop87();
1126                     }
1127                     else if (config.target_cpu >= TARGET_80386)
1128                     {
1129                         cdb.gen2(0xDA,0xE9);      // FUCOMPP
1130                         cg87_87topsw(cdb);
1131                         pop87();
1132                         pop87();
1133                     }
1134                     else
1135                         // Call a function instead so that exceptions
1136                         // are not generated.
1137                         callclib(cdb,e,CLIB.fcompp,&resregm,0);
1138                 }
1139             }
1140 
1141             freenode(e2);
1142             return;
1143         }
1144 
1145         case X(OPadd, TYcfloat, TYcfloat):
1146         case X(OPadd, TYcdouble, TYcdouble):
1147         case X(OPadd, TYcldouble, TYcldouble):
1148         case X(OPadd, TYcfloat, TYfloat):
1149         case X(OPadd, TYcdouble, TYdouble):
1150         case X(OPadd, TYcldouble, TYldouble):
1151         case X(OPadd, TYfloat, TYcfloat):
1152         case X(OPadd, TYdouble, TYcdouble):
1153         case X(OPadd, TYldouble, TYcldouble):
1154             goto Lcomplex;
1155 
1156         case X(OPadd, TYifloat, TYcfloat):
1157         case X(OPadd, TYidouble, TYcdouble):
1158         case X(OPadd, TYildouble, TYcldouble):
1159             goto Lcomplex2;
1160 
1161         case X(OPmin, TYcfloat, TYcfloat):
1162         case X(OPmin, TYcdouble, TYcdouble):
1163         case X(OPmin, TYcldouble, TYcldouble):
1164         case X(OPmin, TYcfloat, TYfloat):
1165         case X(OPmin, TYcdouble, TYdouble):
1166         case X(OPmin, TYcldouble, TYldouble):
1167         case X(OPmin, TYfloat, TYcfloat):
1168         case X(OPmin, TYdouble, TYcdouble):
1169         case X(OPmin, TYldouble, TYcldouble):
1170             goto Lcomplex;
1171 
1172         case X(OPmin, TYifloat, TYcfloat):
1173         case X(OPmin, TYidouble, TYcdouble):
1174         case X(OPmin, TYildouble, TYcldouble):
1175             goto Lcomplex2;
1176 
1177         case X(OPmul, TYcfloat, TYcfloat):
1178         case X(OPmul, TYcdouble, TYcdouble):
1179         case X(OPmul, TYcldouble, TYcldouble):
1180             goto Lcomplex;
1181 
1182         case X(OPdiv, TYcfloat, TYcfloat):
1183         case X(OPdiv, TYcdouble, TYcdouble):
1184         case X(OPdiv, TYcldouble, TYcldouble):
1185         case X(OPdiv, TYfloat, TYcfloat):
1186         case X(OPdiv, TYdouble, TYcdouble):
1187         case X(OPdiv, TYldouble, TYcldouble):
1188         case X(OPdiv, TYifloat, TYcfloat):
1189         case X(OPdiv, TYidouble, TYcdouble):
1190         case X(OPdiv, TYildouble, TYcldouble):
1191             goto Lcomplex;
1192 
1193         case X(OPdiv, TYifloat,   TYfloat):
1194         case X(OPdiv, TYidouble,  TYdouble):
1195         case X(OPdiv, TYildouble, TYldouble):
1196             op = 6;                             // FDIVP
1197             break;
1198 
1199         Lcomplex:
1200         {
1201             loadComplex(cdb,e1);
1202             loadComplex(cdb,e2);
1203             makesure87(cdb, e1, sz2, 2, 0);
1204             makesure87(cdb, e1, 0, 3, 0);
1205             regm_t retregs = mST01;
1206             if (eoper == OPadd)
1207             {
1208                 cdb.genf2(0xDE, 0xC0+2);    // FADDP ST(2),ST
1209                 cdb.genf2(0xDE, 0xC0+2);    // FADDP ST(2),ST
1210                 pop87();
1211                 pop87();
1212             }
1213             else if (eoper == OPmin)
1214             {
1215                 cdb.genf2(0xDE, 0xE8+2);    // FSUBP ST(2),ST
1216                 cdb.genf2(0xDE, 0xE8+2);    // FSUBP ST(2),ST
1217                 pop87();
1218                 pop87();
1219             }
1220             else
1221             {
1222                 int clib = eoper == OPmul ? CLIB.cmul : CLIB.cdiv;
1223                 callclib(cdb, e, clib, &retregs, 0);
1224             }
1225             fixresult_complex87(cdb, e, retregs, pretregs);
1226             return;
1227         }
1228 
1229         Lcomplex2:
1230         {
1231             regm_t retregs = mST0;
1232             codelem(cdb,e1, &retregs, false);
1233             note87(e1, 0, 0);
1234             loadComplex(cdb,e2);
1235             makesure87(cdb, e1, 0, 2, 0);
1236             retregs = mST01;
1237             if (eoper == OPadd)
1238             {
1239                 cdb.genf2(0xDE, 0xC0+2);   // FADDP ST(2),ST
1240             }
1241             else if (eoper == OPmin)
1242             {
1243                 cdb.genf2(0xDE, 0xE8+2);   // FSUBP ST(2),ST
1244                 cdb.genf2(0xD9, 0xE0);     // FCHS
1245             }
1246             else
1247                 assert(0);
1248             pop87();
1249             cdb.genf2(0xD9, 0xC8 + 1);     // FXCH ST(1)
1250             fixresult_complex87(cdb, e, retregs, pretregs);
1251             return;
1252         }
1253 
1254         case X(OPeqeq, TYcfloat, TYcfloat):
1255         case X(OPeqeq, TYcdouble, TYcdouble):
1256         case X(OPeqeq, TYcldouble, TYcldouble):
1257         case X(OPeqeq, TYcfloat, TYifloat):
1258         case X(OPeqeq, TYcdouble, TYidouble):
1259         case X(OPeqeq, TYcldouble, TYildouble):
1260         case X(OPeqeq, TYcfloat, TYfloat):
1261         case X(OPeqeq, TYcdouble, TYdouble):
1262         case X(OPeqeq, TYcldouble, TYldouble):
1263         case X(OPeqeq, TYifloat, TYcfloat):
1264         case X(OPeqeq, TYidouble, TYcdouble):
1265         case X(OPeqeq, TYildouble, TYcldouble):
1266         case X(OPeqeq, TYfloat, TYcfloat):
1267         case X(OPeqeq, TYdouble, TYcdouble):
1268         case X(OPeqeq, TYldouble, TYcldouble):
1269         case X(OPeqeq, TYfloat, TYifloat):
1270         case X(OPeqeq, TYdouble, TYidouble):
1271         case X(OPeqeq, TYldouble, TYildouble):
1272         case X(OPeqeq, TYifloat, TYfloat):
1273         case X(OPeqeq, TYidouble, TYdouble):
1274         case X(OPeqeq, TYildouble, TYldouble):
1275         {
1276             loadComplex(cdb,e1);
1277             loadComplex(cdb,e2);
1278             makesure87(cdb, e1, sz2, 2, 0);
1279             makesure87(cdb, e1, 0, 3, 0);
1280             regm_t retregs = 0;
1281             callclib(cdb, e, CLIB.ccmp, &retregs, 0);
1282             return;
1283         }
1284 
1285         case X(OPadd, TYfloat, TYifloat):
1286         case X(OPadd, TYdouble, TYidouble):
1287         case X(OPadd, TYldouble, TYildouble):
1288         case X(OPadd, TYifloat, TYfloat):
1289         case X(OPadd, TYidouble, TYdouble):
1290         case X(OPadd, TYildouble, TYldouble):
1291 
1292         case X(OPmin, TYfloat, TYifloat):
1293         case X(OPmin, TYdouble, TYidouble):
1294         case X(OPmin, TYldouble, TYildouble):
1295         case X(OPmin, TYifloat, TYfloat):
1296         case X(OPmin, TYidouble, TYdouble):
1297         case X(OPmin, TYildouble, TYldouble):
1298         {
1299             regm_t retregs = mST0;
1300             codelem(cdb,e1, &retregs, false);
1301             note87(e1, 0, 0);
1302             codelem(cdb,e2, &retregs, false);
1303             makesure87(cdb, e1, 0, 1, 0);
1304             if (eoper == OPmin)
1305                 cdb.genf2(0xD9, 0xE0);     // FCHS
1306             if (tyimaginary(e1.Ety))
1307                 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1)
1308             retregs = mST01;
1309             fixresult_complex87(cdb, e, retregs, pretregs);
1310             return;
1311         }
1312 
1313         case X(OPadd, TYcfloat, TYifloat):
1314         case X(OPadd, TYcdouble, TYidouble):
1315         case X(OPadd, TYcldouble, TYildouble):
1316             op = 0;
1317             goto Lci;
1318 
1319         case X(OPmin, TYcfloat, TYifloat):
1320         case X(OPmin, TYcdouble, TYidouble):
1321         case X(OPmin, TYcldouble, TYildouble):
1322             op = 4;
1323             goto Lci;
1324 
1325         Lci:
1326         {
1327             loadComplex(cdb,e1);
1328             regm_t retregs = mST0;
1329             load87(cdb,e2,sz2,&retregs,e1,op);
1330             freenode(e2);
1331             retregs = mST01;
1332             makesure87(cdb, e1,0,1,0);
1333             fixresult_complex87(cdb,e, retregs, pretregs);
1334             return;
1335         }
1336 
1337         case X(OPmul, TYcfloat, TYfloat):
1338         case X(OPmul, TYcdouble, TYdouble):
1339         case X(OPmul, TYcldouble, TYldouble):
1340             imaginary = false;
1341             goto Lcmul;
1342 
1343         case X(OPmul, TYcfloat, TYifloat):
1344         case X(OPmul, TYcdouble, TYidouble):
1345         case X(OPmul, TYcldouble, TYildouble):
1346             imaginary = true;
1347         Lcmul:
1348         {
1349             loadComplex(cdb,e1);
1350             if (imaginary)
1351             {
1352                 cdb.genf2(0xD9, 0xE0);          // FCHS
1353                 cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
1354                 if (elemisone(e2))
1355                 {
1356                     freenode(e2);
1357                     fixresult_complex87(cdb, e, mST01, pretregs);
1358                     return;
1359                 }
1360             }
1361             regm_t retregs = mST0;
1362             codelem(cdb,e2, &retregs, false);
1363             makesure87(cdb, e1, sz2, 1, 0);
1364             makesure87(cdb, e1, 0, 2, 0);
1365             cdb.genf2(0xDC,0xC8 + 2);           // FMUL ST(2), ST
1366             cdb.genf2(0xDE,0xC8 + 1);           // FMULP ST(1), ST
1367             pop87();
1368             fixresult_complex87(cdb, e, mST01, pretregs);
1369             return;
1370         }
1371 
1372         case X(OPmul, TYfloat, TYcfloat):
1373         case X(OPmul, TYdouble, TYcdouble):
1374         case X(OPmul, TYldouble, TYcldouble):
1375             imaginary = false;
1376             goto Lcmul2;
1377 
1378         case X(OPmul, TYifloat, TYcfloat):
1379         case X(OPmul, TYidouble, TYcdouble):
1380         case X(OPmul, TYildouble, TYcldouble):
1381             imaginary = true;
1382         Lcmul2:
1383         {
1384             regm_t retregs = mST0;
1385             codelem(cdb,e1, &retregs, false);
1386             note87(e1, 0, 0);
1387             loadComplex(cdb,e2);
1388             makesure87(cdb, e1, 0, 2, 0);
1389             cdb.genf2(0xD9, imaginary ? 0xE0 : 0xC8 + 1); // FCHS / FXCH ST(1)
1390             cdb.genf2(0xD9,0xC8 + 2);        // FXCH ST(2)
1391             cdb.genf2(0xDC,0xC8 + 2);        // FMUL ST(2), ST
1392             cdb.genf2(0xDE,0xC8 + 1);        // FMULP ST(1), ST
1393             pop87();
1394             fixresult_complex87(cdb, e, mST01, pretregs);
1395             return;
1396         }
1397 
1398         case X(OPdiv, TYcfloat, TYfloat):
1399         case X(OPdiv, TYcdouble, TYdouble):
1400         case X(OPdiv, TYcldouble, TYldouble):
1401         {
1402             loadComplex(cdb,e1);
1403             regm_t retregs = mST0;
1404             codelem(cdb,e2, &retregs, false);
1405             makesure87(cdb, e1, sz2, 1, 0);
1406             makesure87(cdb, e1, 0, 2, 0);
1407             cdb.genf2(0xDC,0xF8 + 2);            // FDIV ST(2), ST
1408             cdb.genf2(0xDE,0xF8 + 1);            // FDIVP ST(1), ST
1409             pop87();
1410             fixresult_complex87(cdb, e, mST01, pretregs);
1411             return;
1412         }
1413 
1414         case X(OPdiv, TYcfloat, TYifloat):
1415         case X(OPdiv, TYcdouble, TYidouble):
1416         case X(OPdiv, TYcldouble, TYildouble):
1417         {
1418             loadComplex(cdb,e1);
1419             cdb.genf2(0xD9,0xC8 + 1);        // FXCH ST(1)
1420             xchg87(0, 1);
1421             cdb.genf2(0xD9, 0xE0);               // FCHS
1422             regm_t retregs = mST0;
1423             codelem(cdb,e2, &retregs, false);
1424             makesure87(cdb, e1, 0, 1, 0);
1425             makesure87(cdb, e1, sz2, 2, 0);
1426             cdb.genf2(0xDC,0xF8 + 2);        // FDIV ST(2), ST
1427             cdb.genf2(0xDE,0xF8 + 1);             // FDIVP ST(1), ST
1428             pop87();
1429             fixresult_complex87(cdb, e, mST01, pretregs);
1430             return;
1431         }
1432 
1433         case X(OPmod, TYcfloat, TYfloat):
1434         case X(OPmod, TYcdouble, TYdouble):
1435         case X(OPmod, TYcldouble, TYldouble):
1436         case X(OPmod, TYcfloat, TYifloat):
1437         case X(OPmod, TYcdouble, TYidouble):
1438         case X(OPmod, TYcldouble, TYildouble):
1439         {
1440             /*
1441                         fld     E1.re
1442                         fld     E1.im
1443                         fld     E2
1444                         fxch    ST(1)
1445                 FM1:    fprem
1446                         fstsw   word ptr sw
1447                         fwait
1448                         mov     AH, byte ptr sw+1
1449                         jp      FM1
1450                         fxch    ST(2)
1451                 FM2:    fprem
1452                         fstsw   word ptr sw
1453                         fwait
1454                         mov     AH, byte ptr sw+1
1455                         jp      FM2
1456                         fstp    ST(1)
1457                         fxch    ST(1)
1458              */
1459             loadComplex(cdb,e1);
1460             regm_t retregs = mST0;
1461             codelem(cdb,e2, &retregs, false);
1462             makesure87(cdb, e1, sz2, 1, 0);
1463             makesure87(cdb, e1, 0, 2, 0);
1464             cdb.genf2(0xD9, 0xC8 + 1);             // FXCH ST(1)
1465 
1466             cdb.gen2(0xD9, 0xF8);                  // FPREM
1467             code *cfm1 = cdb.last();
1468             genjmpifC2(cdb, cfm1);                 // JC2 FM1
1469             cdb.genf2(0xD9, 0xC8 + 2);             // FXCH ST(2)
1470 
1471             cdb.gen2(0xD9, 0xF8);                  // FPREM
1472             code *cfm2 = cdb.last();
1473 
1474             genjmpifC2(cdb, cfm2);                 // JC2 FM2
1475             cdb.genf2(0xDD,0xD8 + 1);              // FSTP ST(1)
1476             cdb.genf2(0xD9, 0xC8 + 1);             // FXCH ST(1)
1477 
1478             pop87();
1479             fixresult_complex87(cdb, e, mST01, pretregs);
1480             return;
1481         }
1482 
1483         default:
1484 
1485             debug
1486             elem_print(e);
1487 
1488             assert(0);
1489     }
1490 
1491     int reverse = 0;
1492     int e2oper = e2.Eoper;
1493 
1494     /* Move double-sized operand into the second position if there's a chance
1495      * it will allow combining a load with an operation (DMD Bugzilla 2905)
1496      */
1497     if ( ((tybasic(e1.Ety) == TYdouble)
1498           && ((e1.Eoper == OPvar) || (e1.Eoper == OPconst))
1499           && (tybasic(e2.Ety) != TYdouble)) ||
1500         (e1.Eoper == OPconst) ||
1501         (e1.Eoper == OPvar &&
1502          ((e1.Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) ||
1503           (e2oper == OPd_f &&
1504             (e2.EV.E1.Eoper == OPs32_d || e2.EV.E1.Eoper == OPs64_d || e2.EV.E1.Eoper == OPs16_d) &&
1505             e2.EV.E1.EV.E1.Eoper == OPvar
1506           ) ||
1507           ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) &&
1508             e2.EV.E1.Eoper == OPvar
1509           )
1510          )
1511         )
1512        )
1513     {   // Reverse order of evaluation
1514         e1 = e.EV.E2;
1515         e2 = e.EV.E1;
1516         op = oprev[op + 1];
1517         reverse ^= 1;
1518     }
1519 
1520     regm_t retregs1 = mST0;
1521     codelem(cdb,e1,&retregs1,false);
1522     note87(e1,0,0);
1523 
1524     if (config.flags4 & CFG4fdivcall && e.Eoper == OPdiv)
1525     {
1526         regm_t retregs = mST0;
1527         load87(cdb,e2,0,&retregs,e1,-1);
1528         makesure87(cdb, e1,0,1,0);
1529         if (op == 7)                    // if reverse divide
1530             cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
1531         callclib(cdb,e,CLIB.fdiv87,&retregs,0);
1532         pop87();
1533         regm_t resregm = mST0;
1534         freenode(e2);
1535         fixresult87(cdb,e,resregm,pretregs);
1536     }
1537     else if (e.Eoper == OPmod)
1538     {
1539         /*
1540          *              fld     tbyte ptr y
1541          *              fld     tbyte ptr x             // ST = x, ST1 = y
1542          *      FM1:    // We don't use fprem1 because for some inexplicable
1543          *              // reason we get -5 when we do _modulo(15, 10)
1544          *              fprem                           // ST = ST % ST1
1545          *              fstsw   word ptr sw
1546          *              fwait
1547          *              mov     AH,byte ptr sw+1        // get msb of status word in AH
1548          *              sahf                            // transfer to flags
1549          *              jp      FM1                     // continue till ST < ST1
1550          *              fstp    ST(1)                   // leave remainder on stack
1551          */
1552         regm_t retregs = mST0;
1553         load87(cdb,e2,0,&retregs,e1,-1);
1554         makesure87(cdb,e1,0,1,0);       // now have x,y on stack; need y,x
1555         if (!reverse)                           // if not reverse modulo
1556             cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
1557 
1558         cdb.gen2(0xD9, 0xF8);                   // FM1: FPREM
1559         code *cfm1 = cdb.last();
1560         genjmpifC2(cdb, cfm1);                  // JC2 FM1
1561         cdb.genf2(0xDD,0xD8 + 1);               // FSTP ST(1)
1562 
1563         pop87();
1564         freenode(e2);
1565         fixresult87(cdb,e,mST0,pretregs);
1566     }
1567     else
1568     {
1569         load87(cdb,e2,0,pretregs,e1,op);
1570         freenode(e2);
1571     }
1572     if (*pretregs & mST0)
1573         note87(e,0,0);
1574     //printf("orth87(-e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
1575 }
1576 
/*****************************
 * Load e into ST01 as a complex value.
 *
 * Real and imaginary operands are widened to complex by pairing them with
 * a zero (FLDZ) standing in for the missing part; complex operands are
 * evaluated directly into ST01. Both resulting stack entries are then
 * registered through note87().
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = expression to load; its basic type must be one of the real,
 *          imaginary or complex floating types, anything else hits assert(0)
 */

@trusted
private void loadComplex(ref CodeBuilder cdb,elem *e)
{
    int partsz = tysize(e.Ety);         // offset of the second component
    const ty = tybasic(e.Ety);
    regm_t want = mST0;                 // widened to mST01 for complex operands

    switch (ty)
    {
        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            // Already complex: each component occupies half of the total size
            partsz /= 2;
            want = mST01;
            codelem(cdb,e,&want,false);
            break;

        case TYifloat:
        case TYidouble:
        case TYildouble:
            // Imaginary: the zero real part is pushed before e is evaluated
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            codelem(cdb,e,&want,false);
            break;

        case TYfloat:
        case TYdouble:
        case TYldouble:
            // Real: evaluate e, then push the zero imaginary part on top of it
            codelem(cdb,e,&want,false);
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            break;

        default:
            assert(0);
    }

    // Register both halves: offset 0 at index 1 and offset partsz at index 0
    // (presumably x87 stack positions ST1 and ST0 -- confirm against note87()).
    note87(e, 0, 1);
    note87(e, partsz, 0);
}
1623 
1624 /*************************
 * If op == -1, load expression e into ST0;
 * otherwise compute (eleft op e), where eleft is already in ST0.
 * Must follow the same logic as cmporder87().
1628  */
1629 
1630 @trusted
1631 void load87(ref CodeBuilder cdb,elem *e,uint eoffset,regm_t *pretregs,elem *eleft,OPER op)
1632 {
1633     code cs;
1634     regm_t retregs;
1635     reg_t reg;
1636     uint mf1;
1637     ubyte ldop;
1638     int i;
1639 
1640     if (NDPP)
1641         printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
1642 
1643     assert(!(NOSAHF && op == 3));
1644     elem_debug(e);
1645     if (ADDFWAIT())
1646         cs.Iflags = CFwait;
1647     else
1648         cs.Iflags = 0;
1649     cs.Irex = 0;
1650     OPER opr = oprev[op + 1];
1651     tym_t ty = tybasic(e.Ety);
1652     uint mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
1653     bool noted = false;
1654     if ((ty == TYldouble || ty == TYildouble) &&
1655         op != -1 && e.Eoper != OPd_ld)
1656         goto Ldefault;
1657 L5:
1658     switch (e.Eoper)
1659     {
1660         case OPcomma:
1661             if (op != -1)
1662             {
1663                 note87(eleft,eoffset,0);
1664                 noted = true;
1665             }
1666             docommas(cdb,&e);
1667             goto L5;
1668 
1669         case OPvar:
1670             notreg(e);
1671             goto L2;
1672 
1673         case OPind:
1674         L2:
1675             if (op != -1)
1676             {
1677                 if (e.Ecount && e.Ecount != e.Ecomsub &&
1678                     (i = cse_get(e, 0)) >= 0)
1679                 {
1680                     immutable ubyte[8] b2 = [0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8];
1681 
1682                     cdb.genf2(0xD8,b2[op] + i);        // Fop ST(i)
1683                 }
1684                 else
1685                 {
1686                     getlvalue87(cdb,&cs,e,0);
1687                     makesure87(cdb,eleft,eoffset,0,0);
1688                     cs.Iop = ESC(mf,0);
1689                     cs.Irm |= modregrm(0,op,0);
1690                     cdb.gen(&cs);
1691                 }
1692             }
1693             else
1694             {
1695                 push87(cdb);
1696                 switch (ty)
1697                 {
1698                     case TYfloat:
1699                     case TYdouble:
1700                     case TYifloat:
1701                     case TYidouble:
1702                     case TYcfloat:
1703                     case TYcdouble:
1704                     case TYdouble_alias:
1705                         loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0); // FLD var
1706                         break;
1707                     case TYldouble:
1708                     case TYildouble:
1709                     case TYcldouble:
1710                         loadea(cdb,e,&cs,0xDB,5,0,0,0);      // FLD var
1711                         break;
1712                     default:
1713                         printf("ty = x%x\n", ty);
1714                         assert(0);
1715                 }
1716                 note87(e,0,0);
1717             }
1718             break;
1719 
1720         case OPd_f:
1721         case OPf_d:
1722         case OPd_ld:
1723             mf1 = (tybasic(e.EV.E1.Ety) == TYfloat || tybasic(e.EV.E1.Ety) == TYifloat)
1724                     ? MFfloat : MFdouble;
1725             if (op != -1 && global87.stackused && !noted)
1726                 note87(eleft,eoffset,0);    // don't trash this value
1727             if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
1728             {
1729                 static if (1)
1730                 {
1731                   L4:
1732                     getlvalue87(cdb,&cs,e.EV.E1,0);
1733                     cs.Iop = ESC(mf1,0);
1734                     if (op != -1)
1735                     {
1736                         cs.Irm |= modregrm(0,op,0);
1737                         makesure87(cdb,eleft,eoffset,0,0);
1738                     }
1739                     else
1740                     {
1741                         cs.Iop |= 1;
1742                         push87(cdb);
1743                     }
1744                     cdb.gen(&cs);                     // FLD / Fop
1745                 }
1746                 else
1747                 {
1748                     loadea(cdb,e.EV.E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e.EV.E1 */
1749                 }
1750 
1751                 // Variable cannot be put into a register anymore
1752                 if (e.EV.E1.Eoper == OPvar)
1753                     notreg(e.EV.E1);
1754                 freenode(e.EV.E1);
1755             }
1756             else
1757             {
1758                 retregs = mST0;
1759                 codelem(cdb,e.EV.E1,&retregs,false);
1760                 if (op != -1)
1761                 {
1762                     makesure87(cdb,eleft,eoffset,1,0);
1763                     cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
1764                     pop87();
1765                 }
1766             }
1767             break;
1768 
1769         case OPs64_d:
1770             if (e.EV.E1.Eoper == OPvar ||
1771                 (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
1772             {
1773                 getlvalue87(cdb,&cs,e.EV.E1,0);
1774                 cs.Iop = 0xDF;
1775                 push87(cdb);
1776                 cs.Irm |= modregrm(0,5,0);
1777                 cdb.gen(&cs);                     // FILD m64
1778                 // Variable cannot be put into a register anymore
1779                 if (e.EV.E1.Eoper == OPvar)
1780                     notreg(e.EV.E1);
1781                 freenode(e.EV.E1);
1782             }
1783             else if (I64)
1784             {
1785                 retregs = ALLREGS;
1786                 codelem(cdb,e.EV.E1,&retregs,false);
1787                 reg = findreg(retregs);
1788                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
1789                 code_orrex(cdb.last(), REX_W);
1790                 push87(cdb);
1791                 cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
1792             }
1793             else
1794             {
1795                 retregs = ALLREGS;
1796                 codelem(cdb,e.EV.E1,&retregs,false);
1797                 reg = findreglsw(retregs);
1798                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reglsw
1799                 reg = findregmsw(retregs);
1800                 cdb.genfltreg(STO,reg,4);         // MOV floatreg+4,regmsw
1801                 push87(cdb);
1802                 cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
1803             }
1804             if (op != -1)
1805             {
1806                 makesure87(cdb,eleft,eoffset,1,0);
1807                 cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
1808                 pop87();
1809             }
1810             break;
1811 
1812         case OPconst:
1813             ldop = loadconst(e, 0);
1814             if (ldop)
1815             {
1816                 push87(cdb);
1817                 cdb.genf2(0xD9,ldop);          // FLDx
1818                 if (op != -1)
1819                 {
1820                     cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
1821                     pop87();
1822                 }
1823             }
1824             else
1825             {
1826                 assert(0);
1827             }
1828             break;
1829 
1830         case OPu16_d:
1831         {
1832             /* This opcode should never be generated        */
1833             /* (probably shouldn't be for 16 bit code too)  */
1834             assert(!I32);
1835 
1836             if (op != -1 && !noted)
1837                 note87(eleft,eoffset,0);    // don't trash this value
1838             retregs = ALLREGS & mLSW;
1839             codelem(cdb,e.EV.E1,&retregs,false);
1840             regwithvalue(cdb,ALLREGS & mMSW,0,&reg,0);  // 0-extend
1841             retregs |= mask(reg);
1842             mf1 = MFlong;
1843             goto L3;
1844         }
1845 
1846         case OPs16_d:       mf1 = MFword;   goto L6;
1847         case OPs32_d:       mf1 = MFlong;   goto L6;
1848         L6:
1849             if (e.Ecount)
1850                 goto Ldefault;
1851             if (op != -1 && !noted)
1852                 note87(eleft,eoffset,0);    // don't trash this value
1853             if (e.EV.E1.Eoper == OPvar ||
1854                 (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
1855             {
1856                 goto L4;
1857             }
1858             else
1859             {
1860                 retregs = ALLREGS;
1861                 codelem(cdb,e.EV.E1,&retregs,false);
1862             L3:
1863                 if (I16 && e.Eoper != OPs16_d)
1864                 {
1865                     /* MOV floatreg+2,reg   */
1866                     reg = findregmsw(retregs);
1867                     cdb.genfltreg(STO,reg,REGSIZE);
1868                     retregs &= mLSW;
1869                 }
1870                 reg = findreg(retregs);
1871                 cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
1872                 if (op != -1)
1873                 {
1874                     makesure87(cdb,eleft,eoffset,0,0);
1875                     cdb.genfltreg(ESC(mf1,0),op,0);   // Fop floatreg
1876                 }
1877                 else
1878                 {
1879                     /* FLD long ptr floatreg        */
1880                     push87(cdb);
1881                     cdb.genfltreg(ESC(mf1,1),0,0);
1882                 }
1883             }
1884             break;
1885         default:
1886         Ldefault:
1887             retregs = mST0;
1888             codelem(cdb,e,&retregs,2);
1889 
1890             if (op != -1)
1891             {
1892                 makesure87(cdb,eleft,eoffset,1,(op == 0 || op == 1));
1893                 pop87();
1894                 if (op == 4 || op == 6)     // sub or div
1895                 {
1896                     code *cl = cdb.last();
1897                     if (cl && cl.Iop == 0xD9 && cl.Irm == 0xC9)   // FXCH ST(1)
1898                     {   cl.Iop = NOP;
1899                         opr = op;           // reverse operands
1900                     }
1901                 }
1902                 cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
1903             }
1904             break;
1905     }
1906     if (op == 3)                    // FCOMP
1907     {   pop87();                    // extra pop was done
1908         cg87_87topsw(cdb);
1909     }
1910     fixresult87(cdb,e,((op == 3) ? mPSW : mST0),pretregs);
1911     if (NDPP)
1912         printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
1913 }
1914 
/********************************
 * Determine if a compare is to be done forwards or backwards.
 * Must follow same logic as load87().
 * Params:
 *      e = operand to classify; for an OPcomma chain only the
 *          right-most operand is examined
 * Returns:
 *      0 if the compare is to be done forwards,
 *      1 if it is to be done backwards
 */

@trusted
int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);

    // Step through comma expressions to the operand that supplies the value
    while (e.Eoper == OPcomma)
        e = e.EV.E2;

    switch (e.Eoper)
    {
        case OPd_f:
        case OPf_d:
        case OPd_ld:
        {
            // A conversion applied directly to a variable or an indirection
            // goes forwards, anything else backwards
            const operand = e.EV.E1.Eoper;
            return (operand == OPvar || operand == OPind) ? 0 : 1;
        }

        case OPconst:
        {
            // Backwards if loadconst() recognizes the constant ...
            if (loadconst(e, 0))
            {
                //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
                return 1;
            }
            // ... or if it is an 80 bit (real or imaginary) constant
            const tym_t ty = tybasic(e.Ety);
            return (ty == TYldouble || ty == TYildouble) ? 1 : 0;
        }

        case OPvar:
        case OPind:
        {
            // Only 80 bit operands are compared backwards
            const tym_t ty = tybasic(e.Ety);
            return (ty == TYldouble || ty == TYildouble) ? 1 : 0;
        }

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            return 0;

        case OPs64_d:
        default:
            return 1;
    }
}
1974 
/*******************************
 * Perform an assignment to a long double/double/float.
 *
 * The right operand is evaluated into ST0 and then stored into the
 * lvalue e.EV.E1. If the caller wants the value as well, ST0 is kept
 * (FST, or for 80 bit reals a duplicate is made first and FSTP'd).
 * Params:
 *      cdb = code sink
 *      e = the OPeq expression
 *      pretregs = where the result is wanted
 */

@trusted
void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);

    // Evaluate the rvalue into ST0 (and the flags, if the caller asked for them)
    regm_t retregs = mST0 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);

    // Pick the store instruction from the type of the lvalue
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    opcode_t storeOp;
    uint regField;                          // reg field of the mod r/m byte
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
            storeOp = ESC(MFdouble,1);
            regField = 3;
            break;

        case TYifloat:
        case TYfloat:
            storeOp = ESC(MFfloat,1);
            regField = 3;
            break;

        case TYildouble:
        case TYldouble:
            storeOp = 0xDB;
            regField = 7;
            break;

        default:
            assert(0);
    }

    // Does the value have to remain available after the store?
    const bool keepValue = (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) != 0;
    if (!keepValue)
    {
        pop87();                            // FSTP e.EV.E1 consumes ST0
    }
    else if (ty1 == TYldouble || ty1 == TYildouble)
    {
        // The 80 bit store used here always pops, so store a copy instead
        push87(cdb);
        cdb.genf2(0xD9,0xC0);               // FLD ST(0)
        pop87();
    }
    else
        regField = 2;                       // FST e.EV.E1

    // Note: loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0) is not used here because it
    // doesn't work if ST(0) gets saved to the stack by getlvalue()
    code cs;
    cs.Irex = 0;
    cs.Iflags = 0;
    cs.Iop = storeOp;
    if (keepValue)
    {
        // Make sure the value is still in ST0 once the lvalue has been addressed
        elem *e2 = e.EV.E2;
        while (e2.Eoper == OPcomma)
            e2 = e2.EV.E2;
        note87(e2,0,0);
        getlvalue87(cdb, &cs, e.EV.E1, 0);
        makesure87(cdb,e2,0,0,1);
    }
    else
        getlvalue87(cdb, &cs, e.EV.E1, 0);

    cs.Irm |= modregrm(0,regField,0);       // OR in reg field
    cdb.gen(&cs);                           // FST/FSTP e.EV.E1

    if (storeOp == 0xDB)
    {
        /* 10 byte reals really occupy 12 or 16 bytes of storage,
         * so zero the extra 2 or 6 bytes.
         */
        const ldsize = tysize(TYldouble);
        if (ldsize == 12 || ldsize == 16)
        {
            if (ldsize == 16)
                cs.Irex &= ~REX_W;
            cs.Iop = 0xC7;                  // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;          // 2 byte store
            cdb.gen(&cs);

            if (ldsize == 16)
            {
                cs.IEV1.Voffset += 2;       // MOV EA+12,0
                cs.Iflags &= ~CFopsize;     // 4 byte store
                cdb.gen(&cs);
            }
        }
    }

    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}
2089 
/*******************************
 * Perform an assignment to a complex long double/double/float.
 *
 * The right operand is evaluated into ST0,ST1 and both halves are stored
 * into the lvalue e.EV.E1: first ST0 into the second half of the lvalue,
 * then the other value into the first half.
 * Params:
 *      cdb = code sink
 *      e = the OPeq expression
 *      pretregs = where the result is wanted
 */

@trusted
void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);

    code cs;
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    cs.Irex = 0;

    // Evaluate the rvalue into ST0,ST1 (and the flags, if the caller asked for them)
    regm_t retregs = mST01 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);

    // Pick the store instruction from the type of the lvalue
    const tym_t ty1 = tybasic(e.EV.E1.Ety);
    opcode_t storeOp;
    uint regField;                          // reg field of the mod r/m byte
    switch (ty1)
    {
        case TYcdouble:
            storeOp = ESC(MFdouble,1);
            regField = 3;
            break;

        case TYcfloat:
            storeOp = ESC(MFfloat,1);
            regField = 3;
            break;

        case TYcldouble:
            storeOp = 0xDB;
            regField = 7;
            break;

        default:
            assert(0);
    }

    const uint halfSize = tysize(ty1) / 2;  // size of one component
    bool swapHalves = false;                // true if FXCH is needed to reach the other half

    if (*pretregs & (mST01 | mXMM0 | mXMM1))  // if want result on stack too
    {
        if (ty1 == TYcldouble)
        {
            // The 80 bit store used here always pops, so store copies instead
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            pop87();
            pop87();
        }
        else
        {
            regField = 2;                   // FST e.EV.E1
            swapHalves = true;
        }

        cs.Iflags = 0;
        cs.Irex = 0;
        cs.Iop = storeOp;
        getlvalue87(cdb, &cs, e.EV.E1, 0);
        cs.IEV1.Voffset += halfSize;
        cs.Irm |= modregrm(0, regField, 0);
        makesure87(cdb,e.EV.E2, halfSize, 0, 0);
        cdb.gen(&cs);                       // store second half
        genfwait(cdb);
        makesure87(cdb,e.EV.E2,  0, 1, 0);
    }
    else
    {
        // FSTP e.EV.E1: both values are consumed by the stores
        pop87();
        pop87();
        loadea(cdb,e.EV.E1,&cs,storeOp,regField,halfSize,0,0);
        genfwait(cdb);
    }

    // Store the first half; for FST the value must be swapped into ST0 and back
    if (swapHalves)
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
    cs.IEV1.Voffset -= halfSize;
    cdb.gen(&cs);
    if (swapHalves)
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)

    if (storeOp == 0xDB)
    {
        /* Each 10 byte component really occupies 12 or 16 bytes of storage,
         * so zero the padding of both components.
         */
        const ldsize = tysize(TYldouble);
        if (ldsize == 12 || ldsize == 16)
        {
            cs.Iop = 0xC7;                  // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;          // 2 byte store
            cdb.gen(&cs);

            if (ldsize == 12)
            {
                cs.IEV1.Voffset += 12;
                cdb.gen(&cs);               // MOV EA+22,0
            }
            else
            {
                cs.IEV1.Voffset += 2;       // MOV EA+12,0
                cs.Iflags &= ~CFopsize;     // 4 byte store
                cdb.gen(&cs);

                cs.IEV1.Voffset += 14;      // MOV EA+26,0
                cs.Iflags |= CFopsize;      // 2 byte store
                cdb.gen(&cs);

                cs.IEV1.Voffset += 2;       // MOV EA+28,0
                cs.Iflags &= ~CFopsize;     // 4 byte store
                cdb.gen(&cs);
            }
        }
    }

    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult_complex87(cdb, e,mST01 | mPSW,pretregs);
}
2208 
/*******************************
 * Perform an assignment while converting to integral type,
 * i.e. handle (e1 = (int) e2)
 *
 * The operand of the conversion is evaluated into ST0 and stored
 * straight into e1 with an integer store, with the rounding mode
 * switched to round-to-0 around the store.
 * Params:
 *      cdb = code sink
 *      e = the OPeq expression; e.EV.E2 is the conversion operator
 *      pretregs = must be 0, no result is produced
 */

@trusted
private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPeq);
    assert(!*pretregs);

    // Load the floating point operand of the conversion into ST0
    elem_debug(e.EV.E2);
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2.EV.E1,&retregs,false);

    // Select the integer store matching the conversion
    opcode_t storeOp;
    uint regField;
    switch (e.EV.E2.Eoper)
    {
        case OPd_s16:
            storeOp = ESC(MFword,1);
            regField = 3;
            break;

        case OPd_s32:
        case OPd_u16:
            storeOp = ESC(MFlong,1);
            regField = 3;
            break;

        case OPd_s64:
            storeOp = 0xDF;
            regField = 7;
            break;

        default:
            assert(0);
    }
    freenode(e.EV.E2);

    genfwait(cdb);
    genrnd(cdb, CW_roundto0);               // FLDCW roundto0

    pop87();                                // the store consumes ST0
    code cs;
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    elem *lvalue = e.EV.E1;
    if (lvalue.Eoper == OPvar)
        notreg(lvalue);                     // cannot be put in register anymore
    loadea(cdb,lvalue,&cs,storeOp,regField,0,0,0);

    genfwait(cdb);
    genrnd(cdb, CW_roundtonearest);         // FLDCW roundtonearest

    freenode(e.EV.E1);
}
2260 
/**********************************
 * Perform +=, -=, *=, /= and %= (as well as post-increment and
 * post-decrement) where the lvalue is a float, double or long double.
 * Complex lvalues are forwarded to opmod_complex87() for %= and to
 * opass_complex87() for everything else.
 * Params:
 *      cdb = code sink
 *      e = the op-assign expression
 *      pretregs = where the result is wanted
 */

@trusted
public void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    uint op;                // reg field of the memory-operand form, already shifted into place
    opcode_t opld;          // second byte of the 0xDE FopP ST(1) form used for long doubles
    opcode_t op1;           // store (and load) opcode for the lvalue's type
    uint op2;               // reg field selecting FSTP (or FST) for the final store
    tym_t ty1 = tybasic(e.EV.E1.Ety);

    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            if (e.Eoper == OPmodass)
                opmod_complex87(cdb, e, pretregs);
            else
                opass_complex87(cdb, e, pretregs);
            return;

        default:
            assert(0);
    }
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:      op = 0 << 3;    opld = 0xC1;    break;  // FADD
        case OPpostdec:
        case OPminass:      op = 5 << 3;    opld = 0xE1; /*0xE9;*/  break;  // FSUBR
        case OPmulass:      op = 1 << 3;    opld = 0xC9;    break;  // FMUL
        case OPdivass:      op = 7 << 3;    opld = 0xF1;    break;  // FDIVR
        case OPmodass:      break;          // handled by the FPREM loop below
        default:            assert(0);
    }
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);     // evaluate rvalue
    note87(e.EV.E2,0,0);
    // %= is asked to keep AX out of the addressing mode
    // NOTE(review): presumably because the FPREM loop uses AH for the status word - confirm
    getlvalue87(cdb,&cs,e.EV.E1,e.Eoper==OPmodass?mAX:0);
    makesure87(cdb,e.EV.E2,0,0,0);
    if (config.flags4 & CFG4fdivcall && e.Eoper == OPdivass)
    {
        // Divide by calling the library routine instead of using FDIV
        push87(cdb);
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);
        cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
        callclib(cdb,e,CLIB.fdiv87,&retregs,0);
        pop87();
    }
    else if (e.Eoper == OPmodass)
    {
        /*
         *          fld     tbyte ptr y
         *          fld     tbyte ptr x             // ST = x, ST1 = y
         *  FM1:    // We don't use fprem1 because for some inexplicable
         *          // reason we get -5 when we do _modulo(15, 10)
         *          fprem                           // ST = ST % ST1
         *          fstsw   word ptr sw
         *          fwait
         *          mov     AH,byte ptr sw+1        // get msb of status word in AH
         *          sahf                            // transfer to flags
         *          jp      FM1                     // continue till ST < ST1
         *          fstp    ST(1)                   // leave remainder on stack
         */
        push87(cdb);
        cs.Iop = op1;
        if (ty1 == TYldouble || ty1 == TYildouble)
            cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1

        cdb.gen2(0xD9, 0xF8);               // FPREM
        code *cfm1 = cdb.last();            // FM1: loop back to the FPREM
        genjmpifC2(cdb, cfm1);              // JC2 FM1
        cdb.genf2(0xDD,0xD8 + 1);           // FSTP ST(1)

        pop87();
    }
    else if (ty1 == TYldouble || ty1 == TYildouble)
    {
        // No memory-operand arithmetic is used for 80 bit reals:
        // load the lvalue and use the register form
        push87(cdb);
        cs.Iop = op1;
        cs.Irm |= modregrm(0, 5, 0);        // FLD tbyte ptr ...
        cdb.gen(&cs);                       // FLD   e.EV.E1
        cdb.genf2(0xDE,opld);               // FopP  ST(1)
        pop87();
    }
    else
    {
        // Operate on ST0 directly with the lvalue as memory operand
        cs.Iop = op1 & ~1;
        cs.Irm |= op;
        cdb.gen(&cs);                       // Fop e.EV.E1
    }
    if (*pretregs & mPSW)
        genftst(cdb,e,0);                   // FTST ST0
    // if want result in registers
    if (*pretregs & (mST0 | ALLREGS | mBP))
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            // The 80 bit store used here always pops, so store a copy instead
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP
        pop87();
    }
    cs.Iop = op1;
    NEWREG(cs.Irm,op2);                     // FSTx e.EV.E1
    freenode(e.EV.E1);
    cdb.gen(&cs);
    genfwait(cdb);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}
2394 
/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 *
 * Both halves of E1 are reduced modulo E2 with an FPREM loop and
 * written back to E1. On completion ST0 holds the new imaginary part
 * and ST1 the new real part; they are left on the x87 stack only if
 * the caller wants the result.
 * Params:
 *      cdb = code sink
 *      e = the OPmodass expression
 *      pretregs = where the result is wanted
 */

@trusted
private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs)
{

    /*          fld     E2
                fld     E1.re
        FM1:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                sahf
                jp      FM1
                fxch    ST(1)
                fld     E1.im
        FM2:    fprem
                fstsw   word ptr sw
                fwait
                mov     AH, byte ptr sw+1
                sahf
                jp      FM2
                fstp    ST(1)
     */

    code cs;

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;                // size of one component

    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);         // FLD E2
    note87(e.EV.E2,0,0);
    getlvalue87(cdb,&cs,e.EV.E1,0);
    makesure87(cdb,e.EV.E2,0,0,0);

    push87(cdb);
    switch (ty1)
    {
        case TYcdouble:  cs.Iop = ESC(MFdouble,1);      break;
        case TYcfloat:   cs.Iop = ESC(MFfloat,1);       break;
        case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
        default:
            assert(0);
    }
    cdb.gen(&cs);                               // FLD E1.re

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm1 = cdb.last();
    genjmpifC2(cdb, cfm1);                      // JC2 FM1
    cdb.genf2(0xD9, 0xC8 + 1);                  // FXCH ST(1)

    push87(cdb);
    cs.IEV1.Voffset += sz2;
    cdb.gen(&cs);                               // FLD E1.im

    cdb.gen2(0xD9, 0xF8);                       // FPREM
    code *cfm2 = cdb.last();
    genjmpifC2(cdb, cfm2);                      // JC2 FM2
    cdb.genf2(0xDD,0xD8 + 1);                   // FSTP ST(1)

    pop87();

    // Here ST0 = E1.im % E2, ST1 = E1.re % E2 and cs addresses E1.im

    if (*pretregs & (mST01 | mPSW))
    {
        if (ty1 == TYcldouble)
        {
            /* There is no non-popping 80 bit store (0xDB /7 is FSTP),
             * so duplicate both values and pop the copies into memory.
             */
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC1);               // FLD ST(1)
            cdb.genf2(0xD9,0xC1);               // FLD ST(1)
            NEWREG(cs.Irm, 7);
            cdb.gen(&cs);                       // FSTP mreal.im
            cs.IEV1.Voffset -= sz2;
            cdb.gen(&cs);                       // FSTP mreal.re
            pop87();
            pop87();
        }
        else
        {
            /* FST only stores ST0, so the real part has to be swapped
             * into ST0 for its store and swapped back afterwards.
             */
            NEWREG(cs.Irm, 2);
            cdb.gen(&cs);                       // FST mreal.im
            cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
            cs.IEV1.Voffset -= sz2;
            cdb.gen(&cs);                       // FST mreal.re
            cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
        }
        retregs = mST01;
    }
    else
    {
        NEWREG(cs.Irm, (ty1 == TYcldouble) ? 7 : 3);
        cdb.gen(&cs);            // FSTP mreal.im
        cs.IEV1.Voffset -= sz2;
        cdb.gen(&cs);            // FSTP mreal.re
        pop87();
        pop87();
        retregs = 0;
    }
    freenode(e.EV.E1);
    genfwait(cdb);
    fixresult_complex87(cdb,e,retregs,pretregs);
}
2480 
/**********************************
 * Perform +=, -=, *= and /= (as well as post-increment and
 * post-decrement) for the lvalue being complex.
 *
 * Throughout, a complex value on the x87 stack has its imaginary part
 * in ST0 and its real part in ST1. The imaginary part of the lvalue
 * lives at offset sz2 from the real part.
 * Params:
 *      cdb = code sink
 *      e = the op-assign expression
 *      pretregs = where the result is wanted
 */

@trusted
private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code cs;
    uint op;            // reg field of the memory-operand form, already shifted into place
    opcode_t op2;       // second byte of the 0xDE FopP ST(i),ST form (i is added in)

    tym_t ty1 = tybasic(e.EV.E1.Ety);
    uint sz2 = _tysize[ty1] / 2;        // size of one component
    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:  op = 0 << 3;            // FADD
                        op2 = 0xC0;             // FADDP ST(i),ST
                        break;

        case OPpostdec:
        case OPminass:  op = 5 << 3;            // FSUBR
                        op2 = 0xE0;             // FSUBRP ST(i),ST
                        break;

        case OPmulass:  op = 1 << 3;            // FMUL
                        op2 = 0xC8;             // FMULP ST(i),ST
                        break;

        case OPdivass:  op = 7 << 3;            // FDIVR
                        op2 = 0xF0;             // FDIVRP ST(i),ST
                        break;

        default:        assert(0);
    }

    if (!tycomplex(e.EV.E2.Ety) &&
        (e.Eoper == OPmulass || e.Eoper == OPdivass))
    {
        /* Multiplying or dividing by a real or imaginary value: both
         * components are scaled by E2, so duplicate E2 and use the
         * component-wise code at L1.
         */
        retregs = mST0;
        codelem(cdb,e.EV.E2, &retregs, false);
        note87(e.EV.E2, 0, 0);
        getlvalue87(cdb,&cs, e.EV.E1, 0);
        makesure87(cdb,e.EV.E2,0,0,0);
        push87(cdb);
        cdb.genf2(0xD9,0xC0);                   // FLD ST(0)
        goto L1;
    }
    else
    {
        loadComplex(cdb,e.EV.E2);
        getlvalue87(cdb,&cs,e.EV.E1,0);
        makesure87(cdb,e.EV.E2,sz2,0,0);
        makesure87(cdb,e.EV.E2,0,1,0);
    }

    switch (e.Eoper)
    {
        case OPpostinc:
        case OPaddass:
        case OPpostdec:
        case OPminass:
        L1:
            if (ty1 == TYcldouble)
            {
                push87(cdb);
                push87(cdb);
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xDE, op2 + 2);       // FopP ST(2),ST
                cdb.genf2(0xDE, op2 + 2);       // FopP ST(2),ST
                pop87();
                pop87();
                if (tyimaginary(e.EV.E2.Ety))
                {
                    // Scaling by an imaginary value swaps the components, one of them negated
                    if (e.Eoper == OPmulass)
                    {
                        cdb.genf2(0xD9, 0xE0);   // FCHS
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                    }
                    else if (e.Eoper == OPdivass)
                    {
                        cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1)
                        cdb.genf2(0xD9, 0xE0);   // FCHS
                    }
                }
            L2:
                // Store the 80 bit result; cs addresses e.EV.E1.im here
                if (*pretregs & (mST01 | mPSW))
                {
                    // The 80 bit store always pops, so store copies instead
                    push87(cdb);
                    push87(cdb);
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    cdb.genf2(0xD9,0xC1);       // FLD ST(1)
                    retregs = mST01;
                }
                else
                    retregs = 0;
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0,7,0);
                cdb.gen(&cs);                   // FSTP e.EV.E1.im
                cs.IEV1.Voffset -= sz2;
                cdb.gen(&cs);                   // FSTP e.EV.E1.re
                pop87();
                pop87();

            }
            else
            {
                ubyte rmop = cast(ubyte)(cs.Irm | op);
                ubyte rmfst = cs.Irm | modregrm(0,2,0);
                ubyte rmfstp = cs.Irm | modregrm(0,3,0);
                ubyte iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                opcode_t iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;

                cs.Iop = iop;
                cs.Irm = rmop;
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                           // Fop mreal.im
                if (tyimaginary(e.EV.E2.Ety) && (e.Eoper == OPmulass || e.Eoper == OPdivass))
                {
                    // Scaling by an imaginary value swaps the components, one of them negated
                    if (e.Eoper == OPmulass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    cdb.genf2(0xD9,0xC8 + 1);           // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);                       // Fop mreal.re
                    if (e.Eoper == OPdivass)
                        cdb.genf2(0xD9, 0xE0);          // FCHS
                    if (*pretregs & (mST01 | mPSW))
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfst;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FST mreal.im
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FST mreal.re
                        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
                        retregs = mST01;
                    }
                    else
                    {
                        cs.Iop = iopfst;
                        cs.Irm = rmfstp;
                        cs.IEV1.Voffset += sz2;
                        cdb.gen(&cs);                   // FSTP mreal.im
                        pop87();
                        cs.IEV1.Voffset -= sz2;
                        cdb.gen(&cs);                   // FSTP mreal.re
                        pop87();
                        retregs = 0;
                    }
                    goto L3;
                }

                if (*pretregs & (mST01 | mPSW))
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // Fop mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfst;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.im
                    pop87();
                    cs.Iop = iop;
                    cs.Irm = rmop;
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // Fop mreal.re
                    cs.Iop = iopfst;
                    cs.Irm = rmfstp;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    retregs = 0;
                }
            }
        L3:
            freenode(e.EV.E1);
            genfwait(cdb);
            fixresult_complex87(cdb,e,retregs,pretregs);
            return;

        case OPmulass:
            // complex *= complex: load E1 on top of E2 and call the library routine
            push87(cdb);
            push87(cdb);
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD float/double ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                retregs = mST01;
                callclib(cdb, e, CLIB.cmul, &retregs, 0);
                if (*pretregs & (mST01 | mPSW))
                {
                    /* FST only stores ST0, so the real part has to be swapped
                     * into ST0 for its store and swapped back afterwards.
                     */
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        case OPdivass:
            // complex /= complex: arrange E2 on top of E1 and call the library routine
            push87(cdb);
            push87(cdb);
            idxregs = idxregm(&cs);             // mask of index regs used
            if (ty1 == TYcldouble)
            {
                cs.Iop = 0xDB;
                cs.Irm |= modregrm(0, 5, 0);    // FLD tbyte ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                goto L2;
            }
            else
            {
                cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
                cs.Irm |= modregrm(0, 0, 0);    // FLD float/double ptr ...
                cdb.gen(&cs);                   // FLD e.EV.E1.re
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                cs.IEV1.Voffset += sz2;
                cdb.gen(&cs);                   // FLD e.EV.E1.im
                cdb.genf2(0xD9,0xC8 + 2);       // FXCH ST(2)
                retregs = mST01;
                callclib(cdb, e, CLIB.cdiv, &retregs, idxregs);
                if (*pretregs & (mST01 | mPSW))
                {
                    /* FST only stores ST0, so the real part has to be swapped
                     * into ST0 for its store and swapped back afterwards.
                     */
                    cs.Irm |= modregrm(0, 2, 0);
                    cdb.gen(&cs);               // FST mreal.im
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FST mreal.re
                    cdb.genf2(0xD9,0xC8 + 1);   // FXCH ST(1)
                    retregs = mST01;
                }
                else
                {
                    cs.Irm |= modregrm(0, 3, 0);
                    cdb.gen(&cs);               // FSTP mreal.im
                    cs.IEV1.Voffset -= sz2;
                    cdb.gen(&cs);               // FSTP mreal.re
                    pop87();
                    pop87();
                    retregs = 0;
                }
                goto L3;
            }

        default:
            assert(0);
    }
}
2776 
/**************************
 * OPnegass
 *
 * Negate a floating point lvalue in place. No FPU arithmetic is used for
 * the negation itself: the byte holding the sign bit of the operand is
 * XORed with 0x80 directly in memory. For complex operands this is done
 * twice, once for each half.
 *
 * If a result is wanted (*pretregs != 0) the negated value is then loaded
 * onto the x87 stack with FLD. Only real and imaginary operand types are
 * handled for that; any other type (including complex) hits assert(0).
 *
 * Params:
 *      cdb = code sink
 *      e = the OPnegass elem, e.EV.E1 is the lvalue being negated
 *      pretregs = where the result is wanted, 0 if the result is unused
 */

@trusted
void cdnegass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint op;

    //printf("cdnegass87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    tym_t tyml = tybasic(e1.Ety);            // type of lvalue
    int sz = _tysize[tyml];

    code cs;
    getlvalue87(cdb,&cs,e1,0);

    /* If the EA is really an XMM register, modEA() will fail.
     * So disallow putting e1 into a register.
     * A better way would be to negate the XMM register in place.
     */
    if (e1.Eoper == OPvar)
        e1.EV.Vsym.Sflags &= ~GTregcand;

    modEA(cdb,&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;

    /* Remember where the operand starts. The sign byte is not always the
     * last byte of the storage: when long doubles are padded beyond 10 bytes
     * the sign lives at byte 9, so "subtract sz - 1" does not get back to
     * the start of the operand for the FLD below.
     */
    const eaOffset = cs.IEV1.Voffset;

    if (tysize(TYldouble) > 10)
    {
        // Padded long double storage: sign bit is in byte 9 of each 80 bit value
        if (tyml == TYldouble || tyml == TYildouble)
            cs.IEV1.Voffset += 10 - 1;
        else if (tyml == TYcldouble)
            cs.IEV1.Voffset += tysize(TYldouble) + 10 - 1;  // sign byte of second half
        else
            cs.IEV1.Voffset += sz - 1;
    }
    else
        cs.IEV1.Voffset += sz - 1;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tyml))
    {
        cs.IEV1.Voffset -= sz / 2;      // step back to the sign byte of the first half
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    if (*pretregs)
    {
        switch (tyml)
        {
            case TYifloat:
            case TYfloat:               cs.Iop = 0xD9;  op = 0; break;
            case TYidouble:
            case TYdouble:
            case TYdouble_alias:        cs.Iop = 0xDD;  op = 0; break;
            case TYildouble:
            case TYldouble:             cs.Iop = 0xDB;  op = 5; break;
            default:
                assert(0);
        }
        NEWREG(cs.Irm,op);
        cs.IEV1.Voffset = eaOffset;     // back to the start of the operand
        push87(cdb);
        cdb.gen(&cs);                   // FLD EA
        retregs = mST0;
    }
    else
        retregs = 0;

    freenode(e1);
    fixresult87(cdb,e,retregs,pretregs);
}
2851 
/************************
 * Take care of OPpostinc and OPpostdec.
 *
 * The lvalue e.EV.E1 is loaded onto the x87 stack, the rvalue e.EV.E2 is
 * added (OPpostinc) or subtracted (OPpostdec), and the new value is stored
 * back into the lvalue with FSTP. What is left on the x87 stack / in the
 * flags is then handed to fixresult87() (or fixresult_complex87() for
 * complex operands) as the value of the expression.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPpostinc or OPpostdec elem
 *      pretregs = where the result is wanted, must not be 0
 */

@trusted
void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    uint op;
    opcode_t op1;
    reg_t reg;

    //printf("post87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    code cs;
    assert(*pretregs);
    getlvalue87(cdb,&cs,e.EV.E1,0);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    // Pick the FLD opcode (op1) and its ModRM reg field (reg) for the operand size
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:
        case TYcdouble:     op1 = ESC(MFdouble,1);  reg = 0;        break;
        case TYifloat:
        case TYfloat:
        case TYcfloat:      op1 = ESC(MFfloat,1);   reg = 0;        break;
        case TYildouble:
        case TYldouble:
        case TYcldouble:    op1 = 0xDB;             reg = 5;        break;
        default:
            assert(0);
    }
    NEWREG(cs.Irm, reg);
    // From here on reg is the ModRM reg field used for the FSTP emitted later:
    // 7 goes with the 0xDB (long double) opcode, 3 with the float/double opcodes
    if (reg == 5)
        reg = 7;
    else
        reg = 3;
    cs.Iop = op1;
    push87(cdb);
    cdb.gen(&cs);                   // FLD e.EV.E1
    if (tycomplex(ty1))
    {
        uint sz = _tysize[ty1] / 2; // size of one half of the complex value

        push87(cdb);
        cs.IEV1.Voffset += sz;
        cdb.gen(&cs);               // FLD e.EV.E1 (second half, at offset sz)
        regm_t retregs = mST0;      // note kludge to only load real part
        codelem(cdb,e.EV.E2,&retregs,false); // load rvalue
        cdb.genf2(0xD8,             // FADD/FSUBR ST,ST2
            (e.Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
        NEWREG(cs.Irm,reg);         // turn the FLD in cs into the matching FSTP
        pop87();
        cs.IEV1.Voffset -= sz;      // back to the first half of the lvalue
        cdb.gen(&cs);               // FSTP e.EV.E1
        genfwait(cdb);
        freenode(e.EV.E1);
        // The two halves loaded above are still on the stack as ST01
        fixresult_complex87(cdb, e, mST01, pretregs);
        return;
    }

    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS))
    {   // Want the result in a register
        push87(cdb);
        cdb.genf2(0xD9,0xC0);       // FLD ST0
    }
    if (*pretregs & mPSW)           // if result in flags
        genftst(cdb,e,0);           // FTST ST0
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E2,&retregs,false);    // load rvalue
    pop87();
    op = (e.Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
    cdb.genf2(0xDE,op);             // FADDP/FSUBRP ST1
    NEWREG(cs.Irm,reg);             // turn the FLD in cs into the matching FSTP
    pop87();
    cdb.gen(&cs);                   // FSTP e.EV.E1
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mPSW | mST0,pretregs);
}
2931 
/************************
 * Code generation entry point for the conversions
 *      OPd_u64
 *      OPld_u64
 *
 * Only dispatches: the work is done by cdd_u64_I32() when I32 is set and
 * by cdd_u64_I64() otherwise.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted, must not be 0
 */
void cdd_u64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);
    assert(*pretregs);

    if (!I32)
    {
        cdd_u64_I64(cdb, e, pretregs);
        return;
    }
    cdd_u64_I32(cdb, e, pretregs);
}
2946 
/************************
 * Implementation of cdd_u64() used when I32 is set.
 *
 * Evaluates e.EV.E1 onto the x87 stack and converts it to a 64 bit integer
 * held in a register pair (reg = least significant half, reg2 = most
 * significant half). The conversion goes through FISTP into floatreg with
 * the control word temporarily replaced by the value built at floatreg+10
 * (see the "roundTo0" notes below); the previous control word is saved at
 * floatreg+12 and reloaded on both paths.
 *
 * An "adjust" constant is built in floatreg+0..9 and compared against the
 * operand. Depending on the comparison, either the operand is stored
 * directly (L1), or adjust is subtracted first and the 0x8000_0000 kept in
 * reg2 is added to the upper half of the stored integer afterwards.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted
 */
@trusted
private void cdd_u64_I32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e       // (roundTo0<<16) | adjust
            fld         real ptr floatreg           // adjust (= 1/real.epsilon)
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10                 // roundTo0
            test        AH,1
            jz          L1                          // jae L1

            fld         real ptr floatreg           // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         EAX,floatreg
            add         EDX,floatreg+4
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         EAX,floatreg
            mov         EDX,floatreg+4
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);          // operand -> ST0
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg, reg2;
    allocreg(cdb,&retregs,&reg,tym);
    reg  = findreglsw(retregs);                     // low half of the result
    reg2 = findregmsw(retregs);                     // high half of the result
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);                  // AX is written by FSTSW AX below

    // Build the constant in floatreg: bytes 0..9 are the 'adjust' real,
    // bytes 10..11 the control word loaded by the FLDCW floatreg+10 below
    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                             // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                      // MOV floatreg+4,EDX
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                     // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *cnop1 = gennop(null);                     // label L1
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);       // JZ L1

    // Fall-through path: subtract adjust before storing, add reg2 back afterwards.
    // The FLD/FSUBP pair leaves the x87 stack depth unchanged, hence no push87/pop87.
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg (0xDF /7 stores 64 bits)
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    cdb.genfltreg(0x03,reg2,4);                     // ADD reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *cnop2 = gennop(null);                     // label L2
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);      // JMP L2

    cdb.append(cnop1);                              // L1:
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg (0xDF /7 stores 64 bits)
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    cdb.genfltreg(LOD,reg2,4);                      // MOV reg2,floatreg+4
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(cnop2);                              // L2:

    pop87();                                        // accounts for the FISTP on either path
    fixresult(cdb,e,retregs,pretregs);
}
3031 
/************************
 * Implementation of cdd_u64() used when I32 is not set (64 bit code).
 *
 * Same scheme as cdd_u64_I32(), except that the result lives in a single
 * register reg. A second scratch register reg2 (never AX and never the
 * result register) holds 0x8000_0000; on the path where the 'adjust'
 * constant was subtracted before FISTP, reg2 is shifted left by 32 and
 * added to the result.
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted
 */
@trusted
private void cdd_u64_I64(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate:
            mov         EDX,0x8000_0000
            mov         floatreg+0,0
            mov         floatreg+4,EDX
            mov         floatreg+8,0x0FBF403e       // (roundTo0<<16) | adjust
            fld         real ptr floatreg           // adjust
            fcomp
            fstsw       AX
            fstcw       floatreg+12
            fldcw       floatreg+10                 // roundTo0
            test        AH,1
            jz          L1                          // jae L1

            fld         real ptr floatreg           // adjust
            fsubp       ST(1), ST
            fistp       floatreg
            mov         RAX,floatreg
            shl         RDX,32
            add         RAX,RDX
            fldcw       floatreg+12
            jmp         L2

    L1:
            fistp       floatreg
            mov         RAX,floatreg
            fldcw       floatreg+12
    L2:
     */
    regm_t retregs = mST0;
    codelem(cdb,e.EV.E1, &retregs, false);          // operand -> ST0
    tym_t tym = e.Ety;
    retregs = *pretregs;
    if (!retregs)
        retregs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tym);                // result register
    regm_t regm2 = ALLREGS & ~retregs & ~mAX;       // scratch: not the result, not AX
    reg_t reg2;
    allocreg(cdb,&regm2,&reg2,tym);
    movregconst(cdb,reg2,0x80000000,0);
    getregs(cdb,mask(reg2) | mAX);                  // AX is written by FSTSW AX below

    // Build the constant in floatreg: bytes 0..9 are the 'adjust' real,
    // bytes 10..11 the control word loaded by the FLDCW floatreg+10 below
    cdb.genfltreg(0xC7,0,0);
    code *cf1 = cdb.last();
    cf1.IFL2 = FLconst;
    cf1.IEV2.Vint = 0;                             // MOV floatreg+0,0
    cdb.genfltreg(STO,reg2,4);                      // MOV floatreg+4,EDX
    cdb.genfltreg(0xC7,0,8);
    code *cf3 = cdb.last();
    cf3.IFL2 = FLconst;
    cf3.IEV2.Vint = 0xFBF403E;                     // MOV floatreg+8,(roundTo0<<16)|adjust

    push87(cdb);
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.gen2(0xD8,0xD9);                            // FCOMP
    pop87();
    cdb.gen2(0xDF,0xE0);                            // FSTSW AX
    cdb.genfltreg(0xD9,7,12);                       // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,10);                       // FLDCW floatreg+10
    cdb.genc2(0xF6,modregrm(3,0,4),1);              // TEST AH,1
    code *cnop1 = gennop(null);                     // label L1
    genjmp(cdb,JE,FLcode,cast(block *)cnop1);       // JZ L1

    // Fall-through path: subtract adjust before storing, add reg2<<32 afterwards.
    // The FLD/FSUBP pair leaves the x87 stack depth unchanged, hence no push87/pop87.
    cdb.genfltreg(0xDB,5,0);                        // FLD real ptr floatreg
    cdb.genf2(0xDE,0xE8+1);                         // FSUBP ST(1),ST
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg (0xDF /7 stores 64 bits)
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);                  // make the MOV a 64 bit load
    cdb.genc2(0xC1,(REX_W << 16) | modregrmx(3,4,reg2),32); // SHL reg2,32
    cdb.gen2(0x03,(REX_W << 16) | modregxrmx(3,reg,reg2));  // ADD reg,reg2
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    code *cnop2 = gennop(null);                     // label L2
    genjmp(cdb,JMP,FLcode,cast(block *)cnop2);      // JMP L2

    cdb.append(cnop1);                              // L1:
    cdb.genfltreg(0xDF,7,0);                        // FISTP qword ptr floatreg (0xDF /7 stores 64 bits)
    cdb.genfltreg(LOD,reg,0);                       // MOV reg,floatreg
    code_orrex(cdb.last(), REX_W);                  // make the MOV a 64 bit load
    cdb.genfltreg(0xD9,5,12);                       // FLDCW floatreg+12
    cdb.append(cnop2);                              // L2:

    pop87();                                        // accounts for the FISTP on either path
    fixresult(cdb,e,retregs,pretregs);
}
3119 
/************************
 * Code generation for
 *      OPd_u32
 *
 * The operand is evaluated onto the x87 stack and stored with FISTP into
 * floatreg while a temporary control word is in effect; the result
 * register is then loaded from floatreg. The previous control word is
 * saved at floatreg+12 and restored right after the store.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPd_u32 elem
 *      pretregs = where the result is wanted
 */
@trusted
void cdd_u32(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    assert(I32 || I64);

    /* Emitted sequence:
            mov         floatreg+8,0x0FBF0000   // (roundTo0<<16)
            fstcw       floatreg+12
            fldcw       floatreg+10             // roundTo0
            fistp       floatreg
            fldcw       floatreg+12
            mov         EAX,floatreg
     */
    enum tempCwOffset  = 10;    // upper half of the dword written at floatreg+8
    enum savedCwOffset = 12;    // where the caller's control word is parked

    // Operand goes to ST0
    regm_t regs = mST0;
    codelem(cdb,e.EV.E1, &regs, false);

    // Pick the integer register that receives the result
    tym_t tym = e.Ety;
    regs = *pretregs & ALLREGS;
    if (regs == 0)
        regs = ALLREGS;
    reg_t dst;
    allocreg(cdb,&regs,&dst,tym);

    // MOV floatreg+8,(roundTo0<<16)
    cdb.genfltreg(0xC7,0,8);
    code *cmov = cdb.last();
    cmov.IFL2 = FLconst;
    cmov.IEV2.Vint = 0x0FBF0000;

    cdb.genfltreg(0xD9,7,savedCwOffset);        // FSTCW floatreg+12
    cdb.genfltreg(0xD9,5,tempCwOffset);         // FLDCW floatreg+10

    cdb.genfltreg(0xDF,7,0);                    // FISTP floatreg
    cdb.genfltreg(0xD9,5,savedCwOffset);        // FLDCW floatreg+12
    cdb.genfltreg(LOD,dst,0);                   // MOV dst,floatreg

    pop87();
    fixresult(cdb,e,regs,pretregs);
}
3161 
/************************
 * Do the following opcodes:
 *      OPd_s16
 *      OPd_s32
 *      OPd_u16
 *      OPd_s64
 *
 * Converts the floating point operand e.EV.E1 to an integer.
 * For I16 the conversion is delegated to a runtime library routine via
 * callclib(). Otherwise the value is stored with FISTP into scratch space
 * on the CPU stack while the CW_roundto0 control word is loaded, and the
 * result is then popped into the destination register(s).
 *
 * Params:
 *      cdb = code sink
 *      e = the conversion elem
 *      pretregs = where the result is wanted, must not be 0
 */

@trusted
void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    uint mf,rf;         // FISTP opcode and its ModRM reg field
    reg_t reg;
    int clib;           // runtime library routine, used by the I16 path

    //printf("cnvt87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(*pretregs);
    tym_t tym = e.Ety;
    int sz = tysize(tym);
    int szoff = sz;     // size of the FISTP store; also the stack offset of the saved control word

    switch (e.Eoper)
    {
        case OPd_s16:
            clib = CLIB.dblint87;
            mf = ESC(MFword,1);
            rf = 3;
            break;

        case OPd_u16:
            szoff = 4;              // converted like OPd_s32, so 4 bytes get stored
            goto case OPd_s32;

        case OPd_s32:
            clib = CLIB.dbllng87;
            mf = ESC(MFlong,1);
            rf = 3;
            break;

        case OPd_s64:
            clib = CLIB.dblllng;
            mf = 0xDF;
            rf = 7;
            break;

        default:
            assert(0);
    }

    if (I16)                       // C may change the default control word
    {
        if (clib == CLIB.dblllng)
        {   retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
        }
        else
        {   retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
            codelem(cdb,e.EV.E1,&retregs,false);
            callclib(cdb,e,clib,pretregs,0);
            pop87();
        }
    }
    else if (1)         // always taken when not I16; the final else below is never reached
    {   //  Generate:
        //  sub     ESP,12
        //  fstcw   8[ESP]
        //  fldcw   roundto0
        //  fistp   long64 ptr [ESP]
        //  fldcw   8[ESP]
        //  pop     lsw
        //  pop     msw
        //  add     ESP,4

        // Stack scratch space: szoff bytes for the integer, 2 for the saved
        // control word, 2 more for the CW_roundto0 temporary when generating
        // PIC; rounded up to a multiple of REGSIZE
        uint szpush = szoff + 2;
        if (config.flags3 & CFG3pic)
            szpush += 2;
        szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        if (szpush == REGSIZE)
            cdb.gen1(0x50 + AX);                // PUSH EAX
        else
            cod3_stackadj(cdb, szpush);
        genfwait(cdb);
        cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

        genfwait(cdb);

        if (config.flags3 & CFG3pic)
        {
            // PIC: put the control word on the stack instead of referencing a static
            cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
            code_orflag(cdb.last(), CFopsize);
            cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
        }
        else
            genrnd(cdb, CW_roundto0);   // FLDCW roundto0

        pop87();

        genfwait(cdb);
        cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP));                   // FISTP [ESP]

        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);                                           // FWAIT
        cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP]

        // Pop the stored integer into the result register(s); szpush keeps
        // track of how much scratch space is still left on the stack
        if (szoff > REGSIZE)
        {   szpush -= REGSIZE;
            genpop(cdb,findreglsw(retregs));       // POP lsw
        }
        szpush -= REGSIZE;
        genpop(cdb,reg);                           // POP reg

        if (szpush)
            cod3_stackadj(cdb, -szpush);           // release what was not popped
        fixresult(cdb,e,retregs,pretregs);
    }
    else
    {
        // Unreachable because of the 'else if (1)' above.
        // This is incorrect. For -inf and nan, the 8087 returns the largest
        // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
        // and for nan, 0 should be returned.
        retregs = mST0;
        codelem(cdb,e.EV.E1,&retregs,false);

        genfwait(cdb);
        genrnd(cdb, CW_roundto0);                  // FLDCW roundto0

        pop87();
        cdb.genfltreg(mf,rf,0);                    // FISTP floatreg
        retregs = *pretregs & (ALLREGS | mBP);
        if (!retregs)
                retregs = ALLREGS;
        allocreg(cdb,&retregs,&reg,tym);

        genfwait(cdb);

        if (sz > REGSIZE)
        {
            cdb.genfltreg(LOD,reg,REGSIZE);          // MOV reg,floatreg + REGSIZE
                                                     // MOV lsreg,floatreg
            cdb.genfltreg(LOD,findreglsw(retregs),0);
        }
        else
            cdb.genfltreg(LOD,reg,0);                // MOV reg,floatreg
        genrnd(cdb, CW_roundtonearest);              // FLDCW roundtonearest
        fixresult(cdb,e,retregs,pretregs);
    }
}
3319 
/************************
 * Do OPrndtol.
 *
 * The operand is evaluated onto the x87 stack, stored as an integer into
 * floatreg with FISTP, and then loaded from floatreg into the result
 * register(s). No control word is loaded here. When no result is wanted
 * the operand is merely evaluated.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPrndtol elem
 *      pretregs = where the result is wanted, 0 if the result is unused
 */

@trusted
void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Result not wanted: just evaluate the operand
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t regs = mST0;
    codelem(cdb,e.EV.E1,&regs,false);

    // FISTP opcode and ModRM reg field, selected by the size of the result
    tym_t tym = e.Ety;
    uint sz = tysize(tym);
    ubyte fistpOp, fistpReg;
    if (sz == 2)
    {
        fistpOp  = 0xDF;
        fistpReg = 3;
    }
    else if (sz == 4)
    {
        fistpOp  = 0xDB;
        fistpReg = 3;
    }
    else if (sz == 8)
    {
        fistpOp  = 0xDF;
        fistpReg = 7;
    }
    else
        assert(0);

    pop87();
    cdb.genfltreg(fistpOp,fistpReg,0);              // FISTP floatreg

    regs = *pretregs & (ALLREGS | mBP);
    if (regs == 0)
        regs = ALLREGS;
    reg_t reg;
    allocreg(cdb,&regs,&reg,tym);
    genfwait(cdb);                                  // FWAIT

    if (tysize(tym) <= REGSIZE)
    {
        // Result fits in one register
        cdb.genfltreg(LOD,reg,0);                   // MOV reg,floatreg
        if (tysize(tym) == 8 && I64)
            code_orrex(cdb.last(), REX_W);
    }
    else
    {
        // Result needs a register pair
        cdb.genfltreg(LOD,reg,REGSIZE);             // MOV reg,floatreg + REGSIZE
        cdb.genfltreg(LOD,findreglsw(regs),0);      // MOV lsreg,floatreg
    }
    fixresult(cdb,e,regs,pretregs);
}
3377 
/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
 *
 * Both operands are evaluated onto the x87 stack (E1 first, then E2) and
 * the matching two-operand x87 instruction is emitted. The result is
 * reported as being in ST0.
 *
 * Params:
 *      cdb = code sink
 *      e = the binary elem
 *      pretregs = where the result is wanted, must not be 0
 */

@trusted
void cdscale(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    regm_t st = mST0;
    codelem(cdb,e.EV.E1,&st,false);
    note87(e.EV.E1,0,0);
    codelem(cdb,e.EV.E2,&st,false);
    makesure87(cdb,e.EV.E1,0,1,0);       // now have x,y on stack; need y,x

    const oper = e.Eoper;
    if (oper == OPscale)
    {
        cdb.genf2(0xD9,0xFD);            // FSCALE
        cdb.genf2(0xDD,0xD8 + 1);        // FSTP ST(1)
    }
    else if (oper == OPyl2x)
        cdb.genf2(0xD9,0xF1);            // FYL2X
    else if (oper == OPyl2xp1)
        cdb.genf2(0xD9,0xF9);            // FYL2XP1
    else
        assert(0);

    pop87();                             // two operands in, one result out
    fixresult87(cdb,e,mST0,pretregs);
}
3413 
3414 
/**********************************
 * Unary x87 operations on ST0:
 * negate, absolute value, square root, sine, cosine, round to integer
 * (OPneg, OPabs, OPsqrt, OPsin, OPcos, OPrint).
 *
 * The operand is evaluated into ST0 and a single 0xD9-prefixed instruction
 * is emitted; the result stays in ST0.
 *
 * Params:
 *      cdb = code sink
 *      e = the unary elem
 *      pretregs = where the result is wanted, must not be 0
 */

@trusted
void neg87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("neg87()\n");

    assert(*pretregs);

    // Second opcode byte of the instruction, selected before any code is emitted
    opcode_t fop;
    const oper = e.Eoper;
    if (oper == OPneg)
        fop = 0xE0;                     // FCHS
    else if (oper == OPabs)
        fop = 0xE1;                     // FABS
    else if (oper == OPsqrt)
        fop = 0xFA;                     // FSQRT
    else if (oper == OPsin)
        fop = 0xFE;                     // FSIN
    else if (oper == OPcos)
        fop = 0xFF;                     // FCOS
    else if (oper == OPrint)
        fop = 0xFC;                     // FRNDINT
    else
        assert(0);

    regm_t st0 = mST0;
    codelem(cdb,e.EV.E1,&st0,false);
    cdb.genf2(0xD9,fop);
    fixresult87(cdb,e,mST0,pretregs);
}
3441 
/**********************************
 * Unary - for complex operands.
 *
 * The operand is evaluated into ST01; each of the two stack entries gets
 * its sign changed, with an FXCH ST(1) after each FCHS so that the original
 * order of the two entries is restored at the end.
 *
 * Params:
 *      cdb = code sink
 *      e = the OPneg elem
 *      pretregs = where the result is wanted
 */

@trusted
void neg_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPneg);

    regm_t st01 = mST01;
    codelem(cdb,e.EV.E1,&st01,false);

    // Negate ST0, swap, negate the other half, swap back
    foreach (_; 0 .. 2)
    {
        cdb.genf2(0xD9,0xE0);           // FCHS
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    }
    fixresult_complex87(cdb,e,mST01,pretregs);
}
3458 
/*********************************
 * Load a floating point value through an indirection onto the x87 stack.
 *
 * The addressing mode for e is always computed. If a result is wanted, an
 * FLD of the appropriate width is emitted and the value is reported in ST0.
 * Only real and imaginary float, double and long double types are handled.
 *
 * Params:
 *      cdb = code sink
 *      e = elem to load
 *      pretregs = where the result is wanted, 0 if the result is unused
 */

@trusted
void cdind87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdind87(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    code cs;

    getlvalue87(cdb,&cs,e,0);           // get addressing mode
    if (!*pretregs)
        return;                         // value not needed

    // Select the FLD encoding from the operand type
    const ty = tybasic(e.Ety);
    if (ty == TYfloat || ty == TYifloat)
        cs.Iop = 0xD9;
    else if (ty == TYidouble || ty == TYdouble || ty == TYdouble_alias)
        cs.Iop = 0xDD;
    else if (ty == TYildouble || ty == TYldouble)
    {
        cs.Iop = 0xDB;
        cs.Irm |= modregrm(0,5,0);
    }
    else
        assert(0);

    push87(cdb);
    cdb.gen(&cs);                       // FLD EA
    fixresult87(cdb,e,mST0,pretregs);
}
3497 
/************************************
 * Reset statics for another .obj file.
 *
 * Zero-fills `oldd`, which among other things makes genrnd() see
 * oldd.round as unset again.
 */

@trusted
void cg87_reset()
{
    memset(&oldd, 0, typeof(oldd).sizeof);
}
3507 
3508 
/*****************************************
 * Generate an FLDCW that loads the control word cw.
 *
 * With CFG3pic set, cw is first written to floatreg and loaded from there.
 * Otherwise the control word is loaded from a read-only symbol; the symbols
 * for CW_roundto0 and CW_roundtonearest are created on first use and cached
 * in oldd. Any cw other than CW_roundto0 selects the CW_roundtonearest
 * symbol.
 *
 * Params:
 *      cdb = code sink
 *      cw = control word value to load
 */

@trusted
private void genrnd(ref CodeBuilder cdb, short cw)
{
    if (config.flags3 & CFG3pic)
    {
        // Go through floatreg rather than a static symbol
        cdb.genfltreg(0xC7, 0, 0);              // MOV floatreg, cw
        code *cmov = cdb.last();
        cmov.IFL2 = FLconst;
        cmov.IEV2.Vuns = cw;

        cdb.genfltreg(0xD9, 5, 0);              // FLDCW floatreg
        return;
    }

    if (!oldd.round)                            // constants not emitted yet
    {
        oldd.round = 1;

        short cwi = CW_roundto0;                // round to 0
        oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2);
        cwi = CW_roundtonearest;                // round to nearest
        oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2);
    }

    Symbol *cwsym = oldd.roundtonearest;
    if (cw == CW_roundto0)
        cwsym = oldd.roundto0;

    // FLDCW cwsym
    code cs;
    cs.Iop = 0xD9;
    cs.Irm = modregrm(0,5,BPRM);
    cs.Irex = 0;
    cs.Iflags = CFoff;
    cs.IFL1 = cwsym.Sfl;
    cs.IEV1.Vsym = cwsym;
    cs.IEV1.Voffset = 0;
    cdb.gen(&cs);
}
3550 
3551 /************************* Complex Numbers *********************/
3552 
/***************************
 * Set the PSW based on the state of ST01.
 *
 * Both entries of ST01 are compared against 0.0. If the first comparison
 * already yields "not equal" or "unordered" (NaN), the second one is
 * skipped by jumping to L1, so the flags at L1 come from the first
 * comparison in that case and from the second one otherwise.
 *
 * Input:
 *      cdb     code sink
 *      e       not referenced in this function
 *      pop     if stack should be popped after test (must be 0 or 1)
 */

@trusted
private void genctst(ref CodeBuilder cdb,elem *e,int pop)
{
    assert(pop == 0 || pop == 1);

    // Generate:
    //  if (NOSAHF)
    //          FLDZ
    //          FUCOMIP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FLDZ
    //          FUCOMIP ST(2)
    //      L1:
    //        if (pop)
    //          FPOP
    //          FPOP
    //  else if (pop)
    //          FLDZ
    //          FUCOMPP
    //          FSTSW   AX
    //          SAHF
    //          FLDZ
    //          FUCOMPP
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    //  else
    //          FLDZ
    //          FUCOM
    //          FSTSW   AX
    //          SAHF
    //          FUCOMP  ST(2)
    //          JNE     L1
    //          JP      L1              // if NAN
    //          FSTSW   AX
    //          SAHF
    //      L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    // cdbnop collects the code that goes at label L1; it starts with the
    // NOP that serves as the jump target and is appended to cdb at the end
    CodeBuilder cdbnop;
    cdbnop.ctor();
    cdbnop.gennop();
    code *cnop = cdbnop.peek();
    push87(cdb);
    cdb.gen2(0xD9,0xEE);                       // FLDZ
    if (NOSAHF)
    {
        // The FUCOMIP instructions are used directly as the source of the
        // CPU flags here; no cg87_87topsw() in this branch
        cdb.gen2(0xDF,0xE9);                   // FUCOMIP
        pop87();
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDF,0xEA);                   // FUCOMIP ST(2)
        if (pop)
        {
            // The pops are placed after L1 so they run on every path
            cdbnop.genf2(0xDD,modregrm(3,3,0));  // FPOP
            cdbnop.genf2(0xDD,modregrm(3,3,0));  // FPOP
            pop87();
            pop87();
        }
    }
    else if (pop)
    {
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        pop87();
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xD9,0xEE);                   // FLDZ
        cdb.gen2(0xDA,0xE9);                   // FUCOMPP
        pop87();
        // The jumps test the CPU flags set from the first comparison
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    else
    {
        cdb.gen2(0xDD,0xE1);                   // FUCOM
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
        cdb.gen2(0xDD,0xEA);                   // FUCOMP ST(2)
        pop87();
        // The jumps test the CPU flags set from the first comparison
        genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE     L1
        genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP      L1
        cg87_87topsw(cdb);                     // put 8087 flags in CPU flags
    }
    cdb.append(cdbnop);                        // L1: (plus the FPOPs, if any)
}
3648 
/******************************
 * Given the result of a complex expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Complex values on the x87 stack are kept with the real part in ST1 and the
 * imaginary part in ST0 (see the isReturnValue note in the body).
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = expression whose result is being moved; its basic type selects
 *          which conversion is generated
 *      retregs = registers that currently hold the result
 *      pretregs = registers (and/or mPSW) the caller wants the result in;
 *                 0 means the result is not wanted
 *      isReturnValue = true if the result is being returned from a function,
 *                      which on 64 bit Posix targets requires swapping ST0 and ST1
 */

@trusted
void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs, bool isReturnValue = false)
{
    static if (0)
    {
        printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
            e,regm_str(retregs),regm_str(*pretregs));
    }

    // If a result is wanted, there has to be a place it currently lives in
    assert(!*pretregs || retregs);
    tym_t tym = tybasic(e.Ety);
    uint sz = _tysize[tym];

    if (isReturnValue)
    {
        // In loadComplex and complex_eq87, complex numbers have the real part
        // pushed to the FPU stack first (ST1), then the imaginary part (ST0).
        // However, the Intel 64 bit ABI scheme requires that types classified
        // as complex x87 instead have the real part returned in ST0, and the
        // imaginary part in ST1.
        if (retregs == mST01 && I64 && (config.exe & EX_posix))
            cdb.genf2(0xD9, 0xC8 + 1);          // FXCH ST(1)
    }

    if (*pretregs == 0 && retregs == mST01)
    {
        // Result is not wanted: discard both halves from the FPU stack
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
        cdb.genf2(0xDD,modregrm(3,3,0));        // FPOP
        pop87();
    }
    else if (tym == TYllong)
    {
        // passing cfloat through register for I64
        assert(retregs & mST01, "this float expression is not implemented");
        // Store ST0 at floatreg+4 and the former ST1 at floatreg+0, so the two
        // floats form one 8 byte value that is then loaded as a whole.
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,4);     // FSTP floatreg+4
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),BX,0);     // FSTP floatreg
        genfwait(cdb);
        const reg = findreg(*pretregs);
        // getregs() takes a register mask (as in the other calls below), while
        // findreg() yields a register number, so convert before reserving it.
        getregs(cdb,mask(reg));
        cdb.genfltreg(LOD, reg, 0);             // MOV reg,floatreg
        code_orrex(cdb.last(), REX_W);          // extend to 64 bit register
    }
    else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01)
    {
        // cfloat from ST01 to the DX,AX pair: ST0 goes to EDX, then the
        // former ST1 goes to EAX, each by way of floatreg
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mDX|mAX);
        cdb.genfltreg(LOD, DX, 0);              // MOV EDX,floatreg

        pop87();
        cdb.genfltreg(ESC(MFfloat,1),3,0);      // FSTP floatreg
        genfwait(cdb);
        cdb.genfltreg(LOD, AX, 0);              // MOV EAX,floatreg
    }
    else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01)
    {
        // cfloat from the DX,AX pair to ST01: EAX is pushed first (ends up in
        // ST1), then EDX (ends up in ST0)
        push87(cdb);
        cdb.genfltreg(STO, AX, 0);              // MOV floatreg, EAX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        push87(cdb);
        cdb.genfltreg(STO, DX, 0);              // MOV floatreg, EDX
        cdb.genfltreg(0xD9, 0, 0);              // FLD float ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             *pretregs & (mXMM0|mXMM1) && retregs & mST01)
    {
        // cfloat/cdouble from ST01 to XMM0,XMM1: ST0 goes to XMM1, then the
        // former ST1 goes to XMM0, each by way of floatreg
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmload(tyf);
        uint mf = tyf == TYfloat ? MFfloat : MFdouble;
        if (*pretregs & mPSW && !(retregs & mPSW))
            genctst(cdb,e,0);                   // FTST
        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        getregs(cdb,mXMM0|mXMM1);
        cdb.genxmmreg(xop,XMM1,0,tyf);          // load XMM1 from floatreg

        pop87();
        cdb.genfltreg(ESC(mf,1),3,0);           // FSTP floatreg
        genfwait(cdb);
        cdb.genxmmreg(xop, XMM0, 0, tyf);       // load XMM0 from floatreg
    }
    else if ((tym == TYcfloat || tym == TYcdouble) &&
             retregs & (mXMM0|mXMM1) && *pretregs & mST01)
    {
        // cfloat/cdouble from XMM0,XMM1 to ST01: XMM0 is pushed first (ends up
        // in ST1), then XMM1 (ends up in ST0)
        tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble;
        uint xop = xmmstore(tyf);
        uint fop = tym == TYcfloat ? 0xD9 : 0xDD;   // FLD m32 or FLD m64
        push87(cdb);
        cdb.genfltreg(xop, XMM0-XMM0, 0);       // store XMM0 to floatreg
        checkSetVex(cdb.last(),tyf);
        cdb.genfltreg(fop, 0, 0);               // FLD float/double ptr floatreg

        push87(cdb);
        cdb.genxmmreg(xop, XMM1, 0, tyf);       // store XMM1 to floatreg
        cdb.genfltreg(fop, 0, 0);               // FLD float/double ptr floatreg

        if (*pretregs & mPSW)
            genctst(cdb,e,0);                   // FTST
    }
    else
    {   // No register move is generated here; only the flags may need setting
        if (*pretregs & mPSW)
        {   if (!(retregs & mPSW))
            {   assert(retregs & mST01);
                // pop the operand only if the caller does not also want it in ST01
                genctst(cdb,e,!(*pretregs & mST01));        // FTST
            }
        }
        // a result wanted in ST01 must already be there
        assert(!(*pretregs & mST01) || (retregs & mST01));
    }
    if (*pretregs & mST01)
    {   // Record e for the two stack slots: offset 0 with ST1, offset sz/2 with ST0
        note87(e,0,1);
        note87(e,sz/2,0);
    }
}
3778 
/*****************************************
 * Operators OPc_r and OPc_i
 *
 * Evaluates the operand into ST01, drops one of the two stack entries
 * and hands the remaining one, now in ST0, to fixresult87().
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPc_r or OPc_i expression
 *      pretregs = where the caller wants the result
 */

@trusted
void cdconvt87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t regs = mST01;
    codelem(cdb, e.EV.E1, &regs, false);

    const oper = e.Eoper;
    if (oper == OPc_r)
    {
        // discard ST0, leaving the former ST1 on top
        cdb.genf2(0xDD,0xD8 + 0); // FPOP
        pop87();
    }
    else if (oper == OPc_i)
    {
        // overwrite ST1 with ST0 and pop, leaving the former ST0 on top
        cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1)
        pop87();
    }
    else
    {
        assert(0);
    }

    regs = mST0;
    fixresult87(cdb, e, regs, pretregs);
}
3806 
/**************************************
 * Load complex operand into ST01 or flags or both.
 *
 * Handles variables, indirections, constants and the precision conversion
 * operators (which simply load their operand). Any other operator asserts.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = complex expression (TYcfloat, TYcdouble or TYcldouble) to load
 *      pretregs = where the caller wants the result
 */

@trusted
void cload87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //elem_print(e);
    assert(!I16);
    debug
    {
        if (I32)
        {
            assert(config.inline8087);
            elem_debug(e);
            assert(*pretregs & (mST01 | mPSW));
            assert(!(*pretregs & ~(mST01 | mPSW)));
        }
    }

    const tym_t ty = tybasic(e.Ety);
    const uint halfSize = _tysize[ty] / 2;      // size of one of the two parts

    code cs = void;
    memset(&cs, 0, cs.sizeof);
    if (ADDFWAIT())
        cs.Iflags = CFwait;

    // Memory format for the FLD; not used for TYcldouble
    uint mf;
    if (ty == TYcfloat)
        mf = MFfloat;
    else if (ty == TYcdouble)
        mf = MFdouble;
    else if (ty != TYcldouble)
        assert(0);

    switch (e.Eoper)
    {
        case OPd_ld:
        case OPld_d:
        case OPf_d:
        case OPd_f:
            // the conversion itself generates nothing, load the operand instead
            cload87(cdb, e.EV.E1, pretregs);
            freenode(e.EV.E1);
            return;

        case OPvar:
            notreg(e);                  // never enregister this variable
            goto case OPind;

        case OPind:
            push87(cdb);
            push87(cdb);
            if (ty == TYcldouble)
                loadea(cdb,e,&cs,0xDB,5,0,0,0);             // FLD var
            else
                loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0);        // FLD var
            // emit the same instruction again, addressing the second part
            cs.IEV1.Voffset += halfSize;
            cdb.gen(&cs);
            break;

        case OPconst:
            push87(cdb);
            push87(cdb);
            foreach (int part; 0 .. 2)
            {
                // only constants with a dedicated load opcode are expected here
                const ubyte ldop = loadconst(e, part);
                if (!ldop)
                {
                    assert(0);
                }
                cdb.genf2(0xD9,ldop);             // FLDx
            }
            break;

        default:
            debug elem_print(e);
            assert(0);
    }

    // both the memory and the constant path leave the value in ST01
    fixresult_complex87(cdb, e, mST01, pretregs);
}
3908 
/**********************************************
 * Load OPpair or OPrpair into mST01
 *
 * The first operand is evaluated, then the second; for OPrpair the two
 * stack entries are exchanged afterwards.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPpair or OPrpair expression
 *      pretregs = where the caller wants the result
 */
@trusted
void loadPair87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    const oper = e.Eoper;
    assert(oper == OPpair || oper == OPrpair);

    elem* first = e.EV.E1;
    regm_t regs = mST0;

    codelem(cdb, first, &regs, false);
    note87(first, 0, 0);                    // remember that first sits in ST0
    codelem(cdb, e.EV.E2, &regs, false);
    makesure87(cdb, first, 0, 1, 0);        // first is expected in ST1 by now

    if (oper == OPrpair)
    {
        cdb.genf2(0xD9, 0xC8 + 1);   // FXCH ST(1)
    }

    fixresult_complex87(cdb, e, mST01, pretregs);
}
3926 
/**********************************************
 * Round 80 bit precision to 32 or 64 bits.
 * OPtoprec
 *
 * When the result is wanted in ST0, the rounding is done by storing the
 * value to the float scratch location and loading it back.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPtoprec expression; its type size selects float or double
 *      pretregs = where the caller wants the result; 0 if it is not wanted
 */
@trusted
void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdtoprec: *pretregs = %s\n", regm_str(*pretregs));
    if (*pretregs == 0)
    {
        // result not wanted: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    assert(config.inline8087);

    regm_t regs = mST0;
    codelem(cdb, e.EV.E1, &regs, false);

    if ((*pretregs & mST0) != 0)
    {
        const isFloat = _tysize[tybasic(e.Ety)] == FLOATSIZE;
        uint mf = isFloat ? MFfloat : MFdouble;
        const opcode = ESC(mf,1);
        cdb.genfltreg(opcode,3,0);      // FSTP float/double ptr fltreg
        genfwait(cdb);
        cdb.genfltreg(opcode,0,0);      // FLD float/double ptr fltreg
    }

    fixresult87(cdb, e, regs, pretregs);
}
3955 
3956 }