1 /**
2  * Code generation 1
3  *
4  * Handles function calls: putting arguments in registers / on the stack, and jumping to the function.
5  *
6  * Compiler implementation of the
7  * $(LINK2 https://www.dlang.org, D programming language).
8  *
9  * Copyright:   Copyright (C) 1984-1998 by Symantec
10  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
11  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
12  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
13  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
14  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
15  */
16 
17 module dmd.backend.cod1;
18 
19 version (SCPP)
20     version = COMPILE;
21 version (MARS)
22     version = COMPILE;
23 
24 version (COMPILE)
25 {
26 
27 import core.bitop;
28 import core.stdc.stdio;
29 import core.stdc.stdlib;
30 import core.stdc.string;
31 
32 import dmd.backend.backend;
33 import dmd.backend.cc;
34 import dmd.backend.cdef;
35 import dmd.backend.code;
36 import dmd.backend.code_x86;
37 import dmd.backend.codebuilder;
38 import dmd.backend.mem;
39 import dmd.backend.el;
40 import dmd.backend.exh;
41 import dmd.backend.global;
42 import dmd.backend.obj;
43 import dmd.backend.oper;
44 import dmd.backend.rtlsym;
45 import dmd.backend.ty;
46 import dmd.backend.type;
47 import dmd.backend.xmm;
48 
49 extern (C++):
50 
51 nothrow:
52 @safe:
53 
// Backend globals that are defined in other modules.
extern __gshared CGstate cgstate;       // code generator state (e.g. cgstate.stackclean, cgstate.funcarg)
extern __gshared ubyte[FLMAX] segfl;    // table with one ubyte entry per FL value
extern __gshared bool[FLMAX] stackfl;   // table with one bool entry per FL value
57 
/// Returns: a value with only bit number `m` set
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
59 
/// Append a register-to-register instruction with opcode 0x09 (x86 `OR`),
/// i.e. `OR t,f`, to `c`.
private void genorreg(ref CodeBuilder c, uint t, uint f)
{
    genregs(c, 0x09, f, t);
}
61 
/* Arrays to convert from index register to r/m field.
 * Each table has 8 entries, in the register order given by the
 * column header below. Entries of -1 presumably mark registers that
 * cannot be used as an index register in that mode (TODO confirm).
 * regtorm32 is the table for 32 bit addressing; regtorm holds the
 * 16 bit r/m values (BX=7, BP=6, SI=4, DI=5).
 */
                                       /* AX CX DX BX SP BP SI DI       */
private __gshared const byte[8] regtorm32 =   [  0, 1, 2, 3,-1, 5, 6, 7 ];
__gshared const   byte[8] regtorm   =   [ -1,-1,-1, 7,-1, 6, 4, 5 ];
66 
67 //void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
68 //        regm_t *pretregs,regm_t keepmsk, bool usefuncarg);
69 
/*********************************
 * Determine if we should leave parameter `s` in the register it
 * came in, or allocate a register for it using the register
 * allocator.
 *
 * A parameter stays in its incoming register when its storage class is
 * `SC.fastpar` or `SC.shadowreg` and either optimization is off
 * (`CFG4optimized` not set) or `s` is not a register candidate
 * (`GTregcand` not set).
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter and should be left in the register it came in
 */
@trusted
bool regParamInPreg(Symbol* s)
{
    //printf("regPAramInPreg %s\n", s.Sident.ptr);
    const bool arrivesInRegister = s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg;
    if (!arrivesInRegister)
        return false;

    if (!(config.flags4 & CFG4optimized))
        return true;                    // not optimizing

    return !(s.Sflags & GTregcand);
}
86 
87 
/**************************
 * Determine if e is a 32 bit scaled index addressing mode.
 *
 * Leading OPcomma nodes are skipped (their right operand is examined).
 * The result is non-zero only for an OPshl that is not a common
 * subexpression (Ecount == 0) and whose shift count is a constant
 * in the range 1..3.
 * Params:
 *      e = expression to examine
 * Returns:
 *      0       not a scaled index addressing mode
 *      !=0     the value for ss in the SIB byte
 */

@trusted
int isscaledindex(elem *e)
{
    assert(!I16);

    // Step over comma operators; only the final value matters here
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    if (e.Eoper != OPshl || e.Ecount)
        return 0;
    if (e.EV.E2.Eoper != OPconst)
        return 0;

    const targ_uns shiftCount = e.EV.E2.EV.Vuns;
    if (shiftCount > 3)
        return 0;
    return shiftCount;
}
111 
/*********************************************
 * Generate code for which isscaledindex(e) returned a non-zero result.
 *
 * The left operands of any leading OPcomma nodes are evaluated first (with an
 * empty result mask), then the left operand of the OPshl is evaluated into
 * `*pidxregs`. The shift itself is not emitted here.
 * Params:
 *      cdb = code is appended here
 *      e = expression accepted by isscaledindex()
 *      pidxregs = mask of registers acceptable for the index value,
 *                 passed through to scodelem()
 *      keepmsk = mask of registers passed to scodelem() as registers to keep
 */

@trusted
/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Evaluate the left side of each comma operator, then move to its right side
    for (; e.Eoper == OPcomma; )
    {
        regm_t noResult = 0;
        scodelem(cdb, e.EV.E1, &noResult, keepmsk, true);
        freenode(e);
        e = e.EV.E2;
    }

    assert(e.Eoper == OPshl);

    // Load index register with result of e.EV.E1
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);

    // Release the constant shift count and the shift node itself
    freenode(e.EV.E2);
    freenode(e);
}
132 
133 /***********************************
134  * Determine index if we can do two LEA instructions as a multiply.
135  * Returns:
136  *      0       can't do it
137  */
138 
/// Flag bits (may be OR'ed together) describing the restrictions that apply
/// to a multiply-by-LEA recipe; stored in `Ssindex.ssflags`.
enum
{
    SSFLnobp       = 1,       /// can't have EBP in relconst
    SSFLnobase1    = 2,       /// no base register for first LEA
    SSFLnobase     = 4,       /// no base register
    SSFLlea        = 8,       /// can do it in one LEA
}
146 
/// One recipe for multiplying by a constant `product` using LEA instruction(s).
struct Ssindex
{
    targ_uns product;    /// the constant multiplier this recipe produces
    ubyte ss1;           /// presumably the SIB scale field (log2 of scale) of the first LEA - TODO confirm
    ubyte ss2;           /// presumably the SIB scale field (log2 of scale) of the second LEA - TODO confirm
    ubyte ssflags;       /// SSFLxxxx
}
154 
/* Table of the multipliers that ssindex() recognizes.
 * Fields are { product, ss1, ss2, ssflags }.
 *
 * Every entry after [0] satisfies product == f1 * f2, where
 *      f1 = 2^ss1, plus 1 unless SSFLnobase1 is set
 *      f2 = 2^ss2, plus 1 unless SSFLnobase  is set
 * except the SSFLlea entries, for which product == 2^ss1 + 1 and ss2 is 0.
 * Index 0 is reserved because ssindex() returns 0 to mean "not found".
 */
private __gshared const Ssindex[21] ssindex_array =
[
    { 0, 0, 0 },               // [0] is a place holder

    // single LEA: 2^ss1 + 1
    { 3,  1, 0, SSFLnobp | SSFLlea },
    { 5,  2, 0, SSFLnobp | SSFLlea },
    { 9,  3, 0, SSFLnobp | SSFLlea },

    // (2^ss1 + 1) * 2^ss2
    { 6,  1, 1, SSFLnobase },
    { 12, 1, 2, SSFLnobase },
    { 24, 1, 3, SSFLnobase },
    { 10, 2, 1, SSFLnobase },
    { 20, 2, 2, SSFLnobase },
    { 40, 2, 3, SSFLnobase },
    { 18, 3, 1, SSFLnobase },
    { 36, 3, 2, SSFLnobase },
    { 72, 3, 3, SSFLnobase },

    // (2^ss1 + 1) * (2^ss2 + 1)
    { 15, 2, 1, SSFLnobp },
    { 25, 2, 2, SSFLnobp },
    { 27, 3, 1, SSFLnobp },
    { 45, 3, 2, SSFLnobp },
    { 81, 3, 3, SSFLnobp },

    // 2^ss1 * 2^ss2
    { 16, 3, 1, SSFLnobase1 | SSFLnobase },
    { 32, 3, 2, SSFLnobase1 | SSFLnobase },
    { 64, 3, 3, SSFLnobase1 | SSFLnobase },
];
183 
/***********************************
 * Look up a multiplier in `ssindex_array`.
 * Params:
 *      op = operator of the expression; if it is OPshl, `product` is a
 *           shift count and the multiplier looked up is 2^product
 *      product = multiplier (or shift count when op is OPshl)
 * Returns:
 *      index into `ssindex_array` of the matching entry,
 *      0 if there is none
 */
int ssindex(OPER op,targ_uns product)
{
    if (op == OPshl)
    {
        /* A shift count of the full width of targ_uns or more cannot be
         * represented as a multiplier, and `1 << product` is not a valid
         * shift for such counts (x86 hardware masks the count, which would
         * make e.g. a shift by 36 look like a multiply by 16).
         */
        if (product >= targ_uns.sizeof * 8)
            return 0;
        product = cast(targ_uns)1 << product;
    }
    for (size_t i = 1; i < ssindex_array.length; i++)
    {
        if (ssindex_array[i].product == product)
            return cast(int)i;
    }
    return 0;
}
195 
/***************************************
 * Build an EA of the form disp[base][index*scale].
 *
 * Fills in the Irm, Isib and Irex fields of `c`, sets IFL1 to FLconst
 * and stores `disp` in IEV1.Vuns.
 * Params:
 *      c =       struct to fill in
 *      base =    base register (-1 if none)
 *      index =   index register (-1 if none); must not be SP unless generating 16 bit code
 *      scale =   scale factor - 1,2,4,8 (must be 1 for 16 bit code)
 *      disp =    displacement
 */

void buildEA(code *c,int base,int index,int scale,targ_size_t disp)
{
    ubyte rm;
    ubyte sib = 0;
    ubyte rex = 0;

    if (I16)
    {
        // 16 bit r/m bytes indexed by [base + 1][index + 1]; 0x09 marks an invalid combination
        // -1 AX CX DX BX SP BP SI DI
        static immutable ubyte[9][9] EA16rm =
        [
            [   0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85,   ],      // -1
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // AX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // CX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // DX
            [   0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81,   ],      // BX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // SP
            [   0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83,   ],      // BP
            [   0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09,   ],      // SI
            [   0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09,   ]       // DI
        ];

        assert(scale == 1);
        rm = EA16rm[base + 1][index + 1];
        assert(rm != 9);
    }
    else
    {
        assert(index != SP);

        // Translate the scale factor into the ss field of the SIB byte
        uint ss;
        switch (scale)
        {
            case 1:     ss = 0; break;
            case 2:     ss = 1; break;
            case 4:     ss = 2; break;
            case 8:     ss = 3; break;
            default:    assert(0);
        }

        const bool haveBase  = base  != -1;
        const bool haveIndex = index != -1;

        if (haveBase && haveIndex)
        {
            // disp[base][index*scale]
            rm  = modregrm(2, 0, 4);
            sib = modregrm(ss,index & 7,base & 7);
            if (index & 8)
                rex |= REX_X;
            if (base & 8)
                rex |= REX_B;
        }
        else if (haveIndex)
        {
            // disp[index*scale], no base
            rm  = modregrm(0,0,4);
            sib = modregrm(ss,index & 7,5);
            if (index & 8)
                rex |= REX_X;
        }
        else if (!haveBase)
        {
            // disp only
            rm = modregrm(0,0,5);
        }
        else if (base == SP)
        {
            // disp[SP] has to be encoded with a SIB byte
            rm  = modregrm(2, 0, 4);
            sib = modregrm(0, 4, SP);
        }
        else if ((base & 8) && base == R12)
        {
            // disp[R12] has to be encoded with a SIB byte too
            rex |= REX_B;
            rm  = modregrm(2, 0, 4);
            sib = modregrm(0, 4, 4);
        }
        else
        {
            // disp[base]
            rm = modregrm(2, 0, base & 7);
            if (base & 8)
                rex |= REX_B;
        }
    }

    c.Irm = rm;
    c.Isib = sib;
    c.Irex = rex;
    c.IFL1 = FLconst;
    c.IEV1.Vuns = cast(targ_uns)disp;
}
293 
/*********************************************
 * Build REX, modregrm and sib bytes
 * Params:
 *      mod = mod field
 *      reg = reg field (bit 3 selects REX_R unless generating 16 bit code)
 *      rm = r/m field (bit 3 selects REX_B unless generating 16 bit code)
 * Returns:
 *      the modregrm byte in bits 0..7, the sib byte (if one is needed)
 *      in bits 8..15, and REX bits shifted left by 16
 */

uint buildModregrm(int mod, int reg, int rm)
{
    if (I16)
        return modregrm(mod, reg, rm);

    uint encoding;
    const bool spNeedsSib = (rm & 7) == SP && mod != 3;
    if (spNeedsSib)
        // r/m value 4 as a memory operand means "SIB follows"
        encoding = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4);
    else
        encoding = modregrm(mod,reg & 7,rm & 7);

    if (reg & 8)
        encoding |= REX_R << 16;
    if (rm & 8)
        encoding |= REX_B << 16;
    return encoding;
}
316 
/****************************************
 * Generate code for eecontext
 *
 * Generates the code for `eecontext.EEelem`, preceded by a stack
 * adjustment and a PUSH ESI and followed by an INT 3, and stores the
 * result in `eecontext.EEcode`. `eecontext.EEin` is incremented for
 * the duration of the call.
 */

@trusted
void genEEcode()
{
    CodeBuilder cdb;
    cdb.ctor();

    eecontext.EEin++;
    regcon.immed.mval = 0;

    // Adjust the stack by all of EEStack.offset except for the one
    // register-sized slot that the PUSH below accounts for
    assert(EEStack.offset >= REGSIZE);
    cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE));
    cdb.gen1(0x50 + SI);                      // PUSH ESI
    cdb.genadjesp(cast(int)EEStack.offset);

    regm_t noResultRegs = 0;    //regmask(eecontext.EEelem.Ety);
    gencodelem(cdb, eecontext.EEelem, &noResultRegs, false);

    code *generated = cdb.finish();
    assignaddrc(generated);
    pinholeopt(generated,null);
    jmpaddr(generated);

    eecontext.EEcode = gen1(generated, 0xCC);        // INT 3
    eecontext.EEin--;
}
342 
343 
/********************************************
 * Gen a save/restore sequence for mask of registers.
 *
 * Registers are saved in ascending register number order and restored
 * in the reverse order. Only registers in
 * `mBP | mES | ALLREGS | XMMREGS | mST0 | mST01` are considered.
 * Params:
 *      regm = mask of registers to save
 *      cdbsave = save code appended here
 *      cdbrestore = restore code appended here
 * Returns:
 *      amount of stack consumed
 */
@trusted
uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore2(%s)\n", regm_str(regm));
    regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01;
    if (!regm)
        return 0;

    uint stackused = 0;

    // restore[r] is the code that restores register r, null if r is not saved
    code *[regm.sizeof * 8] restore;

    reg_t i;
    for (i = 0; regm; regm >>= 1, i++)
    {
        if (!(regm & 1))
        {
            restore[i] = null;
            continue;
        }

        code *restoreCode;
        if (i == ES && I16)
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x06);                             // PUSH ES
            restoreCode = gen1(null, 0x07);                 // POP  ES
        }
        else if (i == ST0 || i == ST01)
        {
            CodeBuilder cdbx87;
            cdbx87.ctor();
            gensaverestore87(1 << i, cdbsave, cdbx87);
            restoreCode = cdbx87.finish();
        }
        else if (i >= XMM0 || I64 || cgstate.funcarg.size)
        {
            // use the regsave area rather than PUSH/POP
            uint idx;
            regsave.save(cdbsave, i, &idx);
            CodeBuilder cdbreg;
            cdbreg.ctor();
            regsave.restore(cdbreg, i, idx);
            restoreCode = cdbreg.finish();
        }
        else
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x50 + (i & 7));                   // PUSH i
            restoreCode = gen1(null, 0x58 + (i & 7));       // POP  i
            if (i & 8)
            {
                code_orrex(cdbsave.last(), REX_B);
                code_orrex(restoreCode, REX_B);
            }
        }
        restore[i] = restoreCode;
    }

    // Emit the restores in the reverse order of the saves
    foreach_reverse (r; 0 .. i)
    {
        if (code *c = restore[r])
            cdbrestore.append(c);
    }

    return stackused;
}
420 
421 
/****************************************
 * Clean parameters off stack.
 *
 * Does nothing if `numpara` is 0, or if neither `cgstate.stackclean`
 * is set nor `STACKALIGN >= 16`. Otherwise the stack pointer is adjusted,
 * and `stackpush` is reduced by `numpara`.
 * Params:
 *      cdb =             code is appended here
 *      numpara =         amount to adjust stack pointer
 *      keepmsk =         mask of registers to not destroy
 */

@trusted
void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk)
{
    //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean);
    if (!numpara)
        return;
    if (!cgstate.stackclean && !(STACKALIGN >= 16))
        return;

/+
    if (0 &&                                // won't work if operand of scodelem
        numpara == stackpush &&             // if this is all those pushed
        needframe &&                        // and there will be a BP
        !config.windows &&
        !(regcon.mvar & fregsaved)          // and no registers will be pushed
    )
        genregs(cdb,0x89,BP,SP);  // MOV SP,BP
    else
+/

    // When optimizing for space and exactly one register-sized slot is to be
    // removed, a POP into an already used scratch register can do the job
    regm_t scratchm = 0;
    if (numpara == REGSIZE && config.flags4 & CFG4space)
        scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar;

    if (!scratchm)
        cod3_stackadj(cdb, -numpara);
    else
    {
        reg_t scratch;
        allocreg(cdb, &scratchm, &scratch, TYint);
        cdb.gen1(0x58 + scratch);           // POP scratch
    }

    stackpush -= numpara;
    cdb.genadjesp(-numpara);
}
466 
/*********************************
 * Generate code for a logical expression.
 *
 * The expression is evaluated for its truth value only; the generated code
 * jumps to `targ` when the truth value matches bit 1 of `jcond`, and falls
 * through otherwise. `&&`, `||`, `!`, `?:` and several widening conversions
 * are handled by recursing on their operands rather than by materializing a
 * value.
 * Params:
 *      cdb = code is appended here
 *      e = elem
 *      jcond =
 *         bit 1 if true then goto jump address if e
 *               if false then goto jump address if !e
 *         2    don't call save87() (the cse_flush() before the jump is skipped)
 *      fltarg = FLcode or FLblock, flavor of target if e evaluates to jcond
 *      targ = either code or block pointer to destination
 */

@trusted
void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ)
{
    //printf("logexp(e = %p, jcond = %d)\n", e, jcond); elem_print(e);
    if (tybasic(e.Ety) == TYnoreturn)
    {
        // Expression never returns: just evaluate it, no jump is needed.
        // Keep the register state from before, but remember which registers were used.
        con_t regconsave = regcon;
        regm_t retregs = 0;
        codelem(cdb,e,&retregs,0);
        regconsave.used |= regcon.used;
        regcon = regconsave;
        return;
    }

    int no87 = (jcond & 2) == 0;
    docommas(cdb, &e);             // scan down commas
    cgstate.stackclean++;

    if (!OTleaf(e.Eoper) && !e.Ecount)     // if operator and not common sub
    {
        switch (e.Eoper)
        {
            case OPoror:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump to targ if either operand is true
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                else
                {
                    // jump to targ only if both operands are false:
                    // if E1 is true, skip over the test of E2
                    code *cnop = gennop(null);
                    logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                andregcon(&regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPandand:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump to targ only if both operands are true:
                    // if E1 is false, skip over the test of E2
                    code *cnop = gennop(null);    // a dummy target address
                    logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                else
                {
                    // jump to targ if either operand is false
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                andregcon(&regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPnot:
                jcond ^= 1;         // invert the sense of the jump
                goto case OPbool;

            // These operators do not change the truth value of their operand
            case OPbool:
            case OPs8_16:
            case OPu8_16:
            case OPs16_32:
            case OPu16_32:
            case OPs32_64:
            case OPu32_64:
            case OPu32_d:
            case OPd_ld:
                logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                freenode(e);
                cgstate.stackclean--;
                return;

            case OPcond:
            {
                code *cnop2 = gennop(null);   // addresses of start of leaves
                code *cnop = gennop(null);
                logexp(cdb, e.EV.E1, false, FLcode, cnop2);   // eval condition
                con_t regconold = regcon;
                logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ);
                genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf

                con_t regconsave = regcon;
                regcon = regconold;

                cdb.append(cnop2);
                logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ);
                andregcon(&regconold);
                andregcon(&regconsave);
                freenode(e.EV.E2);
                freenode(e);
                cdb.append(cnop);
                cgstate.stackclean--;
                return;
            }

            default:
                break;
        }
    }

    /* Special code for signed long compare.
     * Not necessary for I64 until we do cents.
     */
    if (OTrel2(e.Eoper) &&               // if < <= >= >
        !e.Ecount &&
        ( (I16 && tybasic(e.EV.E1.Ety) == TYlong  && tybasic(e.EV.E2.Ety) == TYlong) ||
          (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong))
       )
    {
        // Only bit 1 of jcond selects the sense of the jump; bit 2 is an
        // unrelated flag and must not turn "jump if false" into "jump if true"
        longcmp(cdb, e, (jcond & 1) != 0, fltarg, targ);
        cgstate.stackclean--;
        return;
    }

    regm_t retregs = mPSW;                // return result in flags
    opcode_t op = jmpopcode(e);           // get jump opcode
    if (!(jcond & 1))
        op ^= 0x101;                      // toggle jump condition(s)
    codelem(cdb, e, &retregs, true);         // evaluate elem
    if (no87)
        cse_flush(cdb,no87);              // flush CSE's to memory
    genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction
    cgstate.stackclean--;
}
618 
/******************************
 * Routine to aid in setting things up for gen().
 * Look for common subexpression.
 * Can handle indirection operators, but not if they're common subs.
 *
 * If `e` is a common subexpression whose value is already held in a suitable
 * register, that register is used as the operand; otherwise the addressing
 * mode is computed with getlvalue(). The finished instruction is appended
 * to `cdb`.
 * Params:
 *      cdb =     code is appended here
 *      e =       elem where we get some of the data from
 *      cs =      partially filled code to add
 *      op =      opcode
 *      reg =     reg field of (mod reg r/m)
 *      offset =  data to be added to Voffset field
 *      keepmsk = mask of registers we must not destroy
 *      desmsk  = mask of registers destroyed by executing the instruction
 */

@trusted
void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
            regm_t keepmsk,regm_t desmsk)
{
    debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    const tym_t tym = e.Ety;
    const int sz = tysize(tym);

    // Classifications of the requested instruction, used several times below.
    // (Declared up front so that `goto L2` does not jump over them.)
    const bool is8087op = (op & 0xFFF8) == 0xD8;     // 8087 opcode
    const bool isPush   = op == 0xFF && reg == 6;    // PUSH r/m
    const bool isCallf  = op == 0xFF && reg == 3;    // CALLF MEM16

    // cse that was already generated
    const bool cseAvailable = e.Ecount && e.Ecount != e.Ecomsub;

    // LEA, LES, CALLF and 8087 instructions need a memory operand
    const bool canTakeRegister = op != LEA && op != 0xC4 && !isCallf && !is8087op;

    /* Determine if location we want to get is in a register. If so,
     * substitute the register for the EA.
     * Note that operators don't go through this. CSE'd operators are
     * picked up by comsub().
     */
    if (cseAvailable && canTakeRegister)
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t candidates = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (isPush)
            candidates &= ~XMMREGS;     // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)     // value is in 4 registers
            {
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                candidates &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                candidates &= mMSW;             /* only high words      */
            else
                candidates &= mLSW;             /* only low words       */
        }

        for (uint r = 0; candidates; r++)
        {
            if (!(mask(r) & candidates))
                continue;

            if (regcon.cse.value[r] == e && // if register has elem
                /* watch out for a CWD destroying DX        */
               !(r == DX && op == 0xF7 && desmsk & mDX))
            {
                if (r == ES)
                {
                    /* if ES, then it can only be a load    */
                    if (op != 0x8B)
                        break;      // not a load
                    cs.Iop = 0x8C; /* MOV reg,ES   */
                    cs.Irm = modregrm(3, 0, reg & 7);
                    if (reg & 8)
                        code_orrex(cs, REX_B);
                }
                else    // XXX reg,r
                {
                    cs.Irm = modregrm(3, reg & 7, r & 7);
                    if (reg & 8)
                        cs.Irex |= REX_R;
                    if (r & 8)
                        cs.Irex |= REX_B;
                    if (sz == 1 && I64 && (r >= 4 || reg >= 4))
                        cs.Irex |= REX;
                    if (I64 && (sz == 8 || sz == 16))
                        cs.Irex |= REX_W;
                }
                goto L2;
            }
            candidates &= ~mask(r);
        }
    }

    // Not available in a register: compute the memory operand
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (isPush ||                             /* don't PUSH a word    */
            op == MOVZXw || op == MOVSXw ||       /* MOVZX/MOVSX          */
            is8087op ||                           /* 8087 instructions    */
            op == LEA)                            /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (isPush)
                cs.Irex &= ~REX_W;             // REX is ignored for PUSH anyway
        }
    }
    else if (is8087op && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE! fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        uint rmreg = cs.Irm & 7;
        if (cs.Irex & REX_B)
            rmreg |= 8;
        if (rmreg == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t xmmrm = cs.Irm & 7;
        if (cs.Irex & REX_B)
            xmmrm |= 8;
        if (xmmrm == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}
786 
787 
/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask containing the index register
 * Returns:
 *      the mod r/m byte in bits 0..7; for R12 additionally the sib byte
 *      in bits 8..15 and REX_B shifted left by 16.
 *      For 16 bit code only BX, DI and SI are accepted.
 */

@trusted
uint getaddrmode(regm_t idxregs)
{
    if (I16)
    {
        if (idxregs & mBX)
            return modregrm(2,0,7);     /* [BX] */
        if (idxregs & mDI)
            return modregrm(2,0,5);     /* [DI] */
        if (idxregs & mSI)
            return modregrm(2,0,4);     /* [SI] */
        assert(0);                      // no usable index register in idxregs
    }

    const reg = findreg(idxregs & (ALLREGS | mBP));
    if (reg == R12)
        // R12 can only be encoded with a sib byte
        return (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
    return modregrmx(2,0,reg);
}
816 
/**************************
 * Set the addressing mode of `c` to the one getaddrmode() computes
 * for `idxregs`.
 * Params:
 *      c = instruction whose Irm, Isib and Irex fields are updated
 *      idxregs = mask containing the index register
 */
void setaddrmode(code *c, regm_t idxregs)
{
    // packed is: REX bits << 16 | sib << 8 | mod r/m
    const uint packed = getaddrmode(idxregs);

    c.Irm  = packed & 0xFF;
    c.Isib = (packed >> 8) & 0xFF;

    // replace any previous REX_B with the one from the new mode
    c.Irex &= ~REX_B;
    c.Irex |= packed >> 16;
}
825 
/**********************************************
 * Change the operand of `c` to refer to the most significant word.
 *
 * For a register operand (FLreg), the r/m field and REX_B are changed
 * to select the symbol's `Sregmsw` register; otherwise the offset is
 * advanced by REGSIZE.
 * Params:
 *      c = instruction to modify
 */

@trusted
void getlvalue_msw(code *c)
{
    if (c.IFL1 != FLreg)
    {
        c.IEV1.Voffset += REGSIZE;
        return;
    }

    const msreg = c.IEV1.Vsym.Sregmsw;
    c.Irm = (c.Irm & ~7) | (msreg & 7);
    if (msreg & 8)
        c.Irex |= REX_B;
    else
        c.Irex &= ~REX_B;
}
844 
/**********************************************
 * Change the operand of `c` to refer to the least significant word.
 *
 * For a register operand (FLreg), the r/m field and REX_B are changed
 * to select the symbol's `Sreglsw` register; otherwise the offset is
 * moved back by REGSIZE.
 * Params:
 *      c = instruction to modify
 */

@trusted
void getlvalue_lsw(code *c)
{
    if (c.IFL1 != FLreg)
    {
        c.IEV1.Voffset -= REGSIZE;
        return;
    }

    const lsreg = c.IEV1.Vsym.Sreglsw;
    c.Irm = (c.Irm & ~7) | (lsreg & 7);
    if (lsreg & 8)
        c.Irex |= REX_B;
    else
        c.Irex &= ~REX_B;
}
863 
864 /******************
865  * Compute addressing mode.
866  * Generate & return sequence of code (if any).
867  * Return in cs the info on it.
868  * Input:
869  *      pcs ->  where to store data about addressing mode
870  *      e ->    the lvalue elem
871  *      keepmsk mask of registers we must not destroy or use
872  *              if (keepmsk & RMstore), this will be only a store operation
873  *              into the lvalue
874  *              if (keepmsk & RMload), this will be a read operation only
875  */
876 
877 @trusted
878 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
879 {
880     uint fl, f, opsave;
881     elem* e1, e11, e12;
882     bool e1isadd, e1free;
883     reg_t reg;
884     tym_t e1ty;
885     Symbol* s;
886 
887     //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk));
888     //elem_print(e);
889     assert(e);
890     elem_debug(e);
891     if (e.Eoper == OPvar || e.Eoper == OPrelconst)
892     {
893         s = e.EV.Vsym;
894         fl = s.Sfl;
895         if (tyfloating(s.ty()))
896             objmod.fltused();
897     }
898     else
899         fl = FLoper;
900     pcs.IFL1 = cast(ubyte)fl;
901     pcs.Iflags = CFoff;                  /* only want offsets            */
902     pcs.Irex = 0;
903     pcs.IEV1.Voffset = 0;
904 
905     tym_t ty = e.Ety;
906     uint sz = tysize(ty);
907     if (tyfloating(ty))
908         objmod.fltused();
909     if (I64 && (sz == 8 || sz == 16) && !tyvector(ty))
910         pcs.Irex |= REX_W;
911     if (!I16 && sz == SHORTSIZE)
912         pcs.Iflags |= CFopsize;
913     if (ty & mTYvolatile)
914         pcs.Iflags |= CFvolatile;
915 
916     switch (fl)
917     {
918         case FLoper:
919             debug
920             if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk));
921 
922             switch (e.Eoper)
923             {
924                 case OPadd:                 // this way when we want to do LEA
925                     e1 = e;
926                     e1free = false;
927                     e1isadd = true;
928                     break;
929 
930                 case OPind:
931                 case OPpostinc:             // when doing (*p++ = ...)
932                 case OPpostdec:             // when doing (*p-- = ...)
933                 case OPbt:
934                 case OPbtc:
935                 case OPbtr:
936                 case OPbts:
937                 case OPvecfill:
938                     e1 = e.EV.E1;
939                     e1free = true;
940                     e1isadd = e1.Eoper == OPadd;
941                     break;
942 
943                 default:
944                     printf("function: %s\n", funcsym_p.Sident.ptr);
945                     elem_print(e);
946                     assert(0);
947             }
948             e1ty = tybasic(e1.Ety);
949             if (e1isadd)
950             {
951                 e12 = e1.EV.E2;
952                 e11 = e1.EV.E1;
953             }
954 
955             /* First see if we can replace *(e+&v) with
956              *      MOV     idxreg,e
957              *      EA =    [ES:] &v+idxreg
958              */
959             f = FLconst;
960 
961             /* Is address of `s` relative to RIP ?
962              */
963             static bool relativeToRIP(Symbol* s)
964             {
965                 if (!I64)
966                     return false;
967                 if (config.exe == EX_WIN64)
968                     return true;
969                 if (config.flags3 & CFG3pie)
970                 {
971                     if (s.Sfl == FLtlsdata || s.ty() & mTYthread)
972                     {
973                         if (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat)
974                             return false;
975                     }
976                     return true;
977                 }
978                 else
979                     return (config.flags3 & CFG3pic) != 0;
980             }
981 
982             if (e1isadd &&
983                 ((e12.Eoper == OPrelconst &&
984                   !relativeToRIP(e12.EV.Vsym) &&
985                   (f = el_fl(e12)) != FLfardata
986                  ) ||
987                  (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) &&
988                 e1.Ecount == e1.Ecomsub &&
989                 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) &&
990                 tysize(e11.Ety) == REGSIZE
991                )
992             {
993                 uint t;            /* component of r/m field */
994                 int ss;
995                 int ssi;
996 
997                 if (e12.Eoper == OPrelconst)
998                     f = el_fl(e12);
999                 /*assert(datafl[f]);*/              /* what if addr of func? */
1000                 if (!I16)
1001                 {   /* Any register can be an index register        */
1002                     regm_t idxregs = allregs & ~keepmsk;
1003                     assert(idxregs);
1004 
1005                     /* See if e1.EV.E1 can be a scaled index  */
1006                     ss = isscaledindex(e11);
1007                     if (ss)
1008                     {
1009                         /* Load index register with result of e11.EV.E1       */
1010                         cdisscaledindex(cdb, e11, &idxregs, keepmsk);
1011                         reg = findreg(idxregs);
1012                         {
1013                             t = stackfl[f] ? 2 : 0;
1014                             pcs.Irm = modregrm(t, 0, 4);
1015                             pcs.Isib = modregrm(ss, reg & 7, 5);
1016                             if (reg & 8)
1017                                 pcs.Irex |= REX_X;
1018                         }
1019                     }
1020                     else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
1021                              !e11.Ecount &&
1022                              e11.EV.E2.Eoper == OPconst &&
1023                              (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0
1024                             )
1025                     {
1026                         regm_t scratchm;
1027 
1028                         char ssflags = ssindex_array[ssi].ssflags;
1029                         if (ssflags & SSFLnobp && stackfl[f])
1030                             goto L6;
1031 
1032                         // Load index register with result of e11.EV.E1
1033                         scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true);
1034                         reg = findreg(idxregs);
1035 
1036                         int ss1 = ssindex_array[ssi].ss1;
1037                         if (ssflags & SSFLlea)
1038                         {
1039                             assert(!stackfl[f]);
1040                             pcs.Irm = modregrm(2,0,4);
1041                             pcs.Isib = modregrm(ss1, reg & 7, reg & 7);
1042                             if (reg & 8)
1043                                 pcs.Irex |= REX_X | REX_B;
1044                         }
1045                         else
1046                         {
1047                             int rbase;
1048                             reg_t r;
1049 
1050                             scratchm = ALLREGS & ~keepmsk;
1051                             allocreg(cdb, &scratchm, &r, TYint);
1052 
1053                             if (ssflags & SSFLnobase1)
1054                             {
1055                                 t = 0;
1056                                 rbase = 5;
1057                             }
1058                             else
1059                             {
1060                                 t = 0;
1061                                 rbase = reg;
1062                                 if (rbase == BP || rbase == R13)
1063                                 {
1064                                     static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];
1065 
1066                                     // IMUL r,BP,imm32
1067                                     cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]);
1068                                     goto L7;
1069                                 }
1070                             }
1071 
1072                             cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7));
1073                             if (reg & 8)
1074                                 code_orrex(cdb.last(), REX_X);
1075                             if (rbase & 8)
1076                                 code_orrex(cdb.last(), REX_B);
1077                             if (I64)
1078                                 code_orrex(cdb.last(), REX_W);
1079 
1080                             if (ssflags & SSFLnobase1)
1081                             {
1082                                 cdb.last().IFL1 = FLconst;
1083                                 cdb.last().IEV1.Vuns = 0;
1084                             }
1085                         L7:
1086                             if (ssflags & SSFLnobase)
1087                             {
1088                                 t = stackfl[f] ? 2 : 0;
1089                                 rbase = 5;
1090                             }
1091                             else
1092                             {
1093                                 t = 2;
1094                                 rbase = r;
1095                                 assert(rbase != BP);
1096                             }
1097                             pcs.Irm = modregrm(t, 0, 4);
1098                             pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7);
1099                             if (r & 8)
1100                                 pcs.Irex |= REX_X;
1101                             if (rbase & 8)
1102                                 pcs.Irex |= REX_B;
1103                         }
1104                         freenode(e11.EV.E2);
1105                         freenode(e11);
1106                     }
1107                     else
1108                     {
1109                      L6:
1110                         /* Load index register with result of e11   */
1111                         scodelem(cdb, e11, &idxregs, keepmsk, true);
1112                         setaddrmode(pcs, idxregs);
1113                         if (stackfl[f])             /* if we need [EBP] too */
1114                         {
1115                             uint idx = pcs.Irm & 7;
1116                             if (pcs.Irex & REX_B)
1117                                 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X;
1118                             pcs.Isib = modregrm(0, idx, BP);
1119                             pcs.Irm = modregrm(2, 0, 4);
1120                         }
1121                     }
1122                 }
1123                 else
1124                 {
1125                     regm_t idxregs = IDXREGS & ~keepmsk;   /* only these can be index regs */
1126                     assert(idxregs);
1127                     if (stackfl[f])                 /* if stack data type   */
1128                     {
1129                         idxregs &= mSI | mDI;       /* BX can't index off stack */
1130                         if (!idxregs) goto L1;      /* index regs aren't avail */
1131                         t = 6;                      /* [BP+SI+disp]         */
1132                     }
1133                     else
1134                         t = 0;                      /* [SI + disp]          */
1135                     scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg
1136                     pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t);
1137                 }
1138                 if (f == FLpara)
1139                     refparam = true;
1140                 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast)
1141                     reflocal = true;
1142                 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr)
1143                     pcs.Iflags |= CFcs;
1144                 else
1145                     assert(f != FLreg);
1146                 pcs.IFL1 = cast(ubyte)f;
1147                 if (f != FLconst)
1148                     pcs.IEV1.Vsym = e12.EV.Vsym;
1149                 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? */
1150 
1151                 /* If e1 is a CSE, we must generate an addressing mode      */
1152                 /* but also leave EA in registers so others can use it      */
1153                 if (e1.Ecount)
1154                 {
1155                     uint flagsave;
1156 
1157                     regm_t idxregs = IDXREGS & ~keepmsk;
1158                     allocreg(cdb, &idxregs, &reg, TYoffset);
1159 
1160                     /* If desired result is a far pointer, we'll have       */
1161                     /* to load another register with the segment of v       */
1162                     if (e1ty == TYfptr)
1163                     {
1164                         reg_t msreg;
1165 
1166                         idxregs |= mMSW & ALLREGS & ~keepmsk;
1167                         allocreg(cdb, &idxregs, &msreg, TYfptr);
1168                         msreg = findregmsw(idxregs);
1169                                                     /* MOV msreg,segreg     */
1170                         genregs(cdb, 0x8C, segfl[f], msreg);
1171                     }
1172                     opsave = pcs.Iop;
1173                     flagsave = pcs.Iflags;
1174                     ubyte rexsave = pcs.Irex;
1175                     pcs.Iop = LEA;
1176                     code_newreg(pcs, reg);
1177                     if (!I16)
1178                         pcs.Iflags &= ~CFopsize;
1179                     if (I64)
1180                         pcs.Irex |= REX_W;
1181                     cdb.gen(pcs);                 // LEA idxreg,EA
1182                     cssave(e1,idxregs,true);
1183                     if (!I16)
1184                     {
1185                         pcs.Iflags = flagsave;
1186                         pcs.Irex = rexsave;
1187                     }
1188                     if (stackfl[f] && (config.wflags & WFssneds))   // if pointer into stack
1189                         pcs.Iflags |= CFss;        // add SS: override
1190                     pcs.Iop = opsave;
1191                     pcs.IFL1 = FLoffset;
1192                     pcs.IEV1.Vuns = 0;
1193                     setaddrmode(pcs, idxregs);
1194                 }
1195                 freenode(e12);
1196                 if (e1free)
1197                     freenode(e1);
1198                 goto Lptr;
1199             }
1200 
1201             L1:
1202 
1203             /* The rest of the cases could be a far pointer */
1204 
1205             regm_t idxregs;
1206             idxregs = (I16 ? IDXREGS : allregs) & ~keepmsk; // only these can be index regs
1207             assert(idxregs);
1208             if (!I16 &&
1209                 (sz == REGSIZE || (I64 && sz == 4)) &&
1210                 keepmsk & RMstore)
1211                 idxregs |= regcon.mvar;
1212 
1213             switch (e1ty)
1214             {
1215                 case TYfptr:                        /* if far pointer       */
1216                 case TYhptr:
1217                     idxregs = (mES | IDXREGS) & ~keepmsk;   // need segment too
1218                     assert(idxregs & mES);
1219                     pcs.Iflags |= CFes;            /* ES segment override  */
1220                     break;
1221 
1222                 case TYsptr:                        /* if pointer to stack  */
1223                     if (config.wflags & WFssneds)   // if SS != DS
1224                         pcs.Iflags |= CFss;        /* then need SS: override */
1225                     break;
1226 
1227                 case TYfgPtr:
1228                     if (I32)
1229                         pcs.Iflags |= CFgs;
1230                     else if (I64)
1231                         pcs.Iflags |= CFfs;
1232                     else
1233                         assert(0);
1234                     break;
1235 
1236                 case TYcptr:                        /* if pointer to code   */
1237                     pcs.Iflags |= CFcs;            /* then need CS: override */
1238                     break;
1239 
1240                 default:
1241                     break;
1242             }
1243             pcs.IFL1 = FLoffset;
1244             pcs.IEV1.Vuns = 0;
1245 
1246             /* see if we can replace *(e+c) with
1247              *      MOV     idxreg,e
1248              *      [MOV    ES,segment]
1249              *      EA =    [ES:]c[idxreg]
1250              */
1251             if (e1isadd && e12.Eoper == OPconst &&
1252                 (!I64 || el_signx32(e12)) &&
1253                 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) &&
1254                 (!e1.Ecount || !e1free)
1255                )
1256             {
1257                 int ss;
1258 
1259                 pcs.IEV1.Vuns = e12.EV.Vuns;
1260                 freenode(e12);
1261                 if (e1free) freenode(e1);
1262                 if (!I16 && e11.Eoper == OPadd && !e11.Ecount &&
1263                     tysize(e11.Ety) == REGSIZE)
1264                 {
1265                     e12 = e11.EV.E2;
1266                     e11 = e11.EV.E1;
1267                     e1 = e1.EV.E1;
1268                     e1free = true;
1269                     goto L4;
1270                 }
1271                 if (!I16 && (ss = isscaledindex(e11)) != 0)
1272                 {   // (v * scale) + const
1273                     cdisscaledindex(cdb, e11, &idxregs, keepmsk);
1274                     reg = findreg(idxregs);
1275                     pcs.Irm = modregrm(0, 0, 4);
1276                     pcs.Isib = modregrm(ss, reg & 7, 5);
1277                     if (reg & 8)
1278                         pcs.Irex |= REX_X;
1279                 }
1280                 else
1281                 {
1282                     scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg
1283                     setaddrmode(pcs, idxregs);
1284                 }
1285                 goto Lptr;
1286             }
1287 
1288             /* Look for *(v1 + v2)
1289              *      EA = [v1][v2]
1290              */
1291 
1292             if (!I16 && e1isadd && (!e1.Ecount || !e1free) &&
1293                 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4)))
1294             {
1295             L4:
1296                 regm_t idxregs2;
1297                 uint base, index;
1298 
1299                 // Look for *(v1 + v2 << scale)
1300                 int ss = isscaledindex(e12);
1301                 if (ss)
1302                 {
1303                     scodelem(cdb, e11, &idxregs, keepmsk, true);
1304                     idxregs2 = allregs & ~(idxregs | keepmsk);
1305                     cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs);
1306                 }
1307 
1308                 // Look for *(v1 << scale + v2)
1309                 else if ((ss = isscaledindex(e11)) != 0)
1310                 {
1311                     idxregs2 = idxregs;
1312                     cdisscaledindex(cdb, e11, &idxregs2, keepmsk);
1313                     idxregs = allregs & ~(idxregs2 | keepmsk);
1314                     scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true);
1315                 }
1316                 // Look for *(((v1 << scale) + c1) + v2)
1317                 else if (e11.Eoper == OPadd && !e11.Ecount &&
1318                          e11.EV.E2.Eoper == OPconst &&
1319                          (ss = isscaledindex(e11.EV.E1)) != 0
1320                         )
1321                 {
1322                     pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns;
1323                     idxregs2 = idxregs;
1324                     cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk);
1325                     idxregs = allregs & ~(idxregs2 | keepmsk);
1326                     scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true);
1327                     freenode(e11.EV.E2);
1328                     freenode(e11);
1329                 }
1330                 else
1331                 {
1332                     scodelem(cdb, e11, &idxregs, keepmsk, true);
1333                     idxregs2 = allregs & ~(idxregs | keepmsk);
1334                     scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true);
1335                 }
1336                 base = findreg(idxregs);
1337                 index = findreg(idxregs2);
1338                 pcs.Irm  = modregrm(2, 0, 4);
1339                 pcs.Isib = modregrm(ss, index & 7, base & 7);
1340                 if (index & 8)
1341                     pcs.Irex |= REX_X;
1342                 if (base & 8)
1343                     pcs.Irex |= REX_B;
1344                 if (e1free)
1345                     freenode(e1);
1346 
1347                 goto Lptr;
1348             }
1349 
1350             /* give up and replace *e1 with
1351              *      MOV     idxreg,e
1352              *      EA =    0[idxreg]
1353              * pinholeopt() will usually correct the 0, we need it in case
1354              * we have a pointer to a long and need an offset to the second
1355              * word.
1356              */
1357 
1358             assert(e1free);
1359             scodelem(cdb, e1, &idxregs, keepmsk, true);  // load index register
1360             setaddrmode(pcs, idxregs);
1361         Lptr:
1362             if (config.flags3 & CFG3ptrchk)
1363                 cod3_ptrchk(cdb, pcs, keepmsk);        // validate pointer code
1364             break;
1365 
1366         case FLdatseg:
1367             assert(0);
1368         static if (0)
1369         {
1370             pcs.Irm = modregrm(0, 0, BPRM);
1371             pcs.IEVpointer1 = e.EVpointer;
1372             break;
1373         }
1374 
1375         case FLfltreg:
1376             reflocal = true;
1377             pcs.Irm = modregrm(2, 0, BPRM);
1378             pcs.IEV1.Vint = 0;
1379             break;
1380 
1381         case FLreg:
1382             goto L2;
1383 
1384         case FLpara:
1385             if (s.Sclass == SC.shadowreg)
1386                 goto case FLfast;
1387         Lpara:
1388             refparam = true;
1389             pcs.Irm = modregrm(2, 0, BPRM);
1390             goto L2;
1391 
1392         case FLauto:
1393         case FLfast:
1394             if (regParamInPreg(s))
1395             {
1396                 regm_t pregm = s.Spregm();
1397                 /* See if the parameter is still hanging about in a register,
                 * and so we can load from that register instead.
1399                  */
1400                 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/)
1401                 {
1402                     if (keepmsk & RMload && !anyiasm)
1403                     {
1404                         auto voffset = e.EV.Voffset;
1405                         if (sz <= REGSIZE)
1406                         {
1407                             const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg;
1408                             if (voffset >= REGSIZE)
1409                                 voffset -= REGSIZE;
1410 
1411                             /* preg could be NOREG if it's a variadic function and we're
1412                              * in Win64 shadow regs and we're offsetting to get to the start
1413                              * of the variadic args.
1414                              */
1415                             if (preg != NOREG && regcon.params & mask(preg))
1416                             {
1417                                 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset);
1418                                 if (mask(preg) & XMMREGS && sz != REGSIZE)
1419                                 {
1420                                     /* The following fails with this from std.math on Linux64:
1421                                         void main()
1422                                         {
1423                                             alias T = float;
1424                                             T x = T.infinity;
1425                                             T e = T.infinity;
1426                                             int eptr;
1427                                             T v = frexp(x, eptr);
1428                                             assert(isIdentical(e, v));
1429                                         }
1430                                      */
1431                                 }
1432                                 else if (voffset == 0)
1433                                 {
1434                                     pcs.Irm = modregrm(3, 0, preg & 7);
1435                                     if (preg & 8)
1436                                         pcs.Irex |= REX_B;
1437                                     if (I64 && sz == 1 && preg >= 4)
1438                                         pcs.Irex |= REX;
1439                                     regcon.used |= mask(preg);
1440                                     break;
1441                                 }
1442                                 else if (voffset == 1 && sz == 1 && preg < 4)
1443                                 {
1444                                     pcs.Irm = modregrm(3, 0, 4 | preg); // use H register
1445                                     regcon.used |= mask(preg);
1446                                     break;
1447                                 }
1448                             }
1449                         }
1450                     }
1451                     else
1452                         regcon.params &= ~pregm;
1453                 }
1454             }
1455             if (s.Sclass == SC.shadowreg)
1456                 goto Lpara;
1457             goto case FLbprel;
1458 
1459         case FLbprel:
1460             reflocal = true;
1461             pcs.Irm = modregrm(2, 0, BPRM);
1462             goto L2;
1463 
1464         case FLextern:
1465             if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
1466             {
1467                 if (config.exe & EX_windos)
1468                 {
1469                     if (I64)
1470                     {   // GS:[88]
1471                         pcs.Irm = modregrm(0, 0, 4);
1472                         pcs.Isib = modregrm(0, 4, 5);  // don't use [RIP] addressing
1473                         pcs.IFL1 = FLconst;
1474                         pcs.IEV1.Vuns = 88;
1475                         pcs.Iflags = CFgs;
1476                         pcs.Irex |= REX_W;
1477                         break;
1478                     }
1479                     else
1480                     {
1481                         pcs.Iflags |= CFfs;    // add FS: override
1482                     }
1483                 }
1484                 else if (config.exe & (EX_OSX | EX_OSX64))
1485                 {
1486                 }
1487                 else if (config.exe & EX_posix)
1488                     assert(0);
1489             }
1490             if (s.ty() & mTYcs && cast(bool) LARGECODE)
1491                 goto Lfardata;
1492             goto L3;
1493 
1494         case FLtlsdata:
1495             if (config.exe & EX_posix)
1496                 goto L3;
1497             assert(0);
1498 
1499         case FLdata:
1500         case FLudata:
1501         case FLcsdata:
1502         case FLgot:
1503         case FLgotoff:
1504         L3:
1505             pcs.Irm = modregrm(0, 0, BPRM);
1506         L2:
1507             if (fl == FLreg)
1508             {
1509                 //printf("test: FLreg, %s %d regcon.mvar = %s\n",
1510                 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar));
1511                 if (!(s.Sregm & regcon.mvar))
1512                     symbol_print(s);
1513                 assert(s.Sregm & regcon.mvar);
1514 
1515                 /* Attempting to paint a float as an integer or an integer as a float
                 * will cause serious problems since the EA is loaded separately from
1517                  * the opcode. The only way to deal with this is to prevent enregistering
1518                  * such variables.
1519                  */
1520                 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) ||
1521                     !tyxmmreg(ty) && (s.Sregm & XMMREGS))
1522                     cgreg_unregister(s.Sregm);
1523 
1524                 if (
1525                     s.Sclass == SC.regpar ||
1526                     s.Sclass == SC.parameter)
1527                 {   refparam = true;
1528                     reflocal = true;        // kludge to set up prolog
1529                 }
1530                 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7);
1531                 if (s.Sreglsw & 8)
1532                     pcs.Irex |= REX_B;
1533                 if (e.EV.Voffset == REGSIZE && sz == REGSIZE)
1534                 {
1535                     pcs.Irm = modregrm(3, 0, s.Sregmsw & 7);
1536                     if (s.Sregmsw & 8)
1537                         pcs.Irex |= REX_B;
1538                     else
1539                         pcs.Irex &= ~REX_B;
1540                 }
1541                 else if (e.EV.Voffset == 1 && sz == 1)
1542                 {
1543                     assert(s.Sregm & BYTEREGS);
1544                     assert(s.Sreglsw < 4);
1545                     pcs.Irm |= 4;                  // use 2nd byte of register
1546                 }
1547                 else
1548                 {
1549                     assert(!e.EV.Voffset);
1550                     if (I64 && sz == 1 && s.Sreglsw >= 4)
1551                         pcs.Irex |= REX;
1552                 }
1553             }
1554             else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE))
1555             {
1556                 pcs.Iflags |= CFcs | CFoff;
1557             }
1558             if (config.flags3 & CFG3pic &&
1559                 (fl == FLtlsdata || s.ty() & mTYthread))
1560             {
1561                 if (I32)
1562                 {
1563                     if (config.flags3 & CFG3pie)
1564                     {
1565                         pcs.Iflags |= CFgs;
1566                     }
1567                 }
1568                 else if (I64)
1569                 {
1570                     if (config.flags3 & CFG3pie &&
1571                         (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat))
1572                     {
1573                         pcs.Iflags |= CFfs;
1574                         pcs.Irm = modregrm(0, 0, 4);
1575                         pcs.Isib = modregrm(0, 4, 5);  // don't use [RIP] addressing
1576                     }
1577                     else
1578                     {
1579                         //pcs.Iflags |= CFopsize; //I don't know what this was for
1580                         pcs.Irex = 0x48;
1581                     }
1582                 }
1583             }
1584             pcs.IEV1.Vsym = s;
1585             pcs.IEV1.Voffset = e.EV.Voffset;
1586             if (sz == 1)
1587             {   /* Don't use SI or DI for this variable     */
1588                 s.Sflags |= GTbyte;
1589                 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1)
1590                 {
1591                     debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr);
1592                     s.Sflags &= ~GTregcand;
1593                 }
1594             }
1595             else if (sz == 2 && tyxmmreg(s.ty()) && config.fpxmmregs)
1596             {
1597                 debug if (debugr) printf("'%s' not XMM reg cand due to short access\n", s.Sident.ptr);
1598                 s.Sflags &= ~GTregcand;
1599             }
1600             else if (e.EV.Voffset || sz > tysize(s.Stype.Tty))
1601             {
1602                 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr);
1603                 s.Sflags &= ~GTregcand;
1604             }
1605             else if (tyvector(s.Stype.Tty) && sz < tysize(s.Stype.Tty))
1606             {
1607                 // https://issues.dlang.org/show_bug.cgi?id=21673
1608                 // https://issues.dlang.org/show_bug.cgi?id=21676
1609                 // https://issues.dlang.org/show_bug.cgi?id=23009
1610                 // PR: https://github.com/dlang/dmd/pull/13977
1611                 // cannot read or write to partial vector
1612                 debug if (debugr) printf("'%s' not reg cand due to vector type\n", s.Sident.ptr);
1613                 s.Sflags &= ~GTregcand;
1614             }
1615 
1616             if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty))
1617             {
1618                 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr);
1619                 // Can't successfully mix XMM register variables accessed as integers
1620                 s.Sflags &= ~GTregcand;
1621             }
1622 
1623             if (!(keepmsk & RMstore))               // if not store only
1624                 s.Sflags |= SFLread;               // assume we are doing a read
1625             break;
1626 
1627         case FLpseudo:
1628             version (MARS)
1629             {
1630                 {
1631                     getregs(cdb, mask(s.Sreglsw));
1632                     pcs.Irm = modregrm(3, 0, s.Sreglsw & 7);
1633                     if (s.Sreglsw & 8)
1634                         pcs.Irex |= REX_B;
1635                     if (e.EV.Voffset == 1 && sz == 1)
1636                     {   assert(s.Sregm & BYTEREGS);
1637                         assert(s.Sreglsw < 4);
1638                         pcs.Irm |= 4;                  // use 2nd byte of register
1639                     }
1640                     else
1641                     {   assert(!e.EV.Voffset);
1642                         if (I64 && sz == 1 && s.Sreglsw >= 4)
1643                             pcs.Irex |= REX;
1644                     }
1645                     break;
1646                 }
1647             }
1648             else
1649             {
1650                 {
1651                     uint u = s.Sreglsw;
1652                     getregs(cdb, pseudomask[u]);
1653                     pcs.Irm = modregrm(3, 0, pseudoreg[u] & 7);
1654                     break;
1655                 }
1656             }
1657 
1658         case FLfardata:
1659         case FLfunc:                                /* reading from code seg */
1660             if (config.exe & EX_flat)
1661                 goto L3;
1662         Lfardata:
1663         {
1664             regm_t regm = ALLREGS & ~keepmsk;       // need scratch register
1665             allocreg(cdb, &regm, &reg, TYint);
1666             getregs(cdb,mES);
1667             // MOV mreg,seg of symbol
1668             cdb.gencs(0xB8 + reg, 0, FLextern, s);
1669             cdb.last().Iflags = CFseg;
1670             cdb.gen2(0x8E, modregrmx(3, 0, reg));     // MOV ES,reg
1671             pcs.Iflags |= CFes | CFoff;            /* ES segment override  */
1672             goto L3;
1673         }
1674 
1675         case FLstack:
1676             assert(!I16);
1677             pcs.Irm = modregrm(2, 0, 4);
1678             pcs.Isib = modregrm(0, 4, SP);
1679             pcs.IEV1.Vsym = s;
1680             pcs.IEV1.Voffset = e.EV.Voffset;
1681             break;
1682 
1683         default:
1684             WRFL(cast(FL)fl);
1685             symbol_print(s);
1686             assert(0);
1687     }
1688 }
1689 
/*****************************
 * Emit the instruction described by *pcs once for each integer register
 * that holds a piece of a floating point value. For every piece the reg
 * field of pcs.Irm is retargeted and the EA offset is moved to the matching
 * part of the operand.
 * Params:
 *      cdb = code sink the instructions are appended to
 *      pcs = opcode and EA template; its Irm reg field is left set to the
 *            last register emitted, IEV1.Voffset is restored before returning
 *      tym = either TYdouble or TYfloat (anything that is not TYfloat after
 *            tybasic() is handled as a double)
 */

@trusted
void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    const bool isFloat = tybasic(tym) == TYfloat;

    if (I32)
    {
        getregs(cdb, isFloat ? mAX : mAX | mDX);
        if (!isFloat)
        {
            // upper half of the double: DX, one register size further on
            pcs.IEV1.Voffset += REGSIZE;
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= REGSIZE;
        }
        // lower half (or the entire float): AX
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
        return;
    }

    // 16 bit code: walk from the highest word of the operand down to offset 0
    if (isFloat)
    {
        getregs(cdb, FLOATREGS_16);

        pcs.IEV1.Voffset += 2;
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;          // Voffset is back to its entry value
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
    }
    else
    {
        getregs(cdb, DOUBLEREGS_16);

        pcs.IEV1.Voffset += 6;
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, BX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, CX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;          // Voffset is back to its entry value
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
    }
}
1741 
1742 
/*****************************
 * Given a result in registers, generate code that tests it for true or false,
 * i.e. that sets the condition flags from the value held in regm.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      regm = mask of the register(s) holding the value; must contain an
 *             XMM register, BP, or a register out of ALLREGS
 *      tym = type of the value; only tybasic(tym) is looked at
 *      saveflag = non-zero if the registers in regm must keep their contents;
 *                 the multi-register and float tests then work on a scratch
 *                 register instead
 */

@trusted
void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                      // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];

    // Byte sized value: TEST the byte register against itself
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);        // TEST regL,regL
        // In 64 bit code a REX prefix is needed for register numbers 4 and up,
        // so that the encoding selects the low byte of reg rather than AH/CH/DH/BH
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);
        return;
    }

    // Value in XMM register(s): compare against a zeroed scratch XMM register
    if (regm & XMMREGS)
    {
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;      // any XMM register not holding the value
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        // NOTE(review): 0x66 prefix presumably selects the double precision
        // forms of XORPS/UCOMISS (XORPD/UCOMISD) - confirm against opcode tables
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        cdb.gen2(op | XORPS, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0));     // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   // Complex: the value is spread over two XMM registers. If the first
            // part already compared not-equal (or unordered) jump to the end,
            // otherwise the flags come from comparing the second part.
            code *cnop = gennop(null);
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE     L1
            genjmp(cdb,  JP, FLcode, cast(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask(reg));             // the other register of the pair
            cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg
            cdb.append(cnop);                             // L1:
        }
        return;
    }

    // Value fits in one general purpose register
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {
                // The shift below modifies its operand, so work on a copy
                // when the caller wants the register preserved
                if (saveflag)
                {
                    scrregm = allregs & ~regm;              // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                // Shifts the top bit out of the register; presumably this discards
                // the float's sign bit so that -0.0 tests as zero - TODO confirm
                cdb.gen2(0xD1, modregrmx(3, 4, reg)); // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                 // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;             // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);             // 64 bit operands
        }
        else
            gentstreg(cdb, reg);                 // TEST reg,reg
        return;
    }

    // Value occupies more than one register: OR the pieces together.
    if (saveflag || tyfv(tym))
    {
        // Non-destructive variant: accumulate into a scratch register so the
        // registers in regm keep their contents. Also entered through `goto L1`
        // from the destructive variant below.
    L1:
        scrregm = ALLREGS & ~regm;              // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    // only the low 16 bits of the most significant register are used
                    genregs(cdb, MOVZXw, scrreg, reg); // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);      // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
            }
            reg = findreglsw(regm);
            genorreg(cdb, scrreg, reg);           // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            // 8 byte value in AX,BX,CX,DX; AX is the word that gets the SHL for doubles
            genmovreg(cdb, scrreg, AX);           // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg));         // SHL scrreg,1
            genorreg(cdb, scrreg, BX);            // OR scrreg,BX
            genorreg(cdb, scrreg, CX);            // OR scrreg,CX
            genorreg(cdb, scrreg, DX);            // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        // Destructive variant: the most significant register is overwritten
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))        // if register variable
                goto L1;                        // don't trash it
            getregs(cdb, mask(reg));            // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));   // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
       }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                  // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));       // SHL AX,1
            genorreg(cdb, AX, BX);          // OR AX,BX
            genorreg(cdb, AX, CX);          // OR AX,CX
            genorreg(cdb, AX, DX);          // OR AX,DX
        }
        else
            assert(0);
    }
    // Flag the final OR with CFpsw.
    // NOTE(review): presumably marks its flags result as needed by later code - confirm
    code_orflag(cdb.last(),CFpsw);
}
1894 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = expression whose value is in retregs; must not be null
 *      retregs = mask of where the result currently is; must not be 0
 *      pretregs = on entry, mask of where the result is wanted (registers,
 *                 mSTACK and/or mPSW for "in the flags"); if 0 the result is
 *                 not wanted and nothing is generated. May be updated to the
 *                 location actually used.
 */

@trusted
void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;           // if don't want result
    assert(e && retregs);                 // need something to work with
    regm_t forccs = *pretregs & mPSW;     // non-zero if result is wanted in the flags
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    // Structs are moved around as an integer/pointer type of matching size
    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        // A byte at offset 1 of a register variable is the high byte
        // register; test that directly instead of the low byte
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;                   // flags are handled, skip tstresult() below
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        *pretregs = retregs;
    else if (forregs)             // if return the result in registers
    {
        // Anything involving the x87 stack is handled by fixresult87()
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            // 16 bit doubles only move between DOUBLEREGS_16 and the stack
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16; // for tstresult() below
            }
            else
            {
                // Unsupported combination. The diagnostic is debug-only, but the
                // assert must fire in every build: previously both were a single
                // comma expression under `debug`, so non-debug builds fell through.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);  // allocate return regs
            if (retregs & XMMREGS)
            {
                // Source is an XMM register
                reg = findreg(retregs & XMMREGS);
                if (mask(rreg) & XMMREGS)
                    genmovreg(cdb, rreg, reg, tym);
                else
                {
                    // XMM -> general register(s), going through the floatreg memory temporary
                    // MOVSD floatreg, XMM?
                    cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            // second 4 bytes go into the most significant register
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);   // make the load 64 bits wide
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                // General register(s) -> XMM register
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            // store both halves to floatreg, then load all 8 bytes
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                // Register pair -> register pair
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg
            }
            else
            {
                // Single general register -> single general register
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);    // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        if (retregs & (mST01 | mST0))
        {
            *pretregs |= forccs;
            fixresult87(cdb, e, retregs, pretregs);
        }
        else
            // forregs doubles as the saveflag argument: non-zero when the
            // register contents are still wanted after the test
            tstresult(cdb, retregs, tym, forregs);
    }
}
2076 
/*******************************
 * INFxxx flag bits: extra information about each CLIB runtime library
 * function. They are combined with bitwise OR in ClibInfo.flags.
 */

enum
{
    INF32         = 1 << 0,     /// if 32 bit only
    INFfloat      = 1 << 1,     /// if this is floating point
    INFwkdone     = 1 << 2,     /// if weak extern is already done
    INF64         = 1 << 3,     /// if 64 bit only
    INFpushebx    = 1 << 4,     /// push EBX before load_localgot()
    INFpusheabcdx = 1 << 5,     /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}
2090 
/// Description of one CLIB runtime library function, as filled in by getClibInfo().
struct ClibInfo
{
    regm_t retregs16;   /// registers that 16 bit result is returned in
    regm_t retregs32;   /// registers that 32 bit result is returned in
    ubyte pop;          /// # of bytes popped off of stack upon return
    ubyte flags;        /// bitwise OR of INFxxx flags
    byte push87;        /// # of pushes onto the 8087 stack
    byte pop87;         /// # of pops off of the 8087 stack
}
2100 
// While this is false, the next getClibInfo() call zeroes Sxtrnnum and Stypidx of
// every cached CLIB symbol, clears INFwkdone in its info, and then sets this to true.
__gshared int clib_inited = false;          // true if initialized
2102 
/*******************************
 * Create the Symbol for a CLIB runtime library function.
 * Params:
 *      name = 0-terminated name of the function
 *      desregs = mask of registers that are not preserved by the function;
 *                every other register in mBP | mES | ALLREGS is recorded
 *                in Sregsaved
 * Returns:
 *      the symbol obtained from symbol_calloc(), set up as an extern
 *      function (SC.extern_, FLfunc) of type tsclib
 */
@trusted
Symbol* symboly(const(char)* name, regm_t desregs)
{
    const size_t namelen = strlen(name);
    const regm_t preserved = (mBP | mES | ALLREGS) & ~desregs;

    Symbol* sym = symbol_calloc(name[0 .. namelen]);
    sym.Sfl = FLfunc;
    sym.Sclass = SC.extern_;
    sym.Stype = tsclib;
    sym.Ssymnum = 0;
    sym.Sregsaved = preserved;
    return sym;
}
2114 
2115 @trusted
2116 void getClibInfo(uint clib, Symbol** ps, ClibInfo** pinfo)
2117 {
2118     __gshared Symbol*[CLIB.MAX] clibsyms;
2119     __gshared ClibInfo[CLIB.MAX] clibinfo;
2120 
2121     if (!clib_inited)
2122     {
2123         for (size_t i = 0; i < CLIB.MAX; ++i)
2124         {
2125             Symbol* s = clibsyms[i];
2126             if (s)
2127             {
2128                 s.Sxtrnnum = 0;
2129                 s.Stypidx = 0;
2130                 clibinfo[i].flags &= ~INFwkdone;
2131             }
2132         }
2133         clib_inited = true;
2134     }
2135 
2136     const uint ex_unix = (EX_LINUX   | EX_LINUX64   |
2137                           EX_OSX     | EX_OSX64     |
2138                           EX_FREEBSD | EX_FREEBSD64 |
2139                           EX_OPENBSD | EX_OPENBSD64 |
2140                           EX_DRAGONFLYBSD64 |
2141                           EX_SOLARIS | EX_SOLARIS64);
2142 
2143     ClibInfo* cinfo = &clibinfo[clib];
2144     Symbol* s = clibsyms[clib];
2145     if (!s)
2146     {
2147 
2148         switch (clib)
2149         {
2150             case CLIB.lcmp:
2151                 {
2152                     const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@";
2153                     s = symboly(name, 0);
2154                 }
2155                 break;
2156 
2157             case CLIB.lmul:
2158                 {
2159                     const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@";
2160                     s = symboly(name, mAX|mCX|mDX);
2161                     cinfo.retregs16 = mDX|mAX;
2162                     cinfo.retregs32 = mDX|mAX;
2163                 }
2164                 break;
2165 
2166             case CLIB.ldiv:
2167                 cinfo.retregs16 = mDX|mAX;
2168                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2169                 {
2170                     s = symboly("__divdi3", mAX|mBX|mCX|mDX);
2171                     cinfo.flags = INFpushebx;
2172                     cinfo.retregs32 = mDX|mAX;
2173                 }
2174                 else if (config.exe & EX_SOLARIS)
2175                 {
2176                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2177                     cinfo.flags = INFpushebx;
2178                     cinfo.retregs32 = mDX|mAX;
2179                 }
2180                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2181                 {
2182                     s = symboly("_alldiv", mAX|mBX|mCX|mDX);
2183                     cinfo.flags = INFpusheabcdx;
2184                     cinfo.retregs32 = mDX|mAX;
2185                 }
2186                 else
2187                 {
2188                     const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@";
2189                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2190                     cinfo.retregs32 = mDX|mAX;
2191                 }
2192                 break;
2193 
2194             case CLIB.lmod:
2195                 cinfo.retregs16 = mCX|mBX;
2196                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2197                 {
2198                     s = symboly("__moddi3", mAX|mBX|mCX|mDX);
2199                     cinfo.flags = INFpushebx;
2200                     cinfo.retregs32 = mDX|mAX;
2201                 }
2202                 else if (config.exe & EX_SOLARIS)
2203                 {
2204                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2205                     cinfo.flags = INFpushebx;
2206                     cinfo.retregs32 = mCX|mBX;
2207                 }
2208                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2209                 {
2210                     s = symboly("_allrem", mAX|mBX|mCX|mDX);
2211                     cinfo.flags = INFpusheabcdx;
2212                     cinfo.retregs32 = mAX|mDX;
2213                 }
2214                 else
2215                 {
2216                     const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@";
2217                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2218                     cinfo.retregs32 = mCX|mBX;
2219                 }
2220                 break;
2221 
2222             case CLIB.uldiv:
2223                 cinfo.retregs16 = mDX|mAX;
2224                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2225                 {
2226                     s = symboly("__udivdi3", mAX|mBX|mCX|mDX);
2227                     cinfo.flags = INFpushebx;
2228                     cinfo.retregs32 = mDX|mAX;
2229                 }
2230                 else if (config.exe & EX_SOLARIS)
2231                 {
2232                     s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX);
2233                     cinfo.flags = INFpushebx;
2234                     cinfo.retregs32 = mDX|mAX;
2235                 }
2236                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2237                 {
2238                     s = symboly("_aulldiv", mAX|mBX|mCX|mDX);
2239                     cinfo.flags = INFpusheabcdx;
2240                     cinfo.retregs32 = mDX|mAX;
2241                 }
2242                 else
2243                 {
2244                     const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@";
2245                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2246                     cinfo.retregs32 = mDX|mAX;
2247                 }
2248                 break;
2249 
2250             case CLIB.ulmod:
2251                 cinfo.retregs16 = mCX|mBX;
2252                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2253                 {
2254                     s = symboly("__umoddi3", mAX|mBX|mCX|mDX);
2255                     cinfo.flags = INFpushebx;
2256                     cinfo.retregs32 = mDX|mAX;
2257                 }
2258                 else if (config.exe & EX_SOLARIS)
2259                 {
2260                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2261                     cinfo.flags = INFpushebx;
2262                     cinfo.retregs32 = mCX|mBX;
2263                 }
2264                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2265                 {
2266                     s = symboly("_aullrem", mAX|mBX|mCX|mDX);
2267                     cinfo.flags = INFpusheabcdx;
2268                     cinfo.retregs32 = mAX|mDX;
2269                 }
2270                 else
2271                 {
2272                     const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@";
2273                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2274                     cinfo.retregs32 = mCX|mBX;
2275                 }
2276                 break;
2277 
2278             // This section is only for Windows and DOS (i.e. machines without the x87 FPU)
2279             case CLIB.dmul:
2280                 s = symboly("_DMUL@",mAX|mBX|mCX|mDX);
2281                 cinfo.retregs16 = DOUBLEREGS_16;
2282                 cinfo.retregs32 = DOUBLEREGS_32;
2283                 cinfo.pop = 8;
2284                 cinfo.flags = INFfloat;
2285                 cinfo.push87 = 1;
2286                 cinfo.pop87 = 1;
2287                 break;
2288 
2289             case CLIB.ddiv:
2290                 s = symboly("_DDIV@",mAX|mBX|mCX|mDX);
2291                 cinfo.retregs16 = DOUBLEREGS_16;
2292                 cinfo.retregs32 = DOUBLEREGS_32;
2293                 cinfo.pop = 8;
2294                 cinfo.flags = INFfloat;
2295                 cinfo.push87 = 1;
2296                 cinfo.pop87 = 1;
2297                 break;
2298 
2299             case CLIB.dtst0:
2300                 s = symboly("_DTST0@",0);
2301                 cinfo.flags = INFfloat;
2302                 break;
2303 
2304             case CLIB.dtst0exc:
2305                 s = symboly("_DTST0EXC@",0);
2306                 cinfo.flags = INFfloat;
2307                 break;
2308 
2309             case CLIB.dcmp:
2310                 s = symboly("_DCMP@",0);
2311                 cinfo.pop = 8;
2312                 cinfo.flags = INFfloat;
2313                 cinfo.push87 = 1;
2314                 cinfo.pop87 = 1;
2315                 break;
2316 
2317             case CLIB.dcmpexc:
2318                 s = symboly("_DCMPEXC@",0);
2319                 cinfo.pop = 8;
2320                 cinfo.flags = INFfloat;
2321                 cinfo.push87 = 1;
2322                 cinfo.pop87 = 1;
2323                 break;
2324 
2325             case CLIB.dneg:
2326                 s = symboly("_DNEG@",I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2327                 cinfo.retregs16 = DOUBLEREGS_16;
2328                 cinfo.retregs32 = DOUBLEREGS_32;
2329                 cinfo.flags = INFfloat;
2330                 break;
2331 
2332             case CLIB.dadd:
2333                 s = symboly("_DADD@",mAX|mBX|mCX|mDX);
2334                 cinfo.retregs16 = DOUBLEREGS_16;
2335                 cinfo.retregs32 = DOUBLEREGS_32;
2336                 cinfo.pop = 8;
2337                 cinfo.flags = INFfloat;
2338                 cinfo.push87 = 1;
2339                 cinfo.pop87 = 1;
2340                 break;
2341 
2342             case CLIB.dsub:
2343                 s = symboly("_DSUB@",mAX|mBX|mCX|mDX);
2344                 cinfo.retregs16 = DOUBLEREGS_16;
2345                 cinfo.retregs32 = DOUBLEREGS_32;
2346                 cinfo.pop = 8;
2347                 cinfo.flags = INFfloat;
2348                 cinfo.push87 = 1;
2349                 cinfo.pop87 = 1;
2350                 break;
2351 
2352             case CLIB.fmul:
2353                 s = symboly("_FMUL@",mAX|mBX|mCX|mDX);
2354                 cinfo.retregs16 = FLOATREGS_16;
2355                 cinfo.retregs32 = FLOATREGS_32;
2356                 cinfo.flags = INFfloat;
2357                 cinfo.push87 = 1;
2358                 cinfo.pop87 = 1;
2359                 break;
2360 
2361             case CLIB.fdiv:
2362                 s = symboly("_FDIV@",mAX|mBX|mCX|mDX);
2363                 cinfo.retregs16 = FLOATREGS_16;
2364                 cinfo.retregs32 = FLOATREGS_32;
2365                 cinfo.flags = INFfloat;
2366                 cinfo.push87 = 1;
2367                 cinfo.pop87 = 1;
2368                 break;
2369 
2370             case CLIB.ftst0:
2371                 s = symboly("_FTST0@",0);
2372                 cinfo.flags = INFfloat;
2373                 break;
2374 
2375             case CLIB.ftst0exc:
2376                 s = symboly("_FTST0EXC@",0);
2377                 cinfo.flags = INFfloat;
2378                 break;
2379 
2380             case CLIB.fcmp:
2381                 s = symboly("_FCMP@",0);
2382                 cinfo.flags = INFfloat;
2383                 cinfo.push87 = 1;
2384                 cinfo.pop87 = 1;
2385                 break;
2386 
2387             case CLIB.fcmpexc:
2388                 s = symboly("_FCMPEXC@",0);
2389                 cinfo.flags = INFfloat;
2390                 cinfo.push87 = 1;
2391                 cinfo.pop87 = 1;
2392                 break;
2393 
2394             case CLIB.fneg:
2395                 s = symboly("_FNEG@",I16 ? FLOATREGS_16 : FLOATREGS_32);
2396                 cinfo.retregs16 = FLOATREGS_16;
2397                 cinfo.retregs32 = FLOATREGS_32;
2398                 cinfo.flags = INFfloat;
2399                 break;
2400 
2401             case CLIB.fadd:
2402                 s = symboly("_FADD@",mAX|mBX|mCX|mDX);
2403                 cinfo.retregs16 = FLOATREGS_16;
2404                 cinfo.retregs32 = FLOATREGS_32;
2405                 cinfo.flags = INFfloat;
2406                 cinfo.push87 = 1;
2407                 cinfo.pop87 = 1;
2408                 break;
2409 
2410             case CLIB.fsub:
2411                 s = symboly("_FSUB@",mAX|mBX|mCX|mDX);
2412                 cinfo.retregs16 = FLOATREGS_16;
2413                 cinfo.retregs32 = FLOATREGS_32;
2414                 cinfo.flags = INFfloat;
2415                 cinfo.push87 = 1;
2416                 cinfo.pop87 = 1;
2417                 break;
2418 
2419             case CLIB.dbllng:
2420             {
2421                 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@";
2422                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2423                 cinfo.retregs16 = mDX | mAX;
2424                 cinfo.retregs32 = mAX;
2425                 cinfo.flags = INFfloat;
2426                 cinfo.push87 = 1;
2427                 cinfo.pop87 = 1;
2428                 break;
2429             }
2430 
2431             case CLIB.lngdbl:
2432             {
2433                 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@";
2434                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2435                 cinfo.retregs16 = DOUBLEREGS_16;
2436                 cinfo.retregs32 = DOUBLEREGS_32;
2437                 cinfo.flags = INFfloat;
2438                 cinfo.push87 = 1;
2439                 cinfo.pop87 = 1;
2440                 break;
2441             }
2442 
2443             case CLIB.dblint:
2444             {
2445                 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@";
2446                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2447                 cinfo.retregs16 = mAX;
2448                 cinfo.retregs32 = mAX;
2449                 cinfo.flags = INFfloat;
2450                 cinfo.push87 = 1;
2451                 cinfo.pop87 = 1;
2452                 break;
2453             }
2454 
2455             case CLIB.intdbl:
2456             {
2457                 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@";
2458                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2459                 cinfo.retregs16 = DOUBLEREGS_16;
2460                 cinfo.retregs32 = DOUBLEREGS_32;
2461                 cinfo.flags = INFfloat;
2462                 cinfo.push87 = 1;
2463                 cinfo.pop87 = 1;
2464                 break;
2465             }
2466 
2467             case CLIB.dbluns:
2468             {
2469                 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@";
2470                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2471                 cinfo.retregs16 = mAX;
2472                 cinfo.retregs32 = mAX;
2473                 cinfo.flags = INFfloat;
2474                 cinfo.push87 = 1;
2475                 cinfo.pop87 = 1;
2476                 break;
2477             }
2478 
2479             case CLIB.unsdbl:
2480                 // Y(DOUBLEREGS_32,"__UNSDBL"),         // CLIB.unsdbl
2481                 // Y(DOUBLEREGS_16,"_UNSDBL@"),
2482                 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _UNSDBL@     unsdbl
2483             {
2484                 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@";
2485                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2486                 cinfo.retregs16 = DOUBLEREGS_16;
2487                 cinfo.retregs32 = DOUBLEREGS_32;
2488                 cinfo.flags = INFfloat;
2489                 cinfo.push87 = 1;
2490                 cinfo.pop87 = 1;
2491                 break;
2492             }
2493 
2494             case CLIB.dblulng:
2495             {
2496                 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@";
2497                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2498                 cinfo.retregs16 = mDX|mAX;
2499                 cinfo.retregs32 = mAX;
2500                 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat;
2501                 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1;
2502                 cinfo.pop87 = 1;
2503                 break;
2504             }
2505 
2506             case CLIB.ulngdbl:
2507             {
2508                 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@";
2509                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2510                 cinfo.retregs16 = DOUBLEREGS_16;
2511                 cinfo.retregs32 = DOUBLEREGS_32;
2512                 cinfo.flags = INFfloat;
2513                 cinfo.push87 = 1;
2514                 cinfo.pop87 = 1;
2515                 break;
2516             }
2517 
2518             case CLIB.dblflt:
2519             {
2520                 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@";
2521                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2522                 cinfo.retregs16 = FLOATREGS_16;
2523                 cinfo.retregs32 = FLOATREGS_32;
2524                 cinfo.flags = INFfloat;
2525                 cinfo.push87 = 1;
2526                 cinfo.pop87 = 1;
2527                 break;
2528             }
2529 
2530             case CLIB.fltdbl:
2531             {
2532                 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@";
2533                 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32);
2534                 cinfo.retregs16 = DOUBLEREGS_16;
2535                 cinfo.retregs32 = DOUBLEREGS_32;
2536                 cinfo.flags = INFfloat;
2537                 cinfo.push87 = 1;
2538                 cinfo.pop87 = 1;
2539                 break;
2540             }
2541 
2542             case CLIB.dblllng:
2543             {
2544                 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@";
2545                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2546                 cinfo.retregs16 = DOUBLEREGS_16;
2547                 cinfo.retregs32 = mDX|mAX;
2548                 cinfo.flags = INFfloat;
2549                 cinfo.push87 = 1;
2550                 cinfo.pop87 = 1;
2551                 break;
2552             }
2553 
2554             case CLIB.llngdbl:
2555             {
2556                 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@";
2557                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2558                 cinfo.retregs16 = DOUBLEREGS_16;
2559                 cinfo.retregs32 = DOUBLEREGS_32;
2560                 cinfo.flags = INFfloat;
2561                 cinfo.push87 = 1;
2562                 cinfo.pop87 = 1;
2563                 break;
2564             }
2565 
2566             case CLIB.dblullng:
2567             {
2568                 if (config.exe == EX_WIN64)
2569                 {
2570                     s = symboly("__DBLULLNG", DOUBLEREGS_32);
2571                     cinfo.retregs32 = mAX;
2572                     cinfo.flags = INFfloat;
2573                     cinfo.push87 = 2;
2574                     cinfo.pop87 = 2;
2575                 }
2576                 else
2577                 {
2578                     const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@";
2579                     s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2580                     cinfo.retregs16 = DOUBLEREGS_16;
2581                     cinfo.retregs32 = I64 ? mAX : mDX|mAX;
2582                     cinfo.flags = INFfloat;
2583                     cinfo.push87 = (config.exe & ex_unix) ? 2 : 1;
2584                     cinfo.pop87 = (config.exe & ex_unix) ? 2 : 1;
2585                 }
2586                 break;
2587             }
2588 
2589             case CLIB.ullngdbl:
2590             {
2591                 if (config.exe == EX_WIN64)
2592                 {
2593                     s = symboly("__ULLNGDBL", DOUBLEREGS_32);
2594                     cinfo.retregs32 = mAX;
2595                     cinfo.flags = INFfloat;
2596                     cinfo.push87 = 1;
2597                     cinfo.pop87 = 1;
2598                 }
2599                 else
2600                 {
2601                     const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@";
2602                     s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2603                     cinfo.retregs16 = DOUBLEREGS_16;
2604                     cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32;
2605                     cinfo.flags = INFfloat;
2606                     cinfo.push87 = 1;
2607                     cinfo.pop87 = 1;
2608                 }
2609                 break;
2610             }
2611 
2612             case CLIB.dtst:
2613             {
2614                 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@";
2615                 s = symboly(name, 0);
2616                 cinfo.flags = INFfloat;
2617                 break;
2618             }
2619 
2620             case CLIB.vptrfptr:
2621             {
2622                 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@";
2623                 s = symboly(name, mES|mBX);
2624                 cinfo.retregs16 = mES|mBX;
2625                 cinfo.retregs32 = mES|mBX;
2626                 break;
2627             }
2628 
2629             case CLIB.cvptrfptr:
2630             {
2631                 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@";
2632                 s = symboly(name, mES|mBX);
2633                 cinfo.retregs16 = mES|mBX;
2634                 cinfo.retregs32 = mES|mBX;
2635                 break;
2636             }
2637 
2638             case CLIB._87topsw:
2639             {
2640                 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@";
2641                 s = symboly(name, 0);
2642                 cinfo.flags = INFfloat;
2643                 break;
2644             }
2645 
2646             case CLIB.fltto87:
2647             {
2648                 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@";
2649                 s = symboly(name, mST0);
2650                 cinfo.retregs16 = mST0;
2651                 cinfo.retregs32 = mST0;
2652                 cinfo.flags = INFfloat;
2653                 cinfo.push87 = 1;
2654                 break;
2655             }
2656 
2657             case CLIB.dblto87:
2658             {
2659                 const(char)* name = (config.exe & ex_unix) ? "__DBLTO87" : "_DBLTO87@";
2660                 s = symboly(name, mST0);
2661                 cinfo.retregs16 = mST0;
2662                 cinfo.retregs32 = mST0;
2663                 cinfo.flags = INFfloat;
2664                 cinfo.push87 = 1;
2665                 break;
2666             }
2667 
2668             case CLIB.dblint87:
2669             {
2670                 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@";
2671                 s = symboly(name, mST0|mAX);
2672                 cinfo.retregs16 = mAX;
2673                 cinfo.retregs32 = mAX;
2674                 cinfo.flags = INFfloat;
2675                 break;
2676             }
2677 
2678             case CLIB.dbllng87:
2679             {
2680                 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@";
2681                 s = symboly(name, mST0|mAX|mDX);
2682                 cinfo.retregs16 = mDX|mAX;
2683                 cinfo.retregs32 = mAX;
2684                 cinfo.flags = INFfloat;
2685                 break;
2686             }
2687 
2688             case CLIB.ftst:
2689             {
2690                 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@";
2691                 s = symboly(name, 0);
2692                 cinfo.flags = INFfloat;
2693                 break;
2694             }
2695 
2696             case CLIB.fcompp:
2697             {
2698                 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@";
2699                 s = symboly(name, 0);
2700                 cinfo.retregs16 = mPSW;
2701                 cinfo.retregs32 = mPSW;
2702                 cinfo.flags = INFfloat;
2703                 cinfo.pop87 = 2;
2704                 break;
2705             }
2706 
2707             case CLIB.ftest:
2708             {
2709                 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@";
2710                 s = symboly(name, 0);
2711                 cinfo.retregs16 = mPSW;
2712                 cinfo.retregs32 = mPSW;
2713                 cinfo.flags = INFfloat;
2714                 break;
2715             }
2716 
2717             case CLIB.ftest0:
2718             {
2719                 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@";
2720                 s = symboly(name, 0);
2721                 cinfo.retregs16 = mPSW;
2722                 cinfo.retregs32 = mPSW;
2723                 cinfo.flags = INFfloat;
2724                 break;
2725             }
2726 
2727             case CLIB.fdiv87:
2728             {
2729                 const(char)* name = (config.exe & ex_unix) ? "__FDIVP" : "_FDIVP";
2730                 s = symboly(name, mST0|mAX|mBX|mCX|mDX);
2731                 cinfo.retregs16 = mST0;
2732                 cinfo.retregs32 = mST0;
2733                 cinfo.flags = INFfloat;
2734                 cinfo.push87 = 1;
2735                 cinfo.pop87 = 1;
2736                 break;
2737             }
2738 
2739             // Complex numbers
2740             case CLIB.cmul:
2741             {
2742                 s = symboly("_Cmul", mST0|mST01);
2743                 cinfo.retregs16 = mST01;
2744                 cinfo.retregs32 = mST01;
2745                 cinfo.flags = INF32|INFfloat;
2746                 cinfo.push87 = 3;
2747                 cinfo.pop87 = 5;
2748                 break;
2749             }
2750 
2751             case CLIB.cdiv:
2752             {
2753                 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01);
2754                 cinfo.retregs16 = mST01;
2755                 cinfo.retregs32 = mST01;
2756                 cinfo.flags = INF32|INFfloat;
2757                 cinfo.push87 = 0;
2758                 cinfo.pop87 = 2;
2759                 break;
2760             }
2761 
2762             case CLIB.ccmp:
2763             {
2764                 s = symboly("_Ccmp", mAX|mST0|mST01);
2765                 cinfo.retregs16 = mPSW;
2766                 cinfo.retregs32 = mPSW;
2767                 cinfo.flags = INF32|INFfloat;
2768                 cinfo.push87 = 0;
2769                 cinfo.pop87 = 4;
2770                 break;
2771             }
2772 
2773             case CLIB.u64_ldbl:
2774             {
2775                 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL";
2776                 s = symboly(name, mST0);
2777                 cinfo.retregs16 = mST0;
2778                 cinfo.retregs32 = mST0;
2779                 cinfo.flags = INF32|INF64|INFfloat;
2780                 cinfo.push87 = 2;
2781                 cinfo.pop87 = 1;
2782                 break;
2783             }
2784 
2785             case CLIB.ld_u64:
2786             {
2787                 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF ||
2788                                                              config.objfmt == OBJ_MACH ?
2789                                                                 "__LDBLULLNG" : "___LDBLULLNG")
2790                                                           : "__LDBLULLNG";
2791                 s = symboly(name, mST0|mAX|mDX);
2792                 cinfo.retregs16 = 0;
2793                 cinfo.retregs32 = mDX|mAX;
2794                 cinfo.flags = INF32|INF64|INFfloat;
2795                 cinfo.push87 = 1;
2796                 cinfo.pop87 = 2;
2797                 break;
2798             }
2799 
2800             default:
2801                 assert(0);
2802         }
2803         clibsyms[clib] = s;
2804     }
2805 
2806     *ps = s;
2807     *pinfo = cinfo;
2808 }
2809 
/********************************
 * Emit the code that invokes a C runtime library support routine.
 *
 * Params:
 *      cdb = sink for the generated code
 *      e = expression the call is generated for; handed on to fixresult()
 *      clib = CLIB.xxxx selector of the routine
 *      pretregs = requested result registers; handed on to fixresult()
 *      keepmask = mask of registers not to destroy. Those the routine does
 *              not already preserve (per its Sregsaved) are saved before
 *              the call and restored after it via gensaverestore().
 */

@trusted
void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    Symbol* sym;
    ClibInfo* info;
    getClibInfo(clib, &sym, &info);

    if (I16)
        assert(!(info.flags & (INF32 | INF64)));

    // Vacate every register the routine may destroy, apart from those in keepmask
    getregs(cdb, (~sym.Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask);

    // Registers the routine preserves on its own need no save/restore
    keepmask &= ~sym.Sregsaved;
    const int nsaved = popcnt(keepmask);
    CodeBuilder cdbrestore;
    cdbrestore.ctor();
    gensaverestore(keepmask, cdb, cdbrestore);

    // Account for the routine's effect on the x87 register stack
    save87regs(cdb, info.push87);
    foreach (k; 0 .. info.push87)
        push87(cdb);

    foreach (k; 0 .. info.pop87)
        pop87();

    const bool inlineLmul = config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32;
    if (inlineLmul)
    {
        // Long multiply is emitted inline as raw bytes instead of being called
        static immutable ubyte[23] lmulBytes =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(cast(char*)lmulBytes.ptr, lmulBytes.sizeof);
    }
    else
    {
        // Order in which the four general registers are pushed
        static immutable ubyte[4] pushOrder = [ CX, BX, DX, AX ];

        makeitextern(sym);
        const int viaEbx = (info.flags & INFpushebx) != 0;
        const int viaAll = (info.flags & INFpusheabcdx) != 0;
        int cleanup = 0;                // bytes to take back off the stack after the CALL

        if (STACKALIGN >= 16)
        {
            // Align the stack (assume no args on stack)
            const int pending = (nsaved + viaEbx + 4 * viaAll) * REGSIZE + stackpush;
            const int misfit = pending & (STACKALIGN - 1);
            if (misfit)
            {
                cleanup = STACKALIGN - misfit;
                cod3_stackadj(cdb, cleanup);
            }
        }

        if (viaEbx)
        {
            if (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64 | EX_DRAGONFLYBSD64))
            {
                foreach (r; pushOrder)
                    cdb.gen1(0x50 + r);                          // PUSH ECX, EBX, EDX, EAX
                cleanup += 4 * REGSIZE;
            }
            else
            {
                cdb.gen1(0x50 + BX);                             // PUSH EBX
                cleanup += REGSIZE;
            }
        }

        if (viaAll)
        {
            foreach (r; pushOrder)
                cdb.gen1(0x50 + r);                              // PUSH ECX, EBX, EDX, EAX
        }

        // Note: not for OSX
        /* Pass EBX on the stack instead, this is because EBX is used
         * for shared library function calls
         */
        if ((config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) &&
            (config.flags3 & CFG3pic))
        {
            load_localgot(cdb);         // EBX gets set to this value
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8,0,FLfunc,sym);  // CALL sym
        if (cleanup)
            cod3_stackadj(cdb, -cleanup);
        calledafunc = 1;

        version (SCPP)
        {
            if (I16 &&                                   // bug in Optlink for weak references
                config.flags3 & CFG3wkfloat &&
                (info.flags & (INFfloat | INFwkdone)) == INFfloat)
            {
                info.flags |= INFwkdone;
                makeitextern(getRtlsym(RTLSYM.INTONLY));
                objmod.wkext(sym, getRtlsym(RTLSYM.INTONLY));
            }
        }
    }

    if (I16)
        stackpush -= info.pop;

    // Restore the kept registers, then move the result to where the caller wants it
    cdb.append(cdbrestore);
    regm_t retregs = I16 ? info.retregs16 : info.retregs32;
    fixresult(cdb, e, retregs, pretregs);
}
2930 
2931 
/*************************************************
 * One argument of a function call. fillParameters() converts a tree of
 * OPparam's into an array of these.
 */
struct Parameter
{
    elem* e;            // the argument expression
    reg_t reg;          // first register the argument is passed in, or NOREG
    reg_t reg2;         // second register when a pair is used, or NOREG
    uint numalign;      // alignment padding bytes counted ahead of a stack argument
}
2936 
2937 //void fillParameters(elem* e, Parameter* parameters, int* pi);
2938 
/*************************************************
 * Flatten a tree of OPparam nodes into an array of Parameters.
 *
 * The left subtree is visited before the right one. Every non-OPparam
 * elem becomes the `e` field of the next free array slot; the OPparam
 * nodes themselves are released with freenode().
 * Params:
 *      e = root of the (sub)tree
 *      parameters = destination array; must have room for every leaf
 *      pi = index of the next slot to fill, advanced once per leaf stored
 */
@trusted
void fillParameters(elem* e, Parameter* parameters, int* pi)
{
    if (e.Eoper != OPparam)
    {
        // Leaf: record it and move on to the next slot
        parameters[*pi].e = e;
        ++*pi;
        return;
    }

    fillParameters(e.EV.E1, parameters, pi);
    fillParameters(e.EV.E2, parameters, pi);
    freenode(e);
}
2954 
/***********************************
 * Create the register-allocation state for the arguments of one call.
 * Params:
 *      tyf = type of the function
 * Returns:
 *      a FuncParamRegs holding tyf plus the integer and floating point
 *      argument register lists (and their lengths) for the current target
 */
@trusted
FuncParamRegs FuncParamRegs_create(tym_t tyf)
{
    // Argument registers, listed in the order they are handed out
    static immutable ubyte[1] jfuncRegs  = [ AX ];
    static immutable ubyte[1] mfuncRegs  = [ CX ];
    static immutable ubyte[4] win64Regs  = [ CX,DX,R8,R9 ];
    static immutable ubyte[4] win64Xmms  = [ XMM0, XMM1, XMM2, XMM3 ];
    static immutable ubyte[6] posixRegs  = [ DI,SI,DX,CX,R8,R9 ];
    static immutable ubyte[8] posixXmms  = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ];

    FuncParamRegs fpr;
    fpr.tyf = tyf;

    if (I16)
    {
        // Nothing is passed in registers
        fpr.numintegerregs = 0;
        fpr.numfloatregs = 0;
        return fpr;
    }

    if (I32)
    {
        // Only TYjfunc and TYmfunc get a (single) integer argument register
        if (tyf == TYjfunc)
        {
            fpr.argregs = jfuncRegs.ptr;
            fpr.numintegerregs = jfuncRegs.length;
        }
        else if (tyf == TYmfunc)
        {
            fpr.argregs = mfuncRegs.ptr;
            fpr.numintegerregs = mfuncRegs.length;
        }
        else
            fpr.numintegerregs = 0;
        fpr.numfloatregs = 0;
        return fpr;
    }

    if (!I64)
        assert(0);

    if (config.exe == EX_WIN64)
    {
        fpr.argregs = win64Regs.ptr;
        fpr.numintegerregs = win64Regs.length;

        fpr.floatregs = win64Xmms.ptr;
        fpr.numfloatregs = win64Xmms.length;
    }
    else
    {
        fpr.argregs = posixRegs.ptr;
        fpr.numintegerregs = posixRegs.length;

        fpr.floatregs = posixXmms.ptr;
        fpr.numfloatregs = posixXmms.length;
    }
    return fpr;
}
3012 
/*****************************************
 * Documentation for FuncParamRegs_alloc(), defined after its helper type_jparam2():
 * Allocate parameter of type t and ty to registers *preg1 and *preg2.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 * Returns:
 *      0       not allocated to any register
 *      1       *preg1 (and *preg2, if a second register was needed) set to the
 *              allocated register(s); an unused *preg2 is left as NOREG
 */
3021 
3022 //bool type_jparam2(type* t, tym_t ty);
3023 
/*****************************************
 * Decide whether a value of the given type qualifies for an integer
 * argument register.
 * Params:
 *      t = type, consulted only when ty is TYstruct or TYarray
 *      ty = type code; only its basic part is examined
 * Returns:
 *      false for floating types;
 *      for structs and arrays, true if the size does not exceed that of a
 *      near pointer and (except on Win64) is exactly 1, 2, 4 or 8;
 *      for everything else, true if the size does not exceed that of a
 *      near pointer
 */
@trusted
private bool type_jparam2(type* t, tym_t ty)
{
    ty = tybasic(ty);

    if (tyfloating(ty))
        return false;

    if (ty != TYstruct && ty != TYarray)
        return tysize(ty) <= _tysize[TYnptr];

    // Aggregate: judge by its total size
    type_debug(t);
    targ_size_t sz = type_size(t);
    if (sz > _tysize[TYnptr])
        return false;
    return config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8;
}
3042 
/*****************************************
 * Try to assign the next function argument to one or two registers.
 *
 * Params:
 *      fpr = allocation state from FuncParamRegs_create(); its counters
 *            (i, regcnt, xmmcnt) are advanced as registers are handed out
 *      t = type of the argument, needed when ty is a struct or array
 *      ty = type code of the argument
 *      preg1 = receives the first register, or NOREG
 *      preg2 = receives the second register, or NOREG
 * Returns:
 *      1 if the argument was placed in register(s), 0 if it was not
 */
@trusted
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    // Type of the second half, when the argument is split over two registers.
    // ty2 == TYMAX means there is no second half.
    type* t2 = null;
    tym_t ty2 = TYMAX;

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        ty2 = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    if (tybasic(ty) == TYarray && tybasic(t.Tty) == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile)
        && type_size(t.Tnext) > 1)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    // Count this argument; the 32 bit register conventions below only
    // consider the argument for which this count is 1
    ++fpr.i;

    // If struct or array
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            // Replace the aggregate by the one or two types it is passed as
            type* targ1, targ2;
            if (tybasic(t.Tty) == TYstruct)
            {
                targ1 = t.Ttag.Sstruct.Sarg1type;
                targ2 = t.Ttag.Sstruct.Sarg2type;
            }
            else if (tybasic(t.Tty) == TYarray)
            {
                if (I64)
                    argtypes(t, targ1, targ2);
            }
            else
                assert(0);

            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;       // no replacement types: not passed in registers
        }
    }

    reg_t* preg = preg1;
    // Remember the counters so a half-done pair allocation can be undone
    int regcntsave = fpr.regcnt;
    int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
        {
            ty = TYnptr;                // treat like a struct
        }
    }
    else if (I64)
    {
        if ((tybasic(ty) == TYcent || tybasic(ty) == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcfloat
            && fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // Pass 0 allocates (t, ty) into *preg1; pass 1, only reached when there
    // is a second half, allocates (t2, ty2) into *preg2
    foreach (j; 0 .. 2)
    {
        if (fpr.regcnt < fpr.numintegerregs)
        {
            // In 64 bit code any argument may use an integer register; otherwise
            // only the first counted argument of a TYjfunc/TYmfunc function may
            if ((I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
                type_jparam2(t, ty))
            {
                *preg = fpr.argregs[fpr.regcnt];
                ++fpr.regcnt;
                // On Win64 the integer and XMM counters advance in lock step
                if (config.exe == EX_WIN64)
                    ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        if (fpr.xmmcnt < fpr.numfloatregs)
        {
            if (tyxmmreg(ty))
            {
                *preg = fpr.floatregs[fpr.xmmcnt];
                // On Win64 the integer and XMM counters advance in lock step
                if (config.exe == EX_WIN64)
                    ++fpr.regcnt;
                ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        // Failed to allocate to a register
        if (j == 1)
        {   /* Unwind first preg1 assignment, because it's both or nothing
             */
            *preg1 = NOREG;
            fpr.regcnt = regcntsave;
            fpr.xmmcnt = xmmcntsave;
        }
        return 0;

     Lnext:
        if (tybasic(ty2) == TYMAX)
            break;              // no second half, done
        // Set up to allocate the second half on the next pass
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}
3210 
/***************************************
 * Finds replacement types for register passing of aggregates.
 *
 * Only static arrays are classified here. For a struct the two outputs are
 * merely reset to null (the struct classification lives elsewhere, see the
 * TODO at the end).
 * Params:
 *      t = type to classify. If it is null or not an aggregate, the function
 *          returns without touching arg1type and arg2type.
 *      arg1type = set to the type the first register-sized part is passed as,
 *          or null if the aggregate is not passed in registers
 *      arg2type = set to the type of the second part, or null if there is none
 */
@trusted
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        // On 32 bit targets and Win64, sizes that are not a power of 2 are rejected
        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        // Default classification: one or two integers covering the array
        if (sz <= 2 * REGSIZE)
        {
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ? tstypes[TYlong]
                         : tstypes[TYllong];
                argtype = &arg2type;
                argsz = sz - REGSIZE;   // size of the part beyond the first register
            }
        }

        // 64 bit non-Windows targets: refine the classification by element type
        if (I64 && config.exe != EX_WIN64)
        {
            /* Peel off nested array dimensions to reach the element type.
             * The type code must be stripped with tybasic() before it is
             * compared with TYarray: Tty also carries modifier bits (the
             * callers test e.g. mTYvolatile on it), and with those set a
             * qualified inner array would not be recognized as an array, so
             * the array would be classified differently from its unqualified
             * counterpart.
             */
            type* tn = t.Tnext;
            tym_t tyn = tybasic(tn.Tty);
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    // The array is exactly one struct: use the struct's own classification
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                // Otherwise continue with the struct's first argument type, if it has one
                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            // The whole array is a single element of a SIMD or long double type
            if (sz == tysize(tyn))
            {
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            // Floating point elements, up to 16 bytes: float/double per 8 byte half
            if (sz <= 16)
            {
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}
3313 
3314 /*******************************
3315  * Generate code sequence for function call.
3316  */
3317 
@trusted
void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    /* Params:
     *      cdb      = sink for the generated code
     *      e        = the call elem; e.EV.E1 is the function, and when the
     *                 operator is binary e.EV.E2 holds the arguments
     *      pretregs = forwarded unchanged to funccall() (where the return value goes)
     *
     * Overview:
     *  1. flatten the arguments into parameters[0..np]
     *  2. decide, per argument, register vs stack, and total up numpara
     *  3. decide whether to store stack arguments into the cgstate.funcarg
     *     area (usefuncarg) or to push them
     *  4. evaluate every argument, saving any already-loaded argument
     *     register that the evaluation of a later argument needs
     *  5. restore those registers and hand over to funccall() which emits
     *     the call itself and the stack cleanup
     */
    //printf("cdfunc()\n"); elem_print(e);
    assert(e);
    uint numpara = 0;               // bytes of parameters
    uint numalign = 0;              // bytes to align stack before pushing parameters
    uint stackpushsave = stackpush;            // so we can compute # of parameters
    cgstate.stackclean++;           // paired with the stackclean-- just before funccall()
    regm_t keepmsk = 0;             // argument registers already loaded; must survive later evaluation
    int xmmcnt = 0;                 // number of XMM registers used for arguments
    tym_t tyf = tybasic(e.EV.E1.Ety);        // the function type

    // Easier to deal with parameters as an array: parameters[0..np]
    int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0;
    Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof);

    if (np)
    {
        int n = 0;
        fillParameters(e.EV.E2, parameters, &n);
        assert(n == np);
    }

    Symbol *sf = null;                  // symbol of the function being called
    if (e.EV.E1.Eoper == OPvar)
        sf = e.EV.E1.EV.Vsym;

    /* Assume the called function accesses statics
     */
    if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64) &&
        config.flags3 & CFG3pic)
        cgstate.accessedTLS = true;

    /* Special handling for call to __tls_get_addr, we must save registers
     * before evaluating the parameter, so that the parameter load and call
     * are adjacent.
     */
    if (np == 1 && sf)
    {
        if (sf == tls_get_addr_sym)
            getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
    }

    uint stackalign = REGSIZE;
    if (tyf == TYf16func)
        stackalign = 2;
    // Figure out which parameters go in registers.
    // Compute numpara, the total bytes pushed on the stack
    // Note the loop walks the parameters from last to first.
    FuncParamRegs fpr = FuncParamRegs_create(tyf);
    for (int i = np; --i >= 0;)
    {
        elem *ep = parameters[i].e;
        uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf));     // align on stack boundary
        if (config.exe == EX_WIN64)
        {
            // On Win64 every parameter must fit in, and is given, exactly one REGSIZE slot
            //printf("[%d] size = %u, numpara = %d ep = %p %s\n", i, psize, numpara, ep, tym_str(ep.Ety));
            debug
            if (psize > REGSIZE) elem_print(e);

            assert(psize <= REGSIZE);
            psize = REGSIZE;
        }
        //printf("[%d] size = %u, numpara = %d %s\n", i, psize, numpara, tym_str(ep.Ety));
        if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, &parameters[i].reg, &parameters[i].reg2))
        {
            if (config.exe == EX_WIN64)
                numpara += REGSIZE;             // allocate stack space for it anyway
            continue;   // goes in register, not stack
        }

        // Parameter i goes on the stack
        parameters[i].reg = NOREG;
        uint alignsize = el_alignsize(ep);
        parameters[i].numalign = 0;
        if (alignsize > stackalign &&
            (I64 || (alignsize >= 16 &&
                (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety))))))
        {
            // Over-aligned stack parameter: raise the function's stack alignment
            // if needed and record the padding that rounds numpara up to alignsize
            if (alignsize > STACKALIGN)
            {
                STACKALIGN = alignsize;
                enforcealign = true;
            }
            uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
            parameters[i].numalign = newnumpara - numpara;
            numpara = newnumpara;
            assert(config.exe != EX_WIN64);
        }
        numpara += psize;
    }

    if (config.exe == EX_WIN64)
    {
        // Never less than the four register-parameter slots
        if (numpara < 4 * REGSIZE)
            numpara = 4 * REGSIZE;
    }

    //printf("numpara = %d, stackpush = %d\n", numpara, stackpush);
    assert((numpara & (REGSIZE - 1)) == 0);
    assert((stackpush & (REGSIZE - 1)) == 0);

    /* Should consider reordering the order of evaluation of the parameters
     * so that args that go into registers are evaluated after args that get
     * pushed. We can reorder args that are constants or relconst's.
     */

    /* Determine if we should use cgstate.funcarg for the parameters or push them
     */
    bool usefuncarg = false;
    static if (0)
    {
        printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size,
            !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)),
            cast(int)numpara, !stackpush,
            (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos),
            (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16);
    }
    if (config.flags4 & CFG4speed &&
        !Alloca.size &&
        /* The cleanup code calls a local function, leaving the return address on
         * the top of the stack. If parameters are placed there, the return address
         * is stepped on.
         * A better solution is turn this off only inside the cleanup code.
         */
        !usednteh &&
        !calledFinally &&
        (numpara || config.exe == EX_WIN64) &&
        stackpush == 0 &&               // cgstate.funcarg needs to be at top of stack
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) &&
        (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) &&
        !anyiasm && !I16
       )
    {
        // Any stack parameter with one of these operators rules out usefuncarg
        for (int i = 0; i < np; i++)
        {
            elem* ep = parameters[i].e;
            int preg = parameters[i].reg;
            //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
            if (preg == NOREG)
            {
                switch (ep.Eoper)
                {
                    case OPstrctor:
                    case OPstrthis:
                    case OPstrpar:
                    case OPnp_fp:
                        goto Lno;

                    default:
                        break;
                }
            }
        }

        if (numpara > cgstate.funcarg.size)
        {   // New high water mark
            //printf("increasing size from %d to %d\n", cast(int)cgstate.funcarg.size, cast(int)numpara);
            cgstate.funcarg.size = numpara;
        }
        usefuncarg = true;
    }
  Lno:

    /* Adjust start of the stack so after all args are pushed,
     * the stack will be aligned.
     */
    if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1))
    {
        numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
        cod3_stackadj(cdb, numalign);
        cdb.genadjesp(numalign);
        stackpush += numalign;
        stackpushsave += numalign;      // so numalign is not counted as parameter bytes below
    }
    assert(stackpush == stackpushsave);
    if (config.exe == EX_WIN64)
    {
        //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush);
        assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE));

        // Allocate stack space for four entries anyway
        // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29
    }

    CodeBuilder cdbrestore;             // collects the restores; emitted after all arguments are evaluated
    cdbrestore.ctor();
    regm_t saved = 0;                   // registers that were saved and must be restored
    targ_size_t funcargtossave = cgstate.funcargtos;
    targ_size_t funcargtos = numpara;   // counts down to 0 as arguments are stored (usefuncarg only)
    //printf("funcargtos1 = %d\n", cast(int)funcargtos);

    /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
     * float and double parameters go into XMM0..XMM7
     * For variadic functions, count of XMM registers used goes in AL
     */
    for (int i = 0; i < np; i++)
    {
        elem* ep = parameters[i].e;
        int preg = parameters[i].reg;
        //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
        if (preg == NOREG)
        {
            /* Push parameter on stack, but keep track of registers used
             * in the process. If they interfere with keepmsk, we'll have
             * to save/restore them.
             */
            CodeBuilder cdbsave;
            cdbsave.ctor();
            /* Temporarily add keepmsk to msavereg. Afterwards, the keepmsk bits
             * missing from msavereg are the registers to save; presumably the
             * code generator clears a register's msavereg bit when it takes that
             * register - TODO confirm in getregs().
             */
            regm_t overlap = msavereg & keepmsk;
            msavereg |= keepmsk;
            CodeBuilder cdbparams;
            cdbparams.ctor();
            if (usefuncarg)
                movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf);
            else
                pushParams(cdbparams,ep,stackalign, tyf);
            regm_t tosave = keepmsk & ~msavereg;
            msavereg &= ~keepmsk | overlap;     // take keepmsk back out, except bits that were set before

            // tosave is the mask to save and restore
            for (reg_t j = 0; tosave; j++)
            {
                regm_t mi = mask(j);
                assert(j <= XMM7);
                if (mi & tosave)
                {
                    uint idx;
                    regsave.save(cdbsave, j, &idx);
                    regsave.restore(cdbrestore, j, idx);
                    saved |= mi;
                    keepmsk &= ~mi;             // don't need to keep these for rest of params
                    tosave &= ~mi;
                }
            }

            // The saves must execute before the code that clobbers the registers
            cdb.append(cdbsave);
            cdb.append(cdbparams);

            // Alignment for parameter comes after it got pushed
            const uint numalignx = parameters[i].numalign;
            if (usefuncarg)
            {
                funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx;
                cgstate.funcargtos = funcargtos;
            }
            else if (numalignx)
            {
                cod3_stackadj(cdb, numalignx);
                cdb.genadjesp(numalignx);
                stackpush += numalignx;
            }
        }
        else
        {
            // Goes in register preg, not stack
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
                ++xmmcnt;
            int preg2 = parameters[i].reg2;
            reg_t mreg,lreg;            // registers the value is first evaluated into
            if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat)
            {
                // Argument occupies a register pair, or is a cfloat
                assert(ep.Eoper != OPstrthis);
                if (mask(preg2) & XMMREGS)
                    ++xmmcnt;
                if (tybasic(ep.Ety) == TYcfloat)
                {
                    lreg = ST01;
                    mreg = NOREG;
                }
                else if (tyrelax(ep.Ety) == TYcent)
                {
                    lreg = mask(preg ) & mLSW ? cast(reg_t)preg  : AX;
                    mreg = mask(preg2) & mMSW ? cast(reg_t)preg2 : DX;
                }
                else
                {
                    lreg = XMM0;
                    mreg = XMM1;
                }
                retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG);
                CodeBuilder cdbsave;
                cdbsave.ctor();
                if (keepmsk & retregs)
                {
                    // The evaluation registers hold earlier arguments: save them first
                    regm_t tosave = keepmsk & retregs;

                    // tosave is the mask to save and restore
                    for (reg_t j = 0; tosave; j++)
                    {
                        regm_t mi = mask(j);
                        assert(j <= XMM7);
                        if (mi & tosave)
                        {
                            uint idx;
                            regsave.save(cdbsave, j, &idx);
                            regsave.restore(cdbrestore, j, idx);
                            saved |= mi;
                            keepmsk &= ~mi;             // don't need to keep these for rest of params
                            tosave &= ~mi;
                        }
                    }
                }
                cdb.append(cdbsave);

                scodelem(cdb, ep, &retregs, keepmsk, false);

                // Move result [mreg,lreg] into parameter registers from [preg2,preg]
                retregs = 0;
                if (preg != lreg)
                    retregs |= mask(preg);
                if (preg2 != mreg)
                    retregs |= mask(preg2);
                retregs &= ~mask(NOREG);
                getregs(cdb,retregs);

                // Pick the types used for the two register-to-register moves
                tym_t ty1 = tybasic(ep.Ety);
                tym_t ty2 = ty1;
                if (ep.Ety & mTYgprxmm)
                {
                    ty1 = TYllong;
                    ty2 = TYdouble;
                }
                else if (ep.Ety & mTYxmmgpr)
                {
                    ty1 = TYdouble;
                    ty2 = TYllong;
                }
                else if (ty1 == TYstruct)
                {
                    type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type;
                    type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type;
                    if (targ1)
                        ty1 = targ1.Tty;
                    if (targ2)
                        ty2 = targ2.Tty;
                }
                else if (tyrelax(ty1) == TYcent)
                    ty1 = ty2 = TYllong;
                else if (tybasic(ty1) == TYcdouble)
                    ty1 = ty2 = TYdouble;

                if (tybasic(ep.Ety) == TYcfloat)
                {
                    /* The value was evaluated into ST01. Store the two x87 values
                     * as floats at offsets tysize(TYfloat) and 0, then load both
                     * at once into preg.
                     */
                    assert(I64);
                    assert(lreg == ST01 && mreg == NOREG);
                    // spill
                    pop87();
                    pop87();
                    cdb.genfltreg(0xD9, 3, tysize(TYfloat));
                    genfwait(cdb);
                    cdb.genfltreg(0xD9, 3, 0);
                    genfwait(cdb);
                    // reload
                    if (config.exe == EX_WIN64)
                    {
                        cdb.genfltreg(LOD, preg, 0);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        assert(mask(preg) & XMMREGS);
                        cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble);
                    }
                }
                else foreach (v; 0 .. 2)
                {
                    /* Two moves: preg <- lreg and preg2 <- mreg. When preg is
                     * the same register as mreg, the XOR makes preg2 <- mreg
                     * go first so that mreg is read before it is overwritten.
                     */
                    if (v ^ (preg != mreg))
                        genmovreg(cdb, preg, lreg, ty1);
                    else
                        genmovreg(cdb, preg2, mreg, ty2);
                }

                retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG);
            }
            else if (ep.Eoper == OPstrthis)
            {
                getregs(cdb,retregs);
                // LEA preg,np[RSP]
                uint delta = stackpush - ep.EV.Vuns;   // stack delta to parameter
                cdb.genc1(LEA,
                        (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0)
            {
                // Zero sized struct: evaluate the operand without asking for a result
                retregs = 0;
                scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false);
                freenode(ep);
            }
            else
            {
                scodelem(cdb, ep, &retregs, keepmsk, false);
            }
            keepmsk |= retregs;      // don't change preg when evaluating func address
        }
    }

    if (config.exe == EX_WIN64)
    {   // Allocate stack space for four entries anyway
        // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29
        {   uint sz = 4 * REGSIZE;
            if (usefuncarg)
            {
                funcargtos -= sz;
                cgstate.funcargtos = funcargtos;
            }
            else
            {
                cod3_stackadj(cdb, sz);
                cdb.genadjesp(sz);
                stackpush += sz;
            }
        }

        /* Variadic functions store XMM parameters into their corresponding GP registers
         * (note: no check of EFLAGS_variadic is made here, the copy is emitted
         * for every XMM register argument)
         */
        for (int i = 0; i < np; i++)
        {
            int preg = parameters[i].reg;
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
            {
                reg_t reg;
                switch (preg)
                {
                    case XMM0: reg = CX; break;
                    case XMM1: reg = DX; break;
                    case XMM2: reg = R8; break;
                    case XMM3: reg = R9; break;

                    default:   assert(0);
                }
                getregs(cdb,mask(reg));
                cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg
            }
        }
    }

    // Restore any register parameters we saved
    getregs(cdb,saved);
    cdb.append(cdbrestore);
    keepmsk |= saved;

    // Variadic functions store the number of XMM registers used in AL
    if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic)
    {
        getregs(cdb,mAX);
        movregconst(cdb,AX,xmmcnt,1);
        keepmsk |= mAX;
    }

    //printf("funcargtos2 = %d\n", cast(int)funcargtos);
    assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0));
    cgstate.stackclean--;

    debug
    if (!usefuncarg && numpara != stackpush - stackpushsave)
    {
        printf("function %s\n", funcsym_p.Sident.ptr);
        printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
        elem_print(e);
    }

    // When pushing, the bytes pushed must match what the first pass computed
    assert(usefuncarg || numpara == stackpush - stackpushsave);

    funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg);
    cgstate.funcargtos = funcargtossave;
}
3791 
3792 /***********************************
3793  */
3794 
3795 @trusted
3796 void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
3797 {
3798     assert(tysize(e.Ety) == REGSIZE);
3799     const reg = findreg(*pretregs & allregs);
3800     getregs(cdb,mask(reg));
3801     // LEA reg,np[ESP]
3802     uint np = stackpush - e.EV.Vuns;        // stack delta to parameter
3803     cdb.genc1(LEA,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np);
3804     if (I64)
3805         code_orrex(cdb.last(), REX_W);
3806     fixresult(cdb, e, mask(reg), pretregs);
3807 }
3808 
3809 /******************************
3810  * Call function. All parameters have already been pushed onto the stack.
3811  * Params:
3812  *      e          = function call
3813  *      numpara    = size in bytes of all the parameters
3814  *      numalign   = amount the stack was aligned by before the parameters were pushed
3815  *      pretregs   = where return value goes
3816  *      keepmsk    = registers to not change when evaluating the function address
3817  *      usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return
3818  */
3819 
3820 @trusted
3821 private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign,
3822                       regm_t* pretregs,regm_t keepmsk, bool usefuncarg)
3823 {
3824     //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg);
3825     //printf("  from %s\n", funcsym_p.Sident.ptr);
3826     //elem_print(e);
3827     calledafunc = 1;
3828     // Determine if we need frame for function prolog/epilog
3829 
3830     if (config.memmodel == Vmodel)
3831     {
3832         if (tyfarfunc(funcsym_p.ty()))
3833             needframe = true;
3834     }
3835 
3836     code cs;
3837     regm_t retregs;
3838     Symbol* s;
3839 
3840     elem* e1 = e.EV.E1;
3841     tym_t tym1 = tybasic(e1.Ety);
3842     char farfunc = tyfarfunc(tym1) || tym1 == TYifunc;
3843 
3844     CodeBuilder cdbe;
3845     cdbe.ctor();
3846 
3847     if (e1.Eoper == OPvar)
3848     {   // Call function directly
3849 
3850         if (!tyfunc(tym1))
3851             printf("%s\n", tym_str(tym1));
3852         assert(tyfunc(tym1));
3853         s = e1.EV.Vsym;
3854         if (s.Sflags & SFLexit)
3855         { }
3856         else if (s != tls_get_addr_sym)
3857             save87(cdb);               // assume 8087 regs are all trashed
3858 
3859         // Function calls may throw Errors, unless marked that they don't
3860         if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow))
3861             funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;
3862 
3863         if (s.Sflags & SFLexit)
3864         {
3865             // Function doesn't return, so don't worry about registers
3866             // it may use
3867         }
3868         else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized))
3869             // so we can replace func at runtime
3870             getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
3871         else
3872             getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
3873         if (strcmp(s.Sident.ptr, "alloca") == 0)
3874         {
3875             s = getRtlsym(RTLSYM.ALLOCA);
3876             makeitextern(s);
3877             int areg = CX;
3878             if (config.exe == EX_WIN64)
3879                 areg = DX;
3880             getregs(cdbe, mask(areg));
3881             cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0);  // LEA areg,&localsize[BP]
3882             if (I64)
3883                 code_orrex(cdbe.last(), REX_W);
3884             Alloca.size = REGSIZE;
3885         }
3886         if (sytab[s.Sclass] & SCSS)    // if function is on stack (!)
3887         {
3888             retregs = allregs & ~keepmsk;
3889             s.Sflags &= ~GTregcand;
3890             s.Sflags |= SFLread;
3891             cdrelconst(cdbe,e1,&retregs);
3892             if (farfunc)
3893             {
3894                 const reg = findregmsw(retregs);
3895                 const lsreg = findreglsw(retregs);
3896                 floatreg = true;                // use float register
3897                 reflocal = true;
3898                 cdbe.genc1(0x89,                 // MOV floatreg+2,reg
3899                         modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
3900                 cdbe.genc1(0x89,                 // MOV floatreg,lsreg
3901                         modregrm(2, lsreg, BPRM), FLfltreg, 0);
3902                 if (tym1 == TYifunc)
3903                     cdbe.gen1(0x9C);             // PUSHF
3904                 cdbe.genc1(0xFF,                 // CALL [floatreg]
3905                         modregrm(2, 3, BPRM), FLfltreg, 0);
3906             }
3907             else
3908             {
3909                 const reg = findreg(retregs);
3910                 cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
3911                 if (I64)
3912                     code_orrex(cdbe.last(), REX_W);
3913             }
3914         }
3915         else
3916         {
3917             int fl = FLfunc;
3918             if (!tyfunc(s.ty()))
3919                 fl = el_fl(e1);
3920             if (tym1 == TYifunc)
3921                 cdbe.gen1(0x9C);                             // PUSHF
3922             if (config.exe & (EX_windos | EX_OSX | EX_OSX64))
3923             {
3924                 cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s);    // CALL extern
3925             }
3926             else
3927             {
3928                 assert(!farfunc);
3929                 if (s != tls_get_addr_sym)
3930                 {
3931                     //printf("call %s\n", s.Sident.ptr);
3932                     load_localgot(cdb);
3933                     cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
3934                 }
3935                 else if (I64)
3936                 {
3937                     /* Prepend 66 66 48 so GNU linker has patch room
3938                      */
3939                     assert(!farfunc);
3940                     cdbe.gen1(0x66);
3941                     cdbe.gen1(0x66);
3942                     cdbe.gencs(0xE8, 0, fl, s);      // CALL extern
3943                     cdbe.last().Irex = REX | REX_W;
3944                 }
3945                 else
3946                     cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
3947             }
3948             code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff));
3949         }
3950     }
3951     else
3952     {   // Call function via pointer
3953 
3954         // Function calls may throw Errors
3955         funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;
3956 
3957         if (e1.Eoper != OPind) { WRFL(cast(FL)el_fl(e1)); printf("e1.Eoper: %s\n", oper_str(e1.Eoper)); }
3958         save87(cdb);                   // assume 8087 regs are all trashed
3959         assert(e1.Eoper == OPind);
3960         elem *e11 = e1.EV.E1;
3961         tym_t e11ty = tybasic(e11.Ety);
3962         assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
3963         load_localgot(cdb);
3964         if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // 32 bit only
3965         {
3966             if (config.flags3 & CFG3pic)
3967                 keepmsk |= mBX;
3968         }
3969 
3970         /* Mask of registers destroyed by the function call
3971          */
3972         regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved;
3973 
3974         // if we can't use loadea()
3975         if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) &&
3976             (e11.Eoper != OPind || e11.Ecount))
3977         {
3978             retregs = allregs & ~keepmsk;
3979             cgstate.stackclean++;
3980             scodelem(cdbe,e11,&retregs,keepmsk,true);
3981             cgstate.stackclean--;
3982             // Kill registers destroyed by an arbitrary function call
3983             getregs(cdbe,desmsk);
3984             if (e11ty == TYfptr)
3985             {
3986                 const reg = findregmsw(retregs);
3987                 const lsreg = findreglsw(retregs);
3988                 floatreg = true;                // use float register
3989                 reflocal = true;
3990                 cdbe.genc1(0x89,                 // MOV floatreg+2,reg
3991                         modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
3992                 cdbe.genc1(0x89,                 // MOV floatreg,lsreg
3993                         modregrm(2, lsreg, BPRM), FLfltreg, 0);
3994                 if (tym1 == TYifunc)
3995                     cdbe.gen1(0x9C);             // PUSHF
3996                 cdbe.genc1(0xFF,                 // CALL [floatreg]
3997                         modregrm(2, 3, BPRM), FLfltreg, 0);
3998             }
3999             else
4000             {
4001                 const reg = findreg(retregs);
4002                 cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
4003                 if (I64)
4004                     code_orrex(cdbe.last(), REX_W);
4005             }
4006         }
4007         else
4008         {
4009             if (tym1 == TYifunc)
4010                 cdb.gen1(0x9C);                 // PUSHF
4011                                                 // CALL [function]
4012             cs.Iflags = 0;
4013             cgstate.stackclean++;
4014             loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk);
4015             cgstate.stackclean--;
4016             freenode(e11);
4017         }
4018         s = null;
4019     }
4020     cdb.append(cdbe);
4021     freenode(e1);
4022 
4023     /* See if we will need the frame pointer.
4024        Calculate it here so we can possibly use BP to fix the stack.
4025      */
4026 static if (0)
4027 {
4028     if (!needframe)
4029     {
4030         // If there is a register available for this basic block
4031         if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
4032         { }
4033         else
4034         {
4035             for (SYMIDX si = 0; si < globsym.length; si++)
4036             {
4037                 Symbol* s = globsym[si];
4038 
4039                 if (s.Sflags & GTregcand && type_size(s.Stype) != 0)
4040                 {
4041                     if (config.flags4 & CFG4optimized)
4042                     {   // If symbol is live in this basic block and
4043                         // isn't already in a register
4044                         if (s.Srange && vec_testbit(dfoidx, s.Srange) &&
4045                             s.Sfl != FLreg)
4046                         {   // Then symbol must be allocated on stack
4047                             needframe = true;
4048                             break;
4049                         }
4050                     }
4051                     else
4052                     {   if (mfuncreg == 0)      // if no registers left
4053                         {   needframe = true;
4054                             break;
4055                         }
4056                     }
4057                 }
4058             }
4059         }
4060     }
4061 }
4062 
4063     reg_t reg1, reg2;
4064     retregs = allocretregs(e.Ety, e.ET, tym1, reg1, reg2);
4065 
4066     assert(retregs || !*pretregs);
4067 
4068     if (!usefuncarg)
4069     {
4070         // If stack needs cleanup
4071         if  (s && s.Sflags & SFLexit)
4072         {
4073             if (config.fulltypes && TARGET_WINDOS)
4074             {
4075                 // the stack walker evaluates the return address, not a byte of the
4076                 // call instruction, so ensure there is an instruction byte after
4077                 // the call that still has the same line number information
4078                 cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
4079             }
4080             /* Function never returns, so don't need to generate stack
4081              * cleanup code. But still need to log the stack cleanup
4082              * as if it did return.
4083              */
4084             cdb.genadjesp(-(numpara + numalign));
4085             stackpush -= numpara + numalign;
4086         }
4087         else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) &&
4088             (!typfunc(tym1) || config.exe == EX_WIN64))
4089         {
4090             if (tym1 == TYhfunc)
4091             {   // Hidden parameter is popped off by the callee
4092                 cdb.genadjesp(-REGSIZE);
4093                 stackpush -= REGSIZE;
4094                 if (numpara + numalign > REGSIZE)
4095                     genstackclean(cdb, numpara + numalign - REGSIZE, retregs);
4096             }
4097             else
4098                 genstackclean(cdb, numpara + numalign, retregs);
4099         }
4100         else
4101         {
4102             cdb.genadjesp(-numpara);  // popped off by the callee's 'RET numpara'
4103             stackpush -= numpara;
4104             if (numalign)               // callee doesn't know about alignment adjustment
4105                 genstackclean(cdb,numalign,retregs);
4106         }
4107     }
4108 
4109     /* Special handling for functions which return a floating point
4110        value in the top of the 8087 stack.
4111      */
4112 
4113     if (retregs & mST0)
4114     {
4115         cdb.genadjfpu(1);
4116         if (*pretregs)                  // if we want the result
4117         {
4118             //assert(global87.stackused == 0);
4119             push87(cdb);                // one item on 8087 stack
4120             fixresult87(cdb,e,retregs,pretregs);
4121             return;
4122         }
4123         else
4124             // Pop unused result off 8087 stack
4125             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4126     }
4127     else if (retregs & mST01)
4128     {
4129         cdb.genadjfpu(2);
4130         if (*pretregs)                  // if we want the result
4131         {
4132             assert(global87.stackused == 0);
4133             push87(cdb);
4134             push87(cdb);                // two items on 8087 stack
4135             fixresult_complex87(cdb, e, retregs, pretregs, true);
4136             return;
4137         }
4138         else
4139         {
4140             // Pop unused result off 8087 stack
4141             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4142             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4143         }
4144     }
4145 
4146     /* Special handling for functions that return one part
4147        in XMM0 and the other part in AX
4148      */
4149     if (*pretregs && retregs)
4150     {
4151         if (reg1 == NOREG || reg2 == NOREG)
4152         {}
4153         else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS)))
4154         {
4155             reg_t lreg, mreg;
4156             if (mask(reg1) & XMMREGS)
4157             {
4158                 lreg = XMM0;
4159                 mreg = XMM1;
4160             }
4161             else
4162             {
4163                 lreg = mask(reg1) & mLSW ? reg1 : AX;
4164                 mreg = mask(reg2) & mMSW ? reg2 : DX;
4165             }
4166             for (int v = 0; v < 2; v++)
4167             {
4168                 if (v ^ (reg2 != lreg))
4169                     genmovreg(cdb,lreg,reg1);
4170                 else
4171                     genmovreg(cdb,mreg,reg2);
4172             }
4173             retregs = mask(lreg) | mask(mreg);
4174         }
4175     }
4176 
4177     /* Special handling for functions which return complex float in XMM0 or RAX. */
4178 
4179     if (I64
4180         && config.exe != EX_WIN64 // broken
4181         && *pretregs && tybasic(e.Ety) == TYcfloat)
4182     {
4183         assert(reg2 == NOREG);
4184         // spill
4185         if (config.exe == EX_WIN64)
4186         {
4187             assert(reg1 == AX);
4188             cdb.genfltreg(STO, reg1, 0);
4189             code_orrex(cdb.last(), REX_W);
4190         }
4191         else
4192         {
4193             assert(reg1 == XMM0);
4194             cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble);
4195         }
4196         // reload real
4197         push87(cdb);
4198         cdb.genfltreg(0xD9, 0, 0);
4199         genfwait(cdb);
4200         // reload imaginary
4201         push87(cdb);
4202         cdb.genfltreg(0xD9, 0, tysize(TYfloat));
4203         genfwait(cdb);
4204 
4205         retregs = mST01;
4206     }
4207 
4208     fixresult(cdb, e, retregs, pretregs);
4209 }
4210 
/***************************
 * Compute the size in bytes of argument e as it will be passed,
 * before any rounding up to the stack alignment.
 *
 * Params:
 *      e = argument expression; must not be an OPparam node
 *      tyf = forwarded to type_parameterSize() for struct/array arguments
 * Returns:
 *      size(tym) for scalar types, type_parameterSize(e.ET, tyf) for
 *      TYstruct and TYarray. Any other type prints its name and asserts.
 */

@trusted
targ_size_t paramsize(elem* e, tym_t tyf)
{
    assert(e.Eoper != OPparam);

    tym_t basicTy = tybasic(e.Ety);

    // Scalars have a size fixed by their type alone
    if (tyscalar(basicTy))
        return size(basicTy);

    // Only aggregates remain valid; anything else is an internal error
    const bool isAggregate = (basicTy == TYstruct || basicTy == TYarray);
    if (!isAggregate)
    {
        printf("%s\n", tym_str(basicTy));
        assert(0);
    }
    return type_parameterSize(e.ET, tyf);
}
4232 
/***************************
 * Generate code to move argument e on the stack.
 *
 * Unlike pushParams(), no PUSH instructions are generated: the argument is
 * stored with MOV / FSTP / XMM stores into a slot addressed through
 * FLfuncarg relative to BP. Not supported for 16 bit code (asserts !I16).
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = argument expression; must not be an OPparam node
 *      stackalign = alignment the argument's size is rounded up to
 *      funcargtos = FLfuncarg offset of the top of this argument's slot;
 *                   the stores below target funcargtos - sz (sz being the
 *                   aligned size) up to funcargtos
 *      tyf = forwarded to paramsize() (presumably the type of the function
 *            being called - confirm against callers)
 */

@trusted
private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf)
{
    //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos);
    //printf("movParams()\n"); elem_print(e);
    assert(!I16);
    assert(e && e.Eoper != OPparam);

    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
        objmod.fltused();

    // REX_W pre-shifted so it can be OR'ed into the modregxrm() operand of
    // genc1() for the two-register store at the end of this function
    int grex = I64 ? REX_W << 16 : 0;

    targ_size_t szb = paramsize(e, tyf);          // size before alignment
    targ_size_t sz = _align(stackalign, szb);       // size after alignment
    assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
    assert((sz & (REGSIZE - 1)) == 0);
    //printf("szb = %d sz = %d\n", cast(int)szb, cast(int)sz);

    /* Fast paths: address constants and literal constants are stored
     * straight into the slot without first evaluating e into a register.
     * Any case that does `break` falls through to the general code below.
     */
    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;
    switch (e.Eoper)
    {
        // These operators are never expected here
        case OPstrctor:
        case OPstrthis:
        case OPstrpar:
        case OPnp_fp:
            assert(0);

        case OPrelconst:
        {
            /* Store the address as an immediate only if e need not be in a
             * register, we are not generating 64 bit PIC or Win64 code, and
             * the symbol lives in FLdata, FLudata or FLextern.
             */
            int fl;
            if (!evalinregister(e) &&
                !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
                ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern)
               )
            {
                // MOV -stackoffset[EBP],&variable
                cs.Iop = 0xC7;
                cs.Irm = modregrm(2,0,BPRM);
                if (I64 && sz == 8)
                    cs.Irex |= REX_W;
                cs.IFL1 = FLfuncarg;
                cs.IEV1.Voffset = funcargtos - REGSIZE;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);
                return;
            }
            break;
        }

        case OPconst:
            if (!evalinregister(e))
            {
                cs.Iop = (sz == 1) ? 0xC6 : 0xC7;
                cs.Irm = modregrm(2,0,BPRM);
                cs.IFL1 = FLfuncarg;
                cs.IEV1.Voffset = funcargtos - sz;      // start at the lowest address of the slot
                cs.IFL2 = FLconst;
                // View the constant's storage as a sequence of targ_size_t chunks
                targ_size_t *p = cast(targ_size_t *) &(e.EV);
                cs.IEV2.Vsize_t = *p;
                if (I64 && tym == TYcldouble)
                    // The alignment of EV.Vcldouble is not the same on the compiler
                    // as on the target
                    goto Lbreak;
                if (I64 && sz >= 8)
                {
                    /* Pre-scan every REGSIZE chunk: if any one of them cannot be
                     * stored as an immediate, give up on the whole constant and
                     * use the general path instead.
                     */
                    int i = cast(int)sz;
                    do
                    {
                        if (*p >= 0x80000000)
                        {   // Use 64 bit register MOV, as the 32 bit one gets sign extended
                            // MOV reg,imm64
                            // MOV EA,reg
                            goto Lbreak;
                        }
                        p = cast(targ_size_t *)(cast(char *) p + REGSIZE);
                        i -= REGSIZE;
                    } while (i > 0);
                    p = cast(targ_size_t *) &(e.EV);    // rewind to the first chunk
                }

                /* Emit one store per REGSIZE chunk, walking upward through the slot.
                 * cs is reused for every chunk and patched between iterations.
                 */
                int i = cast(int)sz;
                do
                {   int regsize = REGSIZE;
                    regm_t retregs = (sz == 1) ? BYTEREGS : allregs;
                    reg_t reg;
                    // presumably reghasvalue() reports a register already known to
                    // hold *p, letting us store from it instead of an immediate
                    if (reghasvalue(retregs,*p,&reg))
                    {
                        cs.Iop = (cs.Iop & 1) | 0x88;       // switch to MOV r/m,reg keeping the size bit
                        cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (I64 && sz == 1 && reg >= 4)
                            cs.Irex |= REX;
                    }
                    if (I64 && sz >= 8)
                        cs.Irex |= REX_W;
                    cdb.gen(&cs);           // MOV EA,const

                    /* Prepare cs for the next chunk: back to the immediate form,
                     * clear the reg field and REX_R, bump the destination offset
                     * and load the next immediate.
                     * NOTE(review): on the final iteration *p is read one chunk
                     * past the last chunk stored; that value is never emitted.
                     */
                    p = cast(targ_size_t *)(cast(char *) p + regsize);
                    cs.Iop = 0xC7;
                    cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0);
                    cs.Irex &= ~REX_R;
                    cs.IEV1.Voffset += regsize;
                    cs.IEV2.Vint = cast(targ_int)*p;
                    i -= regsize;
                } while (i > 0);
                return;
            }

        Lbreak:
            break;

        default:
            break;
    }

    /* General path: evaluate e into registers (XMM, x87 stack or general
     * purpose registers depending on its type) and store them to the slot.
     */
    regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
    // Note the precedence: tyvector(tym) || (fpxmmregs && tyxmmreg(tym) && !(32 bit call result))
    if (tyvector(tym) ||
        config.fpxmmregs && tyxmmreg(tym) &&
        // If not already in x87 register from function call return
        !((e.Eoper == OPcall || e.Eoper == OPucall) && I32))
    {
        retregs = XMMREGS;
        codelem(cdb, e, &retregs, false);
        const op = xmmstore(tym);
        const r = findreg(retregs);
        cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - sz);   // MOV funcarg[EBP],r
        checkSetVex(cdb.last(),tym);
        return;
    }
    else if (tyfloating(tym))
    {
        // Without inline 8087 code this falls through to the register stores at the end
        if (config.inline8087)
        {
            // Evaluate onto the x87 stack: ST0, or ST0 and ST1 for complex values
            retregs = tycomplex(tym) ? mST01 : mST0;
            codelem(cdb, e, &retregs, false);

            // Choose the store-and-pop opcode (op) and its reg field (r)
            // according to the operand width
            opcode_t op;
            uint r;
            switch (tym)
            {
                case TYfloat:
                case TYifloat:
                case TYcfloat:
                    op = 0xD9;
                    r = 3;
                    break;

                case TYdouble:
                case TYidouble:
                case TYdouble_alias:
                case TYcdouble:
                    op = 0xDD;
                    r = 3;
                    break;

                case TYldouble:
                case TYildouble:
                case TYcldouble:
                    op = 0xDB;
                    r = 7;
                    break;

                default:
                    assert(0);
            }
            if (tycomplex(tym))
            {
                // The first value popped goes into the upper half of the slot
                // FSTP sz/2[ESP]
                cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2);
                pop87();
            }
            pop87();
            cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz);    // FSTP -sz[EBP]
            return;
        }
    }

    // Everything else: evaluate into general purpose register(s) and store them
    scodelem(cdb, e, &retregs, 0, true);
    if (sz <= REGSIZE)
    {
        uint r = findreg(retregs);
        cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);   // MOV -REGSIZE[EBP],r
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);      // 8 byte store needs REX_W
    }
    else if (sz == REGSIZE * 2)
    {
        // Register pair: most significant word goes in the upper half of the slot
        uint r = findregmsw(retregs);
        cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);    // MOV -REGSIZE[EBP],r
        r = findreglsw(retregs);
        cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r
    }
    else
        assert(0);      // larger values cannot be stored from general purpose registers here
}
4438 
4439 
4440 /***************************
4441  * Generate code to push argument e on the stack.
4442  * stackpush is incremented by stackalign for each PUSH.
4443  */
4444 
4445 @trusted
4446 void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf)
4447 {
4448     //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
4449     //printf("params()\n"); elem_print(e);
4450     stackchanged = 1;
4451     assert(e && e.Eoper != OPparam);
4452 
4453     tym_t tym = tybasic(e.Ety);
4454     if (tyfloating(tym))
4455         objmod.fltused();
4456 
4457     int grex = I64 ? REX_W << 16 : 0;
4458 
4459     targ_size_t szb = paramsize(e, tyf);          // size before alignment
4460     targ_size_t sz = _align(stackalign,szb);      // size after alignment
4461     assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
4462     assert((sz & (REGSIZE - 1)) == 0);
4463 
4464     switch (e.Eoper)
4465     {
4466     version (SCPP)
4467     {
4468         case OPstrctor:
4469         {
4470             elem* e1 = e.EV.E1;
4471             docommas(cdb,&e1);              // skip over any comma expressions
4472 
4473             cod3_stackadj(cdb, sz);
4474             stackpush += sz;
4475             cdb.genadjesp(sz);
4476 
4477             // Find OPstrthis and set it to stackpush
4478             exp2_setstrthis(e1, null, stackpush, null);
4479 
4480             regm_t retregs = 0;
4481             codelem(cdb, e1, &retregs, true);
4482             freenode(e);
4483             return;
4484         }
4485         case OPstrthis:
4486             // This is the parameter for the 'this' pointer corresponding to
4487             // OPstrctor. We push a pointer to an object that was already
4488             // allocated on the stack by OPstrctor.
4489         {
4490             regm_t retregs = allregs;
4491             reg_t reg;
4492             allocreg(cdb, &retregs, &reg, TYoffset);
4493             genregs(cdb, 0x89, SP, reg);        // MOV reg,SP
4494             if (I64)
4495                 code_orrex(cdb.last(), REX_W);
4496             uint np = stackpush - e.EV.Vuns;         // stack delta to parameter
4497             cdb.genc2(0x81, grex | modregrmx(3, 0, reg), np); // ADD reg,np
4498             if (sz > REGSIZE)
4499             {
4500                 cdb.gen1(0x16);                     // PUSH SS
4501                 stackpush += REGSIZE;
4502             }
4503             cdb.gen1(0x50 + (reg & 7));             // PUSH reg
4504             if (reg & 8)
4505                 code_orrex(cdb.last(), REX_B);
4506             stackpush += REGSIZE;
4507             cdb.genadjesp(sz);
4508             freenode(e);
4509             return;
4510         }
4511     }
4512 
4513         case OPstrpar:
4514         {
4515             uint rm;
4516 
4517             elem* e1 = e.EV.E1;
4518             if (sz == 0)
4519             {
4520                 docommas(cdb, &e1); // skip over any commas
4521 
4522                 const stackpushsave = stackpush;
4523                 const stackcleansave = cgstate.stackclean;
4524                 cgstate.stackclean = 0;
4525 
4526                 regm_t retregs = 0;
4527                 codelem(cdb,e1,&retregs,true);
4528 
4529                 assert(cgstate.stackclean == 0);
4530                 cgstate.stackclean = stackcleansave;
4531                 genstackclean(cdb,stackpush - stackpushsave,0);
4532 
4533                 freenode(e);
4534                 return;
4535             }
4536             if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar)
4537             {
4538                 freenode(e);
4539                 e = e1;
4540                 goto L1;
4541             }
4542             docommas(cdb,&e1);             // skip over any commas
4543             code_flags_t seg = 0;          // assume no seg override
4544             regm_t retregs = sz ? IDXREGS : 0;
4545             bool doneoff = false;
4546             uint pushsize = REGSIZE;
4547             uint op16 = 0;
4548             if (!I16 && sz & 2)     // if odd number of words to push
4549             {
4550                 pushsize = 2;
4551                 op16 = 1;
4552             }
4553             else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0)
4554             {
4555                 pushsize = 4;       // push DWORDs at a time
4556                 op16 = 1;
4557             }
4558             uint npushes = cast(uint)(sz / pushsize);
4559             switch (e1.Eoper)
4560             {
4561                 case OPind:
4562                     if (sz)
4563                     {
4564                         switch (tybasic(e1.EV.E1.Ety))
4565                         {
4566                             case TYfptr:
4567                             case TYhptr:
4568                                 seg = CFes;
4569                                 retregs |= mES;
4570                                 break;
4571 
4572                             case TYsptr:
4573                                 if (config.wflags & WFssneds)
4574                                     seg = CFss;
4575                                 break;
4576 
4577                             case TYfgPtr:
4578                                 if (I32)
4579                                      seg = CFgs;
4580                                 else if (I64)
4581                                      seg = CFfs;
4582                                 else
4583                                      assert(0);
4584                                 break;
4585 
4586                             case TYcptr:
4587                                 seg = CFcs;
4588                                 break;
4589 
4590                             default:
4591                                 break;
4592                         }
4593                     }
4594                     codelem(cdb, e1.EV.E1, &retregs, false);
4595                     freenode(e1);
4596                     break;
4597 
4598                 case OPvar:
4599                     /* Symbol is no longer a candidate for a register */
4600                     e1.EV.Vsym.Sflags &= ~GTregcand;
4601 
4602                     if (!e1.Ecount && npushes > 4)
4603                     {
4604                         /* Kludge to point at last word in struct. */
4605                         /* Don't screw up CSEs.                 */
4606                         e1.EV.Voffset += sz - pushsize;
4607                         doneoff = true;
4608                     }
4609                     //if (LARGEDATA) /* if default isn't DS */
4610                     {
4611                         static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ];
4612 
4613                         int fl = el_fl(e1);
4614                         if (fl == FLfardata)
4615                         {
4616                             seg = CFes;
4617                             retregs |= mES;
4618                         }
4619                         else
4620                         {
4621                             uint s = segfl[fl];
4622                             assert(s < 4);
4623                             seg = segtocf[s];
4624                             if (seg == CFss && !(config.wflags & WFssneds))
4625                                 seg = 0;
4626                         }
4627                     }
4628                     if (e1.Ety & mTYfar)
4629                     {
4630                         seg = CFes;
4631                         retregs |= mES;
4632                     }
4633                     cdrelconst(cdb, e1, &retregs);
4634                     // Reverse the effect of the previous add
4635                     if (doneoff)
4636                         e1.EV.Voffset -= sz - pushsize;
4637                     freenode(e1);
4638                     break;
4639 
4640                 case OPstreq:
4641                 //case OPcond:
4642                     if (config.exe & EX_segmented)
4643                     {
4644                         seg = CFes;
4645                         retregs |= mES;
4646                     }
4647                     codelem(cdb, e1, &retregs, false);
4648                     break;
4649 
4650                 case OPpair:
4651                 case OPrpair:
4652                     pushParams(cdb, e1, stackalign, tyf);
4653                     freenode(e);
4654                     return;
4655 
4656                 default:
4657                     elem_print(e1);
4658                     assert(0);
4659             }
4660             reg_t reg = findreglsw(retregs);
4661             rm = I16 ? regtorm[reg] : regtorm32[reg];
4662             if (op16)
4663                 seg |= CFopsize;            // operand size
4664             if (npushes <= 4)
4665             {
4666                 assert(!doneoff);
4667                 for (; npushes > 1; --npushes)
4668                 {
4669                     cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1));  // PUSH [reg]
4670                     code_orflag(cdb.last(),seg);
4671                     cdb.genadjesp(pushsize);
4672                 }
4673                 cdb.gen2(0xFF,buildModregrm(0, 6, rm));     // PUSH [reg]
4674                 cdb.last().Iflags |= seg;
4675                 cdb.genadjesp(pushsize);
4676             }
4677             else if (sz)
4678             {
4679                 getregs_imm(cdb, mCX | retregs);
4680                                                     // MOV CX,sz/2
4681                 movregconst(cdb, CX, npushes, 0);
4682                 if (!doneoff)
4683                 {   // This should be done when
4684                     // reg is loaded. Fix later
4685                                                     // ADD reg,sz-pushsize
4686                     cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize);
4687                 }
4688                 getregs(cdb,mCX);                       // the LOOP decrements it
4689                 cdb.gen2(0xFF, buildModregrm(0, 6, rm));   // PUSH [reg]
4690                 cdb.last().Iflags |= seg | CFtarg2;
4691                 code* c3 = cdb.last();
4692                 cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize);  // SUB reg,pushsize
4693                 if (I16 || config.flags4 & CFG4space)
4694                     genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3
4695                 else
4696                 {
4697                     if (I64)
4698                         cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX
4699                     else
4700                         cdb.gen1(0x48 + CX);            // DEC CX
4701                     genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3
4702                 }
4703                 regimmed_set(CX,0);
4704                 cdb.genadjesp(cast(int)sz);
4705             }
4706             stackpush += sz;
4707             freenode(e);
4708             return;
4709         }
4710 
4711         case OPind:
4712             if (!e.Ecount)                         /* if *e1       */
4713             {
4714                 if (sz < REGSIZE)
4715                 {
4716                     /* Don't push REGSIZE quantity because it may
4717                      * straddle past the end of valid memory
4718                      */
4719                     break;
4720                 }
4721                 if (sz == REGSIZE)
4722                     goto case OPvar;    // handle it with loadea()
4723 
4724                 // Avoid PUSH MEM on the Pentium when optimizing for speed
4725                 if (config.flags4 & CFG4speed &&
4726                     (config.target_cpu >= TARGET_80486 &&
4727                      config.target_cpu <= TARGET_PentiumMMX) &&
4728                     sz <= 2 * REGSIZE &&
4729                     !tyfloating(tym))
4730                     break;
4731 
4732                 if (tym == TYldouble || tym == TYildouble || tycomplex(tym))
4733                     break;
4734 
4735                 code cs;
4736                 cs.Iflags = 0;
4737                 cs.Irex = 0;
4738                 if (I32)
4739                 {
4740                     assert(sz >= REGSIZE * 2);
4741                     loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4
4742                     cdb.genadjesp(REGSIZE);
4743                     stackpush += REGSIZE;
4744                     sz -= REGSIZE;
4745 
4746                     if (sz > REGSIZE)
4747                     {
4748                         while (sz)
4749                         {
4750                             cs.IEV1.Voffset -= REGSIZE;
4751                             cdb.gen(&cs);                    // PUSH EA+...
4752                             cdb.genadjesp(REGSIZE);
4753                             stackpush += REGSIZE;
4754                             sz -= REGSIZE;
4755                         }
4756                         freenode(e);
4757                         return;
4758                     }
4759                 }
4760                 else
4761                 {
4762                     if (sz == DOUBLESIZE)
4763                     {
4764                         loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6
4765                         cs.IEV1.Voffset -= REGSIZE;
4766                         cdb.gen(&cs);                    // PUSH EA+4
4767                         cdb.genadjesp(REGSIZE);
4768                         getlvalue_lsw(&cs);
4769                         cdb.gen(&cs);                    // PUSH EA+2
4770                     }
4771                     else /* TYlong */
4772                         loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2
4773                     cdb.genadjesp(REGSIZE);
4774                 }
4775                 stackpush += sz;
4776                 getlvalue_lsw(&cs);
4777                 cdb.gen(&cs);                            // PUSH EA
4778                 cdb.genadjesp(REGSIZE);
4779                 freenode(e);
4780                 return;
4781             }
4782             break;
4783 
4784         case OPnp_fp:
4785             if (!e.Ecount)                         /* if (far *)e1 */
4786             {
4787                 elem* e1 = e.EV.E1;
4788                 tym_t tym1 = tybasic(e1.Ety);
4789                 /* BUG: what about pointers to functions?   */
4790                 int segreg;
4791                 switch (tym1)
4792                 {
4793                     case TYnptr: segreg = 3<<3; break;
4794                     case TYcptr: segreg = 1<<3; break;
4795                     default:     segreg = 2<<3; break;
4796                 }
4797                 if (I32 && stackalign == 2)
4798                     cdb.gen1(0x66);                 // push a word
4799                 cdb.gen1(0x06 + segreg);            // PUSH SEGREG
4800                 if (I32 && stackalign == 2)
4801                     code_orflag(cdb.last(), CFopsize);        // push a word
4802                 cdb.genadjesp(stackalign);
4803                 stackpush += stackalign;
4804                 pushParams(cdb, e1, stackalign, tyf);
4805                 freenode(e);
4806                 return;
4807             }
4808             break;
4809 
4810         case OPrelconst:
4811             if (config.exe & EX_segmented)
4812             {
4813                 /* Determine if we can just push the segment register           */
4814                 /* Test size of type rather than TYfptr because of (long)(&v)   */
4815                 Symbol* s = e.EV.Vsym;
4816                 //if (sytab[s.Sclass] & SCSS && !I32)  // if variable is on stack
4817                 //    needframe = true;                 // then we need stack frame
4818                 int fl;
4819                 if (_tysize[tym] == tysize(TYfptr) &&
4820                     (fl = s.Sfl) != FLfardata &&
4821                     /* not a function that CS might not be the segment of       */
4822                     (!((fl == FLfunc || s.ty() & mTYcs) &&
4823                       (s.Sclass == SC.comdat || s.Sclass == SC.extern_ ||
4824                        s.Sclass == SC.inline || config.wflags & WFthunk)) ||
4825                      (fl == FLfunc && config.exe == EX_DOSX)
4826                     )
4827                    )
4828                 {
4829                     stackpush += sz;
4830                     cdb.gen1(0x06 +           // PUSH SEGREG
4831                             (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3));
4832                     cdb.genadjesp(REGSIZE);
4833 
4834                     if (config.target_cpu >= TARGET_80286 && !e.Ecount)
4835                     {
4836                         getoffset(cdb, e, STACK);
4837                         freenode(e);
4838                         return;
4839                     }
4840                     else
4841                     {
4842                         regm_t retregs;
4843                         offsetinreg(cdb, e, &retregs);
4844                         const reg = findreg(retregs);
4845                         genpush(cdb,reg);                    // PUSH reg
4846                         cdb.genadjesp(REGSIZE);
4847                     }
4848                     return;
4849                 }
4850                 if (config.target_cpu >= TARGET_80286 && !e.Ecount)
4851                 {
4852                     stackpush += sz;
4853                     if (_tysize[tym] == tysize(TYfptr))
4854                     {
4855                         // PUSH SEG e
4856                         cdb.gencs(0x68,0,FLextern,s);
4857                         cdb.last().Iflags = CFseg;
4858                         cdb.genadjesp(REGSIZE);
4859                     }
4860                     getoffset(cdb, e, STACK);
4861                     freenode(e);
4862                     return;
4863                 }
4864             }
4865             break;                          /* else must evaluate expression */
4866 
4867         case OPvar:
4868         L1:
4869             if (config.flags4 & CFG4speed &&
4870                      (config.target_cpu >= TARGET_80486 &&
4871                       config.target_cpu <= TARGET_PentiumMMX) &&
4872                      sz <= 2 * REGSIZE &&
4873                      !tyfloating(tym))
4874             {   // Avoid PUSH MEM on the Pentium when optimizing for speed
4875                 break;
4876             }
4877             else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym))
4878                 break;                      // no PUSH MEM
4879             else
4880             {
4881                 int regsize = REGSIZE;
4882                 uint flag = 0;
4883                 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 &&
4884                     !e.Ecount)
4885                 {
4886                     regsize = 4;
4887                     flag |= CFopsize;
4888                 }
4889                 code cs;
4890                 cs.Iflags = 0;
4891                 cs.Irex = 0;
4892                 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);    // PUSH EA+sz-2
4893                 code_orflag(cdb.last(), flag);
4894                 cdb.genadjesp(REGSIZE);
4895                 stackpush += sz;
4896                 while (cast(targ_int)(sz -= regsize) > 0)
4897                 {
4898                     loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);
4899                     code_orflag(cdb.last(), flag);
4900                     cdb.genadjesp(REGSIZE);
4901                 }
4902                 freenode(e);
4903                 return;
4904             }
4905 
4906         case OPconst:
4907         {
4908             char pushi = 0;
4909             uint flag = 0;
4910             int regsize = REGSIZE;
4911 
4912             if (tycomplex(tym))
4913                 break;
4914 
4915             if (I64 && tyfloating(tym) && sz > 4 && boolres(e))
4916                 // Can't push 64 bit non-zero args directly
4917                 break;
4918 
4919             if (I32 && szb == 10)           // special case for long double constants
4920             {
4921                 assert(sz == 12);
4922                 targ_int value = e.EV.Vushort8[4]; // pick upper 2 bytes of Vldouble
4923                 stackpush += sz;
4924                 cdb.genadjesp(cast(int)sz);
4925                 for (int i = 0; i < 3; ++i)
4926                 {
4927                     reg_t reg;
4928                     if (reghasvalue(allregs, value, &reg))
4929                         cdb.gen1(0x50 + reg);           // PUSH reg
4930                     else
4931                         cdb.genc2(0x68,0,value);        // PUSH value
4932                     value = e.EV.Vulong4[i ^ 1];       // treat Vldouble as 2 element array of 32 bit uint
4933                 }
4934                 freenode(e);
4935                 return;
4936             }
4937 
4938             assert(I64 || sz <= tysize(TYldouble));
4939             int i = cast(int)sz;
4940             if (!I16 && i == 2)
4941                 flag = CFopsize;
4942 
4943             if (config.target_cpu >= TARGET_80286)
4944     //       && (e.Ecount == 0 || e.Ecount != e.Ecomsub))
4945             {
4946                 pushi = 1;
4947                 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4)
4948                 {
4949                     regsize = 4;
4950                     flag = CFopsize;
4951                 }
4952             }
4953             else if (i == REGSIZE)
4954                 break;
4955 
4956             stackpush += sz;
4957             cdb.genadjesp(cast(int)sz);
4958             targ_uns* pi = &e.EV.Vuns;     // point to start of Vdouble
4959             targ_ushort* ps = cast(targ_ushort *) pi;
4960             targ_ullong* pl = cast(targ_ullong *)pi;
4961             i /= regsize;
4962             do
4963             {
4964                 if (i)                      /* be careful not to go negative */
4965                     i--;
4966 
4967                 targ_size_t value;
4968                 switch (regsize)
4969                 {
4970                     case 2:
4971                         value = ps[i];
4972                         break;
4973 
4974                     case 4:
4975                         if (tym == TYldouble || tym == TYildouble)
4976                             /* The size is 10 bytes, and since we have 2 bytes left over,
4977                              * just read those 2 bytes, not 4.
4978                              * Otherwise we're reading uninitialized data.
4979                              * I.e. read 4 bytes, 4 bytes, then 2 bytes
4980                              */
4981                             value = i == 2 ? ps[4] : pi[i]; // 80 bits
4982                         else
4983                             value = pi[i];
4984                         break;
4985 
4986                     case 8:
4987                         value = cast(targ_size_t)pl[i];
4988                         break;
4989 
4990                     default:
4991                         assert(0);
4992                 }
4993 
4994                 reg_t reg;
4995                 if (pushi)
4996                 {
4997                     if (I64 && regsize == 8 && value != cast(int)value)
4998                     {
4999                         regwithvalue(cdb,allregs,value,&reg,64);
5000                         goto Preg;          // cannot push imm64 unless it is sign extended 32 bit value
5001                     }
5002                     if (regsize == REGSIZE && reghasvalue(allregs,value,&reg))
5003                         goto Preg;
5004                     cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value
5005                 }
5006                 else
5007                 {
5008                     regwithvalue(cdb, allregs, value, &reg, 0);
5009                 Preg:
5010                     genpush(cdb,reg);         // PUSH reg
5011                 }
5012                 code_orflag(cdb.last(), flag);              // operand size
5013             } while (i);
5014             freenode(e);
5015             return;
5016         }
5017 
5018         case OPpair:
5019         {
5020             if (e.Ecount)
5021                 break;
5022             const op1 = e.EV.E1.Eoper;
5023             const op2 = e.EV.E2.Eoper;
5024             if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
5025                 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
5026             {
5027                 pushParams(cdb, e.EV.E2, stackalign, tyf);
5028                 pushParams(cdb, e.EV.E1, stackalign, tyf);
5029                 freenode(e);
5030             }
5031             else if (tyfloating(e.EV.E1.Ety) ||
5032                      tyfloating(e.EV.E2.Ety))
5033             {
5034                 // Need special handling because of order of evaluation of e1 and e2
5035                 break;
5036             }
5037             else
5038             {
5039                 regm_t regs = allregs;
5040                 codelem(cdb, e, &regs, false);
5041                 genpush(cdb, findregmsw(regs)); // PUSH msreg
5042                 genpush(cdb, findreglsw(regs)); // PUSH lsreg
5043                 cdb.genadjesp(cast(int)sz);
5044                 stackpush += sz;
5045             }
5046             return;
5047         }
5048 
5049         case OPrpair:
5050         {
5051             if (e.Ecount)
5052                 break;
5053             const op1 = e.EV.E1.Eoper;
5054             const op2 = e.EV.E2.Eoper;
5055             if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
5056                 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
5057             {
5058                 pushParams(cdb, e.EV.E1, stackalign, tyf);
5059                 pushParams(cdb, e.EV.E2, stackalign, tyf);
5060                 freenode(e);
5061             }
5062             else if (tyfloating(e.EV.E1.Ety) ||
5063                      tyfloating(e.EV.E2.Ety))
5064             {
5065                 // Need special handling because of order of evaluation of e1 and e2
5066                 break;
5067             }
5068             else
5069             {
5070                 regm_t regs = allregs;
5071                 codelem(cdb, e, &regs, false);
5072                 genpush(cdb, findregmsw(regs)); // PUSH msreg
5073                 genpush(cdb, findreglsw(regs)); // PUSH lsreg
5074                 cdb.genadjesp(cast(int)sz);
5075                 stackpush += sz;
5076             }
5077             return;
5078         }
5079 
5080         default:
5081             break;
5082     }
5083 
5084     regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
5085     if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs))
5086     {
5087         regm_t retxmm = XMMREGS;
5088         codelem(cdb, e, &retxmm, false);
5089         stackpush += sz;
5090         cdb.genadjesp(cast(int)sz);
5091         cod3_stackadj(cdb, cast(int)sz);
5092         const op = xmmstore(tym);
5093         const r = findreg(retxmm);
5094         cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP));   // MOV [ESP],r
5095         checkSetVex(cdb.last(),tym);
5096         return;
5097     }
5098     else if (tyfloating(tym))
5099     {
5100         if (config.inline8087)
5101         {
5102             retregs = tycomplex(tym) ? mST01 : mST0;
5103             codelem(cdb, e, &retregs, false);
5104             stackpush += sz;
5105             cdb.genadjesp(cast(int)sz);
5106             cod3_stackadj(cdb, cast(int)sz);
5107             opcode_t op;
5108             uint r;
5109             switch (tym)
5110             {
5111                 case TYfloat:
5112                 case TYifloat:
5113                 case TYcfloat:
5114                     op = 0xD9;
5115                     r = 3;
5116                     break;
5117 
5118                 case TYdouble:
5119                 case TYidouble:
5120                 case TYdouble_alias:
5121                 case TYcdouble:
5122                     op = 0xDD;
5123                     r = 3;
5124                     break;
5125 
5126                 case TYldouble:
5127                 case TYildouble:
5128                 case TYcldouble:
5129                     op = 0xDB;
5130                     r = 7;
5131                     break;
5132 
5133                 default:
5134                     assert(0);
5135             }
5136             if (!I16)
5137             {
5138                 if (tycomplex(tym))
5139                 {
5140                     // FSTP sz/2[ESP]
5141                     cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2);
5142                     pop87();
5143                 }
5144                 pop87();
5145                 cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP));   // FSTP [ESP]
5146             }
5147             else
5148             {
5149                 retregs = IDXREGS;                             // get an index reg
5150                 reg_t reg;
5151                 allocreg(cdb, &retregs, &reg, TYoffset);
5152                 genregs(cdb, 0x89, SP, reg);         // MOV reg,SP
5153                 pop87();
5154                 cdb.gen2(op, modregrm(0, r, regtorm[reg]));       // FSTP [reg]
5155             }
5156             if (LARGEDATA)
5157                 cdb.last().Iflags |= CFss;     // want to store into stack
5158             genfwait(cdb);         // FWAIT
5159             return;
5160         }
5161         else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
5162             retregs = mSTACK;
5163     }
5164     else if (I16 && sz == 8)             // if long long
5165         retregs = mSTACK;
5166 
5167     scodelem(cdb,e,&retregs,0,true);
5168     if (retregs != mSTACK)                // if stackpush not already inc'd
5169         stackpush += sz;
5170     if (sz <= REGSIZE)
5171     {
5172         genpush(cdb,findreg(retregs));        // PUSH reg
5173         cdb.genadjesp(cast(int)REGSIZE);
5174     }
5175     else if (sz == REGSIZE * 2)
5176     {
5177         genpush(cdb,findregmsw(retregs));     // PUSH msreg
5178         genpush(cdb,findreglsw(retregs));     // PUSH lsreg
5179         cdb.genadjesp(cast(int)sz);
5180     }
5181 }
5182 
/*******************************
 * Load the offset portion of e into an index register.
 *
 * When e is a common subexpression (Ecount set and different from Ecomsub)
 * and one of the candidate registers is recorded in regcon.cse.value[] as
 * holding e, that register is claimed with getregs() and reused.
 * Otherwise a register is allocated from mLSW and getoffset() generates
 * the code that fills it.
 *
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = elem whose offset is wanted; passed to freenode() before returning
 *      pretregs = receives the mask of the register holding the offset
 */

@trusted
void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    bool reused = false;        // true once an existing CSE register was picked

    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // Candidates: mLSW registers flagged in regcon.cse.mval that are
        // in neither regcon.cse.mops nor regcon.mvar
        regm_t candidates = mLSW & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar;
        for (uint r = 0; candidates; ++r)
        {
            const bit = mask(r);
            if (bit & candidates && regcon.cse.value[r] == e)
            {
                *pretregs = bit;
                getregs(cdb, *pretregs);
                reused = true;
                break;
            }
            candidates &= ~bit;         // register r examined, drop it
        }
    }

    if (!reused)
    {
        // Nothing to reuse: allocate a register and load the offset into it
        reg_t reg;
        *pretregs = mLSW;               // want only offset
        allocreg(cdb, pretregs, &reg, TYoffset);
        getoffset(cdb, e, reg);
    }

    cssave(e, *pretregs, false);
    freenode(e);
}
5215 
/******************************
 * Generate code to load data into registers.
 *
 * Three major cases are handled, in this order:
 *  1. *pretregs == mPSW: only the condition codes are wanted, so code is
 *     generated that tests the value against zero.
 *  2. e is an OPconst: the constant is materialized in registers.
 *  3. anything else: the value is loaded from its effective address
 *     (or, when possible, the register a parameter arrived in is reused).
 *
 * Struct values are handed to cdrelconst(); floating point values may be
 * handed to cloadxmm(), load87() or cload87() depending on the configuration
 * and the requested registers.
 *
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = elem to load
 *      pretregs = on entry, mask of acceptable result registers (may include
 *                 mPSW and/or mSTACK); nothing is generated when it is 0.
 *                 It is updated through fixresult() on the register paths.
 */


@trusted
void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
    //  if (debugw)
    //        printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
    //  elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)                 // no result wanted, nothing to do
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;

    /* Case 1: only the flags are wanted */
    if (*pretregs == mPSW)
    {
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            /* Evaluate using XMM register and XMM instruction.
             * This affects jmpopcode()
             */
            if (s.Sclass == SC.parameter)
                refparam = true;
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));           // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));           // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);        // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);  // get a register
            if (I32)                                    // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0); // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)       // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);     // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);       // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                             // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));            // SHL reg,1

            // OR the remaining words into reg, most significant first
            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))               // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;

    /* Case 2: load a constant */
    if (e.Eoper == OPconst)
    {
        if (tyvector(tym) && forregs & XMMREGS)
        {
            assert(!flags);
            reg_t xreg;
            allocreg(cdb, &forregs, &xreg, tym);     // allocate registers
            movxmmconst(cdb, xreg, tym, &e.EV, flags);
            fixresult(cdb, e, forregs, pretregs);
            return;
        }

        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register reported by reghasvalue() as already holding value
        if (sz == REGSIZE && reghasvalue(forregs, value, &reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);        // allocate registers
        regcon.immed.mval = save;               // allocreg could unnecessarily clear .mval
        if (sz <= REGSIZE)
        {
            // Build the size bits passed to movregconst() in flags
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {
                movxmmconst(cdb, reg, tym, &e.EV, 0);
                flags = 0;
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0); // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);    // MOV ES,reg
                msw = lsw;                               // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                // View the 8 byte constant as two 32 bit halves
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);    // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);               // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);               // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);           // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                // 8 byte value in AX,BX,CX,DX with AX most significant
                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lo, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.hi, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    /* Case 3: load from memory or from a register variable */
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            const reg_t preg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            const regm_t pregm = mask(preg);

            if (!(sz <= 2 && pregm & XMMREGS))   // no SIMD instructions to load 1 or 2 byte quantities
            {
                if (debugr)
                    printf("%s.%d is fastpar and using register %s\n",
                           e.EV.Vsym.Sident.ptr,
                           cast(int)e.EV.Voffset,
                           regm_str(pregm));

                mfuncreg &= ~pregm;
                regcon.used |= pregm;
                fixresult(cdb,e,pregm,pretregs);
                return;
            }
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                    printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                               // byte MOV
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);    // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);   // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly     */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            // Load the register pair: most significant half first, keeping
            // the whole pair (forregs) intact while each EA is formed
            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                /* Load the value into AX,BX,CX,DX. The keep mask of each
                 * load lists the registers already filled by the earlier
                 * loads, so they are not destroyed while the EA is formed.
                 */
                assert(reg == AX);
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0,           0); // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX,         0); // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX,     0); // MOV CX,data+2
                // BX holds data+4 by now and must be kept as well
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}
5675 
5676 }