1 /**
2  * Code generation 1
3  *
4  * Handles function calls: putting arguments in registers / on the stack, and jumping to the function.
5  *
6  * Compiler implementation of the
7  * $(LINK2 https://www.dlang.org, D programming language).
8  *
9  * Copyright:   Copyright (C) 1984-1998 by Symantec
10  *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
11  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
12  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
13  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
14  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
15  */
16 
17 module dmd.backend.cod1;
18 
19 import core.bitop;
20 import core.stdc.stdio;
21 import core.stdc.stdlib;
22 import core.stdc.string;
23 
24 import dmd.backend.backend;
25 import dmd.backend.cc;
26 import dmd.backend.cdef;
27 import dmd.backend.code;
28 import dmd.backend.code_x86;
29 import dmd.backend.codebuilder;
30 import dmd.backend.mem;
31 import dmd.backend.el;
32 import dmd.backend.global;
33 import dmd.backend.obj;
34 import dmd.backend.oper;
35 import dmd.backend.rtlsym;
36 import dmd.backend.ty;
37 import dmd.backend.type;
38 import dmd.backend.xmm;
39 
40 
41 import dmd.backend.cg : segfl, stackfl;
42 
43 nothrow:
44 @safe:
45 
/// Emit a register/register instruction with opcode 0x09 (x86 `OR r/m,reg`)
/// by forwarding to genregs(); note that `f` is passed before `t`.
private void genorreg(ref CodeBuilder c, uint t, uint f)
{
    genregs(c, 0x09, f, t);
}
47 
48 /* Arrays to convert from index register number to r/m field.
 * Both are indexed in the register order shown in the column header below.
 * NOTE(review): -1 presumably marks a register that cannot be used as an
 * index in that addressing mode - confirm against the users of these tables.
 * The regtorm values for BX, DI and SI (7, 5, 4) are the same r/m values
 * that getaddrmode() uses for [BX], [DI] and [SI] in 16 bit code.
 */
49                                        /* AX CX DX BX SP BP SI DI       */
50 private __gshared const byte[8] regtorm32 =   [  0, 1, 2, 3,-1, 5, 6, 7 ];
51 __gshared const   byte[8] regtorm   =   [ -1,-1,-1, 7,-1, 6, 4, 5 ];
52 
53 //void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
54 //        regm_t *pretregs,regm_t keepmsk, bool usefuncarg);
55 
/*********************************
 * Decide whether parameter `s` should stay in the register it arrived in,
 * instead of being assigned a register by the register allocator.
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter (SC.fastpar or SC.shadowreg)
 *      that should be left in the register it came in
 */
@trusted
bool regParamInPreg(Symbol* s)
{
    //printf("regParamInPreg %s\n", s.Sident.ptr);
    const isRegisterParam = s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg;
    if (!isRegisterParam)
        return false;

    // Without optimization it always stays put; with optimization it stays
    // put only when the symbol is not flagged as a register candidate.
    if (!(config.flags4 & CFG4optimized))
        return true;
    return !(s.Sflags & GTregcand);
}
72 
73 
/**************************
 * Determine if e is a 32 bit scaled index addressing mode, i.e. after
 * skipping any leading comma operators it is a non-CSE'd left shift by a
 * constant no larger than 3.
 * Must not be called when generating 16 bit code.
 * Returns:
 *      0       not a scaled index addressing mode (also the result for a shift by 0)
 *      !=0     the value for ss in the SIB byte
 */

@trusted
int isscaledindex(elem *e)
{
    assert(!I16);

    // Step over comma operators to the operand that yields the value
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    if (e.Eoper != OPshl || e.Ecount)
        return 0;

    elem* shiftCount = e.EV.E2;
    if (shiftCount.Eoper != OPconst)
        return 0;

    targ_uns ss = shiftCount.EV.Vuns;
    if (ss > 3)
        ss = 0;
    return ss;
}
97 
/*********************************************
 * Generate code for an elem for which isscaledindex(e) returned a non-zero result.
 * Params:
 *      cdb      = code sink
 *      e        = OPshl elem, optionally wrapped in OPcomma operators
 *      pidxregs = register mask handed to scodelem() for the shift's left operand
 *      keepmsk  = register mask passed through to scodelem()
 */

@trusted
/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Evaluate the left side of each leading comma with an empty result mask,
    // then release the comma node and continue with its right side.
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
        regm_t noResult = 0;
        scodelem(cdb, e.EV.E1, &noResult, keepmsk, true);
        freenode(e);
    }

    assert(e.Eoper == OPshl);

    // Only the value being shifted is loaded; the constant shift count is
    // released without generating code for it.
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);
    freenode(e.EV.E2);
    freenode(e);
}
118 
119 /***********************************
120  * Determine index if we can do two LEA instructions as a multiply.
121  * Returns:
122  *      0       can't do it
123  */
124 
// Bit flags stored in Ssindex.ssflags; the entries of ssindex_array OR them together.
125 enum
126 {
127     SSFLnobp       = 1,       /// can't have EBP in relconst
128     SSFLnobase1    = 2,       /// no base register for first LEA
129     SSFLnobase     = 4,       /// no base register
130     SSFLlea        = 8,       /// can do it in one LEA
131 }
132 
// One entry of ssindex_array: a multiplier and how to form it with LEA instructions.
133 struct Ssindex
134 {
135     targ_uns product;    /// the multiplier that ssindex() compares against
136     ubyte ss1;           /// NOTE(review): appears to be the SIB scale (ss) of the first LEA - confirm with users
137     ubyte ss2;           /// NOTE(review): appears to be the SIB scale (ss) of the second LEA - confirm with users
138     ubyte ssflags;       /// SSFLxxxx
139 }
140 
// Table of multipliers searched by ssindex().
// Each entry is { product, ss1, ss2, ssflags }; entry [0] is never matched
// because ssindex() starts its search at index 1 and returns 0 for "not found".
141 private __gshared const Ssindex[21] ssindex_array =
142 [
143     { 0, 0, 0 },               // [0] is a place holder
144 
     // products flagged SSFLlea (can do it in one LEA)
145     { 3,  1, 0, SSFLnobp | SSFLlea },
146     { 5,  2, 0, SSFLnobp | SSFLlea },
147     { 9,  3, 0, SSFLnobp | SSFLlea },
148 
     // products flagged SSFLnobase
149     { 6,  1, 1, SSFLnobase },
150     { 12, 1, 2, SSFLnobase },
151     { 24, 1, 3, SSFLnobase },
152     { 10, 2, 1, SSFLnobase },
153     { 20, 2, 2, SSFLnobase },
154     { 40, 2, 3, SSFLnobase },
155     { 18, 3, 1, SSFLnobase },
156     { 36, 3, 2, SSFLnobase },
157     { 72, 3, 3, SSFLnobase },
158 
     // products flagged SSFLnobp only
159     { 15, 2, 1, SSFLnobp },
160     { 25, 2, 2, SSFLnobp },
161     { 27, 3, 1, SSFLnobp },
162     { 45, 3, 2, SSFLnobp },
163     { 81, 3, 3, SSFLnobp },
164 
     // powers of two: SSFLnobase1 | SSFLnobase
165     { 16, 3, 1, SSFLnobase1 | SSFLnobase },
166     { 32, 3, 2, SSFLnobase1 | SSFLnobase },
167     { 64, 3, 3, SSFLnobase1 | SSFLnobase },
168 ];
169 
/***********************************
 * Look up a multiplier in `ssindex_array`.
 * Params:
 *      op      = operator; when it is `OPshl`, `product` is a shift count
 *                and the multiplier searched for is `1 << product`
 *      product = the multiplier, or the shift count when `op` is `OPshl`
 * Returns:
 *      index into `ssindex_array` of the matching entry,
 *      0 if there is no match
 */
int ssindex(OPER op,targ_uns product)
{
    if (op == OPshl)
    {
        // A shift count at or beyond the bit width is not a valid shift
        // (hardware that masks the count would turn e.g. 36 into 4 and
        // falsely match the entry for 16), so such counts never match.
        if (product >= targ_uns.sizeof * 8)
            return 0;
        product = cast(targ_uns)1 << product;
    }

    // Entry 0 is a place holder, so the search starts at 1
    foreach (i; 1 .. ssindex_array.length)
    {
        if (ssindex_array[i].product == product)
            return cast(int)i;
    }
    return 0;
}
181 
/***************************************
 * Build an EA of the form disp[base][index*scale].
 * Fills in Irm, Isib and Irex of `c`, sets IFL1 to FLconst and stores the
 * displacement in IEV1.Vuns.
 * Params:
 *      c     = code struct to fill in
 *      base  = base register (-1 if none)
 *      index = index register (-1 if none)
 *      scale = scale factor - 1,2,4,8 (asserted to be 1 for 16 bit code)
 *      disp  = displacement
 */

void buildEA(code *c,int base,int index,int scale,targ_size_t disp)
{
    ubyte modrm;
    ubyte sibByte = 0;
    ubyte rexBits = 0;

    if (I16)
    {
        // Indexed as [base + 1][index + 1] so that -1 (no register) selects
        // row/column 0.  0x09 marks a combination that is rejected below.
        // -1 AX CX DX BX SP BP SI DI
        static immutable ubyte[9][9] EA16rm =
        [
            [   0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85,   ],      // -1
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // AX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // CX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // DX
            [   0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81,   ],      // BX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // SP
            [   0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83,   ],      // BP
            [   0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09,   ],      // SI
            [   0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09,   ]       // DI
        ];

        assert(scale == 1);
        modrm = EA16rm[base + 1][index + 1];
        assert(modrm != 9);
    }
    else
    {
        assert(index != SP);

        // Translate the scale factor into the ss field of the SIB byte
        uint ss;
        switch (scale)
        {
            case 1:     ss = 0; break;
            case 2:     ss = 1; break;
            case 4:     ss = 2; break;
            case 8:     ss = 3; break;
            default:    assert(0);
        }

        const bool noBase  = base == -1;
        const bool noIndex = index == -1;

        if (noBase && noIndex)
        {
            // displacement only
            modrm = modregrm(0,0,5);
        }
        else if (noBase)
        {
            // index*scale with no base register
            modrm   = modregrm(0,0,4);
            sibByte = modregrm(ss,index & 7,5);
            if (index & 8)
                rexBits |= REX_X;
        }
        else if (!noIndex)
        {
            // both base and index
            modrm   = modregrm(2, 0, 4);
            sibByte = modregrm(ss,index & 7,base & 7);
            if (index & 8)
                rexBits |= REX_X;
            if (base & 8)
                rexBits |= REX_B;
        }
        else if (base == SP)
        {
            // base only, and the base is SP: must go through a SIB byte
            modrm   = modregrm(2, 0, 4);
            sibByte = modregrm(0, 4, SP);
        }
        else
        {
            // base only
            modrm = modregrm(2, 0, base & 7);
            if (base & 8)
            {
                rexBits |= REX_B;
                if (base == R12)
                {
                    // R12 gets the same SIB form as SP
                    modrm   = modregrm(2, 0, 4);
                    sibByte = modregrm(0, 4, 4);
                }
            }
        }
    }

    c.Irm = modrm;
    c.Isib = sibByte;
    c.Irex = rexBits;
    c.IFL1 = FLconst;
    c.IEV1.Vuns = cast(targ_uns)disp;
}
279 
/*********************************************
 * Build REX, modregrm and sib bytes.
 * Params:
 *      mod = mod field
 *      reg = reg field; bit 3 selects REX_R outside 16 bit code
 *      rm  = r/m field; bit 3 selects REX_B outside 16 bit code
 * Returns:
 *      the modregrm byte in bits 0..7; outside 16 bit code additionally the
 *      sib byte in bits 8..15 and the REX bits shifted left by 16
 */

uint buildModregrm(int mod, int reg, int rm)
{
    if (I16)
        return modregrm(mod, reg, rm);

    // A memory operand whose r/m low bits equal SP has to be expressed with a SIB byte
    const bool viaSib = mod != 3 && (rm & 7) == SP;

    uint packed;
    if (viaSib)
        packed = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4);
    else
        packed = modregrm(mod,reg & 7,rm & 7);

    if (reg & 8)
        packed |= REX_R << 16;
    if (rm & 8)
        packed |= REX_B << 16;
    return packed;
}
302 
/****************************************
 * Generate code for eecontext.
 * Builds the code for eecontext.EEelem, runs assignaddrc(), pinholeopt()
 * and jmpaddr() over it, appends an INT 3 and stores the result in
 * eecontext.EEcode.  eecontext.EEin is incremented for the duration.
 */

@trusted
void genEEcode()
{
    CodeBuilder cdb;
    cdb.ctor();

    eecontext.EEin++;
    regcon.immed.mval = 0;

    // Adjust the stack by all of EEStack.offset except one register's worth,
    // which is accounted for by the PUSH that follows.
    assert(EEStack.offset >= REGSIZE);
    cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE));
    cdb.gen1(0x50 + SI);                      // PUSH ESI
    cdb.genadjesp(cast(int)EEStack.offset);

    regm_t noResult = 0;    //regmask(eecontext.EEelem.Ety);
    gencodelem(cdb, eecontext.EEelem, &noResult, false);

    code *generated = cdb.finish();
    assignaddrc(generated);
    pinholeopt(generated,null);
    jmpaddr(generated);
    eecontext.EEcode = gen1(generated, 0xCC);        // INT 3

    eecontext.EEin--;
}
328 
329 
/********************************************
 * Gen a save/restore sequence for mask of registers.
 * Registers are saved in ascending register number order and the restore
 * sequences are appended in the reverse order.
 * Params:
 *      regm = mask of registers to save; registers outside of
 *             mBP | mES | ALLREGS | XMMREGS | mST0 | mST01 are ignored
 *      cdbsave = save code appended here
 *      cdbrestore = restore code appended here
 * Returns:
 *      amount of stack consumed (only the PUSH based saves are counted)
 */
@trusted
uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore2(%s)\n", regm_str(regm));
    regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01;
    if (!regm)
        return 0;

    uint stackused = 0;

    // undo[r] is the restore sequence for register r, null if r is not saved
    code *[regm.sizeof * 8] undo;

    reg_t r;
    for (r = 0; regm; regm >>= 1, r++)
    {
        if (!(regm & 1))
        {
            undo[r] = null;
            continue;
        }

        code *seq;
        if (r == ES && I16)
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x06);                     // PUSH ES
            seq = gen1(null, 0x07);                 // POP  ES
        }
        else if (r == ST0 || r == ST01)
        {
            CodeBuilder cdbx87;
            cdbx87.ctor();
            gensaverestore87(1 << r, cdbsave, cdbx87);
            seq = cdbx87.finish();
        }
        else if (r >= XMM0 || I64 || cgstate.funcarg.size)
        {
            // Save into a regsave slot rather than pushing
            uint slot;
            regsave.save(cdbsave, r, &slot);
            CodeBuilder cdbslot;
            cdbslot.ctor();
            regsave.restore(cdbslot, r, slot);
            seq = cdbslot.finish();
        }
        else
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x50 + (r & 7));           // PUSH r
            seq = gen1(null, 0x58 + (r & 7));       // POP  r
            if (r & 8)
            {
                code_orrex(cdbsave.last(), REX_B);
                code_orrex(seq, REX_B);
            }
        }
        undo[r] = seq;
    }

    // Emit the restores last-saved-first
    foreach_reverse (code* seq; undo[0 .. r])
    {
        if (seq)
            cdbrestore.append(seq);
    }

    return stackused;
}
406 
407 
/****************************************
 * Clean parameters off stack.
 * Does nothing unless numpara is non-zero and either cgstate.stackclean is
 * set or STACKALIGN >= 16.
 * Params:
 *      cdb     = code sink
 *      numpara = amount to adjust stack pointer
 *      keepmsk = mask of registers to not destroy
 */

@trusted
void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk)
{
    //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean);
    if (!numpara || !(cgstate.stackclean || STACKALIGN >= 16))
        return;

    // When optimizing for space and exactly one register-sized slot is being
    // removed, a POP into a scratch register replaces the stack adjustment.
    // Candidates are already-used registers that are neither to be kept nor
    // holding register variables.
    regm_t scratchm = 0;
    if (numpara == REGSIZE && (config.flags4 & CFG4space))
        scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar;

    if (scratchm)
    {
        reg_t r;
        allocreg(cdb, &scratchm, &r, TYint);
        cdb.gen1(0x58 + r);           // POP r
    }
    else
    {
        cod3_stackadj(cdb, -numpara);
    }

    stackpush -= numpara;
    cdb.genadjesp(-numpara);
}
452 
/*********************************
 * Generate code for a logical expression.
 * Params:
 *      cdb    = code sink
 *      e      = elem to evaluate
 *      jcond  = bit mask:
 *               1: if set, goto the jump address if e;
 *                  if clear, goto the jump address if !e
 *               2: if set, the cse_flush() before the final jump is skipped
 *                  (historically described as "don't call save87()")
 *      fltarg = FLcode or FLblock, flavor of target if e evaluates to jcond
 *      targ   = either code or block pointer to destination
 */

@trusted
void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ)
{
    //printf("logexp(e = %p, jcond = %d)\n", e, jcond); elem_print(e);
    if (tybasic(e.Ety) == TYnoreturn)
    {
        // No jump is generated: just evaluate e, then put regcon back the
        // way it was apart from accumulating the registers that were used.
        con_t regconsave = regcon;
        regm_t retregs = 0;
        codelem(cdb,e,&retregs,0);
        regconsave.used |= regcon.used;
        regcon = regconsave;
        return;
    }

    int no87 = (jcond & 2) == 0;
    docommas(cdb, e);             // scan down commas
    cgstate.stackclean++;         // balanced by a decrement on every exit path below

    if (!OTleaf(e.Eoper) && !e.Ecount)     // if operator and not common sub
    {
        switch (e.Eoper)
        {
            case OPoror:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump if either operand is true
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                else
                {
                    // jump only if both are false: a true E1 skips past E2
                    code *cnop = gennop(null);
                    logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                andregcon(regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPandand:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump only if both are true: a false E1 skips past E2
                    code *cnop = gennop(null);    // a dummy target address
                    logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                else
                {
                    // jump if either operand is false
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                andregcon(regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPnot:
                jcond ^= 1;                 // invert the sense, then treat like OPbool
                goto case OPbool;

            case OPbool:
            case OPs8_16:
            case OPu8_16:
            case OPs16_32:
            case OPu16_32:
            case OPs32_64:
            case OPu32_64:
            case OPu32_d:
            case OPd_ld:
                // These are handled by testing the operand directly
                logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                freenode(e);
                cgstate.stackclean--;
                return;

            case OPcond:
            {
                code *cnop2 = gennop(null);   // addresses of start of leaves
                code *cnop = gennop(null);
                logexp(cdb, e.EV.E1, false, FLcode, cnop2);   // eval condition
                con_t regconold = regcon;
                logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ);
                genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf

                con_t regconsave = regcon;
                regcon = regconold;

                cdb.append(cnop2);
                logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ);
                andregcon(regconold);
                andregcon(regconsave);
                freenode(e.EV.E2);
                freenode(e);
                cdb.append(cnop);
                cgstate.stackclean--;
                return;
            }

            default:
                break;
        }
    }

    /* Special code for signed long compare.
     * Not necessary for I64 until we do cents.
     */
    if (OTrel2(e.Eoper) &&               // if < <= >= >
        !e.Ecount &&
        ( (I16 && tybasic(e.EV.E1.Ety) == TYlong  && tybasic(e.EV.E2.Ety) == TYlong) ||
          (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong))
       )
    {
        // Only bit 1 of jcond carries the jump sense; bit 2 must not be
        // mistaken for "jump if true".
        longcmp(cdb, e, (jcond & 1) != 0, fltarg, targ);
        cgstate.stackclean--;
        return;
    }

    regm_t retregs = mPSW;                // return result in flags
    opcode_t op = jmpopcode(e);           // get jump opcode
    if (!(jcond & 1))
        op ^= 0x101;                      // toggle jump condition(s)
    codelem(cdb, e, &retregs, true);         // evaluate elem
    if (no87)
        cse_flush(cdb,no87);              // flush CSE's to memory
    genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction
    cgstate.stackclean--;
}
604 
/******************************
 * Routine to aid in setting things up for gen().
 * Look for common subexpression.
 * Can handle indirection operators, but not if they're common subs.
 * The finished instruction is appended to `cdb`; nothing is returned.
 * Params:
 *      cdb     = code sink
 *      e       = elem where we get some of the data from
 *      cs      = partially filled code to add; Iflags, Irex and Iop are overwritten
 *      op      = opcode
 *      reg     = reg field of (mod reg r/m)
 *      offset  = data to be added to Voffset field
 *      keepmsk = mask of registers we must not destroy
 *      desmsk  = mask of registers destroyed by executing the instruction
 */

@trusted
void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
            regm_t keepmsk,regm_t desmsk)
{
    debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    tym_t tym = e.Ety;
    int sz = tysize(tym);

    /* Determine if location we want to get is in a register. If so,      */
    /* substitute the register for the EA.                                */
    /* Note that operators don't go through this. CSE'd operators are     */
    /* picked up by comsub().                                             */
    if (e.Ecount &&                      /* if cse                       */
        e.Ecount != e.Ecomsub &&        /* and cse was generated        */
        op != LEA && op != 0xC4 &&        /* and not an LEA or LES        */
        (op != 0xFF || reg != 3) &&       /* and not CALLF MEM16          */
        (op & 0xFFF8) != 0xD8)            // and not 8087 opcode
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (op == 0xFF && reg == 6)
            rm &= ~XMMREGS;             // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)     // value is in 4 registers
            {
                // offset selects which of the four words is wanted
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                rm &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                rm &= mMSW;             /* only high words      */
            else
                rm &= mLSW;             /* only low words       */
        }
        // Walk the candidate registers looking for one that holds e
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm)
            {
                if (regcon.cse.value[i] == e && // if register has elem
                    /* watch out for a CWD destroying DX        */
                   !(i == DX && op == 0xF7 && desmsk & mDX))
                {
                    /* if ES, then it can only be a load    */
                    if (i == ES)
                    {
                        if (op != 0x8B)
                            break;      // not a load
                        cs.Iop = 0x8C; /* MOV reg,ES   */
                        cs.Irm = modregrm(3, 0, reg & 7);
                        if (reg & 8)
                            code_orrex(cs, REX_B);
                    }
                    else    // XXX reg,i
                    {
                        cs.Irm = modregrm(3, reg & 7, i & 7);
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (i & 8)
                            cs.Irex |= REX_B;
                        if (sz == 1 && I64 && (i >= 4 || reg >= 4))
                            cs.Irex |= REX;
                        if (I64 && (sz == 8 || sz == 16))
                            cs.Irex |= REX_W;
                    }
                    goto L2;            // EA is a register; skip the memory operand setup
                }
                rm &= ~mask(i);
            }
        }
    }

    // Not available in a register: build a memory operand for e
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (reg == 6 && op == 0xFF ||             /* don't PUSH a word    */
            op == MOVZXw || op == MOVSXw ||       /* MOVZX/MOVSX          */
            (op & 0xFFF8) == 0xD8 ||              /* 8087 instructions    */
            op == LEA)                            /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (reg == 6 && op == 0xFF)         // if PUSH
                cs.Irex &= ~REX_W;             // REX is ignored for PUSH anyway
        }
    }
    else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE! fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        uint r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}
772 
773 
/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask containing the register to address through
 * Returns:
 *      the modregrm byte in bits 0..7; for R12 outside 16 bit code also a
 *      sib byte in bits 8..15 and REX_B shifted left by 16
 */

@trusted
uint getaddrmode(regm_t idxregs)
{
    if (I16)
    {
        // Only BX, DI and SI are accepted here, tried in that order
        if (idxregs & mBX)
            return modregrm(2,0,7);     /* [BX] */
        if (idxregs & mDI)
            return modregrm(2,0,5);     /* [DI] */
        if (idxregs & mSI)
            return modregrm(2,0,4);     /* [SI] */
        assert(0);
    }

    const reg = findreg(idxregs & (ALLREGS | mBP));
    if (reg == R12)
        return (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
    return modregrmx(2,0,reg);
}
802 
/**************************
 * Store the addressing mode computed by getaddrmode() into `c`.
 * Params:
 *      c       = code whose Irm, Isib and Irex are updated
 *      idxregs = register mask passed to getaddrmode()
 */
void setaddrmode(code *c, regm_t idxregs)
{
    const uint packed = getaddrmode(idxregs);

    // Unpack: bits 0..7 modregrm, bits 8..15 sib, bits 16 and up REX
    c.Irm = packed & 0xFF;
    c.Isib = (packed >> 8) & 0xFF;

    // REX_B is replaced by whatever the new mode asks for; other REX bits are kept
    c.Irex &= ~REX_B;
    c.Irex |= packed >> 16;
}
811 
/**********************************************
 * Adjust the EA in `c` to address the most significant word.
 * For a register operand (IFL1 == FLreg) the r/m field and REX_B are
 * rewritten to select the symbol's Sregmsw; otherwise REGSIZE is added
 * to the offset.
 * Params:
 *      c = code to adjust
 */

@trusted
void getlvalue_msw(code *c)
{
    if (c.IFL1 != FLreg)
    {
        c.IEV1.Voffset += REGSIZE;
        return;
    }

    const regmsw = c.IEV1.Vsym.Sregmsw;
    c.Irm = (c.Irm & ~7) | (regmsw & 7);
    if (regmsw & 8)
        c.Irex |= REX_B;
    else
        c.Irex &= ~REX_B;
}
830 
/**********************************************
 * Adjust the EA in `c` to address the least significant word.
 * For a register operand (IFL1 == FLreg) the r/m field and REX_B are
 * rewritten to select the symbol's Sreglsw; otherwise REGSIZE is
 * subtracted from the offset.
 * Params:
 *      c = code to adjust
 */

@trusted
void getlvalue_lsw(code *c)
{
    if (c.IFL1 != FLreg)
    {
        c.IEV1.Voffset -= REGSIZE;
        return;
    }

    const reglsw = c.IEV1.Vsym.Sreglsw;
    c.Irm = (c.Irm & ~7) | (reglsw & 7);
    if (reglsw & 8)
        c.Irex |= REX_B;
    else
        c.Irex &= ~REX_B;
}
849 
850 /******************
851  * Compute addressing mode.
852  * Generate & return sequence of code (if any).
853  * Return in cs the info on it.
854  * Input:
855  *      pcs ->  where to store data about addressing mode
856  *      e ->    the lvalue elem
857  *      keepmsk mask of registers we must not destroy or use
858  *              if (keepmsk & RMstore), this will be only a store operation
859  *              into the lvalue
860  *              if (keepmsk & RMload), this will be a read operation only
861  */
862 
863 @trusted
864 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
865 {
866     FL fl;
867     uint f, opsave;
868     elem* e1, e11, e12;
869     bool e1isadd, e1free;
870     reg_t reg;
871     tym_t e1ty;
872     Symbol* s;
873 
874     //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk));
875     //elem_print(e);
876     assert(e);
877     elem_debug(e);
878     if (e.Eoper == OPvar || e.Eoper == OPrelconst)
879     {
880         s = e.EV.Vsym;
881         fl = s.Sfl;
882         if (tyfloating(s.ty()))
883             objmod.fltused();
884     }
885     else
886         fl = FLoper;
887     pcs.IFL1 = cast(ubyte)fl;
888     pcs.Iflags = CFoff;                  /* only want offsets            */
889     pcs.Irex = 0;
890     pcs.IEV1.Voffset = 0;
891 
892     tym_t ty = e.Ety;
893     uint sz = tysize(ty);
894     if (tyfloating(ty))
895         objmod.fltused();
896     if (I64 && (sz == 8 || sz == 16) && !tyvector(ty))
897         pcs.Irex |= REX_W;
898     if (!I16 && sz == SHORTSIZE)
899         pcs.Iflags |= CFopsize;
900     if (ty & mTYvolatile)
901         pcs.Iflags |= CFvolatile;
902 
903     switch (fl)
904     {
905         case FLoper:
906             debug
907             if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk));
908 
909             switch (e.Eoper)
910             {
911                 case OPadd:                 // this way when we want to do LEA
912                     e1 = e;
913                     e1free = false;
914                     e1isadd = true;
915                     break;
916 
917                 case OPind:
918                 case OPpostinc:             // when doing (*p++ = ...)
919                 case OPpostdec:             // when doing (*p-- = ...)
920                 case OPbt:
921                 case OPbtc:
922                 case OPbtr:
923                 case OPbts:
924                 case OPvecfill:
925                     e1 = e.EV.E1;
926                     e1free = true;
927                     e1isadd = e1.Eoper == OPadd;
928                     break;
929 
930                 default:
931                     printf("function: %s\n", funcsym_p.Sident.ptr);
932                     elem_print(e);
933                     assert(0);
934             }
935             e1ty = tybasic(e1.Ety);
936             if (e1isadd)
937             {
938                 e12 = e1.EV.E2;
939                 e11 = e1.EV.E1;
940             }
941 
942             /* First see if we can replace *(e+&v) with
943              *      MOV     idxreg,e
944              *      EA =    [ES:] &v+idxreg
945              */
946             f = FLconst;
947 
948             /* Is address of `s` relative to RIP ?
949              */
950             static bool relativeToRIP(Symbol* s)
951             {
952                 if (!I64)
953                     return false;
954                 if (config.exe == EX_WIN64)
955                     return true;
956                 if (config.flags3 & CFG3pie)
957                 {
958                     if (s.Sfl == FLtlsdata || s.ty() & mTYthread)
959                     {
960                         if (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat)
961                             return false;
962                     }
963                     return true;
964                 }
965                 else
966                     return (config.flags3 & CFG3pic) != 0;
967             }
968 
969             if (e1isadd &&
970                 ((e12.Eoper == OPrelconst &&
971                   !relativeToRIP(e12.EV.Vsym) &&
972                   (f = el_fl(e12)) != FLfardata
973                  ) ||
974                  (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) &&
975                 e1.Ecount == e1.Ecomsub &&
976                 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) &&
977                 tysize(e11.Ety) == REGSIZE
978                )
979             {
980                 uint t;            /* component of r/m field */
981                 int ss;
982                 int ssi;
983 
984                 if (e12.Eoper == OPrelconst)
985                     f = el_fl(e12);
986                 /*assert(datafl[f]);*/              /* what if addr of func? */
987                 if (!I16)
988                 {   /* Any register can be an index register        */
989                     regm_t idxregs = allregs & ~keepmsk;
990                     assert(idxregs);
991 
992                     /* See if e1.EV.E1 can be a scaled index  */
993                     ss = isscaledindex(e11);
994                     if (ss)
995                     {
996                         /* Load index register with result of e11.EV.E1       */
997                         cdisscaledindex(cdb, e11, &idxregs, keepmsk);
998                         reg = findreg(idxregs);
999                         {
1000                             t = stackfl[f] ? 2 : 0;
1001                             pcs.Irm = modregrm(t, 0, 4);
1002                             pcs.Isib = modregrm(ss, reg & 7, 5);
1003                             if (reg & 8)
1004                                 pcs.Irex |= REX_X;
1005                         }
1006                     }
1007                     else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
1008                              !e11.Ecount &&
1009                              e11.EV.E2.Eoper == OPconst &&
1010                              (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0
1011                             )
1012                     {
1013                         regm_t scratchm;
1014 
1015                         char ssflags = ssindex_array[ssi].ssflags;
1016                         if (ssflags & SSFLnobp && stackfl[f])
1017                             goto L6;
1018 
1019                         // Load index register with result of e11.EV.E1
1020                         scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true);
1021                         reg = findreg(idxregs);
1022 
1023                         int ss1 = ssindex_array[ssi].ss1;
1024                         if (ssflags & SSFLlea)
1025                         {
1026                             assert(!stackfl[f]);
1027                             pcs.Irm = modregrm(2,0,4);
1028                             pcs.Isib = modregrm(ss1, reg & 7, reg & 7);
1029                             if (reg & 8)
1030                                 pcs.Irex |= REX_X | REX_B;
1031                         }
1032                         else
1033                         {
1034                             int rbase;
1035                             reg_t r;
1036 
1037                             scratchm = ALLREGS & ~keepmsk;
1038                             allocreg(cdb, &scratchm, &r, TYint);
1039 
1040                             if (ssflags & SSFLnobase1)
1041                             {
1042                                 t = 0;
1043                                 rbase = 5;
1044                             }
1045                             else
1046                             {
1047                                 t = 0;
1048                                 rbase = reg;
1049                                 if (rbase == BP || rbase == R13)
1050                                 {
1051                                     static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];
1052 
1053                                     // IMUL r,BP,imm32
1054                                     cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]);
1055                                     goto L7;
1056                                 }
1057                             }
1058 
1059                             cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7));
1060                             if (reg & 8)
1061                                 code_orrex(cdb.last(), REX_X);
1062                             if (rbase & 8)
1063                                 code_orrex(cdb.last(), REX_B);
1064                             if (I64)
1065                                 code_orrex(cdb.last(), REX_W);
1066 
1067                             if (ssflags & SSFLnobase1)
1068                             {
1069                                 cdb.last().IFL1 = FLconst;
1070                                 cdb.last().IEV1.Vuns = 0;
1071                             }
1072                         L7:
1073                             if (ssflags & SSFLnobase)
1074                             {
1075                                 t = stackfl[f] ? 2 : 0;
1076                                 rbase = 5;
1077                             }
1078                             else
1079                             {
1080                                 t = 2;
1081                                 rbase = r;
1082                                 assert(rbase != BP);
1083                             }
1084                             pcs.Irm = modregrm(t, 0, 4);
1085                             pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7);
1086                             if (r & 8)
1087                                 pcs.Irex |= REX_X;
1088                             if (rbase & 8)
1089                                 pcs.Irex |= REX_B;
1090                         }
1091                         freenode(e11.EV.E2);
1092                         freenode(e11);
1093                     }
1094                     else
1095                     {
1096                      L6:
1097                         /* Load index register with result of e11   */
1098                         scodelem(cdb, e11, &idxregs, keepmsk, true);
1099                         setaddrmode(pcs, idxregs);
1100                         if (stackfl[f])             /* if we need [EBP] too */
1101                         {
1102                             uint idx = pcs.Irm & 7;
1103                             if (pcs.Irex & REX_B)
1104                                 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X;
1105                             pcs.Isib = modregrm(0, idx, BP);
1106                             pcs.Irm = modregrm(2, 0, 4);
1107                         }
1108                     }
1109                 }
1110                 else
1111                 {
1112                     regm_t idxregs = IDXREGS & ~keepmsk;   /* only these can be index regs */
1113                     assert(idxregs);
1114                     if (stackfl[f])                 /* if stack data type   */
1115                     {
1116                         idxregs &= mSI | mDI;       /* BX can't index off stack */
1117                         if (!idxregs) goto L1;      /* index regs aren't avail */
1118                         t = 6;                      /* [BP+SI+disp]         */
1119                     }
1120                     else
1121                         t = 0;                      /* [SI + disp]          */
1122                     scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg
1123                     pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t);
1124                 }
1125                 if (f == FLpara)
1126                     refparam = true;
1127                 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast)
1128                     reflocal = true;
1129                 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr)
1130                     pcs.Iflags |= CFcs;
1131                 else
1132                     assert(f != FLreg);
1133                 pcs.IFL1 = cast(ubyte)f;
1134                 if (f != FLconst)
1135                     pcs.IEV1.Vsym = e12.EV.Vsym;
1136                 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? */
1137 
1138                 /* If e1 is a CSE, we must generate an addressing mode      */
1139                 /* but also leave EA in registers so others can use it      */
1140                 if (e1.Ecount)
1141                 {
1142                     uint flagsave;
1143 
1144                     regm_t idxregs = IDXREGS & ~keepmsk;
1145                     allocreg(cdb, &idxregs, &reg, TYoffset);
1146 
1147                     /* If desired result is a far pointer, we'll have       */
1148                     /* to load another register with the segment of v       */
1149                     if (e1ty == TYfptr)
1150                     {
1151                         reg_t msreg;
1152 
1153                         idxregs |= mMSW & ALLREGS & ~keepmsk;
1154                         allocreg(cdb, &idxregs, &msreg, TYfptr);
1155                         msreg = findregmsw(idxregs);
1156                                                     /* MOV msreg,segreg     */
1157                         genregs(cdb, 0x8C, segfl[f], msreg);
1158                     }
1159                     opsave = pcs.Iop;
1160                     flagsave = pcs.Iflags;
1161                     ubyte rexsave = pcs.Irex;
1162                     pcs.Iop = LEA;
1163                     code_newreg(pcs, reg);
1164                     if (!I16)
1165                         pcs.Iflags &= ~CFopsize;
1166                     if (I64)
1167                         pcs.Irex |= REX_W;
1168                     cdb.gen(pcs);                 // LEA idxreg,EA
1169                     cssave(e1,idxregs,true);
1170                     if (!I16)
1171                     {
1172                         pcs.Iflags = flagsave;
1173                         pcs.Irex = rexsave;
1174                     }
1175                     if (stackfl[f] && (config.wflags & WFssneds))   // if pointer into stack
1176                         pcs.Iflags |= CFss;        // add SS: override
1177                     pcs.Iop = opsave;
1178                     pcs.IFL1 = FLoffset;
1179                     pcs.IEV1.Vuns = 0;
1180                     setaddrmode(pcs, idxregs);
1181                 }
1182                 freenode(e12);
1183                 if (e1free)
1184                     freenode(e1);
1185                 goto Lptr;
1186             }
1187 
1188             L1:
1189 
1190             /* The rest of the cases could be a far pointer */
1191 
1192             regm_t idxregs;
1193             idxregs = (I16 ? IDXREGS : allregs) & ~keepmsk; // only these can be index regs
1194             assert(idxregs);
1195             if (!I16 &&
1196                 (sz == REGSIZE || (I64 && sz == 4)) &&
1197                 keepmsk & RMstore)
1198                 idxregs |= regcon.mvar;
1199 
1200             switch (e1ty)
1201             {
1202                 case TYfptr:                        /* if far pointer       */
1203                 case TYhptr:
1204                     idxregs = (mES | IDXREGS) & ~keepmsk;   // need segment too
1205                     assert(idxregs & mES);
1206                     pcs.Iflags |= CFes;            /* ES segment override  */
1207                     break;
1208 
1209                 case TYsptr:                        /* if pointer to stack  */
1210                     if (config.wflags & WFssneds)   // if SS != DS
1211                         pcs.Iflags |= CFss;        /* then need SS: override */
1212                     break;
1213 
1214                 case TYfgPtr:
1215                     if (I32)
1216                         pcs.Iflags |= CFgs;
1217                     else if (I64)
1218                         pcs.Iflags |= CFfs;
1219                     else
1220                         assert(0);
1221                     break;
1222 
1223                 case TYcptr:                        /* if pointer to code   */
1224                     pcs.Iflags |= CFcs;            /* then need CS: override */
1225                     break;
1226 
1227                 default:
1228                     break;
1229             }
1230             pcs.IFL1 = FLoffset;
1231             pcs.IEV1.Vuns = 0;
1232 
1233             /* see if we can replace *(e+c) with
1234              *      MOV     idxreg,e
1235              *      [MOV    ES,segment]
1236              *      EA =    [ES:]c[idxreg]
1237              */
1238             if (e1isadd && e12.Eoper == OPconst &&
1239                 (!I64 || el_signx32(e12)) &&
1240                 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) &&
1241                 (!e1.Ecount || !e1free)
1242                )
1243             {
1244                 int ss;
1245 
1246                 pcs.IEV1.Vuns = e12.EV.Vuns;
1247                 freenode(e12);
1248                 if (e1free) freenode(e1);
1249                 if (!I16 && e11.Eoper == OPadd && !e11.Ecount &&
1250                     tysize(e11.Ety) == REGSIZE)
1251                 {
1252                     e12 = e11.EV.E2;
1253                     e11 = e11.EV.E1;
1254                     e1 = e1.EV.E1;
1255                     e1free = true;
1256                     goto L4;
1257                 }
1258                 if (!I16 && (ss = isscaledindex(e11)) != 0)
1259                 {   // (v * scale) + const
1260                     cdisscaledindex(cdb, e11, &idxregs, keepmsk);
1261                     reg = findreg(idxregs);
1262                     pcs.Irm = modregrm(0, 0, 4);
1263                     pcs.Isib = modregrm(ss, reg & 7, 5);
1264                     if (reg & 8)
1265                         pcs.Irex |= REX_X;
1266                 }
1267                 else
1268                 {
1269                     scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg
1270                     setaddrmode(pcs, idxregs);
1271                 }
1272                 goto Lptr;
1273             }
1274 
1275             /* Look for *(v1 + v2)
1276              *      EA = [v1][v2]
1277              */
1278 
1279             if (!I16 && e1isadd && (!e1.Ecount || !e1free) &&
1280                 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4)))
1281             {
1282             L4:
1283                 regm_t idxregs2;
1284                 uint base, index;
1285 
1286                 // Look for *(v1 + v2 << scale)
1287                 int ss = isscaledindex(e12);
1288                 if (ss)
1289                 {
1290                     scodelem(cdb, e11, &idxregs, keepmsk, true);
1291                     idxregs2 = allregs & ~(idxregs | keepmsk);
1292                     cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs);
1293                 }
1294 
1295                 // Look for *(v1 << scale + v2)
1296                 else if ((ss = isscaledindex(e11)) != 0)
1297                 {
1298                     idxregs2 = idxregs;
1299                     cdisscaledindex(cdb, e11, &idxregs2, keepmsk);
1300                     idxregs = allregs & ~(idxregs2 | keepmsk);
1301                     scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true);
1302                 }
1303                 // Look for *(((v1 << scale) + c1) + v2)
1304                 else if (e11.Eoper == OPadd && !e11.Ecount &&
1305                          e11.EV.E2.Eoper == OPconst &&
1306                          (ss = isscaledindex(e11.EV.E1)) != 0
1307                         )
1308                 {
1309                     pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns;
1310                     idxregs2 = idxregs;
1311                     cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk);
1312                     idxregs = allregs & ~(idxregs2 | keepmsk);
1313                     scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true);
1314                     freenode(e11.EV.E2);
1315                     freenode(e11);
1316                 }
1317                 else
1318                 {
1319                     scodelem(cdb, e11, &idxregs, keepmsk, true);
1320                     idxregs2 = allregs & ~(idxregs | keepmsk);
1321                     scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true);
1322                 }
1323                 base = findreg(idxregs);
1324                 index = findreg(idxregs2);
1325                 pcs.Irm  = modregrm(2, 0, 4);
1326                 pcs.Isib = modregrm(ss, index & 7, base & 7);
1327                 if (index & 8)
1328                     pcs.Irex |= REX_X;
1329                 if (base & 8)
1330                     pcs.Irex |= REX_B;
1331                 if (e1free)
1332                     freenode(e1);
1333 
1334                 goto Lptr;
1335             }
1336 
1337             /* give up and replace *e1 with
1338              *      MOV     idxreg,e
1339              *      EA =    0[idxreg]
1340              * pinholeopt() will usually correct the 0, we need it in case
1341              * we have a pointer to a long and need an offset to the second
1342              * word.
1343              */
1344 
1345             assert(e1free);
1346             scodelem(cdb, e1, &idxregs, keepmsk, true);  // load index register
1347             setaddrmode(pcs, idxregs);
1348         Lptr:
1349             if (config.flags3 & CFG3ptrchk)
1350                 cod3_ptrchk(cdb, pcs, keepmsk);        // validate pointer code
1351             break;
1352 
1353         case FLdatseg:
1354             assert(0);
1355         static if (0)
1356         {
1357             pcs.Irm = modregrm(0, 0, BPRM);
1358             pcs.IEVpointer1 = e.EVpointer;
1359             break;
1360         }
1361 
1362         case FLfltreg:
1363             reflocal = true;
1364             pcs.Irm = modregrm(2, 0, BPRM);
1365             pcs.IEV1.Vint = 0;
1366             break;
1367 
1368         case FLreg:
1369             goto L2;
1370 
1371         case FLpara:
1372             if (s.Sclass == SC.shadowreg)
1373                 goto case FLfast;
1374         Lpara:
1375             refparam = true;
1376             pcs.Irm = modregrm(2, 0, BPRM);
1377             goto L2;
1378 
1379         case FLauto:
1380         case FLfast:
1381             if (regParamInPreg(s))
1382             {
1383                 regm_t pregm = s.Spregm();
1384                 /* See if the parameter is still hanging about in a register,
1385                  * and so can we load from that register instead.
1386                  */
1387                 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/)
1388                 {
1389                     if (keepmsk & RMload && !anyiasm)
1390                     {
1391                         auto voffset = e.EV.Voffset;
1392                         if (sz <= REGSIZE)
1393                         {
1394                             const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg;
1395                             if (voffset >= REGSIZE)
1396                                 voffset -= REGSIZE;
1397 
1398                             /* preg could be NOREG if it's a variadic function and we're
1399                              * in Win64 shadow regs and we're offsetting to get to the start
1400                              * of the variadic args.
1401                              */
1402                             if (preg != NOREG && regcon.params & mask(preg))
1403                             {
1404                                 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset);
1405                                 if (mask(preg) & XMMREGS)
1406                                 {
1407                                     /* The following fails with this from std.math on Linux64:
1408                                         void main()
1409                                         {
1410                                             alias T = float;
1411                                             T x = T.infinity;
1412                                             T e = T.infinity;
1413                                             int eptr;
1414                                             T v = frexp(x, eptr);
1415                                             assert(isIdentical(e, v));
1416                                         }
1417                                      */
1418                                 }
1419                                 else if (voffset == 0)
1420                                 {
1421                                     pcs.Irm = modregrm(3, 0, preg & 7);
1422                                     if (preg & 8)
1423                                         pcs.Irex |= REX_B;
1424                                     if (I64 && sz == 1 && preg >= 4)
1425                                         pcs.Irex |= REX;
1426                                     regcon.used |= mask(preg);
1427                                     break;
1428                                 }
1429                                 else if (voffset == 1 && sz == 1 && preg < 4)
1430                                 {
1431                                     pcs.Irm = modregrm(3, 0, 4 | preg); // use H register
1432                                     regcon.used |= mask(preg);
1433                                     break;
1434                                 }
1435                             }
1436                         }
1437                     }
1438                     else
1439                         regcon.params &= ~pregm;
1440                 }
1441             }
1442             if (s.Sclass == SC.shadowreg)
1443                 goto Lpara;
1444             goto case FLbprel;
1445 
1446         case FLbprel:
1447             reflocal = true;
1448             pcs.Irm = modregrm(2, 0, BPRM);
1449             goto L2;
1450 
1451         case FLextern:
1452             if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
1453             {
1454                 if (config.exe & EX_windos)
1455                 {
1456                     if (I64)
1457                     {   // GS:[88]
1458                         pcs.Irm = modregrm(0, 0, 4);
1459                         pcs.Isib = modregrm(0, 4, 5);  // don't use [RIP] addressing
1460                         pcs.IFL1 = FLconst;
1461                         pcs.IEV1.Vuns = 88;
1462                         pcs.Iflags = CFgs;
1463                         pcs.Irex |= REX_W;
1464                         break;
1465                     }
1466                     else
1467                     {
1468                         pcs.Iflags |= CFfs;    // add FS: override
1469                     }
1470                 }
1471                 else if (config.exe & (EX_OSX | EX_OSX64))
1472                 {
1473                 }
1474                 else if (config.exe & EX_posix)
1475                     assert(0);
1476             }
1477             if (s.ty() & mTYcs && cast(bool) LARGECODE)
1478                 goto Lfardata;
1479             goto L3;
1480 
1481         case FLtlsdata:
1482             if (config.exe & EX_posix)
1483                 goto L3;
1484             assert(0);
1485 
1486         case FLdata:
1487         case FLudata:
1488         case FLcsdata:
1489         case FLgot:
1490         case FLgotoff:
1491         L3:
1492             pcs.Irm = modregrm(0, 0, BPRM);
1493         L2:
1494             if (fl == FLreg)
1495             {
1496                 //printf("test: FLreg, %s %d regcon.mvar = %s\n",
1497                 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar));
1498                 if (!(s.Sregm & regcon.mvar))
1499                     symbol_print(s);
1500                 assert(s.Sregm & regcon.mvar);
1501 
1502                 /* Attempting to paint a float as an integer or an integer as a float
1503                  * will cause serious problems since the EA is loaded separatedly from
1504                  * the opcode. The only way to deal with this is to prevent enregistering
1505                  * such variables.
1506                  */
1507                 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) ||
1508                     !tyxmmreg(ty) && (s.Sregm & XMMREGS))
1509                     cgreg_unregister(s.Sregm);
1510 
1511                 if (
1512                     s.Sclass == SC.regpar ||
1513                     s.Sclass == SC.parameter)
1514                 {   refparam = true;
1515                     reflocal = true;        // kludge to set up prolog
1516                 }
1517                 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7);
1518                 if (s.Sreglsw & 8)
1519                     pcs.Irex |= REX_B;
1520                 if (e.EV.Voffset == REGSIZE && sz == REGSIZE)
1521                 {
1522                     pcs.Irm = modregrm(3, 0, s.Sregmsw & 7);
1523                     if (s.Sregmsw & 8)
1524                         pcs.Irex |= REX_B;
1525                     else
1526                         pcs.Irex &= ~REX_B;
1527                 }
1528                 else if (e.EV.Voffset == 1 && sz == 1)
1529                 {
1530                     assert(s.Sregm & BYTEREGS);
1531                     assert(s.Sreglsw < 4);
1532                     pcs.Irm |= 4;                  // use 2nd byte of register
1533                 }
1534                 else
1535                 {
1536                     assert(!e.EV.Voffset);
1537                     if (I64 && sz == 1 && s.Sreglsw >= 4)
1538                         pcs.Irex |= REX;
1539                 }
1540             }
1541             else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE))
1542             {
1543                 pcs.Iflags |= CFcs | CFoff;
1544             }
1545             if (config.flags3 & CFG3pic &&
1546                 (fl == FLtlsdata || s.ty() & mTYthread))
1547             {
1548                 if (I32)
1549                 {
1550                     if (config.flags3 & CFG3pie)
1551                     {
1552                         pcs.Iflags |= CFgs;
1553                     }
1554                 }
1555                 else if (I64)
1556                 {
1557                     if (config.flags3 & CFG3pie &&
1558                         (s.Sclass == SC.global || s.Sclass == SC.static_ || s.Sclass == SC.locstat))
1559                     {
1560                         pcs.Iflags |= CFfs;
1561                         pcs.Irm = modregrm(0, 0, 4);
1562                         pcs.Isib = modregrm(0, 4, 5);  // don't use [RIP] addressing
1563                     }
1564                     else
1565                     {
1566                         //pcs.Iflags |= CFopsize; //I don't know what this was for
1567                         pcs.Irex = 0x48;
1568                     }
1569                 }
1570             }
1571             pcs.IEV1.Vsym = s;
1572             pcs.IEV1.Voffset = e.EV.Voffset;
1573             if (sz == 1)
1574             {   /* Don't use SI or DI for this variable     */
1575                 s.Sflags |= GTbyte;
1576                 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1)
1577                 {
1578                     debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr);
1579                     s.Sflags &= ~GTregcand;
1580                 }
1581             }
1582             else if (sz == 2 && tyxmmreg(s.ty()) && config.fpxmmregs)
1583             {
1584                 debug if (debugr) printf("'%s' not XMM reg cand due to short access\n", s.Sident.ptr);
1585                 s.Sflags &= ~GTregcand;
1586             }
1587             else if (e.EV.Voffset || sz > tysize(s.Stype.Tty))
1588             {
1589                 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr);
1590                 s.Sflags &= ~GTregcand;
1591             }
1592             else if (tyvector(s.Stype.Tty) && sz < tysize(s.Stype.Tty))
1593             {
1594                 // https://issues.dlang.org/show_bug.cgi?id=21673
1595                 // https://issues.dlang.org/show_bug.cgi?id=21676
1596                 // https://issues.dlang.org/show_bug.cgi?id=23009
1597                 // PR: https://github.com/dlang/dmd/pull/13977
1598                 // cannot read or write to partial vector
1599                 debug if (debugr) printf("'%s' not reg cand due to vector type\n", s.Sident.ptr);
1600                 s.Sflags &= ~GTregcand;
1601             }
1602 
1603             if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty))
1604             {
1605                 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr);
1606                 // Can't successfully mix XMM register variables accessed as integers
1607                 s.Sflags &= ~GTregcand;
1608             }
1609 
1610             if (!(keepmsk & RMstore))               // if not store only
1611                 s.Sflags |= SFLread;               // assume we are doing a read
1612             break;
1613 
1614         case FLpseudo:
1615             {
1616                 getregs(cdb, mask(s.Sreglsw));
1617                 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7);
1618                 if (s.Sreglsw & 8)
1619                     pcs.Irex |= REX_B;
1620                 if (e.EV.Voffset == 1 && sz == 1)
1621                 {   assert(s.Sregm & BYTEREGS);
1622                     assert(s.Sreglsw < 4);
1623                     pcs.Irm |= 4;                  // use 2nd byte of register
1624                 }
1625                 else
1626                 {   assert(!e.EV.Voffset);
1627                     if (I64 && sz == 1 && s.Sreglsw >= 4)
1628                         pcs.Irex |= REX;
1629                 }
1630                 break;
1631             }
1632 
1633         case FLfardata:
1634         case FLfunc:                                /* reading from code seg */
1635             if (config.exe & EX_flat)
1636                 goto L3;
1637         Lfardata:
1638         {
1639             regm_t regm = ALLREGS & ~keepmsk;       // need scratch register
1640             allocreg(cdb, &regm, &reg, TYint);
1641             getregs(cdb,mES);
1642             // MOV mreg,seg of symbol
1643             cdb.gencs(0xB8 + reg, 0, FLextern, s);
1644             cdb.last().Iflags = CFseg;
1645             cdb.gen2(0x8E, modregrmx(3, 0, reg));     // MOV ES,reg
1646             pcs.Iflags |= CFes | CFoff;            /* ES segment override  */
1647             goto L3;
1648         }
1649 
1650         case FLstack:
1651             assert(!I16);
1652             pcs.Irm = modregrm(2, 0, 4);
1653             pcs.Isib = modregrm(0, 4, SP);
1654             pcs.IEV1.Vsym = s;
1655             pcs.IEV1.Voffset = e.EV.Voffset;
1656             break;
1657 
1658         default:
1659             WRFL(fl);
1660             symbol_print(s);
1661             assert(0);
1662     }
1663 }
1664 
/*****************************
 * Emit the instruction held in `pcs` (opcode plus EA) once for every
 * register that carries a piece of a floating point value, stepping the
 * EA offset so each copy addresses the matching piece of the operand.
 *
 * Emission order (register applied via NEWREG to pcs.Irm, offset relative
 * to the incoming pcs.IEV1.Voffset):
 *      32 bit, float:   AX at +0
 *      32 bit, other:   DX at +REGSIZE, then AX at +0
 *      16 bit, float:   DX at +2, then AX at +0
 *      16 bit, other:   AX at +6, BX at +4, CX at +2, DX at +0
 *
 * Must not be called for 64 bit code generation.
 *
 * Params:
 *      cdb = code builder the instructions are appended to
 *      pcs = instruction template; pcs.Irm is modified for each register,
 *            pcs.IEV1.Voffset is stepped but equals its entry value on return
 *      tym = either TYdouble or TYfloat (only its basic type is examined)
 */

@trusted
void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);

    // Only the basic type matters; anything that is not TYfloat takes
    // the wider (double) register set.
    const bool isSingle = tybasic(tym) == TYfloat;

    if (!I32)
    {
        // 16 bit code: the value is handled in 2 byte pieces
        if (isSingle)
        {
            getregs(cdb, FLOATREGS_16);

            pcs.IEV1.Voffset += 2;          // upper piece first
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;          // back to the starting offset
            NEWREG(pcs.Irm, AX);
            cdb.gen(pcs);
        }
        else
        {
            getregs(cdb, DOUBLEREGS_16);

            pcs.IEV1.Voffset += 6;          // start at the topmost piece
            NEWREG(pcs.Irm, AX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, BX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, CX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;          // Voffset is now back at its entry value
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
        }
        return;
    }

    // 32 bit code
    if (isSingle)
    {
        getregs(cdb, mAX);
    }
    else
    {
        getregs(cdb, mAX | mDX);

        // The piece at +REGSIZE goes through DX, emitted ahead of the AX piece
        pcs.IEV1.Voffset += REGSIZE;
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
        pcs.IEV1.Voffset -= REGSIZE;
    }

    // The piece at the original offset always goes through AX
    NEWREG(pcs.Irm, AX);
    cdb.gen(pcs);
}
1716 
1717 
/*****************************
 * Given a result in registers, generate code that tests it for
 * true (non-zero) or false (zero).
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 *
 * The instruction sequence depends on the size of the type:
 *      1 byte                  TEST regL,regL
 *      value in an XMM reg     XORPS scratch,scratch followed by UCOMISS
 *      fits in one register    TEST reg,reg (or SHL reg,1 for TYfloat when not 16 bit)
 *      larger                  the pieces are OR'ed together, into a scratch
 *                              register if the value must be preserved
 * Only the last case marks the final instruction with CFpsw; the other
 * cases return early.
 * Params:
 *      cdb = where the generated instructions are appended
 *      regm = mask of the register(s) holding the value; must include at
 *             least one of XMMREGS, mBP or ALLREGS
 *      tym = type of the value (reduced with tybasic())
 *      saveflag = if non-zero, the registers in regm must not be modified
 */

@trusted
void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                      // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];
    if (sz == 1)
    {
        // Byte value: TEST does not modify the register, so saveflag is irrelevant
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);        // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);     // add a REX prefix for registers 4 and up in 64 bit code
        return;
    }
    if (regm & XMMREGS)
    {
        // Compare against a zeroed scratch XMM register that is not part of regm
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;                   // 0x66 prefix for the double precision types
        cdb.gen2(op | XORPS, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0));     // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   // Complex: the other register of regm is compared only if the
            // first compare took neither the JNE nor the JP branch
            code *cnop = gennop(null);
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE     L1
            genjmp(cdb,  JP, FLcode, cast(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask(reg));
            cdb.gen2(op | UCOMISS, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg
            cdb.append(cnop);                // L1:
        }
        return;
    }
    if (sz <= REGSIZE)
    {
        // Value fits in a single general purpose register
        if (!I16)
        {
            if (tym == TYfloat)
            {
                // SHL modifies the register, so work on a copy when the
                // original must be preserved.
                // NOTE(review): presumably the shift discards the sign bit so
                // that -0.0 tests as false - confirm.
                if (saveflag)
                {
                    scrregm = allregs & ~regm;              // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                cdb.gen2(0xD1, modregrmx(3, 4, reg)); // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                 // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;             // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);             // 64 bit operands
        }
        else
            gentstreg(cdb, reg);                 // TEST reg,reg
        return;
    }

    // From here on the value occupies more than one register
    if (saveflag || tyfv(tym))
    {
        // Combine the pieces in a scratch register, leaving regm untouched.
        // Also entered via `goto L1` below when the MSW is a register variable.
    L1:
        scrregm = ALLREGS & ~regm;              // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg); // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);      // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
            }
            reg = findreglsw(regm);
            // NOTE(review): the non-saving path below adds REX_W to its OR
            // when I64; this path does not - confirm whether that is intended.
            genorreg(cdb, scrreg, reg);           // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            genmovreg(cdb, scrreg, AX);           // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg));         // SHL scrreg,1
            genorreg(cdb, scrreg, BX);            // OR scrreg,BX
            genorreg(cdb, scrreg, CX);            // OR scrreg,CX
            genorreg(cdb, scrreg, DX);            // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        // The value need not be preserved: OR the pieces into the MSW register
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))        // if register variable
                goto L1;                        // don't trash it
            getregs(cdb, mask(reg));            // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));   // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
       }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                  // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));       // SHL AX,1
            genorreg(cdb, AX, BX);          // OR AX,BX
            genorreg(cdb, AX, CX);          // OR AX,CX
            genorreg(cdb, AX, DX);          // OR AX,DX
        }
        else
            assert(0);
    }
    // Flag the final OR with CFpsw
    code_orflag(cdb.last(),CFpsw);
}
1869 
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Nothing is generated if `*pretregs` is 0. If every register of `retregs`
 * is acceptable to the caller, `*pretregs` is simply set to `retregs`.
 * Otherwise destination registers are allocated from `*pretregs` and the
 * value is moved there (x87 cases are delegated to fixresult87()).
 * If `*pretregs` includes mPSW, code to set the flags from the result is
 * generated as well.
 * Params:
 *      cdb = where the generated instructions are appended
 *      e = expression the result belongs to; supplies the type and is
 *          passed to cssave()
 *      retregs = registers currently holding the result, must not be 0
 *      pretregs = in: registers (and/or mPSW) the caller wants the result in;
 *                 out: registers the result actually is in
 */

@trusted
void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;           // if don't want result
    assert(e && retregs);                 // need something to work with
    regm_t forccs = *pretregs & mPSW;
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    if (tym == TYstruct)
    {
        // Substitute an integral/pointer type so a size can be looked up
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            // The value is the second byte of a register variable:
            // test the high byte register here and skip the generic test below
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        *pretregs = retregs;
    else if (forregs)             // if return the result in registers
    {
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            // 16 bit code, 8 byte value: only registers <-> stack is supported
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16; // for tstresult() below
            }
            else
            {
                // Unsupported combination. Only the diagnostic is debug-only;
                // the assert must fire in every build, otherwise non-debug
                // builds would silently continue without moving the result.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);  // allocate return regs
            if (retregs & XMMREGS)
            {
                // Source is an XMM register
                reg = findreg(retregs & XMMREGS);
                if (mask(rreg) & XMMREGS)
                    genmovreg(cdb, rreg, reg, tym);
                else
                {
                    // XMM -> general purpose register(s), going through floatreg
                    // MOVSD floatreg, XMM?
                    cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            // second half goes into the MSW register
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                // General purpose register(s) -> XMM register
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            // store both halves to floatreg, then load all 8 bytes
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                // Register pair -> register pair
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg
            }
            else
            {
                // Single general purpose register -> single general purpose register
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);    // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        if (retregs & (mST01 | mST0))
        {
            *pretregs |= forccs;
            fixresult87(cdb, e, retregs, pretregs);
        }
        else
            tstresult(cdb, retregs, tym, forregs);  // forregs doubles as the saveflag argument
    }
}
2051 
2052 /*******************************
2053  * Extra information about each CLIB runtime library function.
2054  */
2055 
/// Bit flags stored in ClibInfo.flags
enum
{
    INF32         = 1 << 0, /// function is for 32 bit only
    INFfloat      = 1 << 1, /// function is a floating point routine
    INFwkdone     = 1 << 2, /// the weak extern has already been done
    INF64         = 1 << 3, /// function is for 64 bit only
    INFpushebx    = 1 << 4, /// push EBX before load_localgot()
    INFpusheabcdx = 1 << 5, /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}
2065 
/// Calling information for one CLIB runtime library function
struct ClibInfo
{
    regm_t retregs16;   /// registers that 16 bit result is returned in
    regm_t retregs32;   /// registers that 32 bit result is returned in
    ubyte pop;          /// # of bytes popped off of stack upon return
    ubyte flags;        /// INFxxx
    byte push87;        /// # of pushes onto the 8087 stack
    byte pop87;         /// # of pops off of the 8087 stack
}
2075 
/// Checked by getClibInfo(): while this is false, the next call clears
/// Sxtrnnum, Stypidx and the INFwkdone flag of every cached CLIB symbol
/// and then sets it to true.
__gshared int clib_inited = false;          // true if initialized
2077 
/*******************************
 * Create the symbol for a CLIB runtime library function.
 * Params:
 *      name = 0-terminated name of the function
 *      desregs = registers the function destroys; every other register in
 *                mBP | mES | ALLREGS is recorded as saved
 * Returns:
 *      newly allocated extern function symbol of type tsclib
 */
@trusted
Symbol* symboly(const(char)* name, regm_t desregs)
{
    const size_t nameLength = strlen(name);
    Symbol* sym = symbol_calloc(name[0 .. nameLength]);

    sym.Stype = tsclib;
    sym.Sclass = SC.extern_;
    sym.Sfl = FLfunc;
    sym.Ssymnum = 0;

    // everything that is not destroyed is preserved across the call
    sym.Sregsaved = (mBP | mES | ALLREGS) & ~desregs;
    return sym;
}
2089 
2090 @trusted
2091 void getClibInfo(uint clib, Symbol** ps, ClibInfo** pinfo)
2092 {
2093     __gshared Symbol*[CLIB.MAX] clibsyms;
2094     __gshared ClibInfo[CLIB.MAX] clibinfo;
2095 
2096     if (!clib_inited)
2097     {
2098         for (size_t i = 0; i < CLIB.MAX; ++i)
2099         {
2100             Symbol* s = clibsyms[i];
2101             if (s)
2102             {
2103                 s.Sxtrnnum = 0;
2104                 s.Stypidx = 0;
2105                 clibinfo[i].flags &= ~INFwkdone;
2106             }
2107         }
2108         clib_inited = true;
2109     }
2110 
2111     const uint ex_unix = (EX_LINUX   | EX_LINUX64   |
2112                           EX_OSX     | EX_OSX64     |
2113                           EX_FREEBSD | EX_FREEBSD64 |
2114                           EX_OPENBSD | EX_OPENBSD64 |
2115                           EX_DRAGONFLYBSD64 |
2116                           EX_SOLARIS | EX_SOLARIS64);
2117 
2118     ClibInfo* cinfo = &clibinfo[clib];
2119     Symbol* s = clibsyms[clib];
2120     if (!s)
2121     {
2122 
2123         switch (clib)
2124         {
2125             case CLIB.lcmp:
2126                 {
2127                     const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@";
2128                     s = symboly(name, 0);
2129                 }
2130                 break;
2131 
2132             case CLIB.lmul:
2133                 {
2134                     const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@";
2135                     s = symboly(name, mAX|mCX|mDX);
2136                     cinfo.retregs16 = mDX|mAX;
2137                     cinfo.retregs32 = mDX|mAX;
2138                 }
2139                 break;
2140 
2141             case CLIB.ldiv:
2142                 cinfo.retregs16 = mDX|mAX;
2143                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2144                 {
2145                     s = symboly("__divdi3", mAX|mBX|mCX|mDX);
2146                     cinfo.flags = INFpushebx;
2147                     cinfo.retregs32 = mDX|mAX;
2148                 }
2149                 else if (config.exe & EX_SOLARIS)
2150                 {
2151                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2152                     cinfo.flags = INFpushebx;
2153                     cinfo.retregs32 = mDX|mAX;
2154                 }
2155                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2156                 {
2157                     s = symboly("_alldiv", mAX|mBX|mCX|mDX);
2158                     cinfo.flags = INFpusheabcdx;
2159                     cinfo.retregs32 = mDX|mAX;
2160                 }
2161                 else
2162                 {
2163                     const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@";
2164                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2165                     cinfo.retregs32 = mDX|mAX;
2166                 }
2167                 break;
2168 
2169             case CLIB.lmod:
2170                 cinfo.retregs16 = mCX|mBX;
2171                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2172                 {
2173                     s = symboly("__moddi3", mAX|mBX|mCX|mDX);
2174                     cinfo.flags = INFpushebx;
2175                     cinfo.retregs32 = mDX|mAX;
2176                 }
2177                 else if (config.exe & EX_SOLARIS)
2178                 {
2179                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2180                     cinfo.flags = INFpushebx;
2181                     cinfo.retregs32 = mCX|mBX;
2182                 }
2183                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2184                 {
2185                     s = symboly("_allrem", mAX|mBX|mCX|mDX);
2186                     cinfo.flags = INFpusheabcdx;
2187                     cinfo.retregs32 = mAX|mDX;
2188                 }
2189                 else
2190                 {
2191                     const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@";
2192                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2193                     cinfo.retregs32 = mCX|mBX;
2194                 }
2195                 break;
2196 
2197             case CLIB.uldiv:
2198                 cinfo.retregs16 = mDX|mAX;
2199                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2200                 {
2201                     s = symboly("__udivdi3", mAX|mBX|mCX|mDX);
2202                     cinfo.flags = INFpushebx;
2203                     cinfo.retregs32 = mDX|mAX;
2204                 }
2205                 else if (config.exe & EX_SOLARIS)
2206                 {
2207                     s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX);
2208                     cinfo.flags = INFpushebx;
2209                     cinfo.retregs32 = mDX|mAX;
2210                 }
2211                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2212                 {
2213                     s = symboly("_aulldiv", mAX|mBX|mCX|mDX);
2214                     cinfo.flags = INFpusheabcdx;
2215                     cinfo.retregs32 = mDX|mAX;
2216                 }
2217                 else
2218                 {
2219                     const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@";
2220                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2221                     cinfo.retregs32 = mDX|mAX;
2222                 }
2223                 break;
2224 
2225             case CLIB.ulmod:
2226                 cinfo.retregs16 = mCX|mBX;
2227                 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD))
2228                 {
2229                     s = symboly("__umoddi3", mAX|mBX|mCX|mDX);
2230                     cinfo.flags = INFpushebx;
2231                     cinfo.retregs32 = mDX|mAX;
2232                 }
2233                 else if (config.exe & EX_SOLARIS)
2234                 {
2235                     s = symboly("__LDIV2__", mAX|mBX|mCX|mDX);
2236                     cinfo.flags = INFpushebx;
2237                     cinfo.retregs32 = mCX|mBX;
2238                 }
2239                 else if (I32 && config.objfmt == OBJ_MSCOFF)
2240                 {
2241                     s = symboly("_aullrem", mAX|mBX|mCX|mDX);
2242                     cinfo.flags = INFpusheabcdx;
2243                     cinfo.retregs32 = mAX|mDX;
2244                 }
2245                 else
2246                 {
2247                     const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@";
2248                     s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS);
2249                     cinfo.retregs32 = mCX|mBX;
2250                 }
2251                 break;
2252 
2253             // This section is only for Windows and DOS (i.e. machines without the x87 FPU)
2254             case CLIB.dmul:
2255                 s = symboly("_DMUL@",mAX|mBX|mCX|mDX);
2256                 cinfo.retregs16 = DOUBLEREGS_16;
2257                 cinfo.retregs32 = DOUBLEREGS_32;
2258                 cinfo.pop = 8;
2259                 cinfo.flags = INFfloat;
2260                 cinfo.push87 = 1;
2261                 cinfo.pop87 = 1;
2262                 break;
2263 
2264             case CLIB.ddiv:
2265                 s = symboly("_DDIV@",mAX|mBX|mCX|mDX);
2266                 cinfo.retregs16 = DOUBLEREGS_16;
2267                 cinfo.retregs32 = DOUBLEREGS_32;
2268                 cinfo.pop = 8;
2269                 cinfo.flags = INFfloat;
2270                 cinfo.push87 = 1;
2271                 cinfo.pop87 = 1;
2272                 break;
2273 
2274             case CLIB.dtst0:
2275                 s = symboly("_DTST0@",0);
2276                 cinfo.flags = INFfloat;
2277                 break;
2278 
2279             case CLIB.dtst0exc:
2280                 s = symboly("_DTST0EXC@",0);
2281                 cinfo.flags = INFfloat;
2282                 break;
2283 
2284             case CLIB.dcmp:
2285                 s = symboly("_DCMP@",0);
2286                 cinfo.pop = 8;
2287                 cinfo.flags = INFfloat;
2288                 cinfo.push87 = 1;
2289                 cinfo.pop87 = 1;
2290                 break;
2291 
2292             case CLIB.dcmpexc:
2293                 s = symboly("_DCMPEXC@",0);
2294                 cinfo.pop = 8;
2295                 cinfo.flags = INFfloat;
2296                 cinfo.push87 = 1;
2297                 cinfo.pop87 = 1;
2298                 break;
2299 
2300             case CLIB.dneg:
2301                 s = symboly("_DNEG@",I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2302                 cinfo.retregs16 = DOUBLEREGS_16;
2303                 cinfo.retregs32 = DOUBLEREGS_32;
2304                 cinfo.flags = INFfloat;
2305                 break;
2306 
2307             case CLIB.dadd:
2308                 s = symboly("_DADD@",mAX|mBX|mCX|mDX);
2309                 cinfo.retregs16 = DOUBLEREGS_16;
2310                 cinfo.retregs32 = DOUBLEREGS_32;
2311                 cinfo.pop = 8;
2312                 cinfo.flags = INFfloat;
2313                 cinfo.push87 = 1;
2314                 cinfo.pop87 = 1;
2315                 break;
2316 
2317             case CLIB.dsub:
2318                 s = symboly("_DSUB@",mAX|mBX|mCX|mDX);
2319                 cinfo.retregs16 = DOUBLEREGS_16;
2320                 cinfo.retregs32 = DOUBLEREGS_32;
2321                 cinfo.pop = 8;
2322                 cinfo.flags = INFfloat;
2323                 cinfo.push87 = 1;
2324                 cinfo.pop87 = 1;
2325                 break;
2326 
2327             case CLIB.fmul:
2328                 s = symboly("_FMUL@",mAX|mBX|mCX|mDX);
2329                 cinfo.retregs16 = FLOATREGS_16;
2330                 cinfo.retregs32 = FLOATREGS_32;
2331                 cinfo.flags = INFfloat;
2332                 cinfo.push87 = 1;
2333                 cinfo.pop87 = 1;
2334                 break;
2335 
2336             case CLIB.fdiv:
2337                 s = symboly("_FDIV@",mAX|mBX|mCX|mDX);
2338                 cinfo.retregs16 = FLOATREGS_16;
2339                 cinfo.retregs32 = FLOATREGS_32;
2340                 cinfo.flags = INFfloat;
2341                 cinfo.push87 = 1;
2342                 cinfo.pop87 = 1;
2343                 break;
2344 
2345             case CLIB.ftst0:
2346                 s = symboly("_FTST0@",0);
2347                 cinfo.flags = INFfloat;
2348                 break;
2349 
2350             case CLIB.ftst0exc:
2351                 s = symboly("_FTST0EXC@",0);
2352                 cinfo.flags = INFfloat;
2353                 break;
2354 
2355             case CLIB.fcmp:
2356                 s = symboly("_FCMP@",0);
2357                 cinfo.flags = INFfloat;
2358                 cinfo.push87 = 1;
2359                 cinfo.pop87 = 1;
2360                 break;
2361 
2362             case CLIB.fcmpexc:
2363                 s = symboly("_FCMPEXC@",0);
2364                 cinfo.flags = INFfloat;
2365                 cinfo.push87 = 1;
2366                 cinfo.pop87 = 1;
2367                 break;
2368 
2369             case CLIB.fneg:
2370                 s = symboly("_FNEG@",I16 ? FLOATREGS_16 : FLOATREGS_32);
2371                 cinfo.retregs16 = FLOATREGS_16;
2372                 cinfo.retregs32 = FLOATREGS_32;
2373                 cinfo.flags = INFfloat;
2374                 break;
2375 
2376             case CLIB.fadd:
2377                 s = symboly("_FADD@",mAX|mBX|mCX|mDX);
2378                 cinfo.retregs16 = FLOATREGS_16;
2379                 cinfo.retregs32 = FLOATREGS_32;
2380                 cinfo.flags = INFfloat;
2381                 cinfo.push87 = 1;
2382                 cinfo.pop87 = 1;
2383                 break;
2384 
2385             case CLIB.fsub:
2386                 s = symboly("_FSUB@",mAX|mBX|mCX|mDX);
2387                 cinfo.retregs16 = FLOATREGS_16;
2388                 cinfo.retregs32 = FLOATREGS_32;
2389                 cinfo.flags = INFfloat;
2390                 cinfo.push87 = 1;
2391                 cinfo.pop87 = 1;
2392                 break;
2393 
2394             case CLIB.dbllng:
2395             {
2396                 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@";
2397                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2398                 cinfo.retregs16 = mDX | mAX;
2399                 cinfo.retregs32 = mAX;
2400                 cinfo.flags = INFfloat;
2401                 cinfo.push87 = 1;
2402                 cinfo.pop87 = 1;
2403                 break;
2404             }
2405 
2406             case CLIB.lngdbl:
2407             {
2408                 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@";
2409                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2410                 cinfo.retregs16 = DOUBLEREGS_16;
2411                 cinfo.retregs32 = DOUBLEREGS_32;
2412                 cinfo.flags = INFfloat;
2413                 cinfo.push87 = 1;
2414                 cinfo.pop87 = 1;
2415                 break;
2416             }
2417 
2418             case CLIB.dblint:
2419             {
2420                 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@";
2421                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2422                 cinfo.retregs16 = mAX;
2423                 cinfo.retregs32 = mAX;
2424                 cinfo.flags = INFfloat;
2425                 cinfo.push87 = 1;
2426                 cinfo.pop87 = 1;
2427                 break;
2428             }
2429 
2430             case CLIB.intdbl:
2431             {
2432                 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@";
2433                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2434                 cinfo.retregs16 = DOUBLEREGS_16;
2435                 cinfo.retregs32 = DOUBLEREGS_32;
2436                 cinfo.flags = INFfloat;
2437                 cinfo.push87 = 1;
2438                 cinfo.pop87 = 1;
2439                 break;
2440             }
2441 
2442             case CLIB.dbluns:
2443             {
2444                 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@";
2445                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2446                 cinfo.retregs16 = mAX;
2447                 cinfo.retregs32 = mAX;
2448                 cinfo.flags = INFfloat;
2449                 cinfo.push87 = 1;
2450                 cinfo.pop87 = 1;
2451                 break;
2452             }
2453 
2454             case CLIB.unsdbl:
2455                 // Y(DOUBLEREGS_32,"__UNSDBL"),         // CLIB.unsdbl
2456                 // Y(DOUBLEREGS_16,"_UNSDBL@"),
2457                 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _UNSDBL@     unsdbl
2458             {
2459                 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@";
2460                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2461                 cinfo.retregs16 = DOUBLEREGS_16;
2462                 cinfo.retregs32 = DOUBLEREGS_32;
2463                 cinfo.flags = INFfloat;
2464                 cinfo.push87 = 1;
2465                 cinfo.pop87 = 1;
2466                 break;
2467             }
2468 
2469             case CLIB.dblulng:
2470             {
2471                 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@";
2472                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2473                 cinfo.retregs16 = mDX|mAX;
2474                 cinfo.retregs32 = mAX;
2475                 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat;
2476                 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1;
2477                 cinfo.pop87 = 1;
2478                 break;
2479             }
2480 
2481             case CLIB.ulngdbl:
2482             {
2483                 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@";
2484                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2485                 cinfo.retregs16 = DOUBLEREGS_16;
2486                 cinfo.retregs32 = DOUBLEREGS_32;
2487                 cinfo.flags = INFfloat;
2488                 cinfo.push87 = 1;
2489                 cinfo.pop87 = 1;
2490                 break;
2491             }
2492 
2493             case CLIB.dblflt:
2494             {
2495                 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@";
2496                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2497                 cinfo.retregs16 = FLOATREGS_16;
2498                 cinfo.retregs32 = FLOATREGS_32;
2499                 cinfo.flags = INFfloat;
2500                 cinfo.push87 = 1;
2501                 cinfo.pop87 = 1;
2502                 break;
2503             }
2504 
2505             case CLIB.fltdbl:
2506             {
2507                 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@";
2508                 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32);
2509                 cinfo.retregs16 = DOUBLEREGS_16;
2510                 cinfo.retregs32 = DOUBLEREGS_32;
2511                 cinfo.flags = INFfloat;
2512                 cinfo.push87 = 1;
2513                 cinfo.pop87 = 1;
2514                 break;
2515             }
2516 
2517             case CLIB.dblllng:
2518             {
2519                 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@";
2520                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2521                 cinfo.retregs16 = DOUBLEREGS_16;
2522                 cinfo.retregs32 = mDX|mAX;
2523                 cinfo.flags = INFfloat;
2524                 cinfo.push87 = 1;
2525                 cinfo.pop87 = 1;
2526                 break;
2527             }
2528 
2529             case CLIB.llngdbl:
2530             {
2531                 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@";
2532                 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2533                 cinfo.retregs16 = DOUBLEREGS_16;
2534                 cinfo.retregs32 = DOUBLEREGS_32;
2535                 cinfo.flags = INFfloat;
2536                 cinfo.push87 = 1;
2537                 cinfo.pop87 = 1;
2538                 break;
2539             }
2540 
2541             case CLIB.dblullng:
2542             {
2543                 if (config.exe == EX_WIN64)
2544                 {
2545                     s = symboly("__DBLULLNG", DOUBLEREGS_32);
2546                     cinfo.retregs32 = mAX;
2547                     cinfo.flags = INFfloat;
2548                     cinfo.push87 = 2;
2549                     cinfo.pop87 = 2;
2550                 }
2551                 else
2552                 {
2553                     const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@";
2554                     s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2555                     cinfo.retregs16 = DOUBLEREGS_16;
2556                     cinfo.retregs32 = I64 ? mAX : mDX|mAX;
2557                     cinfo.flags = INFfloat;
2558                     cinfo.push87 = (config.exe & ex_unix) ? 2 : 1;
2559                     cinfo.pop87 = (config.exe & ex_unix) ? 2 : 1;
2560                 }
2561                 break;
2562             }
2563 
2564             case CLIB.ullngdbl:
2565             {
2566                 if (config.exe == EX_WIN64)
2567                 {
2568                     s = symboly("__ULLNGDBL", DOUBLEREGS_32);
2569                     cinfo.retregs32 = mAX;
2570                     cinfo.flags = INFfloat;
2571                     cinfo.push87 = 1;
2572                     cinfo.pop87 = 1;
2573                 }
2574                 else
2575                 {
2576                     const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@";
2577                     s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32);
2578                     cinfo.retregs16 = DOUBLEREGS_16;
2579                     cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32;
2580                     cinfo.flags = INFfloat;
2581                     cinfo.push87 = 1;
2582                     cinfo.pop87 = 1;
2583                 }
2584                 break;
2585             }
2586 
2587             case CLIB.dtst:
2588             {
2589                 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@";
2590                 s = symboly(name, 0);
2591                 cinfo.flags = INFfloat;
2592                 break;
2593             }
2594 
2595             case CLIB.vptrfptr:
2596             {
2597                 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@";
2598                 s = symboly(name, mES|mBX);
2599                 cinfo.retregs16 = mES|mBX;
2600                 cinfo.retregs32 = mES|mBX;
2601                 break;
2602             }
2603 
2604             case CLIB.cvptrfptr:
2605             {
2606                 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@";
2607                 s = symboly(name, mES|mBX);
2608                 cinfo.retregs16 = mES|mBX;
2609                 cinfo.retregs32 = mES|mBX;
2610                 break;
2611             }
2612 
2613             case CLIB._87topsw:
2614             {
2615                 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@";
2616                 s = symboly(name, 0);
2617                 cinfo.flags = INFfloat;
2618                 break;
2619             }
2620 
2621             case CLIB.fltto87:
2622             {
2623                 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@";
2624                 s = symboly(name, mST0);
2625                 cinfo.retregs16 = mST0;
2626                 cinfo.retregs32 = mST0;
2627                 cinfo.flags = INFfloat;
2628                 cinfo.push87 = 1;
2629                 break;
2630             }
2631 
2632             case CLIB.dblto87:
2633             {
2634                 const(char)* name = (config.exe & ex_unix) ? "__DBLTO87" : "_DBLTO87@";
2635                 s = symboly(name, mST0);
2636                 cinfo.retregs16 = mST0;
2637                 cinfo.retregs32 = mST0;
2638                 cinfo.flags = INFfloat;
2639                 cinfo.push87 = 1;
2640                 break;
2641             }
2642 
2643             case CLIB.dblint87:
2644             {
2645                 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@";
2646                 s = symboly(name, mST0|mAX);
2647                 cinfo.retregs16 = mAX;
2648                 cinfo.retregs32 = mAX;
2649                 cinfo.flags = INFfloat;
2650                 break;
2651             }
2652 
2653             case CLIB.dbllng87:
2654             {
2655                 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@";
2656                 s = symboly(name, mST0|mAX|mDX);
2657                 cinfo.retregs16 = mDX|mAX;
2658                 cinfo.retregs32 = mAX;
2659                 cinfo.flags = INFfloat;
2660                 break;
2661             }
2662 
2663             case CLIB.ftst:
2664             {
2665                 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@";
2666                 s = symboly(name, 0);
2667                 cinfo.flags = INFfloat;
2668                 break;
2669             }
2670 
2671             case CLIB.fcompp:
2672             {
2673                 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@";
2674                 s = symboly(name, 0);
2675                 cinfo.retregs16 = mPSW;
2676                 cinfo.retregs32 = mPSW;
2677                 cinfo.flags = INFfloat;
2678                 cinfo.pop87 = 2;
2679                 break;
2680             }
2681 
2682             case CLIB.ftest:
2683             {
2684                 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@";
2685                 s = symboly(name, 0);
2686                 cinfo.retregs16 = mPSW;
2687                 cinfo.retregs32 = mPSW;
2688                 cinfo.flags = INFfloat;
2689                 break;
2690             }
2691 
2692             case CLIB.ftest0:
2693             {
2694                 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@";
2695                 s = symboly(name, 0);
2696                 cinfo.retregs16 = mPSW;
2697                 cinfo.retregs32 = mPSW;
2698                 cinfo.flags = INFfloat;
2699                 break;
2700             }
2701 
2702             case CLIB.fdiv87:
2703             {
2704                 const(char)* name = (config.exe & ex_unix) ? "__FDIVP" : "_FDIVP";
2705                 s = symboly(name, mST0|mAX|mBX|mCX|mDX);
2706                 cinfo.retregs16 = mST0;
2707                 cinfo.retregs32 = mST0;
2708                 cinfo.flags = INFfloat;
2709                 cinfo.push87 = 1;
2710                 cinfo.pop87 = 1;
2711                 break;
2712             }
2713 
2714             // Complex numbers
2715             case CLIB.cmul:
2716             {
2717                 s = symboly("_Cmul", mST0|mST01);
2718                 cinfo.retregs16 = mST01;
2719                 cinfo.retregs32 = mST01;
2720                 cinfo.flags = INF32|INFfloat;
2721                 cinfo.push87 = 3;
2722                 cinfo.pop87 = 5;
2723                 break;
2724             }
2725 
2726             case CLIB.cdiv:
2727             {
2728                 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01);
2729                 cinfo.retregs16 = mST01;
2730                 cinfo.retregs32 = mST01;
2731                 cinfo.flags = INF32|INFfloat;
2732                 cinfo.push87 = 0;
2733                 cinfo.pop87 = 2;
2734                 break;
2735             }
2736 
2737             case CLIB.ccmp:
2738             {
2739                 s = symboly("_Ccmp", mAX|mST0|mST01);
2740                 cinfo.retregs16 = mPSW;
2741                 cinfo.retregs32 = mPSW;
2742                 cinfo.flags = INF32|INFfloat;
2743                 cinfo.push87 = 0;
2744                 cinfo.pop87 = 4;
2745                 break;
2746             }
2747 
2748             case CLIB.u64_ldbl:
2749             {
2750                 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL";
2751                 s = symboly(name, mST0);
2752                 cinfo.retregs16 = mST0;
2753                 cinfo.retregs32 = mST0;
2754                 cinfo.flags = INF32|INF64|INFfloat;
2755                 cinfo.push87 = 2;
2756                 cinfo.pop87 = 1;
2757                 break;
2758             }
2759 
2760             case CLIB.ld_u64:
2761             {
2762                 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF ||
2763                                                              config.objfmt == OBJ_MACH ?
2764                                                                 "__LDBLULLNG" : "___LDBLULLNG")
2765                                                           : "__LDBLULLNG";
2766                 s = symboly(name, mST0|mAX|mDX);
2767                 cinfo.retregs16 = 0;
2768                 cinfo.retregs32 = mDX|mAX;
2769                 cinfo.flags = INF32|INF64|INFfloat;
2770                 cinfo.push87 = 1;
2771                 cinfo.pop87 = 2;
2772                 break;
2773             }
2774 
2775             default:
2776                 assert(0);
2777         }
2778         clibsyms[clib] = s;
2779     }
2780 
2781     *ps = s;
2782     *pinfo = cinfo;
2783 }
2784 
/********************************
 * Generate code sequence to call C runtime library support routine.
 * Params:
 *      cdb = receives the generated code
 *      e = elem the call is generated for, handed on to fixresult()
 *      clib = CLIB.xxxx
 *      pretregs = requested result registers, handed on to fixresult()
 *      keepmask = mask of registers not to destroy. Currently can
 *              handle only 1. Should use a temporary rather than
 *              push/pop for speed.
 */

@trusted
void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    Symbol* s;
    ClibInfo* cinfo;
    getClibInfo(clib, &s, &cinfo);

    // Routines flagged as 32/64 bit only must never be requested for 16 bit code
    assert(!I16 || !(cinfo.flags & (INF32 | INF64)));

    // Registers not preserved by the routine, minus the ones the caller wants kept
    const regm_t destroyed = ~s.Sregsaved & (mES | mBP | ALLREGS);
    getregs(cdb, destroyed & ~keepmask);

    // Only registers the routine does not preserve itself need a save/restore
    keepmask &= ~s.Sregsaved;
    const int npushed = popcnt(keepmask);
    CodeBuilder cdbpop;                 // collects the restore code, appended after the call
    cdbpop.ctor();
    gensaverestore(keepmask, cdb, cdbpop);

    // x87 stack accounting as given by the routine's ClibInfo entry
    save87regs(cdb, cinfo.push87);
    foreach (i; 0 .. cinfo.push87)
        push87(cdb);
    foreach (i; 0 .. cinfo.pop87)
        pop87();

    const bool inlineLmul = config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32;
    if (inlineLmul)
    {
        // Inline replacement for the call
        static immutable ubyte[23] lmul =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(lmul[]);
    }
    else
    {
        // Order in which ECX, EBX, EDX, EAX get pushed
        static immutable ubyte[4] eabcdx = [ CX, BX, DX, AX ];

        makeitextern(s);

        const int wantEbx = (cinfo.flags & INFpushebx) != 0;
        const int wantAll = (cinfo.flags & INFpusheabcdx) != 0;

        int nalign = 0;                 // bytes to take back off the stack after the call
        if (STACKALIGN >= 16)
        {
            // Align the stack (assume no args on stack)
            int npush = (npushed + wantEbx + 4 * wantAll) * REGSIZE + stackpush;
            const misalign = npush & (STACKALIGN - 1);
            if (misalign)
            {
                nalign = STACKALIGN - misalign;
                cod3_stackadj(cdb, nalign);
            }
        }

        if (wantEbx)
        {
            if (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64 | EX_DRAGONFLYBSD64))
            {
                foreach (r; eabcdx)
                    cdb.gen1(0x50 + r);                          // PUSH ECX, EBX, EDX, EAX
                nalign += 4 * REGSIZE;
            }
            else
            {
                cdb.gen1(0x50 + BX);                             // PUSH EBX
                nalign += REGSIZE;
            }
        }

        if (wantAll)
        {
            // Not added to nalign, so not adjusted away after the call
            foreach (r; eabcdx)
                cdb.gen1(0x50 + r);                              // PUSH ECX, EBX, EDX, EAX
        }

        // Note: not for OSX
        /* Pass EBX on the stack instead, this is because EBX is used
         * for shared library function calls
         */
        if ((config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) &&
            (config.flags3 & CFG3pic))
        {
            load_localgot(cdb);         // EBX gets set to this value
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8, 0, FLfunc, s);  // CALL s
        if (nalign)
            cod3_stackadj(cdb, -nalign);
        calledafunc = 1;
    }

    if (I16)
        stackpush -= cinfo.pop;

    regm_t retregs = I16 ? cinfo.retregs16 : cinfo.retregs32;
    cdb.append(cdbpop);                 // restore the kept registers
    fixresult(cdb, e, retregs, pretregs);
}
2893 
2894 
/*************************************************
 * One argument of a function call, as collected by fillParameters().
 */
struct Parameter
{
    elem* e;            // expression of the argument
    reg_t reg;          // first register it is passed in, NOREG if passed on the stack
    reg_t reg2;         // second register, for arguments passed in a register pair
    uint numalign;      // bytes of padding inserted before it on the stack
}

/*************************************************
 * Helper function for converting OPparam's into array of Parameters.
 * Params:
 *      e = tree of OPparam nodes; the OPparam nodes themselves are freed
 *      parameters = array to fill, only the `e` field of each entry is written
 *      pi = index of the next free entry, advanced by one per argument stored
 */
@trusted
void fillParameters(elem* e, Parameter* parameters, int* pi)
{
    if (e.Eoper != OPparam)
    {
        // Leaf: an actual argument
        parameters[*pi].e = e;
        ++*pi;
        return;
    }

    // Interior node: collect the left subtree first, then the right
    fillParameters(e.EV.E1, parameters, pi);
    fillParameters(e.EV.E2, parameters, pi);
    freenode(e);
}
2915 
/***********************************
 * Set up the state for assigning the arguments of a call to registers.
 * Params:
 *      tyf = type of the function
 * Returns:
 *      FuncParamRegs holding tyf and the integer / floating point argument
 *      register lists for the current target
 */
@trusted
FuncParamRegs FuncParamRegs_create(tym_t tyf)
{
    // Argument registers, in the order they are handed out
    static immutable ubyte[1] jfuncRegs     = [ AX ];
    static immutable ubyte[1] mfuncRegs     = [ CX ];
    static immutable ubyte[4] win64Regs     = [ CX,DX,R8,R9 ];
    static immutable ubyte[4] win64XmmRegs  = [ XMM0, XMM1, XMM2, XMM3 ];
    static immutable ubyte[6] other64Regs   = [ DI,SI,DX,CX,R8,R9 ];
    static immutable ubyte[8] other64XmmRegs = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ];

    FuncParamRegs result;
    result.tyf = tyf;

    if (I16)
    {
        // No register arguments at all
        result.numintegerregs = 0;
        result.numfloatregs = 0;
        return result;
    }

    if (I32)
    {
        // At most one integer register, and only for TYjfunc / TYmfunc
        if (tyf == TYjfunc)
        {
            result.argregs = jfuncRegs.ptr;
            result.numintegerregs = jfuncRegs.length;
        }
        else if (tyf == TYmfunc)
        {
            result.argregs = mfuncRegs.ptr;
            result.numintegerregs = mfuncRegs.length;
        }
        else
            result.numintegerregs = 0;
        result.numfloatregs = 0;
        return result;
    }

    if (!I64)
        assert(0);

    if (config.exe == EX_WIN64)
    {
        result.argregs = win64Regs.ptr;
        result.numintegerregs = win64Regs.length;

        result.floatregs = win64XmmRegs.ptr;
        result.numfloatregs = win64XmmRegs.length;
    }
    else
    {
        result.argregs = other64Regs.ptr;
        result.numintegerregs = other64Regs.length;

        result.floatregs = other64XmmRegs.ptr;
        result.numfloatregs = other64XmmRegs.length;
    }
    return result;
}
2973 
/*****************************************
 * Determine if a parameter of type t and ty is a candidate for being
 * passed in a single general purpose register.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type code of the parameter
 * Returns:
 *      false       floating point, or larger than a near pointer, or (other
 *                  than for EX_WIN64) an aggregate whose size is not 1, 2, 4 or 8
 *      true        otherwise
 */

@trusted
private bool type_jparam2(type* t, tym_t ty)
{
    const tym_t tyb = tybasic(ty);

    if (tyfloating(tyb))
        return false;

    // Scalars only need to fit into a pointer sized register
    if (tyb != TYstruct && tyb != TYarray)
        return tysize(tyb) <= _tysize[TYnptr];

    type_debug(t);
    const targ_size_t sz = type_size(t);
    if (sz > _tysize[TYnptr])
        return false;
    if (config.exe == EX_WIN64)
        return true;
    return sz == 1 || sz == 2 || sz == 4 || sz == 8;
}
3001 
/*****************************************
 * Allocate parameter of type t and ty to registers *preg1 and *preg2.
 * Params:
 *      fpr = allocation state for the call; its register counters advance
 *            only when registers are actually assigned
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type code of the parameter
 *      preg1 = receives the first register, NOREG if none
 *      preg2 = receives the second register, NOREG if none
 * Returns:
 *      0       not allocated to any register
 *      1       *preg1, *preg2 set to allocated register pair
 */
@trusted
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    // Type of the second half; ty2 stays TYMAX when there is no second half
    type* t2 = null;
    tym_t ty2 = TYMAX;

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        ty2 = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    const bool arrayOfOne = tybasic(ty) == TYarray &&
                            tybasic(t.Tty) == TYarray &&
                            t.Tdim == 1 &&
                            !(t.Tty & mTYvolatile) &&
                            type_size(t.Tnext) > 1;
    if (arrayOfOne)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    // Count this parameter, whether or not it ends up in a register
    fpr.i += 1;

    // If struct or array
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            // Replacement types the aggregate is passed as
            type* targ1 = null;
            type* targ2 = null;
            switch (tybasic(t.Tty))
            {
                case TYstruct:
                    targ1 = t.Ttag.Sstruct.Sarg1type;
                    targ2 = t.Ttag.Sstruct.Sarg2type;
                    break;

                case TYarray:
                    if (I64)
                        argtypes(t, targ1, targ2);
                    break;

                default:
                    assert(0);
            }

            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;
        }
    }

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
            ty = TYnptr;                // treat like a struct
    }
    else if (I64)
    {
        const tym_t tyb = tybasic(ty);

        if ((tyb == TYcent || tyb == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tyb == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tyb == TYcfloat &&
            fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // Counters to go back to if only the first half finds a register
    const int regcntsave = fpr.regcnt;
    const int xmmcntsave = fpr.xmmcnt;

    // First pass handles (t, ty) -> *preg1, second pass (t2, ty2) -> *preg2
    reg_t* preg = preg1;
    foreach (pass; 0 .. 2)
    {
        const bool gprAllowed = I64 ||
            (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc));

        if (fpr.regcnt < fpr.numintegerregs && gprAllowed && type_jparam2(t, ty))
        {
            *preg = fpr.argregs[fpr.regcnt];
            ++fpr.regcnt;
            if (config.exe == EX_WIN64)
                ++fpr.xmmcnt;           // Win64 advances both counters together
        }
        else if (fpr.xmmcnt < fpr.numfloatregs && tyxmmreg(ty))
        {
            *preg = fpr.floatregs[fpr.xmmcnt];
            if (config.exe == EX_WIN64)
                ++fpr.regcnt;           // Win64 advances both counters together
            ++fpr.xmmcnt;
        }
        else
        {
            // Failed to allocate to a register
            if (pass == 1)
            {
                /* Unwind first preg1 assignment, because it's both or nothing
                 */
                *preg1 = NOREG;
                fpr.regcnt = regcntsave;
                fpr.xmmcnt = xmmcntsave;
            }
            return 0;
        }

        if (tybasic(ty2) == TYMAX)
            break;                      // no second half
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}
3169 
/***************************************
 * Finds replacement types for register passing of aggregates.
 *
 * Only arrays are handled here; for structs both results are set to null
 * (see the TODO at the end).
 * Params:
 *      t = type of the aggregate. If it is null or not an aggregate,
 *          arg1type and arg2type are left untouched.
 *      arg1type = set to the type the first part is passed as, null if none
 *      arg2type = set to the type the second part is passed as, null if none
 */
@trusted
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))  // size is not a power of 2
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        if (sz <= 2 * REGSIZE)
        {
            /* Start out with integer types: the first covers up to REGSIZE bytes,
             * the second (only if sz > REGSIZE) covers the remainder.
             */
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ? tstypes[TYlong]
                         : tstypes[TYllong];
                argtype = &arg2type;
                argsz = sz - REGSIZE;
            }
        }

        if (I64 && config.exe != EX_WIN64)
        {
            /* Find the innermost element type of (possibly nested) arrays.
             * Strip the qualifier bits before the first comparison too, as the
             * loop body does, so that a qualified nested array element type
             * is descended into just like an unqualified one.
             */
            type* tn = t.Tnext;
            tym_t tyn = tybasic(tn.Tty);
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    // Same as passing the struct itself
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                // Otherwise continue with the struct's first replacement type, if any
                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            if (sz == tysize(tyn))
            {
                // The array is exactly one element in size
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            if (sz <= 16)
            {
                // Floating point elements replace the integer types chosen above
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}
3272 
3273 /*******************************
3274  * Generate code sequence for function call.
3275  */
3276 
3277 @trusted
3278 void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
3279 {
3280     //printf("cdfunc()\n"); elem_print(e);
3281     assert(e);
3282     uint numpara = 0;               // bytes of parameters
3283     uint numalign = 0;              // bytes to align stack before pushing parameters
3284     uint stackpushsave = stackpush;            // so we can compute # of parameters
3285     cgstate.stackclean++;
3286     regm_t keepmsk = 0;
3287     int xmmcnt = 0;
3288     tym_t tyf = tybasic(e.EV.E1.Ety);        // the function type
3289 
3290     // Easier to deal with parameters as an array: parameters[0..np]
3291     int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0;
3292     Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof);
3293 
3294     if (np)
3295     {
3296         int n = 0;
3297         fillParameters(e.EV.E2, parameters, &n);
3298         assert(n == np);
3299     }
3300 
3301     Symbol *sf = null;                  // symbol of the function being called
3302     if (e.EV.E1.Eoper == OPvar)
3303         sf = e.EV.E1.EV.Vsym;
3304 
3305     /* Assume called function access statics
3306      */
3307     if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64 | EX_OPENBSD | EX_OPENBSD64) &&
3308         config.flags3 & CFG3pic)
3309         cgstate.accessedTLS = true;
3310 
3311     /* Special handling for call to __tls_get_addr, we must save registers
3312      * before evaluating the parameter, so that the parameter load and call
3313      * are adjacent.
3314      */
3315     if (np == 1 && sf)
3316     {
3317         if (sf == tls_get_addr_sym)
3318             getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
3319     }
3320 
3321     uint stackalign = REGSIZE;
3322     if (tyf == TYf16func)
3323         stackalign = 2;
3324     // Figure out which parameters go in registers.
3325     // Compute numpara, the total bytes pushed on the stack
3326     FuncParamRegs fpr = FuncParamRegs_create(tyf);
3327     for (int i = np; --i >= 0;)
3328     {
3329         elem *ep = parameters[i].e;
3330         uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf));     // align on stack boundary
3331         if (config.exe == EX_WIN64)
3332         {
3333             //printf("[%d] size = %u, numpara = %d ep = %p %s\n", i, psize, numpara, ep, tym_str(ep.Ety));
3334             debug
3335             if (psize > REGSIZE) elem_print(e);
3336 
3337             assert(psize <= REGSIZE);
3338             psize = REGSIZE;
3339         }
3340         //printf("[%d] size = %u, numpara = %d %s\n", i, psize, numpara, tym_str(ep.Ety));
3341         if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, &parameters[i].reg, &parameters[i].reg2))
3342         {
3343             if (config.exe == EX_WIN64)
3344                 numpara += REGSIZE;             // allocate stack space for it anyway
3345             continue;   // goes in register, not stack
3346         }
3347 
3348         // Parameter i goes on the stack
3349         parameters[i].reg = NOREG;
3350         uint alignsize = el_alignsize(ep);
3351         parameters[i].numalign = 0;
3352         if (alignsize > stackalign &&
3353             (I64 || (alignsize >= 16 &&
3354                 (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety))))))
3355         {
3356             if (alignsize > STACKALIGN)
3357             {
3358                 STACKALIGN = alignsize;
3359                 enforcealign = true;
3360             }
3361             uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
3362             parameters[i].numalign = newnumpara - numpara;
3363             numpara = newnumpara;
3364             assert(config.exe != EX_WIN64);
3365         }
3366         numpara += psize;
3367     }
3368 
3369     if (config.exe == EX_WIN64)
3370     {
3371         if (numpara < 4 * REGSIZE)
3372             numpara = 4 * REGSIZE;
3373     }
3374 
3375     //printf("numpara = %d, stackpush = %d\n", numpara, stackpush);
3376     assert((numpara & (REGSIZE - 1)) == 0);
3377     assert((stackpush & (REGSIZE - 1)) == 0);
3378 
3379     /* Should consider reordering the order of evaluation of the parameters
3380      * so that args that go into registers are evaluated after args that get
3381      * pushed. We can reorder args that are constants or relconst's.
3382      */
3383 
3384     /* Determine if we should use cgstate.funcarg for the parameters or push them
3385      */
3386     bool usefuncarg = false;
3387     static if (0)
3388     {
3389         printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size,
3390             !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)),
3391             cast(int)numpara, !stackpush,
3392             (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos),
3393             (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16);
3394     }
3395     if (config.flags4 & CFG4speed &&
3396         !Alloca.size &&
3397         /* The cleanup code calls a local function, leaving the return address on
3398          * the top of the stack. If parameters are placed there, the return address
3399          * is stepped on.
3400          * A better solution is turn this off only inside the cleanup code.
3401          */
3402         !usednteh &&
3403         !calledFinally &&
3404         (numpara || config.exe == EX_WIN64) &&
3405         stackpush == 0 &&               // cgstate.funcarg needs to be at top of stack
3406         (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) &&
3407         (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) &&
3408         !anyiasm && !I16
3409        )
3410     {
3411         for (int i = 0; i < np; i++)
3412         {
3413             elem* ep = parameters[i].e;
3414             int preg = parameters[i].reg;
3415             //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
3416             if (preg == NOREG)
3417             {
3418                 switch (ep.Eoper)
3419                 {
3420                     case OPstrctor:
3421                     case OPstrthis:
3422                     case OPstrpar:
3423                     case OPnp_fp:
3424                         goto Lno;
3425 
3426                     default:
3427                         break;
3428                 }
3429             }
3430         }
3431 
3432         if (numpara > cgstate.funcarg.size)
3433         {   // New high water mark
3434             //printf("increasing size from %d to %d\n", cast(int)cgstate.funcarg.size, cast(int)numpara);
3435             cgstate.funcarg.size = numpara;
3436         }
3437         usefuncarg = true;
3438     }
3439   Lno:
3440 
3441     /* Adjust start of the stack so after all args are pushed,
3442      * the stack will be aligned.
3443      */
3444     if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1))
3445     {
3446         numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
3447         cod3_stackadj(cdb, numalign);
3448         cdb.genadjesp(numalign);
3449         stackpush += numalign;
3450         stackpushsave += numalign;
3451     }
3452     assert(stackpush == stackpushsave);
3453     if (config.exe == EX_WIN64)
3454     {
3455         //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush);
3456         assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE));
3457 
3458         // Allocate stack space for four entries anyway
3459         // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29
3460     }
3461 
3462     int[XMM7 + 1] regsaved = void;
3463     memset(regsaved.ptr, -1, regsaved.sizeof);
3464     CodeBuilder cdbrestore;
3465     cdbrestore.ctor();
3466     regm_t saved = 0;
3467     targ_size_t funcargtossave = cgstate.funcargtos;
3468     targ_size_t funcargtos = numpara;
3469     //printf("funcargtos1 = %d\n", cast(int)funcargtos);
3470 
3471     /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
3472      * float and double parameters go into XMM0..XMM7
3473      * For variadic functions, count of XMM registers used goes in AL
3474      */
3475     for (int i = 0; i < np; i++)
3476     {
3477         elem* ep = parameters[i].e;
3478         int preg = parameters[i].reg;
3479         //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
3480         if (preg == NOREG)
3481         {
3482             /* Push parameter on stack, but keep track of registers used
3483              * in the process. If they interfere with keepmsk, we'll have
3484              * to save/restore them.
3485              */
3486             CodeBuilder cdbsave;
3487             cdbsave.ctor();
3488             regm_t overlap = msavereg & keepmsk;
3489             msavereg |= keepmsk;
3490             CodeBuilder cdbparams;
3491             cdbparams.ctor();
3492             if (usefuncarg)
3493                 movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf);
3494             else
3495                 pushParams(cdbparams,ep,stackalign, tyf);
3496             regm_t tosave = keepmsk & ~msavereg;
3497             msavereg &= ~keepmsk | overlap;
3498 
3499             // tosave is the mask to save and restore
3500             for (reg_t j = 0; tosave; j++)
3501             {
3502                 regm_t mi = mask(j);
3503                 assert(j <= XMM7);
3504                 if (mi & tosave)
3505                 {
3506                     uint idx;
3507                     regsave.save(cdbsave, j, &idx);
3508                     regsave.restore(cdbrestore, j, idx);
3509                     saved |= mi;
3510                     keepmsk &= ~mi;             // don't need to keep these for rest of params
3511                     tosave &= ~mi;
3512                 }
3513             }
3514 
3515             cdb.append(cdbsave);
3516             cdb.append(cdbparams);
3517 
3518             // Alignment for parameter comes after it got pushed
3519             const uint numalignx = parameters[i].numalign;
3520             if (usefuncarg)
3521             {
3522                 funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx;
3523                 cgstate.funcargtos = funcargtos;
3524             }
3525             else if (numalignx)
3526             {
3527                 cod3_stackadj(cdb, numalignx);
3528                 cdb.genadjesp(numalignx);
3529                 stackpush += numalignx;
3530             }
3531         }
3532         else
3533         {
3534             // Goes in register preg, not stack
3535             regm_t retregs = mask(preg);
3536             if (retregs & XMMREGS)
3537                 ++xmmcnt;
3538             int preg2 = parameters[i].reg2;
3539             reg_t mreg,lreg;
3540             if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat)
3541             {
3542                 assert(ep.Eoper != OPstrthis);
3543                 if (mask(preg2) & XMMREGS)
3544                     ++xmmcnt;
3545                 if (tybasic(ep.Ety) == TYcfloat)
3546                 {
3547                     lreg = ST01;
3548                     mreg = NOREG;
3549                 }
3550                 else if (tyrelax(ep.Ety) == TYcent)
3551                 {
3552                     lreg = mask(preg ) & mLSW ? cast(reg_t)preg  : AX;
3553                     mreg = mask(preg2) & mMSW ? cast(reg_t)preg2 : DX;
3554                 }
3555                 else
3556                 {
3557                     lreg = XMM0;
3558                     mreg = XMM1;
3559                 }
3560                 retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG);
3561                 CodeBuilder cdbsave;
3562                 cdbsave.ctor();
3563                 if (keepmsk & retregs)
3564                 {
3565                     regm_t tosave = keepmsk & retregs;
3566 
3567                     // tosave is the mask to save and restore
3568                     for (reg_t j = 0; tosave; j++)
3569                     {
3570                         regm_t mi = mask(j);
3571                         assert(j <= XMM7);
3572                         if (mi & tosave)
3573                         {
3574                             uint idx;
3575                             regsave.save(cdbsave, j, &idx);
3576                             regsave.restore(cdbrestore, j, idx);
3577                             saved |= mi;
3578                             keepmsk &= ~mi;             // don't need to keep these for rest of params
3579                             tosave &= ~mi;
3580                         }
3581                     }
3582                 }
3583                 cdb.append(cdbsave);
3584 
3585                 scodelem(cdb, ep, &retregs, keepmsk, false);
3586 
3587                 // Move result [mreg,lreg] into parameter registers from [preg2,preg]
3588                 retregs = 0;
3589                 if (preg != lreg)
3590                     retregs |= mask(preg);
3591                 if (preg2 != mreg)
3592                     retregs |= mask(preg2);
3593                 retregs &= ~mask(NOREG);
3594                 getregs(cdb,retregs);
3595 
3596                 tym_t ty1 = tybasic(ep.Ety);
3597                 tym_t ty2 = ty1;
3598                 if (ep.Ety & mTYgprxmm)
3599                 {
3600                     ty1 = TYllong;
3601                     ty2 = TYdouble;
3602                 }
3603                 else if (ep.Ety & mTYxmmgpr)
3604                 {
3605                     ty1 = TYdouble;
3606                     ty2 = TYllong;
3607                 }
3608                 else if (ty1 == TYstruct)
3609                 {
3610                     type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type;
3611                     type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type;
3612                     if (targ1)
3613                         ty1 = targ1.Tty;
3614                     if (targ2)
3615                         ty2 = targ2.Tty;
3616                 }
3617                 else if (tyrelax(ty1) == TYcent)
3618                     ty1 = ty2 = TYllong;
3619                 else if (tybasic(ty1) == TYcdouble)
3620                     ty1 = ty2 = TYdouble;
3621 
3622                 if (tybasic(ep.Ety) == TYcfloat)
3623                 {
3624                     assert(I64);
3625                     assert(lreg == ST01 && mreg == NOREG);
3626                     // spill
3627                     pop87();
3628                     pop87();
3629                     cdb.genfltreg(0xD9, 3, tysize(TYfloat));
3630                     genfwait(cdb);
3631                     cdb.genfltreg(0xD9, 3, 0);
3632                     genfwait(cdb);
3633                     // reload
3634                     if (config.exe == EX_WIN64)
3635                     {
3636                         cdb.genfltreg(LOD, preg, 0);
3637                         code_orrex(cdb.last(), REX_W);
3638                     }
3639                     else
3640                     {
3641                         assert(mask(preg) & XMMREGS);
3642                         cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble);
3643                     }
3644                 }
3645                 else foreach (v; 0 .. 2)
3646                 {
3647                     if (v ^ (preg != mreg))
3648                         genmovreg(cdb, preg, lreg, ty1);
3649                     else
3650                         genmovreg(cdb, preg2, mreg, ty2);
3651                 }
3652 
3653                 retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG);
3654             }
3655             else if (ep.Eoper == OPstrthis)
3656             {
3657                 getregs(cdb,retregs);
3658                 // LEA preg,np[RSP]
3659                 uint delta = stackpush - ep.EV.Vuns;   // stack delta to parameter
3660                 cdb.genc1(LEA,
3661                         (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta);
3662                 if (I64)
3663                     code_orrex(cdb.last(), REX_W);
3664             }
3665             else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0)
3666             {
3667                 retregs = 0;
3668                 scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false);
3669                 freenode(ep);
3670             }
3671             else
3672             {
3673                 scodelem(cdb, ep, &retregs, keepmsk, false);
3674             }
3675             keepmsk |= retregs;      // don't change preg when evaluating func address
3676         }
3677     }
3678 
3679     if (config.exe == EX_WIN64)
3680     {   // Allocate stack space for four entries anyway
3681         // https://msdn.microsoft.com/en-US/library/ew5tede7%28v=vs.100%29
3682         {   uint sz = 4 * REGSIZE;
3683             if (usefuncarg)
3684             {
3685                 funcargtos -= sz;
3686                 cgstate.funcargtos = funcargtos;
3687             }
3688             else
3689             {
3690                 cod3_stackadj(cdb, sz);
3691                 cdb.genadjesp(sz);
3692                 stackpush += sz;
3693             }
3694         }
3695 
3696         /* Variadic functions store XMM parameters into their corresponding GP registers
3697          */
3698         for (int i = 0; i < np; i++)
3699         {
3700             int preg = parameters[i].reg;
3701             regm_t retregs = mask(preg);
3702             if (retregs & XMMREGS)
3703             {
3704                 reg_t reg;
3705                 switch (preg)
3706                 {
3707                     case XMM0: reg = CX; break;
3708                     case XMM1: reg = DX; break;
3709                     case XMM2: reg = R8; break;
3710                     case XMM3: reg = R9; break;
3711 
3712                     default:   assert(0);
3713                 }
3714                 getregs(cdb,mask(reg));
3715                 cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg
3716             }
3717         }
3718     }
3719 
3720     // Restore any register parameters we saved
3721     getregs(cdb,saved);
3722     cdb.append(cdbrestore);
3723     keepmsk |= saved;
3724 
3725     // Variadic functions store the number of XMM registers used in AL
3726     if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic)
3727     {
3728         getregs(cdb,mAX);
3729         movregconst(cdb,AX,xmmcnt,1);
3730         keepmsk |= mAX;
3731     }
3732 
3733     //printf("funcargtos2 = %d\n", cast(int)funcargtos);
3734     assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0));
3735     cgstate.stackclean--;
3736 
3737     debug
3738     if (!usefuncarg && numpara != stackpush - stackpushsave)
3739     {
3740         printf("function %s\n", funcsym_p.Sident.ptr);
3741         printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
3742         elem_print(e);
3743     }
3744 
3745     assert(usefuncarg || numpara == stackpush - stackpushsave);
3746 
3747     funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg);
3748     cgstate.funcargtos = funcargtossave;
3749 }
3750 
/***********************************
 * Generate code for an element whose value is the address of a slot on the
 * stack: the slot is located `stackpush - e.EV.Vuns` bytes above the current
 * stack pointer, and its address is formed with an LEA off SP.
 * Params:
 *      cdb      = code builder the instructions are appended to
 *      e        = element being evaluated; its size must be REGSIZE and
 *                 e.EV.Vuns holds the stack level the slot was created at
 *      pretregs = registers the result is wanted in
 */

@trusted
void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    assert(tysize(e.Ety) == REGSIZE);

    // Pick a general purpose register from the requested set and claim it
    const reg = findreg(*pretregs & allregs);
    const regm_t regmask = mask(reg);
    getregs(cdb, regmask);

    // LEA reg,delta[ESP]
    const uint delta = stackpush - e.EV.Vuns;       // stack delta to parameter
    const uint sibAndModRM = (modregrm(0,4,SP) << 8) | modregxrm(2,reg,4);
    cdb.genc1(LEA, sibAndModRM, FLconst, delta);
    if (I64)
        code_orrex(cdb.last(), REX_W);              // 64 bit address

    fixresult(cdb, e, regmask, pretregs);
}
3767 
3768 /******************************
3769  * Call function. All parameters have already been pushed onto the stack.
      *
      * Emits the code that evaluates the function address and performs the CALL
      * (directly, through a register, or through memory), then accounts for the
      * removal of the parameters from the stack, and finally moves the return
      * value from the registers the callee leaves it in to where *pretregs
      * wants it.
3770  * Params:
      *      cdb        = code builder the generated code is appended to
3771  *      e          = function call
3772  *      numpara    = size in bytes of all the parameters
3773  *      numalign   = amount the stack was aligned by before the parameters were pushed
3774  *      pretregs   = where return value goes
3775  *      keepmsk    = registers to not change when evaluating the function address
3776  *      usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return
3777  */
3778 
3779 @trusted
3780 private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign,
3781                       regm_t* pretregs,regm_t keepmsk, bool usefuncarg)
3782 {
3783     //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg);
3784     //printf("  from %s\n", funcsym_p.Sident.ptr);
3785     //elem_print(e);
3786     calledafunc = 1;
3787     // Determine if we need frame for function prolog/epilog
3788 
3789     if (config.memmodel == Vmodel)
3790     {
3791         if (tyfarfunc(funcsym_p.ty()))
3792             needframe = true;
3793     }
3794 
3795     code cs;
3796     regm_t retregs;
3797     Symbol* s;
3798 
3799     elem* e1 = e.EV.E1;
3800     tym_t tym1 = tybasic(e1.Ety);
         // Interrupt functions (TYifunc) are called with the far call sequence too
3801     char farfunc = tyfarfunc(tym1) || tym1 == TYifunc;
3802 
         // The function address evaluation and the CALL itself are collected in
         // cdbe and only appended to cdb further down, so anything emitted
         // directly into cdb below (save87, load_localgot) ends up before them.
3803     CodeBuilder cdbe;
3804     cdbe.ctor();
3805 
3806     if (e1.Eoper == OPvar)
3807     {   // Call function directly
3808 
3809         if (!tyfunc(tym1))
3810             printf("%s\n", tym_str(tym1));
3811         assert(tyfunc(tym1));
3812         s = e1.EV.Vsym;
             // The 8087 state is not saved for functions flagged SFLexit
             // nor for tls_get_addr_sym
3813         if (s.Sflags & SFLexit)
3814         { }
3815         else if (s != tls_get_addr_sym)
3816             save87(cdb);               // assume 8087 regs are all trashed
3817 
3818         // Function calls may throw Errors, unless marked that they don't
3819         if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow))
3820             funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;
3821 
3822         if (s.Sflags & SFLexit)
3823         {
3824             // Function doesn't return, so don't worry about registers
3825             // it may use
3826         }
3827         else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized))
3828             // so we can replace func at runtime
3829             getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
3830         else
3831             getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
             // A call to a symbol named "alloca" is redirected to the
             // RTLSYM.ALLOCA runtime symbol; the address of the FLallocatmp
             // slot is loaded into CX (DX on Win64) before the call.
3832         if (strcmp(s.Sident.ptr, "alloca") == 0)
3833         {
3834             s = getRtlsym(RTLSYM.ALLOCA);
3835             makeitextern(s);
3836             int areg = CX;
3837             if (config.exe == EX_WIN64)
3838                 areg = DX;
3839             getregs(cdbe, mask(areg));
3840             cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0);  // LEA areg,&localsize[BP]
3841             if (I64)
3842                 code_orrex(cdbe.last(), REX_W);
3843             Alloca.size = REGSIZE;
3844         }
3845         if (sytab[s.Sclass] & SCSS)    // if function is on stack (!)
3846         {
                 // Load the address of the function into registers not in
                 // keepmsk, then call through them
3847             retregs = allregs & ~keepmsk;
3848             s.Sflags &= ~GTregcand;
3849             s.Sflags |= SFLread;
3850             cdrelconst(cdbe,e1,&retregs);
3851             if (farfunc)
3852             {
                     // Far pointer: store both halves into the floatreg
                     // temporary and call indirectly through that memory
3853                 const reg = findregmsw(retregs);
3854                 const lsreg = findreglsw(retregs);
3855                 floatreg = true;                // use float register
3856                 reflocal = true;
3857                 cdbe.genc1(0x89,                 // MOV floatreg+2,reg
3858                         modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
3859                 cdbe.genc1(0x89,                 // MOV floatreg,lsreg
3860                         modregrm(2, lsreg, BPRM), FLfltreg, 0);
3861                 if (tym1 == TYifunc)
3862                     cdbe.gen1(0x9C);             // PUSHF
3863                 cdbe.genc1(0xFF,                 // CALL [floatreg]
3864                         modregrm(2, 3, BPRM), FLfltreg, 0);
3865             }
3866             else
3867             {
3868                 const reg = findreg(retregs);
3869                 cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
3870                 if (I64)
3871                     code_orrex(cdbe.last(), REX_W);
3872             }
3873         }
3874         else
3875         {
                 // Direct call to the symbol. If the symbol's own type is not
                 // a function type, the fixup kind is taken from the elem.
3876             FL fl = FLfunc;
3877             if (!tyfunc(s.ty()))
3878                 fl = el_fl(e1);
3879             if (tym1 == TYifunc)
3880                 cdbe.gen1(0x9C);                             // PUSHF
3881             if (config.exe & (EX_windos | EX_OSX | EX_OSX64))
3882             {
3883                 cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s);    // CALL extern
3884             }
3885             else
3886             {
3887                 assert(!farfunc);
3888                 if (s != tls_get_addr_sym)
3889                 {
3890                     //printf("call %s\n", s.Sident.ptr);
3891                     load_localgot(cdb);
3892                     cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
3893                 }
3894                 else if (I64)
3895                 {
3896                     /* Prepend 66 66 48 so GNU linker has patch room
3897                      */
3898                     assert(!farfunc);
3899                     cdbe.gen1(0x66);
3900                     cdbe.gen1(0x66);
3901                     cdbe.gencs(0xE8, 0, fl, s);      // CALL extern
3902                     cdbe.last().Irex = REX | REX_W;
3903                 }
3904                 else
3905                     cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
3906             }
                 // Flag the CALL just generated: segment+offset fixup for a
                 // far call, self-relative offset otherwise
3907             code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff));
3908         }
3909     }
3910     else
3911     {   // Call function via pointer
3912 
3913         // Function calls may throw Errors
3914         funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;
3915 
             // Diagnostic output ahead of the assert below
3916         if (e1.Eoper != OPind) { WRFL(el_fl(e1)); printf("e1.Eoper: %s\n", oper_str(e1.Eoper)); }
3917         save87(cdb);                   // assume 8087 regs are all trashed
3918         assert(e1.Eoper == OPind);
3919         elem *e11 = e1.EV.E1;
3920         tym_t e11ty = tybasic(e11.Ety);
3921         assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
3922         load_localgot(cdb);
3923         if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // 32 bit only
3924         {
                 // With position independent code, BX must survive the
                 // evaluation of the function address
3925             if (config.flags3 & CFG3pic)
3926                 keepmsk |= mBX;
3927         }
3928 
3929         /* Mask of registers destroyed by the function call
3930          */
3931         regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved;
3932 
3933         // if we can't use loadea()
3934         if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) &&
3935             (e11.Eoper != OPind || e11.Ecount))
3936         {
                 // Evaluate the pointer into registers not in keepmsk;
                 // cgstate.stackclean is raised only around that evaluation
3937             retregs = allregs & ~keepmsk;
3938             cgstate.stackclean++;
3939             scodelem(cdbe,e11,&retregs,keepmsk,true);
3940             cgstate.stackclean--;
3941             // Kill registers destroyed by an arbitrary function call
3942             getregs(cdbe,desmsk);
3943             if (e11ty == TYfptr)
3944             {
3945                 const reg = findregmsw(retregs);
3946                 const lsreg = findreglsw(retregs);
3947                 floatreg = true;                // use float register
3948                 reflocal = true;
3949                 cdbe.genc1(0x89,                 // MOV floatreg+2,reg
3950                         modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
3951                 cdbe.genc1(0x89,                 // MOV floatreg,lsreg
3952                         modregrm(2, lsreg, BPRM), FLfltreg, 0);
3953                 if (tym1 == TYifunc)
3954                     cdbe.gen1(0x9C);             // PUSHF
3955                 cdbe.genc1(0xFF,                 // CALL [floatreg]
3956                         modregrm(2, 3, BPRM), FLfltreg, 0);
3957             }
3958             else
3959             {
3960                 const reg = findreg(retregs);
3961                 cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
3962                 if (I64)
3963                     code_orrex(cdbe.last(), REX_W);
3964             }
3965         }
3966         else
3967         {
                 // NOTE(review): unlike the other branches, PUSHF is emitted
                 // into cdb rather than cdbe here; it still precedes the CALL
                 // because cdbe is appended to cdb afterwards.
3968             if (tym1 == TYifunc)
3969                 cdb.gen1(0x9C);                 // PUSHF
3970                                                 // CALL [function]
3971             cs.Iflags = 0;
3972             cgstate.stackclean++;
3973             loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk);
3974             cgstate.stackclean--;
3975             freenode(e11);
3976         }
             // No symbol is known for an indirect call
3977         s = null;
3978     }
         // Address evaluation + CALL now follow whatever was put into cdb above
3979     cdb.append(cdbe);
3980     freenode(e1);
3981 
3982     /* See if we will need the frame pointer.
3983        Calculate it here so we can possibly use BP to fix the stack.
3984      */
     // The following block is compiled out by `static if (0)`
3985 static if (0)
3986 {
3987     if (!needframe)
3988     {
3989         // If there is a register available for this basic block
3990         if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
3991         { }
3992         else
3993         {
3994             for (SYMIDX si = 0; si < globsym.length; si++)
3995             {
3996                 Symbol* s = globsym[si];
3997 
3998                 if (s.Sflags & GTregcand && type_size(s.Stype) != 0)
3999                 {
4000                     if (config.flags4 & CFG4optimized)
4001                     {   // If symbol is live in this basic block and
4002                         // isn't already in a register
4003                         if (s.Srange && vec_testbit(dfoidx, s.Srange) &&
4004                             s.Sfl != FLreg)
4005                         {   // Then symbol must be allocated on stack
4006                             needframe = true;
4007                             break;
4008                         }
4009                     }
4010                     else
4011                     {   if (mfuncreg == 0)      // if no registers left
4012                         {   needframe = true;
4013                             break;
4014                         }
4015                     }
4016                 }
4017             }
4018         }
4019     }
4020 }
4021 
         // retregs becomes the mask of registers holding the return value.
         // NOTE(review): reg1/reg2 are presumably filled in by allocretregs()
         // with the individual return registers (NOREG when unused) - confirm
         // against its definition.
4022     reg_t reg1, reg2;
4023     retregs = allocretregs(e.Ety, e.ET, tym1, reg1, reg2);
4024 
         // A result can only be requested if the call produces one
4025     assert(retregs || !*pretregs);
4026 
         // Stack bookkeeping is only needed when the parameters were pushed
         // rather than stored into cgstate.funcarg
4027     if (!usefuncarg)
4028     {
4029         // If stack needs cleanup
4030         if  (s && s.Sflags & SFLexit)
4031         {
4032             if (config.fulltypes && TARGET_WINDOS)
4033             {
4034                 // the stack walker evaluates the return address, not a byte of the
4035                 // call instruction, so ensure there is an instruction byte after
4036                 // the call that still has the same line number information
4037                 cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
4038             }
4039             /* Function never returns, so don't need to generate stack
4040              * cleanup code. But still need to log the stack cleanup
4041              * as if it did return.
4042              */
4043             cdb.genadjesp(-(numpara + numalign));
4044             stackpush -= numpara + numalign;
4045         }
             // Parameters (and alignment padding) are removed here, after the call
4046         else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) &&
4047             (!typfunc(tym1) || config.exe == EX_WIN64))
4048         {
4049             if (tym1 == TYhfunc)
4050             {   // Hidden parameter is popped off by the callee
4051                 cdb.genadjesp(-REGSIZE);
4052                 stackpush -= REGSIZE;
4053                 if (numpara + numalign > REGSIZE)
4054                     genstackclean(cdb, numpara + numalign - REGSIZE, retregs);
4055             }
4056             else
4057                 genstackclean(cdb, numpara + numalign, retregs);
4058         }
4059         else
4060         {
4061             cdb.genadjesp(-numpara);  // popped off by the callee's 'RET numpara'
4062             stackpush -= numpara;
4063             if (numalign)               // callee doesn't know about alignment adjustment
4064                 genstackclean(cdb,numalign,retregs);
4065         }
4066     }
4067 
4068     /* Special handling for functions which return a floating point
4069        value in the top of the 8087 stack.
4070      */
4071 
4072     if (retregs & mST0)
4073     {
4074         cdb.genadjfpu(1);
4075         if (*pretregs)                  // if we want the result
4076         {
4077             //assert(global87.stackused == 0);
4078             push87(cdb);                // one item on 8087 stack
4079             fixresult87(cdb,e,retregs,pretregs);
4080             return;
4081         }
4082         else
4083             // Pop unused result off 8087 stack
4084             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4085     }
4086     else if (retregs & mST01)
4087     {
4088         cdb.genadjfpu(2);
4089         if (*pretregs)                  // if we want the result
4090         {
4091             assert(global87.stackused == 0);
4092             push87(cdb);
4093             push87(cdb);                // two items on 8087 stack
4094             fixresult_complex87(cdb, e, retregs, pretregs, true);
4095             return;
4096         }
4097         else
4098         {
4099             // Pop unused result off 8087 stack
4100             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4101             cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
4102         }
4103     }
4104 
4105     /* Special handling for functions that return one part
4106        in XMM0 and the other part in AX
4107      */
4108     if (*pretregs && retregs)
4109     {
4110         if (reg1 == NOREG || reg2 == NOREG)
4111         {}
             // Exactly one of reg1/reg2 is an XMM register
4112         else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS)))
4113         {
                 // Choose a pair of the same register class, taken from the
                 // class of reg1, for the two halves
4114             reg_t lreg, mreg;
4115             if (mask(reg1) & XMMREGS)
4116             {
4117                 lreg = XMM0;
4118                 mreg = XMM1;
4119             }
4120             else
4121             {
4122                 lreg = mask(reg1) & mLSW ? reg1 : AX;
4123                 mreg = mask(reg2) & mMSW ? reg2 : DX;
4124             }
                 // Both moves are emitted; the test on v only selects which
                 // of the two comes first
4125             for (int v = 0; v < 2; v++)
4126             {
4127                 if (v ^ (reg2 != lreg))
4128                     genmovreg(cdb,lreg,reg1);
4129                 else
4130                     genmovreg(cdb,mreg,reg2);
4131             }
4132             retregs = mask(lreg) | mask(mreg);
4133         }
4134     }
4135 
4136     /* Special handling for functions which return complex float in XMM0 or RAX. */
4137 
4138     if (I64
4139         && config.exe != EX_WIN64 // broken
4140         && *pretregs && tybasic(e.Ety) == TYcfloat)
4141     {
4142         assert(reg2 == NOREG);
4143         // spill
             // NOTE(review): the EX_WIN64 branch below cannot be reached while
             // the enclosing condition excludes EX_WIN64
4144         if (config.exe == EX_WIN64)
4145         {
4146             assert(reg1 == AX);
4147             cdb.genfltreg(STO, reg1, 0);
4148             code_orrex(cdb.last(), REX_W);
4149         }
4150         else
4151         {
4152             assert(reg1 == XMM0);
4153             cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble);
4154         }
4155         // reload real
4156         push87(cdb);
4157         cdb.genfltreg(0xD9, 0, 0);
4158         genfwait(cdb);
4159         // reload imaginary
4160         push87(cdb);
4161         cdb.genfltreg(0xD9, 0, tysize(TYfloat));
4162         genfwait(cdb);
4163 
             // The value now lives in the two top 8087 stack entries
4164         retregs = mST01;
4165     }
4166 
4167     fixresult(cdb, e, retregs, pretregs);
4168 }
4169 
4170 /***************************
4171  * Determine size of argument e that will be pushed.
4172  */
4173 
4174 @trusted
4175 targ_size_t paramsize(elem* e, tym_t tyf)
4176 {
4177     assert(e.Eoper != OPparam);
4178     targ_size_t szb;
4179     tym_t tym = tybasic(e.Ety);
4180     if (tyscalar(tym))
4181         szb = size(tym);
4182     else if (tym == TYstruct || tym == TYarray)
4183         szb = type_parameterSize(e.ET, tyf);
4184     else
4185     {
4186         printf("%s\n", tym_str(tym));
4187         assert(0);
4188     }
4189     return szb;
4190 }
4191 
4192 /***************************
4193  * Generate code to move argument e on the stack.
4194  */
4195 
4196 @trusted
4197 private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf)
4198 {
4199     //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos);
4200     //printf("movParams()\n"); elem_print(e);
4201     assert(!I16);
4202     assert(e && e.Eoper != OPparam);
4203 
4204     tym_t tym = tybasic(e.Ety);
4205     if (tyfloating(tym))
4206         objmod.fltused();
4207 
4208     int grex = I64 ? REX_W << 16 : 0;
4209 
4210     targ_size_t szb = paramsize(e, tyf);          // size before alignment
4211     targ_size_t sz = _align(stackalign, szb);       // size after alignment
4212     assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
4213     assert((sz & (REGSIZE - 1)) == 0);
4214     //printf("szb = %d sz = %d\n", cast(int)szb, cast(int)sz);
4215 
4216     code cs;
4217     cs.Iflags = 0;
4218     cs.Irex = 0;
4219     switch (e.Eoper)
4220     {
4221         case OPstrctor:
4222         case OPstrthis:
4223         case OPstrpar:
4224         case OPnp_fp:
4225             assert(0);
4226 
4227         case OPrelconst:
4228         {
4229             int fl;
4230             if (!evalinregister(e) &&
4231                 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
4232                 ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern)
4233                )
4234             {
4235                 // MOV -stackoffset[EBP],&variable
4236                 cs.Iop = 0xC7;
4237                 cs.Irm = modregrm(2,0,BPRM);
4238                 if (I64 && sz == 8)
4239                     cs.Irex |= REX_W;
4240                 cs.IFL1 = FLfuncarg;
4241                 cs.IEV1.Voffset = funcargtos - REGSIZE;
4242                 cs.IEV2.Voffset = e.EV.Voffset;
4243                 cs.IFL2 = cast(ubyte)fl;
4244                 cs.IEV2.Vsym = e.EV.Vsym;
4245                 cs.Iflags |= CFoff;
4246                 cdb.gen(&cs);
4247                 return;
4248             }
4249             break;
4250         }
4251 
4252         case OPconst:
4253             if (!evalinregister(e))
4254             {
4255                 cs.Iop = (sz == 1) ? 0xC6 : 0xC7;
4256                 cs.Irm = modregrm(2,0,BPRM);
4257                 cs.IFL1 = FLfuncarg;
4258                 cs.IEV1.Voffset = funcargtos - sz;
4259                 cs.IFL2 = FLconst;
4260                 targ_size_t *p = cast(targ_size_t *) &(e.EV);
4261                 cs.IEV2.Vsize_t = *p;
4262                 if (I64 && tym == TYcldouble)
4263                     // The alignment of EV.Vcldouble is not the same on the compiler
4264                     // as on the target
4265                     goto Lbreak;
4266                 if (I64 && sz >= 8)
4267                 {
4268                     int i = cast(int)sz;
4269                     do
4270                     {
4271                         if (*p >= 0x80000000)
4272                         {   // Use 64 bit register MOV, as the 32 bit one gets sign extended
4273                             // MOV reg,imm64
4274                             // MOV EA,reg
4275                             goto Lbreak;
4276                         }
4277                         p = cast(targ_size_t *)(cast(char *) p + REGSIZE);
4278                         i -= REGSIZE;
4279                     } while (i > 0);
4280                     p = cast(targ_size_t *) &(e.EV);
4281                 }
4282 
4283                 int i = cast(int)sz;
4284                 do
4285                 {   int regsize = REGSIZE;
4286                     regm_t retregs = (sz == 1) ? BYTEREGS : allregs;
4287                     reg_t reg;
4288                     if (reghasvalue(retregs,*p,reg))
4289                     {
4290                         cs.Iop = (cs.Iop & 1) | 0x88;
4291                         cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg
4292                         if (reg & 8)
4293                             cs.Irex |= REX_R;
4294                         if (I64 && sz == 1 && reg >= 4)
4295                             cs.Irex |= REX;
4296                     }
4297                     if (I64 && sz >= 8)
4298                         cs.Irex |= REX_W;
4299                     cdb.gen(&cs);           // MOV EA,const
4300 
4301                     p = cast(targ_size_t *)(cast(char *) p + regsize);
4302                     cs.Iop = 0xC7;
4303                     cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0);
4304                     cs.Irex &= ~REX_R;
4305                     cs.IEV1.Voffset += regsize;
4306                     cs.IEV2.Vint = cast(targ_int)*p;
4307                     i -= regsize;
4308                 } while (i > 0);
4309                 return;
4310             }
4311 
4312         Lbreak:
4313             break;
4314 
4315         default:
4316             break;
4317     }
4318     regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
4319     if (tyvector(tym) ||
4320         config.fpxmmregs && tyxmmreg(tym) &&
4321         // If not already in x87 register from function call return
4322         !((e.Eoper == OPcall || e.Eoper == OPucall) && I32))
4323     {
4324         retregs = XMMREGS;
4325         codelem(cdb, e, &retregs, false);
4326         const op = xmmstore(tym);
4327         const r = findreg(retregs);
4328         cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - sz);   // MOV funcarg[EBP],r
4329         checkSetVex(cdb.last(),tym);
4330         return;
4331     }
4332     else if (tyfloating(tym))
4333     {
4334         if (config.inline8087)
4335         {
4336             retregs = tycomplex(tym) ? mST01 : mST0;
4337             codelem(cdb, e, &retregs, false);
4338 
4339             opcode_t op;
4340             uint r;
4341             switch (tym)
4342             {
4343                 case TYfloat:
4344                 case TYifloat:
4345                 case TYcfloat:
4346                     op = 0xD9;
4347                     r = 3;
4348                     break;
4349 
4350                 case TYdouble:
4351                 case TYidouble:
4352                 case TYdouble_alias:
4353                 case TYcdouble:
4354                     op = 0xDD;
4355                     r = 3;
4356                     break;
4357 
4358                 case TYldouble:
4359                 case TYildouble:
4360                 case TYcldouble:
4361                     op = 0xDB;
4362                     r = 7;
4363                     break;
4364 
4365                 default:
4366                     assert(0);
4367             }
4368             if (tycomplex(tym))
4369             {
4370                 // FSTP sz/2[ESP]
4371                 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2);
4372                 pop87();
4373             }
4374             pop87();
4375             cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz);    // FSTP -sz[EBP]
4376             return;
4377         }
4378     }
4379     scodelem(cdb, e, &retregs, 0, true);
4380     if (sz <= REGSIZE)
4381     {
4382         uint r = findreg(retregs);
4383         cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);   // MOV -REGSIZE[EBP],r
4384         if (sz == 8)
4385             code_orrex(cdb.last(), REX_W);
4386     }
4387     else if (sz == REGSIZE * 2)
4388     {
4389         uint r = findregmsw(retregs);
4390         cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);    // MOV -REGSIZE[EBP],r
4391         r = findreglsw(retregs);
4392         cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r
4393     }
4394     else
4395         assert(0);
4396 }
4397 
4398 
4399 /***************************
4400  * Generate code to push argument e on the stack.
4401  * stackpush is incremented by stackalign for each PUSH.
4402  */
4403 
4404 @trusted
4405 void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf)
4406 {
4407     //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
4408     //printf("params()\n"); elem_print(e);
4409     stackchanged = 1;
4410     assert(e && e.Eoper != OPparam);
4411 
4412     tym_t tym = tybasic(e.Ety);
4413     if (tyfloating(tym))
4414         objmod.fltused();
4415 
4416     int grex = I64 ? REX_W << 16 : 0;
4417 
4418     targ_size_t szb = paramsize(e, tyf);          // size before alignment
4419     targ_size_t sz = _align(stackalign,szb);      // size after alignment
4420     assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
4421     assert((sz & (REGSIZE - 1)) == 0);
4422 
4423     switch (e.Eoper)
4424     {
4425         case OPstrpar:
4426         {
4427             uint rm;
4428 
4429             elem* e1 = e.EV.E1;
4430             if (sz == 0)
4431             {
4432                 docommas(cdb, e1); // skip over any commas
4433 
4434                 const stackpushsave = stackpush;
4435                 const stackcleansave = cgstate.stackclean;
4436                 cgstate.stackclean = 0;
4437 
4438                 regm_t retregs = 0;
4439                 codelem(cdb,e1,&retregs,true);
4440 
4441                 assert(cgstate.stackclean == 0);
4442                 cgstate.stackclean = stackcleansave;
4443                 genstackclean(cdb,stackpush - stackpushsave,0);
4444 
4445                 freenode(e);
4446                 return;
4447             }
4448             if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar)
4449             {
4450                 freenode(e);
4451                 e = e1;
4452                 goto L1;
4453             }
4454             docommas(cdb, e1);             // skip over any commas
4455             code_flags_t seg = 0;          // assume no seg override
4456             regm_t retregs = sz ? IDXREGS : 0;
4457             bool doneoff = false;
4458             uint pushsize = REGSIZE;
4459             uint op16 = 0;
4460             if (!I16 && sz & 2)     // if odd number of words to push
4461             {
4462                 pushsize = 2;
4463                 op16 = 1;
4464             }
4465             else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0)
4466             {
4467                 pushsize = 4;       // push DWORDs at a time
4468                 op16 = 1;
4469             }
4470             uint npushes = cast(uint)(sz / pushsize);
4471             switch (e1.Eoper)
4472             {
4473                 case OPind:
4474                     if (sz)
4475                     {
4476                         switch (tybasic(e1.EV.E1.Ety))
4477                         {
4478                             case TYfptr:
4479                             case TYhptr:
4480                                 seg = CFes;
4481                                 retregs |= mES;
4482                                 break;
4483 
4484                             case TYsptr:
4485                                 if (config.wflags & WFssneds)
4486                                     seg = CFss;
4487                                 break;
4488 
4489                             case TYfgPtr:
4490                                 if (I32)
4491                                      seg = CFgs;
4492                                 else if (I64)
4493                                      seg = CFfs;
4494                                 else
4495                                      assert(0);
4496                                 break;
4497 
4498                             case TYcptr:
4499                                 seg = CFcs;
4500                                 break;
4501 
4502                             default:
4503                                 break;
4504                         }
4505                     }
4506                     codelem(cdb, e1.EV.E1, &retregs, false);
4507                     freenode(e1);
4508                     break;
4509 
4510                 case OPvar:
4511                     /* Symbol is no longer a candidate for a register */
4512                     e1.EV.Vsym.Sflags &= ~GTregcand;
4513 
4514                     if (!e1.Ecount && npushes > 4)
4515                     {
4516                         /* Kludge to point at last word in struct. */
4517                         /* Don't screw up CSEs.                 */
4518                         e1.EV.Voffset += sz - pushsize;
4519                         doneoff = true;
4520                     }
4521                     //if (LARGEDATA) /* if default isn't DS */
4522                     {
4523                         static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ];
4524 
4525                         int fl = el_fl(e1);
4526                         if (fl == FLfardata)
4527                         {
4528                             seg = CFes;
4529                             retregs |= mES;
4530                         }
4531                         else
4532                         {
4533                             uint s = segfl[fl];
4534                             assert(s < 4);
4535                             seg = segtocf[s];
4536                             if (seg == CFss && !(config.wflags & WFssneds))
4537                                 seg = 0;
4538                         }
4539                     }
4540                     if (e1.Ety & mTYfar)
4541                     {
4542                         seg = CFes;
4543                         retregs |= mES;
4544                     }
4545                     cdrelconst(cdb, e1, &retregs);
4546                     // Reverse the effect of the previous add
4547                     if (doneoff)
4548                         e1.EV.Voffset -= sz - pushsize;
4549                     freenode(e1);
4550                     break;
4551 
4552                 case OPstreq:
4553                 //case OPcond:
4554                     if (config.exe & EX_segmented)
4555                     {
4556                         seg = CFes;
4557                         retregs |= mES;
4558                     }
4559                     codelem(cdb, e1, &retregs, false);
4560                     break;
4561 
4562                 case OPpair:
4563                 case OPrpair:
4564                     pushParams(cdb, e1, stackalign, tyf);
4565                     freenode(e);
4566                     return;
4567 
4568                 default:
4569                     elem_print(e1);
4570                     assert(0);
4571             }
4572             reg_t reg = findreglsw(retregs);
4573             rm = I16 ? regtorm[reg] : regtorm32[reg];
4574             if (op16)
4575                 seg |= CFopsize;            // operand size
4576             if (npushes <= 4)
4577             {
4578                 assert(!doneoff);
4579                 for (; npushes > 1; --npushes)
4580                 {
4581                     cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1));  // PUSH [reg]
4582                     code_orflag(cdb.last(),seg);
4583                     cdb.genadjesp(pushsize);
4584                 }
4585                 cdb.gen2(0xFF,buildModregrm(0, 6, rm));     // PUSH [reg]
4586                 cdb.last().Iflags |= seg;
4587                 cdb.genadjesp(pushsize);
4588             }
4589             else if (sz)
4590             {
4591                 getregs_imm(cdb, mCX | retregs);
4592                                                     // MOV CX,sz/2
4593                 movregconst(cdb, CX, npushes, 0);
4594                 if (!doneoff)
4595                 {   // This should be done when
4596                     // reg is loaded. Fix later
4597                                                     // ADD reg,sz-pushsize
4598                     cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize);
4599                 }
4600                 getregs(cdb,mCX);                       // the LOOP decrements it
4601                 cdb.gen2(0xFF, buildModregrm(0, 6, rm));   // PUSH [reg]
4602                 cdb.last().Iflags |= seg | CFtarg2;
4603                 code* c3 = cdb.last();
4604                 cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize);  // SUB reg,pushsize
4605                 if (I16 || config.flags4 & CFG4space)
4606                     genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3
4607                 else
4608                 {
4609                     if (I64)
4610                         cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX
4611                     else
4612                         cdb.gen1(0x48 + CX);            // DEC CX
4613                     genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3
4614                 }
4615                 regimmed_set(CX,0);
4616                 cdb.genadjesp(cast(int)sz);
4617             }
4618             stackpush += sz;
4619             freenode(e);
4620             return;
4621         }
4622 
4623         case OPind:
4624             if (!e.Ecount)                         /* if *e1       */
4625             {
4626                 if (sz < REGSIZE)
4627                 {
4628                     /* Don't push REGSIZE quantity because it may
4629                      * straddle past the end of valid memory
4630                      */
4631                     break;
4632                 }
4633                 if (sz == REGSIZE)
4634                     goto case OPvar;    // handle it with loadea()
4635 
4636                 // Avoid PUSH MEM on the Pentium when optimizing for speed
4637                 if (config.flags4 & CFG4speed &&
4638                     (config.target_cpu >= TARGET_80486 &&
4639                      config.target_cpu <= TARGET_PentiumMMX) &&
4640                     sz <= 2 * REGSIZE &&
4641                     !tyfloating(tym))
4642                     break;
4643 
4644                 if (tym == TYldouble || tym == TYildouble || tycomplex(tym))
4645                     break;
4646 
4647                 code cs;
4648                 cs.Iflags = 0;
4649                 cs.Irex = 0;
4650                 if (I32)
4651                 {
4652                     assert(sz >= REGSIZE * 2);
4653                     loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4
4654                     cdb.genadjesp(REGSIZE);
4655                     stackpush += REGSIZE;
4656                     sz -= REGSIZE;
4657 
4658                     if (sz > REGSIZE)
4659                     {
4660                         while (sz)
4661                         {
4662                             cs.IEV1.Voffset -= REGSIZE;
4663                             cdb.gen(&cs);                    // PUSH EA+...
4664                             cdb.genadjesp(REGSIZE);
4665                             stackpush += REGSIZE;
4666                             sz -= REGSIZE;
4667                         }
4668                         freenode(e);
4669                         return;
4670                     }
4671                 }
4672                 else
4673                 {
4674                     if (sz == DOUBLESIZE)
4675                     {
4676                         loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6
4677                         cs.IEV1.Voffset -= REGSIZE;
4678                         cdb.gen(&cs);                    // PUSH EA+4
4679                         cdb.genadjesp(REGSIZE);
4680                         getlvalue_lsw(&cs);
4681                         cdb.gen(&cs);                    // PUSH EA+2
4682                     }
4683                     else /* TYlong */
4684                         loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2
4685                     cdb.genadjesp(REGSIZE);
4686                 }
4687                 stackpush += sz;
4688                 getlvalue_lsw(&cs);
4689                 cdb.gen(&cs);                            // PUSH EA
4690                 cdb.genadjesp(REGSIZE);
4691                 freenode(e);
4692                 return;
4693             }
4694             break;
4695 
4696         case OPnp_fp:
4697             if (!e.Ecount)                         /* if (far *)e1 */
4698             {
4699                 elem* e1 = e.EV.E1;
4700                 tym_t tym1 = tybasic(e1.Ety);
4701                 /* BUG: what about pointers to functions?   */
4702                 int segreg;
4703                 switch (tym1)
4704                 {
4705                     case TYnptr: segreg = 3<<3; break;
4706                     case TYcptr: segreg = 1<<3; break;
4707                     default:     segreg = 2<<3; break;
4708                 }
4709                 if (I32 && stackalign == 2)
4710                     cdb.gen1(0x66);                 // push a word
4711                 cdb.gen1(0x06 + segreg);            // PUSH SEGREG
4712                 if (I32 && stackalign == 2)
4713                     code_orflag(cdb.last(), CFopsize);        // push a word
4714                 cdb.genadjesp(stackalign);
4715                 stackpush += stackalign;
4716                 pushParams(cdb, e1, stackalign, tyf);
4717                 freenode(e);
4718                 return;
4719             }
4720             break;
4721 
4722         case OPrelconst:
4723             if (config.exe & EX_segmented)
4724             {
4725                 /* Determine if we can just push the segment register           */
4726                 /* Test size of type rather than TYfptr because of (long)(&v)   */
4727                 Symbol* s = e.EV.Vsym;
4728                 //if (sytab[s.Sclass] & SCSS && !I32)  // if variable is on stack
4729                 //    needframe = true;                 // then we need stack frame
4730                 int fl;
4731                 if (_tysize[tym] == tysize(TYfptr) &&
4732                     (fl = s.Sfl) != FLfardata &&
4733                     /* not a function that CS might not be the segment of       */
4734                     (!((fl == FLfunc || s.ty() & mTYcs) &&
4735                       (s.Sclass == SC.comdat || s.Sclass == SC.extern_ ||
4736                        s.Sclass == SC.inline || config.wflags & WFthunk)) ||
4737                      (fl == FLfunc && config.exe == EX_DOSX)
4738                     )
4739                    )
4740                 {
4741                     stackpush += sz;
4742                     cdb.gen1(0x06 +           // PUSH SEGREG
4743                             (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3));
4744                     cdb.genadjesp(REGSIZE);
4745 
4746                     if (config.target_cpu >= TARGET_80286 && !e.Ecount)
4747                     {
4748                         getoffset(cdb, e, STACK);
4749                         freenode(e);
4750                         return;
4751                     }
4752                     else
4753                     {
4754                         regm_t retregs;
4755                         offsetinreg(cdb, e, &retregs);
4756                         const reg = findreg(retregs);
4757                         genpush(cdb,reg);                    // PUSH reg
4758                         cdb.genadjesp(REGSIZE);
4759                     }
4760                     return;
4761                 }
4762                 if (config.target_cpu >= TARGET_80286 && !e.Ecount)
4763                 {
4764                     stackpush += sz;
4765                     if (_tysize[tym] == tysize(TYfptr))
4766                     {
4767                         // PUSH SEG e
4768                         cdb.gencs(0x68,0,FLextern,s);
4769                         cdb.last().Iflags = CFseg;
4770                         cdb.genadjesp(REGSIZE);
4771                     }
4772                     getoffset(cdb, e, STACK);
4773                     freenode(e);
4774                     return;
4775                 }
4776             }
4777             break;                          /* else must evaluate expression */
4778 
4779         case OPvar:
4780         L1:
4781             if (config.flags4 & CFG4speed &&
4782                      (config.target_cpu >= TARGET_80486 &&
4783                       config.target_cpu <= TARGET_PentiumMMX) &&
4784                      sz <= 2 * REGSIZE &&
4785                      !tyfloating(tym))
4786             {   // Avoid PUSH MEM on the Pentium when optimizing for speed
4787                 break;
4788             }
4789             else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym))
4790                 break;                      // no PUSH MEM
4791             else
4792             {
4793                 int regsize = REGSIZE;
4794                 uint flag = 0;
4795                 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 &&
4796                     !e.Ecount)
4797                 {
4798                     regsize = 4;
4799                     flag |= CFopsize;
4800                 }
4801                 code cs;
4802                 cs.Iflags = 0;
4803                 cs.Irex = 0;
4804                 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);    // PUSH EA+sz-2
4805                 code_orflag(cdb.last(), flag);
4806                 cdb.genadjesp(REGSIZE);
4807                 stackpush += sz;
4808                 while (cast(targ_int)(sz -= regsize) > 0)
4809                 {
4810                     loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);
4811                     code_orflag(cdb.last(), flag);
4812                     cdb.genadjesp(REGSIZE);
4813                 }
4814                 freenode(e);
4815                 return;
4816             }
4817 
4818         case OPconst:
4819         {
4820             char pushi = 0;
4821             uint flag = 0;
4822             int regsize = REGSIZE;
4823 
4824             if (tycomplex(tym))
4825                 break;
4826 
4827             if (I64 && tyfloating(tym) && sz > 4 && boolres(e))
4828                 // Can't push 64 bit non-zero args directly
4829                 break;
4830 
4831             if (I32 && szb == 10)           // special case for long double constants
4832             {
4833                 assert(sz == 12);
4834                 targ_int value = e.EV.Vushort8[4]; // pick upper 2 bytes of Vldouble
4835                 stackpush += sz;
4836                 cdb.genadjesp(cast(int)sz);
4837                 for (int i = 0; i < 3; ++i)
4838                 {
4839                     reg_t reg;
4840                     if (reghasvalue(allregs, value, reg))
4841                         cdb.gen1(0x50 + reg);           // PUSH reg
4842                     else
4843                         cdb.genc2(0x68,0,value);        // PUSH value
4844                     value = e.EV.Vulong4[i ^ 1];       // treat Vldouble as 2 element array of 32 bit uint
4845                 }
4846                 freenode(e);
4847                 return;
4848             }
4849 
4850             assert(I64 || sz <= tysize(TYldouble));
4851             int i = cast(int)sz;
4852             if (!I16 && i == 2)
4853                 flag = CFopsize;
4854 
4855             if (config.target_cpu >= TARGET_80286)
4856     //       && (e.Ecount == 0 || e.Ecount != e.Ecomsub))
4857             {
4858                 pushi = 1;
4859                 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4)
4860                 {
4861                     regsize = 4;
4862                     flag = CFopsize;
4863                 }
4864             }
4865             else if (i == REGSIZE)
4866                 break;
4867 
4868             stackpush += sz;
4869             cdb.genadjesp(cast(int)sz);
4870             targ_uns* pi = &e.EV.Vuns;     // point to start of Vdouble
4871             targ_ushort* ps = cast(targ_ushort *) pi;
4872             targ_ullong* pl = cast(targ_ullong *)pi;
4873             i /= regsize;
4874             do
4875             {
4876                 if (i)                      /* be careful not to go negative */
4877                     i--;
4878 
4879                 targ_size_t value;
4880                 switch (regsize)
4881                 {
4882                     case 2:
4883                         value = ps[i];
4884                         break;
4885 
4886                     case 4:
4887                         if (tym == TYldouble || tym == TYildouble)
4888                             /* The size is 10 bytes, and since we have 2 bytes left over,
4889                              * just read those 2 bytes, not 4.
4890                              * Otherwise we're reading uninitialized data.
4891                              * I.e. read 4 bytes, 4 bytes, then 2 bytes
4892                              */
4893                             value = i == 2 ? ps[4] : pi[i]; // 80 bits
4894                         else
4895                             value = pi[i];
4896                         break;
4897 
4898                     case 8:
4899                         value = cast(targ_size_t)pl[i];
4900                         break;
4901 
4902                     default:
4903                         assert(0);
4904                 }
4905 
4906                 reg_t reg;
4907                 if (pushi)
4908                 {
4909                     if (I64 && regsize == 8 && value != cast(int)value)
4910                     {
4911                         regwithvalue(cdb,allregs,value,reg,64);
4912                         goto Preg;          // cannot push imm64 unless it is sign extended 32 bit value
4913                     }
4914                     if (regsize == REGSIZE && reghasvalue(allregs,value,reg))
4915                         goto Preg;
4916                     cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value
4917                 }
4918                 else
4919                 {
4920                     regwithvalue(cdb, allregs, value, reg, 0);
4921                 Preg:
4922                     genpush(cdb,reg);         // PUSH reg
4923                 }
4924                 code_orflag(cdb.last(), flag);              // operand size
4925             } while (i);
4926             freenode(e);
4927             return;
4928         }
4929 
4930         case OPpair:
4931         {
4932             if (e.Ecount)
4933                 break;
4934             const op1 = e.EV.E1.Eoper;
4935             const op2 = e.EV.E2.Eoper;
4936             if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
4937                 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
4938             {
4939                 pushParams(cdb, e.EV.E2, stackalign, tyf);
4940                 pushParams(cdb, e.EV.E1, stackalign, tyf);
4941                 freenode(e);
4942             }
4943             else if (tyfloating(e.EV.E1.Ety) ||
4944                      tyfloating(e.EV.E2.Ety))
4945             {
4946                 // Need special handling because of order of evaluation of e1 and e2
4947                 break;
4948             }
4949             else
4950             {
4951                 regm_t regs = allregs;
4952                 codelem(cdb, e, &regs, false);
4953                 genpush(cdb, findregmsw(regs)); // PUSH msreg
4954                 genpush(cdb, findreglsw(regs)); // PUSH lsreg
4955                 cdb.genadjesp(cast(int)sz);
4956                 stackpush += sz;
4957             }
4958             return;
4959         }
4960 
4961         case OPrpair:
4962         {
4963             if (e.Ecount)
4964                 break;
4965             const op1 = e.EV.E1.Eoper;
4966             const op2 = e.EV.E2.Eoper;
4967             if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
4968                 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
4969             {
4970                 pushParams(cdb, e.EV.E1, stackalign, tyf);
4971                 pushParams(cdb, e.EV.E2, stackalign, tyf);
4972                 freenode(e);
4973             }
4974             else if (tyfloating(e.EV.E1.Ety) ||
4975                      tyfloating(e.EV.E2.Ety))
4976             {
4977                 // Need special handling because of order of evaluation of e1 and e2
4978                 break;
4979             }
4980             else
4981             {
4982                 regm_t regs = allregs;
4983                 codelem(cdb, e, &regs, false);
4984                 genpush(cdb, findregmsw(regs)); // PUSH msreg
4985                 genpush(cdb, findreglsw(regs)); // PUSH lsreg
4986                 cdb.genadjesp(cast(int)sz);
4987                 stackpush += sz;
4988             }
4989             return;
4990         }
4991 
4992         default:
4993             break;
4994     }
4995 
4996     regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
4997     if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs))
4998     {
4999         regm_t retxmm = XMMREGS;
5000         codelem(cdb, e, &retxmm, false);
5001         stackpush += sz;
5002         cdb.genadjesp(cast(int)sz);
5003         cod3_stackadj(cdb, cast(int)sz);
5004         const op = xmmstore(tym);
5005         const r = findreg(retxmm);
5006         cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP));   // MOV [ESP],r
5007         checkSetVex(cdb.last(),tym);
5008         return;
5009     }
5010     else if (tyfloating(tym))
5011     {
5012         if (config.inline8087)
5013         {
5014             retregs = tycomplex(tym) ? mST01 : mST0;
5015             codelem(cdb, e, &retregs, false);
5016             stackpush += sz;
5017             cdb.genadjesp(cast(int)sz);
5018             cod3_stackadj(cdb, cast(int)sz);
5019             opcode_t op;
5020             uint r;
5021             switch (tym)
5022             {
5023                 case TYfloat:
5024                 case TYifloat:
5025                 case TYcfloat:
5026                     op = 0xD9;
5027                     r = 3;
5028                     break;
5029 
5030                 case TYdouble:
5031                 case TYidouble:
5032                 case TYdouble_alias:
5033                 case TYcdouble:
5034                     op = 0xDD;
5035                     r = 3;
5036                     break;
5037 
5038                 case TYldouble:
5039                 case TYildouble:
5040                 case TYcldouble:
5041                     op = 0xDB;
5042                     r = 7;
5043                     break;
5044 
5045                 default:
5046                     assert(0);
5047             }
5048             if (!I16)
5049             {
5050                 if (tycomplex(tym))
5051                 {
5052                     // FSTP sz/2[ESP]
5053                     cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2);
5054                     pop87();
5055                 }
5056                 pop87();
5057                 cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP));   // FSTP [ESP]
5058             }
5059             else
5060             {
5061                 retregs = IDXREGS;                             // get an index reg
5062                 reg_t reg;
5063                 allocreg(cdb, &retregs, &reg, TYoffset);
5064                 genregs(cdb, 0x89, SP, reg);         // MOV reg,SP
5065                 pop87();
5066                 cdb.gen2(op, modregrm(0, r, regtorm[reg]));       // FSTP [reg]
5067             }
5068             if (LARGEDATA)
5069                 cdb.last().Iflags |= CFss;     // want to store into stack
5070             genfwait(cdb);         // FWAIT
5071             return;
5072         }
5073         else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
5074             retregs = mSTACK;
5075     }
5076     else if (I16 && sz == 8)             // if long long
5077         retregs = mSTACK;
5078 
5079     scodelem(cdb,e,&retregs,0,true);
5080     if (retregs != mSTACK)                // if stackpush not already inc'd
5081         stackpush += sz;
5082     if (sz <= REGSIZE)
5083     {
5084         genpush(cdb,findreg(retregs));        // PUSH reg
5085         cdb.genadjesp(cast(int)REGSIZE);
5086     }
5087     else if (sz == REGSIZE * 2)
5088     {
5089         genpush(cdb,findregmsw(retregs));     // PUSH msreg
5090         genpush(cdb,findreglsw(retregs));     // PUSH lsreg
5091         cdb.genadjesp(cast(int)sz);
5092     }
5093 }
5094 
/*******************************
 * Load the offset portion of `e` into an index register.
 *
 * If `e` is a common subexpression whose value is already recorded in one
 * of the candidate registers, that register is reused; otherwise a register
 * is allocated and getoffset() fills it in.
 * Params:
 *      cdb = code sink
 *      e = elem whose offset is wanted; it is freed before returning
 *      pretregs = set to the mask of the register holding the offset
 */

@trusted
void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    const regm_t wanted = mLSW;         // only the offset half is needed
    bool reused = false;

    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // Registers that hold a CSE value, are not operands, and are not register variables
        regm_t candidates = wanted & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar;
        uint r = 0;
        while (candidates)
        {
            const regm_t m = mask(r);
            if ((m & candidates) && regcon.cse.value[r] == e)
            {
                // e is already sitting in register r
                *pretregs = m;
                getregs(cdb, *pretregs);
                reused = true;
                break;
            }
            candidates &= ~m;
            ++r;
        }
    }

    if (!reused)
    {
        reg_t reg;
        *pretregs = wanted;
        allocreg(cdb, pretregs, &reg, TYoffset);
        getoffset(cdb, e, reg);
    }

    cssave(e, *pretregs, false);
    freenode(e);
}
5127 
/******************************
 * Generate code to load data into registers.
 *
 * The work is split into three major cases:
 *  1. `*pretregs == mPSW`: only the condition flags are wanted, so the value
 *     is tested (CMP/TEST/OR sequences) rather than kept.
 *  2. `e` is an OPconst: the constant is materialized with movregconst() /
 *     movxmmconst().
 *  3. anything else: the value is fetched through loadea(), or taken directly
 *     from the register a parameter was passed in.
 *
 * Struct-typed elems are forwarded to cdrelconst(); floating point elems may
 * be forwarded to cloadxmm(), load87() or cload87().
 * Params:
 *      cdb = code sink
 *      e = elem to load
 *      pretregs = mask of desired result registers and/or mPSW / mSTACK;
 *                 if 0 nothing is generated. Updated through fixresult().
 */


@trusted
void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
    //  if (debugw)
    //        printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
    //  elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)                 // no result wanted
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;
    if (*pretregs == mPSW)
    {
        /* Only the flags are wanted: test the value against 0 */
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            /* Evaluate using XMM register and XMM instruction.
             * This affects jmpopcode()
             */
            if (s.Sclass == SC.parameter)
                refparam = true;
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));           // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));           // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);   // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);        // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);  // get a register
            if (I32)                                    // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0); // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)       // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);     // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);       // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                             // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));            // SHL reg,1

            // OR in the remaining words, from most to least significant
            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))               // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;
    if (e.Eoper == OPconst)
    {
        if (tyvector(tym) && forregs & XMMREGS)
        {
            assert(!flags);
            reg_t xreg;
            allocreg(cdb, &forregs, &xreg, tym);     // allocate registers
            movxmmconst(cdb, xreg, tym, &e.EV, flags);
            fixresult(cdb, e, forregs, pretregs);
            return;
        }

        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that already holds this constant
        if (sz == REGSIZE && reghasvalue(forregs, value, reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);        // allocate registers
        regcon.immed.mval = save;               // allocreg could unnecessarily clear .mval
        if (sz <= REGSIZE)
        {
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {
                movxmmconst(cdb, reg, tym, &e.EV, 0);
                flags = 0;
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0); // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);    // MOV ES,reg
                msw = lsw;                               // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);    // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);               // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);               // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);           // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lo, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.hi, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            const reg_t preg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            const regm_t pregm = mask(preg);

            if (!(sz <= 2 && pregm & XMMREGS))   // no SIMD instructions to load 1 or 2 byte quantities
            {
                if (debugr)
                    printf("%s.%d is fastpar and using register %s\n",
                           e.EV.Vsym.Sident.ptr,
                           cast(int)e.EV.Voffset,
                           regm_str(pregm));

                mfuncreg &= ~pregm;
                regcon.used |= pregm;
                fixresult(cdb,e,pregm,pretregs);
                return;
            }
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                    printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                               // byte MOV
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);    // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);   // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly     */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                /* Load the four words into AX,BX,CX,DX. Each successive load
                 * passes the registers already loaded as the keep mask, so
                 * they are preserved while the next EA is formed.
                 */
                assert(reg == AX);
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0,           0); // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX,         0); // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX,     0); // MOV CX,data+2
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}