/**
 * Top level code for the code generator.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cgcod.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
 */
12 
module dmd.backend.cgcod;

// Selects the FRAMEPTR variants of the stack frame computations
// guarded by `version (FRAMEPTR)` further down in this module.
version = FRAMEPTR;

// The body of this module is wrapped in `version (COMPILE)`, which is
// enabled for either of the two supported front ends.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;
21 
22 version (COMPILE)
23 {
24 
25 import core.bitop;
26 import core.stdc.stdio;
27 import core.stdc.stdlib;
28 import core.stdc.string;
29 
30 import dmd.backend.backend;
31 import dmd.backend.cc;
32 import dmd.backend.cdef;
33 import dmd.backend.code;
34 import dmd.backend.cgcse;
35 import dmd.backend.code_x86;
36 import dmd.backend.codebuilder;
37 import dmd.backend.disasm86;
38 import dmd.backend.dlist;
39 import dmd.backend.dvec;
40 import dmd.backend.melf;
41 import dmd.backend.mem;
42 import dmd.backend.el;
43 import dmd.backend.exh;
44 import dmd.backend.global;
45 import dmd.backend.obj;
46 import dmd.backend.oper;
47 import dmd.backend.rtlsym;
48 import dmd.backend.symtab;
49 import dmd.backend.ty;
50 import dmd.backend.type;
51 import dmd.backend.xmm;
52 
53 import dmd.backend.barray;
54 
55 version (SCPP)
56 {
57     import parser;
58     import precomp;
59 }
60 
// Everything that follows has C++ linkage unless it says otherwise.
extern (C++):

nothrow:
@safe:

/// Signature of the comparison callback taken by the C library qsort().
alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
/// Local declaration of the C library qsort() using the nothrow callback type above.
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

// Mirror `version (MARS)` as a compile time boolean so it can be
// combined with other conditions in `static if`.
version (MARS)
    enum MARS = true;
else
    enum MARS = false;

// Defined in another module; called from codgen() when config.ehmethod is EH_DWARF.
void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset);
75 
/**
 * Convert a bit index into a single-bit mask.
 * Params:
 *      m = bit index (register number); must be less than 32
 * Returns:
 *      value with only bit `m` set
 */
private extern (D) uint mask(uint m)
{
    // shift an unsigned one so bit 31 is not produced through a signed int
    return 1u << m;
}
77 
78 
// Code generator state shared between the functions of the backend.
// codgen() and prolog() reset the parts they own for each function.
__gshared
{
bool floatreg;                  // true if floating register is required

int hasframe;                   // !=0 if this function has a stack frame
bool enforcealign;              // enforced stack alignment
targ_size_t spoff;
targ_size_t Foff;               // BP offset of floating register
targ_size_t CSoff;              // offset of common sub expressions
targ_size_t NDPoff;             // offset of saved 8087 registers
targ_size_t pushoff;            // offset of saved registers
bool pushoffuse;                // using pushoff
int BPoff;                      // offset from BP
int EBPtoESP;                   // add to EBP offset to get ESP offset
LocalSection Para;              // section of function parameters
LocalSection Auto;              // section of automatics and registers
LocalSection Fast;              // section of fastpar
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for alloca() temporary

REGSAVE regsave;

CGstate cgstate;                // state of code generator

regm_t BYTEREGS = BYTEREGS_INIT;
regm_t ALLREGS = ALLREGS_INIT;


/************************************
 * # of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               /* set to !=0 if any use of the stack
                                   other than accessing parameters. Used
                                   to see if we can address parameters
                                   with ESP rather than EBP.
                                 */
int refparam;           // !=0 if we referenced any parameters
int reflocal;           // !=0 if we referenced any locals
bool anyiasm;           // true if any inline assembler
char calledafunc;       // !=0 if we called a function
char needframe;         // if !=0, then we will need the frame
                        // pointer (BP for the 8088)
char gotref;            // !=0 if the GOTsym was referenced
uint usednteh;          // if !=0, then used NT exception handling
bool calledFinally;     // true if called a BC_finally block

/* Register contents    */
con_t regcon;

BackendPass pass;       // which code generation pass codgen() is running

private Symbol *retsym;         // set to symbol that should be placed in
                                // register AX

/****************************
 * Register masks.
 */

regm_t msavereg;        // Mask of registers that we would like to save.
                        // they are temporaries (set by scodelem())
regm_t mfuncreg;        // Mask of registers preserved by a function

regm_t allregs;                // ALLREGS optionally including mBP

int dfoidx;                     /* which block we are in                */

targ_size_t     funcoffset;     // offset of start of function
targ_size_t     prolog_allocoffset;     // offset past adj of stack allocation
targ_size_t     startoffset;    // size of function entry code
targ_size_t     retoffset;      /* offset from start of func to ret code */
targ_size_t     retsize;        /* size of function return              */

// Cleared at the start of every code generation pass in codgen()
private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs;

}
157 
/*********************************
 * Generate code for a function and emit it to the function's code segment.
 *
 * The basic blocks are translated one or more times. Each attempt starts at
 * `tryagain`. Unless the pass is already `BackendPass.final_` or the function
 * contains inline assembler, the code just generated is thrown away, register
 * assignment is attempted with cgreg_assign(), and the blocks are translated
 * again. Unoptimized builds go straight to the final pass.
 * After the last pass the stack offsets are computed, the prolog and epilogs
 * are attached, block offsets are assigned, jumps are shortened, and the code,
 * switch tables, line numbers and exception tables are written out.
 *
 * Note at the end of this routine mfuncreg will contain the mask
 * of registers not affected by the function. Some minor optimization
 * possibilities are here.
 * Params:
 *      sfunc = function to generate code for; must be the current funcsym_p,
 *              and cseg must be its segment
 */
@trusted
void codgen(Symbol *sfunc)
{
    bool flag;
    block *btry;                // only used by the version (SCPP) code below

    // Register usage. If a bit is on, the corresponding register is live
    // in that basic block.

    //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
    assert(sfunc == funcsym_p);
    assert(cseg == funcsym_p.Sseg);

    // One time setup, not repeated when a pass is retried
    cgreg_init();
    CSE.initialize();
    tym_t functy = tybasic(sfunc.ty());
    cod3_initregs();
    allregs = ALLREGS;
    pass = BackendPass.initial;
    Alloca.initialize();
    anyiasm = false;

    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        /* The dwarf unwinder relies on the function epilog to exist
         */
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCexit)
                b.BC = BCret;
        }
    }

    // Each code generation pass restarts here with per-pass state reset
tryagain:
    debug
    if (debugr)
        printf("------------------ PASS%s -----------------\n",
            (pass == BackendPass.initial) ? "init".ptr : ((pass == BackendPass.reg) ? "reg".ptr : "final".ptr));

    lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;

    // if no parameters, assume we don't need a stack frame
    needframe = 0;
    enforcealign = false;
    gotref = 0;
    stackchanged = 0;
    stackpush = 0;
    refparam = 0;
    calledafunc = 0;
    retsym = null;

    cgstate.stackclean = 1;
    cgstate.funcarg.initialize();
    cgstate.funcargtos = ~0;
    cgstate.accessedTLS = false;
    STACKALIGN = TARGET_STACKALIGN;

    regsave.reset();
    memset(global87.stack.ptr,0,global87.stack.sizeof);

    calledFinally = false;
    usednteh = 0;

    static if (MARS)
    {
        if (sfunc.Sfunc.Fflags3 & Fjmonitor &&
            config.exe & EX_windos)
            usednteh |= NTEHjmonitor;
    }
    else version (SCPP)
    {
        if (CPP)
        {
            if (config.exe == EX_WIN32 &&
                (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec))
                usednteh |= NTEHexcspec;
            except_reset();
        }
    }

    // Set on a trial basis, turning it off if anything might throw
    sfunc.Sfunc.Fflags3 |= Fnothrow;

    floatreg = false;
    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    CSE.start();
    memset(&regcon,0,regcon.sizeof);
    regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
    msavereg = 0;
    uint nretblocks = 0;
    mfuncreg = fregsaved;               // so we can see which are used
                                        // (bit is cleared each time
                                        //  we use one)
    assert(!(needframe && mfuncreg & mBP)); // needframe needs mBP

    // Reset per-block state, and note inline assembler and return blocks
    for (block* b = startblock; b; b = b.Bnext)
    {
        memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
        if (b.Belem)
            resetEcomsub(b.Belem);     // reset all the Ecomsubs
        if (b.BC == BCasm)
            anyiasm = true;             // we have inline assembler
        if (b.BC == BCret || b.BC == BCretexp)
            nretblocks++;
    }

    if (!config.fulltypes || (config.flags4 & CFG4optimized))
    {
        // Collect the registers that carry incoming parameters, leaving out
        // the registers of parameters that have themselves been placed in a register
        regm_t noparams = 0;
        foreach (i; 0 .. globsym.length)
        {
            Symbol *s = globsym[i];
            s.Sflags &= ~SFLread;
            switch (s.Sclass)
            {
                case SC.fastpar:
                case SC.shadowreg:
                    regcon.params |= s.Spregm();
                    goto case SC.parameter;

                case SC.parameter:
                    if (s.Sfl == FLreg)
                        noparams |= s.Sregm;
                    break;

                default:
                    break;
            }
        }
        regcon.params &= ~noparams;
    }

    if (config.flags4 & CFG4optimized)
    {
        if (nretblocks == 0 &&                  // if no return blocks in function
            !(sfunc.ty() & mTYnaked))      // naked functions may have hidden ways of returning
            sfunc.Sflags |= SFLexit;       // mark function as never returning

        assert(dfo);

        cgreg_reset();
        for (dfoidx = 0; dfoidx < dfo.length; dfoidx++)
        {
            regcon.used = msavereg | regcon.cse.mval;   // registers already in use
            block* b = dfo[dfoidx];
            blcodgen(b);                        // gen code in depth-first order
            //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
            cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
        }
    }
    else
    {
        // Not optimizing: a single pass in block list order
        pass = BackendPass.final_;
        for (block* b = startblock; b; b = b.Bnext)
            blcodgen(b);                // generate the code for each block
    }
    regcon.immed.mval = 0;
    assert(!regcon.cse.mops);           // should have all been used

    // See which variables we can put into registers
    if (pass != BackendPass.final_ &&
        !anyiasm)                               // possible LEA or LES opcodes
    {
        allregs |= cod3_useBP();                // see if we can use EBP

        // If pic code, but EBX was never needed
        if (!(allregs & mask(PICREG)) && !gotref)
        {
            allregs |= mask(PICREG);            // EBX can now be used
            cgreg_assign(retsym);
            pass = BackendPass.reg;
        }
        else if (cgreg_assign(retsym))          // if we found some registers
            pass = BackendPass.reg;
        else
            pass = BackendPass.final_;
        // Discard the code from this pass before generating it again
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
        goto tryagain;
    }
    cgreg_term();

    version (SCPP)
    {
        if (CPP)
            cgcod_eh();
    }

    // See if we need to enforce a particular stack alignment
    foreach (i; 0 .. globsym.length)
    {
        Symbol *s = globsym[i];

        if (Symbol_Sisdead(*s, anyiasm))
            continue;

        switch (s.Sclass)
        {
            case SC.register:
            case SC.auto_:
            case SC.fastpar:
                if (s.Sfl == FLreg)
                    break;

                const sz = type_alignsize(s.Stype);
                if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
                {
                    STACKALIGN = sz;
                    enforcealign = true;
                }
                break;

            default:
                break;
        }
    }

    stackoffsets(globsym, false);  // compute final offsets of stack variables
    cod5_prol_epi();            // see where to place prolog/epilog
    CSE.finish();               // compute addresses and sizes of CSE saves

    if (configv.addlinenumbers)
        objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));

    // Otherwise, jmp's to startblock will execute the prolog again
    assert(!startblock.Bpred);

    CodeBuilder cdbprolog; cdbprolog.ctor();
    prolog(cdbprolog);           // gen function start code
    code *cprolog = cdbprolog.finish();
    if (cprolog)
        pinholeopt(cprolog,null);       // optimize

    funcoffset = Offset(sfunc.Sseg);
    targ_size_t coffset = Offset(sfunc.Sseg);   // running offset of the next block

    if (eecontext.EEelem)
        genEEcode();

    // Attach epilogs and the prolog, then assign each block its size and offset
    for (block* b = startblock; b; b = b.Bnext)
    {
        // We couldn't do this before because localsize was unknown
        switch (b.BC)
        {
            case BCret:
                if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
                {
                    CodeBuilder cdb; cdb.ctor();
                    cdb.append(b.Bcode);
                    cdb.genlinnum(b.Bsrcpos);
                    b.Bcode = cdb.finish();
                }
                goto case BCretexp;

            case BCretexp:
                epilog(b);
                break;

            default:
                if (b.Bflags & BFLepilog)
                    epilog(b);
                break;
        }
        assignaddr(b);                  // assign addresses
        pinholeopt(b.Bcode,b);         // do pinhole optimization
        if (b.Bflags & BFLprolog)      // do function prolog
        {
            startoffset = coffset + calcblksize(cprolog) - funcoffset;
            b.Bcode = cat(cprolog,b.Bcode);
        }
        cgsched_block(b);
        b.Bsize = calcblksize(b.Bcode);       // calculate block size
        if (b.Balign)
        {
            // round coffset up to a multiple of Balign
            targ_size_t u = b.Balign - 1;
            coffset = (coffset + u) & ~u;
        }
        b.Boffset = coffset;           /* offset of this block         */
        coffset += b.Bsize;            /* offset of following block    */
    }

    debug
    debugw && printf("code addr complete\n");

    // Do jump optimization
    do
    {
        flag = false;
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
                continue;
            int i = branch(b,0);            // see if jmp => jmp short
            if (i)                          // if any bytes saved
            {   targ_size_t offset;

                // This block shrank, so recompute the offsets of all following blocks
                b.Bsize -= i;
                offset = b.Boffset + b.Bsize;
                for (block* bn = b.Bnext; bn; bn = bn.Bnext)
                {
                    if (bn.Balign)
                    {   targ_size_t u = bn.Balign - 1;

                        offset = (offset + u) & ~u;
                    }
                    bn.Boffset = offset;
                    offset += bn.Bsize;
                }
                coffset = offset;
                flag = true;
            }
        }
        if (!I16 && !(config.flags4 & CFG4optimized))
            break;                      // use the long conditional jmps
    } while (flag);                     // loop till no more bytes saved

    debug
    debugw && printf("code jump optimization complete\n");

    version (MARS)
    {
        if (usednteh & NTEH_try)
        {
            // Do this before code is emitted because we patch some instructions
            nteh_filltables();
        }
    }

    // Compute starting offset for switch tables
    targ_size_t swoffset;
    int jmpseg = -1;                    // -1 means not determined yet
    if (config.flags & CFGromable)
    {
        jmpseg = 0;
        swoffset = coffset;
    }

    // Emit the generated code
    if (eecontext.EEcompile == 1)
    {
        codout(sfunc.Sseg,eecontext.EEcode,null);
        code_free(eecontext.EEcode);
        version (SCPP)
        {
            el_free(eecontext.EEelem);
        }
    }
    else
    {
        __gshared Barray!ubyte disasmBuf;
        disasmBuf.reset();

        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCjmptab || b.BC == BCswitch)
            {
                if (jmpseg == -1)
                {
                    jmpseg = objmod.jmpTableSegment(sfunc);
                    swoffset = Offset(jmpseg);
                }
                swoffset = _align(0,swoffset);
                b.Btableoffset = swoffset;     /* offset of sw tab */
                swoffset += b.Btablesize;
            }
            jmpaddr(b.Bcode);          /* assign jump addresses        */

            debug
            if (debugc)
            {
                printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
                    cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
                if (b.Bcode)
                    printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
            }

            if (b.Balign)
            {   uint u = b.Balign;
                // number of bytes needed to reach the next multiple of u
                uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1);

                cod3_align_bytes(sfunc.Sseg, nalign);
            }
            // the segment offset must agree with the offset computed earlier
            assert(b.Boffset == Offset(sfunc.Sseg));

            version (SCPP)
            {
                if (CPP && !(config.exe == EX_WIN32))
                {
                    //printf("b = %p, index = %d\n",b,b.Bindex);
                    //except_index_set(b.Bindex);

                    if (btry != b.Btry)
                    {
                        btry = b.Btry;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                    if (b.BC == BCtry)
                    {
                        btry = b;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                }
            }

            codout(sfunc.Sseg,b.Bcode,configv.vasm ? &disasmBuf : null);   // output code
        }
        if (coffset != Offset(sfunc.Sseg))
        {
            debug
            printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));

            assert(0);
        }
        sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function

        if (configv.vasm)
            disassemble(disasmBuf[]);                   // disassemble the code

        static if (NTEXCEPTIONS || MARS)
        {
            version (MARS)
                const nteh = usednteh & NTEH_try;
            else static if (NTEXCEPTIONS)
                const nteh = usednteh & NTEHcpp;
            else
                enum nteh = true;
            if (nteh)
            {
                assert(!(config.flags & CFGromable));
                //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
                objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
            }
        }

        // Write out switch tables
        flag = false;                       // true if last active block was a ret
        for (block* b = startblock; b; b = b.Bnext)
        {
            switch (b.BC)
            {
                case BCjmptab:              /* if jump table                */
                    outjmptab(b);           /* write out jump table         */
                    goto Ldefault;

                case BCswitch:
                    outswitab(b);           /* write out switch table       */
                    goto Ldefault;

                case BCret:
                case BCretexp:
                    /* Compute offset to return code from start of function */
                    retoffset = b.Boffset + b.Bsize - retsize - funcoffset;
                    version (MARS)
                    {
                        /* Add 3 bytes to retoffset in case we have an exception
                         * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
                         * IT FIXES THE PROBLEM HERE AS WELL.
                         */
                        if (usednteh & NTEH_try)
                            retoffset += 3;
                    }
                    flag = true;
                    break;

                default:
                Ldefault:
                    retoffset = b.Boffset + b.Bsize - funcoffset;
                    break;
            }
        }
        if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
            /* put line number at end of function on the
               start of the last instruction
             */
            /* Instead, try offset to cleanup code  */
            if (retoffset < sfunc.Ssize)
                objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);

        static if (MARS)
        {
            if (config.exe == EX_WIN64)
                win64_pdata(sfunc);
        }

        static if (MARS)
        {
            if (usednteh & NTEH_try)
            {
                // Generate the NT exception handling tables; the function's
                // code has already been written out by codout() above
                nteh_gentables(sfunc);
            }
            if (usednteh & (EHtry | EHcleanup) &&   // saw BCtry or BC_try or OPddtor
                config.ehmethod == EHmethod.EH_DM)
            {
                except_gentables();
            }
            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                // Fstartblock is only set for the duration of the call
                sfunc.Sfunc.Fstartblock = startblock;
                dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
                sfunc.Sfunc.Fstartblock = null;
            }
        }

        version (SCPP)
        {
            // Write out frame handler
            if (NTEXCEPTIONS && usednteh & NTEHcpp)
            {
                nteh_framehandler(sfunc, except_gentables());
            }
            else
            {
                if (NTEXCEPTIONS && usednteh & NTEH_try)
                {
                    nteh_gentables(sfunc);
                }
                else
                {
                    if (CPP)
                        except_gentables();
                }
            }
        }

        // The code has been written out, release the in-memory copy
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
    }

    // Mask of regs saved
    // BUG: do interrupt functions save BP?
    sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved);

    debug
    if (global87.stackused != 0)
      printf("stackused = %d\n",global87.stackused);

    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    global87.save.dtor();       // clean up ndp save array
}
714 
/*********************************************
 * Align sections on the stack.
 * Params:
 *  base =      negative offset of section from frame pointer
 *  alignment = alignment to use; values above STACKALIGN are reduced to
 *              STACKALIGN, and 0 leaves base unchanged
 *  bias =      difference between where frame pointer points and the STACKALIGNed
 *              part of the stack
 * Returns:
 *  base        revised downward so it is aligned
 */
@trusted
targ_size_t alignsection(targ_size_t base, uint alignment, int bias)
{
    // base is an unsigned type holding a negative (or zero) offset
    assert(cast(long)base <= 0);
    if (alignment > STACKALIGN)
        alignment = STACKALIGN;
    if (alignment)
    {
        // distance of the section from the aligned part of the stack
        long sz = cast(long)(-base + bias);
        assert(sz >= 0);
        // remainder modulo alignment; NOTE(review): the mask assumes alignment
        // is a power of 2 - confirm against callers
        sz &= (alignment - 1);
        if (sz)
            base -= alignment - sz;     // move down to the next aligned offset
    }
    return base;
}
740 
741 /*******************************
742  * Generate code for a function start.
743  * Input:
744  *      Offset(cseg)         address of start of code
745  *      Auto.alignment
746  * Output:
747  *      Offset(cseg)         adjusted for size of code generated
748  *      EBPtoESP
749  *      hasframe
750  *      BPoff
751  */
752 @trusted
753 void prolog(ref CodeBuilder cdb)
754 {
755     bool enter;
756 
757     //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment);
758     debug debugw && printf("funcstart()\n");
759     regcon.immed.mval = 0;                      /* no values in registers yet   */
760     version (FRAMEPTR)
761         EBPtoESP = 0;
762     else
763         EBPtoESP = -REGSIZE;
764     hasframe = 0;
765     bool pushds = false;
766     BPoff = 0;
767     bool pushalloc = false;
768     tym_t tyf = funcsym_p.ty();
769     tym_t tym = tybasic(tyf);
770     const farfunc = tyfarfunc(tym) != 0;
771 
772     // Special Intel 64 bit ABI prolog setup for variadic functions
773     Symbol *sv64 = null;                        // set to __va_argsave
774     if (I64 && variadic(funcsym_p.Stype))
775     {
776         /* The Intel 64 bit ABI scheme.
777          * abi_sysV_amd64.pdf
778          * Load arguments passed in registers into the varargs save area
779          * so they can be accessed by va_arg().
780          */
781         /* Look for __va_argsave
782          */
783         for (SYMIDX si = 0; si < globsym.length; si++)
784         {
785             Symbol *s = globsym[si];
786             if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0)
787             {
788                 if (!(s.Sflags & SFLdead))
789                     sv64 = s;
790                 break;
791             }
792         }
793     }
794 
795     if (config.flags & CFGalwaysframe ||
796         funcsym_p.Sfunc.Fflags3 & Ffakeeh ||
797         /* The exception stack unwinding mechanism relies on the EBP chain being intact,
798          * so need frame if function can possibly throw
799          */
800         !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) ||
801         cgstate.accessedTLS ||
802         sv64
803        )
804         needframe = 1;
805 
806     CodeBuilder cdbx; cdbx.ctor();
807 
808 Lagain:
809     spoff = 0;
810     char guessneedframe = needframe;
811     int cfa_offset = 0;
812 //    if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)))
813 //      usednteh |= NTEHpassthru;
814 
815     /* Compute BP offsets for variables on stack.
816      * The organization is:
817      *  Para.size    parameters
818      * -------- stack is aligned to STACKALIGN
819      *          seg of return addr      (if far function)
820      *          IP of return addr
821      *  BP.    caller's BP
822      *          DS                      (if Windows prolog/epilog)
823      *          exception handling context symbol
824      *  Fast.size fastpar
825      *  Auto.size    autos and regs
826      *  regsave.off  any saved registers
827      *  Foff    floating register
828      *  Alloca.size  alloca temporary
829      *  CSoff   common subs
830      *  NDPoff  any 8087 saved registers
831      *          monitor context record
832      *          any saved registers
833      */
834 
835     if (tym == TYifunc)
836         Para.size = 26; // how is this number derived?
837     else
838     {
839         version (FRAMEPTR)
840         {
841             bool frame = needframe || tyf & mTYnaked;
842             Para.size = ((farfunc ? 2 : 1) + frame) * REGSIZE;
843             if (frame)
844                 EBPtoESP = -REGSIZE;
845         }
846         else
847             Para.size = ((farfunc ? 2 : 1) + 1) * REGSIZE;
848     }
849 
850     /* The real reason for the FAST section is because the implementation of contracts
851      * requires a consistent stack frame location for the 'this' pointer. But if varying
852      * stuff in Auto.offset causes different alignment for that section, the entire block can
853      * shift around, causing a crash in the contracts.
854      * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their
855      * own FAST section, which is never aligned at a size bigger than REGSIZE, and so
856      * its alignment never shifts around.
857      * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned
858      * individually rather than as a group.
859      */
860     Fast.size = 0;
861     static if (NTEXCEPTIONS == 2)
862     {
863         Fast.size -= nteh_contextsym_size();
864         version (MARS)
865         {
866             if (config.exe & EX_windos)
867             {
868                 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
869                     Fast.size -= 5 * 4;
870             }
871         }
872     }
873 
874     /* Despite what the comment above says, aligning Fast section to size greater
875      * than REGSIZE does not break contract implementation. Fast.offset and
876      * Fast.alignment must be the same for the overriding and
877      * the overridden function, since they have the same parameters. Fast.size
878      * must be the same because otherwise, contract inheritance wouldn't work
879      * even if we didn't align Fast section to size greater than REGSIZE. Therefore,
880      * the only way aligning the section could cause problems with contract
881      * inheritance is if bias (declared below) differed for the overridden
882      * and the overriding function.
883      *
884      * Bias depends on Para.size and needframe. The value of Para.size depends on
885      * whether the function is an interrupt handler and whether it is a farfunc.
886      * DMD does not have _interrupt attribute and D does not make a distinction
887      * between near and far functions, so Para.size should always be 2 * REGSIZE
888      * for D.
889      *
890      * The value of needframe depends on a global setting that is only set
891      * during backend's initialization and on function flag Ffakeeh. On Windows,
892      * that flag is always set for virtual functions, for which contracts are
893      * defined and on other platforms, it is never set. Because of that
     * the value of needframe should always be the same for the overridden
895      * and the overriding function, and so bias should be the same too.
896      */
897 
898 version (FRAMEPTR)
899     int bias = enforcealign ? 0 : cast(int)(Para.size);
900 else
901     int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE));
902 
903     if (Fast.alignment < REGSIZE)
904         Fast.alignment = REGSIZE;
905 
906     Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias);
907 
908     if (Auto.alignment < REGSIZE)
909         Auto.alignment = REGSIZE;       // necessary because localsize must be REGSIZE aligned
910     Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias);
911 
912     regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias);
913     //printf("regsave.off = x%x, size = x%x, alignment = %x\n",
914         //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment);
915 
916     if (floatreg)
917     {
918         uint floatregsize = config.fpxmmregs || I32 ? 16 : DOUBLESIZE;
919         Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias);
920         //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize);
921     }
922     else
923         Foff = regsave.off;
924 
925     Alloca.alignment = REGSIZE;
926     Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias);
927 
928     CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias);
929     //printf("CSoff = x%x, size = x%x, alignment = %x\n",
930         //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment);
931 
932     NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias);
933 
934     regm_t topush = fregsaved & ~mfuncreg;          // mask of registers that need saving
935     pushoffuse = false;
936     pushoff = NDPoff;
937     /* We don't keep track of all the pushes and pops in a function. Hence,
938      * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder
939      * won't be setting ESP correctly. With pushoffuse, the registers are restored
940      * from EBP, which is kept track of properly.
941      */
942     if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64))
943     {
944         /* Instead of pushing the registers onto the stack one by one,
945          * allocate space in the stack frame and copy/restore them there.
946          */
947         int xmmtopush = popcnt(topush & XMMREGS);   // XMM regs take 16 bytes
948         int gptopush = popcnt(topush) - xmmtopush;  // general purpose registers to save
949         if (NDPoff || xmmtopush || cgstate.funcarg.size)
950         {
951             pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16),
952                     xmmtopush ? STACKALIGN : REGSIZE, bias);
953             pushoffuse = true;          // tell others we're using this strategy
954         }
955     }
956 
957     //printf("Fast.size = x%x, Auto.size = x%x\n", cast(int)Fast.size, cast(int)Auto.size);
958 
959     cgstate.funcarg.alignment = STACKALIGN;
960     /* If the function doesn't need the extra alignment, don't do it.
961      * Can expand on this by allowing for locals that don't need extra alignment
962      * and calling functions that don't need it.
963      */
964     if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64))
965     {
966         cgstate.funcarg.alignment = I64 ? 8 : 4;
967     }
968 
969     //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias);
970     cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias);
971 
972     localsize = -cgstate.funcarg.offset;
973 
974     //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n",
975         //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize);
976     assert(cast(targ_ptrdiff_t)localsize >= 0);
977 
978     // Keep the stack aligned by 8 for any subsequent function calls
979     if (!I16 && calledafunc &&
980         (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
981     {
982         int npush = popcnt(topush);            // number of registers that need saving
983         npush += popcnt(topush & XMMREGS);     // XMM regs take 16 bytes, so count them twice
984         if (pushoffuse)
985             npush = 0;
986 
987         //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n",
988                //npush, Para.size, needframe, localsize);
989 
990         int sz = cast(int)(localsize + npush * REGSIZE);
991         if (!enforcealign)
992         {
993             version (FRAMEPTR)
994                 sz += Para.size;
995             else
996                 sz += Para.size + (needframe ? 0 : -REGSIZE);
997         }
998         if (sz & (STACKALIGN - 1))
999             localsize += STACKALIGN - (sz & (STACKALIGN - 1));
1000     }
1001     cgstate.funcarg.offset = -localsize;
1002 
1003     //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n",
1004         //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize);
1005 
1006     uint xlocalsize = cast(uint)localsize;    // amount to subtract from ESP to make room for locals
1007 
1008     if (tyf & mTYnaked)                 // if no prolog/epilog for function
1009     {
1010         hasframe = 1;
1011         return;
1012     }
1013 
1014     if (tym == TYifunc)
1015     {
1016         prolog_ifunc(cdbx,&tyf);
1017         hasframe = 1;
1018         cdb.append(cdbx);
1019         goto Lcont;
1020     }
1021 
1022     /* Determine if we need BP set up   */
1023     if (enforcealign)
1024     {
1025         // we need BP to reset the stack before return
1026         // otherwise the return address is lost
1027         needframe = 1;
1028     }
1029     else if (config.flags & CFGalwaysframe)
1030         needframe = 1;
1031     else
1032     {
1033         if (localsize)
1034         {
1035             if (I16 ||
1036                 !(config.flags4 & CFG4speed) ||
1037                 config.target_cpu < TARGET_Pentium ||
1038                 farfunc ||
1039                 config.flags & CFGstack ||
1040                 xlocalsize >= 0x1000 ||
1041                 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
1042                 anyiasm ||
1043                 Alloca.size
1044                )
1045             {
1046                 needframe = 1;
1047             }
1048         }
1049         if (refparam && (anyiasm || I16))
1050             needframe = 1;
1051     }
1052 
1053     if (needframe)
1054     {
1055         assert(mfuncreg & mBP);         // shouldn't have used mBP
1056 
1057         if (!guessneedframe)            // if guessed wrong
1058             goto Lagain;
1059     }
1060 
1061     if (I16 && config.wflags & WFwindows && farfunc)
1062     {
1063         prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds);
1064         enter = false;                  // don't use ENTER instruction
1065         hasframe = 1;                   // we have a stack frame
1066     }
1067     else if (needframe)                 // if variables or parameters
1068     {
1069         prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset);
1070         hasframe = 1;
1071     }
1072 
1073     /* Align the stack if necessary */
1074     prolog_stackalign(cdbx);
1075 
1076     /* Subtract from stack pointer the size of the local stack frame
1077      */
1078     if (config.flags & CFGstack)        // if stack overflow check
1079     {
1080         prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1081         if (Alloca.size)
1082             prolog_setupalloca(cdbx);
1083     }
1084     else if (needframe)                      /* if variables or parameters   */
1085     {
1086         if (xlocalsize)                 /* if any stack offset          */
1087         {
1088             prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc);
1089             if (Alloca.size)
1090                 prolog_setupalloca(cdbx);
1091         }
1092         else
1093             assert(Alloca.size == 0);
1094     }
1095     else if (xlocalsize)
1096     {
1097         assert(I32 || I64);
1098         prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc);
1099         version (FRAMEPTR) { } else
1100             BPoff += REGSIZE;
1101     }
1102     else
1103         assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor));
1104     EBPtoESP += xlocalsize;
1105     if (hasframe)
1106         EBPtoESP += REGSIZE;
1107 
1108     /* Win64 unwind needs the amount of code generated so far
1109      */
1110     if (config.exe == EX_WIN64)
1111     {
1112         code *c = cdbx.peek();
1113         pinholeopt(c, null);
1114         prolog_allocoffset = calcblksize(c);
1115     }
1116 
1117     version (SCPP)
1118     {
1119         /*  The idea is to generate trace for all functions if -Nc is not thrown.
1120          *  If -Nc is thrown, generate trace only for global COMDATs, because those
1121          *  are relevant to the FUNCTIONS statement in the linker .DEF file.
1122          *  This same logic should be in epilog().
1123          */
1124         if (config.flags & CFGtrace &&
1125             (!(config.flags4 & CFG4allcomdat) ||
1126              funcsym_p.Sclass == SC.comdat ||
1127              funcsym_p.Sclass == SC.global ||
1128              (config.flags2 & CFG2comdat && SymInline(funcsym_p))
1129             )
1130            )
1131         {
1132             uint spalign = 0;
1133             int sz = cast(int)localsize;
1134             if (!enforcealign)
1135             {
1136                 version (FRAMEPTR)
1137                     sz += Para.size;
1138                 else
1139                     sz += Para.size + (needframe ? 0 : -REGSIZE);
1140             }
1141             if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1)))
1142                 spalign = STACKALIGN - (sz & (STACKALIGN - 1));
1143 
1144             if (spalign)
1145             {   /* This could be avoided by moving the function call to after the
1146                  * registers are saved. But I don't remember why the call is here
1147                  * and not there.
1148                  */
1149                 cod3_stackadj(cdbx, spalign);
1150             }
1151 
1152             uint regsaved;
1153             prolog_trace(cdbx, farfunc, &regsaved);
1154 
1155             if (spalign)
1156                 cod3_stackadj(cdbx, -spalign);
1157             useregs((ALLREGS | mBP | mES) & ~regsaved);
1158         }
1159     }
1160 
1161     version (MARS)
1162     {
1163         if (usednteh & NTEHjmonitor)
1164         {   Symbol *sthis;
1165 
1166             for (SYMIDX si = 0; 1; si++)
1167             {   assert(si < globsym.length);
1168                 sthis = globsym[si];
1169                 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0)
1170                     break;
1171             }
1172             nteh_monitor_prolog(cdbx,sthis);
1173             EBPtoESP += 3 * 4;
1174         }
1175     }
1176 
1177     cdb.append(cdbx);
1178     prolog_saveregs(cdb, topush, cfa_offset);
1179 
1180 Lcont:
1181 
1182     if (config.exe == EX_WIN64)
1183     {
1184         if (variadic(funcsym_p.Stype))
1185             prolog_gen_win64_varargs(cdb);
1186         regm_t namedargs;
1187         prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1188         return;
1189     }
1190 
1191     prolog_ifunc2(cdb, tyf, tym, pushds);
1192 
1193     static if (NTEXCEPTIONS == 2)
1194     {
1195         if (usednteh & NTEH_except)
1196             nteh_setsp(cdb, 0x89);            // MOV __context[EBP].esp,ESP
1197     }
1198 
1199     // Load register parameters off of the stack. Do not use
1200     // assignaddr(), as it will replace the stack reference with
1201     // the register!
1202     regm_t namedargs;
1203     prolog_loadparams(cdb, tyf, pushalloc, namedargs);
1204 
1205     if (sv64)
1206         prolog_genvarargs(cdb, sv64, namedargs);
1207 
1208     /* Alignment checks
1209      */
1210     //assert(Auto.alignment <= STACKALIGN);
1211     //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0);
1212 }
1213 
1214 /************************************
1215  * Predicate for sorting auto symbols for qsort().
1216  * Returns:
1217  *      < 0     s1 goes farther from frame pointer
1218  *      > 0     s1 goes nearer the frame pointer
1219  *      = 0     no difference
1220  */
1221 
1222 @trusted
1223 extern (C) int
1224  autosort_cmp(scope const void *ps1, scope const void *ps2)
1225 {
1226     Symbol *s1 = *cast(Symbol **)ps1;
1227     Symbol *s2 = *cast(Symbol **)ps2;
1228 
1229     /* Largest align size goes furthest away from frame pointer,
1230      * so they get allocated first.
1231      */
1232     uint alignsize1 = Symbol_Salignsize(*s1);
1233     uint alignsize2 = Symbol_Salignsize(*s2);
1234     if (alignsize1 < alignsize2)
1235         return 1;
1236     else if (alignsize1 > alignsize2)
1237         return -1;
1238 
1239     /* move variables nearer the frame pointer that have higher Sweights
1240      * because addressing mode is fewer bytes. Grouping together high Sweight
1241      * variables also may put them in the same cache
1242      */
1243     if (s1.Sweight < s2.Sweight)
1244         return -1;
1245     else if (s1.Sweight > s2.Sweight)
1246         return 1;
1247 
1248     /* More:
1249      * 1. put static arrays nearest the frame pointer, so buffer overflows
1250      *    can't change other variable contents
1251      * 2. Do the coloring at the byte level to minimize stack usage
1252      */
1253     return 0;
1254 }
1255 
1256 /******************************
1257  * Compute stack frame offsets for local variables.
1258  * that did not make it into registers.
1259  * Params:
1260  *      symtab = function's symbol table
1261  *      estimate = true for do estimate only, false for final
1262  */
1263 @trusted
1264 void stackoffsets(ref symtab_t symtab, bool estimate)
1265 {
1266     //printf("stackoffsets() %s\n", funcsym_p.Sident.ptr);
1267 
1268     Para.initialize();        // parameter offset
1269     Fast.initialize();        // SCfastpar offset
1270     Auto.initialize();        // automatic & register offset
1271     EEStack.initialize();     // for SCstack's
1272 
1273     // Set if doing optimization of auto layout
1274     bool doAutoOpt = estimate && config.flags4 & CFG4optimized;
1275 
1276     // Put autos in another array so we can do optimizations on the stack layout
1277     Symbol*[10] autotmp = void;
1278     Symbol **autos = null;
1279     if (doAutoOpt)
1280     {
1281         if (symtab.length <= autotmp.length)
1282             autos = autotmp.ptr;
1283         else
1284         {   autos = cast(Symbol **)malloc(symtab.length * (*autos).sizeof);
1285             assert(autos);
1286         }
1287     }
1288     size_t autosi = 0;  // number used in autos[]
1289 
1290     for (int si = 0; si < symtab.length; si++)
1291     {   Symbol *s = symtab[si];
1292 
1293         /* Don't allocate space for dead or zero size parameters
1294          */
1295         switch (s.Sclass)
1296         {
1297             case SC.fastpar:
1298                 if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
1299                     goto Ldefault;   // don't need consistent stack frame
1300                 break;
1301 
1302             case SC.parameter:
1303                 if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
1304                 {
1305                     Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
1306                     s.Soffset = Para.offset;
1307                     continue;
1308                 }
1309                 break;          // allocate even if it's dead
1310 
1311             case SC.shadowreg:
1312                 break;          // allocate even if it's dead
1313 
1314             default:
1315             Ldefault:
1316                 if (Symbol_Sisdead(*s, anyiasm))
1317                     continue;       // don't allocate space
1318                 break;
1319         }
1320 
1321         targ_size_t sz = type_size(s.Stype);
1322         if (sz == 0)
1323             sz++;               // can't handle 0 length structs
1324 
1325         uint alignsize = Symbol_Salignsize(*s);
1326         if (alignsize > STACKALIGN)
1327             alignsize = STACKALIGN;         // no point if the stack is less aligned
1328 
1329         //printf("symbol '%s', size = %d, alignsize = %d, read = %x\n",s.Sident.ptr, cast(int)sz, cast(int)alignsize, s.Sflags & SFLread);
1330         assert(cast(int)sz >= 0);
1331 
1332         switch (s.Sclass)
1333         {
1334             case SC.fastpar:
1335                 /* Get these
1336                  * right next to the stack frame pointer, EBP.
1337                  * Needed so we can call nested contract functions
1338                  * frequire and fensure.
1339                  */
1340                 if (s.Sfl == FLreg)        // if allocated in register
1341                     continue;
1342                 /* Needed because storing fastpar's on the stack in prolog()
1343                  * does the entire register
1344                  */
1345                 if (sz < REGSIZE)
1346                     sz = REGSIZE;
1347 
1348                 Fast.offset = _align(sz,Fast.offset);
1349                 s.Soffset = Fast.offset;
1350                 Fast.offset += sz;
1351                 //printf("fastpar '%s' sz = %d, fast offset =  x%x, %p\n", s.Sident, cast(int) sz, cast(int) s.Soffset, s);
1352 
1353                 if (alignsize > Fast.alignment)
1354                     Fast.alignment = alignsize;
1355                 break;
1356 
1357             case SC.register:
1358             case SC.auto_:
1359                 if (s.Sfl == FLreg)        // if allocated in register
1360                     break;
1361 
1362                 if (doAutoOpt)
1363                 {   autos[autosi++] = s;    // deal with later
1364                     break;
1365                 }
1366 
1367                 Auto.offset = _align(sz,Auto.offset);
1368                 s.Soffset = Auto.offset;
1369                 Auto.offset += sz;
1370                 //printf("auto    '%s' sz = %d, auto offset =  x%lx\n", s.Sident,sz, cast(long) s.Soffset);
1371 
1372                 if (alignsize > Auto.alignment)
1373                     Auto.alignment = alignsize;
1374                 break;
1375 
1376             case SC.stack:
1377                 EEStack.offset = _align(sz,EEStack.offset);
1378                 s.Soffset = EEStack.offset;
1379                 //printf("EEStack.offset =  x%lx\n",cast(long)s.Soffset);
1380                 EEStack.offset += sz;
1381                 break;
1382 
1383             case SC.shadowreg:
1384             case SC.parameter:
1385                 if (config.exe == EX_WIN64)
1386                 {
1387                     assert((Para.offset & 7) == 0);
1388                     s.Soffset = Para.offset;
1389                     Para.offset += 8;
1390                     break;
1391                 }
1392                 /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
1393                  * but are 4 byte aligned on the OSX 32 stack.
1394                  */
1395                 Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
1396                 if (alignsize >= 16 &&
1397                     (I64 || (config.exe == EX_OSX &&
1398                          (tyaggregate(s.ty()) || tyvector(s.ty())))))
1399                     Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
1400                 s.Soffset = Para.offset;
1401                 //printf("%s param offset =  x%lx, alignsize = %d\n", s.Sident, cast(long) s.Soffset, cast(int) alignsize);
1402                 Para.offset += (s.Sflags & SFLdouble)
1403                             ? type_size(tstypes[TYdouble])   // float passed as double
1404                             : type_size(s.Stype);
1405                 break;
1406 
1407             case SC.pseudo:
1408             case SC.static_:
1409             case SC.bprel:
1410                 break;
1411             default:
1412                 symbol_print(s);
1413                 assert(0);
1414         }
1415     }
1416 
1417     if (autosi)
1418     {
1419         qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);
1420 
1421         vec_t tbl = vec_calloc(autosi);
1422 
1423         for (size_t si = 0; si < autosi; si++)
1424         {
1425             Symbol *s = autos[si];
1426 
1427             targ_size_t sz = type_size(s.Stype);
1428             if (sz == 0)
1429                 sz++;               // can't handle 0 length structs
1430 
1431             uint alignsize = Symbol_Salignsize(*s);
1432             if (alignsize > STACKALIGN)
1433                 alignsize = STACKALIGN;         // no point if the stack is less aligned
1434 
1435             /* See if we can share storage with another variable
1436              * if their live ranges do not overlap.
1437              */
1438             if (// Don't share because could stomp on variables
1439                 // used in finally blocks
1440                 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
1441                 s.Srange && !(s.Sflags & SFLspill))
1442             {
1443                 for (size_t i = 0; i < si; i++)
1444                 {
1445                     if (!vec_testbit(i,tbl))
1446                         continue;
1447                     Symbol *sp = autos[i];
1448 //printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
1449                     if (vec_disjoint(s.Srange,sp.Srange) &&
1450                         !(sp.Soffset & (alignsize - 1)) &&
1451                         sz <= type_size(sp.Stype))
1452                     {
1453                         vec_or(sp.Srange,sp.Srange,s.Srange);
1454                         //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
1455                         s.Soffset = sp.Soffset;
1456                         goto L2;
1457                     }
1458                 }
1459             }
1460             Auto.offset = _align(sz,Auto.offset);
1461             s.Soffset = Auto.offset;
1462             //printf("auto    '%s' sz = %d, auto offset =  x%lx\n", s.Sident, sz, cast(long) s.Soffset);
1463             Auto.offset += sz;
1464             if (s.Srange && !(s.Sflags & SFLspill))
1465                 vec_setbit(si,tbl);
1466 
1467             if (alignsize > Auto.alignment)
1468                 Auto.alignment = alignsize;
1469         L2: { }
1470         }
1471 
1472         vec_free(tbl);
1473 
1474         if (autos != autotmp.ptr)
1475             free(autos);
1476     }
1477 }
1478 
1479 /****************************
1480  * Generate code for a block.
1481  */
1482 
1483 @trusted
1484 private void blcodgen(block *bl)
1485 {
1486     regm_t mfuncregsave = mfuncreg;
1487 
1488     //dbg_printf("blcodgen(%p)\n",bl);
1489 
1490     /* Determine existing immediate values in registers by ANDing
1491         together the values from all the predecessors of b.
1492      */
1493     assert(bl.Bregcon.immed.mval == 0);
1494     regcon.immed.mval = 0;      // assume no previous contents in registers
1495 //    regcon.cse.mval = 0;
1496     foreach (bpl; ListRange(bl.Bpred))
1497     {
1498         block *bp = list_block(bpl);
1499 
1500         if (bpl == bl.Bpred)
1501         {   regcon.immed = bp.Bregcon.immed;
1502             regcon.params = bp.Bregcon.params;
1503 //          regcon.cse = bp.Bregcon.cse;
1504         }
1505         else
1506         {
1507             int i;
1508 
1509             regcon.params &= bp.Bregcon.params;
1510             if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0)
1511                 // Actual values must match, too
1512                 for (i = 0; i < REGMAX; i++)
1513                 {
1514                     if (regcon.immed.value[i] != bp.Bregcon.immed.value[i])
1515                         regcon.immed.mval &= ~mask(i);
1516                 }
1517         }
1518     }
1519     regcon.cse.mops &= regcon.cse.mval;
1520 
1521     // Set regcon.mvar according to what variables are in registers for this block
1522     CodeBuilder cdb; cdb.ctor();
1523     regcon.mvar = 0;
1524     regcon.mpvar = 0;
1525     regcon.indexregs = 1;
1526     int anyspill = 0;
1527     char *sflsave = null;
1528     if (config.flags4 & CFG4optimized)
1529     {
1530         CodeBuilder cdbload; cdbload.ctor();
1531         CodeBuilder cdbstore; cdbstore.ctor();
1532 
1533         sflsave = cast(char *) alloca(globsym.length * char.sizeof);
1534         for (SYMIDX i = 0; i < globsym.length; i++)
1535         {
1536             Symbol *s = globsym[i];
1537 
1538             sflsave[i] = s.Sfl;
1539             if (regParamInPreg(s) &&
1540                 regcon.params & s.Spregm() &&
1541                 vec_testbit(dfoidx,s.Srange))
1542             {
1543 //                regcon.used |= s.Spregm();
1544             }
1545 
1546             if (s.Sfl == FLreg)
1547             {
1548                 if (vec_testbit(dfoidx,s.Srange))
1549                 {
1550                     regcon.mvar |= s.Sregm;
1551                     if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
1552                         regcon.mpvar |= s.Sregm;
1553                 }
1554             }
1555             else if (s.Sflags & SFLspill)
1556             {
1557                 if (vec_testbit(dfoidx,s.Srange))
1558                 {
1559                     anyspill = cast(int)(i + 1);
1560                     cgreg_spillreg_prolog(bl,s,cdbstore,cdbload);
1561                     if (vec_testbit(dfoidx,s.Slvreg))
1562                     {
1563                         s.Sfl = FLreg;
1564                         regcon.mvar |= s.Sregm;
1565                         regcon.cse.mval &= ~s.Sregm;
1566                         regcon.immed.mval &= ~s.Sregm;
1567                         regcon.params &= ~s.Sregm;
1568                         if (s.Sclass == SC.fastpar || s.Sclass == SC.shadowreg)
1569                             regcon.mpvar |= s.Sregm;
1570                     }
1571                 }
1572             }
1573         }
1574         if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
1575         {
1576             cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval);
1577         }
1578         cdb.append(cdbstore);
1579         cdb.append(cdbload);
1580         mfuncreg &= ~regcon.mvar;               // use these registers
1581         regcon.used |= regcon.mvar;
1582 
1583         // Determine if we have more than 1 uncommitted index register
1584         regcon.indexregs = IDXREGS & ~regcon.mvar;
1585         regcon.indexregs &= regcon.indexregs - 1;
1586     }
1587 
1588     /* This doesn't work when calling the BC_finally function,
1589      * as it is one block calling another.
1590      */
1591     //regsave.idx = 0;
1592 
1593     reflocal = 0;
1594     int refparamsave = refparam;
1595     refparam = 0;
1596     assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);
1597 
1598     outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
1599     bl.Bcode = cdb.finish();
1600 
1601     for (int i = 0; i < anyspill; i++)
1602     {
1603         Symbol *s = globsym[i];
1604         s.Sfl = sflsave[i];    // undo block register assignments
1605     }
1606 
1607     if (reflocal)
1608         bl.Bflags |= BFLreflocal;
1609     if (refparam)
1610         bl.Bflags |= BFLrefparam;
1611     refparam |= refparamsave;
1612     bl.Bregcon.immed = regcon.immed;
1613     bl.Bregcon.cse = regcon.cse;
1614     bl.Bregcon.used = regcon.used;
1615     bl.Bregcon.params = regcon.params;
1616 
1617     debug
1618     debugw && printf("code gen complete\n");
1619 }
1620 
1621 /*****************************************
1622  * Add in exception handling code.
1623  */
1624 
1625 version (SCPP)
1626 {
1627 
1628 private void cgcod_eh()
1629 {
1630     list_t stack;
1631     int idx;
1632     int tryidx;
1633 
1634     if (!(usednteh & (EHtry | EHcleanup)))
1635         return;
1636 
1637     // Compute Bindex for each block
1638     for (block *b = startblock; b; b = b.Bnext)
1639     {
1640         b.Bindex = -1;
1641         b.Bflags &= ~BFLvisited;               /* mark as unvisited    */
1642     }
1643     block *btry = null;
1644     int lastidx = 0;
1645     startblock.Bindex = 0;
1646     for (block *b = startblock; b; b = b.Bnext)
1647     {
1648         if (btry == b.Btry && b.BC == BCcatch)  // if don't need to pop try block
1649         {
1650             block *br = list_block(b.Bpred);          // find corresponding try block
1651             assert(br.BC == BCtry);
1652             b.Bindex = br.Bindex;
1653         }
1654         else if (btry != b.Btry && b.BC != BCcatch ||
1655                  !(b.Bflags & BFLvisited))
1656             b.Bindex = lastidx;
1657         b.Bflags |= BFLvisited;
1658 
1659         debug
1660         if (debuge)
1661         {
1662             printf("%s block (%p) Btry=%p Bindex=%d\n",bc_str(b.BC),b,b.Btry,b.Bindex);
1663         }
1664 
1665         except_index_set(b.Bindex);
1666         if (btry != b.Btry)                    // exited previous try block
1667         {
1668             except_pop(b,null,btry);
1669             btry = b.Btry;
1670         }
1671         if (b.BC == BCtry)
1672         {
1673             except_push(b,null,b);
1674             btry = b;
1675             tryidx = except_index_get();
1676             CodeBuilder cdb; cdb.ctor();
1677             nteh_gensindex(cdb,tryidx - 1);
1678             cdb.append(b.Bcode);
1679             b.Bcode = cdb.finish();
1680         }
1681 
1682         stack = null;
1683         for (code *c = b.Bcode; c; c = code_next(c))
1684         {
1685             if ((c.Iop & ESCAPEmask) == ESCAPE)
1686             {
1687                 code *c1 = null;
1688                 switch (c.Iop & 0xFFFF00)
1689                 {
1690                     case ESCctor:
1691                         //printf("ESCctor\n");
1692                         except_push(c,c.IEV1.Vtor,null);
1693                         goto L1;
1694 
1695                     case ESCdtor:
1696                         //printf("ESCdtor\n");
1697                         except_pop(c,c.IEV1.Vtor,null);
1698                     L1: if (config.exe == EX_WIN32)
1699                         {
1700                             CodeBuilder cdb; cdb.ctor();
1701                             nteh_gensindex(cdb,except_index_get() - 1);
1702                             c1 = cdb.finish();
1703                             c1.next = code_next(c);
1704                             c.next = c1;
1705                         }
1706                         break;
1707 
1708                     case ESCmark:
1709                         //printf("ESCmark\n");
1710                         idx = except_index_get();
1711                         list_prependdata(&stack,idx);
1712                         except_mark();
1713                         break;
1714 
1715                     case ESCrelease:
1716                         //printf("ESCrelease\n");
1717                         version (SCPP)
1718                         {
1719                             idx = list_data(stack);
1720                             list_pop(&stack);
1721                             if (idx != except_index_get())
1722                             {
1723                                 if (config.exe == EX_WIN32)
1724                                 {
1725                                     CodeBuilder cdb; cdb.ctor();
1726                                     nteh_gensindex(cdb,idx - 1);
1727                                     c1 = cdb.finish();
1728                                     c1.next = code_next(c);
1729                                     c.next = c1;
1730                                 }
1731                                 else
1732                                 {   except_pair_append(c,idx - 1);
1733                                     c.Iop = ESCAPE | ESCoffset;
1734                                 }
1735                             }
1736                             except_release();
1737                         }
1738                         break;
1739 
1740                     case ESCmark2:
1741                         //printf("ESCmark2\n");
1742                         except_mark();
1743                         break;
1744 
1745                     case ESCrelease2:
1746                         //printf("ESCrelease2\n");
1747                         version (SCPP)
1748                         {
1749                             except_release();
1750                         }
1751                         break;
1752 
1753                     default:
1754                         break;
1755                 }
1756             }
1757         }
1758         assert(stack == null);
1759         b.Bendindex = except_index_get();
1760 
1761         if (b.BC != BCret && b.BC != BCretexp)
1762             lastidx = b.Bendindex;
1763 
1764         // Set starting index for each of the successors
1765         int i = 0;
1766         foreach (bl; ListRange(b.Bsucc))
1767         {
1768             block *bs = list_block(bl);
1769             if (b.BC == BCtry)
1770             {
1771                 switch (i)
1772                 {
1773                     case 0:                             // block after catches
1774                         bs.Bindex = b.Bendindex;
1775                         break;
1776 
1777                     case 1:                             // 1st catch block
1778                         bs.Bindex = tryidx;
1779                         break;
1780 
1781                     default:                            // subsequent catch blocks
1782                         bs.Bindex = b.Bindex;
1783                         break;
1784                 }
1785 
1786                 debug
1787                 if (debuge)
1788                 {
1789                     printf(" 1setting %p to %d\n",bs,bs.Bindex);
1790                 }
1791             }
1792             else if (!(bs.Bflags & BFLvisited))
1793             {
1794                 bs.Bindex = b.Bendindex;
1795 
1796                 debug
1797                 if (debuge)
1798                 {
1799                     printf(" 2setting %p to %d\n",bs,bs.Bindex);
1800                 }
1801             }
1802             bs.Bflags |= BFLvisited;
1803             i++;
1804         }
1805     }
1806 
1807     if (config.exe == EX_WIN32)
1808         for (block *b = startblock; b; b = b.Bnext)
1809         {
1810             if (/*!b.Bcount ||*/ b.BC == BCtry)
1811                 continue;
1812             foreach (bl; ListRange(b.Bpred))
1813             {
1814                 int pi = list_block(bl).Bendindex;
1815                 if (b.Bindex != pi)
1816                 {
1817                     CodeBuilder cdb; cdb.ctor();
1818                     nteh_gensindex(cdb,b.Bindex - 1);
1819                     cdb.append(b.Bcode);
1820                     b.Bcode = cdb.finish();
1821                     break;
1822                 }
1823             }
1824         }
1825 }
1826 
1827 }
1828 
/******************************
 * Find the lowest-numbered register present in a register mask.
 *
 * Params:
 *      regm = mask of candidate registers; must have at least one bit set
 * Returns:
 *      the bit index of the least significant set bit of `regm`
 */

@trusted
reg_t findreg(regm_t regm)
{
    // Forward to the diagnostic overload. Note that __LINE__/__FILE__ are
    // evaluated here, so they identify this wrapper, not the caller.
    return findreg(regm, __LINE__, __FILE__);
}

/******************************
 * Same as `findreg(regm)`, but with a source location to print
 * (debug builds only) when `regm` is empty.
 *
 * Params:
 *      regm = mask of candidate registers
 *      line = line number to report on failure
 *      file = file name to report on failure
 * Returns:
 *      the bit index of the least significant set bit of `regm`;
 *      never returns if `regm` is 0 (ends in `assert(0)`)
 */
@trusted
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    // The register number is simply the position of the first set bit
    if (regm)
        return cast(reg_t) bsf(regm);

    // Empty mask: say where we came from (debug builds) and give up
    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regm),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

//    *(char*)0=0;
    assert(0);
}
1869 
/***************
 * Release one use of elem `e` (its leaves are not touched; they are
 * assumed to have been freed already).
 *
 * `Ecomsub` is always decremented; `Ecount` is deliberately left alone so
 * that it can still be seen that the common subexpression was evaluated.
 * When `Ecomsub` was already 0 on entry and `e` is a CSE (`Ecount != 0`),
 * every register recorded as holding `e` is dropped from
 * `regcon.cse.mval`/`regcon.cse.mops` and `e` is removed from the CSE table.
 *
 * Params:
 *      e = the elem being released
 */

@trusted
void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // Post-decrement semantics: test the old usage count, always decrement
    const hadUses = e.Ecomsub != 0;
    --e.Ecomsub;
    if (hadUses)
        return;

    // Only a CSE has register/table bookkeeping to undo
    if (!e.Ecount)
        return;

    foreach (i; 0 .. regcon.cse.value.length)
    {
        if (regcon.cse.value[i] != e)
            continue;

        // this register was holding e: free it in both masks
        const regm_t held = mask(cast(uint)i);
        regcon.cse.mval &= ~held;
        regcon.cse.mops &= ~held;
    }
    CSE.remove(e);
}
1897 
/*********************************
 * Restore `Ecomsub` to `Ecount` on every node of the tree rooted at `e`,
 * i.e. undo what freenode() did to the usage counts.
 *
 * The second operand of binary nodes is handled by recursion; the first
 * operand is handled by iterating.
 *
 * Params:
 *      e = root of the elem tree to reset
 */

@trusted
private void resetEcomsub(elem *e)
{
    for (;;)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;

        const op = e.Eoper;
        if (OTleaf(op))
            return;                     // nothing below a leaf

        if (OTbinary(op))
            resetEcomsub(e.EV.E2);      // right subtree
        e = e.EV.E1;                    // continue down the left subtree
    }
}
1920 
/*********************************
 * Determine if elem `e` refers to a variable that lives in registers.
 *
 * Only `OPvar` and `OPrelconst` elems whose symbol has `Sfl` of `FLreg`
 * or `FLpseudo` can qualify.
 *
 * Params:
 *      e = elem to examine
 *      pregm = if not null and the result is true, receives the mask of
 *              registers that make up the variable
 *      preg = if not null and the result is true, receives the register
 *             selected for the access (for `FLreg`: `Sregmsw` when
 *             `Voffset == REGSIZE`, otherwise `Sreglsw`)
 * Returns:
 *      true if `e` is a register variable, false otherwise
 */

@trusted
int isregvar(elem *e,regm_t *pregm,reg_t *preg)
{
    elem_debug(e);
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol *s = e.EV.Vsym;
    regm_t regm;
    reg_t reg;

    switch (s.Sfl)
    {
        case FLreg:
            if (s.Sclass == SC.parameter)
            {
                refparam = true;
                reflocal = true;
            }
            reg = e.EV.Voffset == REGSIZE ? s.Sregmsw : s.Sreglsw;
            regm = s.Sregm;
            //assert(tyreg(s.ty()));
static if (0)
{
            // Let's just see if there is a CSE in a reg we can use
            // instead. This helps avoid AGI's.
            if (e.Ecount && e.Ecount != e.Ecomsub)
            {   int i;

                for (i = 0; i < arraysize(regcon.cse.value); i++)
                {
                    if (regcon.cse.value[i] == e)
                    {   reg = i;
                        break;
                    }
                }
            }
}
            // the variable's registers must all be register-variable registers
            assert(regm & regcon.mvar && !(regm & ~regcon.mvar));
            break;

        case FLpseudo:
        {
            const uint u = s.Sreglsw;
            version (MARS)
            {
                const regm_t m = mask(u);
                // reject BP,SP,EBP,ESP,or ?H
                if (!(m & ALLREGS) || (u & ~3) == 4)
                    return false;
                reg = u & 7;
                regm = m;
            }
            else
            {
                const regm_t m = pseudomask[u];
                // reject BP,SP,EBP,ESP,or ?H
                if (!(m & ALLREGS) || (u & ~3) == 4)
                    return false;
                reg = pseudoreg[u] & 7;
                regm = m;
            }
            break;
        }

        default:
            return false;
    }

    // Found one: hand back whatever the caller asked for
    if (preg)
        *preg = reg;
    if (pregm)
        *pregm = regm;
    return true;
}
2011 
/*********************************
 * Allocate some registers.
 *
 * Convenience overload: forwards to the overload that also takes a
 * line/file pair, passing this wrapper's own `__LINE__`/`__FILE__`.
 *
 * Params:
 *      cdb = code builder handed on to getregs(), which may append code
 *            to save common subexpressions held in the chosen registers
 *      pretregs = in: pointer to mask of registers to make selection from;
 *                 out: mask of allocated registers
 *      preg = out: register number of first allocated register
 *      tym = mask of type we will store in registers
 */

void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}
2031 
/*********************************
 * Allocate registers for a value of type `tym`, choosing from `*pretregs`.
 *
 * Params:
 *      cdb = receives any code getregs() generates for the chosen registers
 *      pretregs = in: candidate mask (first restricted to mES | allregs | XMMREGS);
 *                 out: mask of the registers actually allocated
 *      preg = out: the allocated register (for a pair: the MSW register,
 *             or the LSW register when the MSW is ES)
 *      tym = type of the value; only its tybasic() part is used
 *      line = line number used in diagnostics
 *      file = file name used in diagnostics
 */
@trusted
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
    reg_t reg;

    static if (0)
    {
        if (pass == BackendPass.final_)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym %s\n",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs),tym_str(tym));
        }
    }

    tym = tybasic(tym);
    const uint size = _tysize[tym];
    *pretregs &= mES | allregs | XMMREGS;
    regm_t retregs = *pretregs;

    debug if (retregs == 0)
        printf("allocreg: file %s(%d)\n", file, line);

    // The request lies entirely within register variables: take it as is
    if (!(retregs & ~regcon.mvar))
    {
        if (size <= REGSIZE || (retregs & XMMREGS))
        {
            *preg = findreg(retregs);
            assert(retregs == mask(*preg)); // no more bits are set
        }
        else if (size <= 2 * REGSIZE)
        {
            *preg = findregmsw(retregs);
            assert(retregs & mLSW);
        }
        else
            assert(0);
        getregs(cdb,retregs);
        return;
    }

    int tries = 0;
L1:
    //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
    assert(++tries < 20);               // fail instead of hanging if blocked
    assert(retregs);
    reg_t msreg = NOREG, lsreg = NOREG; // no value assigned yet
L3:
    //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));

    // Progressively relax what we are willing to clobber until some
    // candidate register remains.
    regm_t avail = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
    if (!avail)
        avail = retregs & ~(msavereg | regcon.cse.mval);
    if (!avail)
        avail = retregs & ~(msavereg | regcon.cse.mops);
    if (!avail)
        avail = retregs & ~msavereg;
    if (!avail)
        avail = retregs;

    if (size <= REGSIZE || retregs & XMMREGS)
    {
        // Single register. Avoid BP if there is any alternative.
        if (avail & ~mBP)
            avail &= ~mBP;

        // If only one index register, prefer to not use LSW registers
        if (!regcon.indexregs && avail & ~mLSW)
            avail &= ~mLSW;

        if (pass == BackendPass.final_ && avail & ~lastretregs && !I16)
        {
            // Try not to always allocate the same register,
            // to schedule better
            avail &= ~lastretregs;
            if (avail & ~last2retregs)
            {
                avail &= ~last2retregs;
                if (avail & ~last3retregs)
                {
                    avail &= ~last3retregs;
                    if (avail & ~last4retregs)
                    {
                        avail &= ~last4retregs;
//                      if (avail & ~last5retregs)
//                          avail &= ~last5retregs;
                    }
                }
            }
            if (avail & ~mfuncreg)
                avail &= ~mfuncreg;
        }
        reg = findreg(avail);
        retregs = mask(reg);
    }
    else if (size <= 2 * REGSIZE)
    {
        // Register pair.
        // Select pair with both regs free. Failing
        // that, select pair with one reg free.

        if (avail & mBP)
        {
            retregs &= ~mBP;
            goto L3;
        }

        if (avail & mMSW)
        {
            if (avail & mDX)
                msreg = DX;             // prefer to use DX over CX
            else
                msreg = findregmsw(avail);

            avail &= mLSW;              // see if there's an LSW also
            if (avail)
                lsreg = findreg(avail);
            else if (lsreg == NOREG)    // if don't have LSW yet
            {
                retregs &= mLSW;
                goto L3;
            }
        }
        else
        {
            if (I64 && !(avail & mLSW))
            {
                retregs = *pretregs & (mMSW | mLSW);
                assert(retregs);
                goto L1;
            }
            lsreg = findreglsw(avail);
            if (msreg == NOREG)
            {
                retregs &= mMSW;
                assert(retregs);
                goto L3;
            }
        }
        reg = (msreg == ES) ? lsreg : msreg;
        retregs = mask(msreg) | mask(lsreg);
    }
    else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
    {
        debug
        if (retregs != DOUBLEREGS)
            printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

        assert(retregs == DOUBLEREGS);
        reg = AX;
    }
    else
    {
        debug
        {
            printf("%s\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                tym_str(tym),file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
        }
        assert(0);
    }

    // If the choice collides with register variables, strike those registers
    // from the request and try again -- except for a multi-register value
    // that was asked for in exactly AX|DX.
    if ((retregs & regcon.mvar) &&
        (size <= REGSIZE || *pretregs != (mAX | mDX)))
    {
        retregs = (*pretregs &= ~(retregs & regcon.mvar));
        goto L1;                        // try other registers
    }

    *preg = reg;
    *pretregs = retregs;

    //printf("Allocating %s\n",regm_str(retregs));

    // Age the history of recent allocations
    last5retregs = last4retregs;
    last4retregs = last3retregs;
    last3retregs = last2retregs;
    last2retregs = lastretregs;
    lastretregs = retregs;
    getregs(cdb, retregs);
}
2211 
2212 
/*****************************************
 * Allocate a single scratch register of type `TYoffset`.
 * Params:
 *      cdb = where to write any generated code to
 *      regm = mask of registers to pick one from
 * Returns:
 *      the register chosen by allocreg()
 */
@trusted
reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm)
{
    // allocreg() narrows the mask in place; the caller only wants the register
    reg_t scratch;
    allocreg(cdb, &regm, &scratch, TYoffset);
    return scratch;
}
2228 
2229 
/******************************
 * Determine registers that should be destroyed upon arrival
 * to code entry point for exception handling.
 * Returns:
 *      for DWARF exception handling, `allregs` minus `mfuncreg`;
 *      otherwise `allregs` for 32/64 bit code, else `ALLREGS | mES`
 */
@trusted
regm_t lpadregs()
{
    //printf("lpadregs(): allregs=%s, mfuncreg=%s\n", regm_str(allregs), regm_str(mfuncreg));
    if (config.ehmethod == EHmethod.EH_DWARF)
        return allregs & ~mfuncreg;

    return (I32 || I64) ? allregs : (ALLREGS | mES);
}
2245 
2246 
/*************************
 * Mark registers as used.
 *
 * Removes `regm` from `mfuncreg`, adds it to `regcon.used`, and updates
 * `regcon.params`.
 * Params:
 *      regm = mask of the registers being used
 */

@trusted
void useregs(regm_t regm)
{
    //printf("useregs(x%x) %s\n", regm, regm_str(regm));
    mfuncreg &= ~regm;
    regcon.used |= regm;                // registers used in this block

    if (regm & regcon.mpvar)
        regcon.params = 0;              // modified a fastpar register variable: toss them all out
    else
        regcon.params &= ~regm;
}
2261 
/*************************
 * We are going to use the registers in mask r.
 * Generate any code necessary to save any regs.
 * Params:
 *      cdb = receives the code that saves common subexpressions, if any
 *      r = mask of registers about to be used
 */

@trusted
void getregs(ref CodeBuilder cdb, regm_t r)
{
    //printf("getregs(x%x) %s\n", r, regm_str(r));

    // Common subs we must save; computed first, before any state is updated
    const regm_t toSave = regcon.cse.mops & r;

    useregs(r);

    // The old contents of these registers are about to be destroyed
    regcon.cse.mval &= ~r;
    msavereg &= ~r;
    regcon.immed.mval &= ~r;

    if (toSave != 0)
        cse_save(cdb, toSave);
}
2279 
/*************************
 * We are going to use the registers in mask r.
 * Same bookkeeping as getregs(), but no code may be generated: asserts
 * that none of the registers holds a common subexpression that would
 * have to be saved.
 * Params:
 *      r = mask of registers about to be used
 */
@trusted
void getregsNoSave(regm_t r)
{
    //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));
    const regm_t wouldNeedSaving = regcon.cse.mops & r;
    assert(!wouldNeedSaving);

    useregs(r);

    // The old contents of these registers are about to be destroyed
    regcon.cse.mval &= ~r;
    msavereg &= ~r;
    regcon.immed.mval &= ~r;
}
2294 
/*****************************************
 * Copy registers in cse.mops into memory.
 *
 * The registers in `ms` are removed from `regcon.cse.mops`. Registers whose
 * value already has a matching CSE table entry are skipped; each remaining
 * one gets a new entry, which is either flagged as simply reloadable or
 * stored to its slot.
 * Params:
 *      cdb = receives the generated store instructions
 *      ms = mask of registers to save; must be a subset of `regcon.cse.mops`
 */

@trusted
private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Skip CSEs that are already saved */
    for (regm_t bit = 1; bit < mask(NUMREGS); bit <<= 1)
    {
        if (!(ms & bit))
            continue;

        const e = regcon.cse.value[findreg(bit)];
        const sz = tysize(e.Ety);
        foreach (const ref cse; CSE.filter(e))
        {
            const alreadySaved =
                sz <= REGSIZE ||
                (sz <= 2 * REGSIZE &&
                    (((bit & mMSW) && (cse.regm & mMSW)) ||
                     ((bit & mLSW) && (cse.regm & mLSW)))) ||
                (sz == 4 * REGSIZE && bit == cse.regm);
            if (!alreadySaved)
                continue;

            ms &= ~bit;
            if (!ms)
                return;                 // nothing left to save
            break;
        }
    }

    /* Create an entry for every register still in ms */
    while (ms)
    {
        auto entry = CSE.add();
        const reg_t reg = findreg(ms);  // the register to save
        const regm_t regbit = mask(reg);
        entry.e = regcon.cse.value[reg];
        entry.regm = regbit;

        ms &= ~regbit;                  // turn off reg bit in ms

        if (!cse_simple(&entry.csimple, entry.e))
        {
            // Has to go to memory
            CSE.updateSizeAndAlign(entry.e);
            gen_storecse(cdb, entry.e.Ety, reg, entry.slot);
            reflocal = true;
        }
        else
        {
            // If we can simply reload the CSE, we don't need to save it
            entry.flags |= CSEsimple;
        }
    }
}
2350 
/******************************************
 * Getregs without marking immediate register values as gone:
 * `regcon.immed.mval` is put back to its prior value after the call.
 * Params:
 *      cdb = passed through to getregs()
 *      r = mask of registers about to be used
 */

@trusted
void getregs_imm(ref CodeBuilder cdb, regm_t r)
{
    const regm_t immedBefore = regcon.immed.mval;
    getregs(cdb, r);
    regcon.immed.mval = immedBefore;
}
2362 
/******************************************
 * Flush all CSE's out of registers and into memory.
 * Params:
 *      cdb = receives the generated code
 *      do87 = !=0 means save 87 registers too
 */

@trusted
void cse_flush(ref CodeBuilder cdb, int do87)
{
    //dbg_printf("cse_flush()\n");

    // save any CSEs to memory
    cse_save(cdb, regcon.cse.mops);

    // save any 8087 temporaries
    if (do87 != 0)
        save87(cdb);
}
2377 
/*************************
 * Common subexpressions exist in registers. Note this in regcon.cse.mval.
 * Params:
 *      e = the subexpression
 *      regm = mask of registers holding it
 *      opsflag = if != 0 then regcon.cse.mops gets set too
 * Returns:
 *      false   not saved as a CSE
 *      true    saved as a CSE (at least one register was recorded)
 */

@trusted
bool cssave(elem *e,regm_t regm,uint opsflag)
{
    /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
    if (!e.Ecount || !e.Ecomsub)
        return false;

    if (!opsflag && pass != BackendPass.final_ && (I32 || I64))
        return false;

    //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
    regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */

/+
    /* Do not register CSEs if they are register variables and      */
    /* are not operator nodes. This forces the register allocation  */
    /* to go through allocreg(), which will prevent using register  */
    /* variables for scratch.                                       */
    if (opsflag || !(regm & regcon.mvar))
+/
    bool recorded = false;
    for (uint i = 0; regm; ++i)
    {
        const regm_t mi = mask(i);
        if (!(regm & mi))
            continue;
        regm &= ~mi;

        // If we don't need this CSE, and the register already
        // holds a CSE that we do need, don't mark the new one
        const bool occupiedByNeeded = (regcon.cse.mval & mi) &&
                                      (regcon.cse.mops & mi) &&
                                      regcon.cse.value[i] != e;
        if (!opsflag && occupiedByNeeded)
            continue;

        regcon.cse.mval |= mi;
        if (opsflag)
            regcon.cse.mops |= mi;
        //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e);
        regcon.cse.value[i] = e;
        recorded = true;
    }
    return recorded;
}
2434 
/*************************************
 * Determine if a computation should be done into a register.
 * Params:
 *      e = elem about to be evaluated
 * Returns:
 *      true if `e` should be evaluated into a register
 */

@trusted
bool evalinregister(elem *e)
{
    if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
        return true;

    // Not a CSE, therefore we don't need to evaluate it in a register
    if (e.Ecount == 0)
        return false;

    // Operators are always in register
    if (!OTleaf(e.Eoper))
        return true;

    // Need to rethink this code if float or double can be CSE'd
    const uint sz = tysize(e.Ety);

    if (e.Ecount == e.Ecomsub)
    {
        // A CSE that still needs to be generated
        if (!(I32 || I64) || sz > REGSIZE)  // (pass check removed: bug 8987)
            return false;

        regm_t avail = allregs & ~regcon.mvar;
        if (sz == 1)
            avail &= BYTEREGS;

        // Need to be at least 3 registers available, as addressing modes
        // can use up 2: clear the two lowest set bits and see what is left.
        avail &= avail - 1;
        avail &= avail - 1;
        return avail != 0;
    }

    /* Elem is now a CSE that might have been generated. If so, and
     * it's in a register already, the computation should be done
     * using that register.
     */
    regm_t emask = 0;
    foreach (i; 0 .. regcon.cse.value.length)
    {
        if (regcon.cse.value[i] == e)
            emask |= mask(cast(uint)i);
    }
    emask &= regcon.cse.mval;           // mask of available CSEs

    if (sz <= REGSIZE)
        return emask != 0;              // the CSE is in a register
    if (sz <= 2 * REGSIZE)
        return (emask & mMSW) && (emask & mLSW);
    return true;                        // cop-out for now
}
2492 
/*******************************************************
 * Return mask of scratch registers.
 * Returns:
 *      0 unless this is the final backend pass; otherwise the registers in
 *      `allregs` that are not register variables, not holding CSE or
 *      immediate values, not parameter registers, and not in `mfuncreg`
 */

@trusted
regm_t getscratch()
{
    if (pass != BackendPass.final_)
        return 0;

    const regm_t unavailable = regcon.mvar | regcon.mpvar | regcon.cse.mval |
                               regcon.immed.mval | regcon.params | mfuncreg;
    return allregs & ~unavailable;
}
2508 
2509 /******************************
2510  * Evaluate an elem that is a common subexp that has been encountered
2511  * before.
2512  * Look first to see if it is already in a register.
2513  */
2514 
2515 @trusted
2516 private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
2517 {
2518     tym_t tym;
2519     regm_t regm,emask;
2520     reg_t reg;
2521     uint byte_,sz;
2522 
2523     //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
2524     elem_debug(e);
2525 
2526     debug
2527     {
2528         if (e.Ecomsub > e.Ecount)
2529             elem_print(e);
2530     }
2531 
2532     assert(e.Ecomsub <= e.Ecount);
2533 
2534     if (*pretregs == 0)        // no possible side effects anyway
2535     {
2536         return;
2537     }
2538 
2539     /* First construct a mask, emask, of all the registers that
2540      * have the right contents.
2541      */
2542     emask = 0;
2543     for (uint i = 0; i < regcon.cse.value.length; i++)
2544     {
2545         //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]);
2546         if (regcon.cse.value[i] == e)   // if contents are right
2547                 emask |= mask(i);       // turn on bit for reg
2548     }
2549     emask &= regcon.cse.mval;                     // make sure all bits are valid
2550 
2551     if (emask & XMMREGS && *pretregs == mPSW)
2552         { }
2553     else if (tyxmmreg(e.Ety) && config.fpxmmregs)
2554     {
2555         if (*pretregs & (mST0 | mST01))
2556         {
2557             regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1;
2558             comsub(cdb, e, &retregs);
2559             fixresult(cdb,e,retregs,pretregs);
2560             return;
2561         }
2562     }
2563     else if (tyfloating(e.Ety) && config.inline8087)
2564     {
2565         comsub87(cdb,e,pretregs);
2566         return;
2567     }
2568 
2569 
2570     /* create mask of CSEs */
2571     regm_t csemask = CSE.mask(e);
2572     csemask &= ~emask;            // stuff already in registers
2573 
2574     debug if (debugw)
2575     {
2576         printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n",
2577                 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask),
2578                 regm_str(regcon.cse.mval),regm_str(regcon.mvar));
2579         if (regcon.cse.mval & 1)
2580             elem_print(regcon.cse.value[0]);
2581     }
2582 
2583     tym = tybasic(e.Ety);
2584     sz = _tysize[tym];
2585     byte_ = sz == 1;
2586 
2587     if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register
2588     {
2589         /* First see if it is already in a correct register     */
2590 
2591         regm = emask & *pretregs;
2592         if (regm == 0)
2593             regm = emask;               /* try any other register       */
2594         if (regm)                       /* if it's in a register        */
2595         {
2596             if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs)
2597             {
2598                 regm = mask(findreg(regm));
2599                 fixresult(cdb,e,regm,pretregs);
2600                 return;
2601             }
2602         }
2603 
2604         if (OTleaf(e.Eoper))                  /* if not op or func            */
2605             goto reload;                      /* reload data                  */
2606 
2607         foreach (ref cse; CSE.filter(e))
2608         {
2609             regm_t retregs;
2610 
2611             if (cse.flags & CSEsimple)
2612             {
2613                 retregs = *pretregs;
2614                 if (byte_ && !(retregs & BYTEREGS))
2615                     retregs = BYTEREGS;
2616                 else if (!(retregs & allregs))
2617                     retregs = allregs;
2618                 allocreg(cdb,&retregs,&reg,tym);
2619                 code *cr = &cse.csimple;
2620                 cr.setReg(reg);
2621                 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1)
2622                     cr.Irex |= REX;
2623                 cdb.gen(cr);
2624                 goto L10;
2625             }
2626             else
2627             {
2628                 reflocal = true;
2629                 cse.flags |= CSEload;
2630                 if (*pretregs == mPSW)  // if result in CCs only
2631                 {
2632                     if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety)))
2633                     {
2634                         retregs = XMMREGS;
2635                         allocreg(cdb,&retregs,&reg,tym);
2636                         gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2637                         regcon.cse.mval |= mask(reg); // cs is in a reg
2638                         regcon.cse.value[reg] = e;
2639                         fixresult(cdb,e,retregs,pretregs);
2640                     }
2641                     else
2642                     {
2643                         // CMP cs[BP],0
2644                         gen_testcse(cdb, cse.e.Ety, sz, cse.slot);
2645                     }
2646                 }
2647                 else
2648                 {
2649                     retregs = *pretregs;
2650                     if (byte_ && !(retregs & BYTEREGS))
2651                         retregs = BYTEREGS;
2652                     allocreg(cdb,&retregs,&reg,tym);
2653                     gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
2654                 L10:
2655                     regcon.cse.mval |= mask(reg); // cs is in a reg
2656                     regcon.cse.value[reg] = e;
2657                     fixresult(cdb,e,retregs,pretregs);
2658                 }
2659             }
2660             return;
2661         }
2662 
2663         debug
2664         {
2665             printf("couldn't find cse e = %p, pass = %d\n",e,pass);
2666             elem_print(e);
2667         }
2668         assert(0);                      /* should have found it         */
2669     }
2670     else                                  /* reg pair is req'd            */
2671     if (sz <= 2 * REGSIZE)
2672     {
2673         reg_t msreg,lsreg;
2674 
2675         /* see if we have both  */
2676         if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP)))
2677         {                               /* we don't have both           */
2678             debug if (!OTleaf(e.Eoper))
2679             {
2680                 printf("e = %p, op = x%x, emask = %s, csemask = %s\n",
2681                     e,e.Eoper,regm_str(emask),regm_str(csemask));
2682                 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW);
2683                 elem_print(e);
2684             }
2685 
2686             assert(OTleaf(e.Eoper));        /* must have both for operators */
2687             goto reload;
2688         }
2689 
2690         /* Look for right vals in any regs      */
2691         regm = *pretregs & mMSW;
2692         if (emask & regm)
2693             msreg = findreg(emask & regm);
2694         else if (emask & mMSW)
2695             msreg = findregmsw(emask);
2696         else                    /* reload from cse array        */
2697         {
2698             if (!regm)
2699                 regm = mMSW & ALLREGS;
2700             allocreg(cdb,&regm,&msreg,TYint);
2701             loadcse(cdb,e,msreg,mMSW);
2702         }
2703 
2704         regm = *pretregs & (mLSW | mBP);
2705         if (emask & regm)
2706             lsreg = findreg(emask & regm);
2707         else if (emask & (mLSW | mBP))
2708             lsreg = findreglsw(emask);
2709         else
2710         {
2711             if (!regm)
2712                 regm = mLSW;
2713             allocreg(cdb,&regm,&lsreg,TYint);
2714             loadcse(cdb,e,lsreg,mLSW | mBP);
2715         }
2716 
2717         regm = mask(msreg) | mask(lsreg);       /* mask of result       */
2718         fixresult(cdb,e,regm,pretregs);
2719         return;
2720     }
2721     else if (tym == TYdouble || tym == TYdouble_alias)    // double
2722     {
2723         assert(I16);
2724         if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16)
2725         {
2726             static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d
2727             for (reg = 0; reg != NOREG; reg = dblreg[reg])
2728             {
2729                 assert(cast(int) reg >= 0 && reg <= 7);
2730                 if (mask(reg) & csemask)
2731                     loadcse(cdb,e,reg,mask(reg));
2732             }
2733             regm = DOUBLEREGS_16;
2734             fixresult(cdb,e,regm,pretregs);
2735             return;
2736         }
2737         if (OTleaf(e.Eoper)) goto reload;
2738 
2739         debug
2740         printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask));
2741 
2742         assert(0);
2743     }
2744     else
2745     {
2746         debug
2747         printf("e = %p, tym = x%x\n",e,tym);
2748 
2749         assert(0);
2750     }
2751 
2752 reload:                                 /* reload result from memory    */
2753     switch (e.Eoper)
2754     {
2755         case OPrelconst:
2756             cdrelconst(cdb,e,pretregs);
2757             break;
2758 
2759         case OPgot:
2760             if (config.exe & EX_posix)
2761             {
2762                 cdgot(cdb,e,pretregs);
2763                 break;
2764             }
2765             goto default;
2766 
2767         default:
2768             if (*pretregs == mPSW &&
2769                 config.fpxmmregs &&
2770                 (tyxmmreg(tym) || tysimd(tym)))
2771             {
2772                 regm_t retregs = XMMREGS | mPSW;
2773                 loaddata(cdb,e,&retregs);
2774                 cssave(e,retregs,false);
2775                 return;
2776             }
2777             loaddata(cdb,e,pretregs);
2778             break;
2779     }
2780     cssave(e,*pretregs,false);
2781 }
2782 
2783 
/*****************************
 * Reload a saved common subexpression value from the cse save area
 * on the stack into a register.
 *
 * Walks the entries produced by CSE.filter(e) and uses the first one whose
 * regm field intersects regm. For that entry the slot is marked as loaded,
 * reg is recorded in regcon.cse as holding e, and a load of the slot into
 * reg is generated. If no entry matches, the arguments are dumped (debug
 * builds) and the function asserts.
 * Params:
 *      cdb  = code builder receiving the generated code
 *      e    = common subexpression to reload
 *      reg  = register to load into
 *      regm = register mask an entry's regm field must intersect
 */

@trusted
private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
{
    foreach (ref entry; CSE.filter(e))
    {
        if (!(entry.regm & regm))
            continue;                   // not the piece we were asked for

        const regm_t regbit = mask(reg);

        reflocal = true;
        entry.flags |= CSEload;         // remember that the slot was read back
        regcon.cse.value[reg] = e;
        regcon.cse.mval |= regbit;
        getregs(cdb, regbit);
        gen_loadcse(cdb, entry.e.Ety, reg, entry.slot);
        return;
    }

    // Falling out of the loop means the cse was never saved: internal error
    debug
    {
        printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
        elem_print(e);
    }
    assert(0);
}
2812 
/***************************
 * codelem(): generate code sequence for an elem.
 * (codelem() itself is defined after the cdxxx jump table below.)
 * Input:
 *      cdb =           code builder the generated code is appended to
 *      pretregs =      mask of possible registers to return result in
 *                      Note:   longs are in AX,BX or CX,DX or SI,DI
 *                              doubles are AX,BX,CX,DX only
 *      constflag =     1 for user of result will not modify the
 *                      registers returned in *pretregs.
 *                      2 for freenode() not called.
 * Output:
 *      *pretregs       mask of registers result is returned in
 *      cdb             has the generated code sequence appended to it
 */
2827 
/***************************
 * Invoke the code generator routine that the cdxxx jump table holds for op.
 * Params:
 *      cdb      = code builder, forwarded to the routine
 *      e        = elem, forwarded to the routine
 *      pretregs = register mask pointer, forwarded to the routine
 *      op       = operator used as the index into cdxxx
 */
@trusted
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op)
{
    auto handler = cdxxx[op];
    handler(cdb, e, pretregs);
}
2833 
/* Jump table: maps an operator (used as the array index) to the routine that
 * generates code for it. It is indexed by callcdxxx() and by the default case
 * of codelem(). The table has OPMAX entries, so valid indices are
 * 0 .. OPMAX - 1.
 *
 * Every handler has the signature (ref CodeBuilder, elem*, regm_t*).
 * Many operators are routed to cderr; presumably those are never expected to
 * reach code generation through this table (TODO confirm against cderr).
 * Note that codelem() handles OPrelconst, OPvar and OPconst in its own switch
 * and does not consult the table for them.
 * NOTE(review): operators without an initializer here are left at the default
 * value for a function pointer (null) - verify that every OPER is listed.
 */
private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx =
[
    OPunde:    &cderr,
    OPadd:     &cdorth,
    OPmul:     &cdmul,
    OPand:     &cdorth,
    OPmin:     &cdorth,
    OPnot:     &cdnot,
    OPcom:     &cdcom,
    OPcond:    &cdcond,
    OPcomma:   &cdcomma,
    OPremquo:  &cddiv,
    OPdiv:     &cddiv,
    OPmod:     &cddiv,
    OPxor:     &cdorth,
    OPstring:  &cderr,
    OPrelconst: &cdrelconst,
    OPinp:     &cdport,
    OPoutp:    &cdport,
    OPasm:     &cdasm,
    OPinfo:    &cdinfo,
    OPdctor:   &cddctor,
    OPddtor:   &cdddtor,
    OPctor:    &cdctor,
    OPdtor:    &cddtor,
    OPmark:    &cdmark,
    OPvoid:    &cdvoid,
    OPhalt:    &cdhalt,
    OPnullptr: &cderr,
    OPpair:    &cdpair,
    OPrpair:   &cdpair,

    OPor:      &cdorth,
    OPoror:    &cdloglog,
    OPandand:  &cdloglog,
    OProl:     &cdshift,
    OPror:     &cdshift,
    OPshl:     &cdshift,
    OPshr:     &cdshift,
    OPashr:    &cdshift,
    OPbit:     &cderr,
    OPind:     &cdind,
    OPaddr:    &cderr,
    OPneg:     &cdneg,
    OPuadd:    &cderr,
    OPabs:     &cdabs,
    OPtoprec:  &cdtoprec,
    OPsqrt:    &cdneg,
    OPsin:     &cdneg,
    OPcos:     &cdneg,
    OPscale:   &cdscale,
    OPyl2x:    &cdscale,
    OPyl2xp1:  &cdscale,
    OPcmpxchg:     &cdcmpxchg,
    OPrint:    &cdneg,
    OPrndtol:  &cdrndtol,
    OPstrlen:  &cdstrlen,
    OPstrcpy:  &cdstrcpy,
    OPmemcpy:  &cdmemcpy,
    OPmemset:  &cdmemset,
    OPstrcat:  &cderr,
    OPstrcmp:  &cdstrcmp,
    OPmemcmp:  &cdmemcmp,
    OPsetjmp:  &cdsetjmp,
    OPnegass:  &cdaddass,
    OPpreinc:  &cderr,
    OPpredec:  &cderr,
    OPstreq:   &cdstreq,
    OPpostinc: &cdpost,
    OPpostdec: &cdpost,
    OPeq:      &cdeq,
    OPaddass:  &cdaddass,
    OPminass:  &cdaddass,
    OPmulass:  &cdmulass,
    OPdivass:  &cddivass,
    OPmodass:  &cddivass,
    OPshrass:  &cdshass,
    OPashrass: &cdshass,
    OPshlass:  &cdshass,
    OPandass:  &cdaddass,
    OPxorass:  &cdaddass,
    OPorass:   &cdaddass,

    // Every entry from here through OPnue shares the single handler cdcmp
    OPle:      &cdcmp,
    OPgt:      &cdcmp,
    OPlt:      &cdcmp,
    OPge:      &cdcmp,
    OPeqeq:    &cdcmp,
    OPne:      &cdcmp,

    OPunord:   &cdcmp,
    OPlg:      &cdcmp,
    OPleg:     &cdcmp,
    OPule:     &cdcmp,
    OPul:      &cdcmp,
    OPuge:     &cdcmp,
    OPug:      &cdcmp,
    OPue:      &cdcmp,
    OPngt:     &cdcmp,
    OPnge:     &cdcmp,
    OPnlt:     &cdcmp,
    OPnle:     &cdcmp,
    OPord:     &cdcmp,
    OPnlg:     &cdcmp,
    OPnleg:    &cdcmp,
    OPnule:    &cdcmp,
    OPnul:     &cdcmp,
    OPnuge:    &cdcmp,
    OPnug:     &cdcmp,
    OPnue:     &cdcmp,

    OPvp_fp:   &cdcnvt,
    OPcvp_fp:  &cdcnvt,
    OPoffset:  &cdlngsht,
    OPnp_fp:   &cdshtlng,
    OPnp_f16p: &cdfar16,
    OPf16p_np: &cdfar16,

    OPs16_32:  &cdshtlng,
    OPu16_32:  &cdshtlng,
    OPd_s32:   &cdcnvt,
    OPb_8:     &cdcnvt,
    OPs32_d:   &cdcnvt,
    OPd_s16:   &cdcnvt,
    OPs16_d:   &cdcnvt,
    OPd_u16:   &cdcnvt,
    OPu16_d:   &cdcnvt,
    OPd_u32:   &cdcnvt,
    OPu32_d:   &cdcnvt,
    OP32_16:   &cdlngsht,
    OPd_f:     &cdcnvt,
    OPf_d:     &cdcnvt,
    OPd_ld:    &cdcnvt,
    OPld_d:    &cdcnvt,
    OPc_r:     &cdconvt87,
    OPc_i:     &cdconvt87,
    OPu8_16:   &cdbyteint,
    OPs8_16:   &cdbyteint,
    OP16_8:    &cdlngsht,
    OPu32_64:  &cdshtlng,
    OPs32_64:  &cdshtlng,
    OP64_32:   &cdlngsht,
    OPu64_128: &cdshtlng,
    OPs64_128: &cdshtlng,
    OP128_64:  &cdlngsht,
    OPmsw:     &cdmsw,

    OPd_s64:   &cdcnvt,
    OPs64_d:   &cdcnvt,
    OPd_u64:   &cdcnvt,
    OPu64_d:   &cdcnvt,
    OPld_u64:  &cdcnvt,
    OPparam:   &cderr,
    OPsizeof:  &cderr,
    OParrow:   &cderr,
    OParrowstar: &cderr,
    OPcolon:   &cderr,
    OPcolon2:  &cderr,
    OPbool:    &cdnot,
    OPcall:    &cdfunc,
    OPucall:   &cdfunc,
    OPcallns:  &cdfunc,
    OPucallns: &cdfunc,
    OPstrpar:  &cderr,
    OPstrctor: &cderr,
    OPstrthis: &cdstrthis,
    OPconst:   &cderr,      // codelem() sends OPconst to loaddata() instead
    OPvar:     &cderr,      // codelem() sends OPvar to loaddata() instead
    OPnew:     &cderr,
    OPanew:    &cderr,
    OPdelete:  &cderr,
    OPadelete: &cderr,
    OPbrack:   &cderr,
    OPframeptr: &cdframeptr,
    OPgot:     &cdgot,

    OPbsf:     &cdbscan,
    OPbsr:     &cdbscan,
    OPbtst:    &cdbtst,
    OPbt:      &cdbt,
    OPbtc:     &cdbt,
    OPbtr:     &cdbt,
    OPbts:     &cdbt,

    OPbswap:   &cdbswap,
    OPpopcnt:  &cdpopcnt,
    OPvector:  &cdvector,
    OPvecsto:  &cdvecsto,
    OPvecfill: &cdvecfill,
    OPva_start: &cderr,
    OPprefetch: &cdprefetch,
];
3027 
3028 
/***************************
 * Generate code for elem e, appending it to cdb.
 *
 * An elem with a non-zero Ecount that differs from Ecomsub is treated as a
 * common subexpression and handed to comsub(). Otherwise OPrelconst, OPvar
 * and OPconst are handled directly and every other operator is dispatched
 * through the cdxxx jump table. Unless comsub() was used, the result is
 * recorded with cssave().
 * Params:
 *      cdb       = code builder receiving the generated code
 *      e         = elem to generate code for; must not be null
 *      pretregs  = on entry, mask of registers the result may be returned in;
 *                  on exit, mask of registers the result is returned in
 *      constflag = bit 1: user of the result will not modify the registers
 *                  returned in *pretregs (so register variables may be used);
 *                  bit 2: do not call freenode(e)
 */
@trusted
void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag)
{
    Symbol *s;

    // Validate e first: the debug trace below already dereferences it.
    assert(e);
    elem_debug(e);

    debug if (debugw)
    {
        printf("+codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs),oper_str(e.Eoper));
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
        printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
    }

    // Invariant: every register in cse.mops must also be in cse.mval
    if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
    {
        debug
        {
            printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs));
            elem_print(e);
            printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                    regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
            printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
        }
        assert(0);
    }

    // Only strip register variables from the request if something else remains
    if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar)
        *pretregs &= ~regcon.mvar;                      /* can't use register vars */

    uint op = e.Eoper;
    if (e.Ecount && e.Ecount != e.Ecomsub)     // if common subexp
    {
        comsub(cdb,e,pretregs);
        goto L1;                                // comsub() result is not cssave()'d here
    }

    if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        cdb.genlinnum(e.Esrcpos);

    switch (op)
    {
        default:
            /* cdxxx has OPMAX entries, so OPMAX itself is already out of range.
             * Check before either dispatch below indexes the table.
             */
            assert(op < OPMAX);
            if (e.Ecount)                          /* if common subexp     */
            {
                /* if no return value       */
                if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0)
                {
                    if (*pretregs & (mST0 | mST01))
                    {
                        //printf("generate ST0 comsub for:\n");
                        //elem_print(e);

                        // Evaluate into XMM register(s), save those as the cse,
                        // then convert to the requested ST0/ST01 result.
                        regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1;
                        (*cdxxx[op])(cdb,e,&retregs);
                        cssave(e,retregs,!OTleaf(op));
                        fixresult(cdb, e, retregs, pretregs);
                        goto L1;
                    }
                    // Widen the request so the cse value lands in a register
                    if (tysize(e.Ety) == 1)
                        *pretregs |= BYTEREGS;
                    else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs)
                        *pretregs |= XMMREGS;
                    else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias)
                        *pretregs |= DOUBLEREGS;
                    else
                        *pretregs |= ALLREGS;       /* make one             */
                }

                /* BUG: For CSEs, make sure we have both an MSW             */
                /* and an LSW specified in *pretregs                        */
            }
            (*cdxxx[op])(cdb,e,pretregs);
            break;

        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPvar:
            // A register variable the user won't modify: narrow the request
            // to the register(s) the variable already lives in.
            if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg &&
                (s.Sregm & *pretregs) == s.Sregm)
            {
                if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE)
                    *pretregs &= mPSW | (s.Sregm & mLSW);
                else
                    *pretregs &= mPSW | s.Sregm;
            }
            goto case OPconst;

        case OPconst:
            // No result requested, but the value is used at least 3 times or is
            // volatile: request a register class suited to the type anyway.
            if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile))
            {
                switch (tybasic(e.Ety))
                {
                    case TYbool:
                    case TYchar:
                    case TYschar:
                    case TYuchar:
                        *pretregs |= BYTEREGS;
                        break;

                    case TYnref:
                    case TYnptr:
                    case TYsptr:
                    case TYcptr:
                    case TYfgPtr:
                    case TYimmutPtr:
                    case TYsharePtr:
                    case TYrestrictPtr:
                        *pretregs |= I16 ? IDXREGS : ALLREGS;
                        break;

                    case TYshort:
                    case TYushort:
                    case TYint:
                    case TYuint:
                    case TYlong:
                    case TYulong:
                    case TYllong:
                    case TYullong:
                    case TYcent:
                    case TYucent:
                    case TYfptr:
                    case TYhptr:
                    case TYvptr:
                        *pretregs |= ALLREGS;
                        break;

                    default:
                        break;
                }
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,!OTleaf(op));
L1:
    if (!(constflag & 2))
        freenode(e);

    debug if (debugw)
    {
        printf("-codelem(e=%p,*pretregs=%s) %s ",e,regm_str(*pretregs), oper_str(op));
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
    }
}
3179 
/*******************************
 * Same as codelem(), but do not destroy the registers in keepmsk.
 * Use scratch registers as much as possible, then use stack.
 *
 * The code for e is generated into a separate code builder so that the
 * register save code can be placed before it and the restore code after it:
 * cdb receives, in order, the saves, the code for e, and the restores.
 * Input:
 *      cdb             code builder the result is appended to
 *      e               elem to generate code for
 *      pretregs        mask of possible registers to return result in
 *      keepmsk         mask of registers that must be preserved
 *      constflag       true if user of result will not modify the
 *                      registers returned in *pretregs.
 * Output:
 *      *pretregs       mask of registers result is returned in
 */

@trusted
void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
{
    regm_t touse;       // scratch registers available for saving other registers

    debug if (debugw)
        printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    elem_debug(e);
    if (constflag)
    {
        regm_t regm;
        reg_t reg;

        // Fast path: e already lives in an acceptable register, so nothing
        // needs to be evaluated and nothing can be clobbered.
        if (isregvar(e,&regm,&reg) &&           // if e is a register variable
            (regm & *pretregs) == regm &&       // in one of the right regs
            e.EV.Voffset == 0
           )
        {
            uint sz1 = tysize(e.Ety);
            uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
            // e is narrower than the variable: keep only the LSW (or XMM) part
            if (sz1 <= REGSIZE && sz2 > REGSIZE)
                regm &= mLSW | XMMREGS;
            fixresult(cdb,e,regm,pretregs);
            cssave(e,regm,0);
            freenode(e);

            debug if (debugw)
                printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                        e,regm_str(*pretregs),regm_str(keepmsk),constflag);

            return;
        }
    }
    // Registers that were already in msavereg before keepmsk was added;
    // they must stay in msavereg when keepmsk is removed again at the end.
    regm_t overlap = msavereg & keepmsk;
    msavereg |= keepmsk;          /* add to mask of regs to save          */
    regm_t oldregcon = regcon.cse.mval;
    regm_t oldregimmed = regcon.immed.mval;
    regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */
    // Start from a fresh mfuncreg for the nested codelem() call
    // (presumably codelem() clears the bits of registers it uses - TODO confirm)
    mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
    uint stackpushsave = stackpush;
    char calledafuncsave = calledafunc;
    calledafunc = 0;
    CodeBuilder cdbx; cdbx.ctor();
    codelem(cdbx,e,pretregs,constflag);    // generate code for the elem

    // keepmsk registers that are no longer in msavereg after codelem()
    // (presumably because the generated code modified them - TODO confirm)
    regm_t tosave = keepmsk & ~msavereg; /* registers to save                    */
    if (tosave)
    {
        cgstate.stackclean++;
        genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
        cgstate.stackclean--;
    }

    /* Assert that no new CSEs are generated that are not reflected       */
    /* in mfuncreg.                                                       */
    debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
        printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
                regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));

    assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);

    /* bugzilla 3521
     * The problem is:
     *    reg op (reg = exp)
     * where reg must be preserved (in keepregs) while the expression to be evaluated
     * must change it.
     * The only solution is to make this variable not a register.
     */
    if (regcon.mvar & tosave)
    {
        //elem_print(e);
        //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
        cgreg_unregister(regcon.mvar & tosave);
    }

    /* which registers can we use to save other registers in? */
    if (config.flags4 & CFG4space ||              // if optimize for space
        config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
        touse = 0;                              // PUSH/POP pairs are always shorter
    else
    {
        touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
        /* Don't use registers we'll have to save/restore               */
        touse &= ~(fregsaved & oldmfuncreg);
        /* Don't use registers that have constant values in them, since
           the code generated might have used the value.
         */
        touse &= ~oldregimmed;
    }

    CodeBuilder cdbs1; cdbs1.ctor();    // save code, goes before the elem's code
    code *cs2 = null;                   // restore code, goes after the elem's code
    int adjesp = 0;                     // total size reported by gensaverestore()

    for (uint i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            if (touse)          /* if any scratch registers             */
            {
                uint j;
                // Pick the lowest-numbered scratch register among the first 8
                for (j = 0; j < 8; j++)
                {
                    regm_t mj = mask(j);

                    if (touse & mj)
                    {
                        genmovreg(cdbs1,j,i);
                        // New restore goes in front of the ones built so far
                        cs2 = cat(genmovreg(i,j),cs2);
                        touse &= ~mj;
                        mfuncreg &= ~mj;
                        regcon.used |= mj;
                        break;
                    }
                }
                assert(j < 8);
            }
            else                        // else use memory
            {
                CodeBuilder cdby; cdby.ctor();
                uint size = gensaverestore(mask(i), cdbs1, cdby);
                // New restore goes in front of the ones built so far
                cs2 = cat(cdby.finish(),cs2);
                if (size)
                {
                    stackchanged = 1;
                    adjesp += size;
                }
            }
            getregs(cdbx,mi);
            tosave &= ~mi;
        }
    }
    CodeBuilder cdbs2; cdbs2.ctor();
    if (adjesp)
    {
        // If this is done an odd number of times, it
        // will throw off the 8 byte stack alignment.
        // We should *only* worry about this if a function
        // was called in the code generation by codelem().

        // sz = padding that rounds adjesp up to a multiple of STACKALIGN
        // (assumes STACKALIGN is a power of 2)
        int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);
        if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
        {
            regm_t mval_save = regcon.immed.mval;
            regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
                                        // because c hasn't been executed yet
            cod3_stackadj(cdbs1, sz);
            regcon.immed.mval = mval_save;
            cdbs1.genadjesp(sz);

            // Undo the padding before the restores run
            cod3_stackadj(cdbs2, -sz);
            cdbs2.genadjesp(-sz);
        }
        cdbs2.append(cs2);


        cdbs1.genadjesp(adjesp);
        cdbs2.genadjesp(-adjesp);
    }
    else
        cdbs2.append(cs2);

    calledafunc |= calledafuncsave;
    msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
    mfuncreg &= oldmfuncreg;        /* update original                    */

    debug if (debugw)
        printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    // Final order: saves, code for the elem, restores
    cdb.append(cdbs1);
    cdb.append(cdbx);
    cdb.append(cdbs2);
    return;
}
3367 
/*********************************************
 * Turn register mask into a string suitable for printing.
 *
 * A few well-known masks are returned as fixed names ("0", "ALLREGS",
 * "BYTEREGS", "allregs", "XMMREGS"). Any other mask is formatted as the
 * register names from regstring[] joined with '|', followed by the remaining
 * bits (if any) in hex.
 *
 * The formatted text lives in one of NUM static buffers that are used in
 * rotation, so several results can be passed to a single printf() call.
 * Params:
 *      rm = register mask
 * Returns:
 *      pointer to a 0-terminated string; it must not be freed and is only
 *      valid until NUM further masks have been formatted
 */

@trusted
const(char)* regm_str(regm_t rm)
{
    enum NUM = 10;      // number of rotating result buffers
    enum SMAX = 128;    // maximum string length per buffer
    __gshared char[SMAX + 1][NUM] str;
    __gshared int i;    // index of the next buffer to hand out

    if (rm == 0)
        return "0";
    if (rm == ALLREGS)
        return "ALLREGS";
    if (rm == BYTEREGS)
        return "BYTEREGS";
    if (rm == allregs)
        return "allregs";
    if (rm == XMMREGS)
        return "XMMREGS";
    char *p = str[i].ptr;
    if (++i == NUM)
        i = 0;
    *p = 0;
    // Stop as soon as every set bit has been named
    for (size_t j = 0; j < 32 && rm; j++)
    {
        if (mask(cast(uint)j) & rm)
        {
            strcat(p,regstring[j]);
            rm &= ~mask(cast(uint)j);
            if (rm)
                strcat(p,"|");
        }
    }
    if (rm)     // bits the loop above did not name
    {
        const pstrlen = strlen(p);
        char *s = p + pstrlen;
        snprintf(s, SMAX - pstrlen, "x%02x",rm);
    }
    assert(strlen(p) <= SMAX);
    /* Return the rotating buffer itself. Returning strdup(p) leaked one
     * allocation per call, since callers never free the result.
     */
    return p;
}
3413 
/*********************************
 * Scan down comma-expressions, generating code for each left operand.
 *
 * Starting at *pe, while the current elem is an OPcomma its left operand is
 * evaluated for side effects only (no result registers requested), the comma
 * node is freed, and the scan moves on to the right operand.
 * Params:
 *      cdb = code builder receiving the code for the left operands
 *      pe  = in: elem to start at;
 *            out: first elem down the right side that is not an OPcomma
 */

@trusted
void docommas(ref CodeBuilder cdb,elem **pe)
{
    const uint pushedBefore = stackpush;
    const int cleanBefore = cgstate.stackclean;
    cgstate.stackclean = 0;

    elem* node = *pe;
    if (configv.addlinenumbers && node.Esrcpos.Slinnum)
        cdb.genlinnum(node.Esrcpos);

    while (node.Eoper == OPcomma)
    {
        regm_t noresult = 0;            // left operand's value is discarded
        codelem(cdb, node.EV.E1, &noresult, true);

        elem* rhs = node.EV.E2;
        freenode(node);
        node = rhs;

        if (configv.addlinenumbers && node.Esrcpos.Slinnum)
            cdb.genlinnum(node.Esrcpos);
    }

    *pe = node;
    assert(cgstate.stackclean == 0);
    cgstate.stackclean = cleanBefore;
    genstackclean(cdb, stackpush - pushedBefore, 0);
}
3449 
/**************************
 * Merge a saved register-contents state into regcon, keeping only what
 * both agree on.
 *
 * For every register whose recorded cse value differs between regcon and
 * *pregconsave, the register's bit is cleared in regcon.cse.mval; likewise
 * for the immed values and regcon.immed.mval. Afterwards the mval masks and
 * params are and'ed with the saved ones, used is or'ed with the saved one,
 * and cse.mops is restricted to the surviving cse.mval.
 * Params:
 *      pregconsave = saved state to merge with
 */

@trusted
void andregcon(con_t *pregconsave)
{
    regm_t bit = 1;                     // mask bit of register r
    foreach (r; 0 .. REGMAX)
    {
        const regm_t others = ~bit;     // every register except r

        if (pregconsave.cse.value[r] != regcon.cse.value[r])
            regcon.cse.mval &= others;
        if (pregconsave.immed.value[r] != regcon.immed.value[r])
            regcon.immed.mval &= others;

        bit <<= 1;
    }

    regcon.used       |= pregconsave.used;
    regcon.cse.mval   &= pregconsave.cse.mval;
    regcon.immed.mval &= pregconsave.immed.mval;
    regcon.params     &= pregconsave.params;

    // mops must remain a subset of mval
    regcon.cse.mops &= regcon.cse.mval;
}
3477 
3478 
/**********************************************
 * Print a disassembly of the code instruction bytes with printf.
 *
 * Output starts with the identifier of funcsym_p, followed by one line per
 * instruction consisting of the instruction's offset in hex and its text.
 * Params:
 *    code = array of instruction bytes
 */
@trusted
private extern (D)
void disassemble(ubyte[] code)
{
    // Character sink handed to the disassembler
    void put(char c) { printf("%c", c); }

    printf("%s:\n", funcsym_p.Sident.ptr);

    // 16/32/64
    int model = 64;
    if (I16)
        model = 16;
    else if (I32)
        model = 32;

    for (size_t offset = 0; offset < code.length; )
    {
        printf("%04x:", cast(int)offset);

        uint pc;
        const len = dmd.backend.disasm86.calccodsize(code, cast(uint)offset, pc, model);

        dmd.backend.disasm86.getopstring(&put, code, cast(uint)offset, len, model, model == 16, true,
                null, null, null, null);
        printf("\n");

        offset += len;                  // advance to the next instruction
    }
}
3505 
3506 }